P4Cms Content Lucene Document.
More...
List of all members.
Public Member Functions |
| __construct (P4Cms_Content $content) |
| Make a new lucene document instance for the given content entry.
|
| getContentEntry () |
| Get the content entry associated with the Lucene document.
|
Protected Member Functions |
| _detectEncoding ($data) |
| Detect the encoding of a string.
|
| _getContentFields () |
| Collect all of the fields for the content entry with information about the field and the value pulled from the content entry.
|
| _getIndexFilters ($name, $data) |
| Get the filters to apply to the given field value before it is indexed.
|
| _getLuceneFieldType ($name, $data) |
| Determine the correct lucene field type to use for the given content field definition/value.
|
| _isIndexDisabled ($name, $data) |
| Determine if a given field should not be indexed.
|
| _loadFields () |
| Convert the content fields into lucene document fields.
|
| _mergeFields ($a, $b) |
| Merge two sets of fields.
|
| _prepareFieldValue ($name, $data) |
| Prepare a field value for indexing by applying filters to it.
|
| _toLuceneField ($name, $data) |
| Convert from a field definition/value to a lucene document field.
|
Protected Attributes |
| $_content = null |
Detailed Description
P4Cms Content Lucene Document.
- Allows a Zend Search Lucene document to be created from a content entry.
- Determines if a P4Cms Content field should be indexed.
- Specifies how a Content field should be indexed.
- Copyright:
- 2011-2012 Perforce Software. All rights reserved
- License:
- Please see LICENSE.txt in top-level folder of this distribution.
- Version:
- 2012.2/486814
Constructor & Destructor Documentation
P4Cms_Content_LuceneDocument::__construct |
( |
P4Cms_Content $ |
content | ) |
|
Make a new lucene document instance for the given content entry.
- Parameters:
-
P4Cms_Content | $content | the content entry to make a lucene document for. |
Member Function Documentation
P4Cms_Content_LuceneDocument::_detectEncoding |
( |
$ |
data | ) |
[protected] |
Detect the encoding of a string.
- Parameters:
-
array | $data | the field details; the 'value' entry is checked, or the file at 'tempFile' when the mbstring extension is not loaded. |
- Returns:
- string the encoding, or false if it cannot be detected.
{
    // Detect the character encoding of a field value.
    //
    // $data is the field details array: 'value' is inspected when the
    // mbstring extension is loaded, otherwise the file named by 'tempFile'
    // is inspected through fileinfo.
    //
    // Returns the encoding name, or false when it cannot be determined.
    if (extension_loaded('mbstring')) {
        return mb_detect_encoding($data['value']);
    }

    // finfo_open() returns false when the magic database cannot be loaded;
    // bail out instead of handing false to finfo_file().
    $finfo = finfo_open(FILEINFO_MIME);
    if ($finfo === false) {
        return false;
    }

    // finfo_file() returns false on failure; there is nothing to parse then.
    $mime = finfo_file($finfo, $data['tempFile']);
    if (!is_string($mime)) {
        return false;
    }

    // Keep only the charset portion of a "type/subtype; charset=name" string.
    if (preg_match('/^(.*)\/(.*); charset=(.*)$/', $mime, $matches) === 1) {
        return trim($matches[3]);
    }
    return false;
}
P4Cms_Content_LuceneDocument::_getContentFields |
( |
| ) |
[protected] |
Collect all of the fields for the content entry with information about the field and the value pulled from the content entry.
- Returns:
- array the list of all content fields and their details/values.
{
    // Build the built-in field list for the entry, layer the content type's
    // element definitions on top, then copy in the entry's values/metadata.
    $content     = $this->getContentEntry();
    $contentType = $content->getContentType();

    // Sub-arrays shared by several of the built-in fields below.
    $storedOnly = array('index' => array('type' => 'unindexed'));
    $plainText  = array('mimeType' => 'text/plain');

    // Assembled one key at a time; the key order matches the field order.
    $fields = array();
    $fields['uri'] = array(
        'value'  => $content->getUri(),
        'search' => array('index' => array('type' => 'keyword')),
    );
    $fields['title'] = array(
        'value'  => $content->getTitle(),
        'search' => array('index' => array('type' => 'text')),
    );
    $fields['excerpt'] = array(
        'value'  => $content->getExcerpt(),
        'search' => $storedOnly,
    );
    $fields['contentId'] = array(
        'value'    => $content->getId(),
        'search'   => $storedOnly,
        'metadata' => $plainText,
    );
    $fields['contentType'] = array(
        'value'    => $content->getContentTypeId(),
        'search'   => $storedOnly,
        'metadata' => $plainText,
    );
    $fields['resource'] = array(
        'value'  => 'content',
        'search' => $storedOnly,
    );
    $fields['privilege'] = array(
        'value'  => 'access',
        'search' => $storedOnly,
    );

    $fields = $this->_mergeFields($fields, $contentType->getElements());

    foreach ($content->getValues() as $name => $value) {
        // Only fields already known (built-in or from the type) get values.
        if (array_key_exists($name, $fields)) {
            $fields[$name]['value']    = $value;
            $fields[$name]['metadata'] = $content->getFieldMetadata($name);
        }

        // The first field carrying a filename in its metadata contributes a
        // synthetic 'filename' field; later ones are ignored.
        if (!isset($fields[$name]['metadata']['filename'])
            || array_key_exists('filename', $fields)
        ) {
            continue;
        }
        $fields['filename']['value']  = $fields[$name]['metadata']['filename'];
        $fields['filename']['search'] = array('index' => array('type' => 'unstored'));
    }

    return $fields;
}
P4Cms_Content_LuceneDocument::_getIndexFilters |
( |
$ |
name, |
|
|
$ |
data |
|
) |
| [protected] |
Get the filters to apply to the given field value before it is indexed.
The filters to use can be specified in the content type field definition.
- Parameters:
-
string | $name | the name of the field to be indexed. |
array | $data | the details and value of the field. |
- Returns:
- array the set of filters to apply to the field value.
{
    // Resolve the filters configured for a field under
    // $data['search']['index']['filters'].
    //
    // $name is the field name; it is not used by this implementation.
    // $data is the field details array.
    //
    // Returns an empty array when no filters are configured, otherwise
    // whatever the form element reports through getFilters().
    if (!isset($data['search']['index']['filters'])) {
        return array();
    }

    // Route the configured filters through a throw-away form element and
    // hand back the element's filter list.
    $form = new P4Cms_Form;
    $form->addElement(
        'text',
        'dummy',
        array('filters' => $data['search']['index']['filters'])
    );
    return $form->getElement('dummy')->getFilters();
}
P4Cms_Content_LuceneDocument::_getLuceneFieldType |
( |
$ |
name, |
|
|
$ |
data |
|
) |
| [protected] |
Determine the correct lucene field type to use for the given content field definition/value.
Checks for explicit index type in field data - defaults to 'unstored'.
- Parameters:
-
string | $name | the name of the field to convert. |
array | $data | the details and value of the field. |
- Returns:
- string the type of lucene field to use:
  keyword   - [ ] tokenized [x] indexed [x] stored
  unindexed - [ ] tokenized [ ] indexed [x] stored
  binary    - [ ] tokenized [ ] indexed [x] stored
  text      - [x] tokenized [x] indexed [x] stored
  unstored  - [x] tokenized [x] indexed [ ] stored
{
    // Pick the lucene field type for a content field.
    //
    // $name is the field name; it is not used by this implementation.
    // $data is the field details array; an explicit type is read from
    // $data['search']['index']['type'].
    //
    // Returns one of 'keyword', 'unindexed', 'binary', 'text' or 'unstored';
    // 'unstored' is the fallback when no recognised type is configured.
    $types = array('keyword', 'unindexed', 'binary', 'text', 'unstored');

    // Strict comparison: a loosely-matching non-string value (e.g. true)
    // must not be accepted and returned in place of a type name.
    if (isset($data['search']['index']['type'])
        && in_array($data['search']['index']['type'], $types, true)
    ) {
        return $data['search']['index']['type'];
    }

    return 'unstored';
}
P4Cms_Content_LuceneDocument::_isIndexDisabled |
( |
$ |
name, |
|
|
$ |
data |
|
) |
| [protected] |
Determine if a given field should not be indexed.
- Parameters:
-
string | $name | the name of the field to be indexed. |
array | $data | the details and value of the field. |
- Returns:
- bool true if we should not index this field; false otherwise.
{
    // A field is skipped only when its 'disabled' index option is present
    // and truthy; a missing option means the field is indexed.
    return !empty($data['search']['index']['disabled']);
}
P4Cms_Content_LuceneDocument::_loadFields |
( |
| ) |
[protected] |
Convert the content fields into lucene document fields.
{
    // Walk every content field and add a lucene field for each one that is
    // not disabled and can be converted.
    $contentFields = $this->_getContentFields();
    foreach ($contentFields as $fieldName => $fieldData) {
        if (!$this->_isIndexDisabled($fieldName, $fieldData)) {
            $luceneField = $this->_toLuceneField($fieldName, $fieldData);

            // Anything other than a lucene field is silently dropped.
            if ($luceneField instanceof Zend_Search_Lucene_Field) {
                $this->addField($luceneField);
            }
        }
    }
}
P4Cms_Content_LuceneDocument::_mergeFields |
( |
$ |
a, |
|
|
$ |
b |
|
) |
| [protected] |
Merge two sets of fields.
Options in the base fields are replaced by those from the append fields; defaults in the base fields are kept when the append fields do not set them.
This works like array_merge_recursive but instead of making values with the same key an array, the value in the first array is replaced.
- Parameters:
-
array | $a | the base fields. |
array | $b | the append fields. |
- Returns:
- array the merged fields.
{
    // Recursively merge the append fields ($b) into the base fields ($a).
    //
    // - String keys: the value from $b replaces the one in $a, unless either
    //   side is an array, in which case the two are merged recursively.
    // - Numeric keys: scalar values are appended to $a if not already
    //   present (loose comparison); arrays are merged recursively.
    //
    // Non-array arguments are wrapped in an array first (an empty $a
    // becomes an empty array).
    //
    // Returns the merged array.
    if (!is_array($a)) {
        $a = empty($a) ? array() : array($a);
    }
    if (!is_array($b)) {
        $b = array($b);
    }

    foreach ($b as $key => $value) {
        $exists = array_key_exists($key, $a);

        // New string keys are copied over as-is.
        if (!$exists && !is_numeric($key)) {
            $a[$key] = $value;
            continue;
        }

        // Numeric keys may be absent from the base; read through the
        // existence check so no undefined offset is accessed.
        $current = $exists ? $a[$key] : null;

        if (is_array($value) || is_array($current)) {
            $a[$key] = $this->_mergeFields($current, $value);
        } else if (is_numeric($key)) {
            if (!in_array($value, $a)) {
                $a[] = $value;
            }
        } else {
            $a[$key] = $value;
        }
    }

    return $a;
}
P4Cms_Content_LuceneDocument::_prepareFieldValue |
( |
$ |
name, |
|
|
$ |
data |
|
) |
| [protected] |
Prepare a field value for indexing by applying filters to it.
- Parameters:
-
string | $name | the name of the field to be indexed. |
array | $data | the details and value of the field. |
- Returns:
- string $value the prepared value.
- Exceptions:
- P4Cms_Content_Exception if the field has no filters and its mime type does not start with 'text/'.
{
    // Run the field's value through its configured index filters, in order.
    // Without filters only values whose mime type starts with 'text/' are
    // accepted; anything else raises P4Cms_Content_Exception.
    $indexFilters = $this->_getIndexFilters($name, $data);

    if (empty($indexFilters)) {
        // The mime type is only consulted when there are no filters.
        if (strpos($data['mimeType'], 'text/') !== 0) {
            throw new P4Cms_Content_Exception(
                "Cannot prepare non-plain-text value without filters."
            );
        }
    }

    $prepared = $data['value'];
    foreach ($indexFilters as $indexFilter) {
        $prepared = $indexFilter->filter($prepared);
    }

    return $prepared;
}
P4Cms_Content_LuceneDocument::_toLuceneField |
( |
$ |
name, |
|
|
$ |
data |
|
) |
| [protected] |
Convert from a field definition/value to a lucene document field.
- Parameters:
-
string | $name | the name of the field to convert. |
array | $data | the details and value of the field. |
- Returns:
- Zend_Search_Lucene_Field|null lucene document field object or null if we can't create one.
{
    // Convert a field definition/value into a lucene document field.
    //
    // $name is the field name; $data is the field details array.
    //
    // Returns a Zend_Search_Lucene_Field, or null when the field has no
    // scalar 'value' or the value cannot be prepared
    // (P4Cms_Content_Exception). Any other exception is rethrown after the
    // temporary file has been removed.
    if (!array_key_exists('value', $data) || !is_scalar($data['value'])) {
        return null;
    }

    // The value is written to a temp file so that file-based encoding and
    // mime type detection can inspect it.
    $tempFile = tempnam(sys_get_temp_dir(), $name);

    try {
        file_put_contents($tempFile, $data['value']);
        $data['tempFile'] = $tempFile;

        $encoding         = $this->_detectEncoding($data);
        $data['encoding'] = $encoding ?: 'utf8';

        // Explicit metadata wins over sniffing the temp file.
        $data['mimeType'] = isset($data['metadata']['mimeType'])
            ? $data['metadata']['mimeType']
            : P4Cms_Validate_File_MimeType::getTypeOfFile($tempFile);

        $type = $this->_getLuceneFieldType($name, $data);

        try {
            $value = $this->_prepareFieldValue($name, $data);

            // $type names the static factory method on the lucene field class.
            $field = Zend_Search_Lucene_Field::$type(
                $name,
                $value,
                $data['encoding']
            );
        } catch (P4Cms_Content_Exception $e) {
            // Values that cannot be prepared are simply not indexed.
            $field = null;
        }
    } catch (Exception $e) {
        // Do not leave the temp file behind when something unexpected
        // (e.g. a filter) throws; clean up, then let the error propagate.
        unlink($tempFile);
        throw $e;
    }

    unlink($tempFile);
    return $field;
}
P4Cms_Content_LuceneDocument::getContentEntry |
( |
| ) |
|
Get the content entry associated with the Lucene document.
- Returns:
- P4Cms_Content the content entry this lucene document represents.
{
    // Plain accessor for the protected $_content property.
    $entry = $this->_content;
    return $entry;
}
Member Data Documentation
P4Cms_Content_LuceneDocument::$_content = null [protected] |
The documentation for this class was generated from the following file: