DC special indexing (date, string)


  • dc:date indexed as date (dc.date_dt) and as text (dc.date)

Edit SOLR schema.xml (/usr/local/solr/islandora/conf/)

...
<!-- Field type "date", implemented by solr.TrieDateField; referenced by field definitions through type="date". -->
<fieldType name="date" class="solr.TrieDateField" sortMissingLast="true" omitNorms="true" precisionStep="6" positionIncrementGap="100"/>
...
   <!-- Any field whose name ends in "_dt" (for example dc.date_dt) is indexed and stored with the "date" field type.
        No multiValued attribute is set on this declaration.
        NOTE(review): confirm how records that carry several dc:date values should be handled for this field. -->
   <dynamicField name="*_dt" type="date"    indexed="true"  stored="true"/>
...

Edit DC_to_solr.xslt (/var/lib/tomcat7/webapps/fedoragsearch/WEB-INF/classes/fgsconfigFinal/index/FgsIndex/islandora_transforms/)

<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  xmlns:foxml="info:fedora/fedora-system:def/foxml#"
  xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/"
  xmlns:dc="http://purl.org/dc/elements/1.1/">
  <!-- Entry point for the Dublin Core datastreams.
       Matches the last foxml:datastreamVersion of a foxml:datastream whose ID is DC or QDC and
       applies templates to the oai_dc:dc element found under $content.
       Parameters:
         content - node set holding the datastream content (supplied by the caller)
         prefix  - string placed before each element name in the Solr field name (default "dc.")
         suffix  - string placed after each element name in the Solr field name (default empty) -->
  <xsl:template match="foxml:datastream[@ID='DC' or @ID='QDC']/foxml:datastreamVersion[last()]">
    <xsl:param name="content"/>
    <xsl:param name="prefix" select="'dc.'"/>
    <xsl:param name="suffix" select="''"/>
    <!-- Forward the naming parameters unchanged so the field names are built in one place. -->
    <xsl:apply-templates select="$content/oai_dc:dc">
      <xsl:with-param name="suffix" select="$suffix"/>
      <xsl:with-param name="prefix" select="$prefix"/>
    </xsl:apply-templates>
  </xsl:template>
  <!-- Turns every child element of oai_dc:dc into a Solr <field>.
       Parameters:
         prefix  - string placed before the element's local name (default "dc.")
         suffix  - string placed after the local name for the plain-text field (default empty)
         suffixt - string placed after the local name for the date-typed field (default "_dt")
       Output per child element:
         * for elements whose local name is "date": a field named prefix + "date" + suffixt holding
           the value returned by get_ISO8601_date, emitted only when that value is not blank;
         * for every element (dates included): a field named prefix + local-name + suffix holding
           the element's text.
       The named template get_ISO8601_date is not defined in this stylesheet; it must be supplied
       by the stylesheet that includes or imports this one. -->
  <xsl:template match="oai_dc:dc">
    <xsl:param name="prefix">dc.</xsl:param>
    <xsl:param name="suffix"></xsl:param>
    <xsl:param name="suffixt">_dt</xsl:param>
    <xsl:for-each select="./*">
      <!-- Date-typed copy first, so the output order stays: date field, then text field. -->
      <xsl:if test="local-name() = 'date'">
        <xsl:variable name="rawTextValue" select="normalize-space(text())"/>
        <xsl:variable name="textValue">
          <xsl:call-template name="get_ISO8601_date">
            <xsl:with-param name="date" select="$rawTextValue"/>
            <xsl:with-param name="pid" select="'not provided'"/>
            <xsl:with-param name="datastream" select="'not provided'"/>
          </xsl:call-template>
        </xsl:variable>
        <!-- Skip the date field when the conversion produced nothing: an empty string is not a
             valid value for a date-typed Solr field.
             NOTE(review): assumes get_ISO8601_date yields an empty result for input it cannot
             convert; confirm against its definition. -->
        <xsl:if test="not(normalize-space($textValue) = '')">
          <field>
            <xsl:attribute name="name">
              <xsl:value-of select="concat($prefix, local-name(), $suffixt)"/>
            </xsl:attribute>
            <xsl:value-of select="$textValue"/>
          </field>
        </xsl:if>
      </xsl:if>
      <!-- Plain-text field, named according to the element's local name and containing its text. -->
      <field>
        <xsl:attribute name="name">
          <xsl:value-of select="concat($prefix, local-name(), $suffix)"/>
        </xsl:attribute>
        <xsl:value-of select="text()"/>
      </field>
    </xsl:for-each>
  </xsl:template>
</xsl:stylesheet>
  • dc:coverage/dc:subject indexed as string (dc.coverage/dc.subject) and as text (dc.coverage_dct/dc.subject_dct)

Edit SOLR schema.xml (/usr/local/solr/islandora/conf/)

...
<!-- Field type "string", implemented by solr.StrField; no analyzer is configured for it here. -->
<fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
<!-- Field type "text" (solr.TextField) with separate analyzer chains for indexing and querying. -->
<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
  <!-- Index-time chain: whitespace tokenizer, then the hyphenated-words, word-delimiter,
       stop-word, lower-case and remove-duplicates filters, in that order. -->
  <analyzer type="index">
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.HyphenatedWordsFilterFactory"/>
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="0"
            catenateNumbers="0"
            catenateAll="0"
            types="wdfftypes.txt"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
  <!-- Query-time chain: same tokenizer; uses the synonym filter (synonyms.txt) in place of the
       hyphenated-words filter, followed by the same remaining filters as the index chain. -->
  <analyzer type="query">
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="0"
            catenateNumbers="0"
            catenateAll="0"
            types="wdfftypes.txt"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
</fieldType>
...
   <!-- Explicit declarations for the Dublin Core coverage and subject fields.
        dc.coverage / dc.subject use the "string" type; dc.coverage_dct / dc.subject_dct use the "text" type.
        All four are indexed, stored and multi-valued. -->
   <field name="dc.coverage" type="string"  indexed="true"  stored="true" multiValued="true"/>
   <field name="dc.coverage_dct" type="text"  indexed="true"  stored="true" multiValued="true"/>
   <field name="dc.subject" type="string"  indexed="true"  stored="true" multiValued="true"/>
   <field name="dc.subject_dct" type="text"  indexed="true"  stored="true" multiValued="true"/>
 
   <!-- Catch-all dynamic field: pattern "*" with type "text_fgs", indexed, stored and multi-valued.
        NOTE(review): the explicit dc.coverage / dc.subject declarations are presumably needed so that
        those names do not fall through to this pattern; confirm Solr's precedence rules. -->
   <dynamicField name="*" type="text_fgs"  indexed="true"  stored="true" multiValued="true"/>
   <!-- _version_ field of type "long", indexed and stored. -->
   <field name="_version_" type="long" indexed="true" stored="true"/>
 </fields>
...
<!-- Copy the values of dc.coverage and dc.subject into dc.coverage_dct and dc.subject_dct respectively. -->
<copyField source="dc.coverage" dest="dc.coverage_dct"/>
<copyField source="dc.subject" dest="dc.subject_dct"/>
...
 
 
isla7x/index.txt · Last modified: 2017/02/21 16:15 by giancarlo

Developers: CNR IRCrES IT Office and Library
Giancarlo Birello (giancarlo.birello _@_ ircres.cnr.it) and Anna Perin (anna.perin _@_ ircres.cnr.it)
FAbb@TO.CNR is licensed under: Creative Commons License
Recent changes RSS feed Creative Commons License Valid XHTML 1.0 Valid CSS Driven by DokuWiki
Drupal Garland Theme for Dokuwiki