代码之家  ›  专栏  ›  技术社区  ›  Alihossein shahabi

所有字段的索引对SOLR有负面影响吗?

  •  0
  • Alihossein shahabi  · 技术社区  · 6 年前

    我有一个Solr云(7.4版),有2个节点和10个集合。这是我的一个 managed-schema :

    <!-- Solr managed schema - automatically generated - DO NOT EDIT -->
    <schema name="example" version="1.6">
    <!-- Unique document key: total_post_id, declared below as a required, single-valued string field. -->
    <uniqueKey>total_post_id</uniqueKey>
    <!-- Field types: boolean, date and numeric point types, followed by string and analyzed text types. -->
    <fieldType name="boolean" class="solr.BoolField" positionIncrementGap="0"/>
    <fieldType name="date" class="solr.DatePointField"/>
    <fieldType name="double" class="solr.DoublePointField"/>
    <fieldType name="float" class="solr.FloatPointField"/>
    <fieldType name="int" class="solr.IntPointField"/>
    <fieldType name="long" class="solr.LongPointField"/>
    <!-- lowerCase_text: a single analyzer chain (KeywordTokenizerFactory, then LowerCaseFilterFactory).
         Used by the channel_username and forwarded_channel_username fields. -->
    <fieldType name="lowerCase_text" class="solr.TextField" sortMissingLast="true">
     <analyzer>
       <tokenizer class="solr.KeywordTokenizerFactory"/>
       <filter class="solr.LowerCaseFilterFactory"/>
     </analyzer>
    </fieldType>
    <!-- random: referenced only by the random_* dynamic field. -->
    <fieldType name="random" class="solr.RandomSortField"/>
    <fieldType name="string" class="solr.StrField" sortMissingLast="true"/>
    <!-- strings: multi-valued StrField with docValues; referenced only by the *_ss dynamic field. -->
    <fieldType name="strings" class="solr.StrField" indexed="true" sortMissingLast="true" docValues="true" multiValued="true"/>
    <!-- text_general: separate index-time and query-time analyzers. Both use WhitespaceTokenizerFactory,
         WordDelimiterFilterFactory and LowerCaseFilterFactory; they differ only in catenateNumbers and
         catenateWords, which are "1" at index time and "0" at query time.
         NOTE(review): WordDelimiterFilterFactory is reportedly deprecated in favor of
         WordDelimiterGraphFilterFactory in recent Solr releases; confirm before upgrading. -->
    <fieldType name="text_general" class="solr.TextField" autoGeneratePhraseQueries="true" positionIncrementGap="100">
     <analyzer type="index">
       <tokenizer class="solr.WhitespaceTokenizerFactory"/>
       <filter class="solr.WordDelimiterFilterFactory" catenateNumbers="1" generateNumberParts="1" splitOnCaseChange="1" generateWordParts="1" catenateAll="0" catenateWords="1"/>
       <filter class="solr.LowerCaseFilterFactory"/>
     </analyzer>
     <analyzer type="query">
       <tokenizer class="solr.WhitespaceTokenizerFactory"/>
       <filter class="solr.WordDelimiterFilterFactory" catenateNumbers="0" generateNumberParts="1" splitOnCaseChange="1" generateWordParts="1" catenateAll="0" catenateWords="0"/>
       <filter class="solr.LowerCaseFilterFactory"/>
     </analyzer>
    </fieldType>
    <!-- Explicit fields. Every field is stored="true".
         Every field is indexed="true" except processed_text and the three raw_* fields.
         docValues="true" is set on every field except those typed lowerCase_text or text_general
         and the three raw_* fields. -->
    <field name="ExtractedHashtag" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
    <field name="ExtractedIP" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
    <field name="RobotSeeDate_dt" type="date" omitNorms="true" docValues="true" indexed="true" stored="true"/>
    <field name="_version_" type="long" docValues="true" indexed="true" stored="true"/>
    <field name="address" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
    <field name="category" type="string" omitNorms="true" docValues="true" indexed="true" stored="true"/>
    <field name="channel_id" type="long" omitNorms="true" docValues="true" indexed="true" stored="true"/>
    <field name="channel_id_real" type="long" docValues="true" indexed="true" stored="true"/>
    <field name="channel_username" type="lowerCase_text" indexed="true" stored="true"/>
    <field name="create_date" type="date" omitNorms="true" docValues="true" indexed="true" stored="true"/>
    <field name="edit_date" type="date" docValues="true" indexed="true" stored="true"/>
    <field name="fax" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
    <field name="format" type="string" omitNorms="true" docValues="true" indexed="true" stored="true"/>
    <field name="forwarded_channel" type="long" omitNorms="true" docValues="true" indexed="true" stored="true"/>
    <field name="forwarded_channel_username" type="lowerCase_text" indexed="true" stored="true"/>
    <field name="forwarded_post_id" type="long" docValues="true" multiValued="false" indexed="true" stored="true"/>
    <field name="forwarded_user" type="long" omitNorms="true" docValues="true" indexed="true" stored="true"/>
    <field name="grouped_id" type="string" docValues="true" indexed="true" stored="true"/>
    <field name="id" type="long" docValues="true" multiValued="false" indexed="true" stored="true"/>
    <field name="is_silent" type="boolean" docValues="true" indexed="true" stored="true"/>
    <field name="keywords" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
    <field name="langD_s" type="string" docValues="true" indexed="true" stored="true"/>
    <field name="location" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
    <field name="media_id" type="long" omitNorms="true" docValues="true" indexed="true" stored="true"/>
    <field name="media_unread" type="boolean" docValues="true" indexed="true" stored="true"/>
    <field name="mentioned" type="boolean" docValues="true" indexed="true" stored="true"/>
    <field name="message_number" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
    <field name="mime_type" type="string" docValues="true" indexed="true" stored="true"/>
    <field name="norm_ExtractedEmail" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
    <field name="norm_ExtractedMention" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
    <field name="norm_ExtractedMobile" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
    <field name="norm_ExtractedNationalCode" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
    <field name="norm_ExtractedPhone" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
    <field name="norm_ExtractedPostalCode" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
    <field name="norm_ExtractedURL" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
    <field name="norm_channel_title" type="text_general" omitNorms="true" indexed="true" stored="true"/>
    <field name="norm_media_caption" type="text_general" indexed="true" stored="true"/>
    <field name="norm_post_author" type="string" docValues="true" indexed="true" stored="true"/>
    <field name="organization" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
    <field name="other" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
    <field name="out" type="boolean" docValues="true" indexed="true" stored="true"/>
    <field name="person" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
    <field name="position" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
    <field name="post" type="boolean" docValues="true" indexed="true" stored="true"/>
    <field name="post_id" type="int" docValues="true" multiValued="false" indexed="true" stored="true"/>
    <field name="processed_text" type="text_general" indexed="false" stored="true"/>
    <field name="producer_name" type="string" docValues="true" indexed="true" stored="true"/>
    <field name="product" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
    <field name="province" type="string" docValues="true" indexed="true" stored="true"/>
    <field name="raw_channel_title" type="string" omitNorms="true" docValues="false" indexed="false" stored="true"/>
    <field name="raw_media_caption" type="string" indexed="false" stored="true"/>
    <field name="raw_post_author" type="string" indexed="false" stored="true"/>
    <field name="reply_markup" type="string" docValues="true" indexed="true" stored="true"/>
    <field name="signatureField" type="string" docValues="true" multiValued="false" indexed="true" stored="true"/>
    <field name="text" type="text_general" omitNorms="true" indexed="true" stored="true"/>
    <field name="time" type="string" omitNorms="true" docValues="true" multiValued="true" indexed="true" stored="true"/>
    <field name="total_post_id" type="string" docValues="true" multiValued="false" indexed="true" required="true" stored="true"/>
    <field name="view_num" type="int" omitNorms="true" docValues="true" indexed="true" stored="true"/>
    <!-- Dynamic fields: random_* uses the random type; *_raw is stored only (indexed="false",
         docValues="false"); *_ss uses the multi-valued strings type with docValues. -->
    <dynamicField name="random_*" type="random" indexed="true" stored="true"/>
    <dynamicField name="*_raw" type="string" docValues="false" indexed="false" stored="true"/>
    <dynamicField name="*_ss" type="strings" docValues="true" indexed="true" stored="true"/>
    </schema>
    

    我的集合共有10亿个文档。我可以对所有字段都启用索引(indexed)和 docValues 吗?这对读写查询有负面影响吗?

    1 回复  |  直到 6 年前
        1
  •  4
  •   Persimmonium    6 年前

    我可以索引和Docvalues所有字段吗?

    是的,你当然可以。

    这对读写查询有负面影响吗?

    索引或启用 docValues 的每个字段都有成本。启用索引/docValues 后,索引会变得更大,索引(写入)速度也会变慢。在查询(读取)方面,通常会更快(未建立索引的字段无法被搜索;docValues 对许多操作来说并非必需,但可以提高这些操作的查询性能)。

    像往常一样,你需要找到一个平衡点。如果集合很大(10亿个大集合),通常只在绝对需要的字段中启用index/docValues。如果一切正常,也许可以在接下来需要的几个字段中启用,然后迭代测试