Взвешивание (расчёт оценки) в Elasticsearch
У меня возникли проблемы с пониманием того, как рассчитывается вес в моей реализации Elasticsearch. Насколько я понимаю, если не используется Dismax, оценка документа должна быть суммой весов по всем полям, а не максимальной оценкой среди полей. Кроме того, расчёты полностью отличаются от описанных в документации.
По приведённым ниже запросу и выводу explain у меня есть три вопроса:
- Почему doc_count (docCount в выводе explain) может различаться для полей Name и Description у одного и того же документа?
- Почему итоговая оценка берётся как максимум из весов полей ("max of:"), а не как их сумма?
- Если во всём индексе всего 5 документов, содержащих искомый термин, почему docFreq равен 6?
Заранее спасибо.
Запрос:
GET localhost_document/_search?explain=1&pretty=1&search_type=dfs_query_then_fetch
{
"query": {
"multi_match" : {
"query": "lhc",
"fields": [ "Metadata.Name", "Metadata.Description^5" ]
}
}
}
Объяснение (explain):
"_explanation": {
"value": 28.427635,
"description": "max of:",
"details": [
{
"value": 28.427635,
"description": "weight(Metadata.Description:lhc in 0) [PerFieldSimilarity], result of:",
"details": [
{
"value": 28.427635,
"description": "score(doc=0,freq=1.0 = termFreq=1.0\n), product of:",
"details": [
{
"value": 5,
"description": "boost",
"details": []
},
{
"value": 5.3759904,
"description": "idf, computed as log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5)) from:",
"details": [
{
"value": 6,
"description": "docFreq",
"details": []
},
{
"value": 1404,
"description": "docCount",
"details": []
}
]
},
{
"value": 1.0575776,
"description": "tfNorm, computed as (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * fieldLength / avgFieldLength)) from:",
"details": [
{
"value": 1,
"description": "termFreq=1.0",
"details": []
},
{
"value": 1.2,
"description": "parameter k1",
"details": []
},
{
"value": 0.75,
"description": "parameter b",
"details": []
},
{
"value": 2.9529915,
"description": "avgFieldLength",
"details": []
},
{
"value": 2.56,
"description": "fieldLength",
"details": []
}
]
}
]
}
]
},
{
"value": 4.2207813,
"description": "weight(Metadata.Name:lhc in 0) [PerFieldSimilarity], result of:",
"details": [
{
"value": 4.2207813,
"description": "score(doc=0,freq=1.0 = termFreq=1.0\n), product of:",
"details": [
{
"value": 5.7578497,
"description": "idf, computed as log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5)) from:",
"details": [
{
"value": 16,
"description": "docFreq",
"details": []
},
{
"value": 5224,
"description": "docCount",
"details": []
}
]
},
{
"value": 0.7330482,
"description": "tfNorm, computed as (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * fieldLength / avgFieldLength)) from:",
"details": [
{
"value": 1,
"description": "termFreq=1.0",
"details": []
},
{
"value": 1.2,
"description": "parameter k1",
"details": []
},
{
"value": 0.75,
"description": "parameter b",
"details": []
},
{
"value": 2.1161945,
"description": "avgFieldLength",
"details": []
},
{
"value": 4,
"description": "fieldLength",
"details": []
}
]
}
]
}
]
}
]
}
}