Объяснение разбора длительностей (Duration) в SUTime и как создавать собственные шаблоны и значения
Мы пробовали следующие запросы:
3RD QUARTER OF LAST 5 YEARS (3 квартал последних 5 лет)
"tokens": [ { "index": 1, "word": "3RD", "originalText": "3RD", "lemma": "3rd", "characterOffsetBegin": 0, "characterOffsetEnd": 3, "pos": "JJ", "ner": "DATE", "normalizedNER": "PREV P5Y INTERSECT XXXX-Q3", "before": "", "after": " ", "timex": { "tid": "t1", "type": "DATE", "altValue": "PREV P5Y INTERSECT XXXX-Q3" } }, { "index": 2, "word": "QUARTER", "originalText": "QUARTER", "lemma": "quarter", "characterOffsetBegin": 4, "characterOffsetEnd": 11, "pos": "NN", "ner": "DATE", "normalizedNER": "PREV P5Y INTERSECT XXXX-Q3", "before": " ", "after": " ", "timex": { "tid": "t1", "type": "DATE", "altValue": "PREV P5Y INTERSECT XXXX-Q3" } }, { "index": 3, "word": "OF", "originalText": "OF", "lemma": "of", "characterOffsetBegin": 12, "characterOffsetEnd": 14, "pos": "IN", "ner": "DATE", "normalizedNER": "PREV P5Y INTERSECT XXXX-Q3", "before": " ", "after": " ", "timex": { "tid": "t1", "type": "DATE", "altValue": "PREV P5Y INTERSECT XXXX-Q3" } }, { "index": 4, "word": "LAST", "originalText": "LAST", "lemma": "last", "characterOffsetBegin": 15, "characterOffsetEnd": 19, "pos": "JJ", "ner": "DATE", "normalizedNER": "PREV P5Y INTERSECT XXXX-Q3", "before": " ", "after": " ", "timex": { "tid": "t1", "type": "DATE", "altValue": "PREV P5Y INTERSECT XXXX-Q3" } }, { "index": 5, "word": "5", "originalText": "5", "lemma": "5", "characterOffsetBegin": 20, "characterOffsetEnd": 21, "pos": "CD", "ner": "DATE", "normalizedNER": "PREV P5Y INTERSECT XXXX-Q3", "before": " ", "after": " ", "timex": { "tid": "t1", "type": "DATE", "altValue": "PREV P5Y INTERSECT XXXX-Q3" } }, { "index": 6, "word": "YEARS", "originalText": "YEARS", "lemma": "year", "characterOffsetBegin": 22, "characterOffsetEnd": 27, "pos": "NNS", "ner": "DATE", "normalizedNER": "PREV P5Y INTERSECT XXXX-Q3", "before": " ", "after": "", "timex": { "tid": "t1", "type": "DATE", "altValue": "PREV P5Y INTERSECT XXXX-Q3" } } ]
3RD QUARTER OF THE LAST 5 YEARS (3 квартал последних 5 лет)
"tokens": [ { "index": 1, "word": "3RD", "originalText": "3RD", "lemma": "3rd", "characterOffsetBegin": 0, "characterOffsetEnd": 3, "pos": "JJ", "ner": "DATE", "normalizedNER": "2018-Q3", "before": "", "after": " ", "timex": { "tid": "t1", "type": "DATE", "value": "2018-Q3" } }, { "index": 2, "word": "QUARTER", "originalText": "QUARTER", "lemma": "quarter", "characterOffsetBegin": 4, "characterOffsetEnd": 11, "pos": "NN", "ner": "DATE", "normalizedNER": "2018-Q3", "before": " ", "after": " ", "timex": { "tid": "t1", "type": "DATE", "value": "2018-Q3" } }, { "index": 3, "word": "OF", "originalText": "OF", "lemma": "of", "characterOffsetBegin": 12, "characterOffsetEnd": 14, "pos": "IN", "ner": "DATE", "normalizedNER": "OF ", "before": " ", "after": " " }, { "index": 4, "word": "THE", "originalText": "THE", "lemma": "the", "characterOffsetBegin": 15, "characterOffsetEnd": 18, "pos": "DT", "ner": "DATE", "normalizedNER": "P5Y", "before": " ", "after": " ", "timex": { "tid": "t3", "type": "DURATION", "value": "P5Y" } }, { "index": 5, "word": "LAST", "originalText": "LAST", "lemma": "last", "characterOffsetBegin": 19, "characterOffsetEnd": 23, "pos": "JJ", "ner": "DATE", "normalizedNER": "P5Y", "before": " ", "after": " ", "timex": { "tid": "t3", "type": "DURATION", "value": "P5Y" } }, { "index": 6, "word": "5", "originalText": "5", "lemma": "5", "characterOffsetBegin": 24, "characterOffsetEnd": 25, "pos": "CD", "ner": "DATE", "normalizedNER": "P5Y", "before": " ", "after": " ", "timex": { "tid": "t3", "type": "DURATION", "value": "P5Y" } }, { "index": 7, "word": "YEARS", "originalText": "YEARS", "lemma": "year", "characterOffsetBegin": 26, "characterOffsetEnd": 31, "pos": "NNS", "ner": "DATE", "normalizedNER": "P5Y", "before": " ", "after": "", "timex": { "tid": "t3", "type": "DURATION", "value": "P5Y" } } ]
In the 3rd quarter of the last 5 YEARS (в 3 квартале последних 5 лет)
"tokens": [ { "index": 1, "word": "In", "originalText": "In", "lemma": "in", "characterOffsetBegin": 0, "characterOffsetEnd": 2, "pos": "IN", "ner": "O", "before": "", "after": " " }, { "index": 2, "word": "the", "originalText": "the", "lemma": "the", "characterOffsetBegin": 3, "characterOffsetEnd": 6, "pos": "DT", "ner": "DATE", "normalizedNER": "2018-Q3", "before": " ", "after": " ", "timex": { "tid": "t1", "type": "DATE", "value": "2018-Q3" } }, { "index": 3, "word": "3rd", "originalText": "3rd", "lemma": "3rd", "characterOffsetBegin": 7, "characterOffsetEnd": 10, "pos": "JJ", "ner": "DATE", "normalizedNER": "2018-Q3", "before": " ", "after": " ", "timex": { "tid": "t1", "type": "DATE", "value": "2018-Q3" } }, { "index": 4, "word": "quarter", "originalText": "quarter", "lemma": "quarter", "characterOffsetBegin": 11, "characterOffsetEnd": 18, "pos": "NN", "ner": "DATE", "normalizedNER": "2018-Q3", "before": " ", "after": " ", "timex": { "tid": "t1", "type": "DATE", "value": "2018-Q3" } }, { "index": 5, "word": "of", "originalText": "of", "lemma": "of", "characterOffsetBegin": 19, "characterOffsetEnd": 21, "pos": "IN", "ner": "DATE", "normalizedNER": "of ", "before": " ", "after": " " }, { "index": 6, "word": "the", "originalText": "the", "lemma": "the", "characterOffsetBegin": 22, "characterOffsetEnd": 25, "pos": "DT", "ner": "DATE", "normalizedNER": "P5Y", "before": " ", "after": " ", "timex": { "tid": "t3", "type": "DURATION", "value": "P5Y" } }, { "index": 7, "word": "last", "originalText": "last", "lemma": "last", "characterOffsetBegin": 26, "characterOffsetEnd": 30, "pos": "JJ", "ner": "DATE", "normalizedNER": "P5Y", "before": " ", "after": " ", "timex": { "tid": "t3", "type": "DURATION", "value": "P5Y" } }, { "index": 8, "word": "5", "originalText": "5", "lemma": "5", "characterOffsetBegin": 31, "characterOffsetEnd": 32, "pos": "CD", "ner": "DATE", "normalizedNER": "P5Y", "before": " ", "after": " ", "timex": { "tid": "t3", "type": "DURATION", 
"value": "P5Y" } }, { "index": 9, "word": "YEARS", "originalText": "YEARS", "lemma": "year", "characterOffsetBegin": 33, "characterOffsetEnd": 38, "pos": "NNS", "ner": "DATE", "normalizedNER": "P5Y", "before": " ", "after": "", "timex": { "tid": "t3", "type": "DURATION", "value": "P5Y" } } ]
По смыслу все три запроса должны давать одинаковый результат, но результат меняется от простого добавления слова "the".
Существуют ли какие-либо правила, которые можно использовать или задать, чтобы эти выражения давали одинаковый результат?
Может ли кто-нибудь подсказать мне, как редактировать правила в english.sutime.txt?
Спасибо