Использование логики Apache cTAKES из Java-приложения
Я пытаюсь встроить логику Apache cTAKES NLP в свое приложение.
Прежде всего, я не могу найти хорошую документацию, чтобы понять, как это можно сделать.
Из разных фрагментов кода, которые я нашел в интернете, я создал следующий тестовый код:
/**
 * Minimal driver that runs a cTAKES annotator pipeline over a hard-coded
 * lab-report note and prints every feature structure found in the CAS as JSON.
 */
public class CTAKESTest {

    /**
     * Creates a JCas holding the sample note, runs the pipeline returned by
     * {@link #getFastPipeline()} on it and writes the Gson serialization of all
     * resulting {@code TOP} feature structures to standard output.
     *
     * @param args command-line arguments; not used
     * @throws UIMAException         if the CAS or the pipeline cannot be created or run
     * @throws MalformedURLException declared by the pipeline factory methods
     */
    public static void main(String[] args) throws UIMAException, MalformedURLException {
        final String note = "Serum Cholesterol 154 150 250 mgs/dl\n-\nSerum Triglycerides 67 90 200 mgs /dl\n-\nSerum HDL: Cholesterol 38 35 55 mgs /dl\n-\nSerum LDL: Cholesterol 49 85 150 mgs/d1\n-\nSerum VLDL: Cholesterol 13 10 40 mgs/dl\n-\nTotal Cholesterol / HDL Ratio: 3.90";

        final JCas jcas = JCasFactory.createJCas();
        jcas.setDocumentText(note);

        final AnalysisEngineDescription aed = getFastPipeline();
        SimplePipeline.runPipeline(jcas, aed);

        // Convert straight to a typed array: no raw ArrayList and no intermediate copy.
        final Collection<TOP> codes = JCasUtil.selectAll(jcas);
        final TOP[] res = codes.toArray(new TOP[0]);

        // NOTE(review): Gson reflects over the feature-structure objects as-is;
        // confirm this produces the JSON shape you actually want.
        final String json = new Gson().toJson(res);
        System.out.println(json);
    }

    /**
     * Builds the aggregate description for the full pipeline: the token
     * processing aggregate, followed by {@code DefaultJCasTermAnnotator},
     * {@code ClearNLPDependencyParserAE} and the Polarity, Uncertainty, History,
     * Conditional, Generic and Subject ClearTK analysis engines, in that order.
     *
     * @return the aggregate analysis engine description
     * @throws ResourceInitializationException if a description cannot be created
     * @throws MalformedURLException           declared for callers; propagated from the factories
     */
    public static AnalysisEngineDescription getFastPipeline()
            throws ResourceInitializationException, MalformedURLException {
        final AggregateBuilder builder = new AggregateBuilder();
        builder.add(getTokenProcessingPipeline());
        builder.add(DefaultJCasTermAnnotator.createAnnotatorDescription());
        builder.add(ClearNLPDependencyParserAE.createAnnotatorDescription());
        builder.add(PolarityCleartkAnalysisEngine.createAnnotatorDescription());
        builder.add(UncertaintyCleartkAnalysisEngine.createAnnotatorDescription());
        builder.add(HistoryCleartkAnalysisEngine.createAnnotatorDescription());
        builder.add(ConditionalCleartkAnalysisEngine.createAnnotatorDescription());
        builder.add(GenericCleartkAnalysisEngine.createAnnotatorDescription());
        builder.add(SubjectCleartkAnalysisEngine.createAnnotatorDescription());
        return builder.createAggregateDescription();
    }

    /**
     * Builds the aggregate description for the token-level stage: segment
     * annotator, sentence detector, PTB tokenizer, LVG annotator,
     * context-dependent tokenizer and POS tagger, in that order.
     *
     * @return the aggregate analysis engine description
     * @throws ResourceInitializationException if a description cannot be created
     * @throws MalformedURLException           declared for callers; propagated from the factories
     */
    public static AnalysisEngineDescription getTokenProcessingPipeline()
            throws ResourceInitializationException, MalformedURLException {
        final AggregateBuilder builder = new AggregateBuilder();
        builder.add(SimpleSegmentAnnotator.createAnnotatorDescription());
        builder.add(SentenceDetector.createAnnotatorDescription());
        builder.add(TokenizerAnnotatorPTB.createAnnotatorDescription());
        // NOTE(review): the "URI is not hierarchical" failure reported for this code
        // is raised in LvgCmdApiResourceImpl.load (java.io.File constructor) while
        // the LVG resource is initialized, and the logged lvg.properties URL points
        // inside a jar — presumably LVG needs its files on the plain file system; verify.
        builder.add(LvgAnnotator.createAnnotatorDescription());
        builder.add(ContextDependentTokenizerAnnotator.createAnnotatorDescription());
        builder.add(POSTagger.createAnnotatorDescription());
        return builder.createAggregateDescription();
    }
}
но во время запуска происходит сбой со следующей ошибкой:
08:37:01.978 [main] INFO o.apache.ctakes.lvg.ae.LvgAnnotator - URL for lvg.properties =file:/C:/Users/Alex/.m2/repository/net/sourceforge/ctakesresources/ctakes-resources-lvg2008/4.0.0/ctakes-resources-lvg2008-4.0.0.jar!/org/apache/ctakes/lvg/data/config/lvg.properties
08:37:03.454 [main] INFO o.a.ctakes.core.ae.SentenceDetector - Sentence detector model file: org/apache/ctakes/core/sentdetect/sd-med-model.zip
08:37:03.566 [main] INFO o.a.c.core.ae.TokenizerAnnotatorPTB - Initializing org.apache.ctakes.core.ae.TokenizerAnnotatorPTB
Exception in thread "main" java.lang.IllegalArgumentException: URI is not hierarchical
at java.io.File.<init>(Unknown Source)
at org.apache.ctakes.lvg.resource.LvgCmdApiResourceImpl.load(LvgCmdApiResourceImpl.java:65)
at org.apache.uima.resource.impl.ResourceManager_impl.registerResource(ResourceManager_impl.java:628)
at org.apache.uima.resource.impl.ResourceManager_impl.initializeExternalResources(ResourceManager_impl.java:464)
at org.apache.uima.resource.Resource_ImplBase.initialize(Resource_ImplBase.java:193)
at org.apache.uima.analysis_engine.impl.AnalysisEngineImplBase.initialize(AnalysisEngineImplBase.java:157)
at org.apache.uima.analysis_engine.impl.PrimitiveAnalysisEngine_impl.initialize(PrimitiveAnalysisEngine_impl.java:131)
at org.apache.uima.impl.AnalysisEngineFactory_impl.produceResource(AnalysisEngineFactory_impl.java:94)
at org.apache.uima.impl.CompositeResourceFactory_impl.produceResource(CompositeResourceFactory_impl.java:62)
at org.apache.uima.UIMAFramework.produceResource(UIMAFramework.java:279)
at org.apache.uima.UIMAFramework.produceAnalysisEngine(UIMAFramework.java:407)
at org.apache.uima.analysis_engine.asb.impl.ASB_impl.setup(ASB_impl.java:256)
at org.apache.uima.analysis_engine.impl.AggregateAnalysisEngine_impl.initASB(AggregateAnalysisEngine_impl.java:429)
at org.apache.uima.analysis_engine.impl.AggregateAnalysisEngine_impl.initializeAggregateAnalysisEngine(AggregateAnalysisEngine_impl.java:373)
at org.apache.uima.analysis_engine.impl.AggregateAnalysisEngine_impl.initialize(AggregateAnalysisEngine_impl.java:186)
at org.apache.uima.impl.AnalysisEngineFactory_impl.produceResource(AnalysisEngineFactory_impl.java:94)
at org.apache.uima.impl.CompositeResourceFactory_impl.produceResource(CompositeResourceFactory_impl.java:62)
at org.apache.uima.UIMAFramework.produceResource(UIMAFramework.java:279)
at org.apache.uima.UIMAFramework.produceAnalysisEngine(UIMAFramework.java:407)
at org.apache.uima.analysis_engine.asb.impl.ASB_impl.setup(ASB_impl.java:256)
at org.apache.uima.analysis_engine.impl.AggregateAnalysisEngine_impl.initASB(AggregateAnalysisEngine_impl.java:429)
at org.apache.uima.analysis_engine.impl.AggregateAnalysisEngine_impl.initializeAggregateAnalysisEngine(AggregateAnalysisEngine_impl.java:373)
at org.apache.uima.analysis_engine.impl.AggregateAnalysisEngine_impl.initialize(AggregateAnalysisEngine_impl.java:186)
at org.apache.uima.impl.AnalysisEngineFactory_impl.produceResource(AnalysisEngineFactory_impl.java:94)
at org.apache.uima.impl.CompositeResourceFactory_impl.produceResource(CompositeResourceFactory_impl.java:62)
at org.apache.uima.UIMAFramework.produceResource(UIMAFramework.java:279)
at org.apache.uima.UIMAFramework.produceAnalysisEngine(UIMAFramework.java:407)
at org.apache.uima.analysis_engine.asb.impl.ASB_impl.setup(ASB_impl.java:256)
at org.apache.uima.analysis_engine.impl.AggregateAnalysisEngine_impl.initASB(AggregateAnalysisEngine_impl.java:429)
at org.apache.uima.analysis_engine.impl.AggregateAnalysisEngine_impl.initializeAggregateAnalysisEngine(AggregateAnalysisEngine_impl.java:373)
at org.apache.uima.analysis_engine.impl.AggregateAnalysisEngine_impl.initialize(AggregateAnalysisEngine_impl.java:186)
at org.apache.uima.impl.AnalysisEngineFactory_impl.produceResource(AnalysisEngineFactory_impl.java:94)
at org.apache.uima.impl.CompositeResourceFactory_impl.produceResource(CompositeResourceFactory_impl.java:62)
at org.apache.uima.UIMAFramework.produceResource(UIMAFramework.java:279)
at org.apache.uima.UIMAFramework.produceResource(UIMAFramework.java:331)
at org.apache.uima.UIMAFramework.produceAnalysisEngine(UIMAFramework.java:448)
at org.apache.uima.fit.factory.AnalysisEngineFactory.createEngine(AnalysisEngineFactory.java:205)
at org.apache.uima.fit.pipeline.SimplePipeline.runPipeline(SimplePipeline.java:227)
at org.apache.uima.fit.pipeline.SimplePipeline.runPipeline(SimplePipeline.java:260)
Что я делаю не так и как это исправить? Кроме того, как правильно настроить cTAKES для использования AggregatePlaintextFastUMLSProcessor.xml
и моего пользовательского словаря, который я также собираюсь создать?
1 ответ
Советую взглянуть на модуль cTAKES-REST, который полностью соответствует вашим требованиям. Его можно вызывать как веб-сервис, а также настроить на использование вашего пользовательского словаря.