Использование логики Apache cTAKES из Java-приложения

Я пытаюсь встроить логику Apache cTAKES NLP в свое приложение.

Прежде всего, я не могу найти хорошую документацию, чтобы понять, как это можно сделать.

Из разных фрагментов кода, которые я нашел в интернете, я создал следующий тестовый код:

/**
 * Minimal driver that runs a cTAKES pipeline over a hard-coded lab-report snippet and prints
 * every feature structure selected from the resulting JCas as JSON.
 */
public class CTAKESTest {

    /**
     * Creates a JCas for the sample note, runs the aggregate returned by
     * {@link #getFastPipeline()} over it, and writes the Gson serialization of all selected
     * {@code TOP} instances to standard output.
     *
     * @param args ignored
     * @throws UIMAException propagated from JCas creation or pipeline execution
     * @throws MalformedURLException propagated from {@link #getFastPipeline()}
     */
    public static void main(String[] args) throws UIMAException, MalformedURLException {

        final String note = "Serum Cholesterol 154 150 250 mgs/dl\n-\nSerum Triglycerides 67 90 200 mgs /dl\n-\nSerum HDL: Cholesterol 38 35 55 mgs /dl\n-\nSerum LDL: Cholesterol 49 85 150 mgs/d1\n-\nSerum VLDL: Cholesterol 13 10 40 mgs/dl\n-\nTotal Cholesterol / HDL Ratio: 3.90";

        final JCas jcas = JCasFactory.createJCas();
        jcas.setDocumentText(note);

        final AnalysisEngineDescription aed = getFastPipeline();
        SimplePipeline.runPipeline(jcas, aed);

        // Convert the selection straight to an array; the former raw-typed ArrayList copy
        // added nothing but an unchecked-conversion warning.
        final Collection<TOP> codes = JCasUtil.selectAll(jcas);
        final TOP[] res = codes.toArray(new TOP[codes.size()]);

        String json = new Gson().toJson(res);
        System.out.println(json);
    }

    /**
     * Assembles the full aggregate: the token-processing aggregate from
     * {@link #getTokenProcessingPipeline()}, followed by {@code DefaultJCasTermAnnotator},
     * {@code ClearNLPDependencyParserAE} and the Polarity, Uncertainty, History, Conditional,
     * Generic and Subject ClearTK analysis engines, added in that order.
     *
     * @return the aggregate analysis engine description
     * @throws ResourceInitializationException if one of the descriptions cannot be created
     * @throws MalformedURLException declared for callers; kept for interface compatibility
     */
    public static AnalysisEngineDescription getFastPipeline()
            throws ResourceInitializationException, MalformedURLException {
        AggregateBuilder builder = new AggregateBuilder();
        builder.add(getTokenProcessingPipeline());
        builder.add(DefaultJCasTermAnnotator.createAnnotatorDescription());
        builder.add(ClearNLPDependencyParserAE.createAnnotatorDescription());
        builder.add(PolarityCleartkAnalysisEngine.createAnnotatorDescription());
        builder.add(UncertaintyCleartkAnalysisEngine.createAnnotatorDescription());
        builder.add(HistoryCleartkAnalysisEngine.createAnnotatorDescription());
        builder.add(ConditionalCleartkAnalysisEngine.createAnnotatorDescription());
        builder.add(GenericCleartkAnalysisEngine.createAnnotatorDescription());
        builder.add(SubjectCleartkAnalysisEngine.createAnnotatorDescription());
        return builder.createAggregateDescription();
    }

    /**
     * Assembles the token-level aggregate: {@code SimpleSegmentAnnotator},
     * {@code SentenceDetector}, {@code TokenizerAnnotatorPTB}, {@code LvgAnnotator},
     * {@code ContextDependentTokenizerAnnotator} and {@code POSTagger}, added in that order.
     *
     * @return the aggregate analysis engine description
     * @throws ResourceInitializationException if one of the descriptions cannot be created
     * @throws MalformedURLException declared for callers; kept for interface compatibility
     */
    public static AnalysisEngineDescription getTokenProcessingPipeline()
            throws ResourceInitializationException, MalformedURLException {
        AggregateBuilder builder = new AggregateBuilder();
        builder.add(SimpleSegmentAnnotator.createAnnotatorDescription());
        builder.add(SentenceDetector.createAnnotatorDescription());
        builder.add(TokenizerAnnotatorPTB.createAnnotatorDescription());
        // NOTE(review): the reported "URI is not hierarchical" failure is raised from
        // LvgCmdApiResourceImpl.load while lvg.properties resolves to a URL inside a jar.
        // Presumably LVG needs its data on the file system rather than inside a jar;
        // confirm before relying on this annotator.
        builder.add(LvgAnnotator.createAnnotatorDescription());
        builder.add(ContextDependentTokenizerAnnotator.createAnnotatorDescription());
        builder.add(POSTagger.createAnnotatorDescription());
        return builder.createAggregateDescription();
    }

}

но во время запуска происходит сбой со следующей ошибкой:

08:37:01.978 [main] INFO  o.apache.ctakes.lvg.ae.LvgAnnotator - URL for lvg.properties =file:/C:/Users/Alex/.m2/repository/net/sourceforge/ctakesresources/ctakes-resources-lvg2008/4.0.0/ctakes-resources-lvg2008-4.0.0.jar!/org/apache/ctakes/lvg/data/config/lvg.properties
08:37:03.454 [main] INFO  o.a.ctakes.core.ae.SentenceDetector - Sentence detector model file: org/apache/ctakes/core/sentdetect/sd-med-model.zip
08:37:03.566 [main] INFO  o.a.c.core.ae.TokenizerAnnotatorPTB - Initializing org.apache.ctakes.core.ae.TokenizerAnnotatorPTB
Exception in thread "main" java.lang.IllegalArgumentException: URI is not hierarchical
    at java.io.File.<init>(Unknown Source)
    at org.apache.ctakes.lvg.resource.LvgCmdApiResourceImpl.load(LvgCmdApiResourceImpl.java:65)
    at org.apache.uima.resource.impl.ResourceManager_impl.registerResource(ResourceManager_impl.java:628)
    at org.apache.uima.resource.impl.ResourceManager_impl.initializeExternalResources(ResourceManager_impl.java:464)
    at org.apache.uima.resource.Resource_ImplBase.initialize(Resource_ImplBase.java:193)
    at org.apache.uima.analysis_engine.impl.AnalysisEngineImplBase.initialize(AnalysisEngineImplBase.java:157)
    at org.apache.uima.analysis_engine.impl.PrimitiveAnalysisEngine_impl.initialize(PrimitiveAnalysisEngine_impl.java:131)
    at org.apache.uima.impl.AnalysisEngineFactory_impl.produceResource(AnalysisEngineFactory_impl.java:94)
    at org.apache.uima.impl.CompositeResourceFactory_impl.produceResource(CompositeResourceFactory_impl.java:62)
    at org.apache.uima.UIMAFramework.produceResource(UIMAFramework.java:279)
    at org.apache.uima.UIMAFramework.produceAnalysisEngine(UIMAFramework.java:407)
    at org.apache.uima.analysis_engine.asb.impl.ASB_impl.setup(ASB_impl.java:256)
    at org.apache.uima.analysis_engine.impl.AggregateAnalysisEngine_impl.initASB(AggregateAnalysisEngine_impl.java:429)
    at org.apache.uima.analysis_engine.impl.AggregateAnalysisEngine_impl.initializeAggregateAnalysisEngine(AggregateAnalysisEngine_impl.java:373)
    at org.apache.uima.analysis_engine.impl.AggregateAnalysisEngine_impl.initialize(AggregateAnalysisEngine_impl.java:186)
    at org.apache.uima.impl.AnalysisEngineFactory_impl.produceResource(AnalysisEngineFactory_impl.java:94)
    at org.apache.uima.impl.CompositeResourceFactory_impl.produceResource(CompositeResourceFactory_impl.java:62)
    at org.apache.uima.UIMAFramework.produceResource(UIMAFramework.java:279)
    at org.apache.uima.UIMAFramework.produceAnalysisEngine(UIMAFramework.java:407)
    at org.apache.uima.analysis_engine.asb.impl.ASB_impl.setup(ASB_impl.java:256)
    at org.apache.uima.analysis_engine.impl.AggregateAnalysisEngine_impl.initASB(AggregateAnalysisEngine_impl.java:429)
    at org.apache.uima.analysis_engine.impl.AggregateAnalysisEngine_impl.initializeAggregateAnalysisEngine(AggregateAnalysisEngine_impl.java:373)
    at org.apache.uima.analysis_engine.impl.AggregateAnalysisEngine_impl.initialize(AggregateAnalysisEngine_impl.java:186)
    at org.apache.uima.impl.AnalysisEngineFactory_impl.produceResource(AnalysisEngineFactory_impl.java:94)
    at org.apache.uima.impl.CompositeResourceFactory_impl.produceResource(CompositeResourceFactory_impl.java:62)
    at org.apache.uima.UIMAFramework.produceResource(UIMAFramework.java:279)
    at org.apache.uima.UIMAFramework.produceAnalysisEngine(UIMAFramework.java:407)
    at org.apache.uima.analysis_engine.asb.impl.ASB_impl.setup(ASB_impl.java:256)
    at org.apache.uima.analysis_engine.impl.AggregateAnalysisEngine_impl.initASB(AggregateAnalysisEngine_impl.java:429)
    at org.apache.uima.analysis_engine.impl.AggregateAnalysisEngine_impl.initializeAggregateAnalysisEngine(AggregateAnalysisEngine_impl.java:373)
    at org.apache.uima.analysis_engine.impl.AggregateAnalysisEngine_impl.initialize(AggregateAnalysisEngine_impl.java:186)
    at org.apache.uima.impl.AnalysisEngineFactory_impl.produceResource(AnalysisEngineFactory_impl.java:94)
    at org.apache.uima.impl.CompositeResourceFactory_impl.produceResource(CompositeResourceFactory_impl.java:62)
    at org.apache.uima.UIMAFramework.produceResource(UIMAFramework.java:279)
    at org.apache.uima.UIMAFramework.produceResource(UIMAFramework.java:331)
    at org.apache.uima.UIMAFramework.produceAnalysisEngine(UIMAFramework.java:448)
    at org.apache.uima.fit.factory.AnalysisEngineFactory.createEngine(AnalysisEngineFactory.java:205)
    at org.apache.uima.fit.pipeline.SimplePipeline.runPipeline(SimplePipeline.java:227)
    at org.apache.uima.fit.pipeline.SimplePipeline.runPipeline(SimplePipeline.java:260)

Что я делаю не так и как это исправить? Кроме того, как правильно настроить cTAKES для использования AggregatePlaintextFastUMLSProcessor.xml и моего пользовательского словаря, который я также собираюсь создать?

1 ответ

Советую взглянуть на модуль cTAKES-REST, который точно соответствует вашим требованиям. Его можно вызывать как веб-службу, а также настроить для использования вашего пользовательского словаря.

Другие вопросы по тегам