В Snowplow при запуске Stream Enrich я получаю ошибку

Я настраиваю Snowplow на своём локальном компьютере (Ubuntu). Я установил и настроил Scala Stream Collector. Ниже приведён его файл конфигурации (collector.conf)

# Copyright (c) 2013-2016 Snowplow Analytics Ltd. All rights reserved.
#
# This program is licensed to you under the Apache License Version 2.0, and
# you may not use this file except in compliance with the Apache License
# Version 2.0.  You may obtain a copy of the Apache License Version 2.0 at
# http://www.apache.org/licenses/LICENSE-2.0.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the Apache License Version 2.0 is distributed on an "AS
# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.  See the Apache License Version 2.0 for the specific language
# governing permissions and limitations there under.

# This file (application.conf.example) contains a template with
# configuration options for the Scala Stream Collector.
#
# To use, copy this to 'application.conf' and modify the configuration options.

# 'collector' contains configuration options for the main Scala collector.
collector {
  # The collector runs as a web service specified on the following
  # interface and port.
  # NOTE(review): the error output quoted later in this post appears to contain
  # "Failure binding to port" for this address/port - confirm that
  # 172.16.0.157 is assigned to this host and that port 8080 is not in use.
  interface = "172.16.0.157"
  port = "8080"

  # Production mode disables additional services helpful for configuring and
  # initializing the collector, such as a path '/dump' to view all
  # records stored in the current stream.
  production = true

  # Configure the P3P policy header.
  p3p {
    policyref = "/w3c/p3p.xml"
    CP = "NOI DSP COR NID PSA OUR IND COM NAV STA"
  }

  # The collector returns a cookie to clients for user identification
  # with the following domain and expiration.
  cookie {
    enabled = true
    expiration = "365 days" # e.g. "365 days"
    # Network cookie name
    name = sp
    # The domain is optional and will make the cookie accessible to other
    # applications on the domain. Comment out this line to tie cookies to
    # the collector's full domain
    # NOTE(review): this value is written in reverse-DNS order - confirm it
    # matches the domain the pages are actually served from.
    domain = "com.unilog.analytics"
  }

  # The collector has a configurable sink for storing data in
  # different formats for the enrichment process.
  sink {
    # Sinks currently supported are:
    # 'kinesis' for writing Thrift-serialized records to a Kinesis stream
    # 'kafka' for writing Thrift-serialized records to kafka
    # 'stdout' for writing Base64-encoded Thrift-serialized records to stdout
    #    Recommended settings for 'stdout' so each line printed to stdout
    #    is a serialized record are:
    #      1. Setting 'akka.loglevel = OFF' and 'akka.loggers = []'
    #         to disable all logging.
    #      2. Using 'sbt assembly' and 'java -jar ...' to disable
    #         sbt logging.
    # NOTE(review): with 'stdout', any non-record text the collector prints
    # (e.g. a startup error) presumably reaches the downstream consumer too -
    # the quoted bad rows look like stack-trace lines; verify.
    enabled = "stdout"

    # Only relevant when enabled = "kinesis"; the values below are still
    # template placeholders.
    kinesis {
      thread-pool-size: 10 # Thread pool size for Kinesis API requests

      # The following are used to authenticate for the Amazon Kinesis sink.
      #
      # If both are set to 'default', the default provider chain is used
      # (see http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html)
      #
      # If both are set to 'iam', use AWS IAM Roles to provision credentials.
      #
      # If both are set to 'env', use environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
      aws {
        access-key: "collector.aws.access-key"
        secret-key: "collector.aws.secret-key"
      }

      # Data will be stored in the following stream.
      stream {
        region: "{{collector.stream.region}}"
        good: "{{collector.stream.good}}"
        bad: "{{collector.stream.bad}}"
      }

      # Minimum and maximum backoff periods
      backoffPolicy: {
        minBackoff: 3000
        maxBackoff: 600000
      }
    }

    # Only relevant when enabled = "kafka"; the values below are still
    # template placeholders.
    kafka {
      brokers: "{{collectorKafkaBrokers}}"

      # Data will be stored in the following topics
      topic {
        good: "{{collectorKafkaTopicGoodName}}"
        bad: "{{collectorKafkaTopicBadName}}"
      }
    }

    # Incoming events are stored in a buffer before being sent to Kinesis/Kafka.
    # The buffer is emptied whenever:
    # - the number of stored records reaches record-limit or
    # - the combined size of the stored records reaches byte-limit or
    # - the time in milliseconds since the buffer was last emptied reaches time-limit
    buffer {
        byte-limit: 4000000
        record-limit: 500
        time-limit: 5000
    }
  }
}

# Akka has a variety of possible configuration options defined at
# http://doc.akka.io/docs/akka/2.2.3/general/configuration.html.
# Akka logging is switched off entirely (loglevel OFF, no loggers), which is
# the setting the collector's 'stdout' sink comments recommend so that each
# line printed to stdout is a serialized record.
akka {
#    loglevel = DEBUG # 'OFF' for no logging, 'DEBUG' for all logging.
   loglevel = OFF
#    loggers = ["akka.event.slf4j.Slf4jLogger"]
   loggers = []
}

# spray-can is the server the Stream collector uses and has configurable
# options defined at
# https://github.com/spray/spray/blob/master/spray-can/src/main/resources/reference.conf
spray.can.server {
  # To obtain the hostname in the collector, the 'remote-address' header
  # should be set. By default, this is disabled, and enabling it
  # adds the 'Remote-Address' header to every request automatically.
  remote-address-header = on

  # NOTE(review): presumably relaxes URI validation and exposes the unparsed
  # request URI as a header - confirm against the spray-can reference.conf
  # linked above.
  uri-parsing-mode = relaxed
  raw-request-uri-header = on

  # Define the maximum request length (the default is 2048)
  parsing {
    max-uri-length = 32768
  }
} 

Я добавил на свою веб-страницу приведённый ниже скрипт JavaScript-трекера

 <script type="text/javascript">
        // Asynchronous loader: defines window.snowplow as a function that queues
        // its arguments in snowplow.q, then inserts an async <script> element
        // that loads sp.js (2.8.0) from CloudFront before the first script tag.
        ;(function(p,l,o,w,i,n,g){if(!p[i]){p.GlobalSnowplowNamespace=p.GlobalSnowplowNamespace||[];
        p.GlobalSnowplowNamespace.push(i);p[i]=function(){(p[i].q=p[i].q||[]).push(arguments)
        };p[i].q=p[i].q||[];n=l.createElement(o);g=l.getElementsByTagName(o)[0];n.async=1;
        n.src=w;g.parentNode.insertBefore(n,g)}}(window,document,"script","//d1fc8wv8zag5ca.cloudfront.net/2.8.0/sp.js","snowplow"));

        // Create a tracker named 'cf' that sends events to the collector
        // endpoint 172.16.0.157:8080.
        // NOTE(review): cookieDomain is written in reverse-DNS order - confirm
        // it matches the domain this page is served from.
        window.snowplow('newTracker', 'cf', '172.16.0.157:8080', { // Initialise a tracker
          appId: '1',
          cookieDomain: 'com.unilog.analytics'
        });

        // Queue a page-view event for the current page.
        window.snowplow('trackPageView');
        </script>

Ниже — данные, которые сборщик выводит после запроса от трекера, в закодированном (Base64) виде: CwBkAAAACzE /VXNlci =

Я установил и настроил Stream Enrich; ниже мой файл конфигурации (enrich.conf)

# Copyright (c) 2013-2016 Snowplow Analytics Ltd. All rights reserved.
#
# This program is licensed to you under the Apache License Version 2.0, and
# you may not use this file except in compliance with the Apache License
# Version 2.0.  You may obtain a copy of the Apache License Version 2.0 at
# http://www.apache.org/licenses/LICENSE-2.0.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the Apache License Version 2.0 is distributed on an "AS
# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.  See the Apache License Version 2.0 for the specific language
# governing permissions and limitations there under.

# This file (application.conf.example) contains a template with
# configuration options for Stream Enrich.

enrich {
  # Sources currently supported are:
  # 'kinesis' for reading Thrift-serialized records from a Kinesis stream
  # 'kafka' for reading Thrift-serialized records from a Kafka topic
  # 'stdin' for reading Base64-encoded Thrift-serialized records from stdin
  source = "stdin"

  # Sinks currently supported are:
  # 'kinesis' for writing enriched events to one Kinesis stream and invalid events to another.
  # 'kafka' for writing enriched events to one Kafka topic and invalid events to another.
  # 'stdouterr' for writing enriched events to stdout and invalid events to stderr.
  #    Using "sbt assembly" and "java -jar" is recommended to disable sbt
  #    logging.
  sink = "stdouterr"

  # AWS credentials
  #
  # If both are set to 'default', use the default AWS credentials provider chain.
  #
  # If both are set to 'iam', use AWS IAM Roles to provision credentials.
  #
  # If both are set to 'env', use environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
  aws {
    access-key: "iam"
    secret-key: "iam"
  }

  # Kafka configuration
  kafka {
    brokers: "{{enrichKafkaBrokers}}"
  }

  streams {
    in: {
      raw: "{{enrichStreamsInRaw}}"

      # Maximum number of records to get from Kinesis per call to GetRecords
      maxRecords: 10000

      # After enrichment, events are accumulated in a buffer before being sent to Kinesis.
      # The buffer is emptied whenever:
      # - the number of stored records reaches record-limit or
      # - the combined size of the stored records reaches byte-limit or
      # - the time in milliseconds since it was last emptied exceeds time-limit when
      #   a new event enters the buffer
      buffer: {
        byte-limit: 4000000
        record-limit: 500 # Not supported by Kafka; will be ignored
        time-limit: 5000
      }
    }

    out: {
      enriched: "{{enrichStreamsOutEnriched}}"
      bad: "{{enrichStreamsOutBad}}"

      # Minimum and maximum backoff periods
      # - Units: Milliseconds
      # minBackoff must not exceed maxBackoff.
      backoffPolicy: {
        minBackoff: 300
        maxBackoff: 6000
      }
    }

    # "app-name" is used for a DynamoDB table to maintain stream state.
    # "app-name" is used as the Kafka consumer group ID.
    # You can set it automatically using: "SnowplowKinesisEnrich-$\\{enrich.streams.in.raw\\}"
    app-name: "{{enrichStreamsAppName}}"

    # LATEST: most recent data.
    # TRIM_HORIZON: oldest available data.
    # Note: This only affects the first run of this application
    # on a stream.
    initial-position = "TRIM_HORIZON"

    region: "{{enrichStreamsRegion}}"
  }

  # Optional section for tracking endpoints
  #monitoring {
    #snowplow {
      #collector-uri: "172.16.0.157"
      #collector-port: 8080
      #app-id: "1"
      #}
  #}
}

Я запускаю Stream Enrich следующей командой: ./snowplow-stream-collector-0.9.0 --config collector.conf | /home/hadoop/snowplow/3-enrich/stream-enrich/target/scala-2.10/snowplow-stream-enrich-0.10.0 --config /home/hadoop/snowplow/3-enrich/stream-enrich/target/scala-2.10/enrich.conf --resolver file:/home/hadoop/snowplow/3-enrich/config/iglu_resolver.json --enrichments file:/home/hadoop/snowplow/3-enrich/config/enrichments/ >> stream_enrich_log_file.txt 2>&1

Но я получаю ошибку ниже

{"line":"151200121ForkJoinPool+2+worker+5ERRORcsscscalastreamScalaCollector+Failurebindingtopors=","errors":[{"level":"error  ","message":"Error deserializing raw event: Cannot read. Remote side has closed. Tried to read 2 bytes, but only got 1 bytes  . (This is often indicative of an internal error on the server side. Please check your server logs.)"}],"failure_tstamp":"20  17-07-21T09:42:06.170Z"}
{"line":"javalangRuntimeExceptionCommandFailedBindActorakka//scala+stream+collector/user/handler+540057214/1721601578080100L  istNonQ==","errors":[{"level":"error","message":"Error deserializing raw event: Cannot read. Remote side has closed. Tried t  o read 2 bytes, but only got 1 bytes. (This is often indicative of an internal error on the server side. Please check your s  erver logs.)"}],"failure_tstamp":"2017-07-21T09:42:06.180Z"}
{"line":"atcomsnowplowanalyticssnowplowcollectorsscalastreamScalaCollectoranonfun3applyScalaCollectorAppscala118snowplow+str  eam+collector+090090=","errors":[{"level":"error","message":"Error deserializing raw event: Cannot read. Remote side has clo  sed. Tried to read 1 bytes, but only got 0 bytes. (This is often indicative of an internal error on the server side. Please   check your server logs.)"}],"failure_tstamp":"2017-07-21T09:42:06.181Z"}
{"line":"atcomsnowplowanalyticssnowplowcollectorsscalastreamScalaCollectoranonfun3applyScalaCollectorAppscala116snowplow+str  eam+collector+090090=","errors":[{"level":"error","message":"Error deserializing raw event: Cannot read. Remote side has clo  sed. Tried to read 1 bytes, but only got 0 bytes. (This is often indicative of an internal error on the server side. Please   check your server logs.)"}],"failure_tstamp":"2017-07-21T09:42:06.181Z"}
{"line":"atscalaconcurrentFutureanonfunflatMap1applyFuturescala251snowplow+stream+collector+09009","errors":[{"level":"error  ","message":"Error deserializing raw event: Cannot read. Remote side has closed. Tried to read 1 bytes, but only got 0 bytes  . (This is often indicative of an internal error on the server side. Please check your server logs.)"}],"failure_tstamp":"20  17-07-21T09:42:06.182Z"}
{"line":"atscalaconcurrentFutureanonfunflatMap1applyFuturescala249snowplow+stream+collector+09009","errors":[{"level":"error  ","message":"Error deserializing raw event: Cannot read. Remote side has closed. Tried to read 1 bytes, but only got 0 bytes  . (This is often indicative of an internal error on the server side. Please check your server logs.)"}],"failure_tstamp":"20  17-07-21T09:42:06.182Z"}
{"line":"atscalaconcurrentimplCallbackRunnablerunPromisescala32snowplow+stream+collector+09009w==","errors":[{"level":"error  ","message":"Error deserializing raw event: Cannot read. Remote side has closed. Tried to read 2 bytes, but only got 0 bytes  . (This is often indicative of an internal error on the server side. Please check your server logs.)"}],"failure_tstamp":"20  17-07-21T09:42:06.182Z"}
{"line":"atscalaconcurrentimplExecutionContextImplanon3execExecutionContextImplscala107snowplow+stream+collector+09009w==","  errors":[{"level":"error","message":"Error deserializing raw event: Cannot read. Remote side has closed. Tried to read 2 byt  es, but only got 0 bytes. (This is often indicative of an internal error on the server side. Please check your server logs.)  "}],"failure_tstamp":"2017-07-21T09:42:06.183Z"}
{"line":"atscalaconcurrentforkjoinForkJoinTaskdoExecForkJoinTaskjava260snowplow+stream+collector+09009w==","errors":[{"level  ":"error","message":"Error deserializing raw event: Cannot read. Remote side has closed. Tried to read 2 bytes, but only got   0 bytes. (This is often indicative of an internal error on the server side. Please check your server logs.)"}],"failure_tst  amp":"2017-07-21T09:42:06.183Z"}
{"line":"atscalaconcurrentforkjoinForkJoinPoolWorkQueuerunTaskForkJoinPooljava1339snowplow+stream+collector+09009","errors":  [{"level":"error","message":"Error deserializing raw event: Cannot read. Remote side has closed. Tried to read 1 bytes, but   only got 0 bytes. (This is often indicative of an internal error on the server side. Please check your server logs.)"}],"fai  lure_tstamp":"2017-07-21T09:42:06.184Z"}
{"line":"atscalaconcurrentforkjoinForkJoinPoolrunWorkerForkJoinPooljava1979snowplow+stream+collector+09009w==","errors":[{"l  evel":"error","message":"Error deserializing raw event: Cannot read. Remote side has closed. Tried to read 2 bytes, but only   got 0 bytes. (This is often indicative of an internal error on the server side. Please check your server logs.)"}],"failure  _tstamp":"2017-07-21T09:42:06.184Z"}
{"line":"atscalaconcurrentforkjoinForkJoinWorkerThreadrunForkJoinWorkerThreadjava107snowplow+stream+collector+090090=","erro  rs":[{"level":"error","message":"Error deserializing raw event: Cannot read. Remote side has closed. Tried to read 2 bytes,   but only got 1 bytes. (This is often indicative of an internal error on the server side. Please check your server logs.)"}],  "failure_tstamp":"2017-07-21T09:42:06.184Z"}

Я бьюсь над этим уже 2 дня. Пожалуйста, помогите мне избавиться от этой ошибки.

0 ответов

Другие вопросы по тегам