В Snowplow при запуске Stream Enrich я получаю ошибку
Я настраиваю Snowplow на своём локальном компьютере (Ubuntu). Я установил и настроил Scala Stream Collector. Ниже приведён файл конфигурации (collector.conf):
# Copyright (c) 2013-2016 Snowplow Analytics Ltd. All rights reserved.
#
# This program is licensed to you under the Apache License Version 2.0, and
# you may not use this file except in compliance with the Apache License
# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at
# http://www.apache.org/licenses/LICENSE-2.0.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the Apache License Version 2.0 is distributed on an "AS
# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied. See the Apache License Version 2.0 for the specific language
# governing permissions and limitations there under.
# This file (application.conf.example) contains a template with
# configuration options for the Scala Stream Collector.
#
# To use, copy this to 'application.conf' and modify the configuration options.
# 'collector' contains configuration options for the main Scala collector.
collector {
  # Network interface and port on which the collector's web service listens.
  interface = "172.16.0.157"
  port = "8080"

  # In production mode the extra helper services used while configuring and
  # initializing the collector are disabled, such as the '/dump' path that
  # shows all records stored in the current stream.
  production = true

  # P3P policy header settings.
  p3p {
    policyref = "/w3c/p3p.xml"
    CP = "NOI DSP COR NID PSA OUR IND COM NAV STA"
  }

  # Cookie returned to clients for user identification, using the
  # domain and expiration given below.
  cookie {
    enabled = true
    expiration = "365 days" # e.g. "365 days"
    # Network cookie name
    name = sp
    # Optional: setting the domain makes the cookie accessible to other
    # applications on that domain. Comment this line out to tie cookies to
    # the collector's full domain.
    domain = "com.unilog.analytics"
  }

  # Configurable sink in which the collector stores data, in different
  # formats, for the enrichment process.
  sink {
    # Supported sinks:
    #   'kinesis' - writes Thrift-serialized records to a Kinesis stream
    #   'kafka'   - writes Thrift-serialized records to kafka
    #   'stdout'  - writes Base64-encoded Thrift-serialized records to stdout
    # For 'stdout', the recommended settings so that every line printed to
    # stdout is a serialized record are:
    #   1. Set 'akka.loglevel = OFF' and 'akka.loggers = []' to disable
    #      all logging.
    #   2. Use 'sbt assembly' and 'java -jar ...' to disable sbt logging.
    enabled = "stdout"

    kinesis {
      thread-pool-size: 10 # Thread pool size for Kinesis API requests

      # Credentials used to authenticate for the Amazon Kinesis sink.
      #
      # Both set to 'default': the default provider chain is used
      # (see http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html)
      #
      # Both set to 'iam': AWS IAM Roles are used to provision credentials.
      #
      # Both set to 'env': environment variables AWS_ACCESS_KEY_ID and
      # AWS_SECRET_ACCESS_KEY are used.
      aws {
        access-key: "collector.aws.access-key"
        secret-key: "collector.aws.secret-key"
      }

      # Stream in which the data will be stored.
      stream {
        region: "{{collector.stream.region}}"
        good: "{{collector.stream.good}}"
        bad: "{{collector.stream.bad}}"
      }

      # Minimum and maximum backoff periods
      backoffPolicy: {
        minBackoff: 3000
        maxBackoff: 600000
      }
    }

    kafka {
      brokers: "{{collectorKafkaBrokers}}"

      # Topics in which the data will be stored.
      topic {
        good: "{{collectorKafkaTopicGoodName}}"
        bad: "{{collectorKafkaTopicBadName}}"
      }
    }

    # Incoming events are held in a buffer before being sent to Kinesis/Kafka.
    # The buffer is emptied whenever:
    # - the number of stored records reaches record-limit or
    # - the combined size of the stored records reaches byte-limit or
    # - the time in milliseconds since the buffer was last emptied reaches time-limit
    buffer {
      byte-limit: 4000000
      record-limit: 500
      time-limit: 5000
    }
  }
}
# Akka has a variety of possible configuration options defined at
# http://doc.akka.io/docs/akka/2.2.3/general/configuration.html.
akka {
  # Active settings: logging fully disabled ('OFF' for no logging,
  # 'DEBUG' for all logging) and no loggers registered.
  loglevel = OFF
  loggers = []

  # Alternatives kept for reference:
  # loglevel = DEBUG
  # loggers = ["akka.event.slf4j.Slf4jLogger"]
}
# spray-can is the server the Stream collector uses and has configurable
# options defined at
# https://github.com/spray/spray/blob/master/spray-can/src/main/resources/reference.conf
spray.can.server {
  # The 'remote-address' header must be set for the collector to obtain the
  # hostname. It is disabled by default; enabling it automatically adds the
  # 'Remote-Address' header to every request.
  remote-address-header = on

  uri-parsing-mode = relaxed
  raw-request-uri-header = on

  # Maximum request length (the default is 2048)
  parsing.max-uri-length = 32768
}
Я добавил приведённый ниже скрипт JavaScript-трекера на свою веб-страницу:
<script type="text/javascript">
// Loader snippet: if window.snowplow is not yet defined, registers the name in
// GlobalSnowplowNamespace, defines a stub window.snowplow that queues its
// arguments in snowplow.q, then creates an async <script> element pointing at
// the sp.js URL below and inserts it before the first existing <script> tag.
;(function(p,l,o,w,i,n,g){if(!p[i]){p.GlobalSnowplowNamespace=p.GlobalSnowplowNamespace||[];
p.GlobalSnowplowNamespace.push(i);p[i]=function(){(p[i].q=p[i].q||[]).push(arguments)
};p[i].q=p[i].q||[];n=l.createElement(o);g=l.getElementsByTagName(o)[0];n.async=1;
n.src=w;g.parentNode.insertBefore(n,g)}}(window,document,"script","//d1fc8wv8zag5ca.cloudfront.net/2.8.0/sp.js","snowplow"));
// NOTE(review): 'cf' is presumably the tracker namespace and the third
// argument the collector endpoint (it matches the interface/port set in
// collector.conf) - confirm against the JavaScript tracker documentation.
window.snowplow('newTracker', 'cf', '172.16.0.157:8080', { // Initialise a tracker
appId: '1',
cookieDomain: 'com.unilog.analytics'
});
// Queue a 'trackPageView' call for the current page.
window.snowplow('trackPageView');
</script>
Ниже приведены данные, которые сборщик выводит после получения события от трекера, в закодированном виде (Base64): CwBkAAAACzE /VXNlci =
Я установил и настроил Stream Enrich; ниже приведён мой файл конфигурации (enrich.conf):
# Copyright (c) 2013-2016 Snowplow Analytics Ltd. All rights reserved.
#
# This program is licensed to you under the Apache License Version 2.0, and
# you may not use this file except in compliance with the Apache License
# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at
# http://www.apache.org/licenses/LICENSE-2.0.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the Apache License Version 2.0 is distributed on an "AS
# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied. See the Apache License Version 2.0 for the specific language
# governing permissions and limitations there under.
# This file (application.conf.example) contains a template with
# configuration options for Stream Enrich.
enrich {
  # Sources currently supported are:
  # 'kinesis' for reading Thrift-serialized records from a Kinesis stream
  # 'kafka' for reading Thrift-serialized records from a Kafka topic
  # 'stdin' for reading Base64-encoded Thrift-serialized records from stdin
  source = "stdin"

  # Sinks currently supported are:
  # 'kinesis' for writing enriched events to one Kinesis stream and invalid events to another.
  # 'kafka' for writing enriched events to one Kafka topic and invalid events to another.
  # 'stdouterr' for writing enriched events to stdout and invalid events to stderr.
  # Using "sbt assembly" and "java -jar" is recommended to disable sbt
  # logging.
  sink = "stdouterr"

  # AWS credentials
  #
  # If both are set to 'default', use the default AWS credentials provider chain.
  #
  # If both are set to 'iam', use AWS IAM Roles to provision credentials.
  #
  # If both are set to 'env', use environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
  aws {
    access-key: "iam"
    secret-key: "iam"
  }

  # Kafka configuration
  kafka {
    brokers: "{{enrichKafkaBrokers}}"
  }

  streams {
    in: {
      raw: "{{enrichStreamsInRaw}}"

      # Maximum number of records to get from Kinesis per call to GetRecords
      maxRecords: 10000

      # After enrichment, events are accumulated in a buffer before being sent to Kinesis.
      # The buffer is emptied whenever:
      # - the number of stored records reaches record-limit or
      # - the combined size of the stored records reaches byte-limit or
      # - the time in milliseconds since it was last emptied exceeds time-limit when
      # a new event enters the buffer
      buffer: {
        byte-limit: 4000000
        record-limit: 500 # Not supported by Kafka; will be ignored
        time-limit: 5000
      }
    }

    out: {
      enriched: "{{enrichStreamsOutEnriched}}"
      bad: "{{enrichStreamsOutBad}}"

      # Minimum and maximum backoff periods
      # - Units: Milliseconds
      # The minimum must not exceed the maximum; the two values were
      # previously inverted (min 6000, max 300).
      # NOTE(review): presumably not used by the 'stdouterr' sink - confirm.
      backoffPolicy: {
        minBackoff: 300
        maxBackoff: 6000
      }
    }

    # "app-name" is used for a DynamoDB table to maintain stream state.
    # "app-name" is used as the Kafka consumer group ID.
    # You can set it automatically using: "SnowplowKinesisEnrich-$\\{enrich.streams.in.raw\\}"
    app-name: "{{enrichStreamsAppName}}"

    # LATEST: most recent data.
    # TRIM_HORIZON: oldest available data.
    # Note: This only affects the first run of this application
    # on a stream.
    initial-position = "TRIM_HORIZON"

    region: "{{enrichStreamsRegion}}"
  }

  # Optional section for tracking endpoints
  #monitoring {
  #snowplow {
  #collector-uri: "172.16.0.157"
  #collector-port: 8080
  #app-id: "1"
  #}
  #}
}
Я запускаю сборщик и Stream Enrich с помощью команды ниже: ./snowplow-stream-collector-0.9.0 --config collector.conf | /home/hadoop/snowplow/3-enrich/stream-enrich/target/scala-2.10/snowplow-stream-enrich-0.10.0 --config /home/hadoop/snowplow/3-enrich/stream-enrich/target/scala-2.10/enrich.conf --resolver file:/home/hadoop/snowplow/3-enrich/config/iglu_resolver.json --enrichments file:/home/hadoop/snowplow/3-enrich/config/enrichments/ >> stream_enrich_log_file.txt 2>&1
Но я получаю следующую ошибку:
{"line":"151200121ForkJoinPool+2+worker+5ERRORcsscscalastreamScalaCollector+Failurebindingtopors=","errors":[{"level":"error ","message":"Error deserializing raw event: Cannot read. Remote side has closed. Tried to read 2 bytes, but only got 1 bytes . (This is often indicative of an internal error on the server side. Please check your server logs.)"}],"failure_tstamp":"20 17-07-21T09:42:06.170Z"}
{"line":"javalangRuntimeExceptionCommandFailedBindActorakka//scala+stream+collector/user/handler+540057214/1721601578080100L istNonQ==","errors":[{"level":"error","message":"Error deserializing raw event: Cannot read. Remote side has closed. Tried t o read 2 bytes, but only got 1 bytes. (This is often indicative of an internal error on the server side. Please check your s erver logs.)"}],"failure_tstamp":"2017-07-21T09:42:06.180Z"}
{"line":"atcomsnowplowanalyticssnowplowcollectorsscalastreamScalaCollectoranonfun3applyScalaCollectorAppscala118snowplow+str eam+collector+090090=","errors":[{"level":"error","message":"Error deserializing raw event: Cannot read. Remote side has clo sed. Tried to read 1 bytes, but only got 0 bytes. (This is often indicative of an internal error on the server side. Please check your server logs.)"}],"failure_tstamp":"2017-07-21T09:42:06.181Z"}
{"line":"atcomsnowplowanalyticssnowplowcollectorsscalastreamScalaCollectoranonfun3applyScalaCollectorAppscala116snowplow+str eam+collector+090090=","errors":[{"level":"error","message":"Error deserializing raw event: Cannot read. Remote side has clo sed. Tried to read 1 bytes, but only got 0 bytes. (This is often indicative of an internal error on the server side. Please check your server logs.)"}],"failure_tstamp":"2017-07-21T09:42:06.181Z"}
{"line":"atscalaconcurrentFutureanonfunflatMap1applyFuturescala251snowplow+stream+collector+09009","errors":[{"level":"error ","message":"Error deserializing raw event: Cannot read. Remote side has closed. Tried to read 1 bytes, but only got 0 bytes . (This is often indicative of an internal error on the server side. Please check your server logs.)"}],"failure_tstamp":"20 17-07-21T09:42:06.182Z"}
{"line":"atscalaconcurrentFutureanonfunflatMap1applyFuturescala249snowplow+stream+collector+09009","errors":[{"level":"error ","message":"Error deserializing raw event: Cannot read. Remote side has closed. Tried to read 1 bytes, but only got 0 bytes . (This is often indicative of an internal error on the server side. Please check your server logs.)"}],"failure_tstamp":"20 17-07-21T09:42:06.182Z"}
{"line":"atscalaconcurrentimplCallbackRunnablerunPromisescala32snowplow+stream+collector+09009w==","errors":[{"level":"error ","message":"Error deserializing raw event: Cannot read. Remote side has closed. Tried to read 2 bytes, but only got 0 bytes . (This is often indicative of an internal error on the server side. Please check your server logs.)"}],"failure_tstamp":"20 17-07-21T09:42:06.182Z"}
{"line":"atscalaconcurrentimplExecutionContextImplanon3execExecutionContextImplscala107snowplow+stream+collector+09009w=="," errors":[{"level":"error","message":"Error deserializing raw event: Cannot read. Remote side has closed. Tried to read 2 byt es, but only got 0 bytes. (This is often indicative of an internal error on the server side. Please check your server logs.) "}],"failure_tstamp":"2017-07-21T09:42:06.183Z"}
{"line":"atscalaconcurrentforkjoinForkJoinTaskdoExecForkJoinTaskjava260snowplow+stream+collector+09009w==","errors":[{"level ":"error","message":"Error deserializing raw event: Cannot read. Remote side has closed. Tried to read 2 bytes, but only got 0 bytes. (This is often indicative of an internal error on the server side. Please check your server logs.)"}],"failure_tst amp":"2017-07-21T09:42:06.183Z"}
{"line":"atscalaconcurrentforkjoinForkJoinPoolWorkQueuerunTaskForkJoinPooljava1339snowplow+stream+collector+09009","errors": [{"level":"error","message":"Error deserializing raw event: Cannot read. Remote side has closed. Tried to read 1 bytes, but only got 0 bytes. (This is often indicative of an internal error on the server side. Please check your server logs.)"}],"fai lure_tstamp":"2017-07-21T09:42:06.184Z"}
{"line":"atscalaconcurrentforkjoinForkJoinPoolrunWorkerForkJoinPooljava1979snowplow+stream+collector+09009w==","errors":[{"l evel":"error","message":"Error deserializing raw event: Cannot read. Remote side has closed. Tried to read 2 bytes, but only got 0 bytes. (This is often indicative of an internal error on the server side. Please check your server logs.)"}],"failure _tstamp":"2017-07-21T09:42:06.184Z"}
{"line":"atscalaconcurrentforkjoinForkJoinWorkerThreadrunForkJoinWorkerThreadjava107snowplow+stream+collector+090090=","erro rs":[{"level":"error","message":"Error deserializing raw event: Cannot read. Remote side has closed. Tried to read 2 bytes, but only got 1 bytes. (This is often indicative of an internal error on the server side. Please check your server logs.)"}], "failure_tstamp":"2017-07-21T09:42:06.184Z"}
Я застрял на этом уже 2 дня; пожалуйста, помогите мне избавиться от этой ошибки.