Architecture Overview
Key Features
Spark Streaming Consumer (excerpt)
python
from pyspark.streaming import StreamingContext
from pyspark.streaming.kafka import KafkaUtils
# Streaming consumer: read review messages from Kafka, score them, persist them.
# `sc`, `predict_sentiment` and `save_to_mongodb` are defined outside this excerpt.

# Build the streaming context on the existing SparkContext; batchDuration is
# given in seconds, so records are grouped into 5-second micro-batches.
ssc = StreamingContext(sc, batchDuration=5)

# Direct Kafka stream subscribed to the 'amazon-reviews' topic, bootstrapped
# from the broker at localhost:9092.
kafka_stream = KafkaUtils.createDirectStream(
    ssc,
    topics=['amazon-reviews'],
    kafkaParams={'metadata.broker.list': 'localhost:9092'},
)

# NOTE(review): the direct stream yields (key, value) pairs by default —
# confirm predict_sentiment expects the pair rather than the bare message.
predictions = kafka_stream.map(predict_sentiment)

# Hand every micro-batch RDD of predictions to the persistence callback.
predictions.foreachRDD(save_to_mongodb)

# start() only launches the computation and returns immediately; block on
# awaitTermination() so a standalone driver keeps running instead of exiting
# right after start-up.
ssc.start()
ssc.awaitTermination()