xtinkarpiu commited on
Commit
e18a159
·
verified ·
1 Parent(s): 44819aa

Upload folder using huggingface_hub

Browse files
.gitignore ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ignore Python cache and logs
2
+ __pycache__/
3
+ *.pyc
4
+ *.pyo
5
+ *.log
6
+
7
+ # Ignore environment files with secrets
8
+ .env
9
+ *.env
10
+
11
+ # Ignore IDE/editor config
12
+ .vscode/
13
+ .idea/
14
+
15
+ # Ignore OS/system files
16
+ .DS_Store
17
+ Thumbs.db
18
+
19
+ # Ignore test scripts or diagnostics
20
+ test_*.py
21
+ diagnostic.sh
22
+
23
+ # Ignore raw media or large unneeded assets
24
+ *.mp4
25
+ *.mov
26
+ *.avi
27
+ assets/raw_videos/
28
+ screenshots/
29
+
30
+ # Ignore Kafka jars or build artifacts (if any)
31
+ *.jar
32
+ *.class
33
+ build/
34
+ dist/
35
+
36
+ # Ignore Docker stuff not needed in repo
37
+ *.pid
38
+ *.sock
Dockerfile ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.9-slim
2
+
3
+ WORKDIR /app
4
+
5
+ RUN apt-get update && apt-get install -y \
6
+ gcc \
7
+ && rm -rf /var/lib/apt/lists/*
8
+
9
+ COPY requirements.txt .
10
+ RUN pip install --no-cache-dir -r requirements.txt
11
+
12
+ COPY . .
13
+
14
+ CMD ["python", "dashboard.py"]
README.md CHANGED
@@ -1,11 +1,77 @@
1
- ---
2
- title: Sentiment Analysis
3
- emoji: 🦀
4
- colorFrom: red
5
- colorTo: indigo
6
- sdk: docker
7
- pinned: false
8
- short_description: Real-time dashboard for visualizing tweet sentiment
9
- ---
10
-
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Sentiment Analysis Dashboard
3
+ emoji: 📊
4
+ colorFrom: blue
5
+ colorTo: green
6
+ sdk: docker
7
+ app_file: dashboard.py
8
+ pinned: false
9
+ ---
10
+
11
+ # 📊 Sentiment Analysis Dashboard
12
+
13
+ A real-time dashboard for visualizing tweet sentiment (positive, negative, neutral) using **Kafka**, **Spark**, and **Flask**.
14
+ It supports both live Twitter streams (via producer.py) and demo mode with mock tweets (via mock_tweet_producer.py).
15
+
16
+ This version runs in mock/demo mode on Hugging Face Spaces.
17
+
18
+ Author: Kristine Karp (karpkristine@gmail.com)
19
+
20
+ ---
21
+
22
+ ## 🚀 Demo Mode (Hugging Face)
23
+
24
+ > This Space runs in **mock mode**, generating fake tweets using `mock_tweet_producer.py`.
25
+ This allows users to explore the dashboard **without requiring Twitter API credentials or external Kafka setup**.
26
+
27
+
28
+ ---
29
+
30
+ ## 🧠 Features
31
+
32
+ - Real-time tweet ingestion (simulated or live)
33
+ - Sentiment counts: Positive, Neutral, Negative
34
+ - Recent tweet stream with sentiment tags
35
+ - Hourly sentiment trend summary
36
+ - WebSocket-powered live updates
37
+
38
+ ---
39
+
40
+ ## 🛠️ File Overview
41
+
42
+ | File/Folder | Purpose |
43
+ |------------------------|---------------------------------------------------|
44
+ | `dashboard.py` | Main Flask app + Kafka consumer for hugging faces demo purposes |
45
+ | `local_dashboard.py` | Flask app + Kafka consumer that can be run locally in http://localhost:5000/ |
46
+ | `templates/dashboard.html` | HTML UI template for the dashboard |
47
+ | `mock_tweet_producer.py` | Generates mock tweets for demo/testing |
48
+ | `producer.py` | Optional Twitter producer if you have API token |
49
+ | `requirements.txt` | All Python dependencies |
50
+ | `.env` (optional) | Set up your Twitter API token if using real data |
51
+
52
+ ---
53
+
54
+ ## 📡 Using Real Twitter Streaming
55
+
56
+ If you want to stream real tweets and analyze their sentiment:
57
+
58
+ 1. Create a Twitter/X Developer App
59
+ 2. Add your **Bearer Token** to a `.env` file:
60
+ ```env
61
+ BEARER_TOKEN=your_token_here
62
+ 3. Run producer.py instead
63
+
64
+ ## 🧪 Local Development
65
+
66
+ git clone https://huggingface.co/spaces/xtinkarpiu/sentiment-analysis
67
+ cd sentiment-analysis
68
+ docker-compose up --build
69
+
70
+ ## 📷 Dashboard Preview
71
+
72
+ Here's a preview of the sentiment dashboard in action:
73
+
74
+ ![Dashboard Overview](assets/dashboard_screenshot1.jpg)
75
+ ![Real-Time Tweets and Charts](assets/dashboard_screenshot2.jpg)
76
+
77
+ *Demo hosted on Hugging Face Spaces*
assets/dashboard_screenshot1.jpg ADDED
assets/dashboard_screenshot2.jpg ADDED
consumer.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pyspark.sql import SparkSession
2
+ from pyspark.sql.functions import col, udf, from_json, to_json, struct
3
+ from pyspark.sql.types import StringType, StructType, StructField, LongType, DoubleType
4
+ import time
5
+ import logging
6
+
7
+ # Set up logging
8
+ logging.basicConfig(level=logging.INFO)
9
+ logger = logging.getLogger(__name__)
10
+
11
+ def simple_sentiment(text):
12
+ if text is None:
13
+ return 'neutral'
14
+ text = text.lower()
15
+ if any(word in text for word in ['good', 'great', 'awesome', 'happy', 'love', 'excellent', 'amazing']):
16
+ return 'positive'
17
+ elif any(word in text for word in ['bad', 'terrible', 'awful', 'sad', 'hate', 'worst', 'horrible']):
18
+ return 'negative'
19
+ return 'neutral'
20
+
21
+ sentiment_udf = udf(simple_sentiment, StringType())
22
+
23
+ logger.info("Waiting for services to start...")
24
+ time.sleep(15) # Wait for services
25
+
26
+ try:
27
+ spark = SparkSession.builder \
28
+ .appName("KafkaSentimentConsumer") \
29
+ .config("spark.sql.streaming.forceDeleteTempCheckpointLocation", "true") \
30
+ .config("spark.sql.adaptive.enabled", "false") \
31
+ .config("spark.sql.adaptive.coalescePartitions.enabled", "false") \
32
+ .getOrCreate()
33
+
34
+ spark.sparkContext.setLogLevel("WARN")
35
+ logger.info("Spark session created successfully")
36
+
37
+ tweet_schema = StructType([
38
+ StructField("id", LongType(), True),
39
+ StructField("text", StringType(), True),
40
+ StructField("created_at", StringType(), True),
41
+ StructField("author_id", LongType(), True),
42
+ StructField("timestamp", DoubleType(), True)
43
+ ])
44
+
45
+ logger.info("Connecting to Kafka...")
46
+
47
+ # Read from input topic
48
+ df = spark.readStream \
49
+ .format("kafka") \
50
+ .option("kafka.bootstrap.servers", "kafka:9092") \
51
+ .option("subscribe", "sentiment-topic") \
52
+ .option("startingOffsets", "latest") \
53
+ .option("failOnDataLoss", "false") \
54
+ .load()
55
+
56
+ logger.info("Connected to Kafka, processing tweets...")
57
+
58
+ # Parse and process tweets
59
+ parsed_df = df.select(
60
+ col("timestamp").alias("kafka_timestamp"),
61
+ from_json(col("value").cast("string"), tweet_schema).alias("tweet_data")
62
+ ).filter(col("tweet_data").isNotNull())
63
+
64
+ result_df = parsed_df.select(
65
+ col("tweet_data.id").alias("tweet_id"),
66
+ col("tweet_data.text").alias("tweet_text"),
67
+ col("tweet_data.created_at").alias("created_at"),
68
+ col("tweet_data.author_id").alias("author_id"),
69
+ col("kafka_timestamp")
70
+ ).withColumn("sentiment", sentiment_udf(col("tweet_text")))
71
+
72
+ # Create a copy for console output
73
+ console_query = result_df.writeStream \
74
+ .outputMode("append") \
75
+ .format("console") \
76
+ .option("truncate", False) \
77
+ .trigger(processingTime='5 seconds') \
78
+ .start()
79
+
80
+ logger.info("Console output stream started")
81
+
82
+ # Send results to dashboard topic
83
+ dashboard_df = result_df.select(
84
+ to_json(struct(
85
+ col("tweet_id"),
86
+ col("tweet_text"),
87
+ col("sentiment"),
88
+ col("author_id"),
89
+ col("created_at")
90
+ )).alias("value")
91
+ )
92
+
93
+ dashboard_query = dashboard_df.writeStream \
94
+ .format("kafka") \
95
+ .option("kafka.bootstrap.servers", "kafka:9092") \
96
+ .option("topic", "sentiment-results") \
97
+ .option("checkpointLocation", "/tmp/checkpoint-dashboard") \
98
+ .outputMode("append") \
99
+ .trigger(processingTime='5 seconds') \
100
+ .start()
101
+
102
+ logger.info("Dashboard output stream started")
103
+ logger.info("Starting sentiment analysis consumer...")
104
+ logger.info("Processing tweets and sending results to dashboard...")
105
+ logger.info("Topics: sentiment-topic (input) -> sentiment-results (output)")
106
+
107
+ # Wait for both streams
108
+ spark.streams.awaitAnyTermination()
109
+
110
+ except Exception as e:
111
+ logger.error(f"Error in consumer: {e}")
112
+ raise
docker-compose.yml ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ services:
2
+ kafka:
3
+ image: bitnami/kafka:latest
4
+ container_name: kafka
5
+ environment:
6
+ - KAFKA_CFG_PROCESS_ROLES=broker,controller
7
+ - KAFKA_CFG_NODE_ID=1
8
+ - KAFKA_CFG_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,CONTROLLER:PLAINTEXT
9
+ - KAFKA_CFG_LISTENERS=PLAINTEXT://:9092,CONTROLLER://:9093
10
+ - KAFKA_CFG_ADVERTISED_LISTENERS=PLAINTEXT://kafka:9092
11
+ - KAFKA_CFG_CONTROLLER_QUORUM_VOTERS=1@localhost:9093
12
+ - KAFKA_CFG_CONTROLLER_LISTENER_NAMES=CONTROLLER
13
+ - KAFKA_CFG_AUTO_CREATE_TOPICS_ENABLE=true
14
+ ports:
15
+ - "9092:9092"
16
+ healthcheck:
17
+ test: ["CMD-SHELL", "kafka-topics.sh --bootstrap-server localhost:9092 --list"]
18
+ interval: 30s
19
+ timeout: 10s
20
+ retries: 5
21
+ start_period: 60s
22
+ networks:
23
+ - kafka-network
24
+
25
+ sentiment-producer:
26
+ container_name: sentiment-producer
27
+ build: .
28
+ depends_on:
29
+ kafka:
30
+ condition: service_healthy
31
+ command: ["python", "mock_tweet_producer.py"]
32
+ restart: on-failure
33
+ networks:
34
+ - kafka-network
35
+
36
+ spark:
37
+ image: bitnami/spark:3.4
38
+ container_name: spark
39
+ environment:
40
+ - SPARK_MODE=master
41
+ - SPARK_RPC_AUTHENTICATION_ENABLED=no
42
+ - SPARK_RPC_ENCRYPTION_ENABLED=no
43
+ - SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no
44
+ - SPARK_SSL_ENABLED=no
45
+ ports:
46
+ - "4040:4040"
47
+ - "7077:7077"
48
+ depends_on:
49
+ kafka:
50
+ condition: service_healthy
51
+ networks:
52
+ - kafka-network
53
+
54
+ spark-worker:
55
+ image: bitnami/spark:3.4
56
+ container_name: spark-worker
57
+ environment:
58
+ - SPARK_MODE=worker
59
+ - SPARK_MASTER_URL=spark://spark:7077
60
+ - SPARK_RPC_AUTHENTICATION_ENABLED=no
61
+ - SPARK_RPC_ENCRYPTION_ENABLED=no
62
+ - SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no
63
+ - SPARK_SSL_ENABLED=no
64
+ depends_on:
65
+ - spark
66
+ networks:
67
+ - kafka-network
68
+
69
+ sentiment-consumer:
70
+ image: bitnami/spark:3.4
71
+ container_name: sentiment-consumer
72
+ depends_on:
73
+ kafka:
74
+ condition: service_healthy
75
+ spark:
76
+ condition: service_started
77
+ spark-worker:
78
+ condition: service_started
79
+ command: ["spark-submit", "--master", "spark://spark:7077", "--packages", "org.apache.spark:spark-sql-kafka-0-10_2.12:3.4.0", "/app/consumer.py"]
80
+ volumes:
81
+ - .:/app
82
+ restart: on-failure
83
+ networks:
84
+ - kafka-network
85
+
86
+ dashboard:
87
+ container_name: dashboard
88
+ build: .
89
+ depends_on:
90
+ kafka:
91
+ condition: service_healthy
92
+ command: ["python", "dashboard.py"]
93
+ ports:
94
+ - "5000:5000"
95
+ restart: on-failure
96
+ networks:
97
+ - kafka-network
98
+
99
+ networks:
100
+ kafka-network:
101
+ driver: bridge
local_dashboard.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, render_template, jsonify
2
+ from flask_socketio import SocketIO, emit
3
+ from kafka import KafkaConsumer
4
+ from kafka.errors import NoBrokersAvailable
5
+ import json
6
+ import threading
7
+ import time
8
+ from datetime import datetime
9
+ from collections import defaultdict, deque
10
+ import logging
11
+ import os
12
+
13
+ app = Flask(__name__)
14
+ app.config['SECRET_KEY'] = 'sentiment-dashboard-secret'
15
+ socketio = SocketIO(app, cors_allowed_origins="*")
16
+
17
+ # In-memory storage for dashboard data
18
+ sentiment_counts = {'positive': 0, 'negative': 0, 'neutral': 0}
19
+ recent_tweets = deque(maxlen=50) # Keep last 50 tweets
20
+ hourly_sentiment = defaultdict(lambda: {'positive': 0, 'negative': 0, 'neutral': 0})
21
+
22
+ logging.basicConfig(level=logging.INFO)
23
+ logger = logging.getLogger(__name__)
24
+
25
+ def create_kafka_consumer(max_retries=10, retry_delay=5):
26
+ """Create Kafka consumer with retry logic"""
27
+ for attempt in range(max_retries):
28
+ try:
29
+ consumer = KafkaConsumer(
30
+ 'sentiment-results',
31
+ bootstrap_servers=['kafka:9092'],
32
+ value_deserializer=lambda m: json.loads(m.decode('utf-8')),
33
+ consumer_timeout_ms=1000,
34
+ auto_offset_reset='earliest', # Changed from 'latest' to 'earliest'
35
+ enable_auto_commit=True,
36
+ group_id='dashboard-group' # Added consumer group
37
+ )
38
+ logger.info("Successfully connected to Kafka consumer!")
39
+ return consumer
40
+ except NoBrokersAvailable as e:
41
+ logger.warning(f"Kafka not ready, attempt {attempt + 1}/{max_retries}. Retrying in {retry_delay}s...")
42
+ time.sleep(retry_delay)
43
+ except Exception as e:
44
+ logger.error(f"Unexpected error connecting to Kafka: {e}")
45
+ time.sleep(retry_delay)
46
+
47
+ raise Exception(f"Could not connect to Kafka consumer after {max_retries} attempts")
48
+
49
+ def kafka_consumer_thread():
50
+ """Background thread to consume processed tweets from Kafka"""
51
+ try:
52
+ # Wait for Kafka and Spark to be ready
53
+ logger.info("Waiting for Kafka and Spark services to be ready...")
54
+ time.sleep(10) # Reduced from 30 to 10 seconds
55
+
56
+ consumer = create_kafka_consumer()
57
+ logger.info("Connected to Kafka consumer for dashboard - waiting for processed tweets...")
58
+ logger.info("Starting to poll for messages from sentiment-results topic...")
59
+
60
+ message_count = 0
61
+
62
+ while True:
63
+ try:
64
+ # Poll for messages with timeout
65
+ message_batch = consumer.poll(timeout_ms=1000)
66
+
67
+ if message_batch:
68
+ logger.info(f"Received batch with {len(message_batch)} topic partitions")
69
+
70
+ for topic_partition, messages in message_batch.items():
71
+ logger.info(f"Processing {len(messages)} messages from {topic_partition}")
72
+
73
+ for message in messages:
74
+ try:
75
+ tweet_data = message.value
76
+ message_count += 1
77
+ logger.info(f"Message {message_count}: Received tweet data: {tweet_data}")
78
+
79
+ # Update sentiment counts
80
+ sentiment = tweet_data.get('sentiment', 'neutral')
81
+ sentiment_counts[sentiment] += 1
82
+
83
+ # Add to recent tweets
84
+ recent_tweets.append({
85
+ 'text': tweet_data.get('tweet_text', '')[:100] + '...' if len(tweet_data.get('tweet_text', '')) > 100 else tweet_data.get('tweet_text', ''),
86
+ 'sentiment': sentiment,
87
+ 'timestamp': datetime.now().strftime('%H:%M:%S'),
88
+ 'author_id': tweet_data.get('author_id', 'Unknown')
89
+ })
90
+
91
+ # Update hourly data
92
+ hour = datetime.now().strftime('%H:00')
93
+ hourly_sentiment[hour][sentiment] += 1
94
+
95
+ # Emit real-time update to connected clients
96
+ socketio.emit('sentiment_update', {
97
+ 'sentiment_counts': dict(sentiment_counts),
98
+ 'recent_tweets': list(recent_tweets),
99
+ 'hourly_data': dict(hourly_sentiment)
100
+ })
101
+
102
+ logger.info(f"Processed tweet with sentiment: {sentiment} - Total counts: {dict(sentiment_counts)}")
103
+
104
+ except Exception as e:
105
+ logger.error(f"Error processing individual tweet data: {e}")
106
+ else:
107
+ # No messages received
108
+ if message_count == 0:
109
+ logger.info("No messages received yet, continuing to poll...")
110
+ time.sleep(1)
111
+
112
+ except Exception as e:
113
+ logger.error(f"Error in polling loop: {e}")
114
+ time.sleep(5)
115
+
116
+ except Exception as e:
117
+ logger.error(f"Error in Kafka consumer thread: {e}")
118
+
119
+ @app.route('/')
120
+ def dashboard():
121
+ """Main dashboard page"""
122
+ return render_template('dashboard.html')
123
+
124
+ @app.route('/api/data')
125
+ def get_data():
126
+ """API endpoint to get current dashboard data"""
127
+ data = {
128
+ 'sentiment_counts': dict(sentiment_counts),
129
+ 'recent_tweets': list(recent_tweets),
130
+ 'hourly_data': dict(hourly_sentiment),
131
+ 'total_tweets': sum(sentiment_counts.values())
132
+ }
133
+ logger.info(f"API request - returning data: {data}")
134
+ return jsonify(data)
135
+
136
+ @socketio.on('connect')
137
+ def handle_connect():
138
+ """Handle client connection"""
139
+ logger.info("Client connected to dashboard")
140
+ emit('sentiment_update', {
141
+ 'sentiment_counts': dict(sentiment_counts),
142
+ 'recent_tweets': list(recent_tweets),
143
+ 'hourly_data': dict(hourly_sentiment)
144
+ })
145
+
146
+ if __name__ == '__main__':
147
+ # Start Kafka consumer in background thread
148
+ consumer_thread = threading.Thread(target=kafka_consumer_thread, daemon=True)
149
+ consumer_thread.start()
150
+
151
+ logger.info("Starting sentiment dashboard on port 5000")
152
+ logger.info("Dashboard will display data once Spark processes tweets from Kafka")
153
+
154
+ # Fix for Werkzeug warning - use allow_unsafe_werkzeug for development
155
+ socketio.run(app, host='0.0.0.0', port=5000, debug=False, allow_unsafe_werkzeug=True)
mock_tweet_producer.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import time
3
+ import random
4
+ from kafka import KafkaProducer
5
+ from kafka.errors import NoBrokersAvailable
6
+ import logging
7
+ from datetime import datetime
8
+
9
+ # Set up logging
10
+ logging.basicConfig(level=logging.INFO)
11
+ logger = logging.getLogger(__name__)
12
+
13
+ # Kafka settings
14
+ KAFKA_TOPIC = "sentiment-topic"
15
+ KAFKA_BOOTSTRAP_SERVERS = ['kafka:9092']
16
+
17
+ # Sample tweets with different sentiments
18
+ SAMPLE_TWEETS = [
19
+ # Positive tweets
20
+ "I absolutely love this new Python framework! It's amazing how easy it is to use 🐍✨",
21
+ "Just finished my first machine learning project and I'm so excited about the results! 🚀",
22
+ "Beautiful sunny day! Perfect for coding outside with a cup of coffee ☕️💻",
23
+ "Finally understood how Kafka works! This is such an awesome technology 🎉",
24
+ "Great job team! Our deployment went smoothly and everyone is happy 👏",
25
+ "Python makes data analysis so much fun! Love working with pandas and numpy 📊",
26
+ "Incredible performance boost after optimizing our database queries! 🔥",
27
+ "Happy Friday everyone! Time to celebrate another successful sprint 🎊",
28
+ "Just discovered this amazing open source library. The community is fantastic! 💖",
29
+ "Feeling grateful for all the learning opportunities in tech. Best career choice ever! 🙏",
30
+
31
+ # Negative tweets
32
+ "Ugh, spent 3 hours debugging this stupid error. So frustrated right now 😤",
33
+ "This API documentation is terrible. Nothing works as described 😡",
34
+ "Why is deployment always so painful? Something always breaks in production 💔",
35
+ "Hate it when the server crashes right before the demo. Murphy's law strikes again 😭",
36
+ "This legacy code is a nightmare. Who wrote this mess? 🤬",
37
+ "Another day, another merge conflict. Git is driving me crazy today 😵",
38
+ "The client changed requirements again. This project is becoming impossible 😞",
39
+ "Performance is awful after the latest update. Users are complaining non-stop 📉",
40
+ "Terrible meeting. Two hours of my life I'll never get back 😴",
41
+ "Bug fixes breaking more things. This codebase is cursed 👻",
42
+
43
+ # Neutral tweets
44
+ "Working on a new feature for our application. Should be ready next week.",
45
+ "Attending a tech conference tomorrow. Looking forward to the presentations.",
46
+ "Updated the dependencies in our project. Everything seems to be working fine.",
47
+ "Reading about microservices architecture. Interesting design patterns.",
48
+ "Team meeting scheduled for 2 PM. We'll discuss the quarterly roadmap.",
49
+ "Deploying version 2.3.1 to staging environment for testing.",
50
+ "Database migration completed successfully. All tables are updated.",
51
+ "Code review session with the team. Found a few minor issues to fix.",
52
+ "Working with the new intern on their first task. They're learning quickly.",
53
+ "Backup process completed. All data is safely stored in the cloud.",
54
+
55
+ # Python-specific tweets
56
+ "Python 3.12 has some interesting new features. Time to upgrade our projects.",
57
+ "Django vs Flask debate continues. Both have their strengths and use cases.",
58
+ "Love how clean and readable Python code can be. Truly a beautiful language.",
59
+ "Pandas is incredibly powerful for data manipulation. Such a time saver!",
60
+ "FastAPI is becoming my go-to choice for building REST APIs. So fast!",
61
+ "NumPy arrays are so much faster than regular Python lists for calculations.",
62
+ "Jupyter notebooks are perfect for data exploration and prototyping.",
63
+ "PEP 8 style guide helps keep Python code consistent across the team.",
64
+ "Virtual environments in Python save so much dependency headache.",
65
+ "Type hints in Python make the code much more maintainable and clear."
66
+ ]
67
+
68
+ def create_kafka_producer(max_retries=10, retry_delay=5):
69
+ """Create Kafka producer with retry logic"""
70
+ for attempt in range(max_retries):
71
+ try:
72
+ producer = KafkaProducer(
73
+ bootstrap_servers=KAFKA_BOOTSTRAP_SERVERS,
74
+ value_serializer=lambda v: json.dumps(v).encode('utf-8'),
75
+ key_serializer=lambda k: k.encode('utf-8') if k else None
76
+ )
77
+ logger.info("Successfully connected to Kafka!")
78
+ return producer
79
+ except NoBrokersAvailable as e:
80
+ logger.warning(f"Kafka not ready, attempt {attempt + 1}/{max_retries}. Retrying in {retry_delay}s...")
81
+ time.sleep(retry_delay)
82
+ except Exception as e:
83
+ logger.error(f"Unexpected error connecting to Kafka: {e}")
84
+ time.sleep(retry_delay)
85
+
86
+ raise Exception(f"Could not connect to Kafka after {max_retries} attempts")
87
+
88
+ def generate_mock_tweet():
89
+ """Generate a mock tweet with realistic data"""
90
+ tweet_text = random.choice(SAMPLE_TWEETS)
91
+
92
+ tweet_data = {
93
+ 'id': random.randint(100000000000000000, 999999999999999999), # Twitter-like ID
94
+ 'text': tweet_text,
95
+ 'created_at': datetime.now().isoformat(),
96
+ 'author_id': random.randint(100000000, 999999999), # Random author ID
97
+ 'timestamp': time.time()
98
+ }
99
+
100
+ return tweet_data
101
+
102
+ def main():
103
+ """Main function to start mock tweet streaming"""
104
+ logger.info("Starting Mock Tweet Kafka Producer...")
105
+
106
+ # Wait for services to be ready
107
+ logger.info("Waiting for Kafka to be ready...")
108
+ time.sleep(10)
109
+
110
+ try:
111
+ # Create Kafka producer
112
+ producer = create_kafka_producer()
113
+
114
+ logger.info("Starting mock tweet stream...")
115
+ logger.info("Generating tweets with various sentiments...")
116
+
117
+ tweet_count = 0
118
+
119
+ while True:
120
+ try:
121
+ # Generate a mock tweet
122
+ tweet_data = generate_mock_tweet()
123
+
124
+ # Send to Kafka
125
+ producer.send(
126
+ KAFKA_TOPIC,
127
+ value=tweet_data,
128
+ key=str(tweet_data['id'])
129
+ )
130
+
131
+ tweet_count += 1
132
+
133
+ # Log tweet info
134
+ tweet_preview = tweet_data['text'][:50] + "..." if len(tweet_data['text']) > 50 else tweet_data['text']
135
+ logger.info(f"Tweet {tweet_count}: {tweet_preview}")
136
+
137
+ # Random delay between tweets (1-5 seconds)
138
+ delay = random.uniform(1, 5)
139
+ time.sleep(delay)
140
+
141
+ except KeyboardInterrupt:
142
+ logger.info("Stopping tweet generation...")
143
+ break
144
+ except Exception as e:
145
+ logger.error(f"Error generating tweet: {e}")
146
+ time.sleep(1)
147
+
148
+ except Exception as e:
149
+ logger.error(f"Error in main: {e}")
150
+ raise
151
+ finally:
152
+ if 'producer' in locals():
153
+ producer.close()
154
+ logger.info("Kafka producer closed")
155
+
156
+ if __name__ == "__main__":
157
+ main()
producer.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tweepy
2
+ import json
3
+ import time
4
+ from kafka import KafkaProducer
5
+ from kafka.errors import NoBrokersAvailable
6
+ import logging
7
+ import os
8
+ from dotenv import load_dotenv
9
+ import urllib.parse
10
+
11
+ # Set up logging
12
+ logging.basicConfig(level=logging.INFO)
13
+ logger = logging.getLogger(__name__)
14
+
15
+ load_dotenv()
16
+ encoded_token = os.getenv("TWITTER_BEARER_TOKEN")
17
+ BEARER_TOKEN = urllib.parse.unquote(encoded_token)
18
+
19
+ # Kafka settings
20
+ KAFKA_TOPIC = "sentiment-topic"
21
+ KAFKA_BOOTSTRAP_SERVERS = ['kafka:9092']
22
+
23
+ def create_kafka_producer(max_retries=10, retry_delay=5):
24
+ """Create Kafka producer with retry logic"""
25
+ for attempt in range(max_retries):
26
+ try:
27
+ producer = KafkaProducer(
28
+ bootstrap_servers=KAFKA_BOOTSTRAP_SERVERS,
29
+ value_serializer=lambda v: json.dumps(v).encode('utf-8'),
30
+ key_serializer=lambda k: k.encode('utf-8') if k else None
31
+ )
32
+ logger.info("Successfully connected to Kafka!")
33
+ return producer
34
+ except NoBrokersAvailable as e:
35
+ logger.warning(f"Kafka not ready, attempt {attempt + 1}/{max_retries}. Retrying in {retry_delay}s...")
36
+ time.sleep(retry_delay)
37
+ except Exception as e:
38
+ logger.error(f"Unexpected error connecting to Kafka: {e}")
39
+ time.sleep(retry_delay)
40
+
41
+ raise Exception(f"Could not connect to Kafka after {max_retries} attempts")
42
+
43
+ class KafkaTweetStreamer(tweepy.StreamingClient):
44
+ """Streaming client for X API v2"""
45
+
46
+ def __init__(self, bearer_token, kafka_producer, topic):
47
+ super().__init__(bearer_token, wait_on_rate_limit=True)
48
+ self.kafka_producer = kafka_producer
49
+ self.topic = topic
50
+ self.tweet_count = 0
51
+
52
+ def on_tweet(self, tweet):
53
+ """Handle incoming tweets"""
54
+ try:
55
+ # Extract tweet data
56
+ tweet_data = {
57
+ 'id': tweet.id,
58
+ 'text': tweet.text,
59
+ 'created_at': tweet.created_at.isoformat() if tweet.created_at else time.strftime('%Y-%m-%d %H:%M:%S'),
60
+ 'author_id': tweet.author_id if tweet.author_id else 0,
61
+ 'timestamp': time.time()
62
+ }
63
+
64
+ # Send to Kafka
65
+ self.kafka_producer.send(
66
+ self.topic,
67
+ value=tweet_data,
68
+ key=str(tweet.id)
69
+ )
70
+
71
+ self.tweet_count += 1
72
+
73
+ # Log every tweet for debugging
74
+ tweet_preview = tweet_data['text'][:50] + "..." if len(tweet_data['text']) > 50 else tweet_data['text']
75
+ logger.info(f"Tweet {self.tweet_count}: {tweet_preview}")
76
+
77
+ return True
78
+
79
+ except Exception as e:
80
+ logger.error(f"Error processing tweet: {e}")
81
+ return True # Continue streaming
82
+
83
+ def on_errors(self, errors):
84
+ """Handle streaming errors"""
85
+ logger.error(f"Streaming error: {errors}")
86
+
87
+ def on_connection_error(self):
88
+ """Handle connection errors"""
89
+ logger.error("Connection error occurred")
90
+
91
+ def main():
92
+ """Main function to start tweet streaming"""
93
+ logger.info("Starting X (Twitter) Kafka Producer...")
94
+
95
+ # Wait for services to be ready
96
+ logger.info("Waiting for services to be ready...")
97
+ time.sleep(10)
98
+
99
+ try:
100
+ # Create Kafka producer
101
+ producer = create_kafka_producer()
102
+
103
+ # Create streaming client (simplified)
104
+ client = tweepy.Client(bearer_token=BEARER_TOKEN)
105
+ streamer = KafkaTweetStreamer(BEARER_TOKEN, producer, KAFKA_TOPIC)
106
+
107
+ # Clean up any existing rules first
108
+ try:
109
+ rules = streamer.get_rules()
110
+ if rules.data:
111
+ rule_ids = [rule.id for rule in rules.data]
112
+ streamer.delete_rules(rule_ids)
113
+ logger.info(f"Deleted {len(rule_ids)} existing rules")
114
+ except Exception as e:
115
+ logger.info("No existing rules to delete")
116
+
117
+ # Add simple, broad rules that should get tweets
118
+ new_rules = [
119
+ tweepy.StreamRule("python", tag="python"),
120
+ tweepy.StreamRule("happy OR excited", tag="positive"),
121
+ tweepy.StreamRule("sad OR angry", tag="negative"),
122
+ ]
123
+
124
+ streamer.add_rules(new_rules)
125
+ logger.info("Added streaming rules")
126
+
127
+ # Start streaming with basic fields
128
+ logger.info("Starting tweet stream...")
129
+ logger.info("Listening for tweets containing: python, happy, excited, sad, angry")
130
+
131
+ # Start the stream
132
+ streamer.filter(tweet_fields=['created_at', 'author_id'])
133
+
134
+ except tweepy.Forbidden as e:
135
+ logger.error(f"Forbidden error (403): {e}")
136
+ logger.error("This might be a project/app attachment issue")
137
+ except tweepy.Unauthorized as e:
138
+ logger.error(f"Unauthorized error (401): {e}")
139
+ logger.error("Check your Bearer Token")
140
+ except Exception as e:
141
+ logger.error(f"Error in main: {e}")
142
+ raise
143
+ finally:
144
+ if 'producer' in locals():
145
+ producer.close()
146
+ logger.info("Kafka producer closed")
147
+
148
+ if __name__ == "__main__":
149
+ main()
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ tweepy>=4.14.0
2
+ kafka-python>=2.0.2
3
+ pyspark>=3.4.0
4
+ requests>=2.28.0
5
+ python-dotenv>=0.19.0
6
+ flask>=2.3.0
7
+ flask-socketio>=5.3.0
8
+ python-dotenv>=0.19.0
templates/dashboard.html ADDED
@@ -0,0 +1,431 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Real-Time Sentiment Analysis Dashboard</title>
7
+ <script src="https://cdn.socket.io/4.7.2/socket.io.min.js"></script>
8
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/Chart.js/3.9.1/chart.min.js"></script>
9
+ <style>
10
+ * {
11
+ margin: 0;
12
+ padding: 0;
13
+ box-sizing: border-box;
14
+ }
15
+
16
+ body {
17
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
18
+ background: linear-gradient(135deg, #1e3c72 0%, #2a5298 100%);
19
+ color: white;
20
+ min-height: 100vh;
21
+ }
22
+
23
+ .container {
24
+ max-width: 1400px;
25
+ margin: 0 auto;
26
+ padding: 20px;
27
+ }
28
+
29
+ .header {
30
+ text-align: center;
31
+ margin-bottom: 30px;
32
+ }
33
+
34
+ .header h1 {
35
+ font-size: 2.5rem;
36
+ margin-bottom: 10px;
37
+ text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
38
+ }
39
+
40
+ .header p {
41
+ font-size: 1.1rem;
42
+ opacity: 0.9;
43
+ }
44
+
45
+ .stats-grid {
46
+ display: grid;
47
+ grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
48
+ gap: 20px;
49
+ margin-bottom: 30px;
50
+ }
51
+
52
+ .stat-card {
53
+ background: rgba(255, 255, 255, 0.1);
54
+ backdrop-filter: blur(10px);
55
+ border: 1px solid rgba(255, 255, 255, 0.2);
56
+ border-radius: 15px;
57
+ padding: 25px;
58
+ text-align: center;
59
+ transition: transform 0.3s ease;
60
+ }
61
+
62
+ .stat-card:hover {
63
+ transform: translateY(-5px);
64
+ }
65
+
66
+ .stat-number {
67
+ font-size: 2.5rem;
68
+ font-weight: bold;
69
+ margin-bottom: 10px;
70
+ }
71
+
72
+ .stat-label {
73
+ font-size: 1rem;
74
+ opacity: 0.8;
75
+ text-transform: uppercase;
76
+ letter-spacing: 1px;
77
+ }
78
+
79
+ .positive { color: #4ade80; }
80
+ .negative { color: #f87171; }
81
+ .neutral { color: #60a5fa; }
82
+ .total { color: #fbbf24; }
83
+
84
+ .charts-section {
85
+ display: grid;
86
+ grid-template-columns: 1fr 1fr;
87
+ gap: 30px;
88
+ margin-bottom: 30px;
89
+ }
90
+
91
+ .chart-container {
92
+ background: rgba(255, 255, 255, 0.1);
93
+ backdrop-filter: blur(10px);
94
+ border: 1px solid rgba(255, 255, 255, 0.2);
95
+ border-radius: 15px;
96
+ padding: 25px;
97
+ }
98
+
99
+ .chart-title {
100
+ font-size: 1.3rem;
101
+ margin-bottom: 20px;
102
+ text-align: center;
103
+ }
104
+
105
+ .tweets-section {
106
+ background: rgba(255, 255, 255, 0.1);
107
+ backdrop-filter: blur(10px);
108
+ border: 1px solid rgba(255, 255, 255, 0.2);
109
+ border-radius: 15px;
110
+ padding: 25px;
111
+ }
112
+
113
+ .section-title {
114
+ font-size: 1.5rem;
115
+ margin-bottom: 20px;
116
+ text-align: center;
117
+ }
118
+
119
+ .tweets-container {
120
+ max-height: 400px;
121
+ overflow-y: auto;
122
+ }
123
+
124
+ .tweet-item {
125
+ background: rgba(255, 255, 255, 0.05);
126
+ border-radius: 10px;
127
+ padding: 15px;
128
+ margin-bottom: 10px;
129
+ border-left: 4px solid;
130
+ transition: all 0.3s ease;
131
+ }
132
+
133
+ .tweet-item:hover {
134
+ background: rgba(255, 255, 255, 0.1);
135
+ }
136
+
137
+ .tweet-item.positive { border-left-color: #4ade80; }
138
+ .tweet-item.negative { border-left-color: #f87171; }
139
+ .tweet-item.neutral { border-left-color: #60a5fa; }
140
+
141
+ .tweet-header {
142
+ display: flex;
143
+ justify-content: space-between;
144
+ align-items: center;
145
+ margin-bottom: 8px;
146
+ }
147
+
148
+ .tweet-sentiment {
149
+ font-size: 0.8rem;
150
+ padding: 4px 8px;
151
+ border-radius: 12px;
152
+ font-weight: bold;
153
+ text-transform: uppercase;
154
+ }
155
+
156
+ .tweet-sentiment.positive { background: #4ade80; color: #000; }
157
+ .tweet-sentiment.negative { background: #f87171; color: #000; }
158
+ .tweet-sentiment.neutral { background: #60a5fa; color: #000; }
159
+
160
+ .tweet-time {
161
+ font-size: 0.8rem;
162
+ opacity: 0.7;
163
+ }
164
+
165
+ .tweet-text {
166
+ font-size: 0.9rem;
167
+ line-height: 1.4;
168
+ }
169
+
170
+ .status-indicator {
171
+ position: fixed;
172
+ top: 20px;
173
+ right: 20px;
174
+ padding: 10px 15px;
175
+ border-radius: 20px;
176
+ font-size: 0.8rem;
177
+ font-weight: bold;
178
+ }
179
+
180
+ .status-connected {
181
+ background: #4ade80;
182
+ color: #000;
183
+ }
184
+
185
+ .status-disconnected {
186
+ background: #f87171;
187
+ color: #000;
188
+ }
189
+
190
+ @media (max-width: 768px) {
191
+ .charts-section {
192
+ grid-template-columns: 1fr;
193
+ }
194
+
195
+ .stats-grid {
196
+ grid-template-columns: repeat(2, 1fr);
197
+ }
198
+
199
+ .header h1 {
200
+ font-size: 2rem;
201
+ }
202
+ }
203
+
204
+ /* Custom scrollbar */
205
+ .tweets-container::-webkit-scrollbar {
206
+ width: 8px;
207
+ }
208
+
209
+ .tweets-container::-webkit-scrollbar-track {
210
+ background: rgba(255, 255, 255, 0.1);
211
+ border-radius: 10px;
212
+ }
213
+
214
+ .tweets-container::-webkit-scrollbar-thumb {
215
+ background: rgba(255, 255, 255, 0.3);
216
+ border-radius: 10px;
217
+ }
218
+
219
+ .tweets-container::-webkit-scrollbar-thumb:hover {
220
+ background: rgba(255, 255, 255, 0.5);
221
+ }
222
+ </style>
223
+ </head>
224
+ <body>
225
+ <div class="status-indicator" id="status">Connecting...</div>
226
+
227
+ <div class="container">
228
+ <div class="header">
229
+ <h1>🚀 Real-Time Sentiment Analysis</h1>
230
+ <p>Live Tweet Processing with Apache Kafka & Apache Spark</p>
231
+ </div>
232
+
233
+ <div class="stats-grid">
234
+ <div class="stat-card">
235
+ <div class="stat-number positive" id="positive-count">0</div>
236
+ <div class="stat-label">Positive Tweets</div>
237
+ </div>
238
+ <div class="stat-card">
239
+ <div class="stat-number negative" id="negative-count">0</div>
240
+ <div class="stat-label">Negative Tweets</div>
241
+ </div>
242
+ <div class="stat-card">
243
+ <div class="stat-number neutral" id="neutral-count">0</div>
244
+ <div class="stat-label">Neutral Tweets</div>
245
+ </div>
246
+ <div class="stat-card">
247
+ <div class="stat-number total" id="total-count">0</div>
248
+ <div class="stat-label">Total Processed</div>
249
+ </div>
250
+ </div>
251
+
252
+ <div class="charts-section">
253
+ <div class="chart-container">
254
+ <h3 class="chart-title">Sentiment Distribution</h3>
255
+ <canvas id="sentiment-pie-chart"></canvas>
256
+ </div>
257
+ <div class="chart-container">
258
+ <h3 class="chart-title">Hourly Sentiment Trend</h3>
259
+ <canvas id="hourly-chart"></canvas>
260
+ </div>
261
+ </div>
262
+
263
+ <div class="tweets-section">
264
+ <h3 class="section-title">📱 Recent Tweets</h3>
265
+ <div class="tweets-container" id="tweets-container">
266
+ <div style="text-align: center; opacity: 0.7; padding: 20px;">
267
+ Waiting for tweets...
268
+ </div>
269
+ </div>
270
+ </div>
271
+ </div>
272
+
273
+ <script>
274
+ // Initialize Socket.IO
275
+ const socket = io();
276
+
277
+ // Status indicator
278
+ const statusElement = document.getElementById('status');
279
+
280
+ // Charts
281
+ let pieChart, hourlyChart;
282
+
283
+ // Initialize charts
284
+ function initCharts() {
285
+ // Pie Chart
286
+ const pieCtx = document.getElementById('sentiment-pie-chart').getContext('2d');
287
+ pieChart = new Chart(pieCtx, {
288
+ type: 'doughnut',
289
+ data: {
290
+ labels: ['Positive', 'Negative', 'Neutral'],
291
+ datasets: [{
292
+ data: [0, 0, 0],
293
+ backgroundColor: ['#4ade80', '#f87171', '#60a5fa'],
294
+ borderWidth: 0
295
+ }]
296
+ },
297
+ options: {
298
+ responsive: true,
299
+ plugins: {
300
+ legend: {
301
+ position: 'bottom',
302
+ labels: { color: '#fff' }
303
+ }
304
+ }
305
+ }
306
+ });
307
+
308
+ // Hourly Chart
309
+ const hourlyCtx = document.getElementById('hourly-chart').getContext('2d');
310
+ hourlyChart = new Chart(hourlyCtx, {
311
+ type: 'line',
312
+ data: {
313
+ labels: [],
314
+ datasets: [
315
+ {
316
+ label: 'Positive',
317
+ data: [],
318
+ borderColor: '#4ade80',
319
+ backgroundColor: 'rgba(74, 222, 128, 0.1)',
320
+ tension: 0.4
321
+ },
322
+ {
323
+ label: 'Negative',
324
+ data: [],
325
+ borderColor: '#f87171',
326
+ backgroundColor: 'rgba(248, 113, 113, 0.1)',
327
+ tension: 0.4
328
+ },
329
+ {
330
+ label: 'Neutral',
331
+ data: [],
332
+ borderColor: '#60a5fa',
333
+ backgroundColor: 'rgba(96, 165, 250, 0.1)',
334
+ tension: 0.4
335
+ }
336
+ ]
337
+ },
338
+ options: {
339
+ responsive: true,
340
+ plugins: {
341
+ legend: {
342
+ labels: { color: '#fff' }
343
+ }
344
+ },
345
+ scales: {
346
+ y: {
347
+ ticks: { color: '#fff' },
348
+ grid: { color: 'rgba(255, 255, 255, 0.1)' }
349
+ },
350
+ x: {
351
+ ticks: { color: '#fff' },
352
+ grid: { color: 'rgba(255, 255, 255, 0.1)' }
353
+ }
354
+ }
355
+ }
356
+ });
357
+ }
358
+
359
+ // Update dashboard with new data
360
+ function updateDashboard(data) {
361
+ // Update counters
362
+ document.getElementById('positive-count').textContent = data.sentiment_counts.positive || 0;
363
+ document.getElementById('negative-count').textContent = data.sentiment_counts.negative || 0;
364
+ document.getElementById('neutral-count').textContent = data.sentiment_counts.neutral || 0;
365
+
366
+ const total = (data.sentiment_counts.positive || 0) +
367
+ (data.sentiment_counts.negative || 0) +
368
+ (data.sentiment_counts.neutral || 0);
369
+ document.getElementById('total-count').textContent = total;
370
+
371
+ // Update pie chart
372
+ pieChart.data.datasets[0].data = [
373
+ data.sentiment_counts.positive || 0,
374
+ data.sentiment_counts.negative || 0,
375
+ data.sentiment_counts.neutral || 0
376
+ ];
377
+ pieChart.update();
378
+
379
+ // Update hourly chart
380
+ if (data.hourly_data) {
381
+ const hours = Object.keys(data.hourly_data).sort();
382
+ hourlyChart.data.labels = hours;
383
+ hourlyChart.data.datasets[0].data = hours.map(h => data.hourly_data[h].positive || 0);
384
+ hourlyChart.data.datasets[1].data = hours.map(h => data.hourly_data[h].negative || 0);
385
+ hourlyChart.data.datasets[2].data = hours.map(h => data.hourly_data[h].neutral || 0);
386
+ hourlyChart.update();
387
+ }
388
+
389
+ // Update recent tweets
390
+ if (data.recent_tweets && data.recent_tweets.length > 0) {
391
+ const container = document.getElementById('tweets-container');
392
+ container.innerHTML = data.recent_tweets.map(tweet => `
393
+ <div class="tweet-item ${tweet.sentiment}">
394
+ <div class="tweet-header">
395
+ <span class="tweet-sentiment ${tweet.sentiment}">${tweet.sentiment}</span>
396
+ <span class="tweet-time">${tweet.timestamp}</span>
397
+ </div>
398
+ <div class="tweet-text">${tweet.text}</div>
399
+ </div>
400
+ `).join('');
401
+ }
402
+ }
403
+
404
+ // Socket event handlers
405
+ socket.on('connect', function() {
406
+ statusElement.textContent = '🟢 Connected';
407
+ statusElement.className = 'status-indicator status-connected';
408
+ });
409
+
410
+ socket.on('disconnect', function() {
411
+ statusElement.textContent = '🔴 Disconnected';
412
+ statusElement.className = 'status-indicator status-disconnected';
413
+ });
414
+
415
+ socket.on('sentiment_update', function(data) {
416
+ updateDashboard(data);
417
+ });
418
+
419
+ // Initialize everything when page loads
420
+ document.addEventListener('DOMContentLoaded', function() {
421
+ initCharts();
422
+
423
+ // Fetch initial data
424
+ fetch('/api/data')
425
+ .then(response => response.json())
426
+ .then(data => updateDashboard(data))
427
+ .catch(error => console.error('Error fetching initial data:', error));
428
+ });
429
+ </script>
430
+ </body>
431
+ </html>