parthnuwal7 commited on
Commit
1b4a2d1
·
1 Parent(s): f71c767

ADD: API architecture files for split deployment

Browse files

Added app_api.py - HF Spaces API entry point
Added backend_api.py - Dedicated ML API service
Added frontend_light.py - Streamlit Cloud frontend
Added requirements-backend.txt - ML dependencies
Added requirements-frontend.txt - Lightweight UI only

Architecture: HF Spaces (PyABSA backend) + Streamlit Cloud (frontend)
Next: Test HF Spaces deployment and configure API endpoints

app_api.py ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ HF Spaces API Entry Point - Modified app_enhanced.py for API mode
3
+ Provides both Streamlit UI and API endpoints for backend processing
4
+ """
5
+
6
+ import streamlit as st
7
+ import pandas as pd
8
+ import sys
9
+ import os
10
+ import json
11
+ from typing import Dict, Any
12
+
13
# Setup cache directories for Docker environment
def setup_cache_directories():
    """Create HF/transformers cache directories and verify they are writable.

    Candidate locations that cannot be created or written (common in
    permission-restricted containers) are silently skipped; the function
    never raises.
    """
    candidate_dirs = []
    for base in (os.path.expanduser("~/.cache"), "/.cache"):
        candidate_dirs.append(base)
        candidate_dirs.append(os.path.join(base, "huggingface"))
        candidate_dirs.append(os.path.join(base, "huggingface", "transformers"))

    for directory in candidate_dirs:
        try:
            os.makedirs(directory, exist_ok=True)
            # Probe writability by round-tripping a throwaway file.
            probe_path = os.path.join(directory, "test_write.tmp")
            with open(probe_path, 'w') as handle:
                handle.write("test")
            os.remove(probe_path)
        except (PermissionError, OSError):
            # Unwritable location — skip it and try the next candidate.
            continue
34
+
35
# Initialize cache directories
# Run at import time so any model downloads triggered later land in a
# writable cache location (see setup_cache_directories above).
setup_cache_directories()

# Add src to path for imports
# Make the repo-local `src/` package importable regardless of the
# working directory Streamlit launches from.
current_dir = os.path.dirname(os.path.abspath(__file__))
src_path = os.path.join(current_dir, 'src')
if src_path not in sys.path:
    sys.path.insert(0, src_path)
43
+
44
+ from utils.data_processor import DataProcessor
45
+ from components.visualizations import VisualizationEngine
46
+
47
# Page configuration
st.set_page_config(
    page_title="🤖 ABSA ML Backend API",
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Check if running in API mode (query parameter).
# st.experimental_get_query_params() is deprecated since Streamlit 1.30
# and removed in later releases; prefer st.query_params when available
# and fall back to the legacy API otherwise.
if hasattr(st, "query_params"):
    # st.query_params maps each key to a plain string, not a list.
    api_mode = st.query_params.get('api', False)
else:
    query_params = st.experimental_get_query_params()
    api_mode = query_params.get('api', [False])[0]
58
+
59
# Initialize processor (cached for performance)
@st.cache_resource
def get_data_processor():
    """Build the heavyweight DataProcessor once; Streamlit reuses it across reruns."""
    return DataProcessor()

@st.cache_resource
def get_visualization_engine():
    """Build the VisualizationEngine once; Streamlit reuses it across reruns."""
    return VisualizationEngine()
69
+
70
def serialize_for_api(results: Dict) -> Dict:
    """Convert complex objects to JSON-serializable format for API responses.

    Args:
        results: Raw pipeline output; values may include pandas DataFrames,
            a NetworkX graph under 'aspect_network', and plain builtins.

    Returns:
        A dict with the same keys where DataFrames become lists of row
        dicts and the aspect network becomes node-link JSON; all other
        values pass through unchanged.
    """
    serialized = {}

    for key, value in results.items():
        if key == 'aspect_network':
            # Convert NetworkX graph to dict. Import lazily *inside* the
            # guard so a non-graph value (or missing networkx) does not
            # crash serialization. (The original imported networkx before
            # checking the value was a graph.)
            if hasattr(value, 'nodes'):
                import networkx as nx
                serialized[key] = nx.node_link_data(value)
            else:
                serialized[key] = value
        elif hasattr(value, 'to_dict'):
            # DataFrames (including 'processed_data') become row records.
            # NOTE(review): assumes DataFrame-style to_dict('records'); a
            # pandas Series here would raise — confirm upstream types.
            # The original also had an `isinstance(value, pd.DataFrame)`
            # branch after this one; it was unreachable and is removed.
            serialized[key] = value.to_dict('records')
        else:
            # Keep as is for basic types
            serialized[key] = value

    return serialized
95
+
96
# API Mode - Process reviews via URL parameters
if api_mode:
    st.title("🤖 ABSA Processing API")
    st.write("Backend processing endpoint - send POST requests with review data")

    # Show API documentation
    with st.expander("📚 API Documentation", expanded=True):
        st.markdown("""
        ### POST /api/process

        **Request Format:**
        ```json
        {
            "data": [
                {
                    "id": 1,
                    "reviews_title": "Product Review",
                    "review": "This product is amazing!",
                    "date": "2025-10-01",
                    "user_id": "user123"
                }
            ],
            "options": {
                "enable_translation": true,
                "enable_absa": true
            }
        }
        ```

        **Response Format:**
        ```json
        {
            "status": "success",
            "data": {
                "processed_data": [...],
                "absa_details": [...],
                "summary": {...}
            }
        }
        ```
        """)

    # Test endpoint
    st.subheader("🧪 Test Processing")

    if st.button("Test with Sample Data"):
        # Mixed Hindi/English samples exercise both translation and ABSA.
        sample_data = [
            {
                'id': 1,
                'reviews_title': 'Great Product',
                'review': 'यह उत्पाद बहुत अच्छा है। गुणवत्ता उत्कृष्ट है।',
                'date': '2025-10-01',
                'user_id': 'user1'
            },
            {
                'id': 2,
                'reviews_title': 'Poor Service',
                'review': 'The delivery was very slow and customer service was terrible.',
                'date': '2025-09-30',
                'user_id': 'user2'
            }
        ]

        try:
            with st.spinner("Processing with PyABSA & M2M100..."):
                processor = get_data_processor()
                df = pd.DataFrame(sample_data)
                results = processor.process_uploaded_data(df)

            # Presumably the processor reports failures via an 'error' key
            # rather than raising — TODO confirm against DataProcessor.
            if 'error' not in results:
                st.success("✅ Processing successful!")

                # Show API response format
                api_response = {
                    "status": "success",
                    "data": serialize_for_api(results)
                }

                # Summary metrics
                col1, col2, col3 = st.columns(3)
                with col1:
                    st.metric("Reviews Processed", len(results.get('processed_data', [])))
                with col2:
                    st.metric("Languages Detected", len(results.get('summary', {}).get('languages_detected', [])))
                with col3:
                    st.metric("Aspects Found", len(results.get('areas_of_improvement', [])) + len(results.get('strength_anchors', [])))

                # Full API response
                with st.expander("🔍 Full API Response"):
                    st.json(api_response)
            else:
                st.error(f"❌ Processing failed: {results.get('error', 'Unknown error')}")

        except Exception as e:
            st.error(f"❌ Error: {str(e)}")

    # Health status
    st.subheader("💚 System Health")
    processor = get_data_processor()

    # Readiness flags are duck-typed probes on the processor's
    # sub-services; "unavailable" likely means still loading models —
    # TODO confirm against DataProcessor internals.
    health = {
        "translation_service": "available" if hasattr(processor.translator, 'model') else "unavailable",
        "absa_service": "available" if hasattr(processor.absa_processor, 'aspect_extractor') else "unavailable",
        "timestamp": pd.Timestamp.now().isoformat()
    }

    if all(status == "available" for status in [health["translation_service"], health["absa_service"]]):
        st.success("🟢 All services operational")
    else:
        st.warning("🟡 Some services may be initializing...")

    st.json(health)

else:
    # Regular Streamlit UI Mode - Your existing app_enhanced.py content
    st.title("📊 Advanced Sentiment Analysis Dashboard")
    st.subheader("🎯 Multi-dimensional Review Analytics with ABSA")

    # Your existing app content here...
    st.info("💡 This is the full dashboard interface. Add ?api=true to URL for API mode.")

    # Add your existing app_enhanced.py content here
    # ... (rest of your dashboard code)

# Footer
st.markdown("---")
st.markdown("""
**🤖 Backend Mode:** High-performance PyABSA + M2M100 processing
**🌐 API Endpoint:** Add `?api=true` to URL for API documentation
**⚡ Performance:** Optimized for HF Spaces deployment
""")
backend_api.py ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ML Backend API for HF Spaces deployment
3
+ Provides PyABSA and M2M100 services via REST API
4
+ """
5
+
6
+ import streamlit as st
7
+ import pandas as pd
8
+ import json
9
+ from typing import Dict, List, Any
10
+ from src.utils.data_processor import DataProcessor
11
+ import logging
12
+
13
# Configure for API mode
# Page config must be the first Streamlit call in the script.
st.set_page_config(
    page_title="ABSA ML Backend API",
    page_icon="🤖",
    layout="wide"
)

# Module-level logger used by the API error path below.
logger = logging.getLogger(__name__)
21
+
22
# Initialize processor (cached for performance)
@st.cache_resource
def get_data_processor():
    """Construct the DataProcessor a single time; Streamlit reuses it across reruns."""
    return DataProcessor()
27
+
28
def process_reviews_api(reviews_data: Dict) -> Dict:
    """Run the full ML pipeline for one API request.

    Args:
        reviews_data: Request payload; its 'data' key holds the review rows.

    Returns:
        {'status': 'success', 'data': <serialized results>} on success,
        {'status': 'error', 'message': <reason>} on any failure.
    """
    try:
        # Build a DataFrame from the raw request rows and hand it to the
        # cached processor.
        frame = pd.DataFrame(reviews_data['data'])
        outcome = get_data_processor().process_uploaded_data(frame)

        # Make the result JSON-safe before returning it to the caller.
        payload = serialize_results(outcome)
    except Exception as exc:
        # API boundary: report the failure to the caller instead of raising.
        logger.error(f"API processing error: {str(exc)}")
        return {
            'status': 'error',
            'message': str(exc)
        }

    return {
        'status': 'success',
        'data': payload
    }
60
+
61
def serialize_results(results: Dict) -> Dict:
    """Convert complex objects to JSON-serializable format.

    Args:
        results: Raw pipeline output; values may include pandas DataFrames
            and a NetworkX graph under 'aspect_network'.

    Returns:
        A dict with the same keys where DataFrames become lists of row
        dicts and the graph becomes node-link JSON; other values pass
        through unchanged.
    """
    serialized = {}

    for key, value in results.items():
        if key == 'aspect_network':
            # Convert NetworkX graph to dict. Guard on graph-ness (matching
            # serialize_for_api in app_api.py) and import lazily so a
            # non-graph value or a missing networkx install doesn't crash
            # the whole response. (The original called nx.node_link_data
            # unconditionally.)
            if hasattr(value, 'nodes'):
                import networkx as nx
                serialized[key] = nx.node_link_data(value)
            else:
                serialized[key] = value
        elif hasattr(value, 'to_dict'):
            # DataFrames (including 'processed_data') become row records.
            serialized[key] = value.to_dict('records')
        else:
            # Keep as is
            serialized[key] = value

    return serialized
81
+
82
# Streamlit Interface for API Testing
st.title("🤖 ABSA ML Backend API")
st.subheader("High-Performance PyABSA & M2M100 Processing")

# API Documentation
with st.expander("📚 API Documentation", expanded=True):
    st.markdown("""
    ### Endpoints

    **POST** `/process-reviews`
    ```json
    {
        "data": [
            {
                "id": 1,
                "reviews_title": "Product Review",
                "review": "This product is amazing!",
                "date": "2025-09-30",
                "user_id": "user123"
            }
        ]
    }
    ```

    ### Response
    ```json
    {
        "status": "success",
        "data": {
            "processed_data": [...],
            "absa_details": [...],
            "analytics": {...}
        }
    }
    ```
    """)

# Test Interface
st.subheader("🧪 Test ML Processing")

# Sample data for testing
if st.button("Test with Sample Data"):
    # Mixed Hindi/English samples exercise both translation and ABSA.
    sample_data = {
        'data': [
            {
                'id': 1,
                'reviews_title': 'Great Product',
                'review': 'यह उत्पाद बहुत अच्छा है। गुणवत्ता उत्कृष्ट है।',
                'date': '2025-09-30',
                'user_id': 'user1'
            },
            {
                'id': 2,
                'reviews_title': 'Poor Service',
                'review': 'The delivery was very slow and customer service was terrible.',
                'date': '2025-09-29',
                'user_id': 'user2'
            }
        ]
    }

    with st.spinner("Processing with ML models..."):
        results = process_reviews_api(sample_data)

    if results['status'] == 'success':
        st.success("✅ Processing successful!")

        # Show summary
        data = results['data']
        st.json({
            'total_reviews': len(data.get('processed_data', [])),
            'languages_detected': data.get('summary', {}).get('languages_detected', []),
            'sentiment_distribution': data.get('summary', {}).get('sentiment_distribution', {}),
            'top_aspects_found': len(data.get('areas_of_improvement', [])) + len(data.get('strength_anchors', []))
        })

        # Full results in expander
        with st.expander("🔍 Full Results"):
            st.json(results)
    else:
        st.error(f"❌ Processing failed: {results.get('message', 'Unknown error')}")

# Performance Metrics
# NOTE(review): these figures are hard-coded estimates, not live measurements.
st.subheader("📊 Backend Performance")
col1, col2, col3 = st.columns(3)

with col1:
    st.metric("Models Loaded", "2", help="PyABSA + M2M100")
with col2:
    st.metric("Memory Usage", "~2GB", help="Estimated RAM usage")
with col3:
    st.metric("Processing Speed", "~10s", help="Per 100 reviews")

# Health Check
st.subheader("💚 Health Status")
processor = get_data_processor()

# Readiness flags are duck-typed probes on the processor's sub-services —
# presumably False while models are still loading; confirm against
# DataProcessor internals.
health_status = {}
health_status['translation_ready'] = processor.translator.model is not None
health_status['absa_ready'] = hasattr(processor.absa_processor, 'api_token')
health_status['overall'] = all(health_status.values())

if health_status['overall']:
    st.success("🟢 All systems operational")
else:
    st.error("🔴 Some services unavailable")

st.json(health_status)
frontend_light.py ADDED
@@ -0,0 +1,266 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Frontend-only Streamlit app for Streamlit Cloud deployment
3
+ Connects to HF Spaces ML backend via API calls
4
+ """
5
+
6
+ import streamlit as st
7
+ import pandas as pd
8
+ import requests
9
+ import json
10
+ import plotly.express as px
11
+ import plotly.graph_objects as go
12
+ from typing import Dict, Any
13
+ import time
14
+
15
+ # Lightweight requirements - no ML dependencies!
16
+ st.set_page_config(
17
+ page_title="📊 Advanced Sentiment Analytics",
18
+ page_icon="📊",
19
+ layout="wide"
20
+ )
21
+
22
+ # Configuration
23
+ HF_SPACES_API_URL = "https://your-username-absa-backend.hf.space" # Update with your HF Space URL
24
+
25
+ def call_ml_backend(data: Dict) -> Dict:
26
+ """Call the ML backend API on HF Spaces."""
27
+ try:
28
+ response = requests.post(
29
+ f"{HF_SPACES_API_URL}/process-reviews",
30
+ json=data,
31
+ timeout=120 # Allow time for ML processing
32
+ )
33
+ response.raise_for_status()
34
+ return response.json()
35
+ except requests.exceptions.Timeout:
36
+ return {"status": "error", "message": "Backend processing timeout (>2 minutes)"}
37
+ except requests.exceptions.ConnectionError:
38
+ return {"status": "error", "message": "Cannot connect to ML backend"}
39
+ except Exception as e:
40
+ return {"status": "error", "message": f"API error: {str(e)}"}
41
+
42
def create_lightweight_visualizations(processed_data: Dict):
    """Render charts from backend-processed results (no ML dependencies).

    Draws up to three charts, each only when its data is present in
    *processed_data*: a sentiment pie, an intent bar chart, and a
    horizontal bar chart of the top improvement areas.
    """
    summary = processed_data.get('summary', {})

    # Sentiment Distribution
    if 'sentiment_distribution' in summary:
        pie_data = summary['sentiment_distribution']
        chart = px.pie(
            values=list(pie_data.values()),
            names=list(pie_data.keys()),
            title="Overall Sentiment Distribution"
        )
        st.plotly_chart(chart, use_container_width=True)

    # Intent Analysis
    if 'intents_distribution' in summary:
        intent_counts = summary['intents_distribution']
        chart = px.bar(
            x=list(intent_counts.keys()),
            y=list(intent_counts.values()),
            title="Intent Classification Results"
        )
        st.plotly_chart(chart, use_container_width=True)

    # Areas of Improvement
    improvement_rows = processed_data.get('areas_of_improvement')
    if improvement_rows:
        improvement_table = pd.DataFrame(improvement_rows)
        if not improvement_table.empty:
            chart = px.bar(
                improvement_table.head(10),
                x='priority_score',
                y='aspect',
                orientation='h',
                title="Top Areas for Improvement",
                color='negativity_pct',
                color_continuous_scale='Reds'
            )
            st.plotly_chart(chart, use_container_width=True)
81
+
82
# Main App Interface
st.title("📊 Advanced Sentiment Analytics Dashboard")
st.subheader("Powered by PyABSA & M2M100 (Backend: HF Spaces)")

# Sidebar for configuration
with st.sidebar:
    st.header("⚙️ Configuration")

    # Backend status check
    st.subheader("🔗 Backend Status")
    if st.button("Check ML Backend"):
        with st.spinner("Checking backend..."):
            try:
                response = requests.get(f"{HF_SPACES_API_URL}/health", timeout=10)
                if response.status_code == 200:
                    st.success("🟢 Backend Online")
                else:
                    st.error("🔴 Backend Issues")
            except requests.exceptions.RequestException:
                # All requests failure modes (timeout, DNS, refused, bad
                # URL) subclass RequestException. The original bare
                # `except:` also swallowed KeyboardInterrupt/SystemExit,
                # which must propagate.
                st.error("🔴 Backend Offline")

    # Processing options (read by the upload/process flow below)
    st.subheader("🎛️ Processing Options")
    enable_translation = st.checkbox("Enable Translation", value=True)
    enable_absa = st.checkbox("Enable ABSA", value=True)

    # Cost info
    st.info("""
    💡 **Architecture Benefits:**
    - Frontend: Free Streamlit Cloud
    - Backend: HF Spaces (pay-per-use)
    - No local ML dependencies
    - Automatic scaling
    """)
116
+
117
# File Upload
st.header("📁 Upload Reviews Data")
uploaded_file = st.file_uploader(
    "Choose CSV file",
    type="csv",
    help="Upload CSV with columns: id, reviews_title, review, date, user_id"
)

if uploaded_file is not None:
    # Read and validate data
    try:
        df = pd.read_csv(uploaded_file)
        st.success(f"✅ Uploaded {len(df)} reviews")

        # Show preview
        st.subheader("📋 Data Preview")
        st.dataframe(df.head(), use_container_width=True)

        # Validation
        required_cols = ['id', 'reviews_title', 'review', 'date', 'user_id']
        missing_cols = [col for col in required_cols if col not in df.columns]

        if missing_cols:
            st.error(f"❌ Missing required columns: {missing_cols}")
        else:
            st.success("✅ Data format validated")

            # Process button
            if st.button("🚀 Process with ML Backend", type="primary"):
                # Prepare API payload (options come from the sidebar checkboxes)
                api_data = {
                    "data": df.to_dict('records'),
                    "options": {
                        "enable_translation": enable_translation,
                        "enable_absa": enable_absa
                    }
                }

                # Call backend
                with st.spinner("🤖 Processing with PyABSA & M2M100 models..."):
                    progress_bar = st.progress(0)

                    # Simulate progress (since we can't track actual backend progress)
                    # NOTE(review): this adds a fixed ~5s delay before the real call.
                    for i in range(10):
                        time.sleep(0.5)
                        progress_bar.progress((i + 1) / 10)

                    results = call_ml_backend(api_data)

                # Handle results
                if results.get("status") == "success":
                    st.success("✅ Processing completed successfully!")

                    # Display results
                    processed_data = results["data"]

                    # Summary metrics
                    st.header("📊 Analysis Summary")

                    col1, col2, col3, col4 = st.columns(4)

                    summary = processed_data.get('summary', {})
                    with col1:
                        st.metric(
                            "Total Reviews",
                            summary.get('total_reviews', 0)
                        )
                    with col2:
                        languages = summary.get('languages_detected', [])
                        st.metric(
                            "Languages Detected",
                            len(languages)
                        )
                    with col3:
                        improvements = len(processed_data.get('areas_of_improvement', []))
                        st.metric(
                            "Problem Areas",
                            improvements
                        )
                    with col4:
                        strengths = len(processed_data.get('strength_anchors', []))
                        st.metric(
                            "Strength Areas",
                            strengths
                        )

                    # Visualizations
                    st.header("📈 Analytics Dashboard")
                    create_lightweight_visualizations(processed_data)

                    # Detailed results
                    with st.expander("🔍 Detailed Results"):
                        st.json(processed_data)

                    # Download options
                    st.header("💾 Export Results")

                    # Convert to downloadable format
                    if processed_data.get('processed_data'):
                        result_df = pd.DataFrame(processed_data['processed_data'])
                        csv = result_df.to_csv(index=False)

                        st.download_button(
                            label="📥 Download Processed Data (CSV)",
                            data=csv,
                            file_name=f"sentiment_analysis_{time.strftime('%Y%m%d_%H%M%S')}.csv",
                            mime="text/csv"
                        )

                else:
                    st.error(f"❌ Processing failed: {results.get('message', 'Unknown error')}")

                    # Troubleshooting info
                    with st.expander("🔧 Troubleshooting"):
                        st.write("**Common Issues:**")
                        st.write("- Backend may be starting up (first use takes 2-3 minutes)")
                        st.write("- Large datasets may timeout (try smaller batches)")
                        st.write("- Check backend status in sidebar")

                        st.write("**Backend URL:**", HF_SPACES_API_URL)
                        st.json(results)

    except Exception as e:
        st.error(f"❌ Error reading file: {str(e)}")

else:
    # Show sample data format
    st.info("👆 Upload a CSV file to get started")

    st.subheader("📝 Expected CSV Format")
    sample_df = pd.DataFrame({
        'id': [1, 2, 3],
        'reviews_title': ['Great Product', 'Poor Service', 'Average Quality'],
        'review': [
            'यह उत्पाद बहुत अच्छा है',
            'The delivery was very slow',
            'Product is okay, nothing special'
        ],
        'date': ['2025-09-30', '2025-09-29', '2025-09-28'],
        'user_id': ['user1', 'user2', 'user3']
    })
    st.dataframe(sample_df, use_container_width=True)

# Footer
st.markdown("---")
st.markdown("""
**Architecture:** Frontend (Streamlit Cloud) ↔ ML Backend (HF Spaces)
**Models:** PyABSA (Multilingual) + Facebook M2M100 (418M)
**Benefits:** Free frontend, scalable ML processing, high accuracy
""")
requirements-backend.txt ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Backend Requirements - HF Spaces with PyABSA
2
+ # Full ML stack for high-accuracy processing
3
+
4
+ # Core ML frameworks
5
+ torch>=2.0.0,<2.2.0
6
+ transformers>=4.30.0,<4.37.0
7
+ pyabsa>=2.4.0,<3.0.0
8
+ sentencepiece>=0.1.99
9
+ sacremoses>=0.0.53
10
+ faiss-cpu>=1.7.4
11
+
12
+ # Data processing
13
+ pandas>=1.5.0,<2.1.0
14
+ numpy>=1.24.0,<1.26.0
15
+ scikit-learn>=1.3.0,<1.4.0
16
+ langdetect>=1.0.9
17
+
18
+ # API and web framework
19
+ streamlit>=1.28.0,<1.30.0
20
+ requests>=2.31.0
21
+
22
+ # Utilities
23
+ joblib>=1.3.0
24
+ tqdm>=4.65.0
25
+ pillow>=10.0.0,<10.2.0
26
+
27
+ # Optional for network analysis (if needed)
28
+ networkx>=3.0
requirements-frontend.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Frontend Requirements - Streamlit Cloud Compatible
2
+ # Only UI and API libraries - NO ML dependencies
3
+
4
+ streamlit>=1.28.0
5
+ pandas>=1.5.0
6
+ numpy>=1.24.0
7
+ plotly>=5.15.0
8
+ requests>=2.31.0
9
+ streamlit-option-menu>=0.3.6
10
+ streamlit-aggrid>=0.3.4
11
+
12
+ # Optional extras for enhanced UI
13
+ pillow>=10.0.0
14
+ openpyxl>=3.1.0
15
+
16
+ # Total size: ~50MB (vs 1.5GB with ML libraries)
17
+ # Perfect for Streamlit Cloud free tier!