riazmo committed on
Commit
bf00b99
·
verified ·
1 Parent(s): 30c9917

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +143 -787
app.py CHANGED
@@ -1,832 +1,188 @@
1
- """
2
- HuggingFace Spaces - Review Intelligence System (Streamlit)
3
- Complete app with URL input, progress tracking, and interactive dashboard
4
- FIXED VERSION - Better UI contrast + Proper field mapping
5
- """
6
-
7
- import streamlit as st
8
- import pandas as pd
9
- import plotly.express as px
10
- import plotly.graph_objects as go
11
- import os
12
- from datetime import datetime
13
- from typing import List, Dict, Optional
14
- import time
15
-
16
- from gradio_pipeline import GradioPipeline
17
-
18
-
19
- # ============================================================================
20
- # PAGE CONFIGURATION
21
- # ============================================================================
22
-
23
- st.set_page_config(
24
- page_title="Review Intelligence System",
25
- page_icon="🎯",
26
- layout="wide",
27
- initial_sidebar_state="expanded"
28
- )
29
-
30
- # FIXED Custom CSS - Better Contrast
31
- st.markdown("""
32
- <style>
33
- .main {
34
- padding: 0rem 1rem;
35
- }
36
-
37
- /* FIXED: Metric cards with better contrast */
38
- .stMetric {
39
- background: linear-gradient(135deg, #1e3a8a 0%, #3b82f6 100%);
40
- padding: 20px;
41
- border-radius: 10px;
42
- box-shadow: 0 4px 6px rgba(0, 0, 0, 0.3);
43
- border: 1px solid #60a5fa;
44
- }
45
-
46
- .stMetric label {
47
- color: #dbeafe !important;
48
- font-size: 14px !important;
49
- font-weight: 600 !important;
50
- text-transform: uppercase;
51
- letter-spacing: 0.5px;
52
- }
53
 
54
- .stMetric [data-testid="stMetricValue"] {
55
- color: #ffffff !important;
56
- font-size: 36px !important;
57
- font-weight: bold !important;
58
- text-shadow: 0 2px 4px rgba(0,0,0,0.2);
59
- }
60
 
61
- .stMetric [data-testid="stMetricDelta"] {
62
- color: #93c5fd !important;
63
- font-size: 14px !important;
64
- font-weight: 600 !important;
65
- }
66
 
67
- .big-font {
68
- font-size: 24px !important;
69
- font-weight: bold;
70
- }
71
 
72
- .success-box {
73
- padding: 25px;
74
- border-radius: 12px;
75
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
76
- color: white;
77
- margin: 20px 0;
78
- box-shadow: 0 8px 16px rgba(0, 0, 0, 0.3);
79
- }
 
 
 
 
80
 
81
- .success-box h1 {
82
- color: white !important;
83
- text-shadow: 0 2px 4px rgba(0,0,0,0.2);
84
- }
85
 
86
- /* Info boxes */
87
- .stAlert {
88
- border-radius: 8px;
89
- }
90
 
91
- /* Better table styling */
92
- .dataframe {
93
- border: 1px solid #e2e8f0 !important;
94
- }
95
 
96
- /* Tab styling */
97
- .stTabs [data-baseweb="tab-list"] {
98
- gap: 8px;
99
- }
100
 
101
- .stTabs [data-baseweb="tab"] {
102
- background-color: #1e293b;
103
- border-radius: 8px 8px 0 0;
104
- padding: 12px 24px;
105
- color: #94a3b8;
106
- }
107
 
108
- .stTabs [aria-selected="true"] {
109
- background-color: #3b82f6;
110
- color: white;
111
- }
112
- </style>
113
- """, unsafe_allow_html=True)
114
-
115
-
116
- # ============================================================================
117
- # SESSION STATE INITIALIZATION
118
- # ============================================================================
119
-
120
- if 'processing_complete' not in st.session_state:
121
- st.session_state.processing_complete = False
122
-
123
- if 'results' not in st.session_state:
124
- st.session_state.results = None
125
-
126
- if 'insights' not in st.session_state:
127
- st.session_state.insights = None
128
 
129
- if 'scraped_count' not in st.session_state:
130
- st.session_state.scraped_count = 0
131
 
 
132
 
133
- # ============================================================================
134
- # PROCESSING FUNCTIONS
135
- # ============================================================================
136
 
137
- def process_reviews_streamlit(app_store_urls: str, play_store_urls: str,
138
- hf_api_key: str, review_limit: int):
139
- """
140
- Process reviews with Streamlit progress tracking
141
- """
142
 
143
- # Validate inputs
144
- if not hf_api_key or not hf_api_key.strip():
145
- st.error("❌ Please provide your HuggingFace API key")
146
- return False
147
 
148
- if not app_store_urls.strip() and not play_store_urls.strip():
149
- st.error("❌ Please provide at least one App Store or Play Store URL")
150
- return False
151
 
152
- try:
153
- # Set API key
154
- os.environ['HUGGINGFACE_API_KEY'] = hf_api_key.strip()
155
-
156
- # Progress indicators
157
- progress_bar = st.progress(0)
158
- status_text = st.empty()
159
-
160
- # Initialize pipeline
161
- status_text.text("🚀 Initializing pipeline...")
162
- progress_bar.progress(5)
163
- pipeline = GradioPipeline(review_limit=review_limit)
164
-
165
- # Parse URLs
166
- app_urls = [url.strip() for url in app_store_urls.split('\n') if url.strip()]
167
- play_urls = [url.strip() for url in play_store_urls.split('\n') if url.strip()]
168
-
169
- # Stage 0: Scraping
170
- status_text.text("🕷️ Scraping reviews from stores...")
171
- progress_bar.progress(10)
172
-
173
- scraped_count = 0
174
- total_apps = len(app_urls) + len(play_urls)
175
-
176
- for i, app_id in enumerate(app_urls, 1):
177
- status_text.text(f"🍎 Scraping App Store ({i}/{total_apps}): {app_id}")
178
- reviews = pipeline.scraper.scrape_app_store_rss(app_id, country="ae", limit=review_limit)
179
- saved = pipeline.scraper.save_reviews_to_db(reviews)
180
- scraped_count += saved
181
- progress_bar.progress(10 + int(20 * i / total_apps))
182
- time.sleep(1)
183
-
184
- for i, package in enumerate(play_urls, 1):
185
- status_text.text(f"🤖 Scraping Play Store ({i}/{total_apps}): {package}")
186
- reviews = pipeline.scraper.scrape_play_store_api(package, country="ae", limit=review_limit)
187
- saved = pipeline.scraper.save_reviews_to_db(reviews)
188
- scraped_count += saved
189
- progress_bar.progress(10 + int(20 * (len(app_urls) + i) / total_apps))
190
- time.sleep(1)
191
-
192
- if scraped_count == 0:
193
- st.warning("⚠️ No reviews scraped. Please check your URLs and try again.")
194
- progress_bar.empty()
195
- status_text.empty()
196
- return False
197
-
198
- st.session_state.scraped_count = scraped_count
199
-
200
- # Stage 1-3: Processing
201
- status_text.text("🤖 Processing reviews with AI models...")
202
- progress_bar.progress(30)
203
-
204
- reviews = pipeline.db.get_pending_reviews(limit=review_limit)
205
- total_reviews = len(reviews)
206
-
207
- print(f"📊 DEBUG: Found {total_reviews} reviews to process")
208
-
209
- processed_states = []
210
 
211
- for i, review in enumerate(reviews, 1):
212
- review_id = review.get('review_id', 'unknown')[:20]
213
- status_text.text(f"🤖 Processing review {i}/{total_reviews}: {review_id}...")
214
- progress_bar.progress(30 + int(60 * i / total_reviews))
215
-
216
- try:
217
- from langgraph_state import create_initial_state
218
- state = create_initial_state(review)
219
- config = {"configurable": {"thread_id": f"review_{review.get('review_id')}"}}
220
- final_state = pipeline.review_graph.invoke(state, config=config)
221
-
222
- # Convert to dict
223
- state_dict = dict(final_state)
224
- processed_states.append(state_dict)
225
-
226
- # DEBUG: Print what we got
227
- print(f"✅ Processed {review_id}:")
228
- print(f" Type: {state_dict.get('classification_type', 'MISSING')}")
229
- print(f" Dept: {state_dict.get('department', 'MISSING')}")
230
- print(f" Sentiment: {state_dict.get('final_sentiment', 'MISSING')}")
231
-
232
- except Exception as e:
233
- st.warning(f"⚠️ Error processing review: {str(e)}")
234
- print(f"❌ ERROR: {e}")
235
- import traceback
236
- print(traceback.format_exc())
237
- continue
238
 
239
- if len(processed_states) == 0:
240
- st.error(" No reviews were processed successfully.")
241
- progress_bar.empty()
242
- status_text.empty()
243
- return False
244
 
245
- # Stage 4: Batch Analysis
246
- status_text.text("📊 Generating batch insights...")
247
- progress_bar.progress(90)
 
248
 
249
- insights = pipeline.analyze_batch(processed_states)
 
250
 
251
- # Store in session state
252
- st.session_state.results = processed_states
253
- st.session_state.insights = insights
254
- st.session_state.processing_complete = True
255
 
256
- # Complete
257
- progress_bar.progress(100)
258
- status_text.text("✅ Analysis complete!")
259
- time.sleep(1)
260
- progress_bar.empty()
261
- status_text.empty()
262
 
263
- return True
264
 
265
- except Exception as e:
266
- st.error(f"❌ Error during processing: {str(e)}")
267
- import traceback
268
- st.code(traceback.format_exc())
269
- return False
270
 
 
271
 
272
- # ============================================================================
273
- # VISUALIZATION FUNCTIONS
274
- # ============================================================================
275
 
276
- def create_summary_section(scraped_count: int, results: List[Dict], insights: Dict):
277
- """Create summary metrics section"""
278
-
279
- total = len(results)
280
- positive = insights.get('sentiment_distribution', {}).get('POSITIVE', 0)
281
- neutral = insights.get('sentiment_distribution', {}).get('NEUTRAL', 0)
282
- negative = insights.get('sentiment_distribution', {}).get('NEGATIVE', 0)
283
- critical = insights.get('priority_distribution', {}).get('critical', 0)
284
- churn_risk = insights.get('churn_risk', 0)
285
-
286
- # Success header
287
- st.markdown(
288
- f"""
289
- <div class="success-box">
290
- <h1 style="margin: 0;">✅ Analysis Complete!</h1>
291
- <p style="margin: 10px 0 0 0; font-size: 1.2em; opacity: 0.9;">
292
- Review Intelligence System Results
293
- </p>
294
- </div>
295
- """,
296
- unsafe_allow_html=True
297
- )
298
-
299
- # Metrics with better styling
300
- col1, col2, col3, col4, col5 = st.columns(5)
301
-
302
- with col1:
303
- st.metric("📊 Total Reviews", total, f"Scraped: {scraped_count}")
304
-
305
- with col2:
306
- pos_pct = (positive / total * 100) if total > 0 else 0
307
- st.metric("😊 Positive", positive, f"{pos_pct:.1f}%")
308
-
309
- with col3:
310
- neg_pct = (negative / total * 100) if total > 0 else 0
311
- st.metric("😞 Negative", negative, f"{neg_pct:.1f}%")
312
-
313
- with col4:
314
- st.metric("🚨 Critical", critical, "⚠️" if critical > 0 else "✅")
315
-
316
- with col5:
317
- st.metric("📉 Churn Risk", f"{churn_risk:.1f}%",
318
- "🔴 High" if churn_risk > 30 else "🟢 Low")
319
-
320
- # Recommendations
321
- if insights.get('recommendations'):
322
- st.markdown("### 💡 Key Recommendations")
323
- for rec in insights.get('recommendations', []):
324
- st.info(rec)
325
 
 
326
 
327
- def create_sentiment_chart(insights: Dict):
328
- """Create sentiment distribution donut chart"""
329
- sentiment_dist = insights.get('sentiment_distribution', {})
330
-
331
- labels = list(sentiment_dist.keys())
332
- values = list(sentiment_dist.values())
333
- colors = ['#10b981', '#f59e0b', '#ef4444']
334
-
335
- fig = go.Figure(data=[go.Pie(
336
- labels=labels,
337
- values=values,
338
- hole=0.5,
339
- marker_colors=colors,
340
- textinfo='label+percent',
341
- textposition='outside',
342
- textfont_size=14
343
- )])
344
-
345
- fig.update_layout(
346
- title="😊 Sentiment Distribution",
347
- showlegend=True,
348
- height=400
349
- )
350
-
351
- return fig
352
 
 
353
 
354
- def create_priority_chart(insights: Dict):
355
- """Create priority distribution bar chart"""
356
- priority_dist = insights.get('priority_distribution', {})
357
-
358
- priority_order = ['critical', 'high', 'medium', 'low']
359
- labels = [p for p in priority_order if p in priority_dist]
360
- values = [priority_dist.get(p, 0) for p in labels]
361
- colors = ['#dc2626', '#f59e0b', '#3b82f6', '#10b981']
362
-
363
- fig = go.Figure(data=[go.Bar(
364
- x=labels,
365
- y=values,
366
- marker_color=colors[:len(labels)],
367
- text=values,
368
- textposition='auto'
369
- )])
370
-
371
- fig.update_layout(
372
- title="🎯 Priority Levels",
373
- xaxis_title="Priority",
374
- yaxis_title="Count",
375
- height=400
376
- )
377
-
378
- return fig
379
 
 
380
 
381
- def create_department_chart(insights: Dict):
382
- """Create department routing horizontal bar chart"""
383
- dept_dist = insights.get('department_distribution', {})
384
-
385
- labels = list(dept_dist.keys())
386
- values = list(dept_dist.values())
387
-
388
- fig = go.Figure(data=[go.Bar(
389
- x=values,
390
- y=labels,
391
- orientation='h',
392
- marker_color='#667eea',
393
- text=values,
394
- textposition='auto'
395
- )])
396
-
397
- fig.update_layout(
398
- title="🏢 Department Routing",
399
- xaxis_title="Number of Issues",
400
- yaxis_title="Department",
401
- height=400
402
- )
403
-
404
- return fig
405
 
 
 
 
 
 
 
406
 
407
- def create_emotion_chart(insights: Dict):
408
- """Create emotion distribution chart"""
409
- emotion_dist = insights.get('emotion_distribution', {})
410
-
411
- labels = list(emotion_dist.keys())
412
- values = list(emotion_dist.values())
413
-
414
- fig = px.bar(
415
- x=labels,
416
- y=values,
417
- labels={'x': 'Emotion', 'y': 'Count'},
418
- color=values,
419
- color_continuous_scale='Viridis'
420
- )
421
-
422
- fig.update_layout(
423
- title="😊 Emotional Analysis",
424
- xaxis_title="Emotion Type",
425
- yaxis_title="Number of Reviews",
426
- height=300,
427
- showlegend=False
428
- )
429
-
430
- return fig
431
 
 
432
 
433
- def create_reviews_dataframe(results: List[Dict]) -> pd.DataFrame:
434
- """
435
- FIXED: Create DataFrame with proper field mapping
436
- Checks both state field names AND database field names
437
- """
438
-
439
- df_data = []
440
- for review in results:
441
- # FIXED: Check state fields FIRST, fall back to database fields
442
- df_data.append({
443
- 'Review ID': review.get('review_id', 'N/A')[:20],
444
- 'Rating': review.get('rating', 0),
445
- 'Review': (review.get('review_text', 'N/A') or '')[:100] + '...',
446
- 'Sentiment': review.get('final_sentiment', review.get('stage3_final_sentiment', 'N/A')),
447
- 'Type': review.get('classification_type', review.get('stage1_llm1_type', 'N/A')),
448
- 'Department': review.get('department', review.get('stage1_llm1_department', 'N/A')),
449
- 'Priority': review.get('priority', review.get('stage1_llm1_priority', 'N/A')),
450
- 'Emotion': review.get('emotion', review.get('stage1_llm2_emotion', 'N/A')),
451
- 'Needs Review': '🚨 Yes' if review.get('needs_human_review', review.get('stage3_needs_human_review')) else '✅ No'
452
- })
453
-
454
- return pd.DataFrame(df_data)
455
 
 
456
 
457
- # ============================================================================
458
- # MAIN APP
459
- # ============================================================================
 
460
 
461
- def main():
462
- """Main Streamlit app"""
463
-
464
- # Title
465
- st.title("🎯 Review Intelligence System")
466
- st.markdown("### Multi-Stage AI Analysis Dashboard")
467
- st.markdown("Powered by **LangGraph** + **HuggingFace** • 4-Stage Processing Pipeline")
468
- st.markdown("---")
469
-
470
- # Sidebar - Input or View Mode
471
- with st.sidebar:
472
- st.header("🎛️ Control Panel")
473
-
474
- if st.session_state.processing_complete:
475
- st.success("✅ Analysis Complete!")
476
- if st.button("🔄 Start New Analysis", use_container_width=True):
477
- st.session_state.processing_complete = False
478
- st.session_state.results = None
479
- st.session_state.insights = None
480
- st.rerun()
481
- else:
482
- st.info("👈 Enter URLs below to start")
483
-
484
- # Main content - Input or Results
485
- if not st.session_state.processing_complete:
486
- show_input_form()
487
- else:
488
- show_results_dashboard()
489
 
 
 
 
 
490
 
491
- def show_input_form():
492
- """Show input form for URLs and API key"""
493
-
494
- st.markdown("### 📝 Step 1: Enter Store URLs")
495
-
496
- col1, col2 = st.columns(2)
497
-
498
- with col1:
499
- st.markdown("#### 🍎 App Store IDs")
500
- st.markdown(
501
- """
502
- **Format:** Just paste the app ID
503
- - Example: `1158907446` (UAE)
504
- - Example: `1234567890` (US)
505
- """
506
- )
507
- app_store_urls = st.text_area(
508
- "App Store IDs (one per line)",
509
- placeholder="1158907446\n1234567890",
510
- height=150,
511
- key="app_urls"
512
- )
513
-
514
- with col2:
515
- st.markdown("#### 🤖 Play Store Packages")
516
- st.markdown(
517
- """
518
- **Format:** Package name
519
- - Example: `com.yas.app`
520
- - Example: `com.company.app`
521
- """
522
- )
523
- play_store_urls = st.text_area(
524
- "Play Store Package Names (one per line)",
525
- placeholder="com.yas.app\ncom.company.app",
526
- height=150,
527
- key="play_urls"
528
- )
529
-
530
- st.markdown("---")
531
- st.markdown("### 🔑 Step 2: Configure Settings")
532
-
533
- col1, col2 = st.columns([2, 1])
534
-
535
- with col1:
536
- hf_api_key = st.text_input(
537
- "🔑 HuggingFace API Key",
538
- type="password",
539
- placeholder="hf_...",
540
- help="Get your key from: https://huggingface.co/settings/tokens",
541
- key="hf_key"
542
- )
543
-
544
- with col2:
545
- review_limit = st.slider(
546
- "📊 Reviews per App",
547
- min_value=5,
548
- max_value=100,
549
- value=20,
550
- step=5,
551
- help="More reviews = longer processing time",
552
- key="review_limit"
553
- )
554
-
555
- st.markdown("---")
556
-
557
- # Submit button
558
- col1, col2, col3 = st.columns([1, 1, 1])
559
-
560
- with col2:
561
- if st.button("🚀 Start Analysis", use_container_width=True, type="primary"):
562
- with st.spinner("Processing..."):
563
- success = process_reviews_streamlit(
564
- app_store_urls,
565
- play_store_urls,
566
- hf_api_key,
567
- review_limit
568
- )
569
-
570
- if success:
571
- st.balloons()
572
- st.rerun()
573
-
574
- # Documentation
575
- with st.expander("📚 How to Use"):
576
- st.markdown("""
577
- ### 📖 Quick Guide
578
-
579
- **1. Get HuggingFace API Key:**
580
- - Visit: https://huggingface.co/settings/tokens
581
- - Create new token (Read access)
582
- - Copy token (starts with `hf_`)
583
-
584
- **2. Enter URLs:**
585
- - **App Store**: Just the ID number (e.g., `1234567890`)
586
- - **Play Store**: Package name (e.g., `com.company.app`)
587
- - One per line
588
-
589
- **3. Click Start:**
590
- - Watch progress bar
591
- - Wait for completion (~7 sec per review)
592
- - View results automatically
593
-
594
- ### 🏗️ What Happens:
595
- - 🕷️ **Stage 0**: Scrapes reviews from stores
596
- - 🤖 **Stage 1**: Classifies with 3 AI models (Type, Department, Priority)
597
- - 😊 **Stage 2**: Analyzes sentiment with dual BERT models
598
- - 📊 **Stage 3**: Synthesizes insights and recommendations
599
- - 💡 **Stage 4**: Generates batch analytics
600
-
601
- ### ⚡ Performance:
602
- - ~7 seconds per review
603
- - 7 AI models working together
604
- - Parallel execution for speed
605
- """)
606
 
 
607
 
608
- def show_results_dashboard():
609
- """Show results dashboard with charts and tables"""
610
-
611
- results = st.session_state.results
612
- insights = st.session_state.insights
613
- scraped_count = st.session_state.scraped_count
614
-
615
- # Summary section
616
- create_summary_section(scraped_count, results, insights)
617
-
618
- st.markdown("---")
619
-
620
- # Tabs for different views
621
- tab1, tab2, tab3, tab4 = st.tabs([
622
- "📊 Sentiment Analysis",
623
- "🚨 Critical Issues",
624
- "📋 All Reviews",
625
- "📥 Export"
626
- ])
627
-
628
- # TAB 1: Sentiment Analysis
629
- with tab1:
630
- st.header("📊 Sentiment Analysis Overview")
631
-
632
- col1, col2 = st.columns(2)
633
-
634
- with col1:
635
- fig_sentiment = create_sentiment_chart(insights)
636
- st.plotly_chart(fig_sentiment, use_container_width=True)
637
-
638
- with col2:
639
- fig_priority = create_priority_chart(insights)
640
- st.plotly_chart(fig_priority, use_container_width=True)
641
-
642
- st.markdown("### 🏢 Department Routing")
643
- fig_dept = create_department_chart(insights)
644
- st.plotly_chart(fig_dept, use_container_width=True)
645
-
646
- st.markdown("### 😊 Emotional Analysis")
647
- fig_emotion = create_emotion_chart(insights)
648
- st.plotly_chart(fig_emotion, use_container_width=True)
649
-
650
- # TAB 2: Critical Issues
651
- with tab2:
652
- st.header("🚨 Critical Issues Requiring Attention")
653
-
654
- # Filter critical reviews
655
- critical_reviews = [
656
- r for r in results
657
- if (r.get('priority') == 'critical' or
658
- r.get('stage1_llm1_priority') == 'critical' or
659
- r.get('needs_human_review', r.get('stage3_needs_human_review')) or
660
- (r.get('final_sentiment', r.get('stage3_final_sentiment')) == 'NEGATIVE' and r.get('rating', 5) <= 2))
661
- ]
662
-
663
- if len(critical_reviews) == 0:
664
- st.success("✅ No critical issues found! All reviews are in good shape.")
665
- else:
666
- st.warning(f"Found {len(critical_reviews)} critical issues")
667
-
668
- for review in critical_reviews:
669
- with st.expander(
670
- f"⚠️ {review.get('review_id', 'Unknown')[:30]} - "
671
- f"Rating: {review.get('rating', 'N/A')}/5"
672
- ):
673
- col1, col2 = st.columns([2, 1])
674
-
675
- with col1:
676
- st.markdown("**Review Text:**")
677
- st.write(review.get('review_text', 'No text available'))
678
-
679
- st.markdown("**Reasoning:**")
680
- reasoning = review.get('reasoning', review.get('stage3_reasoning', 'No reasoning available'))
681
- st.info(reasoning)
682
-
683
- with col2:
684
- st.markdown("**Classification:**")
685
- st.write(f"📌 Type: {review.get('classification_type', review.get('stage1_llm1_type', 'N/A'))}")
686
- st.write(f"🏢 Department: {review.get('department', review.get('stage1_llm1_department', 'N/A'))}")
687
- st.write(f"🎯 Priority: {review.get('priority', review.get('stage1_llm1_priority', 'N/A'))}")
688
- st.write(f"😔 Emotion: {review.get('emotion', review.get('stage1_llm2_emotion', 'N/A'))}")
689
- st.write(f"💭 Sentiment: {review.get('final_sentiment', review.get('stage3_final_sentiment', 'N/A'))}")
690
-
691
- st.markdown("**Action:**")
692
- action = review.get('action_recommendation', review.get('stage3_action_recommendation', 'No action specified'))
693
- st.error(action)
694
-
695
- # TAB 3: All Reviews
696
- with tab3:
697
- st.header("📋 Detailed Review Analysis")
698
-
699
- # Create DataFrame
700
- df = create_reviews_dataframe(results)
701
-
702
- # Filters
703
- col1, col2, col3 = st.columns(3)
704
-
705
- with col1:
706
- sentiment_filter = st.multiselect(
707
- "Filter by Sentiment",
708
- options=df['Sentiment'].unique(),
709
- default=df['Sentiment'].unique()
710
- )
711
-
712
- with col2:
713
- dept_filter = st.multiselect(
714
- "Filter by Department",
715
- options=df['Department'].unique(),
716
- default=df['Department'].unique()
717
- )
718
-
719
- with col3:
720
- priority_filter = st.multiselect(
721
- "Filter by Priority",
722
- options=df['Priority'].unique(),
723
- default=df['Priority'].unique()
724
- )
725
-
726
- # Apply filters
727
- filtered_df = df[
728
- (df['Sentiment'].isin(sentiment_filter)) &
729
- (df['Department'].isin(dept_filter)) &
730
- (df['Priority'].isin(priority_filter))
731
- ]
732
-
733
- st.info(f"Showing {len(filtered_df)} of {len(df)} reviews")
734
-
735
- # Display table
736
- st.dataframe(
737
- filtered_df,
738
- use_container_width=True,
739
- height=600
740
- )
741
-
742
- # TAB 4: Export
743
- with tab4:
744
- st.header("📥 Export Results")
745
-
746
- st.markdown("### Download Options")
747
-
748
- col1, col2 = st.columns(2)
749
-
750
- with col1:
751
- st.markdown("#### 📊 CSV Export")
752
- st.write("Download complete analysis with all classifications")
753
-
754
- df = create_reviews_dataframe(results)
755
- csv = df.to_csv(index=False)
756
-
757
- st.download_button(
758
- label="📥 Download CSV Report",
759
- data=csv,
760
- file_name=f"review_analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
761
- mime="text/csv",
762
- use_container_width=True
763
- )
764
-
765
- with col2:
766
- st.markdown("#### 📋 JSON Export")
767
- st.write("Download raw data with all details")
768
-
769
- import json
770
- json_data = json.dumps({
771
- 'results': results,
772
- 'insights': insights,
773
- 'scraped_count': scraped_count,
774
- 'export_date': datetime.now().isoformat()
775
- }, indent=2)
776
-
777
- st.download_button(
778
- label="📥 Download JSON Data",
779
- data=json_data,
780
- file_name=f"review_data_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json",
781
- mime="application/json",
782
- use_container_width=True
783
- )
784
-
785
- st.markdown("---")
786
- st.markdown("### 📊 Summary Statistics")
787
-
788
- col1, col2, col3 = st.columns(3)
789
-
790
- with col1:
791
- st.metric("Total Reviews Analyzed", len(results))
792
-
793
- with col2:
794
- positive = insights.get('sentiment_distribution', {}).get('POSITIVE', 0)
795
- total = len(results)
796
- pct = (positive / total * 100) if total > 0 else 0
797
- st.metric("Positive Rate", f"{pct:.1f}%")
798
-
799
- with col3:
800
- critical = insights.get('priority_distribution', {}).get('critical', 0)
801
- st.metric("Critical Issues", critical)
802
-
803
-
804
- # ============================================================================
805
- # FOOTER
806
- # ============================================================================
807
-
808
- def show_footer():
809
- """Show footer with credits"""
810
- st.markdown("---")
811
- st.markdown(
812
- """
813
- <div style='text-align: center'>
814
- <p>🤖 Powered by Multi-Stage AI Pipeline |
815
- Stage 1: Classification (Qwen, Mistral, Llama) |
816
- Stage 2: Sentiment (Twitter-BERT) |
817
- Stage 3: Finalization (Llama 70B) |
818
- Stage 4: Batch Analysis</p>
819
- <p>Built with ❤️ using LangGraph + HuggingFace + Streamlit</p>
820
- </div>
821
- """,
822
- unsafe_allow_html=True
823
- )
824
-
825
-
826
- # ============================================================================
827
- # RUN APP
828
- # ============================================================================
829
-
830
- if __name__ == "__main__":
831
- main()
832
- show_footer()
 
1
+ # 🔄 AUTOMATIC RESET - Add This to Your app.py
2
+
3
+ ## 📍 WHERE TO ADD THE CODE
4
+
5
+ Find this section in your **app.py** (around line 200-220):
6
+
7
+ ```python
8
+ # After clicking "Start Analysis"
9
+ if st.button("🚀 Start Analysis", type="primary"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
+ # Set HF API key in environment
12
+ os.environ['HUGGINGFACE_API_KEY'] = hf_api_key
 
 
 
 
13
 
14
+ # Scrape reviews
15
+ st.info("Scraping reviews...")
16
+ scraper.scrape_all_sources()
 
 
17
 
18
+ # Initialize database
19
+ db = EnhancedDatabase()
20
+ db.connect()
21
+ db.enhance_schema()
22
 
23
+ # ... rest of code
24
+ ```
25
+
26
+ ---
27
+
28
+ ## ADD THESE 3 LINES
29
+
30
+ **BEFORE** calling `db.enhance_schema()`, add this:
31
+
32
+ ```python
33
+ # After clicking "Start Analysis"
34
+ if st.button("🚀 Start Analysis", type="primary"):
35
 
36
+ # Set HF API key in environment
37
+ os.environ['HUGGINGFACE_API_KEY'] = hf_api_key
 
 
38
 
39
+ # Scrape reviews
40
+ st.info("Scraping reviews...")
41
+ scraper.scrape_all_sources()
 
42
 
43
+ # Initialize database
44
+ db = EnhancedDatabase()
45
+ db.connect()
 
46
 
47
+ # ADD THESE 3 LINES ⭐
48
+ # Reset the most recent 20 reviews to pending status
49
+ st.info("Preparing reviews for processing...")
50
+ db.reset_processing_status(limit=20)
51
 
52
+ # Continue with existing code
53
+ db.enhance_schema()
 
 
 
 
54
 
55
+ # ... rest of code
56
+ ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
+ ---
 
59
 
60
+ ## 🎯 COMPLETE MODIFICATION
61
 
62
+ Here's the complete section with the fix highlighted:
 
 
63
 
64
+ ```python
65
+ # After clicking "Start Analysis"
66
+ if st.button("🚀 Start Analysis", type="primary"):
 
 
67
 
68
+ # Validate API key
69
+ if not hf_api_key or len(hf_api_key) < 10:
70
+ st.error("❌ Please enter a valid HuggingFace API key!")
71
+ st.stop()
72
 
73
+ # Set environment variable
74
+ os.environ['HUGGINGFACE_API_KEY'] = hf_api_key
 
75
 
76
+ # Create progress container
77
+ progress_container = st.container()
78
+
79
+ with progress_container:
80
+ # Step 1: Scraping
81
+ st.info("🔍 Scraping reviews from App Store and Play Store...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
+ # Initialize scraper
84
+ scraper = ReviewScraper()
85
+ scraper.scrape_all_sources()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
+ # Step 2: Database setup
88
+ st.info("📁 Setting up database...")
89
+ db = EnhancedDatabase()
90
+ db.connect()
 
91
 
92
+ # ⭐⭐⭐ CRITICAL: ADD THESE 3 LINES ⭐⭐⭐
93
+ # Reset the most recent reviews to pending status
94
+ st.info("🔄 Preparing reviews for processing...")
95
+ reset_count = db.reset_processing_status(limit=20)
96
 
97
+ # Continue with schema enhancement
98
+ db.enhance_schema()
99
 
100
+ # Step 3: Get pending reviews
101
+ reviews_to_process = db.get_pending_reviews()
 
 
102
 
103
+ if not reviews_to_process:
104
+ st.warning("⚠️ No reviews found to process!")
105
+ st.stop()
 
 
 
106
 
107
+ st.success(f"✅ Found {len(reviews_to_process)} reviews to process!")
108
 
109
+ # ... rest of your processing code
110
+ ```
 
 
 
111
 
112
+ ---
113
 
114
+ ## 🎊 WHAT THIS DOES
 
 
115
 
116
+ 1. **Scrapes reviews** from App Store and Play Store
117
+ 2. **Resets the 20 most recent reviews** to `pending` status
118
+ 3. **Processes them** through your AI pipeline
119
+ 4. **Shows results** in the dashboard
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
 
121
+ Even if reviews were already processed before, they'll be reprocessed with the latest AI models!
122
 
123
+ ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
 
125
+ ## 📋 DEPLOYMENT CHECKLIST
126
 
127
+ - [ ] Upload `database_enhanced_UPDATED.py` to HF Spaces
128
+ - [ ] Rename to `database_enhanced.py`
129
+ - [ ] Add the 3 lines to your `app.py` (as shown above)
130
+ - [ ] Upload `langgraph_nodes_FINAL.py` to HF Spaces
131
+ - [ ] Rename to `langgraph_nodes.py`
132
+ - [ ] Commit all changes
133
+ - [ ] Wait for rebuild (2 min)
134
+ - [ ] Enter your API key
135
+ - [ ] Click "Start Analysis"
136
+ - [ ] Watch it work! ✨
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
 
138
+ ---
139
 
140
+ ## 🚀 EXPECTED BEHAVIOR
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
 
142
+ **Before Fix:**
143
+ ```
144
+ ✅ Scraped 20 reviews
145
+ ✅ Saved 0 new reviews ← Nothing to process!
146
+ ⚠️ No reviews found to process
147
+ ```
148
 
149
+ **After Fix:**
150
+ ```
151
+ ✅ Scraped 20 reviews
152
+ ✅ Saved 0 new reviews
153
+ 🔄 Reset 20 reviews to pending status ← Force reprocessing!
154
+ ✅ Found 20 reviews to process!
155
+ 📝 Review ID: abc123
156
+ ✅ Stage 1 complete (4.23s)
157
+ ✅ Stage 2 complete (0.83s)
158
+ ✅ Stage 3 complete (3.17s)
159
+ ```
 
 
 
 
 
 
 
 
 
 
 
 
 
160
 
161
+ ---
162
 
163
+ ## 💡 ALTERNATIVE: Change the Limit
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
 
165
+ If you want to process **all reviews** instead of just the latest 20:
166
 
167
+ ```python
168
+ # Reset ALL reviews
169
+ db.reset_processing_status() # No limit parameter
170
+ ```
171
 
172
+ Or process only the latest 5:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
 
174
+ ```python
175
+ # Reset only 5 most recent
176
+ db.reset_processing_status(limit=5)
177
+ ```
178
 
179
+ ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
 
181
+ ## ⚡ Quick Summary
182
 
183
+ **3 files to upload:**
184
+ 1. `database_enhanced_UPDATED.py` → rename to `database_enhanced.py`
185
+ 2. `langgraph_nodes_FINAL.py` → rename to `langgraph_nodes.py`
186
+ 3. Modify `app.py` → add 3 lines as shown above
187
+
188
+ **Then it will work!** 🎉