Spaces:
Sleeping
Sleeping
Upload 11 files
Browse files- app.py +751 -0
- config.yaml +508 -0
- config_loader.py +120 -0
- database_enhanced.py +403 -0
- gradio_pipeline.py +321 -0
- langgraph_graph.py +313 -0
- langgraph_nodes.py +583 -0
- langgraph_state.py +217 -0
- requirements.txt +27 -3
- stage0_scraper.py +302 -0
- stage4_batch_analysis.py +323 -0
app.py
ADDED
|
@@ -0,0 +1,751 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
HuggingFace Spaces - Review Intelligence System (Streamlit)
|
| 3 |
+
Complete app with URL input, progress tracking, and interactive dashboard
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import streamlit as st
|
| 7 |
+
import pandas as pd
|
| 8 |
+
import plotly.express as px
|
| 9 |
+
import plotly.graph_objects as go
|
| 10 |
+
import os
|
| 11 |
+
from datetime import datetime
|
| 12 |
+
from typing import List, Dict, Optional
|
| 13 |
+
import time
|
| 14 |
+
|
| 15 |
+
from gradio_pipeline import GradioPipeline
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
# ============================================================================
# PAGE CONFIGURATION
# ============================================================================

# Must be the first Streamlit call in the script (Streamlit requirement);
# runs once per rerun before any widgets are rendered.
st.set_page_config(
    page_title="Review Intelligence System",
    page_icon="🎯",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Custom CSS
# Injected as raw HTML, hence unsafe_allow_html=True. Styles the metric cards,
# headline text and the gradient "success-box" banner used by the dashboard.
st.markdown("""
<style>
    .main {
        padding: 0rem 1rem;
    }
    .stMetric {
        background-color: #f0f2f6;
        padding: 15px;
        border-radius: 5px;
    }
    .big-font {
        font-size: 24px !important;
        font-weight: bold;
    }
    .success-box {
        padding: 20px;
        border-radius: 10px;
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        margin: 20px 0;
    }
</style>
""", unsafe_allow_html=True)
| 53 |
+
|
| 54 |
+
|
| 55 |
+
# ============================================================================
# SESSION STATE INITIALIZATION
# ============================================================================

# Seed every session-state key the app reads, without clobbering values that
# survive a Streamlit rerun (st.session_state persists across reruns).
for _key, _default in (
    ('processing_complete', False),
    ('results', None),
    ('insights', None),
    ('scraped_count', 0),
):
    if _key not in st.session_state:
        st.session_state[_key] = _default
| 72 |
+
# ============================================================================
# PROCESSING FUNCTIONS
# ============================================================================

def process_reviews_streamlit(app_store_urls: str, play_store_urls: str,
                              hf_api_key: str, review_limit: int) -> bool:
    """
    Run the full scrape -> per-review AI pipeline -> batch analysis flow,
    updating a Streamlit progress bar and status line along the way.

    Args:
        app_store_urls: Newline-separated App Store app IDs.
        play_store_urls: Newline-separated Play Store package names.
        hf_api_key: HuggingFace API token (exported to the environment for
            the downstream model clients).
        review_limit: Maximum reviews to scrape per app and to process.

    Returns:
        True on success (results/insights stored in st.session_state),
        False after showing an error/warning to the user.
    """

    # Validate inputs
    if not hf_api_key or not hf_api_key.strip():
        st.error("❌ Please provide your HuggingFace API key")
        return False

    if not app_store_urls.strip() and not play_store_urls.strip():
        st.error("❌ Please provide at least one App Store or Play Store URL")
        return False

    try:
        # Hoisted out of the per-review loop below — it used to be re-imported
        # on every iteration. Kept function-local so module import stays light.
        from langgraph_state import create_initial_state

        # Set API key for the downstream HuggingFace clients
        os.environ['HUGGINGFACE_API_KEY'] = hf_api_key.strip()

        # Progress indicators
        progress_bar = st.progress(0)
        status_text = st.empty()

        # Initialize pipeline
        status_text.text("🚀 Initializing pipeline...")
        progress_bar.progress(5)
        pipeline = GradioPipeline(review_limit=review_limit)

        # Parse URLs: one entry per line, blanks ignored
        app_urls = [url.strip() for url in app_store_urls.split('\n') if url.strip()]
        play_urls = [url.strip() for url in play_store_urls.split('\n') if url.strip()]

        # Stage 0: Scraping (progress 10% -> 30%)
        status_text.text("🕷️ Scraping reviews from stores...")
        progress_bar.progress(10)

        scraped_count = 0
        total_apps = len(app_urls) + len(play_urls)  # > 0, validated above

        for i, app_id in enumerate(app_urls, 1):
            status_text.text(f"🍎 Scraping App Store ({i}/{total_apps}): {app_id}")
            reviews = pipeline.scraper.scrape_app_store_rss(app_id, country="ae", limit=review_limit)
            saved = pipeline.scraper.save_reviews_to_db(reviews)
            scraped_count += saved
            progress_bar.progress(10 + int(20 * i / total_apps))
            time.sleep(1)  # throttle requests to the store endpoint

        for i, package in enumerate(play_urls, 1):
            status_text.text(f"🤖 Scraping Play Store ({i}/{total_apps}): {package}")
            reviews = pipeline.scraper.scrape_play_store_api(package, country="ae", limit=review_limit)
            saved = pipeline.scraper.save_reviews_to_db(reviews)
            scraped_count += saved
            # Offset by len(app_urls) so the bar continues from the App Store pass
            progress_bar.progress(10 + int(20 * (len(app_urls) + i) / total_apps))
            time.sleep(1)

        if scraped_count == 0:
            st.warning("⚠️ No reviews scraped. Please check your URLs and try again.")
            progress_bar.empty()
            status_text.empty()
            return False

        st.session_state.scraped_count = scraped_count

        # Stage 1-3: Per-review processing (progress 30% -> 90%)
        status_text.text("🤖 Processing reviews with AI models...")
        progress_bar.progress(30)

        reviews = pipeline.db.get_pending_reviews(limit=review_limit)
        total_reviews = len(reviews)

        processed_states = []

        for i, review in enumerate(reviews, 1):
            # `or 'unknown'` also guards a present-but-None review_id,
            # which would otherwise crash the slice.
            review_id = (review.get('review_id') or 'unknown')[:20]
            status_text.text(f"🤖 Processing review {i}/{total_reviews}: {review_id}...")
            progress_bar.progress(30 + int(60 * i / total_reviews))

            try:
                state = create_initial_state(review)
                # Unique thread id per review keeps LangGraph checkpoints separate
                config = {"configurable": {"thread_id": f"review_{review.get('review_id')}"}}
                final_state = pipeline.review_graph.invoke(state, config=config)
                processed_states.append(dict(final_state))
            except Exception as e:
                # One bad review must not abort the whole batch
                st.warning(f"⚠️ Error processing review: {str(e)}")
                continue

        if not processed_states:
            st.error("❌ No reviews were processed successfully.")
            progress_bar.empty()
            status_text.empty()
            return False

        # Stage 4: Batch Analysis (progress 90% -> 100%)
        status_text.text("📊 Generating batch insights...")
        progress_bar.progress(90)

        insights = pipeline.analyze_batch(processed_states)

        # Store in session state for show_results_dashboard()
        st.session_state.results = processed_states
        st.session_state.insights = insights
        st.session_state.processing_complete = True

        # Complete
        progress_bar.progress(100)
        status_text.text("✅ Analysis complete!")
        time.sleep(1)
        progress_bar.empty()
        status_text.empty()

        return True

    except Exception as e:
        # Top-level boundary: surface the error plus a traceback in the UI
        st.error(f"❌ Error during processing: {str(e)}")
        import traceback
        st.code(traceback.format_exc())
        return False
+
|
| 195 |
+
|
| 196 |
+
# ============================================================================
# VISUALIZATION FUNCTIONS
# ============================================================================

def create_summary_section(scraped_count: int, results: List[Dict], insights: Dict):
    """Render the success banner, five headline metrics, and recommendations.

    Args:
        scraped_count: Number of reviews saved during the scraping stage.
        results: Per-review processed states from the pipeline.
        insights: Batch aggregates (sentiment/priority distributions,
            churn_risk, recommendations, ...).
    """

    total = len(results)
    sentiment_dist = insights.get('sentiment_distribution', {})
    positive = sentiment_dist.get('POSITIVE', 0)
    negative = sentiment_dist.get('NEGATIVE', 0)
    critical = insights.get('priority_distribution', {}).get('critical', 0)
    churn_risk = insights.get('churn_risk', 0)

    # Success header
    st.markdown(
        f"""
        <div class="success-box">
            <h1 style="margin: 0;">✅ Analysis Complete!</h1>
            <p style="margin: 10px 0 0 0; font-size: 1.2em; opacity: 0.9;">
                Review Intelligence System Results
            </p>
        </div>
        """,
        unsafe_allow_html=True
    )

    # Metrics
    col1, col2, col3, col4, col5 = st.columns(5)

    with col1:
        st.metric("Total Reviews", total, f"Scraped: {scraped_count}")

    with col2:
        pos_pct = (positive / total * 100) if total > 0 else 0
        st.metric("Positive", positive, f"{pos_pct:.1f}%")

    with col3:
        neg_pct = (negative / total * 100) if total > 0 else 0
        st.metric("Negative", negative, f"{neg_pct:.1f}%")

    with col4:
        st.metric("Critical Issues", critical, "🚨" if critical > 0 else "✅")

    with col5:
        # Bug fix: delta_color was computed but never passed to st.metric,
        # so high churn risk never got the inverted delta color scheme.
        delta_color = "inverse" if churn_risk > 30 else "normal"
        st.metric("Churn Risk", f"{churn_risk:.1f}%",
                  "⚠️ High" if churn_risk > 30 else "✅ Low",
                  delta_color=delta_color)

    # Recommendations
    st.markdown("### 💡 Key Recommendations")
    for rec in insights.get('recommendations', []):
        st.info(rec)
+
|
| 250 |
+
|
| 251 |
+
def create_sentiment_chart(insights: Dict):
    """Create sentiment distribution donut chart.

    Returns a plotly Figure; the caller renders it via st.plotly_chart.
    """
    sentiment_dist = insights.get('sentiment_distribution', {})

    labels = list(sentiment_dist.keys())
    values = list(sentiment_dist.values())
    # Bug fix: a fixed [green, orange, red] list assumed the dict was ordered
    # POSITIVE/NEUTRAL/NEGATIVE; map each label explicitly so a different key
    # order (or a missing class) cannot swap the colors. Unknown labels get grey.
    color_map = {'POSITIVE': '#2ca02c', 'NEUTRAL': '#ff7f0e', 'NEGATIVE': '#d62728'}
    colors = [color_map.get(label, '#7f7f7f') for label in labels]

    fig = go.Figure(data=[go.Pie(
        labels=labels,
        values=values,
        hole=0.5,
        marker_colors=colors,
        textinfo='label+percent',
        textposition='outside',
        textfont_size=14
    )])

    fig.update_layout(
        title="😊 Sentiment Distribution",
        showlegend=True,
        height=400
    )

    return fig
+
|
| 277 |
+
|
| 278 |
+
def create_priority_chart(insights: Dict):
    """Create priority distribution bar chart (critical -> low order).

    Returns a plotly Figure; the caller renders it via st.plotly_chart.
    """
    priority_dist = insights.get('priority_distribution', {})

    priority_order = ['critical', 'high', 'medium', 'low']
    labels = [p for p in priority_order if p in priority_dist]
    values = [priority_dist.get(p, 0) for p in labels]
    # Bug fix: colors[:len(labels)] shifted the palette whenever a level was
    # absent (e.g. with no 'critical' present, 'high' was drawn red); map the
    # color per priority level instead.
    color_map = {'critical': '#d62728', 'high': '#ff7f0e',
                 'medium': '#1f77b4', 'low': '#2ca02c'}
    colors = [color_map[p] for p in labels]

    fig = go.Figure(data=[go.Bar(
        x=labels,
        y=values,
        marker_color=colors,
        text=values,
        textposition='auto'
    )])

    fig.update_layout(
        title="🎯 Priority Levels",
        xaxis_title="Priority",
        yaxis_title="Count",
        height=400
    )

    return fig
| 303 |
+
|
| 304 |
+
|
| 305 |
+
def create_department_chart(insights: Dict):
    """Build a horizontal bar chart of issue counts routed to each department.

    Returns a plotly Figure for the caller to render with st.plotly_chart.
    """
    distribution = insights.get('department_distribution', {})
    departments = list(distribution.keys())
    counts = list(distribution.values())

    # Horizontal orientation keeps long department names readable.
    bar = go.Bar(
        x=counts,
        y=departments,
        orientation='h',
        marker_color='#667eea',
        text=counts,
        textposition='auto'
    )
    chart = go.Figure(data=[bar])

    chart.update_layout(
        title="🏢 Department Routing",
        xaxis_title="Number of Issues",
        yaxis_title="Department",
        height=400
    )

    return chart
| 329 |
+
|
| 330 |
+
|
| 331 |
+
def create_emotion_chart(insights: Dict):
    """Build a bar chart of the detected emotion distribution.

    Bars are colored by count on a Viridis scale; returns a plotly Figure.
    """
    distribution = insights.get('emotion_distribution', {})
    emotions = list(distribution.keys())
    counts = list(distribution.values())

    chart = px.bar(
        x=emotions,
        y=counts,
        labels={'x': 'Emotion', 'y': 'Count'},
        color=counts,
        color_continuous_scale='Viridis'
    )

    chart.update_layout(
        title="😊 Emotional Analysis",
        xaxis_title="Emotion Type",
        yaxis_title="Number of Reviews",
        height=300,
        showlegend=False
    )

    return chart
| 355 |
+
|
| 356 |
+
|
| 357 |
+
def create_reviews_dataframe(results: List[Dict]) -> pd.DataFrame:
    """Flatten processed review states into a display-ready DataFrame.

    Args:
        results: Per-review pipeline states (dicts carrying the stage1/stage3
            classification keys produced by the pipeline).

    Returns:
        DataFrame with one row per review and human-readable columns.
    """

    df_data = []
    for review in results:
        # `or` guards keys that are present but None — .get(default) alone
        # would return None and crash the slice below.
        review_id = (review.get('review_id') or 'N/A')[:20]
        text = review.get('review_text') or 'N/A'
        # Bug fix: '...' was previously appended even when the text was
        # shorter than 100 chars; only mark genuinely truncated previews.
        preview = text[:100] + ('...' if len(text) > 100 else '')
        df_data.append({
            'Review ID': review_id,
            'Rating': review.get('rating', 0),
            'Review': preview,
            'Sentiment': review.get('stage3_final_sentiment', 'N/A'),
            'Type': review.get('stage1_llm1_type', 'N/A'),
            'Department': review.get('stage1_llm1_department', 'N/A'),
            'Priority': review.get('stage1_llm1_priority', 'N/A'),
            'Emotion': review.get('stage1_llm2_emotion', 'N/A'),
            'Needs Review': '🚨 Yes' if review.get('stage3_needs_human_review') else '✅ No'
        })

    return pd.DataFrame(df_data)
| 375 |
+
|
| 376 |
+
|
| 377 |
+
# ============================================================================
# MAIN APP
# ============================================================================

def main():
    """Entry point: render the header, the sidebar controls, and whichever
    view matches the current session state (input form or results dashboard)."""

    # Page header
    st.title("🎯 Review Intelligence System")
    st.markdown("### Multi-Stage AI Analysis for App Store & Play Store Reviews")
    st.markdown("Powered by **LangGraph** + **HuggingFace** • 4-Stage Processing Pipeline")
    st.markdown("---")

    # Sidebar: status indicator plus a reset button once a run has finished
    with st.sidebar:
        st.header("🎛️ Control Panel")

        if not st.session_state.processing_complete:
            st.info("👈 Enter URLs below to start")
        else:
            st.success("✅ Analysis Complete!")
            if st.button("🔄 Start New Analysis", use_container_width=True):
                # Clear prior run so the input form is shown again
                st.session_state.processing_complete = False
                st.session_state.results = None
                st.session_state.insights = None
                st.rerun()

    # Main content: input form until a run completes, dashboard afterwards
    if st.session_state.processing_complete:
        show_results_dashboard()
    else:
        show_input_form()
| 411 |
+
|
| 412 |
+
|
| 413 |
+
def show_input_form():
    """Show input form for URLs and API key.

    Renders three steps — store ID entry, API key + review-limit settings,
    and a start button that runs process_reviews_streamlit(). On success the
    script reruns so main() switches to the results dashboard. A usage guide
    lives in a collapsed expander at the bottom.
    """

    st.markdown("### 📝 Step 1: Enter Store URLs")

    col1, col2 = st.columns(2)

    # Left column: Apple App Store numeric IDs
    with col1:
        st.markdown("#### 🍎 App Store IDs")
        st.markdown(
            """
            **Format:** Just paste the app ID
            - Example: `1158907446` (UAE)
            - Example: `1234567890` (US)
            """
        )
        app_store_urls = st.text_area(
            "App Store IDs (one per line)",
            placeholder="1158907446\n1234567890",
            height=150,
            key="app_urls"
        )

    # Right column: Google Play package names
    with col2:
        st.markdown("#### 🤖 Play Store Packages")
        st.markdown(
            """
            **Format:** Package name
            - Example: `com.yas.app`
            - Example: `com.company.app`
            """
        )
        play_store_urls = st.text_area(
            "Play Store Package Names (one per line)",
            placeholder="com.yas.app\ncom.company.app",
            height=150,
            key="play_urls"
        )

    st.markdown("---")
    st.markdown("### 🔑 Step 2: Configure Settings")

    col1, col2 = st.columns([2, 1])

    with col1:
        # Password field: the key is only exported to the environment at run time.
        hf_api_key = st.text_input(
            "🔑 HuggingFace API Key",
            type="password",
            placeholder="hf_...",
            help="Get your key from: https://huggingface.co/settings/tokens",
            key="hf_key"
        )

    with col2:
        review_limit = st.slider(
            "📊 Reviews per App",
            min_value=5,
            max_value=100,
            value=20,
            step=5,
            help="More reviews = longer processing time",
            key="review_limit"
        )

    st.markdown("---")

    # Submit button (middle column purely for centering)
    col1, col2, col3 = st.columns([1, 1, 1])

    with col2:
        if st.button("🚀 Start Analysis", use_container_width=True, type="primary"):
            with st.spinner("Processing..."):
                success = process_reviews_streamlit(
                    app_store_urls,
                    play_store_urls,
                    hf_api_key,
                    review_limit
                )

            # Rerun flips main() into results mode (processing_complete is set)
            if success:
                st.balloons()
                st.rerun()

    # Documentation
    with st.expander("📚 How to Use"):
        st.markdown("""
        ### 📖 Quick Guide

        **1. Get HuggingFace API Key:**
        - Visit: https://huggingface.co/settings/tokens
        - Create new token (Read access)
        - Copy token (starts with `hf_`)

        **2. Enter URLs:**
        - **App Store**: Just the ID number (e.g., `1234567890`)
        - **Play Store**: Package name (e.g., `com.company.app`)
        - One per line

        **3. Click Start:**
        - Watch progress bar
        - Wait for completion (~7 sec per review)
        - View results automatically

        ### 🏗️ What Happens:
        - 🕷️ **Stage 0**: Scrapes reviews from stores
        - 🤖 **Stage 1**: Classifies with 3 AI models (Type, Department, Priority)
        - 😊 **Stage 2**: Analyzes sentiment with dual BERT models
        - 📊 **Stage 3**: Synthesizes insights and recommendations
        - 💡 **Stage 4**: Generates batch analytics

        ### ⚡ Performance:
        - ~7 seconds per review
        - 7 AI models working together
        - Parallel execution for speed
        """)
| 528 |
+
|
| 529 |
+
|
| 530 |
+
def show_results_dashboard():
    """Show results dashboard with charts and tables.

    Reads results/insights/scraped_count from st.session_state (populated by
    process_reviews_streamlit) and renders the summary banner plus four tabs:
    charts, critical issues, a filterable table, and CSV/JSON export.
    """

    results = st.session_state.results
    insights = st.session_state.insights
    scraped_count = st.session_state.scraped_count

    # Summary section
    create_summary_section(scraped_count, results, insights)

    st.markdown("---")

    # Tabs for different views
    tab1, tab2, tab3, tab4 = st.tabs([
        "📊 Sentiment Analysis",
        "🚨 Critical Issues",
        "📋 All Reviews",
        "📥 Export"
    ])

    # TAB 1: Sentiment Analysis — four charts built from batch insights
    with tab1:
        st.header("📊 Sentiment Analysis Overview")

        col1, col2 = st.columns(2)

        with col1:
            fig_sentiment = create_sentiment_chart(insights)
            st.plotly_chart(fig_sentiment, use_container_width=True)

        with col2:
            fig_priority = create_priority_chart(insights)
            st.plotly_chart(fig_priority, use_container_width=True)

        st.markdown("### 🏢 Department Routing")
        fig_dept = create_department_chart(insights)
        st.plotly_chart(fig_dept, use_container_width=True)

        st.markdown("### 😊 Emotional Analysis")
        fig_emotion = create_emotion_chart(insights)
        st.plotly_chart(fig_emotion, use_container_width=True)

    # TAB 2: Critical Issues — reviews needing human attention
    with tab2:
        st.header("🚨 Critical Issues Requiring Attention")

        # A review is "critical" if any of: critical priority, flagged for
        # human review, or negative sentiment with a 1-2 star rating.
        critical_reviews = [
            r for r in results
            if (r.get('stage1_llm1_priority') == 'critical' or
                r.get('stage3_needs_human_review') or
                (r.get('stage3_final_sentiment') == 'NEGATIVE' and r.get('rating', 5) <= 2))
        ]

        if len(critical_reviews) == 0:
            st.success("✅ No critical issues found! All reviews are in good shape.")
        else:
            st.warning(f"Found {len(critical_reviews)} critical issues")

            # One collapsible card per critical review
            for review in critical_reviews:
                with st.expander(
                    f"⚠️ {review.get('review_id', 'Unknown')[:30]} - "
                    f"Rating: {review.get('rating', 'N/A')}/5"
                ):
                    col1, col2 = st.columns([2, 1])

                    with col1:
                        st.markdown("**Review Text:**")
                        st.write(review.get('review_text', 'No text available'))

                        st.markdown("**Reasoning:**")
                        st.info(review.get('stage3_reasoning', 'No reasoning available'))

                    with col2:
                        st.markdown("**Classification:**")
                        st.write(f"📌 Type: {review.get('stage1_llm1_type', 'N/A')}")
                        st.write(f"🏢 Department: {review.get('stage1_llm1_department', 'N/A')}")
                        st.write(f"🎯 Priority: {review.get('stage1_llm1_priority', 'N/A')}")
                        st.write(f"😔 Emotion: {review.get('stage1_llm2_emotion', 'N/A')}")
                        st.write(f"💭 Sentiment: {review.get('stage3_final_sentiment', 'N/A')}")

                    # NOTE(review): source formatting is ambiguous here — this
                    # renders full-width under the columns; confirm intent.
                    st.markdown("**Action:**")
                    st.error(review.get('stage3_action_recommendation', 'No action specified'))

    # TAB 3: All Reviews — filterable table view
    with tab3:
        st.header("📋 Detailed Review Analysis")

        # Create DataFrame
        df = create_reviews_dataframe(results)

        # Filters (all values selected by default, so the full table shows)
        col1, col2, col3 = st.columns(3)

        with col1:
            sentiment_filter = st.multiselect(
                "Filter by Sentiment",
                options=df['Sentiment'].unique(),
                default=df['Sentiment'].unique()
            )

        with col2:
            dept_filter = st.multiselect(
                "Filter by Department",
                options=df['Department'].unique(),
                default=df['Department'].unique()
            )

        with col3:
            priority_filter = st.multiselect(
                "Filter by Priority",
                options=df['Priority'].unique(),
                default=df['Priority'].unique()
            )

        # Apply filters (logical AND across the three facets)
        filtered_df = df[
            (df['Sentiment'].isin(sentiment_filter)) &
            (df['Department'].isin(dept_filter)) &
            (df['Priority'].isin(priority_filter))
        ]

        st.info(f"Showing {len(filtered_df)} of {len(df)} reviews")

        # Display table
        st.dataframe(
            filtered_df,
            use_container_width=True,
            height=600
        )

    # TAB 4: Export — CSV/JSON downloads plus summary stats
    with tab4:
        st.header("📥 Export Results")

        st.markdown("### Download Options")

        col1, col2 = st.columns(2)

        with col1:
            st.markdown("#### 📊 CSV Export")
            st.write("Download complete analysis with all classifications")

            df = create_reviews_dataframe(results)
            csv = df.to_csv(index=False)

            st.download_button(
                label="📥 Download CSV Report",
                data=csv,
                file_name=f"review_analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
                mime="text/csv",
                use_container_width=True
            )

        with col2:
            st.markdown("#### 📋 JSON Export")
            st.write("Download raw data with all details")

            # Local import: json is only needed for this export path
            import json
            json_data = json.dumps({
                'results': results,
                'insights': insights,
                'scraped_count': scraped_count,
                'export_date': datetime.now().isoformat()
            }, indent=2)

            st.download_button(
                label="📥 Download JSON Data",
                data=json_data,
                file_name=f"review_data_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json",
                mime="application/json",
                use_container_width=True
            )

        st.markdown("---")
        st.markdown("### 📊 Summary Statistics")

        col1, col2, col3 = st.columns(3)

        with col1:
            st.metric("Total Reviews Analyzed", len(results))

        with col2:
            positive = insights.get('sentiment_distribution', {}).get('POSITIVE', 0)
            total = len(results)
            pct = (positive / total * 100) if total > 0 else 0
            st.metric("Positive Rate", f"{pct:.1f}%")

        with col3:
            critical = insights.get('priority_distribution', {}).get('critical', 0)
            st.metric("Critical Issues", critical)
| 721 |
+
|
| 722 |
+
|
| 723 |
+
# ============================================================================
# FOOTER
# ============================================================================

def show_footer():
    """Render the centered credits footer below the active view."""
    st.markdown("---")
    footer_html = """
    <div style='text-align: center'>
        <p>🤖 Powered by Multi-Stage AI Pipeline |
        Stage 1: Classification (Qwen, Mistral, Llama) |
        Stage 2: Sentiment (Twitter-BERT) |
        Stage 3: Finalization (Llama 70B) |
        Stage 4: Batch Analysis</p>
        <p>Built with ❤️ using LangGraph + HuggingFace + Streamlit</p>
    </div>
    """
    st.markdown(footer_html, unsafe_allow_html=True)
| 743 |
+
|
| 744 |
+
|
| 745 |
+
# ============================================================================
# RUN APP
# ============================================================================

if __name__ == "__main__":
    # Streamlit re-executes this script top-to-bottom on every interaction;
    # render the main UI first, then the footer beneath whichever view ran.
    main()
    show_footer()
config.yaml
ADDED
|
@@ -0,0 +1,508 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 🎯 Review Intelligence System Configuration
|
| 2 |
+
# Edit this file to customize agent behavior, prompts, and models
|
| 3 |
+
|
| 4 |
+
# =============================================================================
|
| 5 |
+
# MODELS CONFIGURATION
|
| 6 |
+
# =============================================================================
|
| 7 |
+
models:
|
| 8 |
+
# Stage 1: Classification Models
|
| 9 |
+
stage1:
|
| 10 |
+
llm1:
|
| 11 |
+
name: "Qwen/Qwen2.5-72B-Instruct"
|
| 12 |
+
temperature: 0.1
|
| 13 |
+
max_tokens: 200
|
| 14 |
+
role: "Type, Department, Priority classifier"
|
| 15 |
+
|
| 16 |
+
llm2:
|
| 17 |
+
name: "mistralai/Mistral-7B-Instruct-v0.3"
|
| 18 |
+
temperature: 0.1
|
| 19 |
+
max_tokens: 200
|
| 20 |
+
role: "User type, Emotion, Context analyzer"
|
| 21 |
+
|
| 22 |
+
manager:
|
| 23 |
+
name: "meta-llama/Llama-3.1-8B-Instruct"
|
| 24 |
+
temperature: 0.1
|
| 25 |
+
max_tokens: 250
|
| 26 |
+
role: "Synthesis manager"
|
| 27 |
+
|
| 28 |
+
# Stage 2: Sentiment Models (Local BERT)
|
| 29 |
+
stage2:
|
| 30 |
+
best_model:
|
| 31 |
+
name: "cardiffnlp/twitter-roberta-base-sentiment-latest"
|
| 32 |
+
type: "local"
|
| 33 |
+
description: "Twitter-trained RoBERTa (124M tweets)"
|
| 34 |
+
|
| 35 |
+
alternate_model:
|
| 36 |
+
name: "finiteautomata/bertweet-base-sentiment-analysis"
|
| 37 |
+
type: "local"
|
| 38 |
+
description: "BERTweet (850M tweets)"
|
| 39 |
+
|
| 40 |
+
# Stage 3: Finalization Model
|
| 41 |
+
stage3:
|
| 42 |
+
llm3:
|
| 43 |
+
name: "meta-llama/Llama-3.1-70B-Instruct"
|
| 44 |
+
temperature: 0.1
|
| 45 |
+
max_tokens: 400
|
| 46 |
+
role: "Final synthesis and reasoning"
|
| 47 |
+
|
| 48 |
+
# =============================================================================
|
| 49 |
+
# AGENT PERSONAS & PROMPTS
|
| 50 |
+
# =============================================================================
|
| 51 |
+
personas:
|
| 52 |
+
# LLM1: Classification Expert
|
| 53 |
+
llm1:
|
| 54 |
+
name: "Classification Specialist"
|
| 55 |
+
expertise: "Expert at classifying customer reviews for theme park and attraction apps"
|
| 56 |
+
personality: "Analytical, precise, focused on categorization"
|
| 57 |
+
tone: "Professional and systematic"
|
| 58 |
+
|
| 59 |
+
system_prompt: |
|
| 60 |
+
You are an expert at classifying customer reviews for theme park and attraction apps.
|
| 61 |
+
Your job is to analyze reviews and categorize them across multiple dimensions.
|
| 62 |
+
Be precise, analytical, and consistent in your classifications.
|
| 63 |
+
|
| 64 |
+
categories:
|
| 65 |
+
type:
|
| 66 |
+
- complaint: "Customer reports a problem"
|
| 67 |
+
- praise: "Customer expresses satisfaction"
|
| 68 |
+
- suggestion: "Customer proposes improvement"
|
| 69 |
+
- question: "Customer asks about something"
|
| 70 |
+
- bug_report: "Technical issue described"
|
| 71 |
+
|
| 72 |
+
department:
|
| 73 |
+
- engineering: "Technical issues, bugs, crashes"
|
| 74 |
+
- ux: "Design, usability, interface issues"
|
| 75 |
+
- support: "Customer service, help needed"
|
| 76 |
+
- business: "Pricing, policies, marketing"
|
| 77 |
+
|
| 78 |
+
priority:
|
| 79 |
+
- critical: "Service down, major blocker"
|
| 80 |
+
- high: "Significant problem affecting use"
|
| 81 |
+
- medium: "Inconvenience but not blocking"
|
| 82 |
+
- low: "Minor issue or suggestion"
|
| 83 |
+
|
| 84 |
+
# LLM2: Psychology Expert
|
| 85 |
+
llm2:
|
| 86 |
+
name: "User Psychology Analyst"
|
| 87 |
+
expertise: "Expert at understanding customer psychology and emotional context"
|
| 88 |
+
personality: "Empathetic, insightful, human-centered"
|
| 89 |
+
tone: "Warm yet professional"
|
| 90 |
+
|
| 91 |
+
system_prompt: |
|
| 92 |
+
You are an expert at understanding customer psychology and emotional context.
|
| 93 |
+
Your job is to analyze the human behind the review - their emotions, user type, and context.
|
| 94 |
+
Be empathetic, insightful, and focus on the human experience.
|
| 95 |
+
|
| 96 |
+
categories:
|
| 97 |
+
user_type:
|
| 98 |
+
- new_user: "First-time or new user"
|
| 99 |
+
- regular_user: "Returning customer"
|
| 100 |
+
- power_user: "Heavy user, tech-savvy"
|
| 101 |
+
- churning_user: "Considering leaving"
|
| 102 |
+
|
| 103 |
+
emotion:
|
| 104 |
+
- anger: "Angry, hostile tone"
|
| 105 |
+
- frustration: "Frustrated but not angry"
|
| 106 |
+
- joy: "Happy, satisfied"
|
| 107 |
+
- satisfaction: "Content, pleased"
|
| 108 |
+
- disappointment: "Let down, sad"
|
| 109 |
+
- confusion: "Unclear, needs help"
|
| 110 |
+
|
| 111 |
+
# Manager: Synthesis Expert
|
| 112 |
+
manager:
|
| 113 |
+
name: "Synthesis Manager"
|
| 114 |
+
expertise: "Expert at reconciling multiple AI analyses and making final decisions"
|
| 115 |
+
personality: "Balanced, fair, decisive"
|
| 116 |
+
tone: "Authoritative yet collaborative"
|
| 117 |
+
|
| 118 |
+
system_prompt: |
|
| 119 |
+
You are a synthesis manager evaluating two AI analyses of the same review.
|
| 120 |
+
Your job is to validate both analyses, resolve conflicts, and make final classification decisions.
|
| 121 |
+
Be thorough, fair, and provide clear reasoning for your decisions.
|
| 122 |
+
|
| 123 |
+
# LLM3: Strategic Analyst
|
| 124 |
+
llm3:
|
| 125 |
+
name: "Strategic Decision Maker"
|
| 126 |
+
expertise: "Expert at synthesizing complex data and providing actionable recommendations"
|
| 127 |
+
personality: "Strategic, comprehensive, business-focused"
|
| 128 |
+
tone: "Executive-level, actionable"
|
| 129 |
+
|
| 130 |
+
system_prompt: |
|
| 131 |
+
You are a final decision-making AI analyzing customer feedback for a theme park/attraction app.
|
| 132 |
+
Your job is to synthesize all previous analysis stages and provide comprehensive, actionable insights.
|
| 133 |
+
Think strategically about business impact, user satisfaction, and operational priorities.
|
| 134 |
+
Your recommendations should be clear, specific, and immediately actionable.
|
| 135 |
+
|
| 136 |
+
# =============================================================================
|
| 137 |
+
# CLASSIFICATION RULES
|
| 138 |
+
# =============================================================================
|
| 139 |
+
classification_rules:
|
| 140 |
+
# Priority escalation rules
|
| 141 |
+
priority_escalation:
|
| 142 |
+
keywords_critical:
|
| 143 |
+
- "crash"
|
| 144 |
+
- "doesn't work"
|
| 145 |
+
- "broken"
|
| 146 |
+
- "can't use"
|
| 147 |
+
- "completely unusable"
|
| 148 |
+
- "emergency"
|
| 149 |
+
- "urgent"
|
| 150 |
+
|
| 151 |
+
keywords_high:
|
| 152 |
+
- "bug"
|
| 153 |
+
- "error"
|
| 154 |
+
- "problem"
|
| 155 |
+
- "issue"
|
| 156 |
+
- "not working"
|
| 157 |
+
- "frustrated"
|
| 158 |
+
|
| 159 |
+
rating_thresholds:
|
| 160 |
+
critical: 1 # 1-star reviews are critical
|
| 161 |
+
high: 2 # 2-star reviews are high priority
|
| 162 |
+
|
| 163 |
+
# Department routing rules
|
| 164 |
+
department_keywords:
|
| 165 |
+
engineering:
|
| 166 |
+
- "crash"
|
| 167 |
+
- "bug"
|
| 168 |
+
- "error"
|
| 169 |
+
- "not loading"
|
| 170 |
+
- "freeze"
|
| 171 |
+
- "slow"
|
| 172 |
+
- "technical"
|
| 173 |
+
|
| 174 |
+
ux:
|
| 175 |
+
- "confusing"
|
| 176 |
+
- "hard to use"
|
| 177 |
+
- "can't find"
|
| 178 |
+
- "design"
|
| 179 |
+
- "layout"
|
| 180 |
+
- "interface"
|
| 181 |
+
- "navigation"
|
| 182 |
+
|
| 183 |
+
support:
|
| 184 |
+
- "help"
|
| 185 |
+
- "contact"
|
| 186 |
+
- "customer service"
|
| 187 |
+
- "support"
|
| 188 |
+
- "assistance"
|
| 189 |
+
- "question"
|
| 190 |
+
|
| 191 |
+
business:
|
| 192 |
+
- "price"
|
| 193 |
+
- "refund"
|
| 194 |
+
- "subscription"
|
| 195 |
+
- "billing"
|
| 196 |
+
- "expensive"
|
| 197 |
+
- "policy"
|
| 198 |
+
|
| 199 |
+
# Churn risk indicators
|
| 200 |
+
churn_indicators:
|
| 201 |
+
high_risk:
|
| 202 |
+
- "switching to"
|
| 203 |
+
- "deleted the app"
|
| 204 |
+
- "uninstalling"
|
| 205 |
+
- "terrible experience"
|
| 206 |
+
- "never again"
|
| 207 |
+
- "disappointed"
|
| 208 |
+
|
| 209 |
+
medium_risk:
|
| 210 |
+
- "might switch"
|
| 211 |
+
- "considering alternatives"
|
| 212 |
+
- "getting worse"
|
| 213 |
+
- "used to be better"
|
| 214 |
+
|
| 215 |
+
# =============================================================================
|
| 216 |
+
# SENTIMENT ANALYSIS SETTINGS
|
| 217 |
+
# =============================================================================
|
| 218 |
+
sentiment:
|
| 219 |
+
# Agreement thresholds
|
| 220 |
+
agreement:
|
| 221 |
+
strong_threshold: 0.9 # Both models >0.9 confidence
|
| 222 |
+
weak_threshold: 0.6 # One model <0.6 confidence
|
| 223 |
+
|
| 224 |
+
# Confidence weighting
|
| 225 |
+
confidence:
|
| 226 |
+
minimum_acceptable: 0.5
|
| 227 |
+
high_confidence: 0.8
|
| 228 |
+
very_high_confidence: 0.95
|
| 229 |
+
|
| 230 |
+
# Override rules
|
| 231 |
+
override_rules:
|
| 232 |
+
# If rating is 1-star but sentiment is positive, flag for review
|
| 233 |
+
rating_sentiment_mismatch:
|
| 234 |
+
enabled: true
|
| 235 |
+
flag_threshold: 2 # 2-star difference
|
| 236 |
+
|
| 237 |
+
# =============================================================================
|
| 238 |
+
# BATCH ANALYSIS SETTINGS
|
| 239 |
+
# =============================================================================
|
| 240 |
+
batch_analysis:
|
| 241 |
+
# Critical issues detection
|
| 242 |
+
critical_issues:
|
| 243 |
+
max_display: 10
|
| 244 |
+
criteria:
|
| 245 |
+
- priority: "critical"
|
| 246 |
+
- sentiment: "NEGATIVE"
|
| 247 |
+
- rating: "<=2"
|
| 248 |
+
- needs_human_review: true
|
| 249 |
+
|
| 250 |
+
# Quick wins detection
|
| 251 |
+
quick_wins:
|
| 252 |
+
max_display: 10
|
| 253 |
+
criteria:
|
| 254 |
+
- type: "suggestion"
|
| 255 |
+
- priority: ["low", "medium"]
|
| 256 |
+
- feasibility: "easy"
|
| 257 |
+
|
| 258 |
+
# Churn risk calculation
|
| 259 |
+
churn_risk:
|
| 260 |
+
weights:
|
| 261 |
+
churning_user: 2.0
|
| 262 |
+
negative_low_rating: 1.5
|
| 263 |
+
rating_1_star: 1.0
|
| 264 |
+
|
| 265 |
+
thresholds:
|
| 266 |
+
high: 30 # >30% is high risk
|
| 267 |
+
medium: 15 # 15-30% is medium risk
|
| 268 |
+
low: 0 # <15% is low risk
|
| 269 |
+
|
| 270 |
+
# =============================================================================
|
| 271 |
+
# PROMPT TEMPLATES
|
| 272 |
+
# =============================================================================
|
| 273 |
+
prompt_templates:
|
| 274 |
+
# Stage 1 LLM1 Prompt
|
| 275 |
+
stage1_llm1: |
|
| 276 |
+
You are an expert at classifying customer reviews for theme park and attraction apps.
|
| 277 |
+
|
| 278 |
+
REVIEW:
|
| 279 |
+
Rating: {rating}/5
|
| 280 |
+
Text: {review_text}
|
| 281 |
+
|
| 282 |
+
Classify this review across these dimensions:
|
| 283 |
+
|
| 284 |
+
1. TYPE (choose ONE): {type_options}
|
| 285 |
+
2. DEPARTMENT (choose ONE): {department_options}
|
| 286 |
+
3. PRIORITY (choose ONE): {priority_options}
|
| 287 |
+
4. CONFIDENCE (0.0-1.0): How confident are you in this classification?
|
| 288 |
+
5. REASONING: Brief one-sentence explanation
|
| 289 |
+
|
| 290 |
+
Respond ONLY in valid JSON format:
|
| 291 |
+
{{
|
| 292 |
+
"type": "complaint/praise/suggestion/question/bug_report",
|
| 293 |
+
"department": "engineering/ux/support/business",
|
| 294 |
+
"priority": "critical/high/medium/low",
|
| 295 |
+
"confidence": 0.0-1.0,
|
| 296 |
+
"reasoning": "brief explanation"
|
| 297 |
+
}}
|
| 298 |
+
|
| 299 |
+
# Stage 1 LLM2 Prompt
|
| 300 |
+
stage1_llm2: |
|
| 301 |
+
You are an expert at understanding customer psychology and emotional context.
|
| 302 |
+
|
| 303 |
+
REVIEW:
|
| 304 |
+
Rating: {rating}/5
|
| 305 |
+
Text: {review_text}
|
| 306 |
+
|
| 307 |
+
Analyze the user and emotional context:
|
| 308 |
+
|
| 309 |
+
1. USER_TYPE (choose ONE): {user_type_options}
|
| 310 |
+
2. EMOTION (choose ONE): {emotion_options}
|
| 311 |
+
3. CONTEXT (brief): What is the underlying issue or situation? 1-2 words summary
|
| 312 |
+
4. CONFIDENCE (0.0-1.0): How confident are you?
|
| 313 |
+
5. REASONING: Brief one-sentence explanation
|
| 314 |
+
|
| 315 |
+
Respond ONLY in valid JSON format:
|
| 316 |
+
{{
|
| 317 |
+
"user_type": "new_user/regular_user/power_user/churning_user",
|
| 318 |
+
"emotion": "anger/frustration/joy/satisfaction/disappointment/confusion",
|
| 319 |
+
"context": "brief context",
|
| 320 |
+
"confidence": 0.0-1.0,
|
| 321 |
+
"reasoning": "brief explanation"
|
| 322 |
+
}}
|
| 323 |
+
|
| 324 |
+
# Stage 1 Manager Prompt
|
| 325 |
+
stage1_manager: |
|
| 326 |
+
You are a synthesis manager evaluating two AI analyses of the same review.
|
| 327 |
+
|
| 328 |
+
REVIEW:
|
| 329 |
+
Rating: {rating}/5
|
| 330 |
+
Text: {review_text}
|
| 331 |
+
|
| 332 |
+
LLM1 ANALYSIS (Type/Dept/Priority):
|
| 333 |
+
{llm1_result}
|
| 334 |
+
|
| 335 |
+
LLM2 ANALYSIS (User/Emotion/Context):
|
| 336 |
+
{llm2_result}
|
| 337 |
+
|
| 338 |
+
Your task:
|
| 339 |
+
1. Validate both analyses
|
| 340 |
+
2. Resolve any conflicts
|
| 341 |
+
3. Make final classification decision
|
| 342 |
+
4. Provide synthesis reasoning
|
| 343 |
+
|
| 344 |
+
Respond ONLY in valid JSON format:
|
| 345 |
+
{{
|
| 346 |
+
"final_type": "from llm1 or adjusted",
|
| 347 |
+
"final_department": "from llm1 or adjusted",
|
| 348 |
+
"final_priority": "from llm1 or adjusted",
|
| 349 |
+
"final_user_type": "from llm2 or adjusted",
|
| 350 |
+
"final_emotion": "from llm2 or adjusted",
|
| 351 |
+
"confidence": 0.0-1.0,
|
| 352 |
+
"reasoning": "synthesis explanation",
|
| 353 |
+
"conflicts_found": "any conflicts between LLM1 and LLM2, or 'none'"
|
| 354 |
+
}}
|
| 355 |
+
|
| 356 |
+
# Stage 3 LLM3 Prompt
|
| 357 |
+
stage3_llm3: |
|
| 358 |
+
You are a final decision-making AI analyzing customer feedback for a theme park/attraction app.
|
| 359 |
+
|
| 360 |
+
REVIEW DATA:
|
| 361 |
+
Rating: {rating}/5
|
| 362 |
+
Text: {review_text}
|
| 363 |
+
|
| 364 |
+
STAGE 1 CLASSIFICATION:
|
| 365 |
+
- Review Type: {type}
|
| 366 |
+
- Department: {department}
|
| 367 |
+
- Priority: {priority}
|
| 368 |
+
- User Type: {user_type}
|
| 369 |
+
- Emotion: {emotion}
|
| 370 |
+
|
| 371 |
+
STAGE 2 SENTIMENT ANALYSIS:
|
| 372 |
+
- Best Model: {best_sentiment} (confidence: {best_confidence})
|
| 373 |
+
- Alternate Model: {alt_sentiment} (confidence: {alt_confidence})
|
| 374 |
+
- Models Agreement: {agreement}
|
| 375 |
+
|
| 376 |
+
YOUR TASK:
|
| 377 |
+
1. Review all data from both stages
|
| 378 |
+
2. Make FINAL sentiment decision (POSITIVE, NEGATIVE, or NEUTRAL)
|
| 379 |
+
3. Validate that classification and sentiment align
|
| 380 |
+
4. Provide comprehensive reasoning
|
| 381 |
+
5. Identify any conflicts between stages
|
| 382 |
+
6. Generate action recommendation
|
| 383 |
+
7. Flag if human review is needed
|
| 384 |
+
|
| 385 |
+
Respond ONLY in valid JSON format:
|
| 386 |
+
{{
|
| 387 |
+
"final_sentiment": "POSITIVE/NEGATIVE/NEUTRAL",
|
| 388 |
+
"confidence": 0.0-1.0,
|
| 389 |
+
"reasoning": "Comprehensive explanation synthesizing all stages",
|
| 390 |
+
"validation_notes": "Does classification match sentiment?",
|
| 391 |
+
"conflicts_found": "any conflicts or 'none'",
|
| 392 |
+
"action_recommendation": "Specific action to take",
|
| 393 |
+
"needs_human_review": true/false
|
| 394 |
+
}}
|
| 395 |
+
|
| 396 |
+
# =============================================================================
|
| 397 |
+
# PROCESSING SETTINGS
|
| 398 |
+
# =============================================================================
|
| 399 |
+
processing:
|
| 400 |
+
# Batch settings
|
| 401 |
+
batch_size: 10
|
| 402 |
+
max_workers: 3
|
| 403 |
+
timeout_seconds: 30
|
| 404 |
+
retry_attempts: 3
|
| 405 |
+
|
| 406 |
+
# Rate limiting (for HF API)
|
| 407 |
+
rate_limit:
|
| 408 |
+
requests_per_minute: 60
|
| 409 |
+
requests_per_day: 10000 # HF Pro limit
|
| 410 |
+
|
| 411 |
+
# Logging
|
| 412 |
+
logging:
|
| 413 |
+
level: "INFO" # DEBUG, INFO, WARNING, ERROR
|
| 414 |
+
save_logs: true
|
| 415 |
+
log_file: "processing.log"
|
| 416 |
+
|
| 417 |
+
# Checkpointing
|
| 418 |
+
checkpoint:
|
| 419 |
+
enabled: true
|
| 420 |
+
save_after_each_stage: true
|
| 421 |
+
auto_resume: true
|
| 422 |
+
|
| 423 |
+
# =============================================================================
|
| 424 |
+
# DASHBOARD SETTINGS
|
| 425 |
+
# =============================================================================
|
| 426 |
+
dashboard:
|
| 427 |
+
# UI Configuration
|
| 428 |
+
ui:
|
| 429 |
+
title: "Review Intelligence System"
|
| 430 |
+
icon: "🎯"
|
| 431 |
+
layout: "wide"
|
| 432 |
+
theme: "light" # light or dark
|
| 433 |
+
|
| 434 |
+
# Chart colors
|
| 435 |
+
colors:
|
| 436 |
+
positive: "#2ca02c"
|
| 437 |
+
neutral: "#ff7f0e"
|
| 438 |
+
negative: "#d62728"
|
| 439 |
+
critical: "#d62728"
|
| 440 |
+
high: "#ff7f0e"
|
| 441 |
+
medium: "#1f77b4"
|
| 442 |
+
low: "#2ca02c"
|
| 443 |
+
|
| 444 |
+
# Filters
|
| 445 |
+
filters:
|
| 446 |
+
enable_sentiment: true
|
| 447 |
+
enable_department: true
|
| 448 |
+
enable_priority: true
|
| 449 |
+
enable_date_range: false # Future feature
|
| 450 |
+
|
| 451 |
+
# Display limits
|
| 452 |
+
display:
|
| 453 |
+
max_critical_issues: 20
|
| 454 |
+
max_quick_wins: 15
|
| 455 |
+
reviews_per_page: 50
|
| 456 |
+
auto_refresh_seconds: 60
|
| 457 |
+
|
| 458 |
+
# =============================================================================
|
| 459 |
+
# DOMAIN-SPECIFIC CUSTOMIZATION (Theme Parks / Attractions)
|
| 460 |
+
# =============================================================================
|
| 461 |
+
domain:
|
| 462 |
+
name: "Theme Parks & Attractions"
|
| 463 |
+
|
| 464 |
+
# Common features to look for
|
| 465 |
+
features:
|
| 466 |
+
- "ticket booking"
|
| 467 |
+
- "queue times"
|
| 468 |
+
- "express pass"
|
| 469 |
+
- "meal plans"
|
| 470 |
+
- "park maps"
|
| 471 |
+
- "show times"
|
| 472 |
+
- "photo pass"
|
| 473 |
+
- "virtual queue"
|
| 474 |
+
- "ride reservations"
|
| 475 |
+
- "mobile ordering"
|
| 476 |
+
|
| 477 |
+
# Pain points to prioritize
|
| 478 |
+
pain_points:
|
| 479 |
+
high_impact:
|
| 480 |
+
- "can't book tickets"
|
| 481 |
+
- "app crashes during booking"
|
| 482 |
+
- "payment fails"
|
| 483 |
+
- "queue times wrong"
|
| 484 |
+
- "can't access tickets"
|
| 485 |
+
|
| 486 |
+
medium_impact:
|
| 487 |
+
- "map doesn't load"
|
| 488 |
+
- "slow performance"
|
| 489 |
+
- "confusing navigation"
|
| 490 |
+
- "notifications not working"
|
| 491 |
+
|
| 492 |
+
# Positive signals
|
| 493 |
+
positive_signals:
|
| 494 |
+
- "easy booking"
|
| 495 |
+
- "fast check-in"
|
| 496 |
+
- "helpful features"
|
| 497 |
+
- "saved time"
|
| 498 |
+
- "convenient"
|
| 499 |
+
- "great experience"
|
| 500 |
+
|
| 501 |
+
# =============================================================================
|
| 502 |
+
# NOTES
|
| 503 |
+
# =============================================================================
|
| 504 |
+
# - Edit this file to customize agent behavior
|
| 505 |
+
# - Prompts support variables in {curly_braces}
|
| 506 |
+
# - Model names must match HuggingFace model IDs
|
| 507 |
+
# - Temperature: 0.0 = deterministic, 1.0 = creative
|
| 508 |
+
# - Changes take effect on next run (no restart needed for some settings)
|
config_loader.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Configuration Loader
|
| 3 |
+
Loads settings from config.yaml for agent personas and prompts
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import yaml
|
| 7 |
+
import os
|
| 8 |
+
from typing import Dict, Any
|
| 9 |
+
|
| 10 |
+
class Config:
    """
    Configuration manager for the Review Intelligence System.

    Loads ``config.yaml`` once at construction and exposes accessor methods
    for each top-level section. Falls back to a minimal built-in default
    when the file is missing, unreadable, or does not parse to a mapping
    (an empty YAML file parses to ``None``, which previously left
    ``self.config`` as ``None`` and crashed every accessor).
    """

    def __init__(self, config_file: str = "config.yaml"):
        # Keep the path so callers can see which file was (attempted to be) loaded.
        self.config_file = config_file
        self.config = self._load_config()

    def _load_config(self) -> Dict[str, Any]:
        """Load configuration from the YAML file, falling back to defaults.

        Returns:
            The parsed configuration mapping, or ``_default_config()`` when
            the file is absent, cannot be read/parsed, or does not contain
            a top-level mapping.
        """
        if not os.path.exists(self.config_file):
            print(f"⚠️ Config file not found: {self.config_file}")
            print(" Using default configuration")
            return self._default_config()

        try:
            with open(self.config_file, 'r', encoding='utf-8') as f:
                data = yaml.safe_load(f)
        except Exception as e:
            # Covers I/O errors and YAML parse errors alike; config loading
            # must never take the app down, so degrade to defaults.
            print(f"⚠️ Error loading config: {e}")
            print(" Using default configuration")
            return self._default_config()

        # Empty file -> None; a scalar or list is equally unusable here.
        if not isinstance(data, dict):
            print(f"⚠️ Config file {self.config_file} did not contain a mapping")
            print(" Using default configuration")
            return self._default_config()

        print(f"✅ Configuration loaded from {self.config_file}")
        return data

    def _default_config(self) -> Dict[str, Any]:
        """Return the minimal default configuration (models only)."""
        return {
            'models': {
                'stage1': {
                    'llm1': {'name': 'Qwen/Qwen2.5-72B-Instruct', 'temperature': 0.1},
                    'llm2': {'name': 'mistralai/Mistral-7B-Instruct-v0.3', 'temperature': 0.1},
                    'manager': {'name': 'meta-llama/Llama-3.1-8B-Instruct', 'temperature': 0.1}
                },
                'stage2': {
                    'best_model': {'name': 'cardiffnlp/twitter-roberta-base-sentiment-latest'},
                    'alternate_model': {'name': 'finiteautomata/bertweet-base-sentiment-analysis'}
                },
                'stage3': {
                    'llm3': {'name': 'meta-llama/Llama-3.1-70B-Instruct', 'temperature': 0.1}
                }
            }
        }

    def get_model(self, stage: str, model_key: str) -> Dict[str, Any]:
        """Return the model configuration for ``stage``/``model_key`` ({} if absent)."""
        return self.config.get('models', {}).get(stage, {}).get(model_key, {})

    def get_persona(self, agent: str) -> Dict[str, Any]:
        """Return the persona configuration for ``agent`` ({} if absent)."""
        return self.config.get('personas', {}).get(agent, {})

    def get_prompt_template(self, template_name: str) -> str:
        """Return the named prompt template text ('' if absent)."""
        return self.config.get('prompt_templates', {}).get(template_name, '')

    def get_classification_rules(self) -> Dict[str, Any]:
        """Return the ``classification_rules`` section ({} if absent)."""
        return self.config.get('classification_rules', {})

    def get_sentiment_settings(self) -> Dict[str, Any]:
        """Return the ``sentiment`` section ({} if absent)."""
        return self.config.get('sentiment', {})

    def get_batch_settings(self) -> Dict[str, Any]:
        """Return the ``batch_analysis`` section ({} if absent)."""
        return self.config.get('batch_analysis', {})

    def get_processing_settings(self) -> Dict[str, Any]:
        """Return the ``processing`` section ({} if absent)."""
        return self.config.get('processing', {})

    def get_dashboard_settings(self) -> Dict[str, Any]:
        """Return the ``dashboard`` section ({} if absent)."""
        return self.config.get('dashboard', {})
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
# Module-level singleton slot; populated on first get_config() call.
_config_instance = None

def get_config(config_file: str = "config.yaml") -> Config:
    """Return the shared Config, constructing it on first use.

    Note: ``config_file`` only matters on the very first call; later calls
    return the already-built instance regardless of the argument.
    """
    global _config_instance
    if _config_instance is not None:
        return _config_instance
    _config_instance = Config(config_file)
    return _config_instance
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
if __name__ == "__main__":
    # Manual smoke test: exercise each accessor against the real config file.
    banner = "=" * 60
    print("\n" + banner)
    print("🧪 TESTING CONFIG LOADER")
    print(banner + "\n")

    config = get_config()

    # Model lookup
    llm1_model = config.get_model('stage1', 'llm1')
    print(f"LLM1 Model: {llm1_model.get('name', 'Not found')}")

    # Persona lookup
    persona = config.get_persona('llm1')
    print(f"LLM1 Persona: {persona.get('name', 'Not found')}")

    # Prompt template lookup
    template = config.get_prompt_template('stage1_llm1')
    print(f"Prompt template loaded: {len(template)} characters")

    print("\n✅ Config loader test complete!")
|
database_enhanced.py
ADDED
|
@@ -0,0 +1,403 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Enhanced Database Schema for Multi-Stage Review Analysis
|
| 3 |
+
Adds Stage 1-4 columns to existing reviews table
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import sqlite3
|
| 7 |
+
from datetime import datetime
|
| 8 |
+
from typing import Dict, List, Any, Optional
|
| 9 |
+
import json
|
| 10 |
+
|
| 11 |
+
class EnhancedDatabase:
    """
    Manages the enhanced multi-stage review-analysis schema (Stage 1-4).

    Wraps a single sqlite3 connection to the reviews database. Callers must
    call connect() before any query method and close() when finished.
    enhance_schema() is non-destructive and idempotent: it only ADDs columns
    that are missing and creates side tables with IF NOT EXISTS.
    """

    def __init__(self, db_file: str = "review_database.db"):
        self.db_file = db_file
        self.conn = None  # opened lazily by connect()
        print(f"📁 Database: {db_file}")

    def connect(self):
        """Open the SQLite connection; rows are returned as sqlite3.Row."""
        self.conn = sqlite3.connect(self.db_file)
        self.conn.row_factory = sqlite3.Row
        print("✅ Connected to database")
        return self.conn

    def close(self):
        """Close the database connection if one is open."""
        if self.conn:
            self.conn.close()
            print("✅ Database connection closed")

    def enhance_schema(self) -> int:
        """
        Add Stage 1-4 columns to the existing ``reviews`` table.

        Non-destructive: keeps all existing data and columns; only missing
        columns are added, so re-running is safe. Also creates the
        llm_decision_logs and batch_insights tables.

        Returns:
            Number of columns actually added.
        """
        print("\n" + "="*60)
        print("🔧 ENHANCING DATABASE SCHEMA")
        print("="*60)

        cursor = self.conn.cursor()

        # Discover existing columns so the ALTERs below are idempotent.
        cursor.execute("PRAGMA table_info(reviews)")
        existing_columns = [row[1] for row in cursor.fetchall()]
        print(f"📋 Existing columns: {len(existing_columns)}")

        # Stage 1: dual-LLM classification plus manager consolidation
        stage1_columns = [
            ("stage1_llm1_type", "TEXT"),
            ("stage1_llm1_department", "TEXT"),
            ("stage1_llm1_priority", "TEXT"),
            ("stage1_llm1_confidence", "REAL"),
            ("stage1_llm1_reasoning", "TEXT"),
            ("stage1_llm2_user_type", "TEXT"),
            ("stage1_llm2_emotion", "TEXT"),
            ("stage1_llm2_context", "TEXT"),
            ("stage1_llm2_confidence", "REAL"),
            ("stage1_llm2_reasoning", "TEXT"),
            ("stage1_manager_classification", "TEXT"),
            ("stage1_manager_reasoning", "TEXT"),
            ("stage1_completed_at", "TIMESTAMP"),
        ]

        # Stage 2: two sentiment models (best + alternative) and agreement flag
        stage2_columns = [
            ("stage2_best_sentiment", "TEXT"),
            ("stage2_best_confidence", "REAL"),
            ("stage2_best_prob_positive", "REAL"),
            ("stage2_best_prob_neutral", "REAL"),
            ("stage2_best_prob_negative", "REAL"),
            ("stage2_alt_sentiment", "TEXT"),
            ("stage2_alt_confidence", "REAL"),
            ("stage2_alt_prob_positive", "REAL"),
            ("stage2_alt_prob_neutral", "REAL"),
            ("stage2_alt_prob_negative", "REAL"),
            ("stage2_agreement", "BOOLEAN"),
            ("stage2_layer_sentiment", "TEXT"),
            ("stage2_completed_at", "TIMESTAMP"),
        ]

        # Stage 3: final sentiment decision, validation and escalation
        stage3_columns = [
            ("stage3_final_sentiment", "TEXT"),
            ("stage3_confidence", "REAL"),
            ("stage3_reasoning", "TEXT"),
            ("stage3_validation_notes", "TEXT"),
            ("stage3_conflicts_found", "TEXT"),
            ("stage3_action_recommendation", "TEXT"),
            ("stage3_needs_human_review", "BOOLEAN"),
            ("stage3_completed_at", "TIMESTAMP"),
        ]

        # Processing metadata for pipeline bookkeeping
        metadata_columns = [
            ("processing_status", "TEXT DEFAULT 'pending'"),
            ("processing_version", "TEXT DEFAULT 'v1.0'"),
            ("processing_started_at", "TIMESTAMP"),
            ("processing_completed_at", "TIMESTAMP"),
        ]

        all_new_columns = (
            stage1_columns +
            stage2_columns +
            stage3_columns +
            metadata_columns
        )

        # Add only the columns that don't already exist. Identifiers come from
        # the hardcoded lists above (never user input), so the f-string ALTER
        # is safe here.
        added_count = 0
        for col_name, col_type in all_new_columns:
            if col_name not in existing_columns:
                try:
                    cursor.execute(f"ALTER TABLE reviews ADD COLUMN {col_name} {col_type}")
                    added_count += 1
                    print(f" ✅ Added column: {col_name}")
                except sqlite3.OperationalError as e:
                    # A racing writer may have added the column first; ignore
                    # duplicates, surface anything else.
                    if "duplicate column" not in str(e).lower():
                        print(f" ⚠️ Error adding {col_name}: {e}")

        self.conn.commit()
        print(f"\n✅ Schema enhanced: {added_count} new columns added")

        # Audit-trail table for individual LLM decisions
        self._create_logs_table(cursor)

        # Aggregated Stage 4 analytics table
        self._create_batch_insights_table(cursor)

        return added_count

    def _create_logs_table(self, cursor):
        """Create the llm_decision_logs audit table and its indexes."""
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS llm_decision_logs (
                log_id INTEGER PRIMARY KEY AUTOINCREMENT,
                review_id TEXT NOT NULL,
                stage TEXT NOT NULL,
                model_name TEXT NOT NULL,
                input_prompt TEXT,
                output_response TEXT,
                confidence REAL,
                reasoning TEXT,
                processing_time_seconds REAL,
                timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (review_id) REFERENCES reviews(review_id)
            )
        """)

        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_logs_review_id
            ON llm_decision_logs(review_id)
        """)

        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_logs_stage
            ON llm_decision_logs(stage)
        """)

        self.conn.commit()
        print(" ✅ Created llm_decision_logs table")

    def _create_batch_insights_table(self, cursor):
        """Create the batch_insights table for Stage 4 aggregate analytics."""
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS batch_insights (
                batch_id INTEGER PRIMARY KEY AUTOINCREMENT,
                analysis_date DATE,
                total_reviews INTEGER,
                sentiment_positive INTEGER,
                sentiment_neutral INTEGER,
                sentiment_negative INTEGER,
                priority_critical INTEGER,
                priority_high INTEGER,
                priority_medium INTEGER,
                priority_low INTEGER,
                dept_engineering INTEGER,
                dept_ux INTEGER,
                dept_support INTEGER,
                dept_business INTEGER,
                critical_issues TEXT,
                quick_wins TEXT,
                recommendations TEXT,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)

        self.conn.commit()
        print(" ✅ Created batch_insights table")

    def get_pending_reviews(self, limit: Optional[int] = None) -> List[Dict]:
        """
        Return reviews that have not been processed yet.

        Args:
            limit: Optional maximum number of rows to return.

        Returns:
            List of review rows as plain dicts, newest first.
        """
        cursor = self.conn.cursor()

        query = """
            SELECT * FROM reviews
            WHERE processing_status IS NULL OR processing_status = 'pending'
            ORDER BY scraped_at DESC
        """

        params = ()
        if limit:
            # Bind LIMIT as a parameter rather than interpolating it into the
            # SQL text — safer and handles non-int inputs explicitly.
            query += " LIMIT ?"
            params = (int(limit),)

        cursor.execute(query, params)
        rows = cursor.fetchall()

        return [dict(row) for row in rows]

    def update_stage1(self, review_id: str, data: Dict[str, Any]):
        """Write Stage 1 classification fields and mark the review stage1_complete."""
        cursor = self.conn.cursor()

        cursor.execute("""
            UPDATE reviews SET
                stage1_llm1_type = ?,
                stage1_llm1_department = ?,
                stage1_llm1_priority = ?,
                stage1_llm1_confidence = ?,
                stage1_llm1_reasoning = ?,
                stage1_llm2_user_type = ?,
                stage1_llm2_emotion = ?,
                stage1_llm2_context = ?,
                stage1_llm2_confidence = ?,
                stage1_llm2_reasoning = ?,
                stage1_manager_classification = ?,
                stage1_manager_reasoning = ?,
                stage1_completed_at = ?,
                processing_status = 'stage1_complete'
            WHERE review_id = ?
        """, (
            data.get('llm1_type'),
            data.get('llm1_department'),
            data.get('llm1_priority'),
            data.get('llm1_confidence'),
            data.get('llm1_reasoning'),
            data.get('llm2_user_type'),
            data.get('llm2_emotion'),
            data.get('llm2_context'),
            data.get('llm2_confidence'),
            data.get('llm2_reasoning'),
            data.get('manager_classification'),
            data.get('manager_reasoning'),
            datetime.now().isoformat(),
            review_id
        ))

        self.conn.commit()

    def update_stage2(self, review_id: str, data: Dict[str, Any]):
        """Write Stage 2 sentiment fields and mark the review stage2_complete."""
        cursor = self.conn.cursor()

        cursor.execute("""
            UPDATE reviews SET
                stage2_best_sentiment = ?,
                stage2_best_confidence = ?,
                stage2_best_prob_positive = ?,
                stage2_best_prob_neutral = ?,
                stage2_best_prob_negative = ?,
                stage2_alt_sentiment = ?,
                stage2_alt_confidence = ?,
                stage2_alt_prob_positive = ?,
                stage2_alt_prob_neutral = ?,
                stage2_alt_prob_negative = ?,
                stage2_agreement = ?,
                stage2_layer_sentiment = ?,
                stage2_completed_at = ?,
                processing_status = 'stage2_complete'
            WHERE review_id = ?
        """, (
            data.get('best_sentiment'),
            data.get('best_confidence'),
            data.get('best_prob_positive'),
            data.get('best_prob_neutral'),
            data.get('best_prob_negative'),
            data.get('alt_sentiment'),
            data.get('alt_confidence'),
            data.get('alt_prob_positive'),
            data.get('alt_prob_neutral'),
            data.get('alt_prob_negative'),
            data.get('agreement'),
            data.get('layer_sentiment'),
            datetime.now().isoformat(),
            review_id
        ))

        self.conn.commit()

    def update_stage3(self, review_id: str, data: Dict[str, Any]):
        """Write Stage 3 finalization fields and mark the review complete."""
        cursor = self.conn.cursor()

        # Capture one timestamp so stage3_completed_at and
        # processing_completed_at are identical for this update.
        completed_at = datetime.now().isoformat()

        cursor.execute("""
            UPDATE reviews SET
                stage3_final_sentiment = ?,
                stage3_confidence = ?,
                stage3_reasoning = ?,
                stage3_validation_notes = ?,
                stage3_conflicts_found = ?,
                stage3_action_recommendation = ?,
                stage3_needs_human_review = ?,
                stage3_completed_at = ?,
                processing_status = 'complete',
                processing_completed_at = ?
            WHERE review_id = ?
        """, (
            data.get('final_sentiment'),
            data.get('confidence'),
            data.get('reasoning'),
            data.get('validation_notes'),
            data.get('conflicts_found'),
            data.get('action_recommendation'),
            data.get('needs_human_review'),
            completed_at,
            completed_at,
            review_id
        ))

        self.conn.commit()

    def log_llm_decision(self, review_id: str, stage: str, model_name: str,
                         input_prompt: str, output_response: str,
                         confidence: float, reasoning: str, processing_time: float):
        """Append one LLM decision to the llm_decision_logs audit trail."""
        cursor = self.conn.cursor()

        cursor.execute("""
            INSERT INTO llm_decision_logs
            (review_id, stage, model_name, input_prompt, output_response,
             confidence, reasoning, processing_time_seconds)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        """, (
            review_id, stage, model_name, input_prompt, output_response,
            confidence, reasoning, processing_time
        ))

        self.conn.commit()

    def get_all_processed_reviews(self) -> List[Dict]:
        """Return every fully processed review, most recently completed first."""
        cursor = self.conn.cursor()

        cursor.execute("""
            SELECT * FROM reviews
            WHERE processing_status = 'complete'
            ORDER BY processing_completed_at DESC
        """)

        rows = cursor.fetchall()
        return [dict(row) for row in rows]

    def save_batch_insights(self, insights: Dict[str, Any]):
        """
        Persist one batch-analytics snapshot to batch_insights.

        List-valued fields (critical_issues, quick_wins, recommendations)
        are stored as JSON text.
        """
        cursor = self.conn.cursor()

        cursor.execute("""
            INSERT INTO batch_insights
            (analysis_date, total_reviews, sentiment_positive, sentiment_neutral,
             sentiment_negative, priority_critical, priority_high, priority_medium,
             priority_low, dept_engineering, dept_ux, dept_support, dept_business,
             critical_issues, quick_wins, recommendations)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, (
            # Store the date as ISO text: the implicit sqlite3 date adapter is
            # deprecated since Python 3.12, and the stored form is unchanged.
            datetime.now().date().isoformat(),
            insights.get('total_reviews', 0),
            insights.get('sentiment_positive', 0),
            insights.get('sentiment_neutral', 0),
            insights.get('sentiment_negative', 0),
            insights.get('priority_critical', 0),
            insights.get('priority_high', 0),
            insights.get('priority_medium', 0),
            insights.get('priority_low', 0),
            insights.get('dept_engineering', 0),
            insights.get('dept_ux', 0),
            insights.get('dept_support', 0),
            insights.get('dept_business', 0),
            json.dumps(insights.get('critical_issues', [])),
            json.dumps(insights.get('quick_wins', [])),
            json.dumps(insights.get('recommendations', []))
        ))

        self.conn.commit()
        print(" ✅ Batch insights saved to database")
|
| 386 |
+
|
| 387 |
+
|
| 388 |
+
if __name__ == "__main__":
    # Smoke test: apply the schema enhancement and count pending reviews.
    print("\n" + "="*60)
    print("🧪 TESTING DATABASE ENHANCEMENT")
    print("="*60 + "\n")

    database = EnhancedDatabase()
    database.connect()
    database.enhance_schema()

    pending_reviews = database.get_pending_reviews(limit=5)
    print(f"\n📋 Found {len(pending_reviews)} pending reviews")

    database.close()
    print("\n✅ Database enhancement test complete!")
|
gradio_pipeline.py
ADDED
|
@@ -0,0 +1,321 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Gradio Pipeline - Streamlined processing for HuggingFace Spaces
|
| 3 |
+
Integrates scraping, classification, sentiment, and batch analysis with progress tracking
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
import sqlite3
|
| 8 |
+
import time
|
| 9 |
+
from typing import List, Dict, Any, Optional, Callable
|
| 10 |
+
from datetime import datetime
|
| 11 |
+
import json
|
| 12 |
+
|
| 13 |
+
# Import existing modules
|
| 14 |
+
from stage0_scraper import Stage0WebScraper
|
| 15 |
+
from langgraph_state import ReviewState, create_initial_state
|
| 16 |
+
from langgraph_graph import build_review_graph, build_batch_graph
|
| 17 |
+
from database_enhanced import EnhancedDatabase
|
| 18 |
+
from stage4_batch_analysis import Stage4BatchAnalysis
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class GradioPipeline:
    """
    Streamlined processing pipeline for the Gradio interface.

    Wires together scraping (Stage 0), the per-review LangGraph workflow
    (Stages 1-3) and batch analytics (Stage 4), reporting progress through
    an optional Gradio progress callback.
    """

    def __init__(self, db_file: str = "review_database.db", review_limit: int = 20):
        self.db_file = db_file
        self.review_limit = review_limit

        # Database: open the connection and bring the schema up to date.
        self.db = EnhancedDatabase(db_file)
        self.db.connect()
        self.db.enhance_schema()

        # Scraper: make sure the raw reviews table exists.
        self.scraper = Stage0WebScraper(db_file)
        self.scraper.create_reviews_table()

        # Compiled LangGraph workflows.
        self.review_graph = build_review_graph()
        self.batch_graph = build_batch_graph()

        print("✅ Gradio Pipeline initialized")

    def scrape_reviews(
        self,
        app_store_ids: List[str],
        play_store_packages: List[str],
        progress_callback: Optional[Callable] = None
    ) -> int:
        """
        Scrape reviews from the App Store and Play Store.

        Args:
            app_store_ids: App Store numeric IDs.
            play_store_packages: Play Store package names.
            progress_callback: Optional Gradio progress callback.

        Returns:
            Total number of reviews saved to the database.
        """
        app_count = len(app_store_ids) + len(play_store_packages)
        if app_count == 0:
            return 0

        saved_total = 0
        position = 0

        def report(label: str, target: str) -> None:
            # Scraping occupies the 10%-30% band of the overall progress bar.
            if progress_callback:
                progress_callback(
                    0.1 + (0.2 * position / app_count),
                    desc=f"{label} ({position}/{app_count}): {target}"
                )

        for app_id in app_store_ids:
            position += 1
            report("🍎 Scraping App Store", app_id)

            try:
                fetched = self.scraper.scrape_app_store_rss(
                    app_id,
                    country="ae",
                    limit=self.review_limit
                )
                saved = self.scraper.save_reviews_to_db(fetched)
                saved_total += saved
                print(f" ✅ App Store {app_id}: {saved} reviews")
            except Exception as e:
                print(f" ❌ App Store {app_id} error: {e}")
                continue

            time.sleep(1)  # Rate limiting

        for package in play_store_packages:
            position += 1
            report("🤖 Scraping Play Store", package)

            try:
                fetched = self.scraper.scrape_play_store_api(
                    package,
                    country="ae",
                    limit=self.review_limit
                )
                saved = self.scraper.save_reviews_to_db(fetched)
                saved_total += saved
                print(f" ✅ Play Store {package}: {saved} reviews")
            except Exception as e:
                print(f" ❌ Play Store {package} error: {e}")
                continue

            time.sleep(1)  # Rate limiting

        print(f"\n✅ Total scraped: {saved_total} reviews")
        return saved_total

    def process_reviews(
        self,
        progress_callback: Optional[Callable] = None
    ) -> List[Dict[str, Any]]:
        """
        Run every pending review through Stages 1-3 of the LangGraph workflow.

        Args:
            progress_callback: Optional Gradio progress callback.

        Returns:
            List of final state dictionaries, one per successfully
            processed review.
        """
        pending = self.db.get_pending_reviews(limit=self.review_limit)
        pending_count = len(pending)

        if pending_count == 0:
            print("⚠️ No pending reviews to process")
            return []

        print(f"\n📊 Processing {pending_count} reviews...")

        results: List[Dict[str, Any]] = []

        for index, review in enumerate(pending, 1):
            rid = review.get('review_id', 'unknown')

            if progress_callback:
                # Per-review processing occupies the 30%-90% band.
                progress_callback(
                    0.3 + (0.6 * index / pending_count),
                    desc=f"🤖 Processing review {index}/{pending_count}: {rid[:20]}..."
                )

            try:
                initial = create_initial_state(review)

                # Each review gets its own thread id so checkpoints don't mix.
                run_config = {"configurable": {"thread_id": f"review_{rid}"}}
                final_state = self.review_graph.invoke(initial, config=run_config)

                # Plain dicts are easier for downstream consumers.
                results.append(dict(final_state))

                print(f" ✅ Review {index}/{pending_count} processed")

            except Exception as e:
                print(f" ❌ Error processing review {rid}: {e}")
                continue

        print(f"\n✅ Processed {len(results)}/{pending_count} reviews")
        return results

    def analyze_batch(
        self,
        processed_reviews: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """
        Run Stage 4 batch analytics over processed review states.

        Args:
            processed_reviews: Final state dictionaries from process_reviews().

        Returns:
            Batch insights dictionary (empty when nothing was processed).
        """
        if not processed_reviews:
            return {}

        print(f"\n📊 Running batch analysis on {len(processed_reviews)} reviews...")

        # Map LangGraph state keys onto the flat column names Stage 4 expects.
        key_map = [
            ('review_id', 'review_id'),
            ('review_text', 'review_text'),
            ('rating', 'rating'),
            ('stage1_llm1_type', 'classification_type'),
            ('stage1_llm1_department', 'department'),
            ('stage1_llm1_priority', 'priority'),
            ('stage1_llm2_user_type', 'user_type'),
            ('stage1_llm2_emotion', 'emotion'),
            ('stage2_agreement', 'sentiment_agreement'),
            ('stage3_final_sentiment', 'final_sentiment'),
            ('stage3_needs_human_review', 'needs_human_review'),
            ('stage3_reasoning', 'reasoning'),
            ('stage3_action_recommendation', 'action_recommendation'),
        ]
        reviews_for_analysis = [
            {column: state.get(state_key) for column, state_key in key_map}
            for state in processed_reviews
        ]

        insights = Stage4BatchAnalysis().analyze_batch(reviews_for_analysis)

        # Persist the snapshot for later dashboards.
        self.db.save_batch_insights(insights)

        print("✅ Batch analysis complete")
        return insights

    def get_all_processed_reviews(self) -> List[Dict[str, Any]]:
        """Return every fully processed review stored in the database."""
        return self.db.get_all_processed_reviews()

    def close(self):
        """Release the database connection."""
        self.db.close()
|
| 234 |
+
|
| 235 |
+
|
| 236 |
+
# ============================================================================
|
| 237 |
+
# HELPER FUNCTIONS FOR GRADIO
|
| 238 |
+
# ============================================================================
|
| 239 |
+
|
| 240 |
+
def parse_app_store_url(url: str) -> Optional[str]:
    """
    Extract the numeric App Store ID from a URL, or pass an ID through.

    Examples:
        - "1234567890" -> "1234567890"
        - "https://apps.apple.com/us/app/name/id1234567890" -> "1234567890"
    """
    candidate = url.strip()

    # Already a bare numeric ID: nothing to parse.
    if candidate.isdigit():
        return candidate

    # Canonical App Store URLs embed the ID right after "/id".
    if 'apps.apple.com' in candidate:
        _, sep, tail = candidate.partition('/id')
        if sep:
            app_id = tail.split('?')[0].split('/')[0]
            if app_id.isdigit():
                return app_id

    # Fallback: take the longest digit run anywhere in the string
    # (most likely to be the app ID).
    import re
    digit_runs = re.findall(r'\d+', candidate)
    if digit_runs:
        return max(digit_runs, key=len)

    return None
|
| 270 |
+
|
| 271 |
+
|
| 272 |
+
def parse_play_store_url(url: str) -> Optional[str]:
    """
    Extract the package name from a Play Store URL, or pass a package through.

    Examples:
        - "com.company.app" -> "com.company.app"
        - "https://play.google.com/store/apps/details?id=com.company.app" -> "com.company.app"

    Returns:
        The package name, or None when no package can be determined.
    """
    url = url.strip()

    # Already a bare package name (dotted identifier, not a URL).
    if '.' in url and not url.startswith('http'):
        return url

    # Canonical Play Store URLs carry the package in the ?id= query parameter.
    if 'play.google.com' in url and 'id=' in url:
        return url.split('id=')[1].split('&')[0]

    # Anything else — a Play Store URL without id=, or an unrelated http URL —
    # has no recoverable package name. (The previous fallback returned the raw
    # URL whenever it contained a dot, handing callers an invalid package.)
    return None
|
| 293 |
+
|
| 294 |
+
|
| 295 |
+
if __name__ == "__main__":
    print("\n" + "="*60)
    print("🧪 TESTING GRADIO PIPELINE")
    print("="*60)

    # Exercise the URL-parsing helpers with bare-ID and full-URL inputs.
    print("\n📱 Testing URL parsing:")

    for candidate in (
        "1234567890",
        "https://apps.apple.com/us/app/name/id1234567890",
    ):
        print(f" {candidate} -> {parse_app_store_url(candidate)}")

    for candidate in (
        "com.company.app",
        "https://play.google.com/store/apps/details?id=com.company.app",
    ):
        print(f" {candidate} -> {parse_play_store_url(candidate)}")

    print("\n✅ Gradio pipeline test complete!")
|
langgraph_graph.py
ADDED
|
@@ -0,0 +1,313 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
LangGraph Graph Definition
|
| 3 |
+
Defines the review processing workflow with conditional routing
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from langgraph.graph import StateGraph, END
|
| 7 |
+
from langgraph.checkpoint.memory import MemorySaver
|
| 8 |
+
from typing import Literal
|
| 9 |
+
|
| 10 |
+
from langgraph_state import ReviewState, BatchState, create_initial_state
|
| 11 |
+
from langgraph_nodes import (
|
| 12 |
+
stage1_classification_node,
|
| 13 |
+
stage2_sentiment_node,
|
| 14 |
+
stage3_finalization_node
|
| 15 |
+
)
|
| 16 |
+
from stage4_batch_analysis import Stage4BatchAnalysis
|
| 17 |
+
from database_enhanced import EnhancedDatabase
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
# ============================================================================
|
| 21 |
+
# DATABASE SYNC NODES
|
| 22 |
+
# ============================================================================
|
| 23 |
+
|
| 24 |
+
def save_stage1_to_db_node(state: ReviewState) -> dict:
    """
    Persist Stage 1 classification results for the current review.

    Reads the llm1/llm2/manager results from the graph state and writes them
    through EnhancedDatabase.update_stage1. Failures are reported via the
    returned ``errors`` list instead of raising, so the graph keeps running.

    Returns:
        {"db_stage1_saved": True} on success, or {"errors": [...]} on failure.
    """
    db = EnhancedDatabase()
    db.connect()

    try:
        llm1 = state['llm1_result']
        llm2 = state['llm2_result']
        manager = state['manager_result']

        stage1_data = {
            'llm1_type': llm1.get('type'),
            'llm1_department': llm1.get('department'),
            'llm1_priority': llm1.get('priority'),
            'llm1_confidence': llm1.get('confidence'),
            'llm1_reasoning': llm1.get('reasoning'),

            'llm2_user_type': llm2.get('user_type'),
            'llm2_emotion': llm2.get('emotion'),
            'llm2_context': llm2.get('context'),
            'llm2_confidence': llm2.get('confidence'),
            'llm2_reasoning': llm2.get('reasoning'),

            # The full manager result is stored stringified; only its
            # reasoning gets a dedicated column.
            'manager_classification': str(manager),
            'manager_reasoning': manager.get('reasoning'),
        }

        db.update_stage1(state['review_id'], stage1_data)

        return {"db_stage1_saved": True}
    except Exception as e:
        # Return a NEW list rather than appending to state['errors'] in
        # place: LangGraph state updates must flow through the returned
        # dict, and in-place mutation can corrupt checkpointed state.
        return {"errors": state.get('errors', []) + [f"DB Stage 1 save error: {str(e)}"]}
    finally:
        # Always release the connection, whatever happened above.
        db.close()
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def save_stage2_to_db_node(state: ReviewState) -> dict:
    """Save Stage 2 results to database.

    Persists both sentiment models' outputs plus the combined layer result
    via EnhancedDatabase.update_stage2.

    Returns:
        {"db_stage2_saved": True} on success, or {"errors": [...]} with the
        failure appended (non-fatal — the review keeps flowing).
    """
    db = EnhancedDatabase()
    db.connect()

    try:
        stage2_data = {
            'best_sentiment': state['best_sentiment_result']['sentiment'],
            'best_confidence': state['best_sentiment_result']['confidence'],
            'best_prob_positive': state['best_sentiment_result']['prob_positive'],
            'best_prob_neutral': state['best_sentiment_result']['prob_neutral'],
            'best_prob_negative': state['best_sentiment_result']['prob_negative'],

            'alt_sentiment': state['alt_sentiment_result']['sentiment'],
            'alt_confidence': state['alt_sentiment_result']['confidence'],
            'alt_prob_positive': state['alt_sentiment_result']['prob_positive'],
            'alt_prob_neutral': state['alt_sentiment_result']['prob_neutral'],
            'alt_prob_negative': state['alt_sentiment_result']['prob_negative'],

            'agreement': state['sentiment_agreement'],
            'layer_sentiment': state['sentiment'],
        }

        db.update_stage2(state['review_id'], stage2_data)

        return {"db_stage2_saved": True}
    except Exception as e:
        # Copy-on-append: never mutate the list stored in graph state.
        errors = list(state.get('errors', []))
        errors.append(f"DB Stage 2 save error: {str(e)}")
        return {"errors": errors}
    finally:
        # Guaranteed close on every exit path (was duplicated per branch).
        db.close()
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def save_stage3_to_db_node(state: ReviewState) -> dict:
    """Save Stage 3 results to database.

    Persists the finalized sentiment, reasoning, conflict notes, and the
    human-review flag via EnhancedDatabase.update_stage3.

    Returns:
        {"db_stage3_saved": True} on success, or {"errors": [...]} with the
        failure appended (non-fatal).
    """
    db = EnhancedDatabase()
    db.connect()

    try:
        stage3_data = {
            'final_sentiment': state['final_sentiment'],
            'confidence': state['final_confidence'],
            'reasoning': state['reasoning'],
            'validation_notes': state['validation_notes'],
            'conflicts_found': state['conflicts_found'],
            'action_recommendation': state['action_recommendation'],
            'needs_human_review': state['needs_human_review'],
        }

        db.update_stage3(state['review_id'], stage3_data)

        return {"db_stage3_saved": True}
    except Exception as e:
        # Copy-on-append: never mutate the list stored in graph state.
        errors = list(state.get('errors', []))
        errors.append(f"DB Stage 3 save error: {str(e)}")
        return {"errors": errors}
    finally:
        # Guaranteed close on every exit path (was duplicated per branch).
        db.close()
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
# ============================================================================
|
| 120 |
+
# STAGE 4: BATCH ANALYSIS NODE
|
| 121 |
+
# ============================================================================
|
| 122 |
+
|
| 123 |
+
def stage4_batch_analysis_node(state: BatchState) -> dict:
    """
    Stage 4 Node: Batch analysis
    Runs after all reviews are processed
    """
    banner = "=" * 70
    print(f"\n{banner}")
    print("📊 STAGE 4: BATCH ANALYSIS")
    print(banner)

    analyzer = Stage4BatchAnalysis()

    # Flatten each per-review state into the plain dict shape Stage4 expects.
    reviews_for_analysis = [
        {
            'review_id': rs['review_id'],
            'review_text': rs['review_text'],
            'rating': rs['rating'],
            'stage1_llm1_type': rs.get('classification_type'),
            'stage1_llm1_department': rs.get('department'),
            'stage1_llm1_priority': rs.get('priority'),
            'stage1_llm2_user_type': rs.get('user_type'),
            'stage1_llm2_emotion': rs.get('emotion'),
            'stage2_agreement': rs.get('sentiment_agreement'),
            'stage3_final_sentiment': rs.get('final_sentiment'),
            'stage3_needs_human_review': rs.get('needs_human_review'),
            'stage3_reasoning': rs.get('reasoning'),
            'stage3_action_recommendation': rs.get('action_recommendation'),
        }
        for rs in state['all_reviews']
    ]

    insights = analyzer.analyze_batch(reviews_for_analysis)

    # Persist the aggregate insights before returning them into graph state.
    db = EnhancedDatabase()
    db.connect()
    db.save_batch_insights(insights)
    db.close()

    # Project exactly the insight keys the BatchState schema carries.
    insight_keys = (
        'sentiment_distribution',
        'priority_distribution',
        'department_distribution',
        'emotion_distribution',
        'critical_issues',
        'quick_wins',
        'churn_risk',
        'model_agreement_rate',
        'recommendations',
        'batch_completed_at',
    )
    return {key: insights.get(key) for key in insight_keys}
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
# ============================================================================
|
| 178 |
+
# ROUTING FUNCTIONS
|
| 179 |
+
# ============================================================================
|
| 180 |
+
|
| 181 |
+
def route_after_stage3(state: ReviewState) -> Literal["human_review", "complete"]:
    """
    Conditional routing after Stage 3
    Decides if human review is needed
    """
    # Escalate when: explicitly flagged, low final confidence, unresolved
    # conflicts, or a critical-priority classification.
    needs_escalation = (
        state.get('needs_human_review', False)
        or state.get('final_confidence', 1.0) < 0.5
        or state.get('conflicts_found', 'none') != 'none'
        or state.get('priority') == 'critical'
    )
    return "human_review" if needs_escalation else "complete"
|
| 203 |
+
|
| 204 |
+
|
| 205 |
+
def human_review_queue_node(state: ReviewState) -> dict:
    """
    Node for reviews flagged for human review
    Just marks them in the database
    """
    print(" 🚨 FLAGGED for human review")

    # Placeholder hook: a ticketing system or email alert could be wired
    # in here later. For now only the routing marker is recorded in state.
    return {"route_to": "human_review"}
|
| 217 |
+
|
| 218 |
+
|
| 219 |
+
# ============================================================================
|
| 220 |
+
# BUILD REVIEW PROCESSING GRAPH
|
| 221 |
+
# ============================================================================
|
| 222 |
+
|
| 223 |
+
def build_review_graph():
    """
    Build the complete review processing graph

    Stage 1 (classify) -> save -> Stage 2 (sentiment) -> save ->
    Stage 3 (finalize) -> save -> conditional human-review routing.
    """
    workflow = StateGraph(ReviewState)

    # Register every node: three processing stages, their persistence
    # companions, and the human-review sink.
    node_registry = [
        ("stage1_classify", stage1_classification_node),
        ("save_stage1", save_stage1_to_db_node),
        ("stage2_sentiment", stage2_sentiment_node),
        ("save_stage2", save_stage2_to_db_node),
        ("stage3_finalize", stage3_finalization_node),
        ("save_stage3", save_stage3_to_db_node),
        ("human_review_queue", human_review_queue_node),
    ]
    for node_name, node_fn in node_registry:
        workflow.add_node(node_name, node_fn)

    # Sequential backbone: each stage is followed by its DB-save node.
    pipeline = [
        "stage1_classify", "save_stage1",
        "stage2_sentiment", "save_stage2",
        "stage3_finalize", "save_stage3",
    ]
    for src, dst in zip(pipeline, pipeline[1:]):
        workflow.add_edge(src, dst)

    # After Stage 3 is saved, either escalate to a human or finish.
    workflow.add_conditional_edges(
        "save_stage3",
        route_after_stage3,
        {
            "human_review": "human_review_queue",
            "complete": END,
        },
    )
    workflow.add_edge("human_review_queue", END)

    workflow.set_entry_point("stage1_classify")

    # In-memory checkpointing lets an interrupted run resume mid-review.
    return workflow.compile(checkpointer=MemorySaver())
|
| 271 |
+
|
| 272 |
+
|
| 273 |
+
# ============================================================================
|
| 274 |
+
# BUILD BATCH ANALYSIS GRAPH (Stage 4)
|
| 275 |
+
# ============================================================================
|
| 276 |
+
|
| 277 |
+
def build_batch_graph():
    """
    Build the batch analysis graph (Stage 4)
    This runs after all reviews are processed
    """
    # A single-node graph: entry -> stage4_batch -> END, no checkpointing.
    workflow = StateGraph(BatchState)
    workflow.add_node("stage4_batch", stage4_batch_analysis_node)
    workflow.set_entry_point("stage4_batch")
    workflow.add_edge("stage4_batch", END)
    return workflow.compile()
|
| 296 |
+
|
| 297 |
+
|
| 298 |
+
if __name__ == "__main__":
    # Smoke test: verify both graphs compile without raising.
    banner = "=" * 60
    print("\n" + banner)
    print("🧪 TESTING LANGGRAPH GRAPH BUILDER")
    print(banner)

    print("\n📊 Building review processing graph...")
    review_graph = build_review_graph()
    print(" ✅ Review graph built!")

    print("\n📊 Building batch analysis graph...")
    batch_graph = build_batch_graph()
    print(" ✅ Batch graph built!")

    print("\n✅ Graph builder test complete!")
|
langgraph_nodes.py
ADDED
|
@@ -0,0 +1,583 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
LangGraph Nodes
|
| 3 |
+
All node functions for the review processing graph
|
| 4 |
+
Implements parallel execution where possible
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
import json
|
| 9 |
+
import time
|
| 10 |
+
from typing import Dict, Any
|
| 11 |
+
from datetime import datetime
|
| 12 |
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 13 |
+
from huggingface_hub import InferenceClient
|
| 14 |
+
import torch
|
| 15 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 16 |
+
import warnings
|
| 17 |
+
warnings.filterwarnings('ignore')
|
| 18 |
+
|
| 19 |
+
from langgraph_state import ReviewState, BatchState
|
| 20 |
+
from database_enhanced import EnhancedDatabase
|
| 21 |
+
|
| 22 |
+
# Initialize HF client (singleton)
|
| 23 |
+
HF_TOKEN = os.getenv("HUGGINGFACE_API_KEY")
|
| 24 |
+
hf_client = InferenceClient(token=HF_TOKEN) if HF_TOKEN else None
|
| 25 |
+
|
| 26 |
+
# Initialize sentiment models (singleton) - load once
|
| 27 |
+
_sentiment_models_loaded = False
|
| 28 |
+
_best_tokenizer = None
|
| 29 |
+
_best_model = None
|
| 30 |
+
_alt_tokenizer = None
|
| 31 |
+
_alt_model = None
|
| 32 |
+
|
| 33 |
+
def load_sentiment_models():
    """Load sentiment models once (singleton pattern)"""
    global _sentiment_models_loaded, _best_tokenizer, _best_model, _alt_tokenizer, _alt_model

    # Already loaded on a previous call — nothing to do.
    if _sentiment_models_loaded:
        return

    print(" 📦 Loading Twitter-BERT models (one-time)...")

    # Primary and alternate checkpoints (both 3-class Twitter sentiment).
    best_checkpoint = "cardiffnlp/twitter-roberta-base-sentiment-latest"
    alt_checkpoint = "finiteautomata/bertweet-base-sentiment-analysis"

    _best_tokenizer = AutoTokenizer.from_pretrained(best_checkpoint)
    _best_model = AutoModelForSequenceClassification.from_pretrained(best_checkpoint)
    _best_model.eval()  # inference only — disable dropout etc.

    _alt_tokenizer = AutoTokenizer.from_pretrained(alt_checkpoint)
    _alt_model = AutoModelForSequenceClassification.from_pretrained(alt_checkpoint)
    _alt_model.eval()

    _sentiment_models_loaded = True
    print(" ✅ Sentiment models loaded!")
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
# ============================================================================
|
| 57 |
+
# STAGE 1: CLASSIFICATION NODE (Parallel LLM1 + LLM2)
|
| 58 |
+
# ============================================================================
|
| 59 |
+
|
| 60 |
+
def llm1_classify(review: Dict[str, Any]) -> Dict[str, Any]:
    """LLM1: Type, Department, Priority classification.

    Sends the review to Qwen2.5-72B and parses its JSON verdict. Any
    failure (missing/unavailable client, API error, malformed JSON)
    degrades to a low-confidence 'unknown' fallback so a single bad
    review never stops batch processing.
    """
    review_text = review.get('review_text', '')
    rating = review.get('rating', 3)

    prompt = f"""You are an expert at classifying customer reviews for theme park and attraction apps.

REVIEW:
Rating: {rating}/5
Text: {review_text}

Classify this review across these dimensions:

1. TYPE (choose ONE):
- complaint: Customer reports a problem
- praise: Customer expresses satisfaction
- suggestion: Customer proposes improvement
- question: Customer asks about something
- bug_report: Technical issue described

2. DEPARTMENT (choose ONE):
- engineering: Technical issues, bugs, crashes
- ux: Design, usability, interface issues
- support: Customer service, help needed
- business: Pricing, policies, marketing

3. PRIORITY (choose ONE):
- critical: Service down, major blocker
- high: Significant problem affecting use
- medium: Inconvenience but not blocking
- low: Minor issue or suggestion

4. CONFIDENCE (0.0-1.0): How confident are you?

5. REASONING: Brief one-sentence explanation

Respond ONLY in valid JSON format:
{{
"type": "complaint/praise/suggestion/question/bug_report",
"department": "engineering/ux/support/business",
"priority": "critical/high/medium/low",
"confidence": 0.0-1.0,
"reasoning": "brief explanation"
}}"""

    try:
        response = hf_client.text_generation(
            prompt,
            model="Qwen/Qwen2.5-72B-Instruct",
            max_new_tokens=200,
            temperature=0.1
        )

        # Extract the JSON object robustly: models sometimes wrap it in
        # markdown fences or surround it with prose. Taking the first '{'
        # through the last '}' handles both cases, unlike fence-stripping.
        text = response.strip()
        start = text.find('{')
        end = text.rfind('}')
        if start == -1 or end <= start:
            raise ValueError(f"No JSON object in model response: {text[:80]!r}")

        result = json.loads(text[start:end + 1])
        result['model'] = 'Qwen/Qwen2.5-72B-Instruct'
        return result

    except Exception as e:
        # Graceful degradation: unknown classification, zero confidence.
        return {
            'type': 'unknown',
            'department': 'unknown',
            'priority': 'medium',
            'confidence': 0.0,
            'reasoning': f'Error: {str(e)}',
            'model': 'Qwen/Qwen2.5-72B-Instruct'
        }
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
def llm2_analyze(review: Dict[str, Any]) -> Dict[str, Any]:
    """LLM2: User type, Emotion, Context analysis.

    Sends the review to Mistral-7B and parses its JSON verdict. Any failure
    degrades to a low-confidence 'unknown' fallback so a single bad review
    never stops batch processing.
    """
    review_text = review.get('review_text', '')
    rating = review.get('rating', 3)

    prompt = f"""You are an expert at understanding customer psychology and emotional context.

REVIEW:
Rating: {rating}/5
Text: {review_text}

Analyze the user and emotional context:

1. USER_TYPE (choose ONE):
- new_user: First-time or new user
- regular_user: Returning customer
- power_user: Heavy user, tech-savvy
- churning_user: Considering leaving

2. EMOTION (choose ONE):
- anger: Angry, hostile tone
- frustration: Frustrated but not angry
- joy: Happy, satisfied
- satisfaction: Content, pleased
- disappointment: Let down, sad
- confusion: Unclear, needs help

3. CONTEXT (brief): What is the underlying issue? 1-2 words

4. CONFIDENCE (0.0-1.0): How confident are you?

5. REASONING: Brief one-sentence explanation

Respond ONLY in valid JSON format:
{{
"user_type": "new_user/regular_user/power_user/churning_user",
"emotion": "anger/frustration/joy/satisfaction/disappointment/confusion",
"context": "brief context",
"confidence": 0.0-1.0,
"reasoning": "brief explanation"
}}"""

    try:
        response = hf_client.text_generation(
            prompt,
            model="mistralai/Mistral-7B-Instruct-v0.3",
            max_new_tokens=200,
            temperature=0.1
        )

        # Robust JSON extraction (first '{' .. last '}') instead of fragile
        # markdown-fence stripping, which breaks on prose-wrapped replies.
        text = response.strip()
        start = text.find('{')
        end = text.rfind('}')
        if start == -1 or end <= start:
            raise ValueError(f"No JSON object in model response: {text[:80]!r}")

        result = json.loads(text[start:end + 1])
        result['model'] = 'mistralai/Mistral-7B-Instruct-v0.3'
        return result

    except Exception as e:
        # Graceful degradation: unknown analysis, zero confidence.
        return {
            'user_type': 'unknown',
            'emotion': 'unknown',
            'context': 'unknown',
            'confidence': 0.0,
            'reasoning': f'Error: {str(e)}',
            'model': 'mistralai/Mistral-7B-Instruct-v0.3'
        }
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
def manager_synthesize(llm1_result: Dict, llm2_result: Dict, review: Dict) -> Dict[str, Any]:
    """Manager: Synthesize LLM1 and LLM2 results.

    Asks Llama-3.1-8B to validate both analyses and produce a final
    classification. On any failure it falls back to the LLM1/LLM2 values
    with confidence 0.5 and conflicts_found='error'.
    """
    review_text = review.get('review_text', '')
    rating = review.get('rating', 3)

    prompt = f"""You are a synthesis manager evaluating two AI analyses of the same review.

REVIEW:
Rating: {rating}/5
Text: {review_text}

LLM1 ANALYSIS (Type/Dept/Priority):
{json.dumps(llm1_result, indent=2)}

LLM2 ANALYSIS (User/Emotion/Context):
{json.dumps(llm2_result, indent=2)}

Your task:
1. Validate both analyses
2. Resolve any conflicts
3. Make final classification decision
4. Provide synthesis reasoning

Respond ONLY in valid JSON format:
{{
"final_type": "from llm1 or adjusted",
"final_department": "from llm1 or adjusted",
"final_priority": "from llm1 or adjusted",
"final_user_type": "from llm2 or adjusted",
"final_emotion": "from llm2 or adjusted",
"confidence": 0.0-1.0,
"reasoning": "synthesis explanation",
"conflicts_found": "any conflicts or 'none'"
}}"""

    try:
        response = hf_client.text_generation(
            prompt,
            model="meta-llama/Llama-3.1-8B-Instruct",
            max_new_tokens=250,
            temperature=0.1
        )

        # Robust JSON extraction (first '{' .. last '}') instead of fragile
        # markdown-fence stripping, which breaks on prose-wrapped replies.
        text = response.strip()
        start = text.find('{')
        end = text.rfind('}')
        if start == -1 or end <= start:
            raise ValueError(f"No JSON object in model response: {text[:80]!r}")

        result = json.loads(text[start:end + 1])
        result['model'] = 'meta-llama/Llama-3.1-8B-Instruct'
        return result

    except Exception as e:
        # Fallback: trust the individual model outputs at half confidence.
        return {
            'final_type': llm1_result.get('type', 'unknown'),
            'final_department': llm1_result.get('department', 'unknown'),
            'final_priority': llm1_result.get('priority', 'medium'),
            'final_user_type': llm2_result.get('user_type', 'unknown'),
            'final_emotion': llm2_result.get('emotion', 'unknown'),
            'confidence': 0.5,
            'reasoning': f'Manager error, used LLM1 results: {str(e)}',
            'conflicts_found': 'error',
            'model': 'meta-llama/Llama-3.1-8B-Instruct'
        }
|
| 277 |
+
|
| 278 |
+
|
| 279 |
+
def stage1_classification_node(state: ReviewState) -> Dict[str, Any]:
    """
    Stage 1 Node: Classification with PARALLEL execution
    Runs LLM1 and LLM2 in parallel, then Manager synthesizes
    """
    print(f"\n 📝 Review ID: {state['review_id']}")
    print(" ⏳ STAGE 1: Classification (Parallel LLM1 + LLM2)...")

    started = time.time()
    review = state['review']

    # Fan out the two independent classifiers; join both before synthesis.
    with ThreadPoolExecutor(max_workers=2) as pool:
        llm1_future = pool.submit(llm1_classify, review)
        llm2_future = pool.submit(llm2_analyze, review)
        llm1_result = llm1_future.result()
        llm2_result = llm2_future.result()

    print(f" ✅ LLM1: {llm1_result.get('type')} → {llm1_result.get('department')} (Priority: {llm1_result.get('priority')})")
    print(f" ✅ LLM2: {llm2_result.get('user_type')}, {llm2_result.get('emotion')}")

    # Manager runs after both complete — it needs both analyses as input.
    print(" 🤖 Manager synthesizing...")
    manager_result = manager_synthesize(llm1_result, llm2_result, review)

    elapsed = time.time() - started
    print(f" ✅ Stage 1 complete ({elapsed:.2f}s)")

    # Promote the manager's final fields into top-level state keys.
    return {
        "llm1_result": llm1_result,
        "llm2_result": llm2_result,
        "manager_result": manager_result,
        "classification_type": manager_result.get('final_type'),
        "department": manager_result.get('final_department'),
        "priority": manager_result.get('final_priority'),
        "user_type": manager_result.get('final_user_type'),
        "emotion": manager_result.get('final_emotion'),
        "stage1_completed": True,
        "stage1_time": elapsed,
        "errors": state.get('errors', []),
    }
|
| 321 |
+
|
| 322 |
+
|
| 323 |
+
# ============================================================================
|
| 324 |
+
# STAGE 2: SENTIMENT NODE (Parallel Best + Alternate)
|
| 325 |
+
# ============================================================================
|
| 326 |
+
|
| 327 |
+
def analyze_best_sentiment(text: str) -> Dict[str, Any]:
    """Best Model: Twitter-RoBERTa 3-class sentiment.

    Returns a dict with 'sentiment' (NEGATIVE/NEUTRAL/POSITIVE), the winning
    class 'confidence', per-class probabilities, and the model name. Any
    failure — including model loading — degrades to a NEUTRAL fallback with
    confidence 0.0 so one bad review never aborts the pipeline.
    """
    try:
        # Load inside the try: a loading failure must degrade gracefully
        # like every other error here, not propagate and crash the node.
        load_sentiment_models()

        inputs = _best_tokenizer(text, return_tensors="pt", truncation=True, max_length=512, padding=True)

        with torch.no_grad():
            outputs = _best_model(**inputs)
            probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
            prediction = torch.argmax(probs, dim=-1).item()
            confidence = probs[0][prediction].item()

        # Class index -> label mapping for this checkpoint.
        label_map = {0: "NEGATIVE", 1: "NEUTRAL", 2: "POSITIVE"}

        return {
            'sentiment': label_map[prediction],
            'confidence': confidence,
            'prob_negative': probs[0][0].item(),
            'prob_neutral': probs[0][1].item(),
            'prob_positive': probs[0][2].item(),
            'model': 'twitter-roberta-base-sentiment-latest'
        }
    except Exception as e:
        # Neutral, zero-confidence fallback with near-uniform probabilities.
        return {
            'sentiment': 'NEUTRAL',
            'confidence': 0.0,
            'prob_negative': 0.33,
            'prob_neutral': 0.34,
            'prob_positive': 0.33,
            'model': 'error',
            'error': str(e)
        }
|
| 360 |
+
|
| 361 |
+
|
| 362 |
+
def analyze_alt_sentiment(text: str) -> Dict[str, Any]:
    """Alternate Model: BERTweet 3-class sentiment.

    Same contract as analyze_best_sentiment: returns the predicted label,
    its confidence, and per-class probabilities; any failure — including
    model loading — degrades to a NEUTRAL/0.0 fallback.
    """
    try:
        # Load inside the try: a loading failure must degrade gracefully
        # like every other error here, not propagate and crash the node.
        load_sentiment_models()

        inputs = _alt_tokenizer(text, return_tensors="pt", truncation=True, max_length=512, padding=True)

        with torch.no_grad():
            outputs = _alt_model(**inputs)
            probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
            prediction = torch.argmax(probs, dim=-1).item()
            confidence = probs[0][prediction].item()

        # Class index -> label mapping for this checkpoint.
        label_map = {0: "NEGATIVE", 1: "NEUTRAL", 2: "POSITIVE"}

        return {
            'sentiment': label_map[prediction],
            'confidence': confidence,
            'prob_negative': probs[0][0].item(),
            'prob_neutral': probs[0][1].item(),
            'prob_positive': probs[0][2].item(),
            'model': 'bertweet-base-sentiment-analysis'
        }
    except Exception as e:
        # Neutral, zero-confidence fallback with near-uniform probabilities.
        return {
            'sentiment': 'NEUTRAL',
            'confidence': 0.0,
            'prob_negative': 0.33,
            'prob_neutral': 0.34,
            'prob_positive': 0.33,
            'model': 'error',
            'error': str(e)
        }
|
| 395 |
+
|
| 396 |
+
|
| 397 |
+
def sentiment_layer(best_result: Dict, alt_result: Dict) -> Dict[str, Any]:
    """Sentiment Layer: combine the two model outputs with confidence weighting.

    If both models agree, keep that label with the higher of the two
    confidences (STRONG agreement). If they disagree, keep the label of the
    more confident model (ties go to the alternate model) and mark WEAK.
    """
    best_label = best_result.get('sentiment')
    alt_label = alt_result.get('sentiment')
    best_conf = best_result.get('confidence', 0.0)
    alt_conf = alt_result.get('confidence', 0.0)

    models_agree = best_label == alt_label

    if models_agree:
        winner, winner_conf = best_label, max(best_conf, alt_conf)
    elif best_conf > alt_conf:
        winner, winner_conf = best_label, best_conf
    else:
        winner, winner_conf = alt_label, alt_conf

    return {
        'layer_sentiment': winner,
        'combined_confidence': winner_conf,
        'agreement': models_agree,
        'agreement_strength': "STRONG" if models_agree else "WEAK",
    }
|
| 426 |
+
|
| 427 |
+
|
| 428 |
+
def stage2_sentiment_node(state: ReviewState) -> Dict[str, Any]:
    """
    Stage 2 Node: Sentiment with PARALLEL execution.

    Runs the Best and Alternate sentiment models concurrently on the review
    text, merges their predictions through sentiment_layer(), and returns the
    partial-state update (raw results, merged fields, timing, error list).
    """
    print(f"\n ⏳ STAGE 2: Sentiment Analysis (Parallel Best + Alternate)...")

    t0 = time.time()
    text = state['review_text']

    # Fan out both model calls at once; .result() blocks until each finishes.
    with ThreadPoolExecutor(max_workers=2) as pool:
        pending = (
            pool.submit(analyze_best_sentiment, text),
            pool.submit(analyze_alt_sentiment, text),
        )
        best_result, alt_result = (fut.result() for fut in pending)

    print(f" ✅ Best: {best_result['sentiment']} ({best_result['confidence']:.3f})")
    print(f" ✅ Alt: {alt_result['sentiment']} ({alt_result['confidence']:.3f})")

    # Merge the two predictions via the confidence-weighted sentiment layer.
    layer_result = sentiment_layer(best_result, alt_result)

    agreement_icon = "✅" if layer_result['agreement'] else "⚠️ "
    print(f" {agreement_icon} Final: {layer_result['layer_sentiment']} (agreement: {layer_result['agreement']})")

    elapsed = time.time() - t0
    print(f" ✅ Stage 2 complete ({elapsed:.2f}s)")

    return {
        "best_sentiment_result": best_result,
        "alt_sentiment_result": alt_result,
        "sentiment_layer_result": layer_result,
        "sentiment": layer_result['layer_sentiment'],
        "sentiment_confidence": layer_result['combined_confidence'],
        "sentiment_agreement": layer_result['agreement'],
        "stage2_completed": True,
        "stage2_time": elapsed,
        "errors": state.get('errors', []),
    }
|
| 469 |
+
|
| 470 |
+
|
| 471 |
+
# ============================================================================
|
| 472 |
+
# STAGE 3: FINALIZATION NODE
|
| 473 |
+
# ============================================================================
|
| 474 |
+
|
| 475 |
+
def stage3_finalization_node(state: ReviewState) -> Dict[str, Any]:
    """
    Stage 3 Node: Final synthesis with LLM3 (Llama 70B).

    Builds a prompt from the Stage 1 classification and Stage 2 sentiment
    results, asks the LLM for a final JSON decision, and returns the
    partial-state update.  On any LLM/parse error it falls back to the
    Stage 2 sentiment and flags the review for human inspection.

    Fixes over the previous version:
      - Stage 2 values are guarded before the ':.2f' formatting in the prompt
        f-string, which is evaluated *outside* the try block and would raise
        TypeError on a missing/None confidence.
      - Keys the LLM may omit from otherwise-valid JSON are backfilled, so
        the direct result[...] accesses below cannot raise KeyError.
      - total_time tolerates stage times stored as None.
    """
    print(f"\n ⏳ STAGE 3: Finalization (LLM3)...")

    start_time = time.time()

    review_text = state['review_text']
    rating = state['rating']

    # Guard against absent Stage 2 results / None confidences (the prompt
    # f-string below formats these with ':.2f' and is built outside the try).
    best = state.get('best_sentiment_result') or {}
    alt = state.get('alt_sentiment_result') or {}
    best_conf = best.get('confidence') or 0.0
    alt_conf = alt.get('confidence') or 0.0

    prompt = f"""You are a final decision-making AI analyzing customer feedback for a theme park/attraction app.

REVIEW DATA:
Rating: {rating}/5
Text: {review_text}

STAGE 1 CLASSIFICATION:
- Type: {state.get('classification_type')}
- Department: {state.get('department')}
- Priority: {state.get('priority')}
- User Type: {state.get('user_type')}
- Emotion: {state.get('emotion')}

STAGE 2 SENTIMENT:
- Best: {best.get('sentiment')} ({best_conf:.2f})
- Alternate: {alt.get('sentiment')} ({alt_conf:.2f})
- Agreement: {state.get('sentiment_agreement')}

YOUR TASK:
1. Review all data from both stages
2. Make FINAL sentiment decision
3. Provide comprehensive reasoning
4. Generate action recommendation
5. Flag if human review needed

Respond ONLY in valid JSON format:
{{
    "final_sentiment": "POSITIVE/NEGATIVE/NEUTRAL",
    "confidence": 0.0-1.0,
    "reasoning": "Comprehensive explanation",
    "validation_notes": "Does classification match sentiment?",
    "conflicts_found": "any conflicts or 'none'",
    "action_recommendation": "Specific action",
    "needs_human_review": true/false
}}"""

    try:
        response = hf_client.text_generation(
            prompt,
            model="meta-llama/Llama-3.1-70B-Instruct",
            max_new_tokens=400,
            temperature=0.1
        )

        # Strip optional markdown code fences (``` or ```json) around the JSON.
        response_clean = response.strip()
        if response_clean.startswith('```'):
            response_clean = response_clean.split('```')[1]
            if response_clean.startswith('json'):
                response_clean = response_clean[4:]
            response_clean = response_clean.strip()

        result = json.loads(response_clean)
        if not isinstance(result, dict):
            # e.g. the model returned a bare string or list; use the fallback.
            raise ValueError(f"LLM returned non-object JSON: {type(result).__name__}")
        result['model'] = 'meta-llama/Llama-3.1-70B-Instruct'

        # Valid JSON may still be missing keys; backfill so the result[...]
        # accesses in the return statement can never raise KeyError.
        for key, default in {
            'final_sentiment': state.get('sentiment', 'NEUTRAL'),
            'confidence': state.get('sentiment_confidence', 0.5),
            'reasoning': 'No reasoning provided by LLM3',
            'validation_notes': '',
            'conflicts_found': 'none',
            'action_recommendation': f"Route to {state.get('department')}",
            'needs_human_review': False,
        }.items():
            result.setdefault(key, default)

    except Exception as e:
        # Fallback: reuse the Stage 2 sentiment and force a human review.
        result = {
            'final_sentiment': state.get('sentiment', 'NEUTRAL'),
            'confidence': state.get('sentiment_confidence', 0.5),
            'reasoning': f'Error in LLM3: {str(e)}',
            'validation_notes': 'Error',
            'conflicts_found': 'error',
            'action_recommendation': f"Route to {state.get('department')}",
            'needs_human_review': True,
            'model': 'meta-llama/Llama-3.1-70B-Instruct'
        }

    stage3_time = time.time() - start_time

    print(f" ✅ Final: {result['final_sentiment']} ({result.get('confidence', 0):.3f})")
    print(f" 📋 Needs Review: {result.get('needs_human_review', False)}")
    print(f" ✅ Stage 3 complete ({stage3_time:.2f}s)")

    # Calculate total time; stage times may be absent OR explicitly None.
    total_time = (state.get('stage1_time') or 0) + (state.get('stage2_time') or 0) + stage3_time

    return {
        "final_result": result,
        "final_sentiment": result['final_sentiment'],
        "final_confidence": result['confidence'],
        "reasoning": result['reasoning'],
        "action_recommendation": result['action_recommendation'],
        "conflicts_found": result['conflicts_found'],
        "validation_notes": result['validation_notes'],
        "needs_human_review": result['needs_human_review'],
        "stage3_completed": True,
        "stage3_time": stage3_time,
        "total_time": total_time,
        "processing_completed_at": datetime.now().isoformat(),
        "errors": state.get('errors', [])
    }
|
| 576 |
+
|
| 577 |
+
|
| 578 |
+
if __name__ == "__main__":
    # Smoke message when the module is run directly: list the graph nodes
    # this module contributes.
    print("\n✅ LangGraph nodes module loaded!")
    print(" Nodes available:")
    for node_desc in (
        "stage1_classification_node (parallel LLM1+LLM2)",
        "stage2_sentiment_node (parallel Best+Alt)",
        "stage3_finalization_node (LLM3)",
    ):
        print(f" - {node_desc}")
|
langgraph_state.py
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
LangGraph State Schema
|
| 3 |
+
Defines the state that flows through the graph
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from typing import TypedDict, Optional, Dict, Any, List
|
| 7 |
+
from datetime import datetime
|
| 8 |
+
|
| 9 |
+
class ReviewState(TypedDict):
    """
    State schema for review processing graph
    All stages add to this state as it flows through the graph

    Note: TypedDict is a static-typing construct only — at runtime this is a
    plain dict.  Fields typed Optional[...] are initialised to None by
    create_initial_state() and filled in as each stage's node runs.
    """

    # Input data
    review: Dict[str, Any]          # raw review record (as scraped/loaded)
    review_id: str                  # unique id; create_initial_state falls back to 'unknown'
    review_text: str                # free-text body of the review
    rating: int                     # star rating; defaulted to 3 when missing

    # Stage 1: Classification outputs (raw per-model results)
    llm1_result: Optional[Dict[str, Any]]
    llm2_result: Optional[Dict[str, Any]]
    manager_result: Optional[Dict[str, Any]]

    # Stage 1: Extracted fields for easy access
    classification_type: Optional[str]
    department: Optional[str]       # target department for routing
    priority: Optional[str]
    user_type: Optional[str]
    emotion: Optional[str]

    # Stage 2: Sentiment outputs (raw model + combination-layer results)
    best_sentiment_result: Optional[Dict[str, Any]]
    alt_sentiment_result: Optional[Dict[str, Any]]
    sentiment_layer_result: Optional[Dict[str, Any]]

    # Stage 2: Extracted fields
    sentiment: Optional[str]  # POSITIVE, NEGATIVE, NEUTRAL
    sentiment_confidence: Optional[float]
    sentiment_agreement: Optional[bool]  # did the two sentiment models agree?

    # Stage 3: Finalization outputs
    final_result: Optional[Dict[str, Any]]

    # Stage 3: Extracted fields
    final_sentiment: Optional[str]
    final_confidence: Optional[float]
    reasoning: Optional[str]
    action_recommendation: Optional[str]
    conflicts_found: Optional[str]
    validation_notes: Optional[str]

    # Routing decisions
    needs_human_review: bool
    route_to: Optional[str]  # 'human_review', 'complete', 'batch_analysis'

    # Processing metadata
    stage1_completed: bool
    stage2_completed: bool
    stage3_completed: bool
    processing_started_at: Optional[str]   # ISO-8601 timestamp
    processing_completed_at: Optional[str]  # ISO-8601 timestamp

    # Timing information (seconds per stage)
    stage1_time: Optional[float]
    stage2_time: Optional[float]
    stage3_time: Optional[float]
    total_time: Optional[float]

    # Error handling
    errors: List[str]   # accumulated error messages from all stages
    retry_count: int

    # Database sync status (per-stage persistence flags)
    db_stage1_saved: bool
    db_stage2_saved: bool
    db_stage3_saved: bool
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
class BatchState(TypedDict):
    """
    State for batch analysis (Stage 4)
    Aggregates results from multiple reviews

    Like ReviewState, this is a plain dict at runtime; aggregate fields are
    initialised to None by create_batch_state() and filled by Stage 4.
    """

    # Input
    all_reviews: List[ReviewState]  # fully-processed per-review states
    total_count: int                # len(all_reviews), cached for convenience

    # Aggregated metrics (label -> count)
    sentiment_distribution: Optional[Dict[str, int]]
    priority_distribution: Optional[Dict[str, int]]
    department_distribution: Optional[Dict[str, int]]
    emotion_distribution: Optional[Dict[str, int]]

    # Analysis outputs
    critical_issues: Optional[List[Dict[str, Any]]]
    quick_wins: Optional[List[Dict[str, Any]]]
    churn_risk: Optional[float]             # NOTE(review): presumably a 0-1 ratio — confirm in Stage 4
    model_agreement_rate: Optional[float]   # fraction of reviews where sentiment models agreed

    # Recommendations
    recommendations: Optional[List[str]]

    # Processing metadata (ISO-8601 timestamps / seconds)
    batch_started_at: Optional[str]
    batch_completed_at: Optional[str]
    batch_processing_time: Optional[float]
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
def create_initial_state(review: Dict[str, Any]) -> ReviewState:
    """
    Create initial state for a review.

    Every analysis field starts as None, every completion/persistence flag
    as False, and processing_started_at is stamped with the current time.
    """
    # All fields that simply start out as None, grouped by stage.
    empty_fields = dict.fromkeys((
        # Stage 1
        'llm1_result', 'llm2_result', 'manager_result',
        'classification_type', 'department', 'priority', 'user_type', 'emotion',
        # Stage 2
        'best_sentiment_result', 'alt_sentiment_result', 'sentiment_layer_result',
        'sentiment', 'sentiment_confidence', 'sentiment_agreement',
        # Stage 3
        'final_result', 'final_sentiment', 'final_confidence',
        'reasoning', 'action_recommendation', 'conflicts_found', 'validation_notes',
        # Routing / metadata / timing
        'route_to', 'processing_completed_at',
        'stage1_time', 'stage2_time', 'stage3_time', 'total_time',
    ))

    state: Dict[str, Any] = {
        # Input
        'review': review,
        'review_id': review.get('review_id', 'unknown'),
        'review_text': review.get('review_text', ''),
        'rating': review.get('rating', 3),
        # Routing
        'needs_human_review': False,
        # Processing metadata
        'stage1_completed': False,
        'stage2_completed': False,
        'stage3_completed': False,
        'processing_started_at': datetime.now().isoformat(),
        # Errors
        'errors': [],
        'retry_count': 0,
        # Database
        'db_stage1_saved': False,
        'db_stage2_saved': False,
        'db_stage3_saved': False,
    }
    state.update(empty_fields)
    # A TypedDict is a plain dict at runtime, so this satisfies ReviewState.
    return state
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
def create_batch_state(reviews: List[ReviewState]) -> BatchState:
    """
    Create batch state from processed reviews.

    All aggregate/analysis fields start as None; batch_started_at is stamped
    with the current time.
    """
    # Fields Stage 4 will fill in later — all begin as None.
    pending = dict.fromkeys((
        'sentiment_distribution', 'priority_distribution',
        'department_distribution', 'emotion_distribution',
        'critical_issues', 'quick_wins', 'churn_risk',
        'model_agreement_rate', 'recommendations',
        'batch_completed_at', 'batch_processing_time',
    ))

    return BatchState(
        all_reviews=reviews,
        total_count=len(reviews),
        batch_started_at=datetime.now().isoformat(),
        **pending,
    )
|
| 198 |
+
|
| 199 |
+
|
| 200 |
+
if __name__ == "__main__":
    # Quick self-check: build an initial state and echo a few fields.
    banner = "=" * 60

    print("\n" + banner)
    print("🧪 TESTING LANGGRAPH STATE")
    print(banner)

    sample = {
        'review_id': 'test_001',
        'review_text': 'App crashes!',
        'rating': 1,
    }

    initial = create_initial_state(sample)
    print(f"\n✅ Initial state created for: {initial['review_id']}")
    print(f" Review text: {initial['review_text']}")
    print(f" Stage 1 completed: {initial['stage1_completed']}")

    print("\n✅ State schema test complete!")
|
requirements.txt
CHANGED
|
@@ -1,3 +1,27 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Core Dependencies
|
| 2 |
+
python-dotenv==1.0.0
|
| 3 |
+
pandas>=2.2.0
|
| 4 |
+
PyYAML==6.0.1
|
| 5 |
+
|
| 6 |
+
# LangGraph & LangChain
|
| 7 |
+
langgraph>=0.2.0
|
| 8 |
+
langchain>=0.2.0
|
| 9 |
+
langchain-core>=0.2.0
|
| 10 |
+
|
| 11 |
+
# HuggingFace
|
| 12 |
+
huggingface-hub>=0.20.3
|
| 13 |
+
transformers>=4.36.2
|
| 14 |
+
torch>=2.1.2
|
| 15 |
+
|
| 16 |
+
# Gradio (for HuggingFace Spaces UI)
|
| 17 |
+
gradio>=4.0.0
|
| 18 |
+
|
| 19 |
+
# Plotly for visualizations
|
| 20 |
+
plotly>=5.18.0
|
| 21 |
+
|
| 22 |
+
# Web Scraping
|
| 23 |
+
google-play-scraper>=1.2.4
|
| 24 |
+
requests>=2.31.0
|
| 25 |
+
|
| 26 |
+
# Database (SQLite is built-in to Python)
|
| 27 |
+
# sqlite3 is included with Python
|
stage0_scraper.py
ADDED
|
@@ -0,0 +1,302 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Stage 0: Web Scraping (App Store & Play Store)
|
| 3 |
+
Scrapes reviews and stores in database
|
| 4 |
+
This integrates with your existing scraper or can be used standalone
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
import sqlite3
|
| 9 |
+
import requests
|
| 10 |
+
import json
|
| 11 |
+
import time
|
| 12 |
+
from datetime import datetime
|
| 13 |
+
from typing import List, Dict, Any
|
| 14 |
+
import re
|
| 15 |
+
|
| 16 |
+
class Stage0WebScraper:
    """
    Stage 0: Web scraping for App Store and Play Store reviews
    Integrates with existing database structure

    Responsibilities (each method opens its own short-lived SQLite connection):
      - create/maintain the ``reviews`` table and its indexes
      - fetch reviews from the Apple App Store public RSS feed (no API key)
      - fetch reviews from Google Play via the google-play-scraper package
      - persist scraped reviews, de-duplicated on review_id
    """

    def __init__(self, db_file: str = "review_database.db"):
        # Path to the SQLite file; connections are opened per-method, not held.
        self.db_file = db_file
        print(f" 📁 Database: {db_file}")

    def create_reviews_table(self):
        """
        Create reviews table if it doesn't exist
        This is your Stage 0 schema

        review_id is UNIQUE, which is what makes the INSERT OR IGNORE in
        save_reviews_to_db() skip already-seen reviews on re-scrapes.
        """
        conn = sqlite3.connect(self.db_file)
        cursor = conn.cursor()

        cursor.execute("""
            CREATE TABLE IF NOT EXISTS reviews (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                review_id TEXT UNIQUE,
                product_url TEXT,
                platform TEXT,
                app_name TEXT,
                user_name TEXT,
                review_text TEXT,
                rating INTEGER,
                review_date TEXT,
                app_version TEXT,
                scraped_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)

        # Create index for faster lookups
        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_review_id
            ON reviews(review_id)
        """)

        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_platform
            ON reviews(platform)
        """)

        conn.commit()
        conn.close()

        print(" ✅ Reviews table ready (Stage 0)")

    def scrape_app_store_rss(self, app_id: str, country: str = "us",
                             limit: int = 100) -> List[Dict]:
        """
        Scrape App Store reviews using RSS feed
        This is a simple, free method (no API key needed)

        Args:
            app_id: App Store app ID (e.g., "1234567890")
            country: Country code (e.g., "us", "ae", "uk")
            limit: Number of reviews to fetch (max 500 per request)

        Returns:
            List of review dicts in the schema save_reviews_to_db() expects;
            empty list on any request/parse failure.

        NOTE(review): the feed typically returns far fewer entries than the
        stated 500 per page — confirm against the live feed if `limit` matters.
        """
        print(f" 🍎 Scraping App Store: {app_id} ({country})")

        # App Store RSS feed URL
        url = f"https://itunes.apple.com/{country}/rss/customerreviews/id={app_id}/sortBy=mostRecent/json"

        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()

            data = response.json()

            reviews = []
            entries = data.get('feed', {}).get('entry', [])

            # Skip first entry (it's the app info): real reviews carry an
            # 'author' key, the app-info entry does not.
            if entries and 'author' not in entries[0]:
                entries = entries[1:]

            for entry in entries[:limit]:
                try:
                    # Every field in the feed is wrapped as {'label': ...},
                    # hence the chained .get(...).get('label') lookups.
                    review = {
                        'review_id': entry.get('id', {}).get('label', ''),
                        'platform': 'app_store',
                        'app_name': data.get('feed', {}).get('title', {}).get('label', 'Unknown'),
                        'user_name': entry.get('author', {}).get('name', {}).get('label', 'Anonymous'),
                        'review_text': entry.get('content', {}).get('label', ''),
                        'rating': int(entry.get('im:rating', {}).get('label', '3')),
                        'review_date': entry.get('updated', {}).get('label', ''),
                        'app_version': entry.get('im:version', {}).get('label', ''),
                        'product_url': entry.get('link', {}).get('attributes', {}).get('href', '')
                    }
                    reviews.append(review)
                except Exception as e:
                    # Best-effort parsing: one malformed entry must not abort
                    # the whole scrape.
                    print(f" ⚠️ Error parsing review: {e}")
                    continue

            print(f" ✅ Scraped {len(reviews)} reviews")
            return reviews

        except Exception as e:
            print(f" ❌ Error scraping App Store: {e}")
            return []

    def scrape_play_store_api(self, app_id: str, country: str = "us",
                              limit: int = 100) -> List[Dict]:
        """
        Scrape Google Play Store reviews
        Note: This is a simplified version. For production, use google-play-scraper library

        Args:
            app_id: Play Store package name (e.g., "com.company.app")
            country: Country code
            limit: Number of reviews to fetch

        Returns:
            List of review dicts (empty on failure or missing dependency).

        NOTE(review): reviews_all() downloads EVERY review before we slice to
        `limit` — for large apps consider google_play_scraper.reviews(count=...).
        """
        print(f" 🤖 Scraping Play Store: {app_id} ({country})")

        try:
            # Using unofficial API endpoint (works without auth)
            # For production, recommend: pip install google-play-scraper
            # Imported lazily so the module loads without the dependency.
            from google_play_scraper import Sort, reviews_all

            result = reviews_all(
                app_id,
                sleep_milliseconds=0,
                lang='en',
                country=country,
                sort=Sort.NEWEST
            )

            reviews = []
            for r in result[:limit]:
                review = {
                    'review_id': r.get('reviewId', ''),
                    'platform': 'play_store',
                    'app_name': app_id,
                    'user_name': r.get('userName', 'Anonymous'),
                    'review_text': r.get('content', ''),
                    'rating': r.get('score', 3),
                    # 'at' is a datetime when present; store it as ISO-8601.
                    'review_date': r.get('at', '').isoformat() if r.get('at') else '',
                    'app_version': r.get('reviewCreatedVersion', ''),
                    'product_url': f"https://play.google.com/store/apps/details?id={app_id}"
                }
                reviews.append(review)

            print(f" ✅ Scraped {len(reviews)} reviews")
            return reviews

        except ImportError:
            print(" ⚠️ google-play-scraper not installed")
            print(" Run: pip install google-play-scraper")
            return []
        except Exception as e:
            print(f" ❌ Error scraping Play Store: {e}")
            return []

    def save_reviews_to_db(self, reviews: List[Dict]) -> int:
        """
        Save scraped reviews to database
        Returns number of new reviews saved

        Duplicates (same review_id) are silently skipped via INSERT OR IGNORE;
        cursor.rowcount distinguishes inserted rows from ignored ones.
        """
        if not reviews:
            return 0

        conn = sqlite3.connect(self.db_file)
        cursor = conn.cursor()

        saved_count = 0

        for review in reviews:
            try:
                cursor.execute("""
                    INSERT OR IGNORE INTO reviews
                    (review_id, product_url, platform, app_name, user_name,
                     review_text, rating, review_date, app_version)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
                """, (
                    review.get('review_id'),
                    review.get('product_url', ''),
                    review.get('platform'),
                    review.get('app_name', ''),
                    review.get('user_name'),
                    review.get('review_text'),
                    review.get('rating'),
                    review.get('review_date', ''),
                    review.get('app_version', '')
                ))

                # rowcount is 0 when the OR IGNORE clause skipped a duplicate.
                if cursor.rowcount > 0:
                    saved_count += 1

            except Exception as e:
                print(f" ⚠️ Error saving review: {e}")
                continue

        conn.commit()
        conn.close()

        print(f" ✅ Saved {saved_count} new reviews to database")
        return saved_count

    def scrape_from_urls_file(self, urls_file: str = "urls.txt") -> int:
        """
        Scrape reviews from URLs listed in a text file

        URLs file format (one per line):
        app_store:1234567890:us
        play_store:com.company.app:us

        Lines that are blank or start with '#' are ignored; the country part
        is optional and defaults to 'us'.  Returns the total number of NEW
        reviews saved across all sources.
        """
        print(f"\n 📄 Reading URLs from: {urls_file}")

        if not os.path.exists(urls_file):
            print(f" ⚠️ File not found: {urls_file}")
            return 0

        total_saved = 0

        with open(urls_file, 'r') as f:
            urls = [line.strip() for line in f if line.strip() and not line.startswith('#')]

        print(f" ✅ Found {len(urls)} URLs")

        for i, url in enumerate(urls, 1):
            print(f"\n [{i}/{len(urls)}] Processing: {url}")

            # Expected shape: platform:app_id[:country]
            parts = url.split(':')
            if len(parts) < 2:
                print(f" ⚠️ Invalid format: {url}")
                continue

            platform = parts[0].lower()
            app_id = parts[1]
            country = parts[2] if len(parts) > 2 else 'us'

            if platform == 'app_store':
                reviews = self.scrape_app_store_rss(app_id, country)
            elif platform == 'play_store':
                reviews = self.scrape_play_store_api(app_id, country)
            else:
                print(f" ⚠️ Unknown platform: {platform}")
                continue

            saved = self.save_reviews_to_db(reviews)
            total_saved += saved

            # Be nice to servers
            time.sleep(2)

        return total_saved

    def get_review_count(self) -> int:
        """Get total number of reviews in database"""
        conn = sqlite3.connect(self.db_file)
        cursor = conn.cursor()
        cursor.execute("SELECT COUNT(*) FROM reviews")
        count = cursor.fetchone()[0]
        conn.close()
        return count
|
| 274 |
+
|
| 275 |
+
|
| 276 |
+
if __name__ == "__main__":
    # Stage 0 entry point: scrape every source listed in urls.txt.
    banner = "=" * 70

    print("\n" + banner)
    print("🕷️ STAGE 0: WEB SCRAPER")
    print(banner)

    stage0 = Stage0WebScraper(db_file="review_database.db")

    # Create table if not exists
    print("\n📁 Ensuring database table exists...")
    stage0.create_reviews_table()

    # Scrape from urls.txt
    print("\n🔄 Starting scraping from urls.txt...")
    new_count = stage0.scrape_from_urls_file("urls.txt")

    # Show results
    db_total = stage0.get_review_count()

    print("\n" + banner)
    print("✅ SCRAPING COMPLETE!")
    print(banner)
    print(f"📊 New reviews saved: {new_count}")
    print(f"📊 Total reviews in database: {db_total}")
    print("\n🎯 Next step: Run the analysis!")
    print(" python main_langgraph.py")
    print(banner + "\n")
|
stage4_batch_analysis.py
ADDED
|
@@ -0,0 +1,323 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Stage 4: Batch Analysis & Aggregation
|
| 3 |
+
- Aggregate insights across all processed reviews
|
| 4 |
+
- Identify patterns, trends, critical issues
|
| 5 |
+
- Generate actionable recommendations
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import json
|
| 9 |
+
from typing import Dict, Any, List
|
| 10 |
+
from collections import Counter
|
| 11 |
+
|
| 12 |
+
class Stage4BatchAnalysis:
    """
    Stage 4: Batch-level intelligence and recommendations.

    Aggregates the per-review outputs of the earlier pipeline stages
    (stage1 classification, stage2 model agreement, stage3 final
    sentiment) into batch-wide distributions, prioritized issue lists,
    a churn-risk estimate, and human-readable recommendations.
    """

    def __init__(self):
        # No configuration required; announce readiness for pipeline logs.
        print(" 📊 Stage 4: Batch Analysis initialized")

    def analyze_batch(self, reviews: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Analyze a batch of processed reviews.

        Args:
            reviews: Processed review dicts carrying 'stage1_*',
                'stage2_*' and 'stage3_*' fields plus 'rating'.
                Missing fields fall back to neutral/unknown defaults.

        Returns:
            Insight dict with sentiment/priority/department/emotion/type
            distributions, model agreement rate, churn risk, critical
            issues, quick wins, and recommendations. An empty batch
            returns the same key set with zero/empty values.
        """
        if not reviews:
            print(" ⚠️ No reviews to analyze")
            return self._empty_insights()

        print(f"\n 📊 Analyzing batch of {len(reviews)} reviews...")

        total = len(reviews)

        # Build all five distributions in a single pass over the batch
        # (the original looped over `reviews` once per counter).
        sentiment_counts: Counter = Counter()
        priority_counts: Counter = Counter()
        dept_counts: Counter = Counter()
        emotion_counts: Counter = Counter()
        type_counts: Counter = Counter()
        for review in reviews:
            sentiment_counts[review.get('stage3_final_sentiment', 'NEUTRAL')] += 1
            priority_counts[review.get('stage1_llm1_priority', 'unknown')] += 1
            dept_counts[review.get('stage1_llm1_department', 'unknown')] += 1
            emotion_counts[review.get('stage1_llm2_emotion', 'unknown')] += 1
            type_counts[review.get('stage1_llm1_type', 'unknown')] += 1

        print(f" 📈 Sentiment: "
              f"POS={sentiment_counts.get('POSITIVE', 0)}, "
              f"NEU={sentiment_counts.get('NEUTRAL', 0)}, "
              f"NEG={sentiment_counts.get('NEGATIVE', 0)}")

        print(f" 🎯 Priority: "
              f"Critical={priority_counts.get('critical', 0)}, "
              f"High={priority_counts.get('high', 0)}, "
              f"Medium={priority_counts.get('medium', 0)}, "
              f"Low={priority_counts.get('low', 0)}")

        print(f" 🏢 Departments: "
              f"Eng={dept_counts.get('engineering', 0)}, "
              f"UX={dept_counts.get('ux', 0)}, "
              f"Support={dept_counts.get('support', 0)}, "
              f"Business={dept_counts.get('business', 0)}")

        # Actionable lists requiring human follow-up.
        critical_issues = self._identify_critical_issues(reviews)
        print(f" 🚨 Critical Issues: {len(critical_issues)}")

        quick_wins = self._identify_quick_wins(reviews)
        print(f" ⚡ Quick Wins: {len(quick_wins)}")

        # Batch-level risk / quality metrics.
        churn_risk = self._calculate_churn_risk(reviews)
        print(f" ⚠️ Churn Risk: {churn_risk:.1f}%")

        agreement_count = sum(1 for r in reviews if r.get('stage2_agreement', False))
        agreement_rate = (agreement_count / total * 100) if total > 0 else 0
        print(f" 🤝 Model Agreement: {agreement_rate:.1f}%")

        recommendations = self._generate_recommendations(
            sentiment_counts, priority_counts, dept_counts,
            critical_issues, quick_wins, churn_risk
        )

        # Compile batch insights. _empty_insights() must mirror this key set.
        insights = {
            'total_reviews': total,

            # Sentiment
            'sentiment_positive': sentiment_counts.get('POSITIVE', 0),
            'sentiment_neutral': sentiment_counts.get('NEUTRAL', 0),
            'sentiment_negative': sentiment_counts.get('NEGATIVE', 0),
            'sentiment_distribution': dict(sentiment_counts),

            # Priority
            'priority_critical': priority_counts.get('critical', 0),
            'priority_high': priority_counts.get('high', 0),
            'priority_medium': priority_counts.get('medium', 0),
            'priority_low': priority_counts.get('low', 0),
            'priority_distribution': dict(priority_counts),

            # Department
            'dept_engineering': dept_counts.get('engineering', 0),
            'dept_ux': dept_counts.get('ux', 0),
            'dept_support': dept_counts.get('support', 0),
            'dept_business': dept_counts.get('business', 0),
            'department_distribution': dict(dept_counts),

            # Additional insights
            'emotion_distribution': dict(emotion_counts),
            'type_distribution': dict(type_counts),
            'model_agreement_rate': agreement_rate,
            'churn_risk': churn_risk,

            # Actionable lists
            'critical_issues': critical_issues,
            'quick_wins': quick_wins,
            'recommendations': recommendations
        }

        return insights

    def _identify_critical_issues(self, reviews: List[Dict]) -> List[Dict]:
        """
        Identify critical issues requiring immediate attention.

        A review is critical when stage1 marked it 'critical', or when
        it is NEGATIVE and stage3 flagged it for human review.

        Returns:
            Up to 10 issue summaries, lowest rating first.
        """
        critical = []

        for review in reviews:
            priority = review.get('stage1_llm1_priority', '')
            sentiment = review.get('stage3_final_sentiment', '')
            needs_review = review.get('stage3_needs_human_review', False)

            if priority == 'critical' or (sentiment == 'NEGATIVE' and needs_review):
                critical.append({
                    'review_id': review.get('review_id', 'unknown'),
                    'type': review.get('stage1_llm1_type', 'unknown'),
                    'department': review.get('stage1_llm1_department', 'unknown'),
                    'reasoning': review.get('stage3_reasoning', ''),
                    'action': review.get('stage3_action_recommendation', ''),
                    'rating': review.get('rating', 0)
                })

        # Sort by rating (lowest first) so the worst experiences surface.
        critical.sort(key=lambda x: x['rating'])

        return critical[:10]  # Top 10 critical issues

    def _identify_quick_wins(self, reviews: List[Dict]) -> List[Dict]:
        """
        Identify easy-to-fix issues for quick wins.

        Returns:
            Up to 10 low/medium-priority suggestions (text truncated to
            100 chars), in original batch order.
        """
        quick_wins = []

        for review in reviews:
            review_type = review.get('stage1_llm1_type', '')
            priority = review.get('stage1_llm1_priority', '')
            sentiment = review.get('stage3_final_sentiment', '')

            # Suggestions with low/medium priority = quick wins
            if review_type == 'suggestion' and priority in ['low', 'medium']:
                quick_wins.append({
                    'review_id': review.get('review_id', 'unknown'),
                    'suggestion': review.get('review_text', '')[:100],
                    'department': review.get('stage1_llm1_department', 'unknown'),
                    'action': review.get('stage3_action_recommendation', ''),
                    'rating': review.get('rating', 0)
                })

        return quick_wins[:10]  # Top 10 quick wins

    def _calculate_churn_risk(self, reviews: List[Dict]) -> float:
        """
        Calculate overall churn risk percentage.

        Each review contributes 0-2 points (2 for a 'churning_user',
        1 for NEGATIVE with rating <= 2, 1 for a 1-star rating);
        the score is normalized by the maximum possible (2 per review).

        Returns:
            Churn risk in [0.0, 100.0]; 0.0 for an empty batch.
        """
        if not reviews:
            return 0.0

        churn_indicators = 0

        for review in reviews:
            user_type = review.get('stage1_llm2_user_type', '')
            sentiment = review.get('stage3_final_sentiment', '')
            rating = review.get('rating', 3)

            # Churn indicators (strongest signal first; at most one branch fires).
            if user_type == 'churning_user':
                churn_indicators += 2
            elif sentiment == 'NEGATIVE' and rating <= 2:
                churn_indicators += 1
            elif rating == 1:
                churn_indicators += 1

        # Normalize against the worst case of 2 points per review.
        max_possible = len(reviews) * 2
        churn_risk = (churn_indicators / max_possible * 100) if max_possible > 0 else 0.0

        return min(churn_risk, 100.0)

    def _generate_recommendations(self, sentiment_counts, priority_counts,
                                  dept_counts, critical_issues, quick_wins,
                                  churn_risk) -> List[str]:
        """
        Generate actionable recommendations from the aggregate metrics.

        Thresholds: >40% negative = HIGH alert, >25% = MEDIUM;
        churn risk >30% triggers a retention recommendation.

        Returns:
            Human-readable recommendation strings (possibly empty).
        """
        recommendations = []

        # Sentiment-based
        total = sum(sentiment_counts.values())
        if total > 0:
            neg_pct = (sentiment_counts.get('NEGATIVE', 0) / total * 100)
            if neg_pct > 40:
                recommendations.append(
                    f"🚨 HIGH: {neg_pct:.0f}% negative sentiment. Immediate investigation needed."
                )
            elif neg_pct > 25:
                recommendations.append(
                    f"⚠️ MEDIUM: {neg_pct:.0f}% negative sentiment. Monitor closely."
                )

        # Priority-based
        if priority_counts.get('critical', 0) > 0:
            recommendations.append(
                f"🔥 URGENT: {priority_counts['critical']} critical issues require immediate attention."
            )

        # Department-based: highlight the busiest department (ties resolve
        # to the first-counted department).
        if dept_counts:
            top_dept = max(dept_counts, key=dept_counts.get)
            top_count = dept_counts[top_dept]
            recommendations.append(
                f"🎯 FOCUS: {top_count} issues routed to {top_dept} department."
            )

        # Churn risk
        if churn_risk > 30:
            recommendations.append(
                f"⚠️ CHURN: {churn_risk:.0f}% churn risk detected. Implement retention strategy."
            )

        # Quick wins
        if quick_wins:
            recommendations.append(
                f"⚡ OPPORTUNITY: {len(quick_wins)} quick wins available for easy improvements."
            )

        return recommendations

    def _empty_insights(self) -> Dict[str, Any]:
        """
        Return a zeroed insights structure.

        Fix: now mirrors the full key set produced by analyze_batch()
        (distributions, model_agreement_rate, churn_risk were previously
        missing, causing KeyError for consumers on an empty batch).
        """
        return {
            'total_reviews': 0,
            'sentiment_positive': 0,
            'sentiment_neutral': 0,
            'sentiment_negative': 0,
            'sentiment_distribution': {},
            'priority_critical': 0,
            'priority_high': 0,
            'priority_medium': 0,
            'priority_low': 0,
            'priority_distribution': {},
            'dept_engineering': 0,
            'dept_ux': 0,
            'dept_support': 0,
            'dept_business': 0,
            'department_distribution': {},
            'emotion_distribution': {},
            'type_distribution': {},
            'model_agreement_rate': 0.0,
            'churn_risk': 0.0,
            'critical_issues': [],
            'quick_wins': [],
            'recommendations': []
        }
| 278 |
+
|
| 279 |
+
|
| 280 |
+
if __name__ == "__main__":
    # Smoke-test Stage 4 against a tiny hand-built batch of two reviews:
    # one critical bug report and one piece of praise.
    print("\n" + "="*60)
    print("🧪 TESTING STAGE 4 BATCH ANALYSIS")
    print("="*60)

    crash_report = dict(
        review_id='001',
        review_text='App crashes!',
        rating=1,
        stage1_llm1_type='bug_report',
        stage1_llm1_department='engineering',
        stage1_llm1_priority='critical',
        stage1_llm2_user_type='power_user',
        stage1_llm2_emotion='frustration',
        stage2_agreement=True,
        stage3_final_sentiment='NEGATIVE',
        stage3_needs_human_review=True,
        stage3_reasoning='Critical bug',
        stage3_action_recommendation='Fix immediately',
    )
    praise_report = dict(
        review_id='002',
        review_text='Great app!',
        rating=5,
        stage1_llm1_type='praise',
        stage1_llm1_department='ux',
        stage1_llm1_priority='low',
        stage1_llm2_user_type='regular_user',
        stage1_llm2_emotion='joy',
        stage2_agreement=True,
        stage3_final_sentiment='POSITIVE',
        stage3_needs_human_review=False,
    )

    analyzer = Stage4BatchAnalysis()
    batch_insights = analyzer.analyze_batch([crash_report, praise_report])

    print("\n📊 BATCH INSIGHTS:")
    print(json.dumps(batch_insights, indent=2))
    print("\n✅ Stage 4 test complete!")
|