Update app.py
Browse files
app.py
CHANGED
|
@@ -8,7 +8,7 @@ FastAPI backend with:
|
|
| 8 |
- Upload/Download breakdown
|
| 9 |
- Dynamic visualization generation
|
| 10 |
- On-demand clustering
|
| 11 |
-
-
|
| 12 |
- HuggingFace embeddings for semantic search
|
| 13 |
"""
|
| 14 |
|
|
@@ -28,7 +28,7 @@ from fastapi import FastAPI, HTTPException, Query
|
|
| 28 |
from fastapi.middleware.cors import CORSMiddleware
|
| 29 |
from fastapi.responses import JSONResponse, Response
|
| 30 |
from pydantic import BaseModel
|
| 31 |
-
|
| 32 |
from sentence_transformers import SentenceTransformer
|
| 33 |
import faiss
|
| 34 |
|
|
@@ -52,9 +52,7 @@ import plotly.express as px
|
|
| 52 |
# CONFIGURATION
|
| 53 |
# ============================================
|
| 54 |
|
| 55 |
-
|
| 56 |
-
if GEMINI_API_KEY:
|
| 57 |
-
genai.configure(api_key=GEMINI_API_KEY)
|
| 58 |
|
| 59 |
# Data paths
|
| 60 |
MERGED_DATA_PATH = "merged_subscriber_data.csv"
|
|
@@ -70,7 +68,7 @@ df_full = None # Full data with all fields
|
|
| 70 |
conn = None
|
| 71 |
embedding_model = None
|
| 72 |
faiss_index = None
|
| 73 |
-
|
| 74 |
|
| 75 |
|
| 76 |
# ============================================
|
|
@@ -80,7 +78,7 @@ gemini_model = None
|
|
| 80 |
@asynccontextmanager
|
| 81 |
async def lifespan(app: FastAPI):
|
| 82 |
"""Initialize resources on startup"""
|
| 83 |
-
global df, df_full, conn, embedding_model, faiss_index,
|
| 84 |
|
| 85 |
print("🚀 Starting Enhanced Telecom API...")
|
| 86 |
|
|
@@ -123,12 +121,12 @@ async def lifespan(app: FastAPI):
|
|
| 123 |
except Exception as e:
|
| 124 |
print(f"⚠ Embedding model error: {e}")
|
| 125 |
|
| 126 |
-
if
|
| 127 |
try:
|
| 128 |
-
|
| 129 |
-
print("✓ Initialized
|
| 130 |
except Exception as e:
|
| 131 |
-
print(f"⚠
|
| 132 |
|
| 133 |
# FAISS index will build on first search request (lazy loading)
|
| 134 |
print("ℹ FAISS index will build on first search request")
|
|
@@ -629,9 +627,9 @@ def run_clustering(request: ClusterRequest):
|
|
| 629 |
|
| 630 |
@app.post("/api/query")
|
| 631 |
def query_with_llm(request: QueryRequest):
|
| 632 |
-
"""Query data using
|
| 633 |
-
if
|
| 634 |
-
raise HTTPException(status_code=503, detail="
|
| 635 |
|
| 636 |
# Build context with safe column access
|
| 637 |
def safe_col_sum(col_name, default=0):
|
|
@@ -681,20 +679,48 @@ CUSTOMER DATABASE STATISTICS:
|
|
| 681 |
|
| 682 |
USER QUESTION: {request.question}
|
| 683 |
|
| 684 |
-
INSTRUCTIONS:
|
| 685 |
-
- If asked for package recommendations, provide
|
| 686 |
-
|
| 687 |
-
|
| 688 |
-
|
| 689 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 690 |
- Use data-driven insights from statistics above
|
| 691 |
- Be specific with numbers and percentages
|
| 692 |
-
-
|
| 693 |
"""
|
| 694 |
|
| 695 |
try:
|
| 696 |
-
response =
|
| 697 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 698 |
except Exception as e:
|
| 699 |
import traceback
|
| 700 |
error_details = traceback.format_exc()
|
|
|
|
| 8 |
- Upload/Download breakdown
|
| 9 |
- Dynamic visualization generation
|
| 10 |
- On-demand clustering
|
| 11 |
+
- Groq LLM integration
|
| 12 |
- HuggingFace embeddings for semantic search
|
| 13 |
"""
|
| 14 |
|
|
|
|
| 28 |
from fastapi.middleware.cors import CORSMiddleware
|
| 29 |
from fastapi.responses import JSONResponse, Response
|
| 30 |
from pydantic import BaseModel
|
| 31 |
+
from groq import Groq
|
| 32 |
from sentence_transformers import SentenceTransformer
|
| 33 |
import faiss
|
| 34 |
|
|
|
|
| 52 |
# CONFIGURATION
|
| 53 |
# ============================================
|
| 54 |
|
| 55 |
+
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
|
|
|
|
|
|
|
| 56 |
|
| 57 |
# Data paths
|
| 58 |
MERGED_DATA_PATH = "merged_subscriber_data.csv"
|
|
|
|
| 68 |
conn = None
|
| 69 |
embedding_model = None
|
| 70 |
faiss_index = None
|
| 71 |
+
groq_client = None
|
| 72 |
|
| 73 |
|
| 74 |
# ============================================
|
|
|
|
| 78 |
@asynccontextmanager
|
| 79 |
async def lifespan(app: FastAPI):
|
| 80 |
"""Initialize resources on startup"""
|
| 81 |
+
global df, df_full, conn, embedding_model, faiss_index, groq_client
|
| 82 |
|
| 83 |
print("🚀 Starting Enhanced Telecom API...")
|
| 84 |
|
|
|
|
| 121 |
except Exception as e:
|
| 122 |
print(f"⚠ Embedding model error: {e}")
|
| 123 |
|
| 124 |
+
if GROQ_API_KEY:
|
| 125 |
try:
|
| 126 |
+
groq_client = Groq(api_key=GROQ_API_KEY)
|
| 127 |
+
print("✓ Initialized Groq")
|
| 128 |
except Exception as e:
|
| 129 |
+
print(f"⚠ Groq error: {e}")
|
| 130 |
|
| 131 |
# FAISS index will build on first search request (lazy loading)
|
| 132 |
print("ℹ FAISS index will build on first search request")
|
|
|
|
| 627 |
|
| 628 |
@app.post("/api/query")
|
| 629 |
def query_with_llm(request: QueryRequest):
|
| 630 |
+
"""Query data using Groq LLM"""
|
| 631 |
+
if groq_client is None:
|
| 632 |
+
raise HTTPException(status_code=503, detail="Groq API not configured")
|
| 633 |
|
| 634 |
# Build context with safe column access
|
| 635 |
def safe_col_sum(col_name, default=0):
|
|
|
|
| 679 |
|
| 680 |
USER QUESTION: {request.question}
|
| 681 |
|
| 682 |
+
CRITICAL INSTRUCTIONS:
|
| 683 |
+
- If asked for package recommendations, you MUST provide ALL 4 sections in this exact format:
|
| 684 |
+
|
| 685 |
+
**1. USAGE PROFILE**
|
| 686 |
+
[Analyze customer patterns with specific numbers]
|
| 687 |
+
- Intelligently identify usage patterns from the time distribution percentages
|
| 688 |
+
- Mention ALL significant time periods (generally >25% is significant)
|
| 689 |
+
- Recognize patterns: bimodal (2 peaks), uniform (balanced), concentrated (1 dominant)
|
| 690 |
+
- Consider work patterns: morning+night = commuter, night-heavy = night owl, etc.
|
| 691 |
+
|
| 692 |
+
**2. RECOMMENDED PACKAGE**
|
| 693 |
+
[Specific package details with pricing]
|
| 694 |
+
- Size the package to cover 120-150% of their actual usage for growth headroom
|
| 695 |
+
- EXCLUDE services with 0 usage (if data=0 MB, don't include data)
|
| 696 |
+
- Name should reflect the dominant pattern intelligently
|
| 697 |
+
- Be realistic with pricing ($15-50/month range typical)
|
| 698 |
+
|
| 699 |
+
**3. KEY BENEFITS**
|
| 700 |
+
[List 3-4 specific benefits with bullet points]
|
| 701 |
+
- Focus on: cost savings, usage coverage, flexibility, value match
|
| 702 |
+
- Quantify benefits where possible ("save 20%", "covers 150% of usage")
|
| 703 |
+
- Address their specific pain points
|
| 704 |
+
|
| 705 |
+
**4. PRICING STRATEGY**
|
| 706 |
+
[Upsell/retention approach with revenue impact]
|
| 707 |
+
- Suggest specific discounts with business justification
|
| 708 |
+
- Include upsell opportunities for underutilized services
|
| 709 |
+
- Quantify expected impact (ARPU increase, churn reduction)
|
| 710 |
+
|
| 711 |
- Use data-driven insights from statistics above
|
| 712 |
- Be specific with numbers and percentages
|
| 713 |
+
- Include all 4 sections - DO NOT skip any
|
| 714 |
"""
|
| 715 |
|
| 716 |
try:
|
| 717 |
+
response = groq_client.chat.completions.create(
|
| 718 |
+
model="llama-3.3-70b-versatile",
|
| 719 |
+
messages=[{"role": "user", "content": context}],
|
| 720 |
+
temperature=0.7,
|
| 721 |
+
max_tokens=1024
|
| 722 |
+
)
|
| 723 |
+
return QueryResponse(answer=response.choices[0].message.content, data=None)
|
| 724 |
except Exception as e:
|
| 725 |
import traceback
|
| 726 |
error_details = traceback.format_exc()
|