triflix committed on
Commit
78ded87
·
verified ·
1 Parent(s): 28dcf64

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -94
app.py CHANGED
@@ -1,9 +1,5 @@
1
- # -------------------------------
2
- # ๐Ÿ“Œ FastAPI AI Data Insights App
3
- # -------------------------------
4
-
5
  from fastapi import FastAPI, Request, File, UploadFile, Form
6
- from fastapi.responses import HTMLResponse, JSONResponse
7
  from fastapi.staticfiles import StaticFiles
8
  from fastapi.templating import Jinja2Templates
9
  import pandas as pd
@@ -11,13 +7,13 @@ from google import genai
11
  from google.genai import types
12
  import os
13
  import json
 
14
 
15
  # -------------------------------
16
  # ๐Ÿ”‘ Configuration
17
  # -------------------------------
18
- API_KEY = os.getenv("GEMINI_API_KEY", "YOUR_GEMINI_KEY")
19
  MODEL = "gemini-2.5-flash-lite"
20
-
21
  client = genai.Client(api_key=API_KEY)
22
 
23
  # -------------------------------
@@ -27,12 +23,10 @@ app = FastAPI()
27
  app.mount("/static", StaticFiles(directory="static"), name="static")
28
  templates = Jinja2Templates(directory="templates")
29
 
30
-
31
  # -------------------------------
32
  # ๐Ÿ› ๏ธ Helper Functions
33
  # -------------------------------
34
  def get_metadata(df: pd.DataFrame):
35
- """Extract lightweight metadata for prompting."""
36
  return {
37
  "columns": list(df.columns),
38
  "dtypes": df.dtypes.apply(lambda x: str(x)).to_dict(),
@@ -43,81 +37,39 @@ def get_metadata(df: pd.DataFrame):
43
  "sample_rows": df.head(3).to_dict(orient="records"),
44
  }
45
 
 
 
46
 
47
- def generate_json_insight(user_query, metadata, df):
48
- """Ask Gemini for structured JSON insights across fixed sections."""
 
 
 
 
 
 
 
49
  system_prompt = """
50
- You are a data analysis assistant.
51
- You will be given dataset metadata and a user request.
52
-
53
- Always return JSON with exactly these 5 sections:
54
- 1. Efficiency Analysis (bar chart, actual vs target if available)
55
- 2. Cumulative Performance (line chart over time if possible)
56
- 3. Process Issues (pie chart breakdown if available)
57
- 4. Planning vs Projection (comparison planned vs projected values)
58
- 5. Loss Analysis (summary with stats: total, avg, min, max)
59
-
60
- Schema:
61
- {
62
- "insights": [
63
- {
64
- "title": "Efficiency Analysis",
65
- "type": "bar",
66
- "description": "Actual vs Target Efficiency",
67
- "chartData": [{"x": "...", "y": ..., "target": ...}],
68
- "stats": {}
69
- },
70
- {
71
- "title": "Cumulative Performance",
72
- "type": "line",
73
- "description": "Cumulative trend over time",
74
- "chartData": [],
75
- "stats": {}
76
- },
77
- {
78
- "title": "Process Issues",
79
- "type": "pie",
80
- "description": "Breakdown of process issues",
81
- "chartData": [],
82
- "stats": {}
83
- },
84
  {
85
- "title": "Planning vs Projection",
86
- "type": "comparison",
87
- "description": "Planned vs Projected output",
88
- "chartData": [],
89
- "stats": {}
90
- },
91
- {
92
- "title": "Loss Analysis",
93
- "type": "summary",
94
- "description": "Key loss statistics",
95
- "chartData": [],
96
- "stats": {}
97
  }
98
- ]
99
- }
100
-
101
- Rules:
102
- - All 5 sections must be present in the JSON.
103
- - If no data available, return empty arrays/objects.
104
- - Do NOT output Python code or text explanations, JSON only.
105
- """
106
-
107
- user_prompt = f"""
108
- Dataset metadata:
109
- Columns: {metadata['columns']}
110
- Data types: {metadata['dtypes']}
111
- Null counts: {metadata['null_counts']}
112
- Unique counts: {metadata['unique_counts']}
113
- Sample rows: {metadata['sample_rows']}
114
-
115
- User request: {user_query}
116
- """
117
-
118
- contents = [
119
- types.Content(role="user", parts=[types.Part.from_text(text=user_prompt)])
120
- ]
121
  config = types.GenerateContentConfig(
122
  temperature=0,
123
  max_output_tokens=2000,
@@ -125,16 +77,34 @@ User request: {user_query}
125
  )
126
 
127
  result = ""
128
- for chunk in client.models.generate_content_stream(
129
- model=MODEL, contents=contents, config=config
130
- ):
131
  if chunk.text:
132
  result += chunk.text
133
 
134
  try:
135
- return json.loads(result)
136
  except Exception:
137
- return {"raw_output": result}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
 
139
 
140
  # -------------------------------
@@ -144,18 +114,13 @@ User request: {user_query}
144
  async def home(request: Request):
145
  return templates.TemplateResponse("index.html", {"request": request})
146
 
147
-
148
- @app.post("/generate_insight_file")
149
- async def generate_insight_file(
150
- file: UploadFile = File(...), query: str = Form("Analyze the dataset")
151
- ):
152
- """Upload Excel, generate structured JSON insights."""
153
  try:
154
  df = pd.read_excel(file.file)
155
  except Exception as e:
156
  return JSONResponse({"success": False, "error": f"Failed to read file: {str(e)}"})
157
 
158
  metadata = get_metadata(df)
159
- insights = generate_json_insight(query, metadata, df)
160
-
161
- return JSONResponse({"success": True, "insights": insights})
 
 
 
 
 
1
  from fastapi import FastAPI, Request, File, UploadFile, Form
2
+ from fastapi.responses import StreamingResponse, JSONResponse, HTMLResponse
3
  from fastapi.staticfiles import StaticFiles
4
  from fastapi.templating import Jinja2Templates
5
  import pandas as pd
 
7
  from google.genai import types
8
  import os
9
  import json
10
+ import asyncio
11
 
12
# -------------------------------
# 🔑 Configuration
# -------------------------------
# SECURITY FIX: the previous code embedded a real Gemini API key as the
# os.getenv() fallback, leaking the secret into version control. Read the
# key from the environment only, and fail fast with a clear message so a
# misconfigured deployment is caught at startup instead of at first request.
API_KEY = os.getenv("GEMINI_API_KEY")
if not API_KEY:
    raise RuntimeError("GEMINI_API_KEY environment variable is not set")

MODEL = "gemini-2.5-flash-lite"

client = genai.Client(api_key=API_KEY)
18
 
19
  # -------------------------------
 
23
  app.mount("/static", StaticFiles(directory="static"), name="static")
24
  templates = Jinja2Templates(directory="templates")
25
 
 
26
  # -------------------------------
27
  # ๐Ÿ› ๏ธ Helper Functions
28
  # -------------------------------
29
  def get_metadata(df: pd.DataFrame):
 
30
  return {
31
  "columns": list(df.columns),
32
  "dtypes": df.dtypes.apply(lambda x: str(x)).to_dict(),
 
37
  "sample_rows": df.head(3).to_dict(orient="records"),
38
  }
39
 
40
async def stream_insights(user_query, metadata):
    """Async generator yielding newline-delimited JSON (NDJSON) progress events.

    Event order: started -> metadata -> excel_info -> context -> one event
    per auto-insight -> query -> completed. If the model's output is not
    valid JSON, a single {"status": "error", "raw_output": ...} event is
    emitted and the stream ends.

    Args:
        user_query: Free-text analysis request from the client.
        metadata: Dict produced by get_metadata() describing the dataset.

    The 0.5s sleeps between events are deliberate UI pacing for the client.
    """

    # Step 1: Start message
    yield json.dumps({"status": "started", "message": "File received. Extracting metadata..."}) + "\n"
    await asyncio.sleep(0.5)

    # Step 2: Metadata
    yield json.dumps({"status": "metadata", "metadata": metadata}) + "\n"
    await asyncio.sleep(0.5)

    # Step 3: Call Gemini for structured insights
    system_prompt = """
You are a data analysis assistant.
Always return JSON with this schema:
{
  "excel_info": {...},
  "data_type_context": "...",
  "auto_insights": {
    "insights": [
      {... Efficiency Analysis ...},
      {... Cumulative Performance ...},
      {... Process Issues ...},
      {... Planning vs Projection ...},
      {... Loss Analysis ...}
    ]
  },
  "query_insights": {...}
}
"""
    user_prompt = f"Dataset metadata: {metadata}\nUser request: {user_query}"

    contents = [types.Content(role="user", parts=[types.Part.from_text(text=user_prompt)])]
    config = types.GenerateContentConfig(
        temperature=0,  # deterministic output so the same file yields the same insights
        max_output_tokens=2000,
        # NOTE(review): this line was hidden by the diff hunk boundary; it is
        # reconstructed as system_instruction because system_prompt is
        # otherwise unused — confirm against the original file.
        system_instruction=system_prompt,
    )

    def _collect_model_output() -> str:
        # Blocking SDK call: consume the whole stream and join the chunks
        # (list + join avoids quadratic string concatenation).
        parts = []
        for chunk in client.models.generate_content_stream(model=MODEL, contents=contents, config=config):
            if chunk.text:
                parts.append(chunk.text)
        return "".join(parts)

    # BUGFIX: the original iterated the blocking generate_content_stream()
    # directly inside this async generator, stalling the event loop for the
    # entire model call. Run it in a worker thread instead.
    result = await asyncio.to_thread(_collect_model_output)

    try:
        parsed = json.loads(result)
    except Exception:
        # Model did not return valid JSON — surface the raw text and stop.
        yield json.dumps({"status": "error", "raw_output": result}) + "\n"
        return

    # Step 4: Excel info
    yield json.dumps({"status": "excel_info", "excel_info": parsed.get("excel_info", {})}) + "\n"
    await asyncio.sleep(0.5)

    # Step 5: Data type context
    yield json.dumps({"status": "context", "data_type_context": parsed.get("data_type_context", "")}) + "\n"
    await asyncio.sleep(0.5)

    # Step 6: Stream each insight one by one
    for insight in parsed.get("auto_insights", {}).get("insights", []):
        yield json.dumps({"status": "insight", "insight": insight}) + "\n"
        await asyncio.sleep(0.5)

    # Step 7: Query insights
    yield json.dumps({"status": "query", "query_insights": parsed.get("query_insights", {})}) + "\n"

    # Step 8: Completed
    yield json.dumps({"status": "completed", "message": "All insights generated"}) + "\n"
108
 
109
 
110
  # -------------------------------
 
114
async def home(request: Request):
    """Render the landing page from templates/index.html."""
    return templates.TemplateResponse("index.html", {"request": request})
116
 
117
@app.post("/stream_insights")
async def stream_insight_file(file: UploadFile = File(...), query: str = Form("Analyze the dataset")):
    """Accept an Excel upload and stream structured JSON insights step by step.

    Args:
        file: Uploaded Excel workbook (read with pandas).
        query: Optional analysis request; defaults to a generic prompt.

    Returns:
        StreamingResponse of newline-delimited JSON events from
        stream_insights(), or a 400 JSONResponse if the file is unreadable.
    """
    try:
        # pd.read_excel accepts the SpooledTemporaryFile behind UploadFile.
        df = pd.read_excel(file.file)
    except Exception as e:
        # BUGFIX: the original returned HTTP 200 for a failed upload, forcing
        # clients to parse the body to detect errors. Use 400 instead.
        return JSONResponse(
            {"success": False, "error": f"Failed to read file: {str(e)}"},
            status_code=400,
        )

    metadata = get_metadata(df)
    # Each line of the stream is one self-contained JSON event (NDJSON).
    return StreamingResponse(stream_insights(query, metadata), media_type="application/json")