triflix commited on
Commit
28dcf64
·
verified ·
1 Parent(s): b84d240

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +161 -0
app.py ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -------------------------------
2
+ # 📌 FastAPI AI Data Insights App
3
+ # -------------------------------
4
+
5
+ from fastapi import FastAPI, Request, File, UploadFile, Form
6
+ from fastapi.responses import HTMLResponse, JSONResponse
7
+ from fastapi.staticfiles import StaticFiles
8
+ from fastapi.templating import Jinja2Templates
9
+ import pandas as pd
10
+ from google import genai
11
+ from google.genai import types
12
+ import os
13
+ import json
14
+
15
+ # -------------------------------
16
+ # 🔑 Configuration
17
+ # -------------------------------
18
# The Gemini key is read from the environment.
# NOTE(review): when GEMINI_API_KEY is unset the literal placeholder below is
# passed to the client, so the misconfiguration presumably only surfaces on
# the first model call -- confirm whether failing fast at startup is wanted.
API_KEY = os.getenv("GEMINI_API_KEY", "YOUR_GEMINI_KEY")

# Model name passed to every generate-content request.
MODEL = "gemini-2.5-flash-lite"

# Single shared client, created once at import time.
client = genai.Client(api_key=API_KEY)
22
+
23
+ # -------------------------------
24
+ # ⚡ FastAPI Setup
25
+ # -------------------------------
26
app = FastAPI()

# Files under ./static are served at /static; HTML templates are loaded
# from ./templates. Both paths are relative to the working directory.
app.mount("/static", StaticFiles(directory="static"), name="static")
templates = Jinja2Templates(directory="templates")
29
+
30
+
31
+ # -------------------------------
32
+ # 🛠️ Helper Functions
33
+ # -------------------------------
34
def get_metadata(df: pd.DataFrame) -> dict:
    """Extract lightweight metadata for prompting.

    Args:
        df: The dataset to summarise.

    Returns:
        A dict with the keys ``columns`` (column labels), ``dtypes``
        (column label -> dtype name), ``num_rows``, ``num_cols``,
        ``null_counts`` and ``unique_counts`` (both column label -> count)
        and ``sample_rows`` (the first three rows as record dicts).
    """
    num_rows, num_cols = df.shape
    return {
        "columns": list(df.columns),
        # Dtype objects are stringified so the mapping prints cleanly.
        "dtypes": {col: str(dtype) for col, dtype in df.dtypes.items()},
        "num_rows": num_rows,
        "num_cols": num_cols,
        "null_counts": df.isnull().sum().to_dict(),
        "unique_counts": df.nunique().to_dict(),
        # Only a few rows are kept so the prompt stays small.
        "sample_rows": df.head(3).to_dict(orient="records"),
    }
45
+
46
+
47
def generate_json_insight(user_query, metadata, df):
    """Ask Gemini for structured JSON insights across five fixed sections.

    Args:
        user_query: Free-text request from the user, appended to the prompt.
        metadata: Mapping with the keys ``columns``, ``dtypes``,
            ``null_counts``, ``unique_counts`` and ``sample_rows``.
        df: Unused here; only the metadata is sent to the model. The
            parameter is kept so existing callers keep working.

    Returns:
        The decoded JSON value when the model reply parses as JSON,
        otherwise ``{"raw_output": <reply text>}``.
    """
    system_prompt = """
    You are a data analysis assistant.
    You will be given dataset metadata and a user request.

    Always return JSON with exactly these 5 sections:
    1. Efficiency Analysis (bar chart, actual vs target if available)
    2. Cumulative Performance (line chart over time if possible)
    3. Process Issues (pie chart breakdown if available)
    4. Planning vs Projection (comparison planned vs projected values)
    5. Loss Analysis (summary with stats: total, avg, min, max)

    Schema:
    {
      "insights": [
        {
          "title": "Efficiency Analysis",
          "type": "bar",
          "description": "Actual vs Target Efficiency",
          "chartData": [{"x": "...", "y": ..., "target": ...}],
          "stats": {}
        },
        {
          "title": "Cumulative Performance",
          "type": "line",
          "description": "Cumulative trend over time",
          "chartData": [],
          "stats": {}
        },
        {
          "title": "Process Issues",
          "type": "pie",
          "description": "Breakdown of process issues",
          "chartData": [],
          "stats": {}
        },
        {
          "title": "Planning vs Projection",
          "type": "comparison",
          "description": "Planned vs Projected output",
          "chartData": [],
          "stats": {}
        },
        {
          "title": "Loss Analysis",
          "type": "summary",
          "description": "Key loss statistics",
          "chartData": [],
          "stats": {}
        }
      ]
    }

    Rules:
    - All 5 sections must be present in the JSON.
    - If no data available, return empty arrays/objects.
    - Do NOT output Python code or text explanations, JSON only.
    """

    user_prompt = f"""
    Dataset metadata:
    Columns: {metadata['columns']}
    Data types: {metadata['dtypes']}
    Null counts: {metadata['null_counts']}
    Unique counts: {metadata['unique_counts']}
    Sample rows: {metadata['sample_rows']}

    User request: {user_query}
    """

    contents = [
        types.Content(role="user", parts=[types.Part.from_text(text=user_prompt)])
    ]
    config = types.GenerateContentConfig(
        temperature=0,
        max_output_tokens=2000,
        system_instruction=[types.Part.from_text(text=system_prompt)],
        # Request raw JSON so the reply is not wrapped in prose or Markdown.
        response_mime_type="application/json",
    )

    # Collect the streamed text pieces, skipping chunks that carry no text.
    pieces = [
        chunk.text
        for chunk in client.models.generate_content_stream(
            model=MODEL, contents=contents, config=config
        )
        if chunk.text
    ]
    result = "".join(pieces)

    # Fallback: if the reply is still wrapped in a ``` / ```json fence,
    # drop the opening fence line and everything from the closing fence on.
    payload = result.strip()
    if payload.startswith("```"):
        payload = payload.partition("\n")[2]
        payload = payload.rsplit("```", 1)[0]

    try:
        return json.loads(payload)
    except json.JSONDecodeError:
        # Hand the untouched reply back so the caller can inspect it.
        return {"raw_output": result}
138
+
139
+
140
+ # -------------------------------
141
+ # 🌐 Routes
142
+ # -------------------------------
143
@app.get("/", response_class=HTMLResponse)
async def home(request: Request):
    """Render the ``index.html`` template for the landing page.

    Args:
        request: The incoming request, passed to the template context
            under the ``request`` key.

    Returns:
        The rendered template response.
    """
    return templates.TemplateResponse("index.html", {"request": request})
146
+
147
+
148
@app.post("/generate_insight_file")
async def generate_insight_file(
    file: UploadFile = File(...), query: str = Form("Analyze the dataset")
):
    """Upload Excel, generate structured JSON insights.

    Args:
        file: The uploaded workbook; it is parsed with ``pandas.read_excel``.
        query: Free-text request forwarded to the model.

    Returns:
        A JSON response: ``{"success": True, "insights": ...}`` on success,
        or ``{"success": False, "error": ...}`` when the file cannot be
        read or the insight generation raises.
    """
    # Local import so this block carries its own dependency.
    from fastapi.concurrency import run_in_threadpool

    # Parsing the workbook and calling the model are both blocking, so they
    # run in the thread pool instead of stalling the event loop.
    try:
        df = await run_in_threadpool(pd.read_excel, file.file)
    except Exception as e:
        return JSONResponse({"success": False, "error": f"Failed to read file: {str(e)}"})

    metadata = get_metadata(df)
    try:
        insights = await run_in_threadpool(generate_json_insight, query, metadata, df)
    except Exception as e:
        # Report model/API failures in the same envelope as read failures
        # rather than letting them escape as an unhandled server error.
        return JSONResponse(
            {"success": False, "error": f"Failed to generate insights: {str(e)}"}
        )

    return JSONResponse({"success": True, "insights": insights})