triflix committed on
Commit
93ab69f
·
verified ·
1 Parent(s): 35d6639

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -50
app.py CHANGED
@@ -1,5 +1,5 @@
1
- from fastapi import FastAPI, File, UploadFile, Form, StreamingResponse
2
- from fastapi.responses import JSONResponse # <- correct import
3
  from fastapi.middleware.cors import CORSMiddleware
4
  import pandas as pd
5
  from google import genai
@@ -7,13 +7,11 @@ from google.genai import types
7
  import os
8
  import json
9
  import asyncio
10
- import numpy as np
11
-
12
 
13
  # -------------------------------
14
  # 🔑 Configuration
15
  # -------------------------------
16
- API_KEY = os.getenv("GEMINI_API_KEY", "YOUR_GEMINI_KEY")
17
  MODEL = "gemini-2.5-flash-lite"
18
  client = genai.Client(api_key=API_KEY)
19
 
@@ -22,11 +20,10 @@ client = genai.Client(api_key=API_KEY)
22
  # -------------------------------
23
  app = FastAPI()
24
 
25
- # Enable CORS for all origins
26
  app.add_middleware(
27
  CORSMiddleware,
28
- allow_origins=["*"], # or specify your frontend URLs
29
- allow_credentials=True,
30
  allow_methods=["*"],
31
  allow_headers=["*"],
32
  )
@@ -35,44 +32,29 @@ app.add_middleware(
35
  # 🛠️ Helper Functions
36
  # -------------------------------
37
  def get_metadata(df: pd.DataFrame):
38
- """Extract JSON-serializable metadata."""
39
- def serialize_value(x):
40
- if isinstance(x, pd.Timestamp):
41
- return x.isoformat()
42
- elif isinstance(x, (np.integer, np.int64, np.int32)):
43
- return int(x)
44
- elif isinstance(x, (np.floating, np.float64, np.float32)):
45
- return float(x)
46
- elif pd.isna(x):
47
- return None
48
- else:
49
- return x
50
-
51
- metadata = {
52
- "columns": list(df.columns),
53
- "dtypes": df.dtypes.apply(lambda x: str(x)).to_dict(),
54
- "num_rows": df.shape[0],
55
- "num_cols": df.shape[1],
56
- "null_counts": {k: int(v) for k, v in df.isnull().sum().to_dict().items()},
57
- "unique_counts": {k: int(v) for k, v in df.nunique().to_dict().items()},
58
- "sample_rows": [
59
- {col: serialize_value(val) for col, val in row.items()}
60
- for row in df.head(3).to_dict(orient="records")
61
- ],
62
  }
63
- return metadata
64
 
65
  async def stream_insights(user_query, metadata):
66
- """Generator that yields insights step by step as JSON strings."""
67
- # Step 1: Start
68
  yield json.dumps({"status": "started", "message": "File received. Extracting metadata..."}) + "\n"
69
- await asyncio.sleep(0.2)
70
 
71
- # Step 2: Metadata
72
  yield json.dumps({"status": "metadata", "metadata": metadata}) + "\n"
73
- await asyncio.sleep(0.2)
74
 
75
- # Step 3: Call Gemini for structured insights
76
  system_prompt = """
77
  You are a data analysis assistant.
78
  Always return JSON with this schema:
@@ -111,31 +93,24 @@ async def stream_insights(user_query, metadata):
111
  yield json.dumps({"status": "error", "raw_output": result}) + "\n"
112
  return
113
 
114
- # Step 4: Excel info
115
  yield json.dumps({"status": "excel_info", "excel_info": parsed.get("excel_info", {})}) + "\n"
116
- await asyncio.sleep(0.2)
117
 
118
- # Step 5: Data type context
119
  yield json.dumps({"status": "context", "data_type_context": parsed.get("data_type_context", "")}) + "\n"
120
- await asyncio.sleep(0.2)
121
 
122
- # Step 6: Stream each insight
123
  for insight in parsed.get("auto_insights", {}).get("insights", []):
124
  yield json.dumps({"status": "insight", "insight": insight}) + "\n"
125
- await asyncio.sleep(0.2)
126
 
127
- # Step 7: Query insights
128
  yield json.dumps({"status": "query", "query_insights": parsed.get("query_insights", {})}) + "\n"
129
-
130
- # Step 8: Completed
131
  yield json.dumps({"status": "completed", "message": "All insights generated"}) + "\n"
132
 
133
  # -------------------------------
134
- # 🌐 API Routes
135
  # -------------------------------
136
  @app.post("/stream_insights")
137
  async def stream_insight_file(file: UploadFile = File(...), query: str = Form("Analyze the dataset")):
138
- """Stream structured JSON insights step by step."""
139
  try:
140
  df = pd.read_excel(file.file)
141
  except Exception as e:
 
1
+ from fastapi import FastAPI, File, UploadFile, Form
2
+ from fastapi.responses import JSONResponse, StreamingResponse
3
  from fastapi.middleware.cors import CORSMiddleware
4
  import pandas as pd
5
  from google import genai
 
7
  import os
8
  import json
9
  import asyncio
 
 
10
 
11
  # -------------------------------
12
  # 🔑 Configuration
13
  # -------------------------------
14
+ API_KEY = os.getenv("GEMINI_API_KEY", "AIzaSyB1jgGCuzg7ELPwNEEwaluQZoZhxhgLmAs")
15
  MODEL = "gemini-2.5-flash-lite"
16
  client = genai.Client(api_key=API_KEY)
17
 
 
20
  # -------------------------------
21
  app = FastAPI()
22
 
23
+ # Enable CORS
24
  app.add_middleware(
25
  CORSMiddleware,
26
+ allow_origins=["*"],
 
27
  allow_methods=["*"],
28
  allow_headers=["*"],
29
  )
 
32
  # 🛠️ Helper Functions
33
  # -------------------------------
34
  def get_metadata(df: pd.DataFrame):
35
+ # Convert all timestamps to string to avoid JSON serialization issues
36
+ df_serializable = df.copy()
37
+ for col in df_serializable.select_dtypes(include=['datetime64[ns]']).columns:
38
+ df_serializable[col] = df_serializable[col].astype(str)
39
+ return {
40
+ "columns": list(df_serializable.columns),
41
+ "dtypes": df_serializable.dtypes.apply(lambda x: str(x)).to_dict(),
42
+ "num_rows": df_serializable.shape[0],
43
+ "num_cols": df_serializable.shape[1],
44
+ "null_counts": df_serializable.isnull().sum().to_dict(),
45
+ "unique_counts": df_serializable.nunique().to_dict(),
46
+ "sample_rows": df_serializable.head(3).to_dict(orient="records"),
 
 
 
 
 
 
 
 
 
 
 
 
47
  }
 
48
 
49
  async def stream_insights(user_query, metadata):
50
+ """Stream insights step by step."""
 
51
  yield json.dumps({"status": "started", "message": "File received. Extracting metadata..."}) + "\n"
52
+ await asyncio.sleep(0.5)
53
 
 
54
  yield json.dumps({"status": "metadata", "metadata": metadata}) + "\n"
55
+ await asyncio.sleep(0.5)
56
 
57
+ # Gemini system prompt
58
  system_prompt = """
59
  You are a data analysis assistant.
60
  Always return JSON with this schema:
 
93
  yield json.dumps({"status": "error", "raw_output": result}) + "\n"
94
  return
95
 
 
96
  yield json.dumps({"status": "excel_info", "excel_info": parsed.get("excel_info", {})}) + "\n"
97
+ await asyncio.sleep(0.5)
98
 
 
99
  yield json.dumps({"status": "context", "data_type_context": parsed.get("data_type_context", "")}) + "\n"
100
+ await asyncio.sleep(0.5)
101
 
 
102
  for insight in parsed.get("auto_insights", {}).get("insights", []):
103
  yield json.dumps({"status": "insight", "insight": insight}) + "\n"
104
+ await asyncio.sleep(0.5)
105
 
 
106
  yield json.dumps({"status": "query", "query_insights": parsed.get("query_insights", {})}) + "\n"
 
 
107
  yield json.dumps({"status": "completed", "message": "All insights generated"}) + "\n"
108
 
109
  # -------------------------------
110
+ # 🌐 Routes
111
  # -------------------------------
112
  @app.post("/stream_insights")
113
  async def stream_insight_file(file: UploadFile = File(...), query: str = Form("Analyze the dataset")):
 
114
  try:
115
  df = pd.read_excel(file.file)
116
  except Exception as e: