triflix committed on
Commit
27c947d
·
verified ·
1 Parent(s): 79211c5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -217
app.py CHANGED
@@ -1,90 +1,34 @@
1
- # -----------------------------
2
- # Imports
3
- # -----------------------------
4
  import os
5
  import uuid
6
  import json
7
- import logging
8
- import subprocess
9
- import sys
10
- from pathlib import Path
11
-
12
  import pandas as pd
13
- from dotenv import load_dotenv
14
- from fastapi import FastAPI, UploadFile, File, HTTPException, Body
15
- from pydantic import BaseModel, Field
16
-
17
  from google import genai
18
- from google.generativeai import types
19
 
20
  # -----------------------------
21
- # Initial Configuration
22
  # -----------------------------
23
-
24
- # Load environment variables (will load from Hugging Face secrets)
25
- load_dotenv()
26
-
27
- # Set up logging
28
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
29
- logger = logging.getLogger(__name__)
30
-
31
- # --- MODIFICATION FOR HUGGING FACE ---
32
- # Use the /tmp directory for ephemeral file storage.
33
- # This is a standard temporary directory in Linux environments like HF Spaces.
34
- UPLOADS_DIR = Path("/tmp/uploads")
35
- # Create the directory; parents=True ensures creation of parent dirs if needed.
36
- UPLOADS_DIR.mkdir(parents=True, exist_ok=True)
37
- logger.info(f"Using temporary directory for uploads: {UPLOADS_DIR}")
38
 
39
  # -----------------------------
40
- # Initialize Gemini Client & FastAPI App
41
  # -----------------------------
 
42
 
43
- # Configure the Gemini client with the API key from environment variables/secrets
44
- try:
45
- api_key = os.getenv("GOOGLE_API_KEY")
46
- if not api_key:
47
- raise ValueError("GOOGLE_API_KEY not found in environment variables or secrets.")
48
- genai.configure(api_key=api_key)
49
- logger.info("Google GenAI client configured successfully.")
50
- except Exception as e:
51
- logger.error(f"FATAL: Failed to configure Google GenAI client: {e}")
52
- # Exit if the client can't be configured, as the app is non-functional without it.
53
- sys.exit(1)
54
-
55
- # Initialize FastAPI app
56
- app = FastAPI(
57
- title="Data Analysis and Visualization API",
58
- description="An API to analyze Excel files and generate Python code for visualizations using Google's Gemini.",
59
- version="1.1.0"
60
- )
61
 
62
  # -----------------------------
63
- # Pydantic Models for API I/O
64
  # -----------------------------
65
-
66
- class AnalysisResponse(BaseModel):
67
- file_id: str = Field(..., description="Unique identifier for the uploaded file.")
68
- summary: str = Field(..., description="AI-generated summary of the data.")
69
- suggestions: list[str] = Field(..., description="List of AI-generated analysis/visualization suggestions.")
70
-
71
- class VisualizationRequest(BaseModel):
72
- file_id: str = Field(..., description="The unique identifier of the file to be visualized.")
73
- command: str = Field(..., description="The selected suggestion/command from the analysis step.")
74
-
75
- class VisualizationResponse(BaseModel):
76
- type: str = Field(..., description="The type of visualization (e.g., 'bar', 'pie').")
77
- explanation: str = Field(..., description="A one-sentence description of the visualization.")
78
- data: dict | list = Field(..., description="The numeric JSON data produced by the executed code.")
79
- generated_code: str = Field(..., description="The Python code that was generated and executed.")
80
-
81
-
82
- # -----------------------------
83
- # Helper Functions
84
- # -----------------------------
85
-
86
- def get_metadata(df: pd.DataFrame) -> dict:
87
- """Extracts metadata from a pandas DataFrame."""
88
  return {
89
  "columns": list(df.columns),
90
  "dtypes": df.dtypes.apply(str).to_dict(),
@@ -93,155 +37,97 @@ def get_metadata(df: pd.DataFrame) -> dict:
93
  "sample_rows": df.head(3).to_dict(orient="records")
94
  }
95
 
96
- def generate_metadata_analysis(metadata: dict) -> dict:
97
- """Generates a JSON summary and suggestions from metadata using Gemini."""
98
- metadata_text = json.dumps(metadata, indent=2)
99
- model = "gemini-pro"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
- system_instruction = """
102
- You are a structured data analysis AI. Your output must be strict JSON.
 
 
 
 
103
 
104
- 1. Summary:
105
- Provide a concise description of what kind of data this is, what it likely represents, and its domain or use-case.
 
 
 
106
 
107
- 2. Suggestions:
108
- Provide exactly three actionable analyses and visualizations based on the metadata.
109
 
110
- Respond in this exact JSON format:
111
- {
112
- "summary": "<short summary>",
113
- "suggestions": ["<analysis #1>", "<analysis #2>", "<analysis #3>"]
114
- }
115
- """
116
- try:
117
- response = genai.GenerativeModel(
118
- model_name=model,
119
- system_instruction=system_instruction
120
- ).generate_content(
121
- f"Analyze the following structured data metadata:\n{metadata_text}",
122
- generation_config=types.GenerationConfig(response_mime_type="application/json")
123
- )
124
- return json.loads(response.text)
125
- except Exception as e:
126
- logger.error(f"Error generating metadata analysis from Gemini: {e}")
127
- raise HTTPException(status_code=500, detail="Failed to get analysis from AI model.")
128
-
129
- def generate_visualization_code(file_path: str, command: str) -> dict:
130
- """Generates Python code for visualization based on a user command."""
131
- model = "gemini-pro"
132
-
133
- system_instruction = f"""
134
- You are a Python assistant that MUST return output strictly in JSON format.
135
- The JSON MUST contain exactly three keys: "type", "code", "explanation".
136
-
137
- - "type": Lowercase visualization type (e.g., "bar", "pie", "line").
138
- - "code": A string of Python code that prints a JSON object to standard output. The code must access data using this exact line: df = pd.read_excel(r"{file_path}")
139
- - "explanation": A one-sentence description of the visualization.
140
- """
141
- try:
142
- response = genai.GenerativeModel(
143
- model_name=model,
144
- system_instruction=system_instruction
145
- ).generate_content(
146
- f"Generate Python code to create a {command}",
147
- generation_config=types.GenerationConfig(response_mime_type="application/json")
148
- )
149
- return json.loads(response.text)
150
- except Exception as e:
151
- logger.error(f"Error generating visualization code from Gemini: {e}")
152
- raise HTTPException(status_code=500, detail="Failed to generate visualization code from AI model.")
153
 
154
- # -----------------------------
155
- # API Endpoints
156
- # -----------------------------
 
 
 
157
 
158
- @app.post("/analyze", response_model=AnalysisResponse)
159
- async def analyze_file(file: UploadFile = File(...)):
160
- """
161
- Upload an Excel file, get its metadata, and receive an AI-generated
162
- summary and a list of visualization suggestions.
163
- """
164
- if not file.filename.endswith(('.xlsx', '.xls')):
165
- raise HTTPException(status_code=400, detail="Invalid file type. Please upload an Excel file.")
166
 
167
- file_id = str(uuid.uuid4())
168
- file_path = UPLOADS_DIR / f"{file_id}_{file.filename}"
169
-
170
- try:
171
- with open(file_path, "wb") as buffer:
172
- buffer.write(await file.read())
173
- logger.info(f"File '{file.filename}' saved to temp path '{file_path}'")
174
-
175
- df = pd.read_excel(file_path)
176
- metadata = get_metadata(df)
177
- logger.info(f"Metadata extracted for file_id: {file_id}")
178
-
179
- analysis = generate_metadata_analysis(metadata)
180
- logger.info(f"Metadata analysis generated for file_id: {file_id}")
181
-
182
- return AnalysisResponse(
183
- file_id=file_id,
184
- summary=analysis.get("summary", "No summary provided."),
185
- suggestions=analysis.get("suggestions", [])
186
- )
187
- except Exception as e:
188
- logger.error(f"An error occurred during file analysis: {e}")
189
- if file_path.exists():
190
- os.remove(file_path)
191
- raise HTTPException(status_code=500, detail=f"An internal server error occurred: {e}")
192
-
193
-
194
- @app.post("/visualize", response_model=VisualizationResponse)
195
- async def visualize_data(request: VisualizationRequest):
196
- """
197
- Generate and execute Python code for a visualization based on a file_id
198
- and a selected command from the analysis step.
199
- """
200
- matching_files = list(UPLOADS_DIR.glob(f"{request.file_id}_*"))
201
- if not matching_files:
202
- logger.error(f"File with ID '{request.file_id}' not found in {UPLOADS_DIR}.")
203
- raise HTTPException(status_code=404, detail="File not found. It may have been cleared from the temporary cache. Please re-upload.")
204
-
205
- file_path = matching_files[0]
206
- logger.info(f"Found file '{file_path}' for file_id '{request.file_id}'")
207
-
208
- agent_output = generate_visualization_code(str(file_path), request.command)
209
- code_to_run = agent_output.get("code")
210
-
211
- if not code_to_run:
212
- raise HTTPException(status_code=500, detail="AI model failed to generate valid code.")
213
- logger.info(f"Code generated for command: '{request.command}'")
214
-
215
- try:
216
- logger.info("Executing generated code in a sandboxed subprocess...")
217
- process = subprocess.run(
218
- [sys.executable, "-c", code_to_run],
219
- capture_output=True, text=True, check=True, timeout=20
220
- )
221
- stdout = process.stdout.strip()
222
- logger.info(f"Code executed successfully. Stdout length: {len(stdout)}")
223
- chart_data = json.loads(stdout)
224
-
225
- return VisualizationResponse(
226
- type=agent_output.get("type", "unknown"),
227
- explanation=agent_output.get("explanation", "No explanation provided."),
228
- data=chart_data,
229
- generated_code=code_to_run
230
- )
231
- except subprocess.CalledProcessError as e:
232
- logger.error(f"Error executing generated code. Stderr: {e.stderr}")
233
- raise HTTPException(status_code=500, detail=f"Error during code execution: {e.stderr}")
234
- except json.JSONDecodeError:
235
- logger.error(f"Failed to decode JSON from stdout. Output was: {stdout}")
236
- raise HTTPException(status_code=500, detail="Generated code did not produce valid JSON output.")
237
- except subprocess.TimeoutExpired:
238
- logger.error("Code execution timed out.")
239
- raise HTTPException(status_code=408, detail="Code execution took too long and was terminated.")
240
- except Exception as e:
241
- logger.error(f"An unexpected error occurred during visualization: {e}")
242
- raise HTTPException(status_code=500, detail=f"An unexpected error occurred: {e}")
243
-
244
-
245
- @app.get("/", include_in_schema=False)
246
- def root():
247
- return {"message": "Welcome to the Data Analysis API. Visit /docs for the API interface."}
 
 
 
 
1
  import os
2
  import uuid
3
  import json
4
+ from fastapi import FastAPI, File, UploadFile, Form
5
+ from fastapi.responses import HTMLResponse, JSONResponse
6
+ from fastapi.staticfiles import StaticFiles
7
+ from fastapi.templating import Jinja2Templates
8
+ from fastapi.requests import Request
9
  import pandas as pd
 
 
 
 
10
  from google import genai
11
+ from google.genai import types
12
 
13
# -----------------------------
# FastAPI setup
# -----------------------------
app = FastAPI()
app.mount("/static", StaticFiles(directory="static"), name="static")
templates = Jinja2Templates(directory="templates")

# -----------------------------
# Gemini client setup
# -----------------------------
# SECURITY: never hard-code an API key in source. The literal key previously
# committed here is exposed in the repository history and must be revoked.
# Read the key from the environment (e.g. a Hugging Face Space secret) instead.
client = genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))

# Directory for uploaded spreadsheets. NOTE(review): this is relative to the
# process working directory — confirm the runtime cwd is writable, or use an
# absolute path such as /tmp/uploads.
UPLOAD_DIR = "tmp/uploads"
os.makedirs(UPLOAD_DIR, exist_ok=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  # -----------------------------
29
+ # Helper functions
30
  # -----------------------------
31
+ def get_metadata(df):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  return {
33
  "columns": list(df.columns),
34
  "dtypes": df.dtypes.apply(str).to_dict(),
 
37
  "sample_rows": df.head(3).to_dict(orient="records")
38
  }
39
 
40
def generate_metadata_analysis(metadata):
    """Ask Gemini to summarize dataframe metadata and suggest analyses.

    Parameters:
        metadata: dict produced by get_metadata (columns, dtypes, sample rows).

    Returns:
        dict parsed from the model's JSON output, expected to contain
        "Summary" (str) and "Suggestion" (list of up to 3 strings).

    Raises:
        json.JSONDecodeError if the model response is not valid JSON.
    """
    # Serialize with json.dumps rather than str(): str() yields a Python repr
    # (single quotes, None/True) instead of real JSON, which degrades the
    # model's ability to parse the metadata. default=str covers values such
    # as pandas Timestamps that json cannot serialize natively.
    metadata_text = json.dumps(metadata, indent=2, default=str)
    model = "gemini-2.5-flash-lite"

    contents = [
        types.Content(
            role="user",
            parts=[types.Part.from_text(
                text=f"Analyze the following structured data metadata:\n{metadata_text}"
            )],
        ),
    ]

    generate_content_config = types.GenerateContentConfig(
        thinking_config=types.ThinkingConfig(thinking_budget=0),
        response_mime_type="application/json",
        system_instruction=[types.Part.from_text(text="""You are a structured data analysis AI.
1️⃣ Summary: concise description of data, assumptions
2️⃣ Suggestions: up to 3 actionable analyses/visualizations
Output must be strict JSON: {"Summary": "<short summary>", "Suggestion": ["<analysis #1>", "<analysis #2>", "<analysis #3>"]}
""")],
    )

    output_text = ""
    for chunk in client.models.generate_content_stream(
        model=model,
        contents=contents,
        config=generate_content_config,
    ):
        # Streaming chunks may carry no text (e.g. a final usage-metadata
        # chunk); "or ''" avoids a TypeError on None concatenation.
        output_text += chunk.text or ""

    return json.loads(output_text)
72
+
73
def generate_visualization(command, file_path):
    """Ask Gemini for visualization code matching `command` over an Excel file.

    Parameters:
        command: the analysis/visualization instruction chosen by the user.
        file_path: path to the uploaded Excel file, injected into the prompt
            so the generated code knows where to read the data from.

    Returns:
        dict parsed from the model's JSON output with keys
        "type", "code", "explanation".

    Raises:
        json.JSONDecodeError if the model response is not valid JSON.
    """
    system_prompt_text = f"""
    You are a Python assistant that MUST return output strictly in JSON format and NOTHING else.
    The top-level JSON MUST contain exactly three keys in this order: "type", "code", "explanation".
    Requirements:
    - "type": visualization type ("bar", "pie", "line", etc.)
    - "code": Python code as a string that prints numeric JSON to stdout. Use this for data access: df = pd.read_excel(r"{file_path}")
    - "explanation": one-sentence description
    """
    MODEL = "gemini-2.5-flash-lite"
    contents = [types.Content(role="user", parts=[types.Part.from_text(text=command)])]

    generate_content_config = types.GenerateContentConfig(
        thinking_config=types.ThinkingConfig(thinking_budget=0),
        response_mime_type="application/json",
        system_instruction=[types.Part.from_text(text=system_prompt_text)],
    )

    output = ""
    for chunk in client.models.generate_content_stream(
        model=MODEL,
        contents=contents,
        config=generate_content_config,
    ):
        # Streaming chunks may carry no text (e.g. a final usage-metadata
        # chunk); "or ''" avoids a TypeError on None concatenation.
        output += chunk.text or ""

    return json.loads(output)
100
 
101
# -----------------------------
# Routes
# -----------------------------
@app.get("/", response_class=HTMLResponse)
def home(request: Request):
    """Render and serve the single-page front-end."""
    context = {"request": request}
    return templates.TemplateResponse("index.html", context)
107
 
108
@app.post("/upload", response_class=JSONResponse)
async def upload_excel(file: UploadFile = File(...)):
    """Accept an Excel upload, analyze it with Gemini, and return session info.

    Response JSON on success: {"file_path", "metadata", "analysis"}; the
    file_path is echoed back so the client can pass it to /generate_plot.
    Returns a JSON error with status 400 for non-Excel uploads and 500 if
    reading or analyzing the file fails.
    """
    # Validate the extension up front: without this, any upload reaches
    # pd.read_excel and surfaces as an unhandled 500 with a stack trace.
    file_ext = os.path.splitext(file.filename or "")[1].lower()
    if file_ext not in (".xlsx", ".xls"):
        return JSONResponse(
            {"error": "Invalid file type. Please upload an Excel file."},
            status_code=400,
        )

    file_id = str(uuid.uuid4())
    file_path = os.path.join(UPLOAD_DIR, f"{file_id}{file_ext}")

    with open(file_path, "wb") as f:
        f.write(await file.read())

    try:
        df = pd.read_excel(file_path)
        metadata = get_metadata(df)
        analysis = generate_metadata_analysis(metadata)
    except Exception as e:
        # Don't leak a half-processed upload on failure.
        if os.path.exists(file_path):
            os.remove(file_path)
        return JSONResponse(
            {"error": f"Failed to analyze file: {e}"},
            status_code=500,
        )

    # Store session info temporarily
    session_data = {
        "file_path": file_path,
        "metadata": metadata,
        "analysis": analysis
    }

    return JSONResponse(session_data)
 
 
 
 
 
 
 
129
 
130
@app.post("/generate_plot", response_class=JSONResponse)
async def generate_plot(command: str = Form(...), file_path: str = Form(...)):
    """Generate visualization-code JSON for a previously uploaded file.

    `file_path` is client-supplied, so it must be confined to UPLOAD_DIR:
    otherwise an attacker can inject an arbitrary server path into the
    code-generation prompt (path traversal).
    """
    uploads_root = os.path.abspath(UPLOAD_DIR)
    requested = os.path.abspath(file_path)
    inside_uploads = os.path.commonpath([uploads_root, requested]) == uploads_root
    if not inside_uploads or not os.path.isfile(requested):
        return JSONResponse({"error": "Invalid file_path."}, status_code=400)

    visualization_json = generate_visualization(command, requested)
    return JSONResponse(visualization_json)
+ return JSONResponse(visualization_json)