# Spaces: Sleeping (page header captured along with the source; not part of the program)
import asyncio
import json
import math
import os
import uuid

import pandas as pd
from fastapi import FastAPI, File, UploadFile, Form
from fastapi.requests import Request
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from google import genai
from google.genai import types
# -----------------------------
# FastAPI setup
# -----------------------------
app = FastAPI()
app.mount("/static", StaticFiles(directory="static"), name="static")
templates = Jinja2Templates(directory="templates")

# -----------------------------
# Gemini client setup
# -----------------------------
# The API key is taken from the GEMINI_API_KEY environment variable instead of
# being embedded in source. The key that used to be hard-coded here has been
# exposed and should be revoked.
client = genai.Client(api_key=os.environ.get("GEMINI_API_KEY"))

# Uploaded workbooks are written to this directory; create it up front.
UPLOAD_DIR = "tmp/uploads"
os.makedirs(UPLOAD_DIR, exist_ok=True)
# -----------------------------
# Helper functions
# -----------------------------
def _json_safe(value):
    """Return *value* in a form that strict JSON encoding accepts.

    Non-finite floats and pandas missing markers (NaN, NaT, NA, None) become
    ``None``; date/time-like objects become their ISO-8601 string.
    """
    if isinstance(value, float) and not math.isfinite(value):
        return None
    # pd.isna returns an array for list-likes, so only ask it about scalars.
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return None
    if hasattr(value, "isoformat"):
        return value.isoformat()
    return value


def get_metadata(df):
    """Summarise a DataFrame as a plain dictionary.

    Args:
        df: The pandas DataFrame to describe.

    Returns:
        A dict with the keys ``columns`` (list of column labels), ``dtypes``
        (label -> dtype name), ``null_counts`` (label -> number of missing
        values), ``unique_counts`` (label -> number of distinct values) and
        ``sample_rows`` (the first three rows as records). Sample values are
        normalised so the records can be serialised as strict JSON: missing
        and non-finite values become ``None`` and date/time values become
        ISO-8601 strings.
    """
    sample_rows = [
        {label: _json_safe(cell) for label, cell in row.items()}
        for row in df.head(3).to_dict(orient="records")
    ]
    return {
        "columns": list(df.columns),
        "dtypes": df.dtypes.apply(str).to_dict(),
        "null_counts": df.isnull().sum().to_dict(),
        "unique_counts": df.nunique().to_dict(),
        "sample_rows": sample_rows,
    }
def generate_metadata_analysis(metadata):
    """Ask Gemini for a summary and analysis suggestions for *metadata*.

    Args:
        metadata: The metadata object to analyse; its ``str()`` form is
            embedded in the user prompt.

    Returns:
        The model's reply parsed with ``json.loads``. The system instruction
        requests an object with the keys ``"Summary"`` and ``"Suggestion"``.

    Raises:
        json.JSONDecodeError: If the concatenated reply is not valid JSON.
    """
    metadata_text = str(metadata)
    model = "gemini-2.5-flash-lite"
    contents = [
        types.Content(
            role="user",
            parts=[types.Part.from_text(
                text=f"Analyze the following structured data metadata:\n{metadata_text}"
            )],
        ),
    ]
    generate_content_config = types.GenerateContentConfig(
        thinking_config=types.ThinkingConfig(thinking_budget=0),
        response_mime_type="application/json",
        system_instruction=[types.Part.from_text(text="""You are a structured data analysis AI.
1️⃣ Summary: concise description of data, assumptions
2️⃣ Suggestions: up to 3 actionable analyses/visualizations
Output must be strict JSON: {"Summary": "<short summary>", "Suggestion": ["<analysis #1>", "<analysis #2>", "<analysis #3>"]}
""")],
    )
    stream = client.models.generate_content_stream(
        model=model,
        contents=contents,
        config=generate_content_config,
    )
    # A streamed chunk can carry no text (chunk.text is None); treat it as an
    # empty string instead of failing on str + None.
    output_text = "".join(chunk.text or "" for chunk in stream)
    return json.loads(output_text)
def generate_visualization(command, file_path):
    """Ask Gemini for visualization code answering *command*.

    Args:
        command: The user's request, sent as the user message.
        file_path: Path of the workbook; it is interpolated into the system
            prompt so the generated code knows which file to read.

    Returns:
        The model's reply parsed with ``json.loads``. The system prompt
        requests an object with the keys ``"type"``, ``"code"`` and
        ``"explanation"``.

    Raises:
        json.JSONDecodeError: If the concatenated reply is not valid JSON.
    """
    # NOTE(review): file_path is placed inside a quoted literal in the prompt;
    # a path containing a double quote would break out of it — callers should
    # only pass trusted paths.
    system_prompt_text = f"""
You are a Python assistant that MUST return output strictly in JSON format and NOTHING else.
The top-level JSON MUST contain exactly three keys in this order: "type", "code", "explanation".
Requirements:
- "type": visualization type ("bar", "pie", "line", etc.)
- "code": Python code as a string that prints numeric JSON to stdout. Use this for data access: df = pd.read_excel(r"{file_path}")
- "explanation": one-sentence description
"""
    MODEL = "gemini-2.5-flash-lite"
    contents = [types.Content(role="user", parts=[types.Part.from_text(text=command)])]
    generate_content_config = types.GenerateContentConfig(
        thinking_config=types.ThinkingConfig(thinking_budget=0),
        response_mime_type="application/json",
        system_instruction=[types.Part.from_text(text=system_prompt_text)],
    )
    stream = client.models.generate_content_stream(
        model=MODEL,
        contents=contents,
        config=generate_content_config,
    )
    # A streamed chunk can carry no text (chunk.text is None); treat it as an
    # empty string instead of failing on str + None.
    output = "".join(chunk.text or "" for chunk in stream)
    return json.loads(output)
# -----------------------------
# Routes
# -----------------------------
def home(request: Request):
    """Render the ``index.html`` template for the incoming request."""
    # NOTE(review): no route decorator is visible on this handler — confirm
    # that it is registered on `app`.
    template_context = {"request": request}
    return templates.TemplateResponse("index.html", template_context)
async def upload_excel(file: UploadFile = File(...)):
    """Store an uploaded workbook, describe it and request an AI analysis.

    Args:
        file: The uploaded file.

    Returns:
        A ``JSONResponse`` with ``file_path`` (where the upload was saved),
        ``metadata`` (from ``get_metadata``) and ``analysis`` (from
        ``generate_metadata_analysis``).

    Raises:
        Exception: Whatever reading the workbook or the analysis call raises;
            the saved upload is removed before the error propagates.
    """
    # NOTE(review): no route decorator is visible on this handler — confirm
    # that it is registered on `app`.

    # An upload may arrive without a filename; fall back to "no extension".
    file_ext = os.path.splitext(file.filename or "")[1]
    file_id = str(uuid.uuid4())
    file_path = os.path.join(UPLOAD_DIR, f"{file_id}{file_ext}")
    with open(file_path, "wb") as f:
        f.write(await file.read())

    try:
        # Parsing the workbook and calling the model are blocking; run them in
        # worker threads so the event loop keeps serving other requests.
        df = await asyncio.to_thread(pd.read_excel, file_path)
        metadata = get_metadata(df)
        analysis = await asyncio.to_thread(generate_metadata_analysis, metadata)
    except Exception:
        # The client never learns the path on failure, so the file would be
        # orphaned; remove it and let the original error propagate.
        if os.path.exists(file_path):
            os.remove(file_path)
        raise

    # Store session info temporarily
    session_data = {
        "file_path": file_path,
        "metadata": metadata,
        "analysis": analysis,
    }
    return JSONResponse(session_data)
def _is_uploaded_file(path):
    """Return True when *path* resolves to an existing file inside UPLOAD_DIR."""
    upload_root = os.path.realpath(UPLOAD_DIR)
    try:
        resolved = os.path.realpath(path)
    except ValueError:
        # e.g. an embedded NUL byte in the submitted path
        return False
    return resolved.startswith(upload_root + os.sep) and os.path.isfile(resolved)


async def generate_plot(command: str = Form(...), file_path: str = Form(...)):
    """Generate a visualization description for a previously uploaded file.

    Args:
        command: The user's visualization request.
        file_path: Path of the uploaded workbook, as submitted by the client.

    Returns:
        A ``JSONResponse`` with the output of ``generate_visualization``, or a
        400 response with an ``error`` message when *file_path* does not refer
        to a file inside ``UPLOAD_DIR``.
    """
    # NOTE(review): no route decorator is visible on this handler — confirm
    # that it is registered on `app`.

    # file_path comes from the client and ends up in the model prompt, so only
    # accept paths that point at a stored upload.
    if not _is_uploaded_file(file_path):
        return JSONResponse(
            {"error": "file_path must refer to a previously uploaded file"},
            status_code=400,
        )

    # The model call is blocking; run it in a worker thread so the event loop
    # stays responsive.
    visualization_json = await asyncio.to_thread(
        generate_visualization, command, file_path
    )
    return JSONResponse(visualization_json)