"""FastAPI service that profiles uploaded Excel files with pandas and asks
Gemini for a summary, analysis suggestions, and visualization code."""

import contextlib
import json
import os
import shutil
import uuid
import zipfile
from typing import Any, BinaryIO, Optional

import pandas as pd
from fastapi import FastAPI, File, Form, HTTPException, UploadFile
from fastapi.concurrency import run_in_threadpool
from fastapi.requests import Request
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from google import genai
from google.genai import types

# -----------------------------
# FastAPI setup
# -----------------------------
app = FastAPI()
app.mount("/static", StaticFiles(directory="static"), name="static")
templates = Jinja2Templates(directory="templates")

# -----------------------------
# Gemini client setup
# -----------------------------
# SECURITY: the API key is read from the GEMINI_API_KEY environment variable
# instead of being committed to source control. The key that used to be
# hard-coded here must be treated as leaked and revoked.
# NOTE(review): when the variable is unset, ``None`` is passed and the SDK's
# own credential lookup applies -- confirm against the deployed SDK version.
client = genai.Client(api_key=os.environ.get("GEMINI_API_KEY"))

GEMINI_MODEL = "gemini-2.5-flash-lite"

UPLOAD_DIR = "tmp/uploads"
os.makedirs(UPLOAD_DIR, exist_ok=True)

_METADATA_SYSTEM_PROMPT = """You are a structured data analysis AI.
1️⃣ Summary: concise description of data, assumptions
2️⃣ Suggestions: up to 3 actionable analyses/visualizations
Output must be strict JSON:
{"Summary": "", "Suggestion": ["", "", ""]}
"""


# -----------------------------
# Helper functions
# -----------------------------
def get_metadata(df: pd.DataFrame) -> dict[str, Any]:
    """Return a JSON-friendly profile of *df*.

    The result has the keys ``columns``, ``dtypes``, ``null_counts``,
    ``unique_counts`` and ``sample_rows`` (the first three rows as records).
    """
    # Round-trip the sample through pandas' JSON encoder so NaN becomes null
    # and timestamps become ISO strings; the raw ``to_dict`` output can hold
    # NaN / Timestamp values that JSONResponse refuses to serialize.
    sample_rows = json.loads(
        df.head(3).to_json(orient="records", date_format="iso", default_handler=str)
    )
    return {
        "columns": list(df.columns),
        "dtypes": df.dtypes.apply(str).to_dict(),
        "null_counts": df.isnull().sum().to_dict(),
        "unique_counts": df.nunique().to_dict(),
        "sample_rows": sample_rows,
    }


def _generate_json(user_text: str, system_prompt: str) -> Any:
    """Send one user message to Gemini and parse the streamed reply as JSON.

    Raises:
        json.JSONDecodeError: if the concatenated reply is not valid JSON
            (including an empty reply).
    """
    contents = [
        types.Content(
            role="user",
            parts=[types.Part.from_text(text=user_text)],
        ),
    ]
    config = types.GenerateContentConfig(
        thinking_config=types.ThinkingConfig(thinking_budget=0),
        response_mime_type="application/json",
        system_instruction=[types.Part.from_text(text=system_prompt)],
    )
    stream = client.models.generate_content_stream(
        model=GEMINI_MODEL,
        contents=contents,
        config=config,
    )
    # A streamed chunk may carry no text (``chunk.text`` is None); guard so
    # the concatenation cannot raise TypeError.
    raw_reply = "".join(chunk.text or "" for chunk in stream)
    return json.loads(raw_reply)


def generate_metadata_analysis(metadata: dict[str, Any]) -> Any:
    """Ask Gemini for a summary and up to three suggestions for *metadata*.

    Returns the parsed JSON reply; the system prompt requests the shape
    ``{"Summary": "", "Suggestion": ["", "", ""]}`` but the reply is not
    validated against it here.

    Raises:
        json.JSONDecodeError: if the model reply is not valid JSON.
    """
    metadata_text = str(metadata)
    return _generate_json(
        f"Analyze the following structured data metadata:\n{metadata_text}",
        _METADATA_SYSTEM_PROMPT,
    )


def generate_visualization(command: str, file_path: str) -> Any:
    """Ask Gemini for visualization code answering *command*.

    *file_path* is embedded in the system prompt so the generated code knows
    where to load the workbook from. The parsed JSON reply is returned as-is;
    this function does not execute the generated code.

    Raises:
        json.JSONDecodeError: if the model reply is not valid JSON.
    """
    system_prompt_text = f"""
You are a Python assistant that MUST return output strictly in JSON format and NOTHING else.
The top-level JSON MUST contain exactly three keys in this order: "type", "code", "explanation".
Requirements:
- "type": visualization type ("bar", "pie", "line", etc.)
- "code": Python code as a string that prints numeric JSON to stdout.
  Use this for data access: df = pd.read_excel(r"{file_path}")
- "explanation": one-sentence description
"""
    return _generate_json(command, system_prompt_text)


def _safe_extension(filename: Optional[str]) -> str:
    """Return the extension of *filename* if it is purely alphanumeric, else ""."""
    extension = os.path.splitext(filename or "")[1]
    # The extension ends up in a path that is later interpolated into an LLM
    # prompt, so anything other than ".<alphanumerics>" is dropped.
    if extension[1:].isalnum() and len(extension) <= 16:
        return extension
    return ""


def _require_uploaded_file(file_path: str) -> None:
    """Raise HTTP 400 unless *file_path* is an existing file inside UPLOAD_DIR."""
    upload_root = os.path.realpath(UPLOAD_DIR)
    candidate = os.path.realpath(file_path)
    try:
        inside_upload_dir = os.path.commonpath([upload_root, candidate]) == upload_root
    except ValueError:
        # commonpath raises when the paths cannot be compared (e.g. different
        # drives on Windows); treat that as "outside".
        inside_upload_dir = False
    # Quotes/newlines could break out of the r"..." literal in the prompt.
    has_unsafe_chars = any(ch in file_path for ch in ('"', "\n", "\r"))
    if has_unsafe_chars or not inside_upload_dir or not os.path.isfile(candidate):
        raise HTTPException(
            status_code=400,
            detail="file_path must reference a previously uploaded file.",
        )


def _process_upload(stream: BinaryIO, file_path: str) -> dict[str, Any]:
    """Persist *stream* to *file_path*, profile it, and request an analysis.

    Runs synchronously; callers on the event loop should dispatch it to a
    worker thread.

    Raises:
        HTTPException: 400 if the file cannot be read as an Excel workbook,
            502 if the model reply is not valid JSON.
    """
    with open(file_path, "wb") as destination:
        shutil.copyfileobj(stream, destination)

    try:
        df = pd.read_excel(file_path)
    except (ValueError, OSError, zipfile.BadZipFile) as exc:
        # Do not leave unreadable uploads behind.
        with contextlib.suppress(OSError):
            os.remove(file_path)
        raise HTTPException(
            status_code=400,
            detail="Uploaded file could not be read as an Excel workbook.",
        ) from exc

    metadata = get_metadata(df)
    try:
        analysis = generate_metadata_analysis(metadata)
    except json.JSONDecodeError as exc:
        raise HTTPException(
            status_code=502,
            detail="The analysis model returned an invalid response.",
        ) from exc

    # Store session info temporarily
    return {
        "file_path": file_path,
        "metadata": metadata,
        "analysis": analysis,
    }


# -----------------------------
# Routes
# -----------------------------
@app.get("/", response_class=HTMLResponse)
def home(request: Request):
    """Render the ``index.html`` template."""
    return templates.TemplateResponse("index.html", {"request": request})


@app.post("/upload", response_class=JSONResponse)
async def upload_excel(file: UploadFile = File(...)):
    """Store an uploaded workbook and return its path, metadata and analysis.

    The file is saved under UPLOAD_DIR with a fresh UUID name; the returned
    ``file_path`` is what ``/generate_plot`` expects back.
    """
    file_ext = _safe_extension(file.filename)
    file_id = str(uuid.uuid4())
    file_path = os.path.join(UPLOAD_DIR, f"{file_id}{file_ext}")

    # Disk I/O, pandas parsing and the Gemini call all block, so keep them
    # off the event loop.
    session_data = await run_in_threadpool(_process_upload, file.file, file_path)
    return JSONResponse(session_data)


@app.post("/generate_plot", response_class=JSONResponse)
async def generate_plot(command: str = Form(...), file_path: str = Form(...)):
    """Return the model's visualization JSON for *command* on an uploaded file.

    Raises:
        HTTPException: 400 if *file_path* is not an uploaded file,
            502 if the model reply is not valid JSON.
    """
    # file_path is client-controlled and is interpolated into the LLM prompt.
    _require_uploaded_file(file_path)
    try:
        visualization_json = await run_in_threadpool(
            generate_visualization, command, file_path
        )
    except json.JSONDecodeError as exc:
        raise HTTPException(
            status_code=502,
            detail="The visualization model returned an invalid response.",
        ) from exc
    return JSONResponse(visualization_json)