# ------------------------------- # 📌 FastAPI AI Data Insights App # ------------------------------- from fastapi import FastAPI, Request, File, UploadFile, Form from fastapi.responses import HTMLResponse, JSONResponse from fastapi.staticfiles import StaticFiles from fastapi.templating import Jinja2Templates import pandas as pd from google import genai from google.genai import types import os import json # ------------------------------- # 🔑 Configuration # ------------------------------- API_KEY = os.getenv("GEMINI_API_KEY", "YOUR_GEMINI_KEY") MODEL = "gemini-2.5-flash-lite" client = genai.Client(api_key=API_KEY) # ------------------------------- # ⚡ FastAPI Setup # ------------------------------- app = FastAPI() app.mount("/static", StaticFiles(directory="static"), name="static") templates = Jinja2Templates(directory="templates") # ------------------------------- # 🛠️ Helper Functions # ------------------------------- def get_metadata(df: pd.DataFrame): """Extract lightweight metadata for prompting.""" return { "columns": list(df.columns), "dtypes": df.dtypes.apply(lambda x: str(x)).to_dict(), "num_rows": df.shape[0], "num_cols": df.shape[1], "null_counts": df.isnull().sum().to_dict(), "unique_counts": df.nunique().to_dict(), "sample_rows": df.head(3).to_dict(orient="records"), } def generate_json_insight(user_query, metadata, df): """Ask Gemini for structured JSON insights across fixed sections.""" system_prompt = """ You are a data analysis assistant. You will be given dataset metadata and a user request. Always return JSON with exactly these 5 sections: 1. Efficiency Analysis (bar chart, actual vs target if available) 2. Cumulative Performance (line chart over time if possible) 3. Process Issues (pie chart breakdown if available) 4. Planning vs Projection (comparison planned vs projected values) 5. Loss Analysis (summary with stats: total, avg, min, max) Schema: { "insights": [ { "title": "Efficiency Analysis", "type": "bar", "description": "Actual vs Target Efficiency", "chartData": [{"x": "...", "y": ..., "target": ...}], "stats": {} }, { "title": "Cumulative Performance", "type": "line", "description": "Cumulative trend over time", "chartData": [], "stats": {} }, { "title": "Process Issues", "type": "pie", "description": "Breakdown of process issues", "chartData": [], "stats": {} }, { "title": "Planning vs Projection", "type": "comparison", "description": "Planned vs Projected output", "chartData": [], "stats": {} }, { "title": "Loss Analysis", "type": "summary", "description": "Key loss statistics", "chartData": [], "stats": {} } ] } Rules: - All 5 sections must be present in the JSON. - If no data available, return empty arrays/objects. - Do NOT output Python code or text explanations, JSON only. """ user_prompt = f""" Dataset metadata: Columns: {metadata['columns']} Data types: {metadata['dtypes']} Null counts: {metadata['null_counts']} Unique counts: {metadata['unique_counts']} Sample rows: {metadata['sample_rows']} User request: {user_query} """ contents = [ types.Content(role="user", parts=[types.Part.from_text(text=user_prompt)]) ] config = types.GenerateContentConfig( temperature=0, max_output_tokens=2000, system_instruction=[types.Part.from_text(text=system_prompt)], ) result = "" for chunk in client.models.generate_content_stream( model=MODEL, contents=contents, config=config ): if chunk.text: result += chunk.text try: return json.loads(result) except Exception: return {"raw_output": result} # ------------------------------- # 🌐 Routes # ------------------------------- @app.get("/", response_class=HTMLResponse) async def home(request: Request): return templates.TemplateResponse("index.html", {"request": request}) @app.post("/generate_insight_file") async def generate_insight_file( file: UploadFile = File(...), query: str = Form("Analyze the dataset") ): """Upload Excel, generate structured JSON insights.""" try: df = pd.read_excel(file.file) except Exception as e: return JSONResponse({"success": False, "error": f"Failed to read file: {str(e)}"}) metadata = get_metadata(df) insights = generate_json_insight(query, metadata, df) return JSONResponse({"success": True, "insights": insights})