Spaces:
Sleeping
Sleeping
| # ------------------------------- | |
| # π FastAPI AI Data Insights App | |
| # ------------------------------- | |
| from fastapi import FastAPI, Request, File, UploadFile, Form | |
| from fastapi.responses import HTMLResponse, JSONResponse | |
| from fastapi.staticfiles import StaticFiles | |
| from fastapi.templating import Jinja2Templates | |
| import pandas as pd | |
| from google import genai | |
| from google.genai import types | |
| import os | |
| import json | |
| # ------------------------------- | |
| # π Configuration | |
| # ------------------------------- | |
| API_KEY = os.getenv("GEMINI_API_KEY", "YOUR_GEMINI_KEY") | |
| MODEL = "gemini-2.5-flash-lite" | |
| client = genai.Client(api_key=API_KEY) | |
| # ------------------------------- | |
| # β‘ FastAPI Setup | |
| # ------------------------------- | |
| app = FastAPI() | |
| app.mount("/static", StaticFiles(directory="static"), name="static") | |
| templates = Jinja2Templates(directory="templates") | |
| # ------------------------------- | |
| # π οΈ Helper Functions | |
| # ------------------------------- | |
| def get_metadata(df: pd.DataFrame): | |
| """Extract lightweight metadata for prompting.""" | |
| return { | |
| "columns": list(df.columns), | |
| "dtypes": df.dtypes.apply(lambda x: str(x)).to_dict(), | |
| "num_rows": df.shape[0], | |
| "num_cols": df.shape[1], | |
| "null_counts": df.isnull().sum().to_dict(), | |
| "unique_counts": df.nunique().to_dict(), | |
| "sample_rows": df.head(3).to_dict(orient="records"), | |
| } | |
| def generate_json_insight(user_query, metadata, df): | |
| """Ask Gemini for structured JSON insights across fixed sections.""" | |
| system_prompt = """ | |
| You are a data analysis assistant. | |
| You will be given dataset metadata and a user request. | |
| Always return JSON with exactly these 5 sections: | |
| 1. Efficiency Analysis (bar chart, actual vs target if available) | |
| 2. Cumulative Performance (line chart over time if possible) | |
| 3. Process Issues (pie chart breakdown if available) | |
| 4. Planning vs Projection (comparison planned vs projected values) | |
| 5. Loss Analysis (summary with stats: total, avg, min, max) | |
| Schema: | |
| { | |
| "insights": [ | |
| { | |
| "title": "Efficiency Analysis", | |
| "type": "bar", | |
| "description": "Actual vs Target Efficiency", | |
| "chartData": [{"x": "...", "y": ..., "target": ...}], | |
| "stats": {} | |
| }, | |
| { | |
| "title": "Cumulative Performance", | |
| "type": "line", | |
| "description": "Cumulative trend over time", | |
| "chartData": [], | |
| "stats": {} | |
| }, | |
| { | |
| "title": "Process Issues", | |
| "type": "pie", | |
| "description": "Breakdown of process issues", | |
| "chartData": [], | |
| "stats": {} | |
| }, | |
| { | |
| "title": "Planning vs Projection", | |
| "type": "comparison", | |
| "description": "Planned vs Projected output", | |
| "chartData": [], | |
| "stats": {} | |
| }, | |
| { | |
| "title": "Loss Analysis", | |
| "type": "summary", | |
| "description": "Key loss statistics", | |
| "chartData": [], | |
| "stats": {} | |
| } | |
| ] | |
| } | |
| Rules: | |
| - All 5 sections must be present in the JSON. | |
| - If no data available, return empty arrays/objects. | |
| - Do NOT output Python code or text explanations, JSON only. | |
| """ | |
| user_prompt = f""" | |
| Dataset metadata: | |
| Columns: {metadata['columns']} | |
| Data types: {metadata['dtypes']} | |
| Null counts: {metadata['null_counts']} | |
| Unique counts: {metadata['unique_counts']} | |
| Sample rows: {metadata['sample_rows']} | |
| User request: {user_query} | |
| """ | |
| contents = [ | |
| types.Content(role="user", parts=[types.Part.from_text(text=user_prompt)]) | |
| ] | |
| config = types.GenerateContentConfig( | |
| temperature=0, | |
| max_output_tokens=2000, | |
| system_instruction=[types.Part.from_text(text=system_prompt)], | |
| ) | |
| result = "" | |
| for chunk in client.models.generate_content_stream( | |
| model=MODEL, contents=contents, config=config | |
| ): | |
| if chunk.text: | |
| result += chunk.text | |
| try: | |
| return json.loads(result) | |
| except Exception: | |
| return {"raw_output": result} | |
| # ------------------------------- | |
| # π Routes | |
| # ------------------------------- | |
| async def home(request: Request): | |
| return templates.TemplateResponse("index.html", {"request": request}) | |
| async def generate_insight_file( | |
| file: UploadFile = File(...), query: str = Form("Analyze the dataset") | |
| ): | |
| """Upload Excel, generate structured JSON insights.""" | |
| try: | |
| df = pd.read_excel(file.file) | |
| except Exception as e: | |
| return JSONResponse({"success": False, "error": f"Failed to read file: {str(e)}"}) | |
| metadata = get_metadata(df) | |
| insights = generate_json_insight(query, metadata, df) | |
| return JSONResponse({"success": True, "insights": insights}) | |