Spaces:
Sleeping
Sleeping
File size: 4,653 Bytes
28dcf64 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 |
# -------------------------------
# π FastAPI AI Data Insights App
# -------------------------------
from fastapi import FastAPI, Request, File, UploadFile, Form
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
import pandas as pd
from google import genai
from google.genai import types
import os
import json
# -------------------------------
# π Configuration
# -------------------------------
API_KEY = os.getenv("GEMINI_API_KEY", "YOUR_GEMINI_KEY")
MODEL = "gemini-2.5-flash-lite"
client = genai.Client(api_key=API_KEY)
# -------------------------------
# β‘ FastAPI Setup
# -------------------------------
app = FastAPI()
app.mount("/static", StaticFiles(directory="static"), name="static")
templates = Jinja2Templates(directory="templates")
# -------------------------------
# π οΈ Helper Functions
# -------------------------------
def get_metadata(df: pd.DataFrame):
"""Extract lightweight metadata for prompting."""
return {
"columns": list(df.columns),
"dtypes": df.dtypes.apply(lambda x: str(x)).to_dict(),
"num_rows": df.shape[0],
"num_cols": df.shape[1],
"null_counts": df.isnull().sum().to_dict(),
"unique_counts": df.nunique().to_dict(),
"sample_rows": df.head(3).to_dict(orient="records"),
}
def generate_json_insight(user_query, metadata, df):
"""Ask Gemini for structured JSON insights across fixed sections."""
system_prompt = """
You are a data analysis assistant.
You will be given dataset metadata and a user request.
Always return JSON with exactly these 5 sections:
1. Efficiency Analysis (bar chart, actual vs target if available)
2. Cumulative Performance (line chart over time if possible)
3. Process Issues (pie chart breakdown if available)
4. Planning vs Projection (comparison planned vs projected values)
5. Loss Analysis (summary with stats: total, avg, min, max)
Schema:
{
"insights": [
{
"title": "Efficiency Analysis",
"type": "bar",
"description": "Actual vs Target Efficiency",
"chartData": [{"x": "...", "y": ..., "target": ...}],
"stats": {}
},
{
"title": "Cumulative Performance",
"type": "line",
"description": "Cumulative trend over time",
"chartData": [],
"stats": {}
},
{
"title": "Process Issues",
"type": "pie",
"description": "Breakdown of process issues",
"chartData": [],
"stats": {}
},
{
"title": "Planning vs Projection",
"type": "comparison",
"description": "Planned vs Projected output",
"chartData": [],
"stats": {}
},
{
"title": "Loss Analysis",
"type": "summary",
"description": "Key loss statistics",
"chartData": [],
"stats": {}
}
]
}
Rules:
- All 5 sections must be present in the JSON.
- If no data available, return empty arrays/objects.
- Do NOT output Python code or text explanations, JSON only.
"""
user_prompt = f"""
Dataset metadata:
Columns: {metadata['columns']}
Data types: {metadata['dtypes']}
Null counts: {metadata['null_counts']}
Unique counts: {metadata['unique_counts']}
Sample rows: {metadata['sample_rows']}
User request: {user_query}
"""
contents = [
types.Content(role="user", parts=[types.Part.from_text(text=user_prompt)])
]
config = types.GenerateContentConfig(
temperature=0,
max_output_tokens=2000,
system_instruction=[types.Part.from_text(text=system_prompt)],
)
result = ""
for chunk in client.models.generate_content_stream(
model=MODEL, contents=contents, config=config
):
if chunk.text:
result += chunk.text
try:
return json.loads(result)
except Exception:
return {"raw_output": result}
# -------------------------------
# π Routes
# -------------------------------
@app.get("/", response_class=HTMLResponse)
async def home(request: Request):
return templates.TemplateResponse("index.html", {"request": request})
@app.post("/generate_insight_file")
async def generate_insight_file(
file: UploadFile = File(...), query: str = Form("Analyze the dataset")
):
"""Upload Excel, generate structured JSON insights."""
try:
df = pd.read_excel(file.file)
except Exception as e:
return JSONResponse({"success": False, "error": f"Failed to read file: {str(e)}"})
metadata = get_metadata(df)
insights = generate_json_insight(query, metadata, df)
return JSONResponse({"success": True, "insights": insights})
|