File size: 4,653 Bytes
28dcf64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# -------------------------------
# πŸ“Œ FastAPI AI Data Insights App
# -------------------------------

from fastapi import FastAPI, Request, File, UploadFile, Form
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
import pandas as pd
from google import genai
from google.genai import types
import os
import json

# -------------------------------
# πŸ”‘ Configuration
# -------------------------------
API_KEY = os.getenv("GEMINI_API_KEY", "YOUR_GEMINI_KEY")
MODEL = "gemini-2.5-flash-lite"

client = genai.Client(api_key=API_KEY)

# -------------------------------
# ⚑ FastAPI Setup
# -------------------------------
app = FastAPI()
app.mount("/static", StaticFiles(directory="static"), name="static")
templates = Jinja2Templates(directory="templates")


# -------------------------------
# πŸ› οΈ Helper Functions
# -------------------------------
def get_metadata(df: pd.DataFrame):
    """Extract lightweight metadata for prompting."""
    return {
        "columns": list(df.columns),
        "dtypes": df.dtypes.apply(lambda x: str(x)).to_dict(),
        "num_rows": df.shape[0],
        "num_cols": df.shape[1],
        "null_counts": df.isnull().sum().to_dict(),
        "unique_counts": df.nunique().to_dict(),
        "sample_rows": df.head(3).to_dict(orient="records"),
    }


def generate_json_insight(user_query, metadata, df):
    """Ask Gemini for structured JSON insights across fixed sections."""
    system_prompt = """
You are a data analysis assistant.
You will be given dataset metadata and a user request.

Always return JSON with exactly these 5 sections:
1. Efficiency Analysis (bar chart, actual vs target if available)
2. Cumulative Performance (line chart over time if possible)
3. Process Issues (pie chart breakdown if available)
4. Planning vs Projection (comparison planned vs projected values)
5. Loss Analysis (summary with stats: total, avg, min, max)

Schema:
{
  "insights": [
    {
      "title": "Efficiency Analysis",
      "type": "bar",
      "description": "Actual vs Target Efficiency",
      "chartData": [{"x": "...", "y": ..., "target": ...}],
      "stats": {}
    },
    {
      "title": "Cumulative Performance",
      "type": "line",
      "description": "Cumulative trend over time",
      "chartData": [],
      "stats": {}
    },
    {
      "title": "Process Issues",
      "type": "pie",
      "description": "Breakdown of process issues",
      "chartData": [],
      "stats": {}
    },
    {
      "title": "Planning vs Projection",
      "type": "comparison",
      "description": "Planned vs Projected output",
      "chartData": [],
      "stats": {}
    },
    {
      "title": "Loss Analysis",
      "type": "summary",
      "description": "Key loss statistics",
      "chartData": [],
      "stats": {}
    }
  ]
}

Rules:
- All 5 sections must be present in the JSON.
- If no data available, return empty arrays/objects.
- Do NOT output Python code or text explanations, JSON only.
"""

    user_prompt = f"""
Dataset metadata:
Columns: {metadata['columns']}
Data types: {metadata['dtypes']}
Null counts: {metadata['null_counts']}
Unique counts: {metadata['unique_counts']}
Sample rows: {metadata['sample_rows']}

User request: {user_query}
"""

    contents = [
        types.Content(role="user", parts=[types.Part.from_text(text=user_prompt)])
    ]
    config = types.GenerateContentConfig(
        temperature=0,
        max_output_tokens=2000,
        system_instruction=[types.Part.from_text(text=system_prompt)],
    )

    result = ""
    for chunk in client.models.generate_content_stream(
        model=MODEL, contents=contents, config=config
    ):
        if chunk.text:
            result += chunk.text

    try:
        return json.loads(result)
    except Exception:
        return {"raw_output": result}


# -------------------------------
# 🌐 Routes
# -------------------------------
@app.get("/", response_class=HTMLResponse)
async def home(request: Request):
    return templates.TemplateResponse("index.html", {"request": request})


@app.post("/generate_insight_file")
async def generate_insight_file(
    file: UploadFile = File(...), query: str = Form("Analyze the dataset")
):
    """Upload Excel, generate structured JSON insights."""
    try:
        df = pd.read_excel(file.file)
    except Exception as e:
        return JSONResponse({"success": False, "error": f"Failed to read file: {str(e)}"})

    metadata = get_metadata(df)
    insights = generate_json_insight(query, metadata, df)

    return JSONResponse({"success": True, "insights": insights})