Spaces:

triflix
/

chatplotapi

Paused

App Files Files Community

triflix committed on Sep 23, 2025

Commit

f9990dd

verified ·

1 Parent(s): 93ab69f

Update app.py

Browse files

Files changed (1) hide show

app.py +239 -95

app.py CHANGED Viewed

@@ -1,120 +1,264 @@
-from fastapi import FastAPI, File, UploadFile, Form
-from fastapi.responses import JSONResponse, StreamingResponse
-from fastapi.middleware.cors import CORSMiddleware
 import pandas as pd
 from google import genai
 from google.genai import types
-import os
-import json
-import asyncio
-# -------------------------------
-# 🔑 Configuration
-# -------------------------------
-API_KEY = os.getenv("GEMINI_API_KEY", "AIzaSyB1jgGCuzg7ELPwNEEwaluQZoZhxhgLmAs")
-MODEL = "gemini-2.5-flash-lite"
-client = genai.Client(api_key=API_KEY)
-# -------------------------------
-# ⚡ FastAPI Setup
-# -------------------------------
-app = FastAPI()
-# Enable CORS
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_methods=["*"],
-    allow_headers=["*"],
 )
-# -------------------------------
-# 🛠️ Helper Functions
-# -------------------------------
-def get_metadata(df: pd.DataFrame):
-    # Convert all timestamps to string to avoid JSON serialization issues
-    df_serializable = df.copy()
-    for col in df_serializable.select_dtypes(include=['datetime64[ns]']).columns:
-        df_serializable[col] = df_serializable[col].astype(str)
     return {
-        "columns": list(df_serializable.columns),
-        "dtypes": df_serializable.dtypes.apply(lambda x: str(x)).to_dict(),
-        "num_rows": df_serializable.shape[0],
-        "num_cols": df_serializable.shape[1],
-        "null_counts": df_serializable.isnull().sum().to_dict(),
-        "unique_counts": df_serializable.nunique().to_dict(),
-        "sample_rows": df_serializable.head(3).to_dict(orient="records"),
     }
-async def stream_insights(user_query, metadata):
-    """Stream insights step by step."""
-    yield json.dumps({"status": "started", "message": "File received. Extracting metadata..."}) + "\n"
-    await asyncio.sleep(0.5)
-    yield json.dumps({"status": "metadata", "metadata": metadata}) + "\n"
-    await asyncio.sleep(0.5)
-    # Gemini system prompt
-    system_prompt = """
-    You are a data analysis assistant.
-    Always return JSON with this schema:
     {
-      "excel_info": {...},
-      "data_type_context": "...",
-      "auto_insights": {
-        "insights": [
-          {... Efficiency Analysis ...},
-          {... Cumulative Performance ...},
-          {... Process Issues ...},
-          {... Planning vs Projection ...},
-          {... Loss Analysis ...}
-        ]
-      },
-      "query_insights": {...}
     }
     """
-    user_prompt = f"Dataset metadata: {metadata}\nUser request: {user_query}"
-    contents = [types.Content(role="user", parts=[types.Part.from_text(text=user_prompt)])]
-    config = types.GenerateContentConfig(
-        temperature=0,
-        max_output_tokens=2000,
-        system_instruction=[types.Part.from_text(text=system_prompt)],
-    )
-    result = ""
-    for chunk in client.models.generate_content_stream(model=MODEL, contents=contents, config=config):
-        if chunk.text:
-            result += chunk.text
     try:
-        parsed = json.loads(result)
-    except Exception:
-        yield json.dumps({"status": "error", "raw_output": result}) + "\n"
-        return
-    yield json.dumps({"status": "excel_info", "excel_info": parsed.get("excel_info", {})}) + "\n"
-    await asyncio.sleep(0.5)
-    yield json.dumps({"status": "context", "data_type_context": parsed.get("data_type_context", "")}) + "\n"
-    await asyncio.sleep(0.5)
-    for insight in parsed.get("auto_insights", {}).get("insights", []):
-        yield json.dumps({"status": "insight", "insight": insight}) + "\n"
-        await asyncio.sleep(0.5)
-    yield json.dumps({"status": "query", "query_insights": parsed.get("query_insights", {})}) + "\n"
-    yield json.dumps({"status": "completed", "message": "All insights generated"}) + "\n"
-# -------------------------------
-# 🌐 Routes
-# -------------------------------
-@app.post("/stream_insights")
-async def stream_insight_file(file: UploadFile = File(...), query: str = Form("Analyze the dataset")):
     try:
-        df = pd.read_excel(file.file)
     except Exception as e:
-        return JSONResponse({"success": False, "error": f"Failed to read file: {str(e)}"})
-    metadata = get_metadata(df)
-    return StreamingResponse(stream_insights(query, metadata), media_type="application/json")

+# -----------------------------
+# Imports
+# -----------------------------
+import os
+import uuid
+import json
+import logging
+import subprocess
+import sys
+from pathlib import Path
 import pandas as pd
+from dotenv import load_dotenv
+from fastapi import FastAPI, UploadFile, File, HTTPException, Body
+from fastapi.responses import JSONResponse
+from pydantic import BaseModel, Field
 from google import genai
 from google.genai import types
+# -----------------------------
+# Initial Configuration
+# -----------------------------
+# Pull settings from a local .env file into the process environment.
+load_dotenv()
+# Root logging: INFO and above, each record stamped with time, logger name and level.
+logging.basicConfig(
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    level=logging.INFO,
+)
+logger = logging.getLogger(__name__)
+# Uploaded workbooks are kept in an 'uploads' directory (relative to the
+# working directory); make sure it exists before any request arrives.
+UPLOADS_DIR = Path("uploads")
+UPLOADS_DIR.mkdir(exist_ok=True)
+# -----------------------------
+# Initialize Gemini Client & FastAPI App
+# -----------------------------
+# Configure the Gemini client with the API key from environment variables
+try:
+    # Read the key from the environment (see load_dotenv() above) instead of
+    # embedding it in source.
+    # NOTE(review): the key that used to be hard-coded on this line is exposed
+    # in the public commit history and should be revoked.
+    api_key = os.getenv("GOOGLE_API_KEY")
+    if not api_key:
+        raise ValueError("GOOGLE_API_KEY not found in environment variables.")
+    # `from google import genai` is the google-genai SDK, which is driven
+    # through a Client instance; it has no module-level configure() or
+    # GenerativeModel (those belong to the older google-generativeai package).
+    client = genai.Client(api_key=api_key)
+    logger.info("Google GenAI client configured successfully.")
+except Exception as e:
+    logger.error(f"Failed to configure Google GenAI client: {e}")
+    # We exit if the client can't be configured as the app is useless without it.
+    sys.exit(1)
+# Model id shared by both prompts; override with the GEMINI_MODEL env var.
+# NOTE(review): confirm "gemini-pro" is still served for the configured key.
+GEMINI_MODEL = os.getenv("GEMINI_MODEL", "gemini-pro")
+# Initialize FastAPI app
+app = FastAPI(
+    title="Data Analysis and Visualization API",
+    description="An API to analyze Excel files and generate Python code for visualizations using Google's Gemini.",
+    version="1.0.0"
 )
+# -----------------------------
+# Pydantic Models for API I/O
+# -----------------------------
+# Response body of POST /analyze.
+class AnalysisResponse(BaseModel):
+    file_id: str = Field(..., description="Unique identifier for the uploaded file.")
+    summary: str = Field(..., description="AI-generated summary of the data.")
+    suggestions: list[str] = Field(..., description="List of AI-generated analysis/visualization suggestions.")
+# Request body of POST /visualize.
+class VisualizationRequest(BaseModel):
+    file_id: str = Field(..., description="The unique identifier of the file to be visualized.")
+    command: str = Field(..., description="The selected suggestion/command from the analysis step.")
+# Response body of POST /visualize.
+class VisualizationResponse(BaseModel):
+    type: str = Field(..., description="The type of visualization (e.g., 'bar', 'pie').")
+    explanation: str = Field(..., description="A one-sentence description of the visualization.")
+    data: dict | list = Field(..., description="The numeric JSON data produced by the executed code.")
+    generated_code: str = Field(..., description="The Python code that was generated and executed.")
+# -----------------------------
+# Helper Functions (Adapted from your script)
+# -----------------------------
+def get_metadata(df: pd.DataFrame) -> dict:
+    """Extract prompt-ready metadata from a pandas DataFrame.
+
+    Returns a dict with the column labels, per-column dtype names, null
+    counts, distinct-value counts and the first three rows as records.
+    Datetime columns in the sample rows are rendered as strings.
+    """
+    # Timestamp cells are not JSON serializable and would make the later
+    # json.dumps() of this dict raise, so stringify datetime columns.
+    # Only the small preview copy is modified; `df` itself is left untouched.
+    sample = df.head(3).copy()
+    for col in sample.select_dtypes(include=["datetime", "datetimetz"]).columns:
+        sample[col] = sample[col].astype(str)
     return {
+        "columns": list(df.columns),
+        "dtypes": df.dtypes.apply(str).to_dict(),
+        "null_counts": df.isnull().sum().to_dict(),
+        "unique_counts": df.nunique().to_dict(),
+        "sample_rows": sample.to_dict(orient="records")
     }
+def generate_metadata_analysis(metadata: dict) -> dict:
+    """Ask Gemini for a JSON summary and suggestions based on `metadata`.
+
+    Returns the model's reply parsed from JSON; the prompt requests the keys
+    "summary" and "suggestions".
+
+    Raises:
+        HTTPException: status 500 if serializing the metadata, calling the
+            model, or parsing its reply fails.
+    """
+    system_instruction = """
+    You are a structured data analysis AI. Your output must be strict JSON.
+    1. Summary:
+    Provide a concise description of what kind of data this is, what it likely represents, and its domain or use-case. Indicate assumptions if needed.
+    2. Suggestions:
+    Provide exactly three actionable analyses and visualizations based on the metadata. For each, specify the columns to use and the type of insight to be gained.
+    Respond in this exact JSON format:
     {
+      "summary": "<short summary>",
+      "suggestions": ["<analysis #1>", "<analysis #2>", "<analysis #3>"]
     }
     """
+    try:
+        # default=str is deliberate: this text is only read by the model, so a
+        # readable rendering of unusual cell values (e.g. time objects) is
+        # preferable to failing the whole request.
+        metadata_text = json.dumps(metadata, indent=2, default=str)
+        response = client.models.generate_content(
+            model=GEMINI_MODEL,
+            contents=f"Analyze the following structured data metadata:\n{metadata_text}",
+            config=types.GenerateContentConfig(
+                system_instruction=system_instruction,
+                response_mime_type="application/json",
+            ),
+        )
+        return json.loads(response.text)
+    except Exception as e:
+        logger.error(f"Error generating metadata analysis from Gemini: {e}")
+        raise HTTPException(status_code=500, detail="Failed to get analysis from AI model.") from e
+def generate_visualization_code(file_path: str, command: str) -> dict:
+    """Ask Gemini for Python code that produces the data for a visualization.
+
+    `file_path` is interpolated into the system prompt so the generated code
+    reads that exact file; `command` is the user-selected suggestion.
+
+    Returns the model's reply parsed from JSON; the prompt requests the keys
+    "type", "code" and "explanation".
+
+    Raises:
+        HTTPException: status 500 if the model call or JSON parsing fails.
+    """
+    system_instruction = f"""
+    You are a Python assistant that MUST return output strictly in JSON format and NOTHING else.
+    The top-level JSON MUST contain exactly three keys in this order: "type", "code", "explanation".
+    Requirements:
+    - "type": The suggested visualization type as a lowercase string (e.g., "bar", "pie", "line", "scatter").
+    - "code": A string of Python code. This code MUST print a JSON object to standard output. The JSON should contain the data needed for the plot. Use pandas to process the data.
+    - The code must access the data using this exact line: df = pd.read_excel(r"{file_path}")
+    - "explanation": A concise, one-sentence description of what the visualization shows.
+    """
     try:
+        response = client.models.generate_content(
+            model=GEMINI_MODEL,
+            contents=f"Generate Python code to create a {command}",
+            config=types.GenerateContentConfig(
+                system_instruction=system_instruction,
+                response_mime_type="application/json",
+            ),
+        )
+        return json.loads(response.text)
+    except Exception as e:
+        logger.error(f"Error generating visualization code from Gemini: {e}")
+        raise HTTPException(status_code=500, detail="Failed to generate visualization code from AI model.") from e
+# -----------------------------
+# API Endpoints
+# -----------------------------
+@app.post("/analyze", response_model=AnalysisResponse)
+async def analyze_file(file: UploadFile = File(...)):
+    """
+    Upload an Excel file, get its metadata, and receive an AI-generated
+    summary and a list of visualization suggestions.
+
+    The upload is stored as '<file_id>_<original name>' in UPLOADS_DIR so it
+    can be found again by file_id. If anything fails after the file was
+    written, the stored copy is removed.
+
+    Raises:
+        HTTPException: 400 for a missing or non-Excel filename; 500 (or the
+            helper's own HTTPException) if processing fails.
+    """
+    # The filename is client-controlled: keep only its last path component
+    # (for both '/' and '\\' separators) so it cannot escape UPLOADS_DIR,
+    # and tolerate uploads that carry no filename at all.
+    original_name = Path((file.filename or "").replace("\\", "/")).name
+    if not original_name.lower().endswith(('.xlsx', '.xls')):
+        raise HTTPException(status_code=400, detail="Invalid file type. Please upload an Excel file.")
+    file_id = str(uuid.uuid4())
+    file_path = UPLOADS_DIR / f"{file_id}_{original_name}"
+    try:
+        # Save the uploaded file
+        with open(file_path, "wb") as buffer:
+            buffer.write(await file.read())
+        logger.info(f"File '{original_name}' saved as '{file_path.name}'")
+        # Process the file
+        df = pd.read_excel(file_path)
+        metadata = get_metadata(df)
+        logger.info(f"Metadata extracted for file_id: {file_id}")
+        analysis = generate_metadata_analysis(metadata)
+        logger.info(f"Metadata analysis generated for file_id: {file_id}")
+        return AnalysisResponse(
+            file_id=file_id,
+            summary=analysis.get("summary", "No summary provided."),
+            suggestions=analysis.get("suggestions", [])
+        )
+    except HTTPException:
+        # Already a well-formed API error (e.g. from the Gemini helper):
+        # clean up and pass it through instead of re-wrapping it.
+        file_path.unlink(missing_ok=True)
+        raise
+    except Exception as e:
+        logger.error(f"An error occurred during file analysis: {e}")
+        # Clean up the saved file in case of an error
+        file_path.unlink(missing_ok=True)
+        raise HTTPException(status_code=500, detail=f"An internal error occurred: {e}")
+@app.post("/visualize", response_model=VisualizationResponse)
+async def visualize_data(request: VisualizationRequest):
+    """
+    Generate and execute Python code for a visualization based on a file_id
+    and a selected command from the analysis step.
+
+    Raises:
+        HTTPException: 404 if file_id is malformed or matches no upload;
+            408 if the generated code exceeds the timeout; 500 if code
+            generation, execution or output parsing fails.
+    """
+    # file_id is interpolated into a glob pattern below, so accept only a
+    # well-formed UUID: a raw value such as '*' or '../x' would otherwise
+    # match files the caller never uploaded.
+    try:
+        file_id = str(uuid.UUID(request.file_id))
+    except ValueError:
+        logger.error(f"File with ID '{request.file_id}' not found.")
+        raise HTTPException(status_code=404, detail="File not found. Please re-upload the file.")
+    # Find the file corresponding to the file_id
+    matching_files = list(UPLOADS_DIR.glob(f"{file_id}_*"))
+    if not matching_files:
+        logger.error(f"File with ID '{request.file_id}' not found.")
+        raise HTTPException(status_code=404, detail="File not found. Please re-upload the file.")
+    file_path = matching_files[0]
+    logger.info(f"Found file '{file_path}' for file_id '{request.file_id}'")
+    # Generate the visualization code from Gemini
+    agent_output = generate_visualization_code(str(file_path), request.command)
+    code_to_run = agent_output.get("code")
+    if not code_to_run:
+        raise HTTPException(status_code=500, detail="AI model failed to generate valid code.")
+    logger.info(f"Code generated for command: '{request.command}'")
+    # --- Code execution in a child interpreter ---
+    # SECURITY NOTE(review): this is NOT a sandbox. The code is written by the
+    # model from a caller-supplied `command` and runs with the full privileges
+    # of this server process (filesystem, network, environment variables).
+    # The subprocess only provides a timeout and crash isolation; real
+    # isolation (container, restricted user, no network) must be added
+    # before this endpoint is exposed to untrusted callers.
     try:
+        logger.info("Executing generated code in a sandboxed subprocess...")
+        process = subprocess.run(
+            [sys.executable, "-c", code_to_run],
+            capture_output=True,
+            text=True,
+            check=True,  # Raises CalledProcessError for non-zero exit codes
+            timeout=15  # Add a timeout for safety
+        )
+        # The output from the script is expected to be a JSON string
+        stdout = process.stdout.strip()
+        logger.info(f"Code executed successfully. Stdout: {stdout[:200]}...") # Log first 200 chars
+        # Parse the JSON output from the executed code
+        chart_data = json.loads(stdout)
+        return VisualizationResponse(
+            type=agent_output.get("type", "unknown"),
+            explanation=agent_output.get("explanation", "No explanation provided."),
+            data=chart_data,
+            generated_code=code_to_run
+        )
+    except subprocess.CalledProcessError as e:
+        logger.error(f"Error executing generated code. Stderr: {e.stderr}")
+        raise HTTPException(status_code=500, detail=f"Error during code execution: {e.stderr}")
+    except json.JSONDecodeError:
+        logger.error(f"Failed to decode JSON from executed code's stdout. Output was: {stdout}")
+        raise HTTPException(status_code=500, detail="Generated code did not produce valid JSON output.")
+    except subprocess.TimeoutExpired:
+        logger.error("Code execution timed out.")
+        raise HTTPException(status_code=408, detail="Code execution took too long and was terminated.")
     except Exception as e:
+        logger.error(f"An unexpected error occurred during visualization: {e}")
+        raise HTTPException(status_code=500, detail=f"An unexpected error occurred: {e}")
+@app.get("/", include_in_schema=False)
+def root():
+    """Landing route, excluded from the OpenAPI schema, that points callers to /docs."""
+    greeting = "Welcome to the Data Analysis and Visualization API. Visit /docs for more info."
+    return {"message": greeting}