Spaces:

triflix
/

chatplotapi

Paused

App Files Files Community

triflix committed on Sep 23, 2025

Commit

27c947d

verified ·

1 Parent(s): 79211c5

Update app.py

Browse files

Files changed (1) hide show

app.py +103 -217

app.py CHANGED Viewed

@@ -1,90 +1,34 @@
-# -----------------------------
-# Imports
-# -----------------------------
 import os
 import uuid
 import json
-import logging
-import subprocess
-import sys
-from pathlib import Path
 import pandas as pd
-from dotenv import load_dotenv
-from fastapi import FastAPI, UploadFile, File, HTTPException, Body
-from pydantic import BaseModel, Field
 from google import genai
-from google.generativeai import types
 # -----------------------------
-# Initial Configuration
 # -----------------------------
-# Load environment variables (will load from Hugging Face secrets)
-load_dotenv()
-# Set up logging
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
-logger = logging.getLogger(__name__)
-# --- MODIFICATION FOR HUGGING FACE ---
-# Use the /tmp directory for ephemeral file storage.
-# This is a standard temporary directory in Linux environments like HF Spaces.
-UPLOADS_DIR = Path("/tmp/uploads")
-# Create the directory; parents=True ensures creation of parent dirs if needed.
-UPLOADS_DIR.mkdir(parents=True, exist_ok=True)
-logger.info(f"Using temporary directory for uploads: {UPLOADS_DIR}")
 # -----------------------------
-# Initialize Gemini Client & FastAPI App
 # -----------------------------
-# Configure the Gemini client with the API key from environment variables/secrets
-try:
-    api_key = os.getenv("GOOGLE_API_KEY")
-    if not api_key:
-        raise ValueError("GOOGLE_API_KEY not found in environment variables or secrets.")
-    genai.configure(api_key=api_key)
-    logger.info("Google GenAI client configured successfully.")
-except Exception as e:
-    logger.error(f"FATAL: Failed to configure Google GenAI client: {e}")
-    # Exit if the client can't be configured, as the app is non-functional without it.
-    sys.exit(1)
-# Initialize FastAPI app
-app = FastAPI(
-    title="Data Analysis and Visualization API",
-    description="An API to analyze Excel files and generate Python code for visualizations using Google's Gemini.",
-    version="1.1.0"
-)
 # -----------------------------
-# Pydantic Models for API I/O
 # -----------------------------
-class AnalysisResponse(BaseModel):
-    file_id: str = Field(..., description="Unique identifier for the uploaded file.")
-    summary: str = Field(..., description="AI-generated summary of the data.")
-    suggestions: list[str] = Field(..., description="List of AI-generated analysis/visualization suggestions.")
-class VisualizationRequest(BaseModel):
-    file_id: str = Field(..., description="The unique identifier of the file to be visualized.")
-    command: str = Field(..., description="The selected suggestion/command from the analysis step.")
-class VisualizationResponse(BaseModel):
-    type: str = Field(..., description="The type of visualization (e.g., 'bar', 'pie').")
-    explanation: str = Field(..., description="A one-sentence description of the visualization.")
-    data: dict | list = Field(..., description="The numeric JSON data produced by the executed code.")
-    generated_code: str = Field(..., description="The Python code that was generated and executed.")
-# -----------------------------
-# Helper Functions
-# -----------------------------
-def get_metadata(df: pd.DataFrame) -> dict:
-    """Extracts metadata from a pandas DataFrame."""
     return {
         "columns": list(df.columns),
         "dtypes": df.dtypes.apply(str).to_dict(),
@@ -93,155 +37,97 @@ def get_metadata(df: pd.DataFrame) -> dict:
         "sample_rows": df.head(3).to_dict(orient="records")
     }
-def generate_metadata_analysis(metadata: dict) -> dict:
-    """Generates a JSON summary and suggestions from metadata using Gemini."""
-    metadata_text = json.dumps(metadata, indent=2)
-    model = "gemini-pro"
-    system_instruction = """
-    You are a structured data analysis AI. Your output must be strict JSON.
-    1. Summary:
-    Provide a concise description of what kind of data this is, what it likely represents, and its domain or use-case.
-    2. Suggestions:
-    Provide exactly three actionable analyses and visualizations based on the metadata.
-    Respond in this exact JSON format:
-    {
-      "summary": "<short summary>",
-      "suggestions": ["<analysis #1>", "<analysis #2>", "<analysis #3>"]
-    }
-    """
-    try:
-        response = genai.GenerativeModel(
-            model_name=model,
-            system_instruction=system_instruction
-        ).generate_content(
-            f"Analyze the following structured data metadata:\n{metadata_text}",
-            generation_config=types.GenerationConfig(response_mime_type="application/json")
-        )
-        return json.loads(response.text)
-    except Exception as e:
-        logger.error(f"Error generating metadata analysis from Gemini: {e}")
-        raise HTTPException(status_code=500, detail="Failed to get analysis from AI model.")
-def generate_visualization_code(file_path: str, command: str) -> dict:
-    """Generates Python code for visualization based on a user command."""
-    model = "gemini-pro"
-    system_instruction = f"""
-    You are a Python assistant that MUST return output strictly in JSON format.
-    The JSON MUST contain exactly three keys: "type", "code", "explanation".
-    - "type": Lowercase visualization type (e.g., "bar", "pie", "line").
-    - "code": A string of Python code that prints a JSON object to standard output. The code must access data using this exact line: df = pd.read_excel(r"{file_path}")
-    - "explanation": A one-sentence description of the visualization.
-    """
-    try:
-        response = genai.GenerativeModel(
-            model_name=model,
-            system_instruction=system_instruction
-        ).generate_content(
-            f"Generate Python code to create a {command}",
-            generation_config=types.GenerationConfig(response_mime_type="application/json")
-        )
-        return json.loads(response.text)
-    except Exception as e:
-        logger.error(f"Error generating visualization code from Gemini: {e}")
-        raise HTTPException(status_code=500, detail="Failed to generate visualization code from AI model.")
-# -----------------------------
-# API Endpoints
-# -----------------------------
-@app.post("/analyze", response_model=AnalysisResponse)
-async def analyze_file(file: UploadFile = File(...)):
-    """
-    Upload an Excel file, get its metadata, and receive an AI-generated
-    summary and a list of visualization suggestions.
-    """
-    if not file.filename.endswith(('.xlsx', '.xls')):
-        raise HTTPException(status_code=400, detail="Invalid file type. Please upload an Excel file.")
-    file_id = str(uuid.uuid4())
-    file_path = UPLOADS_DIR / f"{file_id}_{file.filename}"
-    try:
-        with open(file_path, "wb") as buffer:
-            buffer.write(await file.read())
-        logger.info(f"File '{file.filename}' saved to temp path '{file_path}'")
-        df = pd.read_excel(file_path)
-        metadata = get_metadata(df)
-        logger.info(f"Metadata extracted for file_id: {file_id}")
-        analysis = generate_metadata_analysis(metadata)
-        logger.info(f"Metadata analysis generated for file_id: {file_id}")
-        return AnalysisResponse(
-            file_id=file_id,
-            summary=analysis.get("summary", "No summary provided."),
-            suggestions=analysis.get("suggestions", [])
-        )
-    except Exception as e:
-        logger.error(f"An error occurred during file analysis: {e}")
-        if file_path.exists():
-            os.remove(file_path)
-        raise HTTPException(status_code=500, detail=f"An internal server error occurred: {e}")
-@app.post("/visualize", response_model=VisualizationResponse)
-async def visualize_data(request: VisualizationRequest):
-    """
-    Generate and execute Python code for a visualization based on a file_id
-    and a selected command from the analysis step.
-    """
-    matching_files = list(UPLOADS_DIR.glob(f"{request.file_id}_*"))
-    if not matching_files:
-        logger.error(f"File with ID '{request.file_id}' not found in {UPLOADS_DIR}.")
-        raise HTTPException(status_code=404, detail="File not found. It may have been cleared from the temporary cache. Please re-upload.")
-    file_path = matching_files[0]
-    logger.info(f"Found file '{file_path}' for file_id '{request.file_id}'")
-    agent_output = generate_visualization_code(str(file_path), request.command)
-    code_to_run = agent_output.get("code")
-    if not code_to_run:
-        raise HTTPException(status_code=500, detail="AI model failed to generate valid code.")
-    logger.info(f"Code generated for command: '{request.command}'")
-    try:
-        logger.info("Executing generated code in a sandboxed subprocess...")
-        process = subprocess.run(
-            [sys.executable, "-c", code_to_run],
-            capture_output=True, text=True, check=True, timeout=20
-        )
-        stdout = process.stdout.strip()
-        logger.info(f"Code executed successfully. Stdout length: {len(stdout)}")
-        chart_data = json.loads(stdout)
-        return VisualizationResponse(
-            type=agent_output.get("type", "unknown"),
-            explanation=agent_output.get("explanation", "No explanation provided."),
-            data=chart_data,
-            generated_code=code_to_run
-        )
-    except subprocess.CalledProcessError as e:
-        logger.error(f"Error executing generated code. Stderr: {e.stderr}")
-        raise HTTPException(status_code=500, detail=f"Error during code execution: {e.stderr}")
-    except json.JSONDecodeError:
-        logger.error(f"Failed to decode JSON from stdout. Output was: {stdout}")
-        raise HTTPException(status_code=500, detail="Generated code did not produce valid JSON output.")
-    except subprocess.TimeoutExpired:
-        logger.error("Code execution timed out.")
-        raise HTTPException(status_code=408, detail="Code execution took too long and was terminated.")
-    except Exception as e:
-        logger.error(f"An unexpected error occurred during visualization: {e}")
-        raise HTTPException(status_code=500, detail=f"An unexpected error occurred: {e}")
-@app.get("/", include_in_schema=False)
-def root():
-    return {"message": "Welcome to the Data Analysis API. Visit /docs for the API interface."}

 import os
 import uuid
 import json
+from fastapi import FastAPI, File, UploadFile, Form
+from fastapi.responses import HTMLResponse, JSONResponse
+from fastapi.staticfiles import StaticFiles
+from fastapi.templating import Jinja2Templates
+from fastapi.requests import Request
 import pandas as pd
 from google import genai
+from google.genai import types
 # -----------------------------
+# FastAPI setup
 # -----------------------------
+# Application object, static assets served under /static, and the Jinja2 template loader.
+app = FastAPI()
+app.mount("/static", StaticFiles(directory="static"), name="static")
+templates = Jinja2Templates(directory="templates")
 # -----------------------------
+# Gemini client setup
 # -----------------------------
+# The API key is read from the environment (e.g. a Space secret) rather than being
+# embedded in source: a key committed to a public repository is readable by anyone
+# and has to be treated as compromised and rotated.
+_GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
+if not _GOOGLE_API_KEY:
+    raise RuntimeError("GOOGLE_API_KEY is not set; configure it as an environment variable or secret.")
+client = genai.Client(api_key=_GOOGLE_API_KEY)
+# Uploaded workbooks are written here; the path is relative to the working directory.
+UPLOAD_DIR = "tmp/uploads"
+os.makedirs(UPLOAD_DIR, exist_ok=True)
 # -----------------------------
+# Helper functions
 # -----------------------------
+def get_metadata(df):
     return {
         "columns": list(df.columns),
         "dtypes": df.dtypes.apply(str).to_dict(),
         "sample_rows": df.head(3).to_dict(orient="records")
     }
+def generate_metadata_analysis(metadata):
+    """Ask Gemini for a short summary of the data and up to three analysis ideas.
+
+    Args:
+        metadata: Description of the uploaded sheet; its ``str()`` form is
+            embedded verbatim in the user prompt.
+
+    Returns:
+        The model reply parsed from JSON. The system instruction asks for the
+        keys ``"Summary"`` and ``"Suggestion"``, but the reply is not validated
+        here, so callers should not assume both are present.
+
+    Raises:
+        json.JSONDecodeError: If the streamed reply is empty or not valid JSON.
+    """
+    metadata_text = str(metadata)
+    model = "gemini-2.5-flash-lite"
+    contents = [
+        types.Content(
+            role="user",
+            parts=[types.Part.from_text(
+                text=f"Analyze the following structured data metadata:\n{metadata_text}"
+            )],
+        ),
+    ]
+    generate_content_config = types.GenerateContentConfig(
+        thinking_config=types.ThinkingConfig(thinking_budget=0),
+        response_mime_type="application/json",
+        system_instruction=[types.Part.from_text(text="""You are a structured data analysis AI.
+1️⃣ Summary: concise description of data, assumptions
+2️⃣ Suggestions: up to 3 actionable analyses/visualizations
+Output must be strict JSON: {"Summary": "<short summary>", "Suggestion": ["<analysis #1>", "<analysis #2>", "<analysis #3>"]}
+""")],
+    )
+    stream = client.models.generate_content_stream(
+        model=model,
+        contents=contents,
+        config=generate_content_config,
+    )
+    # A streamed chunk without a text part reports ``text`` as None; treat it as
+    # empty so a single such chunk does not abort the request with a TypeError.
+    output_text = "".join(chunk.text or "" for chunk in stream)
+    return json.loads(output_text)
+def generate_visualization(command, file_path):
+    """Ask Gemini for plotting code that answers ``command`` for one workbook.
+
+    The generated code is only returned to the caller; nothing is executed here.
+
+    Args:
+        command: Natural-language request, sent as the user message.
+        file_path: Path of the workbook; it is interpolated into the system
+            prompt as the ``pd.read_excel`` argument the generated code must use.
+
+    Returns:
+        The model reply parsed from JSON. The system prompt asks for the keys
+        ``"type"``, ``"code"`` and ``"explanation"``; the reply is not validated.
+
+    Raises:
+        json.JSONDecodeError: If the streamed reply is empty or not valid JSON.
+    """
+    system_prompt_text = f"""
+You are a Python assistant that MUST return output strictly in JSON format and NOTHING else.
+The top-level JSON MUST contain exactly three keys in this order: "type", "code", "explanation".
+Requirements:
+- "type": visualization type ("bar", "pie", "line", etc.)
+- "code": Python code as a string that prints numeric JSON to stdout. Use this for data access: df = pd.read_excel(r"{file_path}")
+- "explanation": one-sentence description
+"""
+    model = "gemini-2.5-flash-lite"
+    contents = [types.Content(role="user", parts=[types.Part.from_text(text=command)])]
+    generate_content_config = types.GenerateContentConfig(
+        thinking_config=types.ThinkingConfig(thinking_budget=0),
+        response_mime_type="application/json",
+        system_instruction=[types.Part.from_text(text=system_prompt_text)],
+    )
+    stream = client.models.generate_content_stream(
+        model=model,
+        contents=contents,
+        config=generate_content_config,
+    )
+    # A streamed chunk without a text part reports ``text`` as None; treat it as
+    # empty so a single such chunk does not abort the request with a TypeError.
+    output = "".join(chunk.text or "" for chunk in stream)
+    return json.loads(output)
+# -----------------------------
+# Routes
+# -----------------------------
+@app.get("/", response_class=HTMLResponse)
+def home(request: Request):
+    """Render the ``index.html`` template as the landing page."""
+    # The template context carries the current request object.
+    context = {"request": request}
+    return templates.TemplateResponse("index.html", context)
+@app.post("/upload", response_class=JSONResponse)
+async def upload_excel(file: UploadFile = File(...)):
+    """Save an uploaded Excel workbook and return its metadata and AI analysis.
+
+    The file is written to ``UPLOAD_DIR`` under a fresh UUID name, read with
+    pandas, described by ``get_metadata`` and analysed by
+    ``generate_metadata_analysis``.
+
+    Returns:
+        JSON with ``file_path`` (to be sent back to ``/generate_plot``),
+        ``metadata`` and ``analysis``.
+
+    Raises:
+        HTTPException: 400 when the upload has no ``.xlsx``/``.xls`` extension.
+        Exception: Any error from saving, parsing or analysis propagates
+            unchanged after the stored upload has been removed.
+    """
+    from fastapi import HTTPException  # local import: not in the module import list
+
+    # ``filename`` may be absent on a multipart part; treat that as "no extension".
+    file_ext = os.path.splitext(file.filename or "")[1].lower()
+    if file_ext not in (".xlsx", ".xls"):
+        raise HTTPException(status_code=400, detail="Invalid file type. Please upload an Excel file.")
+    file_id = str(uuid.uuid4())
+    file_path = os.path.join(UPLOAD_DIR, f"{file_id}{file_ext}")
+    try:
+        with open(file_path, "wb") as f:
+            f.write(await file.read())
+        df = pd.read_excel(file_path)
+        metadata = get_metadata(df)
+        analysis = generate_metadata_analysis(metadata)
+    except Exception:
+        # Do not leave an orphaned upload behind when any step above fails.
+        if os.path.exists(file_path):
+            os.remove(file_path)
+        raise
+    # Nothing is kept server-side: the client receives the path and must send it
+    # back with its plot request.
+    session_data = {
+        "file_path": file_path,
+        "metadata": metadata,
+        "analysis": analysis
+    }
+    return JSONResponse(session_data)
+@app.post("/generate_plot", response_class=JSONResponse)
+async def generate_plot(command: str = Form(...), file_path: str = Form(...)):
+    """Return model-generated visualization JSON for a previously uploaded file.
+
+    Args:
+        command: Natural-language description of the desired plot.
+        file_path: Path returned by ``/upload``. It comes from the client, so it
+            is accepted only when it resolves to an existing file inside
+            ``UPLOAD_DIR``.
+
+    Raises:
+        HTTPException: 404 when ``file_path`` is outside ``UPLOAD_DIR`` or does
+            not exist.
+    """
+    from fastapi import HTTPException  # local import: not in the module import list
+
+    try:
+        upload_root = os.path.realpath(UPLOAD_DIR)
+        resolved_path = os.path.realpath(file_path)
+        inside_uploads = os.path.commonpath([upload_root, resolved_path]) == upload_root
+    except ValueError:
+        # Malformed paths (e.g. embedded NUL) or paths on another drive.
+        inside_uploads = False
+    if not inside_uploads or not os.path.isfile(resolved_path):
+        raise HTTPException(status_code=404, detail="File not found. Please re-upload.")
+    visualization_json = generate_visualization(command, file_path)
+    return JSONResponse(visualization_json)