Spaces:

triflix
/

chatplotapi

Paused

App Files Files Community

triflix committed on Sep 23, 2025

Commit

0c92577

verified ·

1 Parent(s): 09a8302

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -50

app.py CHANGED Viewed

@@ -12,48 +12,51 @@ from pathlib import Path
 import pandas as pd
 from dotenv import load_dotenv
 from fastapi import FastAPI, UploadFile, File, HTTPException, Body
-from fastapi.responses import JSONResponse
 from pydantic import BaseModel, Field
 from google import genai
-from google.genai import types
 # -----------------------------
 # Initial Configuration
 # -----------------------------
-# Load environment variables from .env file
 load_dotenv()
 # Set up logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
-# Create an 'uploads' directory if it doesn't exist
-UPLOADS_DIR = Path("uploads")
-UPLOADS_DIR.mkdir(exist_ok=True)
 # -----------------------------
 # Initialize Gemini Client & FastAPI App
 # -----------------------------
-# Configure the Gemini client with the API key from environment variables
 try:
-    api_key = "AIzaSyB1jgGCuzg7ELPwNEEwaluQZoZhxhgLmAs"
     if not api_key:
-        raise ValueError("GOOGLE_API_KEY not found in environment variables.")
     genai.configure(api_key=api_key)
     logger.info("Google GenAI client configured successfully.")
 except Exception as e:
-    logger.error(f"Failed to configure Google GenAI client: {e}")
-    # We exit if the client can't be configured as the app is useless without it.
     sys.exit(1)
 # Initialize FastAPI app
 app = FastAPI(
     title="Data Analysis and Visualization API",
     description="An API to analyze Excel files and generate Python code for visualizations using Google's Gemini.",
-    version="1.0.0"
 )
 # -----------------------------
@@ -77,7 +80,7 @@ class VisualizationResponse(BaseModel):
 # -----------------------------
-# Helper Functions (Adapted from your script)
 # -----------------------------
 def get_metadata(df: pd.DataFrame) -> dict:
@@ -93,16 +96,16 @@ def get_metadata(df: pd.DataFrame) -> dict:
 def generate_metadata_analysis(metadata: dict) -> dict:
     """Generates a JSON summary and suggestions from metadata using Gemini."""
     metadata_text = json.dumps(metadata, indent=2)
-    model = "gemini-pro" # Using gemini-pro as it's better for this kind of structured generation
     system_instruction = """
     You are a structured data analysis AI. Your output must be strict JSON.
     1. Summary:
-    Provide a concise description of what kind of data this is, what it likely represents, and its domain or use-case. Indicate assumptions if needed.
     2. Suggestions:
-    Provide exactly three actionable analyses and visualizations based on the metadata. For each, specify the columns to use and the type of insight to be gained.
     Respond in this exact JSON format:
     {
@@ -110,7 +113,6 @@ def generate_metadata_analysis(metadata: dict) -> dict:
       "suggestions": ["<analysis #1>", "<analysis #2>", "<analysis #3>"]
     }
     """
     try:
         response = genai.GenerativeModel(
             model_name=model,
@@ -129,16 +131,13 @@ def generate_visualization_code(file_path: str, command: str) -> dict:
     model = "gemini-pro"
     system_instruction = f"""
-    You are a Python assistant that MUST return output strictly in JSON format and NOTHING else.
-    The top-level JSON MUST contain exactly three keys in this order: "type", "code", "explanation".
-    Requirements:
-    - "type": The suggested visualization type as a lowercase string (e.g., "bar", "pie", "line", "scatter").
-    - "code": A string of Python code. This code MUST print a JSON object to standard output. The JSON should contain the data needed for the plot. Use pandas to process the data.
-    - The code must access the data using this exact line: df = pd.read_excel(r"{file_path}")
-    - "explanation": A concise, one-sentence description of what the visualization shows.
-    """
     try:
         response = genai.GenerativeModel(
             model_name=model,
@@ -169,12 +168,10 @@ async def analyze_file(file: UploadFile = File(...)):
     file_path = UPLOADS_DIR / f"{file_id}_{file.filename}"
     try:
-        # Save the uploaded file
         with open(file_path, "wb") as buffer:
             buffer.write(await file.read())
-        logger.info(f"File '{file.filename}' saved as '{file_path.name}'")
-        # Process the file
         df = pd.read_excel(file_path)
         metadata = get_metadata(df)
         logger.info(f"Metadata extracted for file_id: {file_id}")
@@ -187,13 +184,11 @@ async def analyze_file(file: UploadFile = File(...)):
             summary=analysis.get("summary", "No summary provided."),
             suggestions=analysis.get("suggestions", [])
         )
     except Exception as e:
         logger.error(f"An error occurred during file analysis: {e}")
-        # Clean up the saved file in case of an error
         if file_path.exists():
             os.remove(file_path)
-        raise HTTPException(status_code=500, detail=f"An internal error occurred: {e}")
 @app.post("/visualize", response_model=VisualizationResponse)
@@ -202,40 +197,29 @@ async def visualize_data(request: VisualizationRequest):
     Generate and execute Python code for a visualization based on a file_id
     and a selected command from the analysis step.
     """
-    # Find the file corresponding to the file_id
     matching_files = list(UPLOADS_DIR.glob(f"{request.file_id}_*"))
     if not matching_files:
-        logger.error(f"File with ID '{request.file_id}' not found.")
-        raise HTTPException(status_code=404, detail="File not found. Please re-upload the file.")
     file_path = matching_files[0]
     logger.info(f"Found file '{file_path}' for file_id '{request.file_id}'")
-    # Generate the visualization code from Gemini
     agent_output = generate_visualization_code(str(file_path), request.command)
     code_to_run = agent_output.get("code")
     if not code_to_run:
         raise HTTPException(status_code=500, detail="AI model failed to generate valid code.")
     logger.info(f"Code generated for command: '{request.command}'")
-    # --- Safe Code Execution using subprocess ---
     try:
         logger.info("Executing generated code in a sandboxed subprocess...")
         process = subprocess.run(
             [sys.executable, "-c", code_to_run],
-            capture_output=True,
-            text=True,
-            check=True,  # Raises CalledProcessError for non-zero exit codes
-            timeout=15  # Add a timeout for safety
         )
-        # The output from the script is expected to be a JSON string
         stdout = process.stdout.strip()
-        logger.info(f"Code executed successfully. Stdout: {stdout[:200]}...") # Log first 200 chars
-        # Parse the JSON output from the executed code
         chart_data = json.loads(stdout)
         return VisualizationResponse(
@@ -244,12 +228,11 @@ async def visualize_data(request: VisualizationRequest):
             data=chart_data,
             generated_code=code_to_run
         )
     except subprocess.CalledProcessError as e:
         logger.error(f"Error executing generated code. Stderr: {e.stderr}")
         raise HTTPException(status_code=500, detail=f"Error during code execution: {e.stderr}")
     except json.JSONDecodeError:
-        logger.error(f"Failed to decode JSON from executed code's stdout. Output was: {stdout}")
         raise HTTPException(status_code=500, detail="Generated code did not produce valid JSON output.")
     except subprocess.TimeoutExpired:
         logger.error("Code execution timed out.")
@@ -261,4 +244,4 @@ async def visualize_data(request: VisualizationRequest):
 @app.get("/", include_in_schema=False)
 def root():
-    return {"message": "Welcome to the Data Analysis and Visualization API. Visit /docs for more info."}

 import pandas as pd
 from dotenv import load_dotenv
 from fastapi import FastAPI, UploadFile, File, HTTPException, Body
 from pydantic import BaseModel, Field
 from google import genai
+from google.generativeai import types
 # -----------------------------
 # Initial Configuration
 # -----------------------------
+# Load environment variables (will load from Hugging Face secrets)
 load_dotenv()
 # Set up logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
+# --- MODIFICATION FOR HUGGING FACE ---
+# Use the /tmp directory for ephemeral file storage.
+# This is a standard temporary directory in Linux environments like HF Spaces.
+UPLOADS_DIR = Path("/tmp/uploads")
+# Create the directory; parents=True ensures creation of parent dirs if needed.
+UPLOADS_DIR.mkdir(parents=True, exist_ok=True)
+logger.info(f"Using temporary directory for uploads: {UPLOADS_DIR}")
 # -----------------------------
 # Initialize Gemini Client & FastAPI App
 # -----------------------------
+# Configure the Gemini client with the API key from environment variables/secrets
 try:
+    api_key = os.getenv("GOOGLE_API_KEY")
     if not api_key:
+        raise ValueError("GOOGLE_API_KEY not found in environment variables or secrets.")
     genai.configure(api_key=api_key)
     logger.info("Google GenAI client configured successfully.")
 except Exception as e:
+    logger.error(f"FATAL: Failed to configure Google GenAI client: {e}")
+    # Exit if the client can't be configured, as the app is non-functional without it.
     sys.exit(1)
 # Initialize FastAPI app
 app = FastAPI(
     title="Data Analysis and Visualization API",
     description="An API to analyze Excel files and generate Python code for visualizations using Google's Gemini.",
+    version="1.1.0"
 )
 # -----------------------------
 # -----------------------------
+# Helper Functions
 # -----------------------------
 def get_metadata(df: pd.DataFrame) -> dict:
 def generate_metadata_analysis(metadata: dict) -> dict:
     """Generates a JSON summary and suggestions from metadata using Gemini."""
     metadata_text = json.dumps(metadata, indent=2)
+    model = "gemini-pro"
     system_instruction = """
     You are a structured data analysis AI. Your output must be strict JSON.
     1. Summary:
+    Provide a concise description of what kind of data this is, what it likely represents, and its domain or use-case.
     2. Suggestions:
+    Provide exactly three actionable analyses and visualizations based on the metadata.
     Respond in this exact JSON format:
     {
       "suggestions": ["<analysis #1>", "<analysis #2>", "<analysis #3>"]
     }
     """
     try:
         response = genai.GenerativeModel(
             model_name=model,
     model = "gemini-pro"
     system_instruction = f"""
+    You are a Python assistant that MUST return output strictly in JSON format.
+    The JSON MUST contain exactly three keys: "type", "code", "explanation".
+    - "type": Lowercase visualization type (e.g., "bar", "pie", "line").
+    - "code": A string of Python code that prints a JSON object to standard output. The code must access data using this exact line: df = pd.read_excel(r"{file_path}")
+    - "explanation": A one-sentence description of the visualization.
+    """
     try:
         response = genai.GenerativeModel(
             model_name=model,
     file_path = UPLOADS_DIR / f"{file_id}_{file.filename}"
     try:
         with open(file_path, "wb") as buffer:
             buffer.write(await file.read())
+        logger.info(f"File '{file.filename}' saved to temp path '{file_path}'")
         df = pd.read_excel(file_path)
         metadata = get_metadata(df)
         logger.info(f"Metadata extracted for file_id: {file_id}")
             summary=analysis.get("summary", "No summary provided."),
             suggestions=analysis.get("suggestions", [])
         )
     except Exception as e:
         logger.error(f"An error occurred during file analysis: {e}")
         if file_path.exists():
             os.remove(file_path)
+        raise HTTPException(status_code=500, detail=f"An internal server error occurred: {e}")
 @app.post("/visualize", response_model=VisualizationResponse)
     Generate and execute Python code for a visualization based on a file_id
     and a selected command from the analysis step.
     """
     matching_files = list(UPLOADS_DIR.glob(f"{request.file_id}_*"))
     if not matching_files:
+        logger.error(f"File with ID '{request.file_id}' not found in {UPLOADS_DIR}.")
+        raise HTTPException(status_code=404, detail="File not found. It may have been cleared from the temporary cache. Please re-upload.")
     file_path = matching_files[0]
     logger.info(f"Found file '{file_path}' for file_id '{request.file_id}'")
     agent_output = generate_visualization_code(str(file_path), request.command)
     code_to_run = agent_output.get("code")
     if not code_to_run:
         raise HTTPException(status_code=500, detail="AI model failed to generate valid code.")
     logger.info(f"Code generated for command: '{request.command}'")
     try:
         logger.info("Executing generated code in a sandboxed subprocess...")
         process = subprocess.run(
             [sys.executable, "-c", code_to_run],
+            capture_output=True, text=True, check=True, timeout=20
         )
         stdout = process.stdout.strip()
+        logger.info(f"Code executed successfully. Stdout length: {len(stdout)}")
         chart_data = json.loads(stdout)
         return VisualizationResponse(
             data=chart_data,
             generated_code=code_to_run
         )
     except subprocess.CalledProcessError as e:
         logger.error(f"Error executing generated code. Stderr: {e.stderr}")
         raise HTTPException(status_code=500, detail=f"Error during code execution: {e.stderr}")
     except json.JSONDecodeError:
+        logger.error(f"Failed to decode JSON from stdout. Output was: {stdout}")
         raise HTTPException(status_code=500, detail="Generated code did not produce valid JSON output.")
     except subprocess.TimeoutExpired:
         logger.error("Code execution timed out.")
 @app.get("/", include_in_schema=False)
 def root():
+    return {"message": "Welcome to the Data Analysis API. Visit /docs for the API interface."}