Spaces:

triflix
/

chatplotapi

Sleeping

App Files Files Community

chatplotapi / app.py

triflix

Update app.py

0c92577 verified 4 months ago

raw

history blame

10.2 kB

	# -----------------------------
	# Imports
	# -----------------------------
	import os
	import uuid
	import json
	import logging
	import subprocess
	import sys
	from pathlib import Path

	import pandas as pd
	from dotenv import load_dotenv
	from fastapi import FastAPI, UploadFile, File, HTTPException, Body
	from pydantic import BaseModel, Field

	from google import genai
	from google.generativeai import types

	# -----------------------------
	# Initial Configuration
	# -----------------------------

	# Load environment variables first (on Hugging Face these come from the
	# Space's secrets) so everything below can read them.
	load_dotenv()

	# Root logging: INFO and above, each record stamped with time, logger
	# name and level.
	_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
	logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)
	logger = logging.getLogger(__name__)

	# --- MODIFICATION FOR HUGGING FACE ---
	# Uploaded workbooks are kept under /tmp, i.e. in ephemeral storage.
	UPLOADS_DIR = Path("/tmp/uploads")
	# exist_ok=True makes a restart with the directory already present a
	# no-op; parents=True creates any missing parent directories.
	UPLOADS_DIR.mkdir(exist_ok=True, parents=True)
	logger.info(f"Using temporary directory for uploads: {UPLOADS_DIR}")

	# -----------------------------
	# Initialize Gemini Client & FastAPI App
	# -----------------------------

	# This module calls `genai.configure(...)` and `genai.GenerativeModel(...)`.
	# Those names belong to the `google.generativeai` package (the one `types`
	# is imported from), whereas `from google import genai` binds the separate
	# google-genai SDK, which is organised around `genai.Client` instead.
	# Rebind `genai` here so every later use resolves to the API actually used.
	# NOTE(review): confirm `google-generativeai` is listed in the Space's
	# requirements.
	import google.generativeai as genai

	# Configure the Gemini client with the API key from environment
	# variables/secrets.
	try:
	    api_key = os.getenv("GOOGLE_API_KEY")
	    if not api_key:
	        raise ValueError("GOOGLE_API_KEY not found in environment variables or secrets.")
	    genai.configure(api_key=api_key)
	    logger.info("Google GenAI client configured successfully.")
	except Exception as e:
	    # Deliberately broad: whatever went wrong, the app is non-functional
	    # without a configured client, so log the reason and stop the process.
	    logger.error(f"FATAL: Failed to configure Google GenAI client: {e}")
	    sys.exit(1)

	# Build the FastAPI application object that the route decorators below
	# attach to; title, description and version are passed as app metadata.
	app = FastAPI(
	    title="Data Analysis and Visualization API",
	    description=(
	        "An API to analyze Excel files and generate Python code "
	        "for visualizations using Google's Gemini."
	    ),
	    version="1.1.0",
	)

	# -----------------------------
	# Pydantic Models for API I/O
	# -----------------------------

	class AnalysisResponse(BaseModel):
	    # Response body of POST /analyze. Every field is required
	    # (`default=...` is the "no default" marker).
	    file_id: str = Field(
	        default=...,
	        description="Unique identifier for the uploaded file.",
	    )
	    summary: str = Field(
	        default=...,
	        description="AI-generated summary of the data.",
	    )
	    suggestions: list[str] = Field(
	        default=...,
	        description="List of AI-generated analysis/visualization suggestions.",
	    )

	class VisualizationRequest(BaseModel):
	    # Request body of POST /visualize. Both fields are required
	    # (`default=...` is the "no default" marker).
	    file_id: str = Field(
	        default=...,
	        description="The unique identifier of the file to be visualized.",
	    )
	    command: str = Field(
	        default=...,
	        description="The selected suggestion/command from the analysis step.",
	    )

	class VisualizationResponse(BaseModel):
	    # Response body of POST /visualize. Every field is required.
	    type: str = Field(..., description="The type of visualization (e.g., 'bar', 'pie').")
	    explanation: str = Field(..., description="A one-sentence description of the visualization.")
	    # The union was written as `dict \| list`; the stray backslash is a
	    # syntax error, so it is spelled as a plain PEP 604 union here.
	    data: dict | list = Field(..., description="The numeric JSON data produced by the executed code.")
	    generated_code: str = Field(..., description="The Python code that was generated and executed.")


	# -----------------------------
	# Helper Functions
	# -----------------------------

	def get_metadata(df: pd.DataFrame) -> dict:
	    """Describe the structure of *df* as a plain dictionary.

	    The result has five keys: ``columns`` (column labels), ``dtypes``
	    (dtype name per column), ``null_counts`` (missing values per column),
	    ``unique_counts`` (result of ``DataFrame.nunique`` per column) and
	    ``sample_rows`` (the first three rows as a list of records).
	    """
	    column_labels = list(df.columns)
	    dtype_by_column = df.dtypes.apply(str).to_dict()
	    missing_by_column = df.isna().sum().to_dict()
	    distinct_by_column = df.nunique().to_dict()
	    leading_rows = df.head(3).to_dict(orient="records")

	    return {
	        "columns": column_labels,
	        "dtypes": dtype_by_column,
	        "null_counts": missing_by_column,
	        "unique_counts": distinct_by_column,
	        "sample_rows": leading_rows,
	    }

	def generate_metadata_analysis(metadata: dict, model: str = "gemini-pro") -> dict:
	    """Generate a JSON summary and suggestions from metadata using Gemini.

	    Args:
	        metadata: Dataset description to send to the model (the /analyze
	            endpoint passes the output of ``get_metadata``).
	        model: Gemini model name. Defaults to the previously hard-coded
	            "gemini-pro".
	            NOTE(review): this call relies on a system instruction and a
	            JSON response MIME type; confirm the chosen model supports both.

	    Returns:
	        The JSON object parsed from the model's reply. The prompt asks for
	        the keys "summary" and "suggestions"; their presence is not
	        checked here.

	    Raises:
	        HTTPException: 500 if the metadata cannot be serialised, the model
	            call fails, or the reply is not a JSON object.
	    """
	    system_instruction = """
	You are a structured data analysis AI. Your output must be strict JSON.

	1. Summary:
	Provide a concise description of what kind of data this is, what it likely represents, and its domain or use-case.

	2. Suggestions:
	Provide exactly three actionable analyses and visualizations based on the metadata.

	Respond in this exact JSON format:
	{
	"summary": "<short summary>",
	"suggestions": ["<analysis #1>", "<analysis #2>", "<analysis #3>"]
	}
	"""
	    try:
	        # default=str: sample rows can hold values json cannot encode
	        # natively (e.g. datetime-like cells); send their text form rather
	        # than failing the whole request. The text is only read by the model.
	        metadata_text = json.dumps(metadata, indent=2, default=str)
	        response = genai.GenerativeModel(
	            model_name=model,
	            system_instruction=system_instruction
	        ).generate_content(
	            f"Analyze the following structured data metadata:\n{metadata_text}",
	            generation_config=types.GenerationConfig(response_mime_type="application/json")
	        )
	        analysis = json.loads(response.text)
	        # Callers use .get() on the result, so anything but an object is unusable.
	        if not isinstance(analysis, dict):
	            raise ValueError(f"expected a JSON object, got {type(analysis).__name__}")
	        return analysis
	    except Exception as e:
	        logger.error(f"Error generating metadata analysis from Gemini: {e}")
	        raise HTTPException(status_code=500, detail="Failed to get analysis from AI model.") from e

	def generate_visualization_code(file_path: str, command: str, model: str = "gemini-pro") -> dict:
	    """Generate Python code for a visualization based on a user command.

	    Args:
	        file_path: Path of the Excel file the generated code must read.
	        command: Free-text description of the wanted visualization; it is
	            inserted into the prompt as-is.
	        model: Gemini model name. Defaults to the previously hard-coded
	            "gemini-pro".
	            NOTE(review): this call relies on a system instruction and a
	            JSON response MIME type; confirm the chosen model supports both.

	    Returns:
	        The JSON object parsed from the model's reply. The prompt asks for
	        the keys "type", "code" and "explanation"; their presence is not
	        checked here.

	    Raises:
	        HTTPException: 500 if the model call fails or the reply is not a
	            JSON object.
	    """
	    # repr() gives a valid Python string literal for any path (quotes and
	    # backslashes are escaped). Splicing the raw path between r"..." breaks
	    # as soon as the filename contains a double quote.
	    path_literal = repr(file_path)

	    system_instruction = f"""
	You are a Python assistant that MUST return output strictly in JSON format.
	The JSON MUST contain exactly three keys: "type", "code", "explanation".

	- "type": Lowercase visualization type (e.g., "bar", "pie", "line").
	- "code": A string of Python code that prints a JSON object to standard output. The code must access data using this exact line: df = pd.read_excel({path_literal})
	- "explanation": A one-sentence description of the visualization.
	"""
	    try:
	        response = genai.GenerativeModel(
	            model_name=model,
	            system_instruction=system_instruction
	        ).generate_content(
	            f"Generate Python code to create a {command}",
	            generation_config=types.GenerationConfig(response_mime_type="application/json")
	        )
	        agent_output = json.loads(response.text)
	        # Callers use .get() on the result, so anything but an object is unusable.
	        if not isinstance(agent_output, dict):
	            raise ValueError(f"expected a JSON object, got {type(agent_output).__name__}")
	        return agent_output
	    except Exception as e:
	        logger.error(f"Error generating visualization code from Gemini: {e}")
	        raise HTTPException(status_code=500, detail="Failed to generate visualization code from AI model.") from e

	# -----------------------------
	# API Endpoints
	# -----------------------------

	@app.post("/analyze", response_model=AnalysisResponse)
	async def analyze_file(file: UploadFile = File(...)):
	    """
	    Upload an Excel file, get its metadata, and receive an AI-generated
	    summary and a list of visualization suggestions.

	    The upload is stored as ``<file_id>_<filename>`` in UPLOADS_DIR and is
	    kept on success so that /visualize can find it by ``file_id``; it is
	    removed again if anything fails.

	    Raises:
	        HTTPException: 400 when the upload has no name or is not
	            ``.xlsx``/``.xls``; 500 when saving, reading or analysing fails.
	    """
	    # The filename is client-controlled and may be missing. Keep only its
	    # final path component so it cannot introduce directory separators
	    # into the storage path.
	    original_name = Path(file.filename or "").name
	    # Compare case-insensitively so e.g. "REPORT.XLSX" is accepted too.
	    if not original_name.lower().endswith(('.xlsx', '.xls')):
	        raise HTTPException(status_code=400, detail="Invalid file type. Please upload an Excel file.")

	    file_id = str(uuid.uuid4())
	    file_path = UPLOADS_DIR / f"{file_id}_{original_name}"

	    try:
	        with open(file_path, "wb") as buffer:
	            buffer.write(await file.read())
	        logger.info(f"File '{original_name}' saved to temp path '{file_path}'")

	        df = pd.read_excel(file_path)
	        metadata = get_metadata(df)
	        logger.info(f"Metadata extracted for file_id: {file_id}")

	        analysis = generate_metadata_analysis(metadata)
	        logger.info(f"Metadata analysis generated for file_id: {file_id}")

	        return AnalysisResponse(
	            file_id=file_id,
	            summary=analysis.get("summary", "No summary provided."),
	            suggestions=analysis.get("suggestions", [])
	        )
	    except HTTPException:
	        # Already a well-formed API error (e.g. from the model helper):
	        # clean up and pass it through instead of re-wrapping it.
	        file_path.unlink(missing_ok=True)
	        raise
	    except Exception as e:
	        logger.error(f"An error occurred during file analysis: {e}")
	        file_path.unlink(missing_ok=True)
	        raise HTTPException(status_code=500, detail=f"An internal server error occurred: {e}") from e


	@app.post("/visualize", response_model=VisualizationResponse)
	async def visualize_data(request: VisualizationRequest):
	    """
	    Generate and execute Python code for a visualization based on a file_id
	    and a selected command from the analysis step.

	    Raises:
	        HTTPException: 404 when no stored upload matches ``file_id``
	            (including values that are not well-formed UUIDs); 408 when the
	            generated code runs longer than 20 seconds; 500 when the model
	            returns no code, the code exits with an error, or its output
	            is not valid JSON.
	    """
	    # file_id ends up inside a glob pattern. Accept only real UUIDs and use
	    # their canonical text form, so a value such as "*" cannot match other
	    # uploads.
	    try:
	        file_id = str(uuid.UUID(request.file_id))
	    except ValueError:
	        matching_files = []
	    else:
	        matching_files = list(UPLOADS_DIR.glob(f"{file_id}_*"))

	    if not matching_files:
	        logger.error(f"File with ID '{request.file_id}' not found in {UPLOADS_DIR}.")
	        raise HTTPException(status_code=404, detail="File not found. It may have been cleared from the temporary cache. Please re-upload.")

	    file_path = matching_files[0]
	    logger.info(f"Found file '{file_path}' for file_id '{request.file_id}'")

	    agent_output = generate_visualization_code(str(file_path), request.command)
	    code_to_run = agent_output.get("code")

	    if not code_to_run:
	        raise HTTPException(status_code=500, detail="AI model failed to generate valid code.")
	    logger.info(f"Code generated for command: '{request.command}'")

	    # SECURITY NOTE(review): despite the log message below, this is a plain
	    # child process with a timeout, not a sandbox. Model-generated code,
	    # steered by the caller's `command`, runs with this service's
	    # privileges. As a minimum, do not hand it the Gemini API key.
	    child_env = {k: v for k, v in os.environ.items() if k != "GOOGLE_API_KEY"}

	    try:
	        logger.info("Executing generated code in a sandboxed subprocess...")
	        process = subprocess.run(
	            [sys.executable, "-c", code_to_run],
	            capture_output=True, text=True, check=True, timeout=20,
	            env=child_env,
	        )
	        stdout = process.stdout.strip()
	        logger.info(f"Code executed successfully. Stdout length: {len(stdout)}")
	        chart_data = json.loads(stdout)

	        return VisualizationResponse(
	            type=agent_output.get("type", "unknown"),
	            explanation=agent_output.get("explanation", "No explanation provided."),
	            data=chart_data,
	            generated_code=code_to_run
	        )
	    except subprocess.CalledProcessError as e:
	        logger.error(f"Error executing generated code. Stderr: {e.stderr}")
	        raise HTTPException(status_code=500, detail=f"Error during code execution: {e.stderr}") from e
	    except json.JSONDecodeError as e:
	        # `stdout` is always bound here: json.loads is the only call in the
	        # try body that raises this error, and it runs after the assignment.
	        logger.error(f"Failed to decode JSON from stdout. Output was: {stdout}")
	        raise HTTPException(status_code=500, detail="Generated code did not produce valid JSON output.") from e
	    except subprocess.TimeoutExpired as e:
	        logger.error("Code execution timed out.")
	        raise HTTPException(status_code=408, detail="Code execution took too long and was terminated.") from e
	    except Exception as e:
	        logger.error(f"An unexpected error occurred during visualization: {e}")
	        raise HTTPException(status_code=500, detail=f"An unexpected error occurred: {e}") from e


	@app.get("/", include_in_schema=False)
	def root():
	    """Landing route: return a welcome message that points to /docs."""
	    welcome = {
	        "message": "Welcome to the Data Analysis API. Visit /docs for the API interface.",
	    }
	    return welcome