triflix committed on
Commit
0c92577
·
verified ·
1 Parent(s): 09a8302

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -50
app.py CHANGED
@@ -12,48 +12,51 @@ from pathlib import Path
12
  import pandas as pd
13
  from dotenv import load_dotenv
14
  from fastapi import FastAPI, UploadFile, File, HTTPException, Body
15
- from fastapi.responses import JSONResponse
16
  from pydantic import BaseModel, Field
17
 
18
  from google import genai
19
- from google.genai import types
20
 
21
  # -----------------------------
22
  # Initial Configuration
23
  # -----------------------------
24
 
25
- # Load environment variables from .env file
26
  load_dotenv()
27
 
28
  # Set up logging
29
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
30
  logger = logging.getLogger(__name__)
31
 
32
- # Create an 'uploads' directory if it doesn't exist
33
- UPLOADS_DIR = Path("uploads")
34
- UPLOADS_DIR.mkdir(exist_ok=True)
 
 
 
 
35
 
36
  # -----------------------------
37
  # Initialize Gemini Client & FastAPI App
38
  # -----------------------------
39
 
40
- # Configure the Gemini client with the API key from environment variables
41
  try:
42
- api_key = "[REDACTED — hardcoded Google API key removed from this rendering; the key was committed to history and must be revoked immediately]"
43
  if not api_key:
44
- raise ValueError("GOOGLE_API_KEY not found in environment variables.")
45
  genai.configure(api_key=api_key)
46
  logger.info("Google GenAI client configured successfully.")
47
  except Exception as e:
48
- logger.error(f"Failed to configure Google GenAI client: {e}")
49
- # We exit if the client can't be configured as the app is useless without it.
50
  sys.exit(1)
51
 
52
  # Initialize FastAPI app
53
  app = FastAPI(
54
  title="Data Analysis and Visualization API",
55
  description="An API to analyze Excel files and generate Python code for visualizations using Google's Gemini.",
56
- version="1.0.0"
57
  )
58
 
59
  # -----------------------------
@@ -77,7 +80,7 @@ class VisualizationResponse(BaseModel):
77
 
78
 
79
  # -----------------------------
80
- # Helper Functions (Adapted from your script)
81
  # -----------------------------
82
 
83
  def get_metadata(df: pd.DataFrame) -> dict:
@@ -93,16 +96,16 @@ def get_metadata(df: pd.DataFrame) -> dict:
93
  def generate_metadata_analysis(metadata: dict) -> dict:
94
  """Generates a JSON summary and suggestions from metadata using Gemini."""
95
  metadata_text = json.dumps(metadata, indent=2)
96
- model = "gemini-pro" # Using gemini-pro as it's better for this kind of structured generation
97
 
98
  system_instruction = """
99
  You are a structured data analysis AI. Your output must be strict JSON.
100
 
101
  1. Summary:
102
- Provide a concise description of what kind of data this is, what it likely represents, and its domain or use-case. Indicate assumptions if needed.
103
 
104
  2. Suggestions:
105
- Provide exactly three actionable analyses and visualizations based on the metadata. For each, specify the columns to use and the type of insight to be gained.
106
 
107
  Respond in this exact JSON format:
108
  {
@@ -110,7 +113,6 @@ def generate_metadata_analysis(metadata: dict) -> dict:
110
  "suggestions": ["<analysis #1>", "<analysis #2>", "<analysis #3>"]
111
  }
112
  """
113
-
114
  try:
115
  response = genai.GenerativeModel(
116
  model_name=model,
@@ -129,16 +131,13 @@ def generate_visualization_code(file_path: str, command: str) -> dict:
129
  model = "gemini-pro"
130
 
131
  system_instruction = f"""
132
- You are a Python assistant that MUST return output strictly in JSON format and NOTHING else.
133
- The top-level JSON MUST contain exactly three keys in this order: "type", "code", "explanation".
134
-
135
- Requirements:
136
- - "type": The suggested visualization type as a lowercase string (e.g., "bar", "pie", "line", "scatter").
137
- - "code": A string of Python code. This code MUST print a JSON object to standard output. The JSON should contain the data needed for the plot. Use pandas to process the data.
138
- - The code must access the data using this exact line: df = pd.read_excel(r"{file_path}")
139
- - "explanation": A concise, one-sentence description of what the visualization shows.
140
- """
141
 
 
 
 
 
142
  try:
143
  response = genai.GenerativeModel(
144
  model_name=model,
@@ -169,12 +168,10 @@ async def analyze_file(file: UploadFile = File(...)):
169
  file_path = UPLOADS_DIR / f"{file_id}_{file.filename}"
170
 
171
  try:
172
- # Save the uploaded file
173
  with open(file_path, "wb") as buffer:
174
  buffer.write(await file.read())
175
- logger.info(f"File '{file.filename}' saved as '{file_path.name}'")
176
 
177
- # Process the file
178
  df = pd.read_excel(file_path)
179
  metadata = get_metadata(df)
180
  logger.info(f"Metadata extracted for file_id: {file_id}")
@@ -187,13 +184,11 @@ async def analyze_file(file: UploadFile = File(...)):
187
  summary=analysis.get("summary", "No summary provided."),
188
  suggestions=analysis.get("suggestions", [])
189
  )
190
-
191
  except Exception as e:
192
  logger.error(f"An error occurred during file analysis: {e}")
193
- # Clean up the saved file in case of an error
194
  if file_path.exists():
195
  os.remove(file_path)
196
- raise HTTPException(status_code=500, detail=f"An internal error occurred: {e}")
197
 
198
 
199
  @app.post("/visualize", response_model=VisualizationResponse)
@@ -202,40 +197,29 @@ async def visualize_data(request: VisualizationRequest):
202
  Generate and execute Python code for a visualization based on a file_id
203
  and a selected command from the analysis step.
204
  """
205
- # Find the file corresponding to the file_id
206
  matching_files = list(UPLOADS_DIR.glob(f"{request.file_id}_*"))
207
  if not matching_files:
208
- logger.error(f"File with ID '{request.file_id}' not found.")
209
- raise HTTPException(status_code=404, detail="File not found. Please re-upload the file.")
210
 
211
  file_path = matching_files[0]
212
  logger.info(f"Found file '{file_path}' for file_id '{request.file_id}'")
213
 
214
- # Generate the visualization code from Gemini
215
  agent_output = generate_visualization_code(str(file_path), request.command)
216
  code_to_run = agent_output.get("code")
217
 
218
  if not code_to_run:
219
  raise HTTPException(status_code=500, detail="AI model failed to generate valid code.")
220
-
221
  logger.info(f"Code generated for command: '{request.command}'")
222
 
223
- # --- Safe Code Execution using subprocess ---
224
  try:
225
  logger.info("Executing generated code in a sandboxed subprocess...")
226
  process = subprocess.run(
227
  [sys.executable, "-c", code_to_run],
228
- capture_output=True,
229
- text=True,
230
- check=True, # Raises CalledProcessError for non-zero exit codes
231
- timeout=15 # Add a timeout for safety
232
  )
233
-
234
- # The output from the script is expected to be a JSON string
235
  stdout = process.stdout.strip()
236
- logger.info(f"Code executed successfully. Stdout: {stdout[:200]}...") # Log first 200 chars
237
-
238
- # Parse the JSON output from the executed code
239
  chart_data = json.loads(stdout)
240
 
241
  return VisualizationResponse(
@@ -244,12 +228,11 @@ async def visualize_data(request: VisualizationRequest):
244
  data=chart_data,
245
  generated_code=code_to_run
246
  )
247
-
248
  except subprocess.CalledProcessError as e:
249
  logger.error(f"Error executing generated code. Stderr: {e.stderr}")
250
  raise HTTPException(status_code=500, detail=f"Error during code execution: {e.stderr}")
251
  except json.JSONDecodeError:
252
- logger.error(f"Failed to decode JSON from executed code's stdout. Output was: {stdout}")
253
  raise HTTPException(status_code=500, detail="Generated code did not produce valid JSON output.")
254
  except subprocess.TimeoutExpired:
255
  logger.error("Code execution timed out.")
@@ -261,4 +244,4 @@ async def visualize_data(request: VisualizationRequest):
261
 
262
  @app.get("/", include_in_schema=False)
263
  def root():
264
- return {"message": "Welcome to the Data Analysis and Visualization API. Visit /docs for more info."}
 
12
  import pandas as pd
13
  from dotenv import load_dotenv
14
  from fastapi import FastAPI, UploadFile, File, HTTPException, Body
 
15
  from pydantic import BaseModel, Field
16
 
17
  from google import genai
18
+ from google.generativeai import types
19
 
20
  # -----------------------------
21
  # Initial Configuration
22
  # -----------------------------
23
 
24
+ # Load environment variables (will load from Hugging Face secrets)
25
  load_dotenv()
26
 
27
  # Set up logging
28
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
29
  logger = logging.getLogger(__name__)
30
 
31
+ # --- MODIFICATION FOR HUGGING FACE ---
32
+ # Use the /tmp directory for ephemeral file storage.
33
+ # This is a standard temporary directory in Linux environments like HF Spaces.
34
+ UPLOADS_DIR = Path("/tmp/uploads")
35
+ # Create the directory; parents=True ensures creation of parent dirs if needed.
36
+ UPLOADS_DIR.mkdir(parents=True, exist_ok=True)
37
+ logger.info(f"Using temporary directory for uploads: {UPLOADS_DIR}")
38
 
39
  # -----------------------------
40
  # Initialize Gemini Client & FastAPI App
41
  # -----------------------------
42
 
43
+ # Configure the Gemini client with the API key from environment variables/secrets
44
  try:
45
+ api_key = os.getenv("GOOGLE_API_KEY")
46
  if not api_key:
47
+ raise ValueError("GOOGLE_API_KEY not found in environment variables or secrets.")
48
  genai.configure(api_key=api_key)
49
  logger.info("Google GenAI client configured successfully.")
50
  except Exception as e:
51
+ logger.error(f"FATAL: Failed to configure Google GenAI client: {e}")
52
+ # Exit if the client can't be configured, as the app is non-functional without it.
53
  sys.exit(1)
54
 
55
  # Initialize FastAPI app
56
  app = FastAPI(
57
  title="Data Analysis and Visualization API",
58
  description="An API to analyze Excel files and generate Python code for visualizations using Google's Gemini.",
59
+ version="1.1.0"
60
  )
61
 
62
  # -----------------------------
 
80
 
81
 
82
  # -----------------------------
83
+ # Helper Functions
84
  # -----------------------------
85
 
86
  def get_metadata(df: pd.DataFrame) -> dict:
 
96
  def generate_metadata_analysis(metadata: dict) -> dict:
97
  """Generates a JSON summary and suggestions from metadata using Gemini."""
98
  metadata_text = json.dumps(metadata, indent=2)
99
+ model = "gemini-pro"
100
 
101
  system_instruction = """
102
  You are a structured data analysis AI. Your output must be strict JSON.
103
 
104
  1. Summary:
105
+ Provide a concise description of what kind of data this is, what it likely represents, and its domain or use-case.
106
 
107
  2. Suggestions:
108
+ Provide exactly three actionable analyses and visualizations based on the metadata.
109
 
110
  Respond in this exact JSON format:
111
  {
 
113
  "suggestions": ["<analysis #1>", "<analysis #2>", "<analysis #3>"]
114
  }
115
  """
 
116
  try:
117
  response = genai.GenerativeModel(
118
  model_name=model,
 
131
  model = "gemini-pro"
132
 
133
  system_instruction = f"""
134
+ You are a Python assistant that MUST return output strictly in JSON format.
135
+ The JSON MUST contain exactly three keys: "type", "code", "explanation".
 
 
 
 
 
 
 
136
 
137
+ - "type": Lowercase visualization type (e.g., "bar", "pie", "line").
138
+ - "code": A string of Python code that prints a JSON object to standard output. The code must access data using this exact line: df = pd.read_excel(r"{file_path}")
139
+ - "explanation": A one-sentence description of the visualization.
140
+ """
141
  try:
142
  response = genai.GenerativeModel(
143
  model_name=model,
 
168
  file_path = UPLOADS_DIR / f"{file_id}_{file.filename}"
169
 
170
  try:
 
171
  with open(file_path, "wb") as buffer:
172
  buffer.write(await file.read())
173
+ logger.info(f"File '{file.filename}' saved to temp path '{file_path}'")
174
 
 
175
  df = pd.read_excel(file_path)
176
  metadata = get_metadata(df)
177
  logger.info(f"Metadata extracted for file_id: {file_id}")
 
184
  summary=analysis.get("summary", "No summary provided."),
185
  suggestions=analysis.get("suggestions", [])
186
  )
 
187
  except Exception as e:
188
  logger.error(f"An error occurred during file analysis: {e}")
 
189
  if file_path.exists():
190
  os.remove(file_path)
191
+ raise HTTPException(status_code=500, detail=f"An internal server error occurred: {e}")
192
 
193
 
194
  @app.post("/visualize", response_model=VisualizationResponse)
 
197
  Generate and execute Python code for a visualization based on a file_id
198
  and a selected command from the analysis step.
199
  """
 
200
  matching_files = list(UPLOADS_DIR.glob(f"{request.file_id}_*"))
201
  if not matching_files:
202
+ logger.error(f"File with ID '{request.file_id}' not found in {UPLOADS_DIR}.")
203
+ raise HTTPException(status_code=404, detail="File not found. It may have been cleared from the temporary cache. Please re-upload.")
204
 
205
  file_path = matching_files[0]
206
  logger.info(f"Found file '{file_path}' for file_id '{request.file_id}'")
207
 
 
208
  agent_output = generate_visualization_code(str(file_path), request.command)
209
  code_to_run = agent_output.get("code")
210
 
211
  if not code_to_run:
212
  raise HTTPException(status_code=500, detail="AI model failed to generate valid code.")
 
213
  logger.info(f"Code generated for command: '{request.command}'")
214
 
 
215
  try:
216
  logger.info("Executing generated code in a sandboxed subprocess...")
217
  process = subprocess.run(
218
  [sys.executable, "-c", code_to_run],
219
+ capture_output=True, text=True, check=True, timeout=20
 
 
 
220
  )
 
 
221
  stdout = process.stdout.strip()
222
+ logger.info(f"Code executed successfully. Stdout length: {len(stdout)}")
 
 
223
  chart_data = json.loads(stdout)
224
 
225
  return VisualizationResponse(
 
228
  data=chart_data,
229
  generated_code=code_to_run
230
  )
 
231
  except subprocess.CalledProcessError as e:
232
  logger.error(f"Error executing generated code. Stderr: {e.stderr}")
233
  raise HTTPException(status_code=500, detail=f"Error during code execution: {e.stderr}")
234
  except json.JSONDecodeError:
235
+ logger.error(f"Failed to decode JSON from stdout. Output was: {stdout}")
236
  raise HTTPException(status_code=500, detail="Generated code did not produce valid JSON output.")
237
  except subprocess.TimeoutExpired:
238
  logger.error("Code execution timed out.")
 
244
 
245
  @app.get("/", include_in_schema=False)
246
  def root():
247
+ return {"message": "Welcome to the Data Analysis API. Visit /docs for the API interface."}