import os import sys import requests import pandas as pd import matplotlib.pyplot as plt from io import StringIO from dotenv import load_dotenv load_dotenv() API_KEY = os.getenv("GEMINI_API_KEY") API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent" def get_df_info(df): return f"""Columns: {list(df.columns)} Sample data: {df.head(3).to_string()} Dtypes: {df.dtypes.to_dict()}""" def call_gemini(prompt): headers = {"Content-Type": "application/json", "x-goog-api-key": API_KEY} data = {"contents": [{"parts": [{"text": prompt}]}]} response = requests.post(API_URL, headers=headers, json=data) return response.json()["candidates"][0]["content"]["parts"][0]["text"] def query_csv(df, user_query): df_info = get_df_info(df) prompt = f"""You are a data analyst. Given this dataframe info: {df_info} User question: {user_query} Write Python code using pandas to answer this. The dataframe is called 'df'. IMPORTANT: Always assign your final answer to a variable called 'result'. Whenever possible, show both text result AND a plot for better visualization. Use matplotlib for plots (don't call plt.show()). Only output Python code, no markdown, no explanation.""" code = call_gemini(prompt) code = code.replace("```python", "").replace("```", "").strip() local_vars = {"df": df.copy(), "pd": pd, "plt": plt} fig = None old_stdout = sys.stdout sys.stdout = StringIO() try: plt.close('all') exec(code, local_vars) printed = sys.stdout.getvalue() sys.stdout = old_stdout if plt.get_fignums(): plt.gcf().savefig('plot.png', dpi=100, bbox_inches='tight') fig = 'plot.png' plt.close('all') result = local_vars.get("result", None) if result is None and printed: result = printed.strip() if isinstance(result, (pd.DataFrame, pd.Series)): result = result.to_string() return str(result) if result else "Query executed.", fig, code except Exception as e: sys.stdout = old_stdout plt.close('all') return f"Error: {e}", None, code