# Data_Whisperer / app.py
# Kakarla7's picture
# Update app.py
# 1fa05fe verified
import gradio as gr
import pandas as pd
import requests
import time
# Endpoint of the hosted Hugging Face inference model used to generate code.
API_URL: str = "https://api-inference.huggingface.co/models/Salesforce/codegen-350M-mono"
# Extra HTTP headers sent with every request. Deliberately empty: per the
# original author's note, free public models need no auth header.
headers: dict = dict()
# def query_llm(prompt):
# try:
# response = requests.post(API_URL, headers=headers, json={"inputs": prompt})
# return response.json()[0]["generated_text"]
# except Exception as e:
# return f"# LLM API error: {str(e)}"
# def query_llm(prompt, retries=3):
# for i in range(retries):
# try:
# response = requests.post(API_URL, headers=headers, json={"inputs": prompt})
# data = response.json()
# if isinstance(data, list) and "generated_text" in data[0]:
# return data[0]["generated_text"]
# elif "error" in data:
# if "loading" in data["error"].lower() and i < retries - 1:
# time.sleep(5)
# continue
# return f"# LLM API error: {data['error']}"
# else:
# return "# Unexpected API response format."
# except Exception as e:
# return f"# LLM API exception: {str(e)}"
import re
import requests
from typing import Optional
def extract_code_from_markdown(text: str) -> str:
    """Extract Python code from the last fenced markdown block in *text*.

    The text is split on fence markers (three backticks, optionally followed
    by ``python``). Splitting produces alternating segments: even-indexed
    segments lie outside a fence, odd-indexed segments lie inside one. The
    last inside segment is returned, which also covers a trailing fence that
    was opened but never closed.

    Args:
        text: Raw model output, possibly containing fenced code blocks.

    Returns:
        The stripped contents of the last code block, or the stripped input
        itself when it contains no fence marker at all.
    """
    segments = re.split(r'```(?:python)?', text)
    if len(segments) == 1:
        # No fence marker anywhere: treat the whole text as code.
        return text.strip()
    # segments[-1] would be whatever follows the final closing fence (usually
    # an empty string), so select the last odd-indexed segment instead.
    fenced_segments = segments[1::2]
    return fenced_segments[-1].strip()
def query_llm(prompt: str,
              api_url: Optional[str] = None,
              headers: Optional[dict] = None,
              timeout: int = 10) -> Optional[str]:
    """
    Query the LLM API and return the cleaned code output.

    Args:
        prompt: Input prompt for the LLM.
        api_url: API endpoint URL. When omitted, the module-level ``API_URL``
            is used, so a bare ``query_llm(prompt)`` call works.
        headers: Request headers. When omitted, no extra headers are sent.
        timeout: Request timeout in seconds.

    Returns:
        The generated text with markdown fences stripped, or None when the
        request fails or the response does not have the expected shape
        (a non-empty list whose first item is a dict holding
        ``"generated_text"``). Failures are reported via ``print``.
    """
    if api_url is None:
        api_url = API_URL
    try:
        response = requests.post(
            api_url,
            headers=headers,
            json={"inputs": prompt},
            timeout=timeout,
        )
        # Turn HTTP error statuses into exceptions handled below.
        response.raise_for_status()
        output = response.json()
        if not isinstance(output, list) or not output:
            print("Unexpected API response format: Not a list or empty")
            return None
        first_item = output[0]
        # Guard against a non-dict element: a membership test on a string
        # would be a substring check and the later lookup would raise.
        if not isinstance(first_item, dict) or "generated_text" not in first_item:
            print("Missing 'generated_text' in API response")
            return None
        raw_code = first_item["generated_text"]
        return extract_code_from_markdown(raw_code)
    except requests.exceptions.RequestException as e:
        print(f"API request failed: {str(e)}")
        return None
    except (ValueError, KeyError, IndexError, TypeError) as e:
        # TypeError covers a "generated_text" value that is not a string.
        print(f"Error processing API response: {str(e)}")
        return None
def run_user_code(code, df):
    """Execute *code* with ``df`` in scope and return its ``result`` variable.

    Args:
        code: Python source text to execute.
        df: Object exposed to the executed code under the name ``df``.

    Returns:
        The value the code bound to ``result``, or None if it bound nothing
        under that name.

    Raises:
        RuntimeError: If the code raises any exception (syntax errors
            included); the original exception is chained as the cause.
    """
    # NOTE(security): exec() runs the snippet with this process's full
    # privileges. In this file the snippet is LLM-generated from a
    # user-supplied question, so it must be treated as untrusted input.
    #
    # A single namespace serves as both globals and locals. With separate
    # dicts, functions and lambdas defined by the snippet resolve free names
    # in the (empty) globals dict and cannot see ``df``.
    namespace = {"df": df}
    try:
        exec(code, namespace)
    except Exception as e:
        raise RuntimeError(f"Error executing code: {str(e)}") from e
    return namespace.get("result")
def whisperer_agent(file, question):
    """Answer a natural-language question about an uploaded CSV file.

    Reads the CSV (trying several encodings), asks the LLM for a pandas
    snippet that assigns its answer to ``result``, executes that snippet and
    reports the outcome.

    Args:
        file: The uploaded file. Either an object exposing the path as
            ``.name`` or a plain path; None when nothing was uploaded.
        question: The user's question about the data.

    Returns:
        A ``(text, table)`` pair. ``text`` is the generated code (with the
        output appended as a comment for non-DataFrame results) or a
        ``#``-prefixed error message. ``table`` is the first rows of a
        DataFrame result, otherwise None.
    """
    if file is None:
        return "# Please upload a CSV file before asking a question.", None

    # NOTE(review): the upload may arrive as an object with `.name` or as a
    # bare path depending on the Gradio version/config -- accept both.
    csv_path = getattr(file, "name", file)

    df = None
    for enc in ('utf-8', 'cp1252', 'ISO-8859-1'):
        try:
            df = pd.read_csv(csv_path, encoding=enc)
            break
        except Exception:
            # Best effort: any failure simply moves on to the next encoding.
            continue
    if df is None:
        return "# Failed to read CSV file with tried encodings (utf-8, cp1252, ISO-8859-1).", None

    # Column labels are not guaranteed to be strings; join() needs strings.
    column_list = ', '.join(map(str, df.columns))
    prompt = f"""You are a Python data analyst. Data columns are: {column_list}.
Write a Python Pandas code snippet to answer the question: "{question}".
Assign the output to a variable named `result`. Only return code."""

    # query_llm already strips markdown fences, and returns None on failure.
    code = query_llm(prompt, API_URL, headers)
    if not code:
        return "# LLM request failed or returned no code.", None

    try:
        result = run_user_code(code, df)
    except Exception as e:
        return f"# {str(e)}", None

    if isinstance(result, pd.DataFrame):
        return code, result.head()
    if result is not None:
        return code + f"\n\n# Output:\n{result}", None
    return code + "\n\n# No output returned.", None
# Build the Gradio UI around whisperer_agent: a CSV upload and a question box
# as inputs, a code panel and a table as outputs. whisperer_agent returns a
# (text, table) pair, presumably matched to the two outputs in order.
demo = gr.Interface(
fn=whisperer_agent,
inputs=[
gr.File(label="Upload CSV", file_types=[".csv"]),
gr.Textbox(label="Ask a question about your data")
],
outputs=[
gr.Code(label="Generated Code and Output"),
gr.Dataframe(label="Result Table")
],
title="Data Whisperer Agent AI",
description="Analyze CSV files using natural language. Powered by open-source LLMs."
)
# Start the app at import time (no __main__ guard), so importing this module
# also launches the interface.
demo.launch()