Spaces:

Kakarla7
/

Data_Whisperer

Sleeping

App Files Files Community

Data_Whisperer / app.py

Kakarla7

Update app.py

1fa05fe verified 12 months ago

raw

history blame contribute delete

5 kB

	import gradio as gr
	import pandas as pd
	import requests
	import time

	# Hugging Face Inference API endpoint for the Salesforce/codegen-350M-mono
	# model. The original author notes that public models on the free tier need
	# no authentication.
	# NOTE(review): confirm the endpoint still accepts unauthenticated requests.
	API_URL = "https://api-inference.huggingface.co/models/Salesforce/codegen-350M-mono"

	# Intended as the HTTP headers for requests to API_URL; empty because no
	# auth token is configured.
	headers = {} # Leave empty for free models

	# def query_llm(prompt):
	# try:
	# response = requests.post(API_URL, headers=headers, json={"inputs": prompt})
	# return response.json()[0]["generated_text"]
	# except Exception as e:
	# return f"# LLM API error: {str(e)}"



	# def query_llm(prompt, retries=3):
	# for i in range(retries):
	# try:
	# response = requests.post(API_URL, headers=headers, json={"inputs": prompt})
	# data = response.json()

	# if isinstance(data, list) and "generated_text" in data[0]:
	# return data[0]["generated_text"]
	# elif "error" in data:
	# if "loading" in data["error"].lower() and i < retries - 1:
	# time.sleep(5)
	# continue
	# return f"# LLM API error: {data['error']}"
	# else:
	# return "# Unexpected API response format."
	# except Exception as e:
	# return f"# LLM API exception: {str(e)}"


	import re
	import requests
	from typing import Optional

	def extract_code_from_markdown(text: str) -> str:
	    """Extract Python code from a markdown-formatted response.

	    The text is split on code fences (``` optionally followed by
	    ``python``). Segments at odd positions of that split lie inside a
	    fence, so the last of them is the last code block, whether or not its
	    closing fence is present.

	    Args:
	        text: Raw response text, possibly containing fenced code blocks.

	    Returns:
	        The content of the last fenced block with surrounding whitespace
	        removed, or the whole stripped text when it contains no fence.
	    """
	    segments = re.split(r'```(?:python)?', text)
	    # Even indexes are prose outside fences; odd indexes are fenced code.
	    fenced = segments[1::2]
	    if not fenced:
	        return text.strip()
	    return fenced[-1].strip()

	def query_llm(prompt: str,
	              api_url: Optional[str] = None,
	              headers: Optional[dict] = None,
	              timeout: int = 10) -> Optional[str]:
	    """Query the LLM API and return the code extracted from its reply.

	    Args:
	        prompt: Input prompt for the LLM.
	        api_url: API endpoint URL. When omitted, the module-level
	            ``API_URL`` is used, so ``query_llm(prompt)`` works.
	        headers: Request headers. When omitted, no extra headers are sent.
	        timeout: Request timeout in seconds.

	    Returns:
	        The extracted code string, or None when the request fails or the
	        response does not have the expected shape (a non-empty list whose
	        first element is a dict with a string ``generated_text``).
	    """
	    if api_url is None:
	        api_url = API_URL
	    if headers is None:
	        headers = {}

	    try:
	        response = requests.post(
	            api_url,
	            headers=headers,
	            json={"inputs": prompt},
	            timeout=timeout,
	        )
	        response.raise_for_status()
	        output = response.json()
	    except requests.exceptions.RequestException as e:
	        print(f"API request failed: {str(e)}")
	        return None
	    except ValueError as e:
	        # Body was not valid JSON.
	        print(f"Error processing API response: {str(e)}")
	        return None

	    if not isinstance(output, list) or not output:
	        print("Unexpected API response format: Not a list or empty")
	        return None

	    first = output[0]
	    # Check the element type first: `in` on a str would be a substring test
	    # and indexing it afterwards would raise TypeError.
	    if not isinstance(first, dict) or "generated_text" not in first:
	        print("Missing 'generated_text' in API response")
	        return None

	    raw_code = first["generated_text"]
	    if not isinstance(raw_code, str):
	        print("Error processing API response: 'generated_text' is not a string")
	        return None

	    return extract_code_from_markdown(raw_code)

	def run_user_code(code, df):
	    """Execute a code snippet against ``df`` and return its ``result``.

	    The snippet runs in a single namespace that exposes the data as ``df``
	    and pandas as ``pd``. Using one dict for both globals and locals lets
	    functions defined inside the snippet see ``df`` and ``pd``.

	    Args:
	        code: Python source to execute.
	        df: Object made available to the snippet under the name ``df``.

	    Returns:
	        The value the snippet assigned to ``result``, or None if it did not
	        assign one.

	    Raises:
	        RuntimeError: If the snippet raises any exception; the original
	            exception is attached as the cause.
	    """
	    # SECURITY: exec() runs arbitrary code with this process's privileges.
	    # The snippet comes from an LLM prompted with user text, so it is
	    # untrusted; run this only in a sandboxed/disposable environment.
	    namespace = {"df": df, "pd": pd}
	    try:
	        exec(code, namespace)
	    except Exception as e:
	        raise RuntimeError(f"Error executing code: {str(e)}") from e
	    return namespace.get("result", None)

	def whisperer_agent(file, question):
	    """Answer a natural-language question about an uploaded CSV file.

	    Reads the CSV, asks the LLM for a pandas snippet that answers
	    ``question``, executes the snippet and formats the outcome.

	    Args:
	        file: The uploaded file, either an object exposing its path as
	            ``.name`` or a plain path string; None when nothing was
	            uploaded.
	        question: The user's question about the data.

	    Returns:
	        A ``(text, table)`` tuple. ``text`` is the generated code (with the
	        output appended as a comment for non-DataFrame results) or a
	        ``#``-prefixed error message. ``table`` is the first rows of the
	        result when it is a DataFrame, otherwise None.
	    """
	    if file is None:
	        return "# No CSV file was uploaded.", None

	    # Accept both an object carrying the path in `.name` and a bare path.
	    csv_path = getattr(file, "name", file)

	    df = None
	    for enc in ('utf-8', 'cp1252', 'ISO-8859-1'):
	        try:
	            df = pd.read_csv(csv_path, encoding=enc)
	            break
	        except Exception:
	            # Best effort: any failure just moves on to the next encoding.
	            continue
	    if df is None:
	        return "# Failed to read CSV file with tried encodings (utf-8, cp1252, ISO-8859-1).", None

	    # Column labels are not guaranteed to be strings, so convert explicitly.
	    columns = ', '.join(map(str, df.columns))
	    prompt = (
	        f"You are a Python data analyst. Data columns are: {columns}.\n"
	        f'Write a Python Pandas code snippet to answer the question: "{question}".\n'
	        "Assign the output to a variable named `result`. Only return code."
	    )

	    # query_llm already strips markdown fences and returns None on failure.
	    code = query_llm(prompt, API_URL, headers)
	    if not code:
	        return "# The LLM did not return any code (request failed or response was empty).", None

	    try:
	        result = run_user_code(code, df)
	    except Exception as e:
	        return f"# {str(e)}", None

	    if isinstance(result, pd.DataFrame):
	        return code, result.head()
	    if result is not None:
	        return code + f"\n\n# Output:\n{result}", None
	    return code + "\n\n# No output returned.", None

	# Gradio UI: a CSV upload and a question box feed whisperer_agent, whose two
	# return values fill the code panel and the result table.
	demo = gr.Interface(
	    fn=whisperer_agent,
	    inputs=[
	        gr.File(label="Upload CSV", file_types=[".csv"]),
	        gr.Textbox(label="Ask a question about your data"),
	    ],
	    outputs=[
	        gr.Code(label="Generated Code and Output"),
	        gr.Dataframe(label="Result Table"),
	    ],
	    title="Data Whisperer Agent AI",
	    description="Analyze CSV files using natural language. Powered by open-source LLMs.",
	)

	# Start the server only when run as a script, so importing this module
	# (e.g. from a test) does not block on a running web server.
	if __name__ == "__main__":
	    demo.launch()