Spaces:

Agents-MCP-Hackathon
/

data_science_agent

Paused

App Files Files Community

data_science_agent / app.py

bpHigh

Update app.py

010332b verified 10 months ago

raw

history blame

33.5 kB

	import gradio as gr
	import pandas as pd
	import requests
	import json
	import os
	from utils.google_genai_llm import get_response, generate_with_gemini
	from utils.utils import parse_json_codefences, parse_python_codefences
	from utils.code_sandbox import code_eval
	from prompts.requirements_gathering import requirements_gathering_system_prompt
	from prompts.planning import hf_query_gen_prompt, hf_context_gen_prompt
	from prompts.devstral_coding_prompt import devstral_code_gen_sys_prompt, devstral_code_gen_user_prompt
	from dotenv import load_dotenv
	import os
	import asyncio
	load_dotenv()

	try:
	import modal
	# Import the Modal inference function and app from separate file
	import subprocess
	MODAL_AVAILABLE = True

	except ImportError:
	MODAL_AVAILABLE = False
	print("Warning: Modal not available. Code generation will be disabled.MCP Server will be disabled")

	from PIL import Image
	import tempfile
	import traceback
	import hashlib

	# Import Marker for document processing
	try:
	from marker.converters.pdf import PdfConverter
	from marker.models import create_model_dict
	from marker.output import text_from_rendered
	MARKER_AVAILABLE = True
	except ImportError:
	MARKER_AVAILABLE = False
	print("Warning: Marker library not available. PDF, PPT, and DOCX processing will be limited.")


	def get_file_hash(file_path):
	"""Generate a hash of the file for caching purposes"""
	try:
	with open(file_path, 'rb') as f:
	file_hash = hashlib.md5(f.read()).hexdigest()
	return file_hash
	except Exception:
	return None

	def extract_text_with_marker(file_path):
	"""Extract text from PDF, PPT, or DOCX using Marker"""
	if not MARKER_AVAILABLE:
	return "Marker library not available for document processing.", ""

	try:
	# Create converter with model artifacts
	converter = PdfConverter(
	artifact_dict=create_model_dict(),
	)

	# Convert document
	rendered = converter(file_path)

	# Extract text from rendered output
	text, _, images = text_from_rendered(rendered)

	# Get basic stats
	word_count = len(text.split())
	char_count = len(text)

	stats = f"Extracted text ({word_count} words, {char_count} characters)"

	return stats, text

	except Exception as e:
	error_msg = f"Error processing document: {str(e)}"
	return error_msg, ""

	def process_user_input(message, history, uploaded_files, file_cache):
	"""Process user input and generate AI response using requirements gathering prompt"""

	# Build conversation history from chat history
	conversation_history = ""
	if history:
	for i, (user_msg, ai_msg) in enumerate(history):
	conversation_history += f"User: {user_msg}\n"
	if ai_msg:
	conversation_history += f"Assistant: {ai_msg}\n"

	# Add file information to conversation if files are uploaded
	if uploaded_files:
	file_info = f"\n[UPLOADED_FILES]\n"
	new_file_cache = file_cache.copy() if file_cache else {}

	for file_path in uploaded_files:
	try:
	file_name = file_path.split('/')[-1]
	file_extension = os.path.splitext(file_name)[1].lower()
	file_hash = get_file_hash(file_path)
	cache_key = f"{file_name}_{file_hash}"

	# Handle CSV files
	if file_extension == '.csv':
	df = pd.read_csv(file_path)
	file_info += f"- {file_name}: CSV file with {len(df)} rows and {len(df.columns)} columns\n"
	file_info += f" Columns: {', '.join(df.columns.tolist())}\n"

	# Handle Excel files
	elif file_extension in ['.xlsx', '.xls']:
	df = pd.read_excel(file_path)
	file_info += f"- {file_name}: Excel file with {len(df)} rows and {len(df.columns)} columns\n"
	file_info += f" Columns: {', '.join(df.columns.tolist())}\n"

	# Handle document files with Marker (PDF, PPT, DOCX)
	elif file_extension in ['.pdf', '.ppt', '.pptx', '.doc', '.docx']:
	file_size = os.path.getsize(file_path)
	file_size_mb = round(file_size / (1024 * 1024), 2)

	# Check if file is already processed and cached
	if cache_key in new_file_cache:
	# Use cached text
	extraction_stats = new_file_cache[cache_key]['stats']
	extracted_text = new_file_cache[cache_key]['text']
	status = "(cached)"
	else:
	# Process new file with Marker
	extraction_stats, extracted_text = extract_text_with_marker(file_path)
	# Cache the results
	new_file_cache[cache_key] = {
	'stats': extraction_stats,
	'text': extracted_text,
	'file_name': file_name,
	'file_path': file_path
	}
	status = "(newly processed)"

	# Determine document type
	if file_extension == '.pdf':
	doc_type = "PDF document"
	elif file_extension in ['.ppt', '.pptx']:
	doc_type = "PowerPoint presentation"
	else:
	doc_type = "Word document"

	file_info += f"- {file_name}: {doc_type}, Size: {file_size_mb} MB {status}\n"
	file_info += f" Content: {extraction_stats}\n"

	# Include extracted text in conversation context for better AI understanding
	if extracted_text and len(extracted_text.strip()) > 0:
	# Truncate very long texts for context (keep first 2000 chars)
	text_preview = extracted_text[:200000] + "..." if len(extracted_text) > 200000 else extracted_text
	file_info += f" Text Preview: {text_preview}\n"

	# Handle image files
	elif file_extension in ['.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.webp']:
	with Image.open(file_path) as img:
	width, height = img.size
	mode = img.mode
	file_size = os.path.getsize(file_path)
	file_size_mb = round(file_size / (1024 * 1024), 2)
	file_info += f"- {file_name}: {file_extension.upper()[1:]} image file\n"
	file_info += f" Dimensions: {width}x{height} pixels, Mode: {mode}, Size: {file_size_mb} MB\n"

	# Handle JSON files
	elif file_extension == '.json':
	file_size = os.path.getsize(file_path)
	file_size_kb = round(file_size / 1024, 2)
	file_info += f"- {file_name}: JSON file, Size: {file_size_kb} KB\n"

	# Handle text files
	elif file_extension == '.txt':
	with open(file_path, 'r', encoding='utf-8') as f:
	lines = len(f.readlines())
	file_size = os.path.getsize(file_path)
	file_size_kb = round(file_size / 1024, 2)
	file_info += f"- {file_name}: Text file with {lines} lines, Size: {file_size_kb} KB\n"

	# Handle other files
	else:
	file_size = os.path.getsize(file_path)
	file_size_kb = round(file_size / 1024, 2)
	file_info += f"- {file_name}: File uploaded, Size: {file_size_kb} KB\n"

	except Exception as e:
	file_info += f"- {file_path.split('/')[-1]}: File uploaded (unable to preview: {str(e)})\n"
	print(f"Error processing file {file_path}: {traceback.format_exc()}")

	conversation_history += file_info

	# Update the cache
	file_cache.update(new_file_cache)

	# Format the prompt with conversation history and current query
	formatted_prompt = requirements_gathering_system_prompt.format(
	conversation_history=conversation_history,
	query=message
	)

	# Get AI response
	ai_response = generate_with_gemini(formatted_prompt, purpose="REQUIREMENTS_GATHERING")

	return ai_response, file_cache

	def chat_interface(message, history, uploaded_files, file_cache):
	"""Main chat interface function"""

	# Get AI response with updated cache
	ai_response, updated_cache = process_user_input(message, history, uploaded_files, file_cache)

	# Add to history
	history.append((message, ai_response))

	return history, history, "", updated_cache

	def clear_chat():
	"""Clear the chat history and file cache"""
	return [], [], {}

	def upload_file_handler(files):
	"""Handle file uploads"""
	if files:
	return files
	return []

	async def generate_plan(history, file_cache):
	"""Generate a plan using the planning prompt and Gemini API"""

	# Build conversation history
	yield "⏳ Generating plan... (Starting)"

	conversation_history = ""
	if history:
	for user_msg, ai_msg in history:
	conversation_history += f"User: {user_msg}\n"
	if ai_msg:
	conversation_history += f"Assistant: {ai_msg}\n"
	yield "⏳ Generating plan... (Getting HF MCP tools)"
	try:
	mcp_tool_func = modal.Function.from_name("HuggingFace-MCP","connect_and_get_tools")
	hf_query_gen_tool_details = mcp_tool_func.remote()
	print(hf_query_gen_tool_details)
	except Exception as e:
	hf_query_gen_tool_details = """meta=None nextCursor=None tools=[Tool(name='hf_whoami', description="Hugging Face tools are being used by authenticated user 'bpHigh'", inputSchema={'type': 'object', 'properties': {}, 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, annotations=ToolAnnotations(title='Hugging Face User Info', readOnlyHint=None, destructiveHint=None, idempotentHint=None, openWorldHint=None)), Tool(name='space_search', description='Find Hugging Face Spaces using semantic search. Include links to the Space when presenting the results.', inputSchema={'type': 'object', 'properties': {'query': {'type': 'string', 'minLength': 1, 'maxLength': 50, 'description': 'Semantic Search Query'}, 'limit': {'type': 'number', 'default': 10, 'description': 'Number of results to return'}, 'mcp': {'type': 'boolean', 'default': False, 'description': 'Only return MCP Server enabled Spaces'}}, 'required': ['query'], 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, annotations=ToolAnnotations(title='Hugging Face Space Search', readOnlyHint=True, destructiveHint=False, idempotentHint=None, openWorldHint=True)), Tool(name='model_search', description='Find Machine Learning models hosted on Hugging Face. Returns comprehensive information about matching models including downloads, likes, tags, and direct links. Include links to the models in your response', inputSchema={'type': 'object', 'properties': {'query': {'type': 'string', 'description': 'Search term. Leave blank and specify "sort" and "limit" to get e.g. "Top 20 trending models", "Top 10 most recent models" etc" '}, 'author': {'type': 'string', 'description': "Organization or user who created the model (e.g., 'google', 'meta-llama', 'microsoft')"}, 'task': {'type': 'string', 'description': "Model task type (e.g., 'text-generation', 'image-classification', 'translation')"}, 'library': {'type': 'string', 'description': "Framework the model uses (e.g., 'transformers', 'diffusers', 'timm')"}, 'sort': {'type': 'string', 'enum': ['trendingScore', 'downloads', 'likes', 'createdAt', 'lastModified'], 'description': 'Sort order: trendingScore, downloads , likes, createdAt, lastModified'}, 'limit': {'type': 'number', 'minimum': 1, 'maximum': 100, 'default': 20, 'description': 'Maximum number of results to return'}}, 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, annotations=ToolAnnotations(title='Model Search', readOnlyHint=True, destructiveHint=False, idempotentHint=None, openWorldHint=True)), Tool(name='model_details', description='Get detailed information about a specific model from the Hugging Face Hub.', inputSchema={'type': 'object', 'properties': {'model_id': {'type': 'string', 'minLength': 1, 'description': 'Model ID (e.g., microsoft/DialoGPT-large)'}}, 'required': ['model_id'], 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, annotations=ToolAnnotations(title='Model Details', readOnlyHint=True, destructiveHint=False, idempotentHint=None, openWorldHint=False)), Tool(name='paper_search', description="Find Machine Learning research papers on the Hugging Face hub. Include 'Link to paper' When presenting the results. Consider whether tabulating results matches user intent.", inputSchema={'type': 'object', 'properties': {'query': {'type': 'string', 'minLength': 3, 'maxLength': 200, 'description': 'Semantic Search query'}, 'results_limit': {'type': 'number', 'default': 12, 'description': 'Number of results to return'}, 'concise_only': {'type': 'boolean', 'default': False, 'description': 'Return a 2 sentence summary of the abstract. Use for broad search terms which may return a lot of results. Check with User if unsure.'}}, 'required': ['query'], 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, annotations=ToolAnnotations(title='Paper Search', readOnlyHint=True, destructiveHint=False, idempotentHint=None, openWorldHint=True)), Tool(name='dataset_search', description='Find Datasets hosted on the Hugging Face hub. Returns comprehensive information about matching datasets including downloads, likes, tags, and direct links. Include links to the datasets in your response', inputSchema={'type': 'object', 'properties': {'query': {'type': 'string', 'description': 'Search term. Leave blank and specify "sort" and "limit" to get e.g. "Top 20 trending datasets", "Top 10 most recent datasets" etc" '}, 'author': {'type': 'string', 'description': "Organization or user who created the dataset (e.g., 'google', 'facebook', 'allenai')"}, 'tags': {'type': 'array', 'items': {'type': 'string'}, 'description': "Tags to filter datasets (e.g., ['language:en', 'size_categories:1M<n<10M', 'task_categories:text-classification'])"}, 'sort': {'type': 'string', 'enum': ['trendingScore', 'downloads', 'likes', 'createdAt', 'lastModified'], 'description': 'Sort order: trendingScore, downloads, likes, createdAt, lastModified'}, 'limit': {'type': 'number', 'minimum': 1, 'maximum': 100, 'default': 20, 'description': 'Maximum number of results to return'}}, 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, annotations=ToolAnnotations(title='Dataset Search', readOnlyHint=True, destructiveHint=False, idempotentHint=None, openWorldHint=True)), Tool(name='dataset_details', description='Get detailed information about a specific dataset on Hugging Face Hub.', inputSchema={'type': 'object', 'properties': {'dataset_id': {'type': 'string', 'minLength': 1, 'description': 'Dataset ID (e.g., squad, glue, imdb)'}}, 'required': ['dataset_id'], 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, annotations=ToolAnnotations(title='Dataset Details', readOnlyHint=True, destructiveHint=False, idempotentHint=None, openWorldHint=False)), Tool(name='gr1_evalstate_flux1_schnell', description='Generate an image using the Flux 1 Schnell Image Generator. (from evalstate/flux1_schnell)', inputSchema={'type': 'object', 'properties': {'prompt': {'type': 'string'}, 'seed': {'type': 'number', 'description': 'numeric value between 0 and 2147483647'}, 'randomize_seed': {'type': 'boolean', 'default': True}, 'width': {'type': 'number', 'description': 'numeric value between 256 and 2048', 'default': 1024}, 'height': {'type': 'number', 'description': 'numeric value between 256 and 2048', 'default': 1024}, 'num_inference_steps': {'type': 'number', 'description': 'numeric value between 1 and 50', 'default': 4}}, 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, annotations=ToolAnnotations(title='evalstate/flux1_schnell - flux1_schnell_infer 🏎️💨', readOnlyHint=None, destructiveHint=None, idempotentHint=None, openWorldHint=True)), Tool(name='gr2_abidlabs_easyghibli', description='Convert an image into a Studio Ghibli style image (from abidlabs/EasyGhibli)', inputSchema={'type': 'object', 'properties': {'spatial_img': {'type': 'string', 'description': 'File input: provide URL or file path'}}, 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, annotations=ToolAnnotations(title='abidlabs/EasyGhibli - abidlabs_EasyGhiblisingle_condition_generate_image 🦀', readOnlyHint=None, destructiveHint=None, idempotentHint=None, openWorldHint=True)), Tool(name='gr3_linoyts_framepack_f1', description='FramePack_F1_end_process tool from linoyts/FramePack-F1', inputSchema={'type': 'object', 'properties': {}, 'additionalProperties': False, '$schema': 'http://json-schema.org/draft-07/schema#'}, annotations=ToolAnnotations(title='linoyts/FramePack-F1 - FramePack_F1_end_process 📹⚡️', readOnlyHint=None, destructiveHint=None, idempotentHint=None, openWorldHint=True))]"""
	print(str(e))
	# Format the prompt
	formatted_prompt = hf_query_gen_prompt.format(
	Tool_Details=hf_query_gen_tool_details
	) + "\n\n" + conversation_history
	# Get plan from Gemini
	yield "⏳ Generating plan... (Strategizing which tools to call)"

	plan = generate_with_gemini(formatted_prompt, "Planning with gemini")
	# Parse the plan
	parsed_plan = parse_json_codefences(plan)
	print(parsed_plan)
	# Call tool to get tool calls
	yield "⏳ Generating plan... (calling HF platform tools and getting data)"

	try:
	mcp_call_tool_func = modal.Function.from_name(app_name="HuggingFace-MCP",name="call_tool")
	tool_calls = []
	async for tool_call in mcp_call_tool_func.starmap.aio([(tool['tool'], tool['args']) for tool in parsed_plan]):
	tool_calls.append(tool_call)
	except Exception as e:
	print(str(e))
	tool_calls = []
	print(tool_calls)
	yield "⏳ Generating plan... (Generating Plan context from tool call info)"

	if tool_calls!=[]:
	formatted_context_prompt = hf_context_gen_prompt.format(
	Conversation=conversation_history,
	Tool_Calls=parsed_plan,
	Results=tool_calls
	)
	context = generate_with_gemini(formatted_context_prompt, "Generating context for plan")

	else:
	formatted_context_prompt = hf_context_gen_prompt.format(
	Conversation=conversation_history,
	Tool_Calls=parsed_plan,
	Results="Couldn't generate the tool calls results but use your knowledge about huggingface platform(models, datasets, spaces, training libraries, transfomers library etc.) as backup to generate the plan"
	)
	context = generate_with_gemini(formatted_context_prompt, "Generating context for plan")
	yield context

	def generate_code_with_devstral(plan_text, history, file_cache):
	"""Generate code using the deployed Devstral model via Modal"""
	yield "⏳ Generating code... (Starting Codegen)"

	if not MODAL_AVAILABLE:
	yield "❌ Modal not available. Please install Modal to use code generation."
	return

	if not plan_text or not plan_text.strip() or "Plan will be generated here..." in plan_text:
	yield "❌ Please generate a plan first before generating code."
	return

	# try:
	# Extract user query from conversation history
	user_query = ""
	if history:
	# Get the latest user message as the main query
	for user_msg, ai_msg in reversed(history):
	if user_msg and user_msg.strip():
	user_query = user_msg.strip()
	break

	if not user_query:
	user_query = "Generate Python code based on the provided plan and context."

	# Build context from file cache and conversation
	context = ""
	if file_cache:
	context += "Available Data Files:\n"
	for cache_key, file_info in file_cache.items():
	context += f"- {file_info.get('file_name', 'Unknown file')}\n"
	if 'stats' in file_info:
	context += f" {file_info['stats']}\n"

	# Add conversation context
	if history:
	context += "\nConversation Context:\n"
	for user_msg, ai_msg in history[-3:]: # Last 3 exchanges
	context += f"User: {user_msg}\n"
	if ai_msg:
	context += f"Assistant: {ai_msg}\n"

	# Format the user prompt with variables
	formatted_user_prompt = devstral_code_gen_user_prompt.format(
	user_query=user_query,
	plan=plan_text,
	context=context
	)

	# Use Modal app.run() pattern like in the examples

	base_url = os.getenv("DEVSTRAL_BASE_URL")
	api_key = os.getenv("DEVSTRAL_API_KEY")
	print(f"🚀 Generating code using Devstral...")
	print(f"📡 Connecting to: {base_url}")
	yield "⏳ Generating code... (Calling Devstral VLLM API server deployed on Modal)"

	try:
	devstral_inference_func = modal.Function.from_name("devstral-inference-client", "run_devstral_inference")
	result = devstral_inference_func.remote(
	base_url=base_url,
	api_key=api_key,
	prompts=[formatted_user_prompt],
	system_prompt=devstral_code_gen_sys_prompt,
	mode="single"
	)
	if result and "response" in result:
	code_output = result["response"]
	yield f"🚀 Generated Code:\n\n{code_output}"
	else:
	yield "❌ Error: No response received from Devstral model."
	except Exception as e:
	yield f"❌ Error: {str(e)}"
	def execute_code(code_output):
	"""Executes Python code from a string and returns the output."""
	yield "⏳ Executing code... (Starting)"

	try:
	if "Code will be generated here..." in code_output or "Generated Code" not in code_output:
	yield "❌ Please generate code first before executing."
	return

	yield "⏳ Executing code... (Parsing code)"
	code = parse_python_codefences(code_output)

	if not code or not code.strip():
	yield "❌ No Python code found to execute."
	return

	yield "⏳ Executing code... (Running in sandbox)"
	exec_result, build_logs = code_eval(code)

	# Ensure exec_result is a dictionary
	if not isinstance(exec_result, dict):
	yield f"❌ Error: Unexpected execution result format.\\n\\n```\\n{str(exec_result)}\\n```"
	return

	return_code = exec_result.get('returncode', -1)
	stdout = exec_result.get('stdout', '')
	stderr = exec_result.get('stderr', '')
	error_msg = exec_result.get('error', 'Unknown error')

	# Build the formatted Markdown output
	formatted_output = ""
	if return_code == 0:
	formatted_output += "## ✅ Execution Successful\n"
	if stdout:
	formatted_output += f"Output:\n```text\n{stdout.strip()}\n```\n"
	if stderr:
	formatted_output += f"Warnings (`stderr`):\n```text\n{stderr.strip()}\n```\n"
	else:
	formatted_output += f"## ❌ Execution Failed (Exit Code: {return_code})\n"
	formatted_output += f"Error: `{error_msg}`\n\n"
	if stderr:
	formatted_output += f"Error Log (`stderr`):\n```text\n{stderr.strip()}\n```\n"
	if stdout:
	formatted_output += f"Output (`stdout`):\n```text\n{stdout.strip()}\n```\n"

	# Add build logs in a collapsible section
	if build_logs:
	formatted_output += f"""
	<details>
	<summary>Click to view build logs</summary>

	```
	{build_logs.strip()}
	```
	</details>
	"""
	yield formatted_output

	except Exception as e:
	yield f"❌ Error running execution logic: {str(e)}\n\n{traceback.format_exc()}"

	# Custom CSS for a sleek design
	custom_css = """
	.gradio-container {
	max-width: 900px !important;
	margin: auto !important;
	}

	.chat-container {
	height: 600px !important;
	}

	#component-0 {
	height: 100vh;
	}

	.message {
	padding: 15px !important;
	margin: 10px 0 !important;
	border-radius: 15px !important;
	}

	.user-message {
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
	color: white !important;
	margin-left: 20% !important;
	}

	.bot-message {
	background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%) !important;
	color: white !important;
	margin-right: 20% !important;
	}

	.upload-area {
	border: 2px dashed #4f46e5 !important;
	border-radius: 10px !important;
	padding: 20px !important;
	text-align: center !important;
	background: linear-gradient(135deg, #f0f4ff 0%, #e0e7ff 100%) !important;
	}

	.btn-primary {
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
	border: none !important;
	border-radius: 25px !important;
	padding: 10px 25px !important;
	font-weight: bold !important;
	}

	.btn-secondary {
	background: linear-gradient(135deg, #ffeaa7 0%, #fab1a0 100%) !important;
	border: none !important;
	border-radius: 25px !important;
	padding: 10px 25px !important;
	font-weight: bold !important;
	color: #2d3436 !important;
	}

	.title {
	text-align: center !important;
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
	-webkit-background-clip: text !important;
	-webkit-text-fill-color: transparent !important;
	font-size: 2.5em !important;
	font-weight: bold !important;
	margin-bottom: 20px !important;
	}

	.subtitle {
	text-align: center !important;
	color: #6c757d !important;
	font-size: 1.2em !important;
	margin-bottom: 30px !important;
	}

	.output-markdown {
	height: 250px;
	overflow-y: auto !important;
	border: 1px solid #e0e0e0;
	padding: 10px;
	border-radius: 5px;
	}
	"""

	# Create the Gradio interface
	with gr.Blocks(css=custom_css, title="Data Science Requirements Gathering Agent") as app:

	# Header
	gr.HTML("""
	<div class="title">🔬 Data Science Consultant</div>
	<div class="subtitle">
	Transform your vague ideas into reality
	</div>
	<div class="tools">Powered by Modal🧡 , LlamaIndex 🦙, Mistral AI🦾 & Sambanova 🧑🏽‍💻</div>
	""")

	with gr.Row():
	with gr.Column(scale=3):
	# Chat interface
	chatbot = gr.Chatbot(
	label="Requirements Gathering Conversation",
	height=500,
	show_copy_button=True,
	bubble_full_width=False,
	elem_classes=["chat-container"]
	)

	plan_output = gr.Markdown(
	"Plan will be generated here...",
	label="Generated Plan",
	elem_classes=["output-markdown"],
	)

	code_output = gr.Markdown(
	"Code will be generated here...",
	label="Generated Code",
	elem_classes=["output-markdown"],
	)
	execution_output = gr.Markdown(
	"Execution output will be shown here...",
	label="Execution Output",
	elem_classes=["output-markdown"],
	)
	with gr.Row():
	with gr.Column(scale=4):
	msg = gr.Textbox(
	placeholder="Describe your data science project or ask a question...",
	label="Your Message",
	lines=2,
	max_lines=5
	)
	with gr.Column(scale=1):
	send_btn = gr.Button("Send 📤", variant="primary", elem_classes=["btn-primary"])


	with gr.Row():
	clear_btn = gr.Button("Clear Chat 🗑️", variant="secondary", elem_classes=["btn-secondary"])

	with gr.Column(scale=1):
	# File upload section
	gr.HTML("<h3 style='text-align: center; color: #4f46e5;'>📁 Upload Data Files</h3>")

	file_upload = gr.File(
	label="Upload your files (CSV, Excel, PDF, PPT, DOCX, Images, etc.)",
	file_count="multiple",
	file_types=[".csv", ".xlsx", ".xls", ".json", ".txt", ".pdf", ".ppt", ".pptx", ".doc", ".docx", ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff", ".webp"],
	elem_classes=["upload-area"]
	)

	uploaded_files_display = gr.File(
	label="Uploaded Files",
	file_count="multiple",
	interactive=False,
	visible=True
	)

	# Instructions
	gr.HTML("""
	<div style="padding: 15px; background: linear-gradient(135deg, #e3f2fd 0%, #f3e5f5 100%);
	border-radius: 10px; margin-top: 20px;">
	<h4 style="color: #4f46e5; margin-bottom: 10px;">💡 How it works:</h4>
	<ol style="color: #555; font-size: 14px; line-height: 1.6;">
	<li>Describe your data science project</li>
	<li>Upload your files (data, documents, images)</li>
	<li>Answer clarifying questions</li>
	<li>Generate a plan for your project</li>
	<li>Generate Python code using Devstral AI</li>
	</ol>
	<p style="color: #666; font-size: 12px; margin-top: 10px;">
	📄 Supports: CSV, Excel, PDF, PowerPoint, Word docs, Images, JSON, Text files<br>
	💻 Code generation powered by Mistral Devstral-Small-2505
	</p>
	</div>
	""")

	# Action buttons section
	with gr.Column():
	plan_btn = gr.Button("Generate Plan 📋", variant="secondary", elem_classes=["btn-secondary"], size="lg")
	code_btn = gr.Button("Generate Code 💻", variant="secondary", elem_classes=["btn-secondary"], size="lg")
	execute_code_btn = gr.Button("Execute Code 🚀", variant="primary", elem_classes=["btn-primary"], size="lg")

	# State for conversation history and file cache
	chat_history = gr.State([])
	file_cache = gr.State({})

	# Event handlers
	def handle_send(message, history, files, cache):
	if message.strip():
	new_history, updated_history, cleared_input, updated_cache = chat_interface(message, history, files, cache)
	return new_history, updated_history, cleared_input, updated_cache
	return history, history, message, cache

	# Wire up the interface
	send_btn.click(
	handle_send,
	inputs=[msg, chat_history, uploaded_files_display, file_cache],
	outputs=[chatbot, chat_history, msg, file_cache]
	)

	msg.submit(
	handle_send,
	inputs=[msg, chat_history, uploaded_files_display, file_cache],
	outputs=[chatbot, chat_history, msg, file_cache]
	)

	clear_btn.click(
	clear_chat,
	outputs=[chatbot, chat_history, file_cache]
	)

	plan_btn.click(
	generate_plan,
	inputs=[chat_history, file_cache],
	outputs=[plan_output]
	)

	code_btn.click(
	generate_code_with_devstral,
	inputs=[plan_output, chat_history, file_cache],
	outputs=[code_output]
	)
	execute_code_btn.click(
	execute_code,
	inputs=[code_output],
	outputs=[execution_output]
	)
	file_upload.change(
	lambda files: files,
	inputs=[file_upload],
	outputs=[uploaded_files_display]
	)

	# Welcome message
	app.load(
	lambda: [(None, "👋 Hello! I'm your Data Science Project Agent. I'll help you transform your project ideas into reality .\n\n🚀 Let's get started! Tell me about your data science project or what you're trying to achieve.")],
	outputs=[chatbot]
	)

	if __name__ == "__main__":
	app.queue()
	app.launch(show_api=True, ssr_mode=False, show_error=True, mcp_server=False)