| import gradio as gr |
| import pandas as pd |
| import aiohttp |
| import asyncio |
| import json |
| import io |
| import os |
| from typing import Optional, Tuple |
|
|
class DataAnalyzer:
    """Summarize tabular data files and obtain AI-driven insights.

    Two responsibilities: build a plain-text statistical summary of a
    pandas DataFrame, and stream an LLM analysis of that summary from the
    Chutes chat-completions API.
    """

    # Upper bound on one API call so a stalled stream cannot hang the UI forever.
    REQUEST_TIMEOUT_SECONDS = 120

    def __init__(self):
        # Chat-completions endpoint of the Chutes LLM service.
        self.api_base_url = "https://llm.chutes.ai/v1/chat/completions"

    @staticmethod
    def _build_prompt(data_summary: str, user_question: Optional[str]) -> str:
        """Return the user prompt: a Q&A prompt when a question was asked,
        otherwise a request for a full exploratory analysis."""
        if user_question:
            return f"""Based on this dataset summary:
{data_summary}
User question: {user_question}
Please provide a detailed answer based on the data."""
        return f"""Analyze the following dataset and provide comprehensive insights:
{data_summary}
Please provide:
1. Key statistical insights
2. Notable patterns or trends
3. Data quality observations
4. Business recommendations
5. Potential areas for further analysis
Keep the analysis clear, actionable, and data-driven."""

    async def analyze_with_chutes(self, api_token: str, data_summary: str,
                                  user_question: Optional[str] = None) -> str:
        """Send the data summary to the Chutes API and return the model's reply.

        Args:
            api_token: Bearer token for the Chutes API.
            data_summary: Text produced by generate_data_summary().
            user_question: Optional follow-up question; when falsy a full
                exploratory analysis is requested instead.

        Returns:
            The assembled model response, or a human-readable error string.
            This method never raises, so the UI always receives displayable text.
        """
        headers = {
            "Authorization": f"Bearer {api_token}",
            "Content-Type": "application/json",
        }
        body = {
            "model": "openai/gpt-oss-20b",
            "messages": [
                {"role": "user", "content": self._build_prompt(data_summary, user_question)}
            ],
            "stream": True,
            "max_tokens": 2048,
            "temperature": 0.3,  # low temperature for consistent, repeatable analysis
        }

        try:
            # Bound the whole request: the previous code had no timeout and
            # could hang forever on a stalled connection.
            timeout = aiohttp.ClientTimeout(total=self.REQUEST_TIMEOUT_SECONDS)
            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.post(self.api_base_url, headers=headers, json=body) as response:
                    if response.status != 200:
                        # Include a snippet of the body so auth/quota errors are diagnosable.
                        detail = (await response.text())[:500]
                        return f"Error: API request failed with status {response.status}. {detail}"
                    return await self._collect_stream(response)
        except Exception as e:
            return f"Error connecting to Chutes API: {str(e)}"

    @staticmethod
    async def _collect_stream(response) -> str:
        """Assemble the content deltas from an SSE chat-completions stream."""
        full_response = ""
        async for raw_line in response.content:
            line = raw_line.decode("utf-8").strip()
            if not line.startswith("data: "):
                continue
            data = line[6:]
            if data == "[DONE]":
                break
            try:
                chunk_data = json.loads(data)
            except json.JSONDecodeError:
                continue  # skip keep-alives / malformed frames
            choices = chunk_data.get("choices") or []
            if choices:
                content = choices[0].get("delta", {}).get("content", "")
                if content:
                    full_response += content
        return full_response if full_response else "No response received from the model."

    def process_file(self, file_path: str) -> Tuple[pd.DataFrame, str]:
        """Load a CSV/XLSX/XLS file and return (DataFrame, text summary).

        Raises:
            Exception: with a user-facing "Error processing file: ..." message
                when the file cannot be read or has an unsupported extension.
        """
        try:
            file_extension = os.path.splitext(file_path)[1].lower()

            if file_extension == '.csv':
                df = pd.read_csv(file_path)
            elif file_extension in ('.xlsx', '.xls'):
                df = pd.read_excel(file_path)
            else:
                raise ValueError("Unsupported file format. Please upload CSV or Excel files.")

            return df, self.generate_data_summary(df)

        except Exception as e:
            # Chain the cause so the original traceback stays available for debugging.
            raise Exception(f"Error processing file: {str(e)}") from e

    def generate_data_summary(self, df: pd.DataFrame) -> str:
        """Build a multi-section plain-text summary of *df*: shape, per-column
        null counts, numeric statistics, categorical cardinality, and a preview."""
        summary = []
        row_count = len(df)

        # Basic info ("Γ" mojibake fixed to a real multiplication sign).
        summary.append("Dataset Overview:")
        summary.append(f"- Shape: {df.shape[0]} rows × {df.shape[1]} columns")
        summary.append(f"- Total cells: {df.shape[0] * df.shape[1]:,}")

        # Column information (guard the percentage against an empty frame).
        summary.append("\nColumn Information:")
        for col, dtype in df.dtypes.items():
            null_count = df[col].isnull().sum()
            null_pct = (null_count / row_count) * 100 if row_count else 0.0
            summary.append(f"- {col} ({dtype}): {null_count} nulls ({null_pct:.1f}%)")

        # Numerical columns statistics
        numeric_cols = df.select_dtypes(include=['number']).columns
        if len(numeric_cols) > 0:
            summary.append("\nNumerical Columns Summary:")
            for col in numeric_cols:
                stats = df[col].describe()
                summary.append(
                    f"- {col}: Mean={stats['mean']:.2f}, Std={stats['std']:.2f}, "
                    f"Range=[{stats['min']:.2f}, {stats['max']:.2f}]"
                )

        # Categorical columns
        categorical_cols = df.select_dtypes(include=['object', 'category']).columns
        if len(categorical_cols) > 0:
            summary.append("\nCategorical Columns Summary:")
            for col in categorical_cols:
                unique_count = df[col].nunique()
                modes = df[col].mode()
                most_common = modes.iloc[0] if len(modes) > 0 else "N/A"
                summary.append(f"- {col}: {unique_count} unique values, Most common: '{most_common}'")

        # Sample data
        summary.append("\nFirst 5 rows preview:")
        summary.append(df.head().to_string())

        return "\n".join(summary)
|
|
# Initialize the analyzer.
# Module-level singleton shared by all Gradio callbacks; it holds no request
# state (only the endpoint URL), so reuse across requests is safe.
analyzer = DataAnalyzer()
|
|
| async def analyze_data(file, api_key, user_question=""): |
| """Main function to analyze uploaded data""" |
| if not file: |
| return "Please upload a CSV or Excel file.", "", "" |
| |
| if not api_key: |
| return "Please enter your Chutes API key.", "", "" |
| |
| try: |
| # Process the uploaded file |
| df, data_summary = analyzer.process_file(file.name) |
| |
| # Get AI analysis |
| ai_analysis = await analyzer.analyze_with_chutes(api_key, data_summary, user_question) |
| |
| # Format the complete response |
| response = f"""## π Data Analysis Complete! |
| ### π Dataset Overview: |
| {data_summary} |
| ### π€ AI Insights & Recommendations: |
| {ai_analysis} |
| """ |
| |
| return response, data_summary, df.head(10).to_html() |
| |
| except Exception as e: |
| return f"Error: {str(e)}", "", "" |
|
|
| def sync_analyze_data(file, api_key, user_question=""): |
| """Synchronous wrapper for the async analyze function""" |
| return asyncio.run(analyze_data(file, api_key, user_question)) |
|
|
# Create the Gradio interface.
# Layout: header banner, a two-column row (inputs left, results right),
# two collapsed accordions for secondary outputs, then static help text.
# NOTE: component creation order defines the on-screen layout.
with gr.Blocks(title="π Smart Data Analyzer", theme=gr.themes.Ocean()) as app:
    # Header banner.
    gr.Markdown("""
    # π Smart Data Analyzer
    ### Upload your CSV/Excel file and get instant AI-powered insights using OpenAI's gpt-oss-20b model via Chutes!
    """)

    with gr.Row():
        # Left column: all user inputs.
        with gr.Column(scale=1):
            # File upload (extension list mirrors what process_file accepts).
            file_input = gr.File(
                label="π Upload CSV or Excel File",
                file_types=[".csv", ".xlsx", ".xls"],
                file_count="single"
            )

            # API key input — masked so the token is not shown on screen.
            api_key_input = gr.Textbox(
                label="π Chutes API Key",
                placeholder="Enter your Chutes API token here...",
                type="password",
                lines=1
            )

            # Optional question input; an empty string triggers the generic analysis.
            question_input = gr.Textbox(
                label="β Ask a Specific Question (Optional)",
                placeholder="e.g., What are the sales trends? Which region performs best?",
                lines=2
            )

            # Analyze button
            analyze_btn = gr.Button("π Analyze Data", variant="primary", size="lg")

        # Right column (wider): the main analysis report.
        with gr.Column(scale=2):
            # Results display
            analysis_output = gr.Markdown(
                label="π Analysis Results",
                value="Upload a file and click 'Analyze Data' to see insights..."
            )

    # Additional outputs (hidden by default)
    with gr.Accordion("π Data Preview", open=False):
        data_preview = gr.HTML(label="First 10 Rows")

    with gr.Accordion("π Raw Data Summary", open=False):
        raw_summary = gr.Textbox(label="Dataset Summary", lines=10)

    # Event handlers: outputs map 1:1 onto the three elements of
    # sync_analyze_data's return tuple (report, summary, HTML preview).
    analyze_btn.click(
        fn=sync_analyze_data,
        inputs=[file_input, api_key_input, question_input],
        outputs=[analysis_output, raw_summary, data_preview]
    )

    # Example section (static help text only; no components bound here).
    gr.Markdown("""
    ### π‘ Tips for Best Results:
    - **File Size**: Keep files under 10MB for fastest processing
    - **API Key**: Get your free Chutes API key from [chutes.ai](https://chutes.ai)
    - **Questions**: Be specific! Ask about trends, patterns, outliers, or recommendations
    - **Formats**: Supports CSV, XLSX, and XLS files

    ### π― Example Questions to Ask:
    - "What are the key trends in this sales data?"
    - "Which products are underperforming?"
    - "Are there any seasonal patterns?"
    - "What recommendations do you have based on this data?"
    """)
|
|
# Launch the application (script entry point; skipped when imported as a module).
if __name__ == "__main__":
    app.launch(
        # NOTE(review): share=True opens a public *.gradio.live tunnel to this
        # app — confirm that exposing it publicly is intended before deploying.
        share=True
    )