| import gradio as gr |
| import pandas as pd |
| import aiohttp |
| import asyncio |
| import json |
| import io |
| import os |
| from typing import Optional, Tuple |
|
|
class DataAnalyzer:
    """Summarize tabular data files and obtain AI-driven insights.

    Two responsibilities: build a plain-text statistical summary of a
    pandas DataFrame, and stream an LLM analysis of that summary from the
    Chutes chat-completions API.
    """

    # Upper bound on one API call so a stalled stream cannot hang the UI forever.
    REQUEST_TIMEOUT_SECONDS = 120

    def __init__(self):
        # Chat-completions endpoint of the Chutes LLM service.
        self.api_base_url = "https://llm.chutes.ai/v1/chat/completions"

    @staticmethod
    def _build_prompt(data_summary: str, user_question: Optional[str]) -> str:
        """Return the user prompt: a Q&A prompt when a question was asked,
        otherwise a request for a full exploratory analysis."""
        if user_question:
            return f"""Based on this dataset summary:
{data_summary}
User question: {user_question}
Please provide a detailed answer based on the data."""
        return f"""Analyze the following dataset and provide comprehensive insights:
{data_summary}
Please provide:
1. Key statistical insights
2. Notable patterns or trends
3. Data quality observations
4. Business recommendations
5. Potential areas for further analysis
Keep the analysis clear, actionable, and data-driven."""

    async def analyze_with_chutes(self, api_token: str, data_summary: str,
                                  user_question: Optional[str] = None) -> str:
        """Send the data summary to the Chutes API and return the model's reply.

        Args:
            api_token: Bearer token for the Chutes API.
            data_summary: Text produced by generate_data_summary().
            user_question: Optional follow-up question; when falsy a full
                exploratory analysis is requested instead.

        Returns:
            The assembled model response, or a human-readable error string.
            This method never raises, so the UI always receives displayable text.
        """
        headers = {
            "Authorization": f"Bearer {api_token}",
            "Content-Type": "application/json",
        }
        body = {
            "model": "openai/gpt-oss-20b",
            "messages": [
                {"role": "user", "content": self._build_prompt(data_summary, user_question)}
            ],
            "stream": True,
            "max_tokens": 2048,
            "temperature": 0.3,  # low temperature for consistent, repeatable analysis
        }

        try:
            # Bound the whole request: the previous code had no timeout and
            # could hang forever on a stalled connection.
            timeout = aiohttp.ClientTimeout(total=self.REQUEST_TIMEOUT_SECONDS)
            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.post(self.api_base_url, headers=headers, json=body) as response:
                    if response.status != 200:
                        # Include a snippet of the body so auth/quota errors are diagnosable.
                        detail = (await response.text())[:500]
                        return f"Error: API request failed with status {response.status}. {detail}"
                    return await self._collect_stream(response)
        except Exception as e:
            return f"Error connecting to Chutes API: {str(e)}"

    @staticmethod
    async def _collect_stream(response) -> str:
        """Assemble the content deltas from an SSE chat-completions stream."""
        full_response = ""
        async for raw_line in response.content:
            line = raw_line.decode("utf-8").strip()
            if not line.startswith("data: "):
                continue
            data = line[6:]
            if data == "[DONE]":
                break
            try:
                chunk_data = json.loads(data)
            except json.JSONDecodeError:
                continue  # skip keep-alives / malformed frames
            choices = chunk_data.get("choices") or []
            if choices:
                content = choices[0].get("delta", {}).get("content", "")
                if content:
                    full_response += content
        return full_response if full_response else "No response received from the model."

    def process_file(self, file_path: str) -> Tuple[pd.DataFrame, str]:
        """Load a CSV/XLSX/XLS file and return (DataFrame, text summary).

        Raises:
            Exception: with a user-facing "Error processing file: ..." message
                when the file cannot be read or has an unsupported extension.
        """
        try:
            file_extension = os.path.splitext(file_path)[1].lower()

            if file_extension == '.csv':
                df = pd.read_csv(file_path)
            elif file_extension in ('.xlsx', '.xls'):
                df = pd.read_excel(file_path)
            else:
                raise ValueError("Unsupported file format. Please upload CSV or Excel files.")

            return df, self.generate_data_summary(df)

        except Exception as e:
            # Chain the cause so the original traceback stays available for debugging.
            raise Exception(f"Error processing file: {str(e)}") from e

    def generate_data_summary(self, df: pd.DataFrame) -> str:
        """Build a multi-section plain-text summary of *df*: shape, per-column
        null counts, numeric statistics, categorical cardinality, and a preview."""
        summary = []
        row_count = len(df)

        # Basic info ("Γ" mojibake fixed to a real multiplication sign).
        summary.append("Dataset Overview:")
        summary.append(f"- Shape: {df.shape[0]} rows × {df.shape[1]} columns")
        summary.append(f"- Total cells: {df.shape[0] * df.shape[1]:,}")

        # Column information (guard the percentage against an empty frame).
        summary.append("\nColumn Information:")
        for col, dtype in df.dtypes.items():
            null_count = df[col].isnull().sum()
            null_pct = (null_count / row_count) * 100 if row_count else 0.0
            summary.append(f"- {col} ({dtype}): {null_count} nulls ({null_pct:.1f}%)")

        # Numerical columns statistics
        numeric_cols = df.select_dtypes(include=['number']).columns
        if len(numeric_cols) > 0:
            summary.append("\nNumerical Columns Summary:")
            for col in numeric_cols:
                stats = df[col].describe()
                summary.append(
                    f"- {col}: Mean={stats['mean']:.2f}, Std={stats['std']:.2f}, "
                    f"Range=[{stats['min']:.2f}, {stats['max']:.2f}]"
                )

        # Categorical columns
        categorical_cols = df.select_dtypes(include=['object', 'category']).columns
        if len(categorical_cols) > 0:
            summary.append("\nCategorical Columns Summary:")
            for col in categorical_cols:
                unique_count = df[col].nunique()
                modes = df[col].mode()
                most_common = modes.iloc[0] if len(modes) > 0 else "N/A"
                summary.append(f"- {col}: {unique_count} unique values, Most common: '{most_common}'")

        # Sample data
        summary.append("\nFirst 5 rows preview:")
        summary.append(df.head().to_string())

        return "\n".join(summary)
|
|
# Initialize the analyzer.
# Module-level singleton shared by all Gradio callbacks; it holds no request
# state (only the endpoint URL), so reuse across requests is safe.
analyzer = DataAnalyzer()
|
|
| async def analyze_data(file, api_key, user_question=""): |
| """Main function to analyze uploaded data""" |
| if not file: |
| return "Please upload a CSV or Excel file.", "", "" |
| |
| if not api_key: |
| return "Please enter your Chutes API key.", "", "" |
| |
| try: |
| # Process the uploaded file |
| df, data_summary = analyzer.process_file(file.name) |
| |
| # Get AI analysis |
| ai_analysis = await analyzer.analyze_with_chutes(api_key, data_summary, user_question) |
| |
| # Format the complete response |
| response = f"""## π Data Analysis Complete! |
| ### π Dataset Overview: |
| {data_summary} |
| ### π€ AI Insights & Recommendations: |
| {ai_analysis} |
| """ |
| |
| return response, data_summary, df.head(10).to_html() |
| |
| except Exception as e: |
| return f"Error: {str(e)}", "", "" |
|
|
| def sync_analyze_data(file, api_key, user_question=""): |
| """Synchronous wrapper for the async analyze function""" |
| return asyncio.run(analyze_data(file, api_key, user_question)) |
|
|
# Create the Gradio interface.
# Layout: header banner, a two-column row (inputs left, results right),
# two collapsed accordions for secondary outputs, then static help text.
# NOTE: component creation order defines the on-screen layout.
with gr.Blocks(title="π Smart Data Analyzer", theme=gr.themes.Ocean()) as app:
    # Header banner.
    gr.Markdown("""
    # π Smart Data Analyzer
    ### Upload your CSV/Excel file and get instant AI-powered insights using OpenAI's gpt-oss-20b model via Chutes!
    """)

    with gr.Row():
        # Left column: all user inputs.
        with gr.Column(scale=1):
            # File upload (extension list mirrors what process_file accepts).
            file_input = gr.File(
                label="π Upload CSV or Excel File",
                file_types=[".csv", ".xlsx", ".xls"],
                file_count="single"
            )

            # API key input — masked so the token is not shown on screen.
            api_key_input = gr.Textbox(
                label="π Chutes API Key",
                placeholder="Enter your Chutes API token here...",
                type="password",
                lines=1
            )

            # Optional question input; an empty string triggers the generic analysis.
            question_input = gr.Textbox(
                label="β Ask a Specific Question (Optional)",
                placeholder="e.g., What are the sales trends? Which region performs best?",
                lines=2
            )

            # Analyze button
            analyze_btn = gr.Button("π Analyze Data", variant="primary", size="lg")

        # Right column (wider): the main analysis report.
        with gr.Column(scale=2):
            # Results display
            analysis_output = gr.Markdown(
                label="π Analysis Results",
                value="Upload a file and click 'Analyze Data' to see insights..."
            )

    # Additional outputs (hidden by default)
    with gr.Accordion("π Data Preview", open=False):
        data_preview = gr.HTML(label="First 10 Rows")

    with gr.Accordion("π Raw Data Summary", open=False):
        raw_summary = gr.Textbox(label="Dataset Summary", lines=10)

    # Event handlers: outputs map 1:1 onto the three elements of
    # sync_analyze_data's return tuple (report, summary, HTML preview).
    analyze_btn.click(
        fn=sync_analyze_data,
        inputs=[file_input, api_key_input, question_input],
        outputs=[analysis_output, raw_summary, data_preview]
    )

    # Example section (static help text only; no components bound here).
    gr.Markdown("""
    ### π‘ Tips for Best Results:
    - **File Size**: Keep files under 10MB for fastest processing
    - **API Key**: Get your free Chutes API key from [chutes.ai](https://chutes.ai)
    - **Questions**: Be specific! Ask about trends, patterns, outliers, or recommendations
    - **Formats**: Supports CSV, XLSX, and XLS files

    ### π― Example Questions to Ask:
    - "What are the key trends in this sales data?"
    - "Which products are underperforming?"
    - "Are there any seasonal patterns?"
    - "What recommendations do you have based on this data?"
    """)
|
|
# Launch the application (script entry point; skipped when imported as a module).
if __name__ == "__main__":
    app.launch(
        # NOTE(review): share=True opens a public *.gradio.live tunnel to this
        # app — confirm that exposing it publicly is intended before deploying.
        share=True
    )