File size: 3,097 Bytes

5dc8e4c

import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
from huggingface_hub import InferenceClient
import os

# Shared Hugging Face Inference API client; the access token is read from
# the HF_TOKEN environment variable (None is passed when it is unset).
client = InferenceClient(token=os.getenv("HF_TOKEN"))
# Value passed as max_new_tokens on each text-generation request
MAX_TOKENS = 1000

def analyze_data(query, df, chat_history):
    """Answer a data question by generating and running analysis code.

    Asks the Hugging Face Inference API (``bigcode/starcoder``) for Python
    code that answers ``query`` against ``df``, screens that code with
    ``clean_code``, executes it, and appends the outcome to the chat.

    Args:
        query: The user's question; interpolated into the model prompt.
        df: The uploaded dataset, or ``None`` when nothing has been
            uploaded yet.
        chat_history: Existing chat turns as ``[user, bot]`` pairs.
            ``None`` is treated as an empty history. The list passed in is
            never mutated.

    Returns:
        A new history list with one or two turns appended: a text turn
        holding the ``result`` value (if the generated code set one) and a
        file turn ``(path,)`` pointing at the saved plot (if the code drew
        one). Any exception raised while generating, screening or running
        the code is reported as an ``"Error: ..."`` turn instead of being
        propagated.
    """
    history = list(chat_history or [])
    if df is None:
        return history + [[query, "Please upload a CSV file first!"]]

    try:
        # Generate code using StarCoder model
        code_response = client.text_generation(
            prompt=f"""
            Generate Python code to answer this data analysis question:
            Dataset columns: {list(df.columns)}
            Question: {query}
            
            Return only the code with these requirements:
            1. Use pandas and matplotlib
            2. Store result in 'result' variable
            3. Create plot using plt.figure()
            4. No user input or file operations
            """,
            model="bigcode/starcoder",
            max_new_tokens=MAX_TOKENS,
            temperature=0.2
        )

        code = clean_code(code_response)

        # pyplot tracks a process-wide "current figure"; discard anything
        # left over so an old plot is never mistaken for this request's.
        plt.close("all")

        # SECURITY: this executes model-generated code inside the server
        # process. clean_code() is only a substring blacklist, not a
        # sandbox -- run this app in an isolated environment.
        # 'pd' is provided because the prompt asks for pandas code.
        exec_env = {'df': df, 'pd': pd, 'plt': plt}
        exec(code, exec_env)

        # Text and image go into separate chat turns: a file message cannot
        # be joined into a string.
        turns = []
        if 'result' in exec_env:
            turns.append([query, f"Analysis Result:\n{str(exec_env['result'])}"])

        fig = exec_env.get('plt', plt).gcf()
        if fig.get_axes():
            fig.savefig("temp_plot.png")
            # A (filepath,) tuple is rendered by the Chatbot as a file;
            # the question is shown only once, on the first new turn.
            turns.append([None if turns else query, ("temp_plot.png",)])

        if not turns:
            turns.append(
                [query, "The generated code ran but produced no 'result' value or plot."]
            )

        return history + turns

    except Exception as e:
        # Top-level boundary for the chat handler: surface the failure to
        # the user instead of crashing the event.
        return history + [[query, f"Error: {str(e)}"]]
    finally:
        # Release figures created by the generated code (and by gcf()).
        plt.close("all")

def clean_code(code):
    """Screen generated code against a blacklist of substrings.

    The check is a plain substring match, tried in the order listed; the
    first blacklisted token found in ``code`` is reported.

    Args:
        code: The generated source text to screen.

    Returns:
        ``code`` unchanged when no blacklisted token occurs in it.

    Raises:
        ValueError: If ``code`` contains any blacklisted token.
    """
    blocked_tokens = (
        'os.', 'subprocess', 'sys.', 'shutil', 'open(',
        '__', 'lambda', 'exec(', 'eval(', 'getattr',
    )

    # First offending token in list order, or None when the code is clean
    offender = next((token for token in blocked_tokens if token in code), None)
    if offender is not None:
        raise ValueError(f"Forbidden pattern detected: {offender}")

    return code

def handle_file(file):
    """Load an uploaded CSV into a DataFrame.

    Args:
        file: Upload object whose ``name`` attribute is the path of the
            CSV file on disk.

    Returns:
        The DataFrame produced by ``pd.read_csv`` for that path.

    Raises:
        gr.Error: If reading the path or parsing the CSV fails for any
            reason; the message carries the underlying error text.
    """
    try:
        frame = pd.read_csv(file.name)
    except Exception as load_error:
        raise gr.Error(f"Error loading CSV: {str(load_error)}")
    return frame

# Page layout: heading, upload button, chat window and question box.
# Components are created in display order.
with gr.Blocks(title="AI CSV Analyzer") as demo:
    gr.Markdown("# 📊 AI-Powered CSV Analyzer")
    gr.Markdown("Upload a CSV file and ask questions about your data!")

    with gr.Row():
        # Holds the DataFrame returned by handle_file between events
        data_state = gr.State()
        file_input = gr.UploadButton(label="Upload CSV", file_types=[".csv"])

    chatbot = gr.Chatbot(height=400)
    msg = gr.Textbox(label="Your Question")
    # Resets both the question box and the conversation
    clear = gr.ClearButton([msg, chatbot])

    # Uploading a file parses it and stores the result in data_state.
    file_input.upload(handle_file, file_input, data_state)

    # Submitting the question box runs the analysis and replaces the
    # chat history with the value analyze_data returns.
    msg.submit(
        analyze_data,
        [msg, data_state, chatbot],
        chatbot,
        queue=False,
    )

# Start the web server only when run as a script.
if __name__ == "__main__":
    demo.launch()