import os
import gradio as gr
import pandas as pd
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# ✅ Use DeepSeek Free Model
# NOTE(review): hub repos are usually suffixed ("-base"/"-instruct") — confirm
# this id resolves on the Hugging Face Hub before deploying.
model_name = "deepseek-ai/deepseek-coder-6.7b"

# ✅ Load DeepSeek model & tokenizer once at startup.
# fp16 halves weight memory; device_map="auto" shards onto GPU when available.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name, torch_dtype=torch.float16, device_map="auto"
)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)


# ✅ Function to analyze CSV data based on accuracy
def analyze_csv(file):
    """Analyze an uploaded benchmark CSV and return a Markdown report.

    Args:
        file: Gradio upload — either a tempfile-like object with a ``.name``
            path attribute or (newer Gradio) a plain filepath string.

    Returns:
        str: Markdown summary of the best run plus LLM-generated insights,
        or a human-readable error message on failure.
    """
    try:
        # Gradio may hand us a wrapper object or a bare path — accept both.
        path = getattr(file, "name", file)
        df = pd.read_csv(path)  # Read uploaded CSV

        # ✅ Ensure column names are stripped of extra spaces
        df.columns = df.columns.str.strip()

        # ✅ Validate required columns
        required_columns = {
            "Run ID",
            "Latency (ms)",
            "Throughput (req/sec)",
            "Memory Usage (GB)",
            "CPU Utilization (%)",
        }
        if not required_columns.issubset(df.columns):
            # sorted() keeps the error message deterministic (a raw set
            # iterates in arbitrary order between runs).
            return (
                "Error: Missing one or more required columns. "
                f"Required: {', '.join(sorted(required_columns))}"
            )

        # ✅ Avoid division errors (replace zero values in Latency & Memory Usage).
        # Explicit reassignment instead of inplace=True on a column selection,
        # which is deprecated chained-assignment in modern pandas.
        df["Latency (ms)"] = df["Latency (ms)"].replace(0, 1e-6)
        df["Memory Usage (GB)"] = df["Memory Usage (GB)"].replace(0, 1e-6)

        # ✅ Calculate Accuracy Score: Throughput / (Latency * Memory Usage)
        df["Accuracy Score"] = df["Throughput (req/sec)"] / (
            df["Latency (ms)"] * df["Memory Usage (GB)"]
        )

        # ✅ Find the best-performing model
        best_model = df.loc[df["Accuracy Score"].idxmax()]
        best_run_id = best_model["Run ID"]

        # ✅ Construct analysis summary
        ranking = (
            df[["Run ID", "Accuracy Score"]]
            .sort_values(by="Accuracy Score", ascending=False)
            .to_string(index=False)
        )
        summary = f"""
**🏆 Best Performing Test Run:** `{best_run_id}`
- **Latency:** {best_model["Latency (ms)"]} ms
- **Throughput:** {best_model["Throughput (req/sec)"]} req/sec
- **Memory Usage:** {best_model["Memory Usage (GB)"]} GB
- **CPU Utilization:** {best_model["CPU Utilization (%)"]}%
- **Accuracy Score:** {best_model["Accuracy Score"]:.6f}

---
**📊 Accuracy Ranking Table**
```plaintext
{ranking}
```
---
Based on this benchmark, generate insights on why this test run performed best and provide recommendations.
"""

        # ✅ Generate AI-based insights using DeepSeek.
        # return_full_text=False yields only the newly generated tokens;
        # otherwise the whole prompt (the summary) is echoed back and the
        # report would appear twice in the output.
        output = pipe(
            summary,
            max_new_tokens=150,
            do_sample=True,
            temperature=0.7,
            return_full_text=False,
        )
        return f"{summary}\n\n### 🤖 AI Insights:\n{output[0]['generated_text']}"

    except Exception as e:
        # Boundary handler: surface any parsing/inference failure to the UI
        # instead of crashing the Gradio worker.
        return f"⚠️ Error processing CSV: {str(e)}"


# ✅ Gradio Interface
iface = gr.Interface(
    fn=analyze_csv,
    inputs=gr.File(label="Upload CSV File"),
    outputs="text",
    title="Benchmark Analyzer (DeepSeek Free)",
    description="Upload a benchmark CSV file to analyze test performance based on accuracy.",
)

# Guard the launch so importing this module (e.g. for testing) does not
# start a web server; running the script directly behaves as before.
if __name__ == "__main__":
    iface.launch()