File size: 2,997 Bytes
1b29932
a2ff5d8
04fe564
5a948c5
 
 
 
 
 
 
 
 
 
a2ff5d8
22f6c1d
04fe564
5a948c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e29c41b
19bc324
e29c41b
04fe564
 
e29c41b
5a948c5
 
e29c41b
 
5a948c5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import os
import gradio as gr
import pandas as pd
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# βœ… Use DeepSeek Free Model
# NOTE(review): verify this repo id exists on the Hugging Face Hub — the
# published 6.7B coder checkpoints are "deepseek-coder-6.7b-base" and
# "deepseek-coder-6.7b-instruct"; a bare "deepseek-coder-6.7b" may 404
# at download time. Confirm against the Hub before deploying.
model_name = "deepseek-ai/deepseek-coder-6.7b"

# βœ… Load DeepSeek model & tokenizer
# float16 halves the weight memory footprint; device_map="auto" lets
# accelerate place/shard the model across available GPUs (or fall back
# to CPU) automatically. Both happen once at import time — this script
# has a heavy, blocking startup by design.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

# βœ… Function to analyze CSV data based on accuracy
def analyze_csv(file):
    """Read a benchmark CSV, rank test runs by accuracy score, and append
    AI-generated insights from the DeepSeek text-generation pipeline.

    Parameters
    ----------
    file : object or str
        The Gradio upload. Older Gradio versions pass a tempfile-like
        object exposing a ``.name`` path attribute; newer versions pass
        the path itself as a plain string. Both are accepted.

    Returns
    -------
    str
        A Markdown-formatted summary (best run, ranking table, AI
        insights), or a human-readable error message on failure.
    """
    try:
        # Newer Gradio hands the upload over as a plain path string;
        # older versions wrap it in an object with a .name attribute.
        path = file if isinstance(file, str) else file.name
        df = pd.read_csv(path)  # Read uploaded CSV

        # βœ… Ensure column names are stripped of extra spaces
        df.columns = df.columns.str.strip()

        # βœ… Validate required columns
        required_columns = {"Run ID", "Latency (ms)", "Throughput (req/sec)", "Memory Usage (GB)", "CPU Utilization (%)"}
        if not required_columns.issubset(df.columns):
            return f"Error: Missing one or more required columns. Required: {', '.join(required_columns)}"

        # βœ… Avoid division errors (replace zero values in Latency & Memory Usage)
        # Assign the result back instead of `inplace=True` on a column
        # selection: chained in-place replace is deprecated and silently
        # fails to modify `df` under pandas copy-on-write, which would
        # reintroduce the division-by-zero this guard exists to prevent.
        df["Latency (ms)"] = df["Latency (ms)"].replace(0, 1e-6)
        df["Memory Usage (GB)"] = df["Memory Usage (GB)"].replace(0, 1e-6)

        # βœ… Calculate Accuracy Score: Throughput / (Latency * Memory Usage)
        # Higher throughput and lower latency/memory both raise the score.
        df["Accuracy Score"] = df["Throughput (req/sec)"] / (df["Latency (ms)"] * df["Memory Usage (GB)"])

        # βœ… Find the best-performing model (idxmax raises on an empty
        # frame; that lands in the except below with a clear message)
        best_model = df.loc[df["Accuracy Score"].idxmax()]
        best_run_id = best_model["Run ID"]

        # βœ… Construct analysis summary
        summary = f"""
        **πŸ† Best Performing Test Run:** `{best_run_id}`
        
        - **Latency:** {best_model["Latency (ms)"]} ms
        - **Throughput:** {best_model["Throughput (req/sec)"]} req/sec
        - **Memory Usage:** {best_model["Memory Usage (GB)"]} GB
        - **CPU Utilization:** {best_model["CPU Utilization (%)"]}%
        - **Accuracy Score:** {best_model["Accuracy Score"]:.6f}
        ---
        **πŸ“Š Accuracy Ranking Table**
        ```plaintext
        {df[["Run ID", "Accuracy Score"]].sort_values(by="Accuracy Score", ascending=False).to_string(index=False)}
        ```
        ---
        Based on this benchmark, generate insights on why this test run performed best and provide recommendations.
        """

        # βœ… Generate AI-based insights using DeepSeek
        output = pipe(summary, max_new_tokens=150, do_sample=True, temperature=0.7)

        return f"{summary}\n\n### πŸ€– AI Insights:\n{output[0]['generated_text']}"

    except Exception as e:
        # Broad catch is deliberate: this is the Gradio handler boundary,
        # and any failure should surface as a message in the UI, not a crash.
        return f"⚠️ Error processing CSV: {str(e)}"

# βœ… Gradio front-end: one file-upload input, markdown/text output.
app_description = "Upload a benchmark CSV file to analyze test performance based on accuracy."

iface = gr.Interface(
    analyze_csv,
    gr.File(label="Upload CSV File"),
    "text",
    title="Benchmark Analyzer (DeepSeek Free)",
    description=app_description,
)

# Start the web server (blocks until the app is stopped).
iface.launch()