# Benchmark Analyzer (Gradio) — updated with DeepSeek model (commit 5a948c5).
import os
import gradio as gr
import pandas as pd
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
# Model selection: DeepSeek coder checkpoint pulled from the Hugging Face Hub.
# NOTE(review): this exact hub id may not resolve — published checkpoints are
# "deepseek-ai/deepseek-coder-6.7b-base" / "-instruct"; confirm before deploying.
model_name = "deepseek-ai/deepseek-coder-6.7b"
# Load tokenizer and model once at import time. fp16 halves memory, and
# device_map="auto" lets accelerate place weights on available GPU(s)/CPU.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
# Shared text-generation pipeline used by analyze_csv() below.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
# ✅ Function to analyze CSV data based on accuracy
def analyze_csv(file):
    """Analyze an uploaded benchmark CSV and return a markdown report.

    Parameters
    ----------
    file:
        Either a filesystem path (``str``/``os.PathLike``) or an object with
        a ``.name`` attribute pointing at the CSV — ``gr.File`` has produced
        both shapes across Gradio versions.

    Returns
    -------
    str
        Markdown summary of the best run plus AI-generated insights, or a
        human-readable error message if the file cannot be processed.
    """
    try:
        # Accept both a tempfile-like wrapper (``.name``) and a plain path.
        path = file if isinstance(file, (str, os.PathLike)) else file.name
        df = pd.read_csv(path)

        # Normalize header whitespace so e.g. "Run ID " still matches.
        df.columns = df.columns.str.strip()

        # Validate required columns before doing any arithmetic.
        required_columns = {"Run ID", "Latency (ms)", "Throughput (req/sec)", "Memory Usage (GB)", "CPU Utilization (%)"}
        if not required_columns.issubset(df.columns):
            return f"Error: Missing one or more required columns. Required: {', '.join(required_columns)}"

        # Guard against division by zero. Assign the result back: chained
        # `df[col].replace(..., inplace=True)` mutates a possibly-temporary
        # object and is deprecated in modern pandas.
        df["Latency (ms)"] = df["Latency (ms)"].replace(0, 1e-6)
        df["Memory Usage (GB)"] = df["Memory Usage (GB)"].replace(0, 1e-6)

        # Accuracy Score: higher throughput at lower latency/memory is better.
        df["Accuracy Score"] = df["Throughput (req/sec)"] / (df["Latency (ms)"] * df["Memory Usage (GB)"])

        # Row with the highest score is the winning test run.
        best_model = df.loc[df["Accuracy Score"].idxmax()]
        best_run_id = best_model["Run ID"]

        # Markdown summary shown to the user; it doubles as the LLM prompt.
        summary = f"""
**🏆 Best Performing Test Run:** `{best_run_id}`
- **Latency:** {best_model["Latency (ms)"]} ms
- **Throughput:** {best_model["Throughput (req/sec)"]} req/sec
- **Memory Usage:** {best_model["Memory Usage (GB)"]} GB
- **CPU Utilization:** {best_model["CPU Utilization (%)"]}%
- **Accuracy Score:** {best_model["Accuracy Score"]:.6f}
---
**📊 Accuracy Ranking Table**
```plaintext
{df[["Run ID", "Accuracy Score"]].sort_values(by="Accuracy Score", ascending=False).to_string(index=False)}
```
---
Based on this benchmark, generate insights on why this test run performed best and provide recommendations.
"""
        # Generate AI-based insights with the module-level DeepSeek pipeline.
        output = pipe(summary, max_new_tokens=150, do_sample=True, temperature=0.7)
        return f"{summary}\n\n### 🤖 AI Insights:\n{output[0]['generated_text']}"
    except Exception as e:
        # Top-level UI boundary: surface the failure as text, never crash.
        return f"⚠️ Error processing CSV: {str(e)}"
# Gradio front-end: one file-upload input, markdown-ish text output.
_interface_config = dict(
    fn=analyze_csv,
    inputs=gr.File(label="Upload CSV File"),
    outputs="text",
    title="Benchmark Analyzer (DeepSeek Free)",
    description="Upload a benchmark CSV file to analyze test performance based on accuracy.",
)
iface = gr.Interface(**_interface_config)
iface.launch()