# FiLM_Benchmark / app.py
import gradio as gr
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import openai
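import os

# The OpenAI branches below need an API key, and they use the legacy
# ChatCompletion interface, which requires openai<1.0. A minimal setup
# sketch, assuming the key arrives via the conventional OPENAI_API_KEY
# environment variable (e.g. configured as a Space secret):
openai.api_key = os.environ.get("OPENAI_API_KEY")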
# Define the available tasks and Hugging Face models
TASKS = ["sentiment-analysis", "ner", "text-classification"]

MODELS = {
    "DistilBERT": "distilbert-base-uncased",
    "BERT": "bert-base-uncased",
    "RoBERTa": "roberta-base",
    "LLaMA2_7B_chat": "meta-llama/Llama-2-7b-chat-hf",
    "LLaMA2_70B": "meta-llama/Llama-2-70b-hf",
    "ChatGLM3_6B": "THUDM/chatglm3-6b",
    "InternLM_7B": "internlm/internlm-7b",
    "Falcon_7B": "tiiuae/falcon-7b",
    # Add other Hugging Face models here
}
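# NOTE: the meta-llama checkpoints are gated on the Hugging Face Hub; loading
# them requires accepting Meta's license and authenticating (for example via
# `huggingface-cli login` or an HF token configured as a Space secret).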
# Function to load a Hugging Face pipeline for the selected task and model
def load_pipeline(task, model):
    model_name = MODELS[model]
    return pipeline(task, model=model_name)
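# Reloading weights on every request is slow for the larger checkpoints. A
# minimal caching sketch (the wrapper name and cache size are assumptions,
# not part of the original app): memoize one pipeline per (task, model) pair.
from functools import lru_cache

@lru_cache(maxsize=4)
def load_pipeline_cached(task, model):
    # Both arguments are hashable strings, so lru_cache can key on them
    return pipeline(task, model=MODELS[model])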
# Function to predict using Hugging Face models and OpenAI models
def predict(task, model, text):
    try:
        if model in ["ChatGPT", "GPT4"]:
            # OpenAI API request (legacy openai<1.0 ChatCompletion interface)
            response = openai.ChatCompletion.create(
                model="gpt-4" if model == "GPT4" else "gpt-3.5-turbo",
                messages=[{"role": "user", "content": text}]
            )
            return response['choices'][0]['message']['content']
        else:
            # Hugging Face pipeline; loaded only in this branch, since MODELS
            # has no entries for the OpenAI choices and loading it
            # unconditionally would raise a KeyError for them
            selected_pipeline = load_pipeline(task, model)
            return selected_pipeline(text)
    except Exception as e:
        print(f"Error in prediction: {e}")
        return {"error": str(e)}
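# Illustrative call: predict("sentiment-analysis", "DistilBERT", "Great film!")
# returns the raw pipeline output, e.g. [{"label": ..., "score": ...}]. Note
# that the base (non-fine-tuned) checkpoints listed above emit generic label
# names such as LABEL_0/LABEL_1 rather than POSITIVE/NEGATIVE.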
# Function to benchmark a model against a labelled CSV
# (expects 'query' and 'answer' columns)
def benchmark(task, model, file):
    try:
        data = pd.read_csv(file.name)
        texts = data['query'].tolist()
        true_labels = data['answer'].tolist()
        predictions = []
        if model in ["ChatGPT", "GPT4"]:
            for text in texts:
                response = openai.ChatCompletion.create(
                    model="gpt-4" if model == "GPT4" else "gpt-3.5-turbo",
                    messages=[{"role": "user", "content": text}]
                )
                predictions.append(response['choices'][0]['message']['content'].strip())
        else:
            selected_pipeline = load_pipeline(task, model)
            # Take the top label per input. This fits classification-style
            # tasks (sentiment-analysis, text-classification); the NER
            # pipeline returns per-entity spans rather than a single label,
            # so it would need different handling here.
            predictions = [selected_pipeline(text)[0]['label'] for text in texts]
        accuracy = accuracy_score(true_labels, predictions)
        precision, recall, f1, _ = precision_recall_fscore_support(
            true_labels, predictions, average='macro'
        )
        return {
            "Accuracy": accuracy,
            "Precision": precision,
            "Recall": recall,
            "F1 Score": f1
        }
    except Exception as e:
        print(f"Error in benchmarking: {e}")
        return {"error": str(e)}
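# Expected CSV layout (column names taken from the code above); the strings in
# 'answer' must match the model's output labels exactly for the metrics to be
# meaningful:
#
#   query,answer
#   "I loved this film.",POSITIVE
#   "The plot made no sense.",NEGATIVE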
# Define the Gradio interface
with gr.Blocks() as demo:
    with gr.Row():
        task_input = gr.Dropdown(TASKS, label="Task")
        model_input = gr.Dropdown(list(MODELS.keys()) + ["ChatGPT", "GPT4"], label="Model")

    with gr.Tab("Predict"):
        with gr.Row():
            text_input = gr.Textbox(lines=2, placeholder="Enter text here...", label="Text")
        predict_button = gr.Button("Predict")
        predict_output = gr.JSON(label="Prediction Output")
        predict_button.click(predict, inputs=[task_input, model_input, text_input], outputs=predict_output)

    with gr.Tab("Benchmark"):
        with gr.Row():
            file_input = gr.File(label="Upload CSV for Benchmarking")
        benchmark_button = gr.Button("Benchmark")
        benchmark_output = gr.JSON(label="Benchmark Output")
        benchmark_button.click(benchmark, inputs=[task_input, model_input, file_input], outputs=benchmark_output)

demo.launch()