import gradio as gr
from transformers import pipeline
import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from openai import OpenAI

# OpenAI client (openai>=1.0 API); expects OPENAI_API_KEY in the environment
client = OpenAI()
# Define the available tasks and Hugging Face models
TASKS = ["sentiment-analysis", "ner", "text-classification"]
MODELS = {
    "DistilBERT": "distilbert-base-uncased",
    "BERT": "bert-base-uncased",
    "RoBERTa": "roberta-base",
    "LLaMA2_7B_chat": "meta-llama/Llama-2-7b-chat-hf",
    "LLaMA2_70B": "meta-llama/Llama-2-70b-hf",
    "ChatGLM3_6B": "THUDM/chatglm3-6b",
    "InternLM_7B": "internlm/internlm-7b",
    "Falcon_7B": "tiiuae/falcon-7b",
    # Add other Hugging Face models here
}
# Load a Hugging Face pipeline for the selected task and model
def load_pipeline(task, model):
    model_name = MODELS[model]
    return pipeline(task, model=model_name)
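
# Optional sketch (not wired into the app): cache loaded pipelines so a
# repeated (task, model) selection does not reload model weights on every
# call. The helper name load_pipeline_cached is illustrative, not part of
# the original app; it assumes memory can hold a few cached models.
from functools import lru_cache

@lru_cache(maxsize=4)
def load_pipeline_cached(task, model):
    return pipeline(task, model=MODELS[model])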
# Run a single prediction with either a Hugging Face or an OpenAI model
def predict(task, model, text):
    try:
        if model in ["ChatGPT", "GPT4"]:
            # OpenAI API request
            response = client.chat.completions.create(
                model="gpt-4" if model == "GPT4" else "gpt-3.5-turbo",
                messages=[{"role": "user", "content": text}],
            )
            return response.choices[0].message.content
        else:
            # Hugging Face pipeline (loaded only for non-OpenAI models,
            # since OpenAI entries have no key in MODELS)
            selected_pipeline = load_pipeline(task, model)
            results = selected_pipeline(text)
            return results
    except Exception as e:
        print(f"Error in prediction: {e}")
        return {"error": str(e)}
# Benchmark a model against a CSV of (query, answer) pairs
def benchmark(task, model, file):
    try:
        # Gradio may pass a file path or a tempfile wrapper with a .name
        path = getattr(file, "name", file)
        data = pd.read_csv(path)
        texts = data['query'].tolist()
        true_labels = data['answer'].tolist()
        predictions = []
        if model in ["ChatGPT", "GPT4"]:
            for text in texts:
                response = client.chat.completions.create(
                    model="gpt-4" if model == "GPT4" else "gpt-3.5-turbo",
                    messages=[{"role": "user", "content": text}],
                )
                predictions.append(response.choices[0].message.content.strip())
        else:
            selected_pipeline = load_pipeline(task, model)
            predictions = [selected_pipeline(text)[0]['label'] for text in texts]
        accuracy = accuracy_score(true_labels, predictions)
        precision, recall, f1, _ = precision_recall_fscore_support(
            true_labels, predictions, average='macro', zero_division=0
        )
        return {
            "Accuracy": accuracy,
            "Precision": precision,
            "Recall": recall,
            "F1 Score": f1,
        }
    except Exception as e:
        print(f"Error in benchmarking: {e}")
        return {"error": str(e)}
# Define the Gradio interface
with gr.Blocks() as demo:
    with gr.Row():
        task_input = gr.Dropdown(TASKS, label="Task")
        model_input = gr.Dropdown(list(MODELS.keys()) + ["ChatGPT", "GPT4"], label="Model")
    with gr.Tab("Predict"):
        with gr.Row():
            text_input = gr.Textbox(lines=2, placeholder="Enter text here...", label="Text")
        predict_button = gr.Button("Predict")
        predict_output = gr.JSON(label="Prediction Output")
        predict_button.click(predict, inputs=[task_input, model_input, text_input], outputs=predict_output)
    with gr.Tab("Benchmark"):
        with gr.Row():
            file_input = gr.File(label="Upload CSV for Benchmarking")
        benchmark_button = gr.Button("Benchmark")
        benchmark_output = gr.JSON(label="Benchmark Output")
        benchmark_button.click(benchmark, inputs=[task_input, model_input, file_input], outputs=benchmark_output)

demo.launch()
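
# To run locally (assumes the packages imported above are installed and,
# for the ChatGPT/GPT4 options, that an OpenAI API key is available):
#     OPENAI_API_KEY=<your key> python app.py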