# Hugging Face Spaces app (note: the Space page reported "Build error" at scrape time).
import gc
import json
import logging
import os
from typing import Dict, List

import numpy as np
import pandas as pd
import streamlit as st
import torch

# Hugging Face stack
from datasets import Dataset, load_dataset
from evaluate import load
from huggingface_hub import ModelCard, hf_hub_url
from transformers import (
    AutoModelForCausalLM,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
    pipeline,
)
def preprocess_function(examples):
    """Tokenize each example's MRPC sentence pair.

    NOTE(review): relies on the module-level ``tokenizer`` global set in the
    ``__main__`` block — this is the batched callback passed to
    ``Dataset.map``, so its signature cannot take the tokenizer directly.
    """
    sentence_pair = (examples["sentence1"], examples["sentence2"])
    return tokenizer(*sentence_pair, truncation=True)
def compute_metrics(eval_pred):
    """Turn raw logits into class ids and score them with the global ``metric``.

    ``eval_pred`` is the (predictions, label_ids) pair the Trainer hands to
    its ``compute_metrics`` hook.
    """
    logits, labels = eval_pred
    class_ids = np.argmax(logits, axis=1)
    return metric.compute(predictions=class_ids, references=labels)
def compute_model_card_evaluation_results(tokenizer, model_checkpoint, raw_datasets, metric):
    """Evaluate ``model_checkpoint`` on the validation split of ``raw_datasets``.

    Tokenizes the dataset, builds a Trainer (no training is performed — only
    ``trainer.evaluate()`` is called) and returns its metrics dict.

    Args:
        tokenizer: tokenizer matching ``model_checkpoint``.
        model_checkpoint: model id or path for a 2-label sequence classifier.
        raw_datasets: DatasetDict with "train" and "validation" splits whose
            examples carry "sentence1"/"sentence2" columns (MRPC-style pairs).
        metric: an ``evaluate`` metric consumed via ``compute_metrics``.

    Returns:
        The dict returned by ``Trainer.evaluate()``.
    """
    # Tokenize with the tokenizer that was passed in, rather than going
    # through the module-level ``preprocess_function`` (which silently uses
    # the *global* tokenizer). Same behavior, no hidden global coupling.
    def _preprocess(examples):
        return tokenizer(examples["sentence1"], examples["sentence2"], truncation=True)

    tokenized_datasets = raw_datasets.map(_preprocess, batched=True)
    model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=2)

    batch_size = 16
    # Training hyperparameters are configured but unused: only evaluate() runs.
    args = TrainingArguments(
        "test-glue",
        eval_strategy="epoch",
        learning_rate=5e-5,
        seed=42,
        lr_scheduler_type="linear",
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        num_train_epochs=3,
        weight_decay=0.01,
        load_best_model_at_end=False,
        metric_for_best_model="accuracy",
        report_to="none",
    )
    trainer = Trainer(
        model,
        args,
        train_dataset=tokenized_datasets["train"],
        eval_dataset=tokenized_datasets["validation"],
        tokenizer=tokenizer,
        compute_metrics=compute_metrics,
    )
    return trainer.evaluate()
if __name__ == "__main__":
    st.title("Hugging Face Model Evaluation Demo")

    with st.form("my_st_form"):
        # Create an input text box
        dataset_name = st.text_input("Enter dataset identifier", "")
        model_checkpoint = st.text_input("Enter model identifier", "")
        # Every form must have a submit button.
        submitted = st.form_submit_button("Submit")

        if submitted:
            print(dataset_name, model_checkpoint)
            # NOTE(review): the metric and dataset config are hard-coded to
            # GLUE/"mrpc" even though the dataset identifier is user input —
            # confirm the app is meant only for MRPC-style paired datasets.
            metric = load("glue", "mrpc")
            # Load the tokenizer once (the original loaded it twice back to back).
            tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
            raw_datasets = load_dataset(dataset_name, "mrpc")

            output = compute_model_card_evaluation_results(
                tokenizer, model_checkpoint, raw_datasets, metric
            )
            print(json.dumps(output))

            st.header("Self-generated Evaluation Results:")
            st.json(output, expanded=True)

            # Compare against the results recorded on the model's card.
            card = ModelCard.load(model_checkpoint)
            st.header("Model Card Evaluation Results:")
            st.json(card.data.eval_results, expanded=True)