Niklauseik committed on
Commit
982df32
·
1 Parent(s): 149e91b
Files changed (1) hide show
  1. app.py +4 -49
app.py CHANGED
@@ -1,16 +1,14 @@
1
  import gradio as gr
2
- from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
3
  import pandas as pd
4
  from sklearn.metrics import accuracy_score, precision_recall_fscore_support
5
- import torch
6
 
7
  # Define the available models and tasks
8
- TASKS = ["sentiment-analysis", "ner", "text-classification"]
9
  MODELS = {
10
- "DistilBERT": "distilbert-base-uncased-finetuned-sst-2-english",
11
  "BERT": "bert-base-uncased",
12
- "RoBERTa": "roberta-base",
13
- "LLaMA": "decapoda-research/llama-7b-hf"
14
  # Add other models here
15
  }
16
 
@@ -40,42 +38,6 @@ def benchmark(task, model, file):
40
  "F1 Score": f1
41
  }
42
 
43
- def train_model(task, model, file):
44
- data = pd.read_csv(file.name)
45
- train_texts = data['text'].tolist()
46
- train_labels = data['label'].tolist()
47
-
48
- model_name = MODELS[model]
49
- tokenizer = AutoTokenizer.from_pretrained(model_name)
50
-
51
- encodings = tokenizer(train_texts, truncation=True, padding=True)
52
- dataset = torch.utils.data.TensorDataset(
53
- torch.tensor(encodings['input_ids']),
54
- torch.tensor(encodings['attention_mask']),
55
- torch.tensor(train_labels)
56
- )
57
-
58
- model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=len(set(train_labels)))
59
- training_args = TrainingArguments(
60
- output_dir='./results',
61
- num_train_epochs=3,
62
- per_device_train_batch_size=8,
63
- per_device_eval_batch_size=8,
64
- warmup_steps=500,
65
- weight_decay=0.01,
66
- logging_dir='./logs'
67
- )
68
-
69
- trainer = Trainer(
70
- model=model,
71
- args=training_args,
72
- train_dataset=dataset,
73
- eval_dataset=dataset
74
- )
75
-
76
- trainer.train()
77
- return "Training Completed"
78
-
79
  # Define the Gradio interface
80
  with gr.Blocks() as demo:
81
  with gr.Row():
@@ -95,12 +57,5 @@ with gr.Blocks() as demo:
95
  benchmark_button = gr.Button("Benchmark")
96
  benchmark_output = gr.JSON(label="Benchmark Output")
97
  benchmark_button.click(benchmark, inputs=[task_input, model_input, file_input], outputs=benchmark_output)
98
-
99
- with gr.Tab("Train"):
100
- with gr.Row():
101
- train_file_input = gr.File(label="Upload CSV for Training")
102
- train_button = gr.Button("Train Model")
103
- train_output = gr.JSON(label="Training Status")
104
- train_button.click(train_model, inputs=[task_input, model_input, train_file_input], outputs=train_output)
105
 
106
  demo.launch()
 
1
  import gradio as gr
2
+ from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
3
  import pandas as pd
4
  from sklearn.metrics import accuracy_score, precision_recall_fscore_support
 
5
 
6
  # Define the available models and tasks
7
+ TASKS = ["sentiment-analysis", "text-classification"]
8
  MODELS = {
9
+ "DistilBERT": "distilbert-base-uncased",
10
  "BERT": "bert-base-uncased",
11
+ "RoBERTa": "roberta-base"
 
12
  # Add other models here
13
  }
14
 
 
38
  "F1 Score": f1
39
  }
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  # Define the Gradio interface
42
  with gr.Blocks() as demo:
43
  with gr.Row():
 
57
  benchmark_button = gr.Button("Benchmark")
58
  benchmark_output = gr.JSON(label="Benchmark Output")
59
  benchmark_button.click(benchmark, inputs=[task_input, model_input, file_input], outputs=benchmark_output)
 
 
 
 
 
 
 
60
 
61
  demo.launch()