Spaces:

WhoLetMeCook
/

DistilBERTDemo

Sleeping

App Files Community

WhoLetMeCook committed on Sep 14, 2024

Commit

0cf1faf

verified ·

1 Parent(s): 4c41360

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -94

app.py CHANGED Viewed

@@ -1,100 +1,26 @@
-import streamlit as st
-import numpy as np
-from datasets import load_dataset, Dataset
-from transformers import DistilBertTokenizer, DistilBertForSequenceClassification, Trainer, TrainingArguments
-from datasets import load_metric
 import torch
-# Load datasets (IMDB and SST2) and combine them
-@st.cache_resource
-def load_datasets():
-    imdb = load_dataset('imdb', split='train[:5000]')
-    sst2 = load_dataset('glue', 'sst2', split='train[:5000]')
-    # Combine datasets into a single list
-    train_list = [{'text': example['text'], 'label': example['label']} for example in imdb] + [{'text': example['sentence'], 'label': example['label']} for example in sst2]
-    full_data = Dataset.from_list(train_list)
-    # Split the dataset into train/validation/test
-    train_data = full_data.train_test_split(test_size=0.2, seed=42)
-    train_data = train_data['train'].train_test_split(test_size=0.25, seed=42)  # 60% train, 20% validation, 20% test
-    return train_data['train'], train_data['test']
-train_dataset, val_dataset = load_datasets()
-# Load the tokenizer and model
-@st.cache_resource
-def load_tokenizer_model():
-    tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
-    model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=2)
-    return tokenizer, model
-tokenizer, model = load_tokenizer_model()
-# Preprocess function for tokenization
-def preprocess_function(examples):
-    return tokenizer(examples['text'], truncation=True, padding='max_length', max_length=512)
-# Tokenize datasets
-tokenized_train_dataset = train_dataset.map(preprocess_function, batched=True)
-tokenized_val_dataset = val_dataset.map(preprocess_function, batched=True)
-# Define the training arguments
-training_args = TrainingArguments(
-    output_dir='./results',
-    evaluation_strategy='epoch',
-    learning_rate=2e-5,
-    per_device_train_batch_size=16,
-    per_device_eval_batch_size=16,
-    num_train_epochs=3,
-    weight_decay=0.01,
-    logging_dir='./logs',
-)
-# Load accuracy metric
-metric = load_metric('accuracy')
-# Function to compute metrics
-def compute_metrics(eval_pred):
-    logits, labels = eval_pred
-    predictions = np.argmax(logits, axis=-1)
-    return metric.compute(predictions=predictions, references=labels)
-# Initialize the trainer
-trainer = Trainer(
-    model=model,
-    args=training_args,
-    train_dataset=tokenized_train_dataset,
-    eval_dataset=tokenized_val_dataset,
-    compute_metrics=compute_metrics,
-)
-# Streamlit UI
-st.title("DistilBERT Sentiment Training and Inference")
-# Button to start training
-if st.button("Train the Model"):
-    st.write("Training the model... This will take some time.")
-    trainer.train()
-    st.write("Model training complete!")
-# User input for inference
-st.write("Once the model is trained, you can enter a sentence for sentiment analysis:")
-user_input = st.text_area("Enter a sentence:")
 # Function to make predictions
-def predict_sentiment(text):
-    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
     with torch.no_grad():
         outputs = model(**inputs)
-    logits = outputs.logits
-    prediction = torch.argmax(logits, dim=-1).item()
-    return "Positive" if prediction == 1 else "Negative"
-# Button to generate predictions after training
-if st.button("Analyze Sentiment"):
-    if user_input.strip():
-        result = predict_sentiment(user_input)
-        st.write(f"Predicted Sentiment: **{result}**")
-    else:
-        st.write("Please enter a sentence.")

+import gradio as gr
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
+# Load the model and tokenizer
+model_name = "WhoLetMeCook/ChefBERT"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForSequenceClassification.from_pretrained(model_name)
 # Function to make predictions
+def predict_emotion(text):
+    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
     with torch.no_grad():
         outputs = model(**inputs)
+    prediction = torch.argmax(outputs.logits, dim=-1).item()
+    return "Positive Emotion" if prediction == 1 else "Negative Emotion"
+# Create the Gradio interface
+iface = gr.Interface(fn=predict_emotion,
+                     inputs="text",
+                     outputs="text",
+                     title="ChefBERT Emotion Classifier",
+                     description="Enter a sentence and ChefBERT will predict whether the emotion is positive (1) or negative (0).")
+# Launch the interface
+iface.launch()