ghost7 committed on
Commit
af37519
·
verified ·
1 Parent(s): a88d9f7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -63
app.py CHANGED
@@ -1,61 +1,13 @@
1
- from datasets import load_dataset
2
- from transformers import DistilBertTokenizer, DistilBertForSequenceClassification, Trainer, TrainingArguments
3
  import torch
4
- import numpy as np
5
- from sklearn.metrics import accuracy_score, f1_score
6
 
7
- # Step 2: Load dataset
8
- dataset = load_dataset("amazon_polarity")
9
- train_dataset = dataset["train"].shuffle(seed=42).select(range(10000))
10
- test_dataset = dataset["test"].shuffle(seed=42).select(range(2000))
11
 
12
- # Step 3: Tokenize dataset
13
- tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
14
- def tokenize_function(examples):
15
- text = [title + " " + content for title, content in zip(examples["title"], examples["content"])]
16
- return tokenizer(text, padding="max_length", truncation=True, max_length=512)
17
- tokenized_train = train_dataset.map(tokenize_function, batched=True)
18
- tokenized_test = test_dataset.map(tokenize_function, batched=True)
19
- tokenized_train = tokenized_train.remove_columns(["title", "content"])
20
- tokenized_test = tokenized_test.remove_columns(["title", "content"])
21
- tokenized_train.set_format("torch", columns=["input_ids", "attention_mask", "label"])
22
- tokenized_test.set_format("torch", columns=["input_ids", "attention_mask", "label"])
23
-
24
- # Step 4: Fine-tune model
25
- model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=2)
26
- def compute_metrics(pred):
27
- labels = pred.label_ids
28
- preds = pred.predictions.argmax(-1)
29
- acc = accuracy_score(labels, preds)
30
- f1 = f1_score(labels, preds, average="weighted")
31
- return {"accuracy": acc, "f1": f1}
32
- training_args = TrainingArguments(
33
- output_dir="./results",
34
- num_train_epochs=3,
35
- per_device_train_batch_size=16,
36
- per_device_eval_batch_size=16,
37
- warmup_steps=500,
38
- weight_decay=0.01,
39
- logging_dir="./logs",
40
- logging_steps=10,
41
- eval_strategy="epoch",
42
- save_strategy="epoch",
43
- load_best_model_at_end=True,
44
- )
45
- trainer = Trainer(
46
- model=model,
47
- args=training_args,
48
- train_dataset=tokenized_train,
49
- eval_dataset=tokenized_test,
50
- compute_metrics=compute_metrics,
51
- )
52
- trainer.train()
53
-
54
- # Step 5: Evaluate and predict
55
- eval_results = trainer.evaluate()
56
- print("Evaluation results:", eval_results)
57
- model.save_pretrained("./fine_tuned_distilbert")
58
- tokenizer.save_pretrained("./fine_tuned_distilbert")
59
  def predict_sentiment(text):
60
  inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
61
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -66,14 +18,6 @@ def predict_sentiment(text):
66
  logits = outputs.logits
67
  predicted_class = torch.argmax(logits, dim=1).item()
68
  return "Positive" if predicted_class == 1 else "Negative"
69
- example_reviews = [
70
- "Great product! Fast shipping and works perfectly as described.",
71
- "Terrible quality, broke after one use. Very disappointed.",
72
- "The item is okay, not amazing but does the job for the price."
73
- ]
74
- for review in example_reviews:
75
- sentiment = predict_sentiment(review)
76
- print(f"Review: {review}\nPredicted Sentiment: {sentiment}\n")
77
 
78
  # Create Gradio interface
79
  interface = gr.Interface(fn=predict_sentiment, inputs="text", outputs="text", title="Amazon Sentiment Analysis Demo")
 
1
+ from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
 
2
  import torch
3
+ import gradio as gr
 
4
 
5
+ # Load your saved model and tokenizer
6
+ model_path = "fine_tuned_distilbert"
7
+ tokenizer = DistilBertTokenizer.from_pretrained(model_path)
8
+ model = DistilBertForSequenceClassification.from_pretrained(model_path)
9
 
10
+ # Prediction function
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  def predict_sentiment(text):
12
  inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
13
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
18
  logits = outputs.logits
19
  predicted_class = torch.argmax(logits, dim=1).item()
20
  return "Positive" if predicted_class == 1 else "Negative"
 
 
 
 
 
 
 
 
21
 
22
  # Create Gradio interface
23
  interface = gr.Interface(fn=predict_sentiment, inputs="text", outputs="text", title="Amazon Sentiment Analysis Demo")