ogflash committed on
Commit
3dbfb13
·
verified ·
1 Parent(s): e2e4cf3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -41
app.py CHANGED
@@ -2,48 +2,43 @@ import torch
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
  import gradio as gr
4
 
5
# Load the fine-tuned model and tokenizer from a local directory.
model_path = "model"  # Your local fine-tuned model directory
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)

# Class-id -> human-readable label mapping; fall back to a default
# three-class sentiment scheme when the config carries no mapping.
id2label = model.config.id2label or {
    0: "Negative",
    1: "Neutral",
    2: "Positive",
}
16
 
 
17
def classify(text):
    """Classify *text* and return ``(summary, per-class probabilities)``.

    Uses the module-level ``tokenizer``, ``model`` and ``id2label``.
    Returns a ``"Prediction: <label> (<pct>%)"`` string plus a dict
    mapping every label name to its probability as a percent string.
    """
    encoded = tokenizer(text, return_tensors="pt")

    # Inference only — no gradients needed.
    with torch.no_grad():
        probs = torch.softmax(model(**encoded).logits, dim=1)[0]

    best = torch.argmax(probs).item()
    label = id2label.get(best, f"LABEL_{best}")
    confidence = round(float(probs[best]) * 100, 2)

    # Percentage string for every class, not just the winner.
    all_probs = {}
    for idx, prob in enumerate(probs):
        all_probs[id2label.get(idx, f"LABEL_{idx}")] = f"{round(float(prob)*100, 2)}%"

    return f"Prediction: {label} ({confidence}%)", all_probs
34
-
35
# Gradio Blocks front-end: a review textbox in, the predicted sentiment
# plus the full per-class probability breakdown out.
with gr.Blocks() as demo:
    gr.Markdown("# Yelp Review Sentiment Classifier")
    with gr.Row():
        input_box = gr.Textbox(lines=4, label="Enter a review")
    with gr.Row():
        output_label = gr.Textbox(label="Predicted Sentiment")
        output_probs = gr.JSON(label="All Class Probabilities")
    with gr.Row():
        classify_btn = gr.Button(value="Classify")

    # Wire the button to the classifier; two outputs match the two returns.
    classify_btn.click(classify, inputs=input_box, outputs=[output_label, output_probs])

if __name__ == "__main__":
    demo.launch()
 
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
  import gradio as gr
4
 
5
# Load model & tokenizer from the Hugging Face Hub (or a local path).
model_name = "ogflash/yelp_review_classifier"  # Change if needed
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
 
 
 
 
 
 
 
9
 
10
# DistilBERT-style models take no token_type_ids; strip them before forward().
def classify(text):
    """Classify *text* with the module-level ``model``/``tokenizer``.

    Returns a string like ``"Positive (97.31%)"`` — the predicted label
    with its softmax confidence as a percentage.
    """
    import inspect  # local import: only needed for the signature probe below

    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)

    # Drop token_type_ids when the model's forward() does not accept them.
    # inspect.signature follows __wrapped__, so this also works when forward
    # has been wrapped (hooks, decorators) — unlike peeking at
    # model.forward.__code__.co_varnames, which raises AttributeError for
    # bound builtins and reads the wrapper's code object otherwise.
    try:
        supports_tti = "token_type_ids" in inspect.signature(model.forward).parameters
    except (TypeError, ValueError):
        supports_tti = True  # cannot introspect — leave the inputs untouched
    if "token_type_ids" in inputs and not supports_tti:
        del inputs["token_type_ids"]

    # Inference only — no gradients needed.
    with torch.no_grad():
        outputs = model(**inputs)
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
    top_class = torch.argmax(probs, dim=1).item()
    confidence = probs[0][top_class].item() * 100

    # Label mapping: fall back to a default 3-class scheme when the config
    # carries none (empty/None or not a dict).
    id2label = model.config.id2label
    if not isinstance(id2label, dict) or not id2label:
        id2label = {
            0: "Negative",
            1: "Neutral",
            2: "Positive",
        }

    # Configs deserialized from JSON may key id2label by str ("0"), not int —
    # try both before falling back to a generic LABEL_<n> name.
    label_name = id2label.get(top_class, id2label.get(str(top_class), f"LABEL_{top_class}"))
    return f"{label_name} ({confidence:.2f}%)"
34
+
35
# Gradio front-end: one textbox in, the formatted sentiment string out.
review_input = gr.Textbox(lines=3, placeholder="Enter text to analyze...")
iface = gr.Interface(
    fn=classify,
    inputs=review_input,
    outputs="text",
    title="Sentiment Classifier",
    description="Predicts sentiment using a BERT-based model.",
)

iface.launch(share=True)