File size: 1,539 Bytes
09c9102
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import streamlit as st
from transformers import AutoTokenizer, AutoModel
import torch
import numpy as np
from sklearn.linear_model import LogisticRegression

# Hugging Face model used for sentence embeddings.
model_name = "bert-base-uncased"


@st.cache_resource
def _load_hf_model(name: str):
    """Load and cache the Hugging Face tokenizer and encoder for *name*.

    Cached with st.cache_resource so Streamlit does not rebuild (and
    potentially re-download) the model on every script rerun.
    """
    return AutoTokenizer.from_pretrained(name), AutoModel.from_pretrained(name)


tokenizer, model = _load_hf_model(model_name)

# Function to get text embeddings
def get_embedding(text):
    """Embed *text* as its BERT [CLS] vector.

    Tokenizes the input (truncated/padded to at most 512 tokens), runs the
    encoder without gradient tracking, and returns the final hidden state at
    position 0 (the [CLS] token) as a (1, hidden_size) numpy array.
    """
    encoded = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=512,
    )
    with torch.no_grad():
        hidden = model(**encoded).last_hidden_state
    return hidden[:, 0, :].numpy()

# Tiny labeled dataset for the sentiment demo (1 = Positive, 0 = Negative).
texts = ["I love this!", "This is terrible.", "Fantastic experience!", "I hate it.", "Absolutely wonderful!", "Worst ever!"]
labels = [1, 0, 1, 0, 1, 0]  # 1 = Positive, 0 = Negative


@st.cache_resource
def _train_classifier():
    """Embed the sample texts and fit a logistic-regression classifier.

    Cached with st.cache_resource so the six BERT forward passes and the
    fit happen once per session instead of on every Streamlit rerun.
    """
    X = np.vstack([get_embedding(text) for text in texts])
    y = np.array(labels)
    clf = LogisticRegression()
    clf.fit(X, y)
    return clf


clf = _train_classifier()

# --- Streamlit UI -------------------------------------------------------
st.title("Sentiment Analysis with Hugging Face & Logistic Regression")
st.write("Enter a sentence and the model will predict whether the sentiment is Positive or Negative.")

# Read a sentence from the user; classify it as soon as something is typed.
user_input = st.text_input("Enter your text here:")

if user_input:
    embedding = get_embedding(user_input)
    predicted_label = clf.predict(embedding)[0]
    sentiment = "Positive 😊" if predicted_label == 1 else "Negative 😡"
    st.write(f"**Predicted Sentiment:** {sentiment}")