File size: 1,289 Bytes
498f92c
 
 
 
 
 
 
a4dc491
 
c747161
a4dc491
9d2b2cb
 
 
 
 
 
8f2517a
 
 
a4dc491
498f92c
 
 
8f2517a
498f92c
 
 
8f2517a
498f92c
 
 
a4dc491
498f92c
8f2517a
 
 
 
 
498f92c
a4dc491
498f92c
 
 
 
 
 
 
 
a4dc491
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import gradio as gr
import joblib
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Fetch the NLTK data packages at import time (quiet=True is passed to keep
# the downloader from printing progress).
nltk.download('punkt', quiet=True)
nltk.download('stopwords', quiet=True)  # corpus read by preprocess() via stopwords.words('english')
nltk.download('punkt_tab', quiet=True)

# Build a Punkt sentence tokenizer from a trainer that was fed an empty string.
# NOTE(review): `tokenizer` and `trainer` are not referenced anywhere else in
# the visible code, and preprocess() splits on whitespace instead of using a
# tokenizer -- confirm whether this setup (and the punkt downloads) is still
# needed. Also verify what trainer.train("") actually returns before relying
# on it as the constructor argument.
from nltk.tokenize.punkt import PunktSentenceTokenizer, PunktTrainer
trainer = PunktTrainer()
tokenizer = PunktSentenceTokenizer(trainer.train(""))






# Load model and vectorizer
# Both artifacts are loaded once, at import time, from paths relative to the
# current working directory; predict_sentiment() uses them on every request.
# NOTE(review): joblib.load deserializes pickled objects, so these files must
# come from a trusted source.
model = joblib.load("sentiment_model.pkl")
vectorizer = joblib.load("tfidf_vectorizer.pkl")


_ENGLISH_STOPWORDS = None  # filled in on first use by _english_stopwords()


def _english_stopwords():
    """Return NLTK's English stop words as a frozenset, loading them once."""
    global _ENGLISH_STOPWORDS
    if _ENGLISH_STOPWORDS is None:
        # Loaded lazily (not at definition time) so the corpus is only read
        # after the module-level nltk.download('stopwords') call has run.
        _ENGLISH_STOPWORDS = frozenset(stopwords.words('english'))
    return _ENGLISH_STOPWORDS


def preprocess(text):
    """Normalize raw text into the space-joined token string fed to the vectorizer.

    Steps: coerce to ``str`` and lowercase, delete every character that is
    not ``a``-``z`` or whitespace, split on whitespace, and drop English
    stop words.

    Args:
        text: Input text; any object is accepted and converted with ``str()``.

    Returns:
        The remaining tokens joined by single spaces (``''`` if none remain).
    """
    text = str(text).lower()
    # Non-letters are deleted, not replaced by a space, so "don't" -> "dont".
    text = re.sub(r'[^a-z\s]', '', text)
    # Whitespace split is used instead of word_tokenize (simpler).
    # The stop-word set is fetched once per call; previously the corpus list
    # was rebuilt and linearly scanned for every single token.
    stop_words = _english_stopwords()
    return ' '.join(t for t in text.split() if t not in stop_words)

# Prediction function
def predict_sentiment(text):
    """Classify ``text`` and return the model's prediction for it.

    The text is cleaned with ``preprocess``, turned into features by the
    module-level ``vectorizer``, and passed to the module-level ``model``.
    The cleaned text is also printed to stdout for debugging.

    Args:
        text: Raw user input from the UI.

    Returns:
        The first (and only) element of ``model.predict`` for this input.
    """
    cleaned = preprocess(text)
    print("Processed:", cleaned)  # debug
    # transform() is given a one-element batch, so predict() yields one label.
    features = vectorizer.transform([cleaned])
    labels = model.predict(features)
    return labels[0]

# Gradio interface
# Wires predict_sentiment to a two-line text box (input) and a label
# component (output).
# NOTE(review): the description promises Positive/Negative/Neutral, but the
# actual label set comes from the loaded model -- confirm they match.
demo = gr.Interface(
    fn=predict_sentiment,
    inputs=gr.Textbox(lines=2, placeholder="Enter text here..."),
    outputs="label",
    title="Sentiment Analysis App",
    description="Classify text as Positive, Negative, or Neutral"
)

# Launch the app. There is no __main__ guard, so this also runs when the
# module is imported.
demo.launch()