nadish1210 committed on
Commit
f75d9fd
·
verified ·
1 Parent(s): 1c8f9d6

Upload 8 files

Browse files
Unconfirmed 489417.crdownload ADDED
File without changes
app.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import joblib
3
+ import preprocess
4
+
5
+ # Load the trained model
6
+ try:
7
+ model = joblib.load("sentiment_model_best.pkl")
8
+ print("Model loaded successfully.")
9
+ except FileNotFoundError:
10
+ print("Error: Model file 'sentiment_model_best.pkl' not found. Please run train_model.py first.")
11
+ model = None
12
+
13
+ def analyze_sentiment(text):
14
+ if model is None:
15
+ return "Model not loaded."
16
+
17
+ # Preprocess
18
+ clean_text = preprocess.preprocess_text(text)
19
+
20
+ # Predict
21
+ # The pipeline handles vectorization
22
+ prediction = model.predict([clean_text])[0]
23
+
24
+ # Get confidence scores if possible (LinearSVC uses decision_function, not predict_proba by default,
25
+ # but for simplicity we rely on the label.
26
+ # If we wanted proba, we'd need CalibratedClassifierCV or use LogisticRegression)
27
+
28
+ return prediction
29
+
30
+ # Custom CSS for a nicer look
31
+ custom_css = """
32
+ body {background-color: #f0f2f5;}
33
+ .gradio-container {max-width: 700px !important; margin-top: 50px !important;}
34
+ h1 {text-align: center; color: #333;}
35
+ """
36
+
37
+ with gr.Blocks(css=custom_css, title="Sentiment Analyzer") as demo:
38
+ gr.Markdown("# 📊 Sentiment Analysis System")
39
+ gr.Markdown("Enter a review or sentence below to analyze its sentiment (Positive, Negative, or Neutral).")
40
+
41
+ with gr.Row():
42
+ input_text = gr.Textbox(
43
+ label="Input Text",
44
+ placeholder="Type something here... (e.g., 'The product is amazing!')",
45
+ lines=3
46
+ )
47
+
48
+ with gr.Row():
49
+ analyze_btn = gr.Button("Analyze Sentiment", variant="primary")
50
+
51
+ with gr.Row():
52
+ output_label = gr.Label(label="Predicted Sentiment")
53
+
54
+ analyze_btn.click(fn=analyze_sentiment, inputs=input_text, outputs=output_label)
55
+
56
+ gr.Markdown("---")
57
+ gr.Markdown("### Examples")
58
+ gr.Examples(
59
+ examples=[
60
+ ["I absolutely love this! It's fantastic."],
61
+ ["This is the worst experience I've ever had."],
62
+ ["It's average, nothing special."],
63
+ ],
64
+ inputs=input_text
65
+ )
66
+
67
+ if __name__ == "__main__":
68
+ demo.launch(share=False)
dataset.csv ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ text,label
2
+ "I absolutely love this product! It works perfectly.",Positive
3
+ "Terrible experience. The item was broken and support was rude.",Negative
4
+ "It's okay, does the job but nothing special.",Neutral
5
+ "Best purchase I've made all year. Highly recommended.",Positive
6
+ "Waste of money. Do not buy.",Negative
7
+ "Average quality, faster delivery than expected.",Neutral
8
+ "The design is beautiful but the functionality is lacking.",Neutral
9
+ "Absolutely fantastic service and great quality.",Positive
10
+ "Disappointed. Not as described.",Negative
11
+ "Just arrived. Looks good so far.",Neutral
12
+ "I am very happy with this.",Positive
13
+ "This is garbage.",Negative
preprocess.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import nltk
3
+ from nltk.corpus import stopwords
4
+ from nltk.stem import WordNetLemmatizer
5
+
6
+ # Ensure nltk resources are downloaded
7
+ try:
8
+ nltk.data.find('corpora/stopwords')
9
+ except LookupError:
10
+ nltk.download('stopwords')
11
+ try:
12
+ nltk.data.find('corpora/wordnet')
13
+ except LookupError:
14
+ nltk.download('wordnet')
15
+
16
+ stop_words = set(stopwords.words('english'))
17
+ lemmatizer = WordNetLemmatizer()
18
+
19
+ def preprocess_text(text):
20
+ if not isinstance(text, str):
21
+ return ""
22
+
23
+ # Lowercase
24
+ text = text.lower()
25
+
26
+ # Remove special characters, numbers, and urls
27
+ text = re.sub(r'http\S+', '', text)
28
+ text = re.sub(r'[^a-zA-Z\s]', '', text)
29
+
30
+ # Tokenize and remove stopwords & lemmatize
31
+ words = text.split()
32
+ clean_words = [lemmatizer.lemmatize(w) for w in words if w not in stop_words]
33
+
34
+ return " ".join(clean_words)
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ pandas
2
+ scikit-learn
3
+ nltk
4
+ gradio
5
+ joblib
6
+ numpy
sentiment_model_best.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66cd844719f82787c69dcb627c5e28cd049391ea81a695d2ef0098345495f4b5
3
+ size 3181
tfidf_vectorizer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
3
+ size 0
train_model.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import joblib
3
+ from sklearn.model_selection import train_test_split, GridSearchCV
4
+ from sklearn.feature_extraction.text import TfidfVectorizer
5
+ from sklearn.pipeline import Pipeline
6
+ from sklearn.svm import LinearSVC
7
+ from sklearn.metrics import classification_report, accuracy_score
8
+ import preprocess
9
+
10
+ def train():
11
+ print("Loading dataset...")
12
+ try:
13
+ df = pd.read_csv("dataset.csv")
14
+ except FileNotFoundError:
15
+ print("Error: dataset.csv not found.")
16
+ return
17
+
18
+ print("Preprocessing data...")
19
+ # Fill NaN with empty string just in case
20
+ df['text'] = df['text'].fillna('')
21
+ df['clean_text'] = df['text'].apply(preprocess.preprocess_text)
22
+
23
+ X = df['clean_text']
24
+ y = df['label']
25
+
26
+ print("Splitting data...")
27
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
28
+
29
+ print("Setting up pipeline and grid search...")
30
+ # Pipeline: TF-IDF -> LinearSVC (often best for text)
31
+ pipeline = Pipeline([
32
+ ('tfidf', TfidfVectorizer()),
33
+ ('clf', LinearSVC(dual='auto'))
34
+ ])
35
+
36
+ # Parameters to tune
37
+ param_grid = {
38
+ 'tfidf__ngram_range': [(1, 1), (1, 2)], # Unigrams or Bigrams
39
+ 'tfidf__max_df': [0.9, 1.0],
40
+ 'clf__C': [0.1, 1, 10]
41
+ }
42
+
43
+ # Grid Search for best accuracy
44
+ grid_search = GridSearchCV(pipeline, param_grid, cv=3, n_jobs=-1, verbose=1)
45
+
46
+ print("Training model...")
47
+ grid_search.fit(X_train, y_train)
48
+
49
+ print(f"Best Parameters: {grid_search.best_params_}")
50
+ best_model = grid_search.best_estimator_
51
+
52
+ print("Evaluating model...")
53
+ y_pred = best_model.predict(X_test)
54
+ print("Accuracy:", accuracy_score(y_test, y_pred))
55
+ print("\nClassification Report:\n", classification_report(y_test, y_pred))
56
+
57
+ print("Saving model...")
58
+ joblib.dump(best_model, "sentiment_model_best.pkl")
59
+ print("Model saved to sentiment_model_best.pkl")
60
+
61
+ if __name__ == "__main__":
62
+ train()