Update app.py
app.py CHANGED

@@ -65,12 +65,16 @@
 import streamlit as st
 from transformers import pipeline
 import re
+import nltk
 from collections import Counter
 
 # Load the sentiment analysis model
 classifier = pipeline("text-classification", model="Mpavan45/Telugu_Sentimental_Analysis")
 
-#
+# Download NLTK tokenizer
+nltk.download('punkt_tab')
+
+# CSS styling
 st.markdown("""
 <style>
 .radium-title {
@@ -82,76 +86,51 @@ st.markdown("""
     background: linear-gradient(90deg, #ff416c, #ff4b2b);
     box-shadow: 0 0 20px #ff416c, 0 0 30px #ff4b2b;
 }
-.summary-label {
-    font-size:
+.radium-label {
+    font-size: 24px;
     font-weight: bold;
     color: white;
-    padding:
-    border-radius:
-    background: linear-gradient(90deg, #
-
-    margin-top:
+    padding: 10px;
+    border-radius: 8px;
+    background: linear-gradient(90deg, #36d1dc, #5b86e5);
+    display: inline-block;
+    margin-top: 10px;
 }
 </style>
 """, unsafe_allow_html=True)
 
-st.markdown('<div class="radium-title">
-st.write("This app
+st.markdown('<div class="radium-title">Sentiment Analysis with BERT</div>', unsafe_allow_html=True)
+st.write("This app uses a fine-tuned BERT model to classify **Telugu text** as Positive, Negative, or Neutral.")
 
-#
+# Emoji map
 label_map = {
     "LABEL_0": ("Negative", "😞"),
     "LABEL_1": ("Neutral", "😐"),
     "LABEL_2": ("Positive", "😊")
 }
 
-#
-def split_telugu_text(text):
-    sentences = re.split(r'[.!?।\n]', text)
-    return [s.strip() for s in sentences if s.strip()]
-
-# Telugu validation
+# Telugu validation function
 def is_telugu_text(text):
     cleaned = re.sub(r'[\u0C00-\u0C7F\s\d\-–—\.,!@#\$%\^&\*\(\)\[\]\{\}\'\"‘’…“”<>?/|]+', '', text)
     return len(cleaned.strip()) == 0
 
-#
-
-
-
-
+# Overall sentiment from paragraph
+def get_overall_sentiment(paragraph):
+    sentences = nltk.sent_tokenize(paragraph)
+    sentiments = [classifier(sent)[0]['label'] for sent in sentences]
+    most_common = Counter(sentiments).most_common(1)[0][0]
+    sentiment, emoji = label_map.get(most_common, (most_common, ""))
+    return sentiment, emoji
 
 # Text input
-text_input = st.text_area("Enter
+text_input = st.text_area("Enter Telugu text to analyze sentiment:", height=150)
 
-if st.button("Analyze Paragraph"):
-    if is_telugu_text(text_input):
-        st.session_state.text_input = text_input
-        st.session_state.result_shown = True
-
-        telugu_sentences = split_telugu_text(text_input)
-        sentence_sentiments = []
-
-        for sent in telugu_sentences:
-            result = classifier(sent)
-            label = result[0]['label']
-            sentiment, _ = label_map.get(label, (label, ""))
-            sentence_sentiments.append(sentiment)
-
-        sentiment_counts = Counter(sentence_sentiments)
-        overall = sentiment_counts.most_common(1)[0][0]
-        emoji = [v[1] for k, v in label_map.items() if v[0] == overall][0]
-
-        st.session_state.overall_sentiment = overall
-        st.session_state.overall_emoji = emoji
-
+# Analyze sentiment
+if st.button("Analyze Sentiment"):
+    if is_telugu_text(text_input) and text_input.strip():
+        sentiment, emoji = get_overall_sentiment(text_input)
+        st.markdown(f'<div class="radium-label">{sentiment} {emoji}</div>', unsafe_allow_html=True)
     else:
-        st.session_state.result_shown = False
         st.warning("Please enter valid Telugu text only (no English or symbols)!")
 
-# Display overall paragraph sentiment only
-if st.session_state.result_shown:
-    overall = st.session_state.overall_sentiment
-    emoji = st.session_state.overall_emoji
-    st.markdown(f'<div class="summary-label" Sentiment: {overall} {emoji}</div>', unsafe_allow_html=True)
 
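A minimal sketch of how the get_overall_sentiment helper in app.py can be exercised outside Streamlit. It reuses the model id, label map, and NLTK setup shown above; the sample sentence and the exact emoji characters are illustrative assumptions only.

# Standalone sketch of the paragraph-level sentiment flow from app.py (no Streamlit UI).
import nltk
from collections import Counter
from transformers import pipeline

nltk.download('punkt_tab')  # sentence-tokenizer data used by nltk.sent_tokenize

# Same fine-tuned Telugu sentiment model as in app.py
classifier = pipeline("text-classification", model="Mpavan45/Telugu_Sentimental_Analysis")

label_map = {
    "LABEL_0": ("Negative", "😞"),
    "LABEL_1": ("Neutral", "😐"),
    "LABEL_2": ("Positive", "😊"),
}

def get_overall_sentiment(paragraph):
    # Classify each sentence, then take the most frequent label as the paragraph-level verdict.
    sentences = nltk.sent_tokenize(paragraph)
    sentiments = [classifier(sent)[0]['label'] for sent in sentences]
    most_common = Counter(sentiments).most_common(1)[0][0]
    return label_map.get(most_common, (most_common, ""))

# Illustrative Telugu input ("The movie is very good. The acting is wonderful.")
print(get_overall_sentiment("సినిమా చాలా బాగుంది. నటన అద్భుతంగా ఉంది."))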
|