Mpavan45 committed on
Commit
7ae290e
·
verified ·
1 Parent(s): 63d9885

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -50
app.py CHANGED
@@ -65,12 +65,16 @@
65
  import streamlit as st
66
  from transformers import pipeline
67
  import re
 
68
  from collections import Counter
69
 
70
  # Load the sentiment analysis model
71
  classifier = pipeline("text-classification", model="Mpavan45/Telugu_Sentimental_Analysis")
72
 
73
- # Styling
 
 
 
74
  st.markdown("""
75
  <style>
76
  .radium-title {
@@ -82,76 +86,51 @@ st.markdown("""
82
  background: linear-gradient(90deg, #ff416c, #ff4b2b);
83
  box-shadow: 0 0 20px #ff416c, 0 0 30px #ff4b2b;
84
  }
85
- .summary-label {
86
- font-size: 26px;
87
  font-weight: bold;
88
  color: white;
89
- padding: 12px;
90
- border-radius: 10px;
91
- background: linear-gradient(90deg, #00b09b, #96c93d);
92
- text-align: center;
93
- margin-top: 20px;
94
  }
95
  </style>
96
  """, unsafe_allow_html=True)
97
 
98
- st.markdown('<div class="radium-title">Telugu Paragraph Sentiment Analyzer</div>', unsafe_allow_html=True)
99
- st.write("This app analyzes a **Telugu paragraph**, splits it into sentences, and predicts the **overall sentiment** based on those sentences.")
100
 
101
- # Label map
102
  label_map = {
103
  "LABEL_0": ("Negative", "😞"),
104
  "LABEL_1": ("Neutral", "😐"),
105
  "LABEL_2": ("Positive", "😊")
106
  }
107
 
108
- # Sentence splitter
109
- def split_telugu_text(text):
110
- sentences = re.split(r'[.!?ΰ₯€\n]', text)
111
- return [s.strip() for s in sentences if s.strip()]
112
-
113
- # Telugu validation
114
  def is_telugu_text(text):
115
  cleaned = re.sub(r'[\u0C00-\u0C7F\s\d\-–—\.,!@#\$%\^&\*\(\)\[\]\{\}\'\"β€œβ€β€¦β€˜β€™<>?/|]+', '', text)
116
  return len(cleaned.strip()) == 0
117
 
118
- # Streamlit session
119
- if "text_input" not in st.session_state:
120
- st.session_state.text_input = ""
121
- if "result_shown" not in st.session_state:
122
- st.session_state.result_shown = False
 
 
123
 
124
  # Text input
125
- text_input = st.text_area("Enter a Telugu paragraph:", value=st.session_state.text_input, height=200)
126
-
127
- if st.button("Analyze Paragraph"):
128
- if is_telugu_text(text_input):
129
- st.session_state.text_input = text_input
130
- st.session_state.result_shown = True
131
-
132
- telugu_sentences = split_telugu_text(text_input)
133
- sentence_sentiments = []
134
-
135
- for sent in telugu_sentences:
136
- result = classifier(sent)
137
- label = result[0]['label']
138
- sentiment, _ = label_map.get(label, (label, ""))
139
- sentence_sentiments.append(sentiment)
140
-
141
- sentiment_counts = Counter(sentence_sentiments)
142
- overall = sentiment_counts.most_common(1)[0][0]
143
- emoji = [v[1] for k, v in label_map.items() if v[0] == overall][0]
144
-
145
- st.session_state.overall_sentiment = overall
146
- st.session_state.overall_emoji = emoji
147
 
 
 
 
 
 
148
  else:
149
- st.session_state.result_shown = False
150
  st.warning("Please enter valid Telugu text only (no English or symbols)!")
151
 
152
- # Display overall paragraph sentiment only
153
- if st.session_state.result_shown:
154
- overall = st.session_state.overall_sentiment
155
- emoji = st.session_state.overall_emoji
156
- st.markdown(f'<div class="summary-label" Sentiment: {overall} {emoji}</div>', unsafe_allow_html=True)
157
 
 
65
  import streamlit as st
66
  from transformers import pipeline
67
  import re
68
+ import nltk
69
  from collections import Counter
70
 
71
  # Load the sentiment analysis model
72
  classifier = pipeline("text-classification", model="Mpavan45/Telugu_Sentimental_Analysis")
73
 
74
+ # Download NLTK tokenizer
75
+ nltk.download('punkt_tab')
76
+
77
+ # CSS styling
78
  st.markdown("""
79
  <style>
80
  .radium-title {
 
86
  background: linear-gradient(90deg, #ff416c, #ff4b2b);
87
  box-shadow: 0 0 20px #ff416c, 0 0 30px #ff4b2b;
88
  }
89
+ .radium-label {
90
+ font-size: 24px;
91
  font-weight: bold;
92
  color: white;
93
+ padding: 10px;
94
+ border-radius: 8px;
95
+ background: linear-gradient(90deg, #36d1dc, #5b86e5);
96
+ display: inline-block;
97
+ margin-top: 10px;
98
  }
99
  </style>
100
  """, unsafe_allow_html=True)
101
 
102
+ st.markdown('<div class="radium-title">Sentiment Analysis with BERT</div>', unsafe_allow_html=True)
103
+ st.write("This app uses a fine-tuned BERT model to classify **Telugu text** as Positive, Negative, or Neutral.")
104
 
105
+ # Emoji map
106
  label_map = {
107
  "LABEL_0": ("Negative", "😞"),
108
  "LABEL_1": ("Neutral", "😐"),
109
  "LABEL_2": ("Positive", "😊")
110
  }
111
 
112
+ # Telugu validation function
 
 
 
 
 
113
  def is_telugu_text(text):
114
  cleaned = re.sub(r'[\u0C00-\u0C7F\s\d\-–—\.,!@#\$%\^&\*\(\)\[\]\{\}\'\"β€œβ€β€¦β€˜β€™<>?/|]+', '', text)
115
  return len(cleaned.strip()) == 0
116
 
117
+ # Overall sentiment from paragraph
118
def get_overall_sentiment(paragraph):
    """Return the majority sentiment of a paragraph as ``(sentiment, emoji)``.

    The paragraph is split into sentences with ``nltk.sent_tokenize``. Each
    sentence is classified with the module-level ``classifier`` and the most
    frequent label wins. On a tie, ``Counter.most_common`` keeps the label
    that was encountered first.

    Args:
        paragraph: Text to analyze.

    Returns:
        The ``(name, emoji)`` pair from ``label_map`` for the winning label.
        A label missing from ``label_map`` is returned unchanged, paired
        with an empty emoji string.

    Raises:
        ValueError: If ``paragraph`` yields no non-blank sentence.
    """
    sentences = [s for s in nltk.sent_tokenize(paragraph) if s.strip()]
    if not sentences:
        # Without this guard, most_common(1)[0] fails with an opaque
        # IndexError on empty or whitespace-only input.
        raise ValueError("paragraph contains no sentences to analyze")

    label_counts = Counter(classifier(sent)[0]['label'] for sent in sentences)
    top_label = label_counts.most_common(1)[0][0]
    sentiment, emoji = label_map.get(top_label, (top_label, ""))
    return sentiment, emoji
124
 
125
  # Text input
126
+ text_input = st.text_area("Enter Telugu text to analyze sentiment:", height=150)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
 
128
+ # Analyze sentiment
129
+ if st.button("Analyze Sentiment"):
130
+ if is_telugu_text(text_input) and text_input.strip():
131
+ sentiment, emoji = get_overall_sentiment(text_input)
132
+ st.markdown(f'<div class="radium-label">{sentiment} {emoji}</div>', unsafe_allow_html=True)
133
  else:
 
134
  st.warning("Please enter valid Telugu text only (no English or symbols)!")
135
 
 
 
 
 
 
136