Spaces:

Mpavan45
/

Telugu_Sentiment_Finetuning

Sleeping

App Files Files Community

Mpavan45 committed on Apr 24, 2025

Commit

29196f6

verified ·

1 Parent(s): c8b54cd

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -66

app.py CHANGED Viewed

@@ -1,9 +1,13 @@
 import streamlit as st
-import re
 from transformers import pipeline
-# Load the model
-classifier = pipeline("text-classification", model="Mpavan45/Telugu_Sentimental_Analysis")
 # Background & Style
 st.markdown("""
@@ -37,71 +41,40 @@ st.markdown("""
     </style>
 """, unsafe_allow_html=True)
-# Title
-st.markdown('<div class="radium-title"> Telugu Sentiment Analysis with BERT</div>', unsafe_allow_html=True)
-st.write("This app uses a fine-tuned BERT model to classify Telugu text as Positive, Negative, or Neutral.")
-# Label mapping
-label_map = {
-    "LABEL_0": ("Negative", "😞"),
-    "LABEL_1": ("Neutral", "😐"),
-    "LABEL_2": ("Positive", "😊")
-}
-# Telugu input checker
-def is_telugu_text(text):  # True when text holds only Telugu-block chars, ASCII digits, whitespace and . , ! ?
-    cleaned_text = re.sub(r'[\u0C00-\u0C7F0-9\s\.\,\!\?]', '', text)  # delete every permitted character
-    return len(cleaned_text) == 0  # nothing left over => no disallowed character (also True for empty text)
-# Session state
-if "text_input" not in st.session_state:
-    st.session_state.text_input = ""
-if "result" not in st.session_state:
-    st.session_state.result = None
-# Example inputs
-st.subheader("Try one of the following examples:")
-examples = [
-    "ఈ ఆహారం చాలా చెడుగా ఉంది",
-    "నాకు ఈ రోజు చాలా సంతోషంగా ఉంది",
-    "నేను ఈ వార్తలకు చాలా బాధపడ్డాను",
-    "ఈ చిత్రం నాకు చాలా భయంకరంగా ఉంది",
-    "ఈ సెల్ఫీ చాలా అందంగా ఉంది",
-    "ఈ వాతావరణం నాకు చాలా ఉష్ణంగా ఉంది",
-    "ఈ సినిమా కి 5 స్టార్ ఇచ్చాను"
-]
-for i in range(0, len(examples), 2):
-    cols = st.columns(2)
-    for j in range(2):
-        if i + j < len(examples):
-            example = examples[i + j]
-            if cols[j].button(example[:30] + "..."):
-                st.session_state.text_input = example
-                if not is_telugu_text(example):
-                    st.session_state.result = "error"
-                else:
-                    st.session_state.result = classifier(example)[0]
-# Input text area
-input_text = st.text_area("Enter text to analyze sentiment:", value=st.session_state.text_input, height=150)
-# Analyze button
-if st.button("Analyze Sentiment"):
-    st.session_state.text_input = input_text
-    if not input_text.strip():
-        st.warning("Please enter some text to analyze!")
-        st.session_state.result = None
-    elif not is_telugu_text(input_text):
-        st.session_state.result = "error"
-    else:
-        st.session_state.result = classifier(input_text)[0]
-# Display result
-if st.session_state.result:
-    if st.session_state.result == "error":
-        st.error("Please enter valid **Telugu** text only (digits allowed).")
     else:
-        label = st.session_state.result['label']
-        sentiment, emoji = label_map.get(label, (label, ""))
-        st.markdown(f'<div class="radium-label">Sentiment: {sentiment} {emoji}</div>', unsafe_allow_html=True)

 import streamlit as st
 from transformers import pipeline
+import re
+# Load your Telugu sentiment model (update with actual model repo or path)
+pipe = pipeline("text-classification", model="your-username/Telugu_Sentiment_Model")  # NOTE(review): this id reads as a placeholder; the removed code loaded "Mpavan45/Telugu_Sentimental_Analysis" - confirm before deploying
+# Label mapping (ensure this matches your model's label structure)
+labels = ["neutral", "positive", "negative"]  # indexed by the numeric suffix of LABEL_n; NOTE(review): the removed map had LABEL_0=Negative, LABEL_1=Neutral, LABEL_2=Positive - confirm this order against the model config
+emojis = {"positive": "🤗", "negative": "😔", "neutral": "😐"}  # emoji shown next to each sentiment name
 # Background & Style
 st.markdown("""
     </style>
 """, unsafe_allow_html=True)
+def is_mostly_telugu(text):  # True when >= 70% of text is Telugu-block chars and every other char is an ASCII letter, digit, whitespace or . , ! ?
+    if not text.strip():  # empty or whitespace-only input is never accepted
+        return False
+    telugu_pattern = r'[\u0C00-\u0C7F]'  # one code point in the Telugu Unicode block
+    allowed_pattern = r'[a-zA-Z0-9\s.,!?]'  # non-Telugu characters that are tolerated
+    telugu_chars = len(re.findall(telugu_pattern, text))
+    allowed_chars = len(re.findall(allowed_pattern, text))
+    total_chars = len(text)  # includes whitespace and punctuation, so they count against the ratio
+    telugu_ratio = telugu_chars / total_chars if total_chars > 0 else 0  # the zero guard cannot trigger: blank text already returned above
+    valid_chars = telugu_chars + allowed_chars == total_chars  # the two classes are disjoint, so equality means every character matched one of them
+    return telugu_ratio >= 0.7 and valid_chars
+def clean_input(text):  # normalise raw input: blank out unsupported characters, thin out ? . ! marks, collapse whitespace
+    cleaned_text = re.sub(r'[^a-zA-Z0-9\u0C00-\u0C7F\s?.!]', ' ', text)  # anything other than ASCII letters/digits, Telugu block, whitespace or ? . ! becomes a space (commas included)
+    cleaned_text = re.sub(r'([?.!])(?![?.!]\s|$)', '', cleaned_text)  # delete a ? . ! unless it ends the string or is followed by another such mark and then whitespace; NOTE(review): a single mark before a space (". ") is deleted too - confirm intended
+    return ' '.join(cleaned_text.split())  # collapse whitespace runs to single spaces and trim both ends
+st.markdown('<div class="radium-title">Telugu Sentiment Analysis</div>', unsafe_allow_html=True)  # title emitted as raw HTML so the radium-title class applies
+user_input = st.text_area("Enter your Telugu text:")
+if st.button("Predict"):  # body runs only when st.button(...) returns a truthy value
+    if not user_input.strip():  # reject empty or whitespace-only input before doing any work
+        st.warning("Please enter some Telugu text.")
     else:
+        cleaned = clean_input(user_input)  # validation and inference both use the cleaned text, not the raw input
+        if not is_mostly_telugu(cleaned):
+            st.error("Please enter text primarily in Telugu script.")
+        else:
+            result = pipe(cleaned)[0]  # first element of the pipeline output; read below via its 'label' and 'score' keys
+            label = result['label']
+            try:
+                index = int(label.split('_')[-1])  # for LABEL_0, LABEL_1...
+                sentiment = labels[index]  # suffix selects the position in the labels list
+            except (ValueError, IndexError):  # suffix is not an integer, or is outside the labels list
+                sentiment = label.lower() if label.lower() in labels else "neutral"  # accept a label that is already a sentiment name; NOTE(review): any other label is silently shown as "neutral"
+            st.success(f"**Sentiment:** {sentiment.capitalize()} {emojis.get(sentiment, '')}  \n**Confidence:** {result['score']:.2f}")  # score is printed with two decimals