Update app.py
Browse files
app.py
CHANGED
|
@@ -1,9 +1,13 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
-
import re
|
| 3 |
from transformers import pipeline
|
|
|
|
| 4 |
|
| 5 |
-
# Load
|
| 6 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
# Background & Style
|
| 9 |
st.markdown("""
|
|
@@ -37,71 +41,40 @@ st.markdown("""
|
|
| 37 |
</style>
|
| 38 |
""", unsafe_allow_html=True)
|
| 39 |
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
|
|
|
| 50 |
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
cleaned_text = re.sub(r'[\
|
| 54 |
-
return
|
| 55 |
|
| 56 |
-
|
| 57 |
-
if "text_input" not in st.session_state:
|
| 58 |
-
st.session_state.text_input = ""
|
| 59 |
-
if "result" not in st.session_state:
|
| 60 |
-
st.session_state.result = None
|
| 61 |
|
| 62 |
-
|
| 63 |
-
st.subheader("Try one of the following examples:")
|
| 64 |
-
examples = [
|
| 65 |
-
"ఈ ఆహారం చాలా చెడుగా ఉంది",
|
| 66 |
-
"నాకు ఈ రోజు చాలా సంతోషంగా ఉంది",
|
| 67 |
-
"నేను ఈ వార్తలకు చాలా బాధపడ్డాను",
|
| 68 |
-
"ఈ చిత్రం నాకు చాలా భయంకరంగా ఉంది",
|
| 69 |
-
"ఈ సెల్ఫీ చాలా అందంగా ఉంది",
|
| 70 |
-
"ఈ వాతావరణం నాకు చాలా ఉష్ణంగా ఉంది",
|
| 71 |
-
"ఈ సినిమా కి 5 స్టార్ ఇచ్చాను"
|
| 72 |
-
]
|
| 73 |
-
|
| 74 |
-
for i in range(0, len(examples), 2):
|
| 75 |
-
cols = st.columns(2)
|
| 76 |
-
for j in range(2):
|
| 77 |
-
if i + j < len(examples):
|
| 78 |
-
example = examples[i + j]
|
| 79 |
-
if cols[j].button(example[:30] + "..."):
|
| 80 |
-
st.session_state.text_input = example
|
| 81 |
-
if not is_telugu_text(example):
|
| 82 |
-
st.session_state.result = "error"
|
| 83 |
-
else:
|
| 84 |
-
st.session_state.result = classifier(example)[0]
|
| 85 |
-
|
| 86 |
-
# Input text area
|
| 87 |
-
input_text = st.text_area("Enter text to analyze sentiment:", value=st.session_state.text_input, height=150)
|
| 88 |
-
|
| 89 |
-
# Analyze button
|
| 90 |
-
if st.button("Analyze Sentiment"):
|
| 91 |
-
st.session_state.text_input = input_text
|
| 92 |
-
if not input_text.strip():
|
| 93 |
-
st.warning("Please enter some text to analyze!")
|
| 94 |
-
st.session_state.result = None
|
| 95 |
-
elif not is_telugu_text(input_text):
|
| 96 |
-
st.session_state.result = "error"
|
| 97 |
-
else:
|
| 98 |
-
st.session_state.result = classifier(input_text)[0]
|
| 99 |
|
| 100 |
-
|
| 101 |
-
if
|
| 102 |
-
|
| 103 |
-
st.error("Please enter valid **Telugu** text only (digits allowed).")
|
| 104 |
else:
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import streamlit as st
|
|
|
|
| 2 |
from transformers import pipeline
|
| 3 |
+
import re
|
| 4 |
|
| 5 |
+
# Load your Telugu sentiment model (update with actual model repo or path)
@st.cache_resource
def load_pipeline():
    """Build the text-classification pipeline once per server process.

    Streamlit re-executes this script on every widget interaction; caching
    the pipeline as a resource avoids re-loading the model on each rerun.
    """
    return pipeline("text-classification", model="your-username/Telugu_Sentiment_Model")


pipe = load_pipeline()

# Label mapping (ensure this matches your model's label structure):
# position i is the sentiment name used for a model label ending in "_i".
labels = ["neutral", "positive", "negative"]
# Emoji shown next to each sentiment name in the result message.
emojis = {"positive": "🤗", "negative": "😔", "neutral": "😐"}
|
| 11 |
|
| 12 |
# Background & Style
|
| 13 |
st.markdown("""
|
|
|
|
| 41 |
</style>
|
| 42 |
""", unsafe_allow_html=True)
|
| 43 |
|
| 44 |
+
def is_mostly_telugu(text, min_ratio=0.7):
    """Return True when *text* is predominantly written in Telugu script.

    The text is accepted only if both conditions hold:

    * every character is either in the Telugu Unicode block
      (U+0C00-U+0C7F) or is an ASCII letter, ASCII digit, whitespace, or
      one of ``. , ! ?``;
    * Telugu characters make up at least *min_ratio* of the
      non-whitespace characters.

    Whitespace is left out of the ratio's denominator so that the spaces
    between short Telugu words do not push pure-Telugu text under the
    threshold.

    Args:
        text: The string to inspect. ``None``, empty, and whitespace-only
            values are rejected.
        min_ratio: Minimum share of Telugu characters among the
            non-whitespace characters (defaults to 0.7).

    Returns:
        bool: True if the text passes both checks, otherwise False.
    """
    if not text or not text.strip():
        return False

    telugu_count = len(re.findall(r'[\u0C00-\u0C7F]', text))
    allowed_count = len(re.findall(r'[a-zA-Z0-9\s.,!?]', text))

    # The two character classes are disjoint, so the counts add up to the
    # full length only when no other character is present.
    if telugu_count + allowed_count != len(text):
        return False

    # Non-empty after strip() guarantees at least one non-space character.
    visible_count = sum(1 for ch in text if not ch.isspace())
    return telugu_count / visible_count >= min_ratio
|
| 55 |
|
| 56 |
+
def clean_input(text):
    """Normalize raw user text before it is validated and classified.

    Steps, in order:

    1. Replace every character that is not an ASCII letter, ASCII digit,
       Telugu-block character (U+0C00-U+0C7F), whitespace, or one of
       ``? . !`` with a single space.
    2. Delete each ``?``, ``.`` or ``!`` unless it sits at the end of the
       text or is immediately followed by another such mark and then
       whitespace.
    3. Collapse all runs of whitespace to one space and trim both ends.

    Args:
        text: The raw input string.

    Returns:
        str: The cleaned, single-spaced text.
    """
    disallowed = re.compile(r'[^a-zA-Z0-9\u0C00-\u0C7F\s?.!]')
    stray_punctuation = re.compile(r'([?.!])(?![?.!]\s|$)')

    without_symbols = disallowed.sub(' ', text)
    pruned = stray_punctuation.sub('', without_symbols)

    # split() with no argument drops leading/trailing and repeated whitespace.
    words = pruned.split()
    return ' '.join(words)
|
| 60 |
|
| 61 |
+
# Page heading, emitted as raw HTML tagged with the "radium-title" class.
st.markdown('<div class="radium-title">Telugu Sentiment Analysis</div>', unsafe_allow_html=True)

# Free-form text box holding the sentence to classify.
user_input = st.text_area("Enter your Telugu text:")

# Classification runs only on the rerun triggered by pressing the button.
if st.button("Predict"):
    if user_input.strip():
        cleaned = clean_input(user_input)
        if is_mostly_telugu(cleaned):
            # The pipeline returns a list; only its first entry is used.
            result = pipe(cleaned)[0]
            label = result['label']
            try:
                # Labels such as LABEL_0, LABEL_1, ... carry the index of
                # the sentiment name after the last underscore.
                sentiment = labels[int(label.rpartition('_')[2])]
            except (ValueError, IndexError):
                # Otherwise use the label itself when it is a known
                # sentiment name, and fall back to "neutral" if it is not.
                lowered = label.lower()
                sentiment = lowered if lowered in labels else "neutral"
            st.success(f"**Sentiment:** {sentiment.capitalize()} {emojis.get(sentiment, '')} \n**Confidence:** {result['score']:.2f}")
        else:
            st.error("Please enter text primarily in Telugu script.")
    else:
        st.warning("Please enter some Telugu text.")
|