Mpavan45 committed on
Commit
29196f6
·
verified ·
1 Parent(s): c8b54cd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -66
app.py CHANGED
@@ -1,9 +1,13 @@
1
  import streamlit as st
2
- import re
3
  from transformers import pipeline
 
4
 
5
- # Load the model
6
- classifier = pipeline("text-classification", model="Mpavan45/Telugu_Sentimental_Analysis")
 
 
 
 
7
 
8
  # Background & Style
9
  st.markdown("""
@@ -37,71 +41,40 @@ st.markdown("""
37
  </style>
38
  """, unsafe_allow_html=True)
39
 
40
- # Title
41
- st.markdown('<div class="radium-title"> Telugu Sentiment Analysis with BERT</div>', unsafe_allow_html=True)
42
- st.write("This app uses a fine-tuned BERT model to classify Telugu text as Positive, Negative, or Neutral.")
43
-
44
- # Label mapping
45
- label_map = {
46
- "LABEL_0": ("Negative", "😞"),
47
- "LABEL_1": ("Neutral", "😐"),
48
- "LABEL_2": ("Positive", "😊")
49
- }
 
50
 
51
- # Telugu input checker
52
- def is_telugu_text(text):
53
- cleaned_text = re.sub(r'[\u0C00-\u0C7F0-9\s\.\,\!\?]', '', text)
54
- return len(cleaned_text) == 0
55
 
56
- # Session state
57
- if "text_input" not in st.session_state:
58
- st.session_state.text_input = ""
59
- if "result" not in st.session_state:
60
- st.session_state.result = None
61
 
62
- # Example inputs
63
- st.subheader("Try one of the following examples:")
64
- examples = [
65
- "ఈ ఆహారం చాలా చెడుగా ఉంది",
66
- "నాకు ఈ రోజు చాలా సంతోషంగా ఉంది",
67
- "నేను ఈ వార్తలకు చాలా బాధపడ్డాను",
68
- "ఈ చిత్రం నాకు చాలా భయంకరంగా ఉంది",
69
- "ఈ సెల్ఫీ చాలా అందంగా ఉంది",
70
- "ఈ వాతావరణం నాకు చాలా ఉష్ణంగా ఉంది",
71
- "ఈ సినిమా కి 5 స్టార్ ఇచ్చాను"
72
- ]
73
-
74
- for i in range(0, len(examples), 2):
75
- cols = st.columns(2)
76
- for j in range(2):
77
- if i + j < len(examples):
78
- example = examples[i + j]
79
- if cols[j].button(example[:30] + "..."):
80
- st.session_state.text_input = example
81
- if not is_telugu_text(example):
82
- st.session_state.result = "error"
83
- else:
84
- st.session_state.result = classifier(example)[0]
85
-
86
- # Input text area
87
- input_text = st.text_area("Enter text to analyze sentiment:", value=st.session_state.text_input, height=150)
88
-
89
- # Analyze button
90
- if st.button("Analyze Sentiment"):
91
- st.session_state.text_input = input_text
92
- if not input_text.strip():
93
- st.warning("Please enter some text to analyze!")
94
- st.session_state.result = None
95
- elif not is_telugu_text(input_text):
96
- st.session_state.result = "error"
97
- else:
98
- st.session_state.result = classifier(input_text)[0]
99
 
100
- # Display result
101
- if st.session_state.result:
102
- if st.session_state.result == "error":
103
- st.error("Please enter valid **Telugu** text only (digits allowed).")
104
  else:
105
- label = st.session_state.result['label']
106
- sentiment, emoji = label_map.get(label, (label, ""))
107
- st.markdown(f'<div class="radium-label">Sentiment: {sentiment} {emoji}</div>', unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
 
2
  from transformers import pipeline
3
+ import re
4
 
5
@st.cache_resource
def load_pipeline():
    """Build the text-classification pipeline once and reuse it.

    Streamlit re-executes the whole script on every widget interaction;
    caching the pipeline as a resource avoids reloading the model weights
    on each rerun.
    """
    # Placeholder repo id: replace with the actual model repo or local path.
    return pipeline("text-classification", model="your-username/Telugu_Sentiment_Model")


pipe = load_pipeline()

# Entry i is the sentiment shown for a model label of the form "LABEL_i".
# NOTE(review): confirm this order against the model's id2label config.
labels = ["neutral", "positive", "negative"]
# Emoji displayed next to each sentiment name.
emojis = {"positive": "🤗", "negative": "😔", "neutral": "😐"}
11
 
12
  # Background & Style
13
  st.markdown("""
 
41
  </style>
42
  """, unsafe_allow_html=True)
43
 
44
def is_mostly_telugu(text, min_ratio=0.7):
    """Return True when *text* is predominantly written in Telugu script.

    The text is accepted only if both conditions hold:

    * every character is either in the Telugu Unicode block
      (U+0C00-U+0C7F) or is an ASCII letter, digit, whitespace or one of
      ``. , ! ?``;
    * Telugu characters make up at least ``min_ratio`` of *all* characters
      (whitespace and punctuation count towards the total).

    Args:
        text: The string to inspect.
        min_ratio: Minimum share of Telugu characters, between 0 and 1.
            Defaults to 0.7.

    Returns:
        ``False`` for empty or whitespace-only input, otherwise whether the
        two conditions above are met.
    """
    if not text.strip():
        return False

    telugu_chars = len(re.findall(r'[\u0C00-\u0C7F]', text))
    allowed_chars = len(re.findall(r'[a-zA-Z0-9\s.,!?]', text))
    # Non-blank text always has at least one character, so the division
    # below cannot hit zero.
    total_chars = len(text)

    # The two character classes are disjoint, so the counts add up to the
    # total only when no other character is present.
    if telugu_chars + allowed_chars != total_chars:
        return False
    return telugu_chars / total_chars >= min_ratio
55
 
56
def clean_input(text):
    """Normalise raw user text before validation and classification.

    Steps, in order:

    1. Replace every character that is not an ASCII letter, a digit, a
       Telugu-block character, whitespace or one of ``? . !`` with a space.
    2. Delete each ``?``, ``.`` or ``!`` unless it is immediately followed
       by another such mark plus a whitespace character, or ``$`` matches
       right after it (end of the string).
    3. Collapse runs of whitespace to single spaces and trim both ends.
    """
    disallowed = re.compile(r'[^a-zA-Z0-9\u0C00-\u0C7F\s?.!]')
    stray_punctuation = re.compile(r'([?.!])(?![?.!]\s|$)')

    without_symbols = disallowed.sub(' ', text)
    without_stray = stray_punctuation.sub('', without_symbols)
    words = without_stray.split()
    return ' '.join(words)
60
 
61
+ st.markdown('<div class="radium-title">Telugu Sentiment Analysis</div>', unsafe_allow_html=True)
 
 
 
 
62
 
63
+ user_input = st.text_area("Enter your Telugu text:")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
# Classify the entered text only when the user presses the button.
if st.button("Predict"):
    if user_input.strip():
        cleaned = clean_input(user_input)
        if is_mostly_telugu(cleaned):
            # The pipeline returns a list; the first entry carries the
            # predicted label and its score.
            result = pipe(cleaned)[0]
            label = result['label']
            suffix = label.split('_')[-1]  # for LABEL_0, LABEL_1...
            try:
                index = int(suffix)
                sentiment = labels[index]
            except (ValueError, IndexError):
                # Label is not of the LABEL_<n> form (or n is out of range):
                # use the label text itself when it names a known sentiment,
                # otherwise fall back to "neutral".
                lowered = label.lower()
                sentiment = lowered if lowered in labels else "neutral"
            st.success(f"**Sentiment:** {sentiment.capitalize()} {emojis.get(sentiment, '')} \n**Confidence:** {result['score']:.2f}")
        else:
            st.error("Please enter text primarily in Telugu script.")
    else:
        st.warning("Please enter some Telugu text.")