import streamlit as st from transformers import pipeline import re st.set_page_config(page_title="Hindi Sentiment Analysis", layout="centered") # Custom CSS for styling st.markdown(""" """, unsafe_allow_html=True) # Load model pipe = pipeline("text-classification", model="NeonSamurai/hindi_sentiment_bert_finetuned") names = ["neutral", "positive", "negative"] emojis = {"positive": "🤗", "negative": "😔", "neutral": "😐"} # Utility functions def is_mostly_hindi(text): if not text.strip(): return False devanagari_pattern = r'[\u0900-\u097F]' allowed_pattern = r'[a-zA-Z0-9\s.,!?]' devanagari_chars = len(re.findall(devanagari_pattern, text)) allowed_chars = len(re.findall(allowed_pattern, text)) total_chars = len(text) hindi_proportion = devanagari_chars / total_chars if total_chars > 0 else 0 valid_chars = devanagari_chars + allowed_chars == total_chars return hindi_proportion >= 0.7 and valid_chars def clean_input(text): cleaned_text = re.sub(r'[^a-zA-Z0-9\u0900-\u097F\s?.!]', ' ', text) cleaned_text = re.sub(r'([?.!])(?![?.!]\s|$)', '', cleaned_text) cleaned_text = ' '.join(cleaned_text.split()) return cleaned_text # Title st.markdown("
{result['score']:.2f}