Spaces:
Running
Running
| import streamlit as st | |
| import pandas as pd | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.linear_model import LogisticRegression | |
| from sklearn.preprocessing import LabelEncoder | |
| from imblearn.over_sampling import RandomOverSampler | |
| from sklearn.model_selection import train_test_split | |
def load_data(path: str = "SushasanSampleData.csv") -> pd.DataFrame:
    """Read the application CSV and normalise the columns the app relies on.

    Args:
        path: Location of the CSV file. Defaults to the sample file that
            ships next to the script, so existing ``load_data()`` calls
            behave as before.

    Returns:
        A DataFrame without the ``ulbName`` / ``wardName`` columns, with
        ``applicationId`` converted to ``str`` and missing
        ``applicationSubCategoryName`` values replaced by "अन्य".

    Raises:
        KeyError: If ``applicationId`` or ``applicationSubCategoryName`` is
            absent from the file.
    """
    df = pd.read_csv(path, encoding='utf-8')
    # These two columns are only discarded, so a file that already lacks
    # them should not abort loading.
    df = df.drop(columns=['ulbName', 'wardName'], errors='ignore')
    df['applicationId'] = df['applicationId'].astype(str)
    df['applicationSubCategoryName'] = df['applicationSubCategoryName'].fillna("अन्य")
    return df
def train_model(df):
    """Fit a TF-IDF + logistic-regression classifier on the application text.

    Args:
        df: DataFrame providing an ``applicationDetail`` text column and an
            ``applicationCategoryName`` label column.

    Returns:
        A ``(model, tfidf, label_encoder)`` tuple: the fitted
        ``LogisticRegression``, the fitted ``TfidfVectorizer`` and the
        fitted ``LabelEncoder`` needed to map predictions back to names.

    Raises:
        ValueError: If no row has both a text and a category value.
    """
    # A missing value in the text column makes the vectorizer raise, and an
    # unlabeled row cannot be used for training, so drop both kinds up front.
    labelled = df.dropna(subset=['applicationDetail', 'applicationCategoryName'])
    if labelled.empty:
        raise ValueError(
            "No rows with both 'applicationDetail' and "
            "'applicationCategoryName' are available for training."
        )

    tfidf = TfidfVectorizer(max_features=5000)
    X = tfidf.fit_transform(labelled['applicationDetail'].astype(str))

    label_encoder = LabelEncoder()
    y = label_encoder.fit_transform(labelled['applicationCategoryName'])

    # Balance the classes by duplicating minority-class samples.
    ros = RandomOverSampler(random_state=42)
    X_resampled, y_resampled = ros.fit_resample(X, y)

    # NOTE(review): the 20% hold-out is never evaluated. The split is kept
    # so the fitted model stays the same as before on clean data; only the
    # unused test halves are discarded.
    X_train, _, y_train, _ = train_test_split(
        X_resampled, y_resampled, test_size=0.2, random_state=42
    )

    model = LogisticRegression(max_iter=1000)
    model.fit(X_train, y_train)
    return model, tfidf, label_encoder
# Load and train
# Streamlit re-executes this script from top to bottom on every widget
# interaction. Caching the load + fit step keeps a button click from
# re-reading the CSV and re-training the model each time.
@st.cache_resource
def _load_artifacts():
    """Load the dataset and fit the classifier, reusing the result across reruns."""
    data = load_data()
    fitted_model, vectorizer, encoder = train_model(data)
    return data, fitted_model, vectorizer, encoder


df, model, tfidf, label_encoder = _load_artifacts()

# UI
st.title("🧾 Hindi Application Category Classifier")
st.markdown("Enter a grievance or demand in Hindi. The model will predict whether it is a **मांग** (Demand) or a **शिकायत** (Complaint).")
user_input = st.text_area("✍️ Application Detail", "")

if st.button("🔍 Predict Category"):
    if not user_input.strip():
        # Whitespace-only input carries no text to classify.
        st.warning("Please enter some text.")
    else:
        # Reuse the fitted vectorizer so the input is encoded with the
        # same vocabulary the model was trained on.
        input_vector = tfidf.transform([user_input])
        prediction = model.predict(input_vector)
        label = label_encoder.inverse_transform(prediction)[0]
        st.success(f"🧠 Predicted Category: **{label}**")