Spaces:

UpendraAI
/

hindi-classifier

Running

App Files Files Community

hindi-classifier / app.py

UpendraAI

Create app.py

9d74e47 verified 9 months ago

raw

history blame contribute delete

1.92 kB

	import streamlit as st
	import pandas as pd
	from sklearn.feature_extraction.text import TfidfVectorizer
	from sklearn.linear_model import LogisticRegression
	from sklearn.preprocessing import LabelEncoder
	from imblearn.over_sampling import RandomOverSampler
	from sklearn.model_selection import train_test_split

	@st.cache_data
	def load_data():
	    """Read the sample applications CSV and return a cleaned DataFrame.

	    The ``ulbName`` and ``wardName`` columns are dropped, ``applicationId``
	    is converted to strings, and missing ``applicationSubCategoryName``
	    values are replaced with the placeholder "अन्य".
	    """
	    raw = pd.read_csv("SushasanSampleData.csv", encoding='utf-8')
	    trimmed = raw.drop(columns=['ulbName', 'wardName'])
	    # Build both normalised columns in one step; all other columns are
	    # carried over unchanged and keep their original positions.
	    return trimmed.assign(
	        applicationId=trimmed['applicationId'].astype(str),
	        applicationSubCategoryName=trimmed['applicationSubCategoryName'].fillna("अन्य"),
	    )

	@st.cache_resource
	def train_model(df):
	    """Fit the text classifier used by the app.

	    Args:
	        df: DataFrame with an ``applicationDetail`` text column and an
	            ``applicationCategoryName`` label column.

	    Returns:
	        A ``(model, tfidf, label_encoder)`` tuple: the fitted
	        ``LogisticRegression``, the fitted ``TfidfVectorizer`` needed to
	        vectorise new text, and the fitted ``LabelEncoder`` needed to map
	        predicted class ids back to category names.
	    """
	    # Rows without text or without a label cannot be used for training, and
	    # a NaN document makes TfidfVectorizer raise. Drop them up front and
	    # coerce the remaining text to str so stray non-string cells are safe.
	    usable = df.dropna(subset=['applicationDetail', 'applicationCategoryName'])
	    documents = usable['applicationDetail'].astype(str)

	    tfidf = TfidfVectorizer(max_features=5000)
	    X = tfidf.fit_transform(documents)

	    label_encoder = LabelEncoder()
	    y = label_encoder.fit_transform(usable['applicationCategoryName'])

	    # Balance the classes by randomly duplicating minority-class rows.
	    ros = RandomOverSampler(random_state=42)
	    X_resampled, y_resampled = ros.fit_resample(X, y)

	    # NOTE(review): the 20% hold-out is never evaluated here, and because the
	    # split happens after oversampling it may contain duplicates of training
	    # rows. The split is kept (same seed) so the fitted model is unchanged.
	    X_train, _, y_train, _ = train_test_split(
	        X_resampled, y_resampled, test_size=0.2, random_state=42
	    )

	    model = LogisticRegression(max_iter=1000)
	    model.fit(X_train, y_train)

	    return model, tfidf, label_encoder

	# Prepare the dataset and the fitted pipeline pieces used for prediction.
	df = load_data()
	model, tfidf, label_encoder = train_model(df)

	# Page header and input form.
	st.title("🧾 Hindi Application Category Classifier")
	st.markdown("Enter a grievance or demand in Hindi. The model will predict whether it is a मांग (Demand) or a शिकायत (Complaint).")

	user_input = st.text_area("✍️ Application Detail", "")

	if st.button("🔍 Predict Category"):
	    if user_input.strip():
	        # Vectorise the text as entered, predict the class id, and map it
	        # back to the category name.
	        features = tfidf.transform([user_input])
	        decoded = label_encoder.inverse_transform(model.predict(features))
	        st.success(f"🧠 Predicted Category: {decoded[0]}")
	    else:
	        # Empty or whitespace-only input: ask for text instead of predicting.
	        st.warning("Please enter some text.")