# NOTE(review): Hugging Face Spaces status banner captured by the page scrape,
# kept as a comment so the file parses as Python:
# Spaces: Sleeping
import string

import joblib
import nltk
import numpy as np
import pandas as pd
import streamlit as st
from nltk import pos_tag, word_tokenize as w, sent_tokenize as s
from nltk.corpus import stopwords as stp
from nltk.stem import WordNetLemmatizer as wl

# Fetch the NLTK resources the cleaning pipeline needs: tokenizers
# (punkt / punkt_tab), the English POS tagger, WordNet for lemmatisation,
# and the stopword list. quiet=True keeps download chatter out of the log.
nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)
nltk.download('averaged_perceptron_tagger_eng', quiet=True)
nltk.download('wordnet', quiet=True)
nltk.download('stopwords', quiet=True)
# === Cleaning Function ===
def sahi_karneka_function(x):
    """Normalise raw question text for the TF-IDF model.

    Pipeline: sentence-split, lowercase word-tokenise, drop punctuation
    and English stopwords, keep only noun (NN*) and verb (V*) tokens,
    then lemmatise each kept token with the matching WordNet POS.

    Args:
        x: Raw question text (str).

    Returns:
        A single space-joined string of cleaned, lemmatised tokens.
    """
    lemmatizer = wl()
    # Build the stopword set once — the original called stp.words("english")
    # on every loop iteration, re-reading the corpus each time.
    stop_words = set(stp.words("english"))

    # Lowercased word tokens from every sentence.
    tokens = [tok for sent in s(x) for tok in w(sent.lower())]

    # Drop punctuation tokens and stopwords. This is the same filter the
    # original remove-and-restart (j = 0) loop applied, but in one O(n)
    # pass instead of a quadratic scan.
    kept = [t for t in tokens
            if t not in string.punctuation and t not in stop_words]

    # First tagging pass: keep only nouns and verbs.
    content_words = [word for word, tag in pos_tag(kept)
                     if tag.startswith("NN") or tag.startswith("V")]

    # Second tagging pass (mirrors the original's re-tag of the filtered
    # list) and lemmatisation with the matching WordNet POS.
    lemmas = [lemmatizer.lemmatize(word, pos="n") if tag.startswith("NN")
              else lemmatizer.lemmatize(word, pos="v")
              for word, tag in pos_tag(content_words)]
    return " ".join(lemmas)
# === Load Data and Models ===
@st.cache_resource(show_spinner=False)
def _load_artifacts():
    """Load the question data and fitted model artifacts once per server
    process. Streamlit reruns the whole script on every interaction, so
    without caching these files would be re-read from disk each time."""
    data = pd.read_csv("c_d.csv")
    clf = joblib.load("logistic_models.pkl")      # per-label logistic models
    vectorizer = joblib.load("tfidf.pkl")         # fitted TF-IDF vectoriser
    binarizer = joblib.load("multilabels.pkl")    # multi-label binarizer
    return data, clf, vectorizer, binarizer


df, model, tfidf, ml = _load_artifacts()
# === Streamlit UI ===
st.title("🧠 Enhancing Developer Support: Automated Tagging on Stack Overflow")

# --- Select a URL for context ---
selected_url = st.selectbox("Select a question URL (for context):", df['questions_url'])
st.markdown(f"🔗 [Open selected question]({selected_url})")

# --- Session State ---
# "user_input" backs the text area; "clear_input" flags that the box should
# be emptied at the TOP of the next rerun — a widget's session-state key
# cannot be written after the widget has been instantiated in the same run.
if "user_input" not in st.session_state:
    st.session_state["user_input"] = ""
if "clear_input" not in st.session_state:
    st.session_state["clear_input"] = False

# --- Clear input if flagged (AFTER rerun) ---
if st.session_state.clear_input:
    st.session_state.user_input = ""
    st.session_state.clear_input = False

# --- Input box ---
st.text_area("✏️ Type your question here:", key="user_input", height=150)

# --- Predict button ---
if st.button("Predict Tags"):
    final_question = st.session_state.user_input.strip()
    if not final_question:
        st.warning("⚠️ Please enter a question.")
    else:
        with st.spinner("🔍 Predicting tags..."):
            # Step 1: Clean input
            cleaned = sahi_karneka_function(final_question)
            # Step 2: TF-IDF — transform expects an iterable of documents
            x_tfidf = tfidf.transform([cleaned])
            # Step 3: Per-label probabilities, thresholded at 0.55.
            # BUG FIX: the original computed this thresholded matrix but then
            # decoded model.predict()'s default 0.5 predictions instead, so
            # the threshold (and the "No tags matched the threshold" message)
            # had no effect. We decode the thresholded matrix.
            threshold = 0.55
            y_probs = model.predict_proba(x_tfidf)
            # predict_proba of a multi-label wrapper yields one
            # (n_samples, 2) array per label; take P(class == 1) for each.
            probs_column1 = np.array([p[:, 1] for p in y_probs]).T
            y_pred = (probs_column1 >= threshold).astype(int)
            # Step 4: Decode the binary indicator matrix back to tag names
            predicted_tags = ml.inverse_transform(y_pred)
        # Step 5: Display results
        st.success("✅ Predicted Tags:")
        if predicted_tags and predicted_tags[0]:
            for tag in predicted_tags[0]:
                st.markdown(f"🔹 **`{tag}`**")
        else:
            st.info("No tags matched the threshold.")
        # Step 6: "Clear" button.
        # BUG FIX: writing st.session_state.user_input directly here raised
        # StreamlitAPIException (the text_area widget already exists this
        # run). Set the flag and rerun; the handler above empties the box.
        if st.button("Clear Input"):
            st.session_state.clear_input = True
            st.rerun()