import streamlit as st
import pandas as pd
import joblib
import numpy as np
import string
import nltk
from nltk.corpus import stopwords
from nltk import pos_tag, word_tokenize, sent_tokenize
from nltk.stem import WordNetLemmatizer
# Download necessary NLTK data ('punkt_tab' is required by newer NLTK releases)
nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)
nltk.download('averaged_perceptron_tagger_eng', quiet=True)
nltk.download('wordnet', quiet=True)
nltk.download('stopwords', quiet=True)
# === Cleaning Function ===
# Lowercases and tokenizes the text, drops punctuation and stopwords,
# keeps only nouns and verbs, and lemmatizes each with a matching POS.
def sahi_karneka_function(x):
    lemmatizer = WordNetLemmatizer()
    stop_words = set(stopwords.words("english"))

    # Sentence-split, then word-tokenize each sentence in lowercase
    tokens = []
    for sentence in sent_tokenize(x):
        tokens.extend(word_tokenize(sentence.lower()))

    # Remove punctuation tokens and English stopwords
    tokens = [t for t in tokens
              if t not in string.punctuation and t not in stop_words]

    # Keep only nouns (NN*) and verbs (V*)
    kept = [word for word, tag in pos_tag(tokens)
            if tag.startswith("NN") or tag.startswith("V")]

    # Lemmatize nouns as nouns and verbs as verbs, then re-join
    lemmas = [lemmatizer.lemmatize(word, pos="n") if tag.startswith("NN")
              else lemmatizer.lemmatize(word, pos="v")
              for word, tag in pos_tag(kept)]
    return " ".join(lemmas)
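# Illustrative example (the exact output is an assumption; it depends on the
# NLTK tagger and lemmatizer versions in use):
#   sahi_karneka_function("How do I read a CSV file in pandas?")
#   -> "read csv file panda"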
# === Load Data and Models ===
df = pd.read_csv("c_d.csv")                 # questions with their URLs
model = joblib.load("logistic_models.pkl")  # multi-label logistic regression model
tfidf = joblib.load("tfidf.pkl")            # fitted TF-IDF vectorizer
ml = joblib.load("multilabels.pkl")         # fitted multi-label binarizer
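# Note: the artifact types above are inferred from how they are used below
# (an assumption, not confirmed by the pickles themselves): `model` behaves
# like a scikit-learn MultiOutputClassifier over LogisticRegression, whose
# predict_proba returns one (n_samples, 2) array per label, and `ml` behaves
# like a MultiLabelBinarizer, whose inverse_transform maps indicator rows
# back to tuples of tag names.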
# === Streamlit UI ===
st.title("๐ง Enhancing Developer Support: Automated Tagging on Stack Overflow")
# --- Select a URL for context ---
selected_url = st.selectbox("Select a question URL (for context):", df['questions_url'])
st.markdown(f"🔗 [Open selected question]({selected_url})")
# --- Session State ---
if "user_input" not in st.session_state:
st.session_state["user_input"] = ""
if "clear_input" not in st.session_state:
st.session_state["clear_input"] = False
# --- Clear input if flagged (AFTER rerun) ---
if st.session_state.clear_input:
    st.session_state.user_input = ""
    st.session_state.clear_input = False
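# (This two-step clear is needed because Streamlit raises an exception if a
# widget-backed session_state key is mutated after its widget has been
# instantiated in the same run, so the reset happens before the text_area.)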
# --- Input box ---
st.text_area("โ๏ธ Type your question here:", key="user_input", height=150)
# --- Predict button ---
if st.button("Predict Tags"):
final_question = st.session_state.user_input.strip()
if not final_question:
st.warning("โ ๏ธ Please enter a question.")
else:
with st.spinner("๐ Predicting tags..."):
# Step 1: Clean input
cleaned = sahi_karneka_function(final_question)
# Step 2: TF-IDF
f=[]
f.append(cleaned)
x_tfidf = tfidf.transform(f)
# Step 3: Predict
y_probs = model.predict_proba(x_tfidf)
threshold = 0.55
y_predd=model.predict(x_tfidf)
probs_column1 = np.array([i[:, 1] for i in y_probs]).T
y_pred = (probs_column1 >= threshold).astype(int)
# Step 4: Decode
predicted_tags = ml.inverse_transform(y_predd)
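            # Toy decode example (illustrative; the three classes here are
            # assumptions, not the real label set):
            #   if ml.classes_ == ['java', 'pandas', 'python'] and
            #   y_pred == [[1, 0, 1]], then ml.inverse_transform(y_pred)
            #   returns [('java', 'python')]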
            # Step 5: Display results
            st.success("✅ Predicted Tags:")
            if predicted_tags and predicted_tags[0]:
                for tag in predicted_tags[0]:
                    st.markdown(f"🔹 **`{tag}`**")
            else:
                st.info("No tags matched the threshold.")
# Step 6: Show a "Clear" button
if st.button("Clear Input"):
st.session_state.user_input = ""