Spaces:

shujath000
/

stack_overflow

Sleeping

App Files Files Community

shujath000 committed on Jul 9, 2025

Commit

43d807a

verified ·

1 Parent(s): e6218e9

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +109 -38

src/streamlit_app.py CHANGED Viewed

@@ -1,40 +1,111 @@
-import altair as alt
-import numpy as np
-import pandas as pd
 import streamlit as st
-"""
-# Welcome to Streamlit!
-Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
-If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).
-In the meantime, below is an example of what you can do with just a few lines of code:
-"""
-num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
-num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
-indices = np.linspace(0, 1, num_points)
-theta = 2 * np.pi * num_turns * indices
-radius = indices
-x = radius * np.cos(theta)
-y = radius * np.sin(theta)
-df = pd.DataFrame({
-    "x": x,
-    "y": y,
-    "idx": indices,
-    "rand": np.random.randn(num_points),
-})
-st.altair_chart(alt.Chart(df, height=700, width=700)
-    .mark_point(filled=True)
-    .encode(
-        x=alt.X("x", axis=None),
-        y=alt.Y("y", axis=None),
-        color=alt.Color("idx", legend=None, scale=alt.Scale()),
-        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
-    ))

 import streamlit as st
+import pandas as pd
+import joblib
+import numpy as np
+import string
+import nltk
+from nltk.corpus import stopwords as stp
+from nltk import pos_tag, word_tokenize as w, sent_tokenize as s
+from nltk.stem import WordNetLemmatizer as wl
+# Download necessary NLTK data
+nltk.download('punkt', quiet=True)
+nltk.download('averaged_perceptron_tagger', quiet=True)
+nltk.download('wordnet', quiet=True)
+nltk.download('stopwords', quiet=True)
+# === Cleaning Function ===
+def sahi_karneka_function(x):
+    nouns=[]
+    li=[]
+    lem=wl()
+    l=s(x)
+    for i in l:
+        d=w(i.lower())
+        for k in d:
+            li.append(k)
+    lw=len(li)
+    j=0
+    while j<lw:
+        if li[j] in string.punctuation:
+            li.remove(li[j])
+            lw=len(li)
+            j=0
+        elif li[j] in stp.words("english"):
+            li.remove(li[j])
+            lw=len(li)
+            j=0
+        else:
+            j=j+1
+    tags=pos_tag(li)
+    for word,tag in tags:
+        if tag.startswith("NN") or tag.startswith("V"):
+            nouns.append(word)
+    semi_final_words=[lem.lemmatize(m,pos="n") if tagg.startswith("NN") else lem.lemmatize(m,pos="v") for m,tagg in pos_tag(nouns)]
+    final_sentence=" ".join(semi_final_words)
+    return final_sentence
+# === Load Data and Models ===
+df = pd.read_csv(r"src/c_d.csv")
+model = joblib.load("src/logistic_models.pkl")
+tfidf = joblib.load("src/tfidf.pkl")
+ml = joblib.load("src/multilabels.pkl")
+# === Streamlit UI ===
+st.title("🧠 Multi-Label Question Tag Predictor")
+# --- Select a URL for context ---
+selected_url = st.selectbox("Select a question URL (for context):", df['questions_url'])
+st.markdown(f"🔗 [Open selected question]({selected_url})")
+# --- Session State ---
+if "user_input" not in st.session_state:
+    st.session_state["user_input"] = ""
+if "clear_input" not in st.session_state:
+    st.session_state["clear_input"] = False
+# --- Clear input if flagged (AFTER rerun) ---
+if st.session_state.clear_input:
+    st.session_state.user_input = ""
+    st.session_state.clear_input = False
+# --- Input box ---
+st.text_area("✍️ Type your question here:", key="user_input", height=150)
+# --- Predict button ---
+if st.button("Predict Tags"):
+    final_question = st.session_state.user_input.strip()
+    if not final_question:
+        st.warning("⚠️ Please enter a question.")
+    else:
+        with st.spinner("🔍 Predicting tags..."):
+            # Step 1: Clean input
+            cleaned = sahi_karneka_function(final_question)
+            # Step 2: TF-IDF
+            f=[]
+            f.append(cleaned)
+            x_tfidf = tfidf.transform(f)
+            # Step 3: Predict
+            y_probs = model.predict_proba(x_tfidf)
+            threshold = 0.55
+            y_predd=model.predict(x_tfidf)
+            probs_column1 = np.array([i[:, 1] for i in y_probs]).T
+            y_pred = (probs_column1 >= threshold).astype(int)
+            # Step 4: Decode
+            predicted_tags = ml.inverse_transform(y_predd)
+            # Step 5: Display results
+            st.success("✅ Predicted Tags:")
+            if predicted_tags and predicted_tags[0]:
+                for tag in predicted_tags[0]:
+                    st.markdown(f"🔹 **`{tag}`**")
+            else:
+                st.info("No tags matched the threshold.")
+        # Step 6: Show a "Clear" button
+        if st.button("Clear Input"):
+            st.session_state.user_input = ""