"""
Gradio app for Quora Duplicate Question Detector.
Deploy to Hugging Face Spaces with Gradio SDK.
"""
import sys
from pathlib import Path
ROOT = Path(__file__).resolve().parent
sys.path.insert(0, str(ROOT))
sys.path.insert(0, str(ROOT / "streamlit-app"))
import nltk
nltk.download("stopwords", quiet=True)
import helper
import gradio as gr
def predict_fn(q1: str, q2: str, model_name: str) -> tuple[str, float]:
    """Run duplicate-question prediction and return formatted output.

    Args:
        q1: First question text (may be None/empty from the UI).
        q2: Second question text (may be None/empty from the UI).
        model_name: Dropdown label; "Classical" in the label selects the
            classical model, anything else selects the transformer.

    Returns:
        A ``(markdown_message, probability)`` pair, where probability is the
        model's duplicate probability (0.0 on validation failure or error).
    """
    q1_clean = (q1 or "").strip()
    q2_clean = (q2 or "").strip()
    # Guard clauses: validate input before touching any model.
    if not q1_clean or not q2_clean:
        return "⚠️ Please enter both questions.", 0.0
    if len(q1_clean) < 3 or len(q2_clean) < 3:
        return "⚠️ Questions should be at least 3 characters.", 0.0
    try:
        model_type = "classical" if "Classical" in model_name else "transformer"
        pred, proba = helper.predict(q1_clean, q2_clean, model_type)
        if pred:
            msg = "**Duplicate** — These questions likely have the same meaning."
        else:
            msg = "**Not Duplicate** — These questions appear to be different."
        return msg, proba
    except Exception as e:
        # Surface model/runtime errors in the UI instead of crashing the app.
        return f"❌ Error: {e}", 0.0
# Discover available models and build dropdown labels annotated with the
# model's mean inference latency, e.g. "Classical (RF) (~12 ms)".
_models = helper.get_available_models()
if not _models:
    raise RuntimeError("No models found. Add models to models/ or configure HF Hub download.")
_times = helper.get_inference_times()


def _labeled(display_name: str) -> str:
    """Append a "(~N ms)" latency hint to a display name when timing is known."""
    kind = "classical" if "Classical" in display_name else "transformer"
    mean_ms = _times.get(kind, {}).get("mean_ms", 0)
    return f"{display_name} (~{mean_ms:.0f} ms)" if mean_ms else display_name


model_choices_with_time = [_labeled(helper.get_model_display_name(m)) for m in _models]
# ---------------------------------------------------------------------------
# UI layout: two question inputs + model picker on the left, prediction result
# on the right; example pairs and an "About" section below.
# ---------------------------------------------------------------------------
with gr.Blocks(title="Quora Duplicate Detector", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🔍 Quora Duplicate Question Pairs")
    gr.Markdown("Enter two questions to check if they are semantically duplicate.")
    with gr.Row():
        with gr.Column(scale=2):
            q1 = gr.Textbox(
                label="Question 1",
                placeholder="e.g. What is the capital of India?",
                lines=2,
            )
            q2 = gr.Textbox(
                label="Question 2",
                placeholder="e.g. Which city is India's capital?",
                lines=2,
            )
            model_dropdown = gr.Dropdown(
                label="Model",
                choices=model_choices_with_time,
                value=model_choices_with_time[0],
            )
            check_btn = gr.Button("Check", variant="primary")
        with gr.Column(scale=1):
            result_text = gr.Markdown(value="")
            proba_slider = gr.Slider(
                minimum=0,
                maximum=1,
                value=0,
                label="Probability of Duplicate",
                interactive=False,  # display-only output, not a user input
            )
    with gr.Accordion("Try example pairs", open=False):
        gr.Examples(
            examples=[
                ["How do I learn Python?", "What is the best way to learn Python programming?"],
                ["What is the capital of France?", "How do I cook pasta?"],
            ],
            inputs=[q1, q2],
            label="",
        )
    # Wire the button to the prediction function; outputs fill both widgets.
    check_btn.click(
        fn=predict_fn,
        inputs=[q1, q2, model_dropdown],
        outputs=[result_text, proba_slider],
    )
    gr.Markdown("---")
    with gr.Accordion("About", open=False):
        gr.Markdown("""
This app predicts whether two Quora questions are duplicates (same meaning).
**Models:**
- **Classical**: Random Forest or XGBoost on 25 handcrafted features + TF-IDF
- **DistilBERT**: Fine-tuned transformer for sentence-pair classification
*Built for fun & learning. Results may not always be accurate — use with caution.*
""")

demo.launch()