# RISHABH KUMAR
# Add Quora duplicate detector Gradio app
# 162b166
"""
Gradio app for Quora Duplicate Question Detector.
Deploy to Hugging Face Spaces with Gradio SDK.
"""
import sys
from pathlib import Path
# Directory that contains this file; base for the import-path entries below.
ROOT = Path(__file__).resolve().parent
# Put this directory and its "streamlit-app" subdirectory at the front of
# sys.path so modules located there can be imported by bare name.
# NOTE(review): presumably `helper` lives in one of these two places - confirm.
sys.path.insert(0, str(ROOT))
sys.path.insert(0, str(ROOT / "streamlit-app"))
import nltk
# Fetch the NLTK stopwords corpus (quietly) at startup.
# NOTE(review): this runs before `import helper`, presumably because helper
# needs the corpus at import time - confirm before reordering.
nltk.download("stopwords", quiet=True)
import helper
import gradio as gr
def predict_fn(q1: str, q2: str, model_name: str):
    """Classify a question pair and format the outcome for the UI.

    Args:
        q1: First question as entered by the user; ``None`` or blank is
            treated as missing.
        q2: Second question as entered by the user; ``None`` or blank is
            treated as missing.
        model_name: Label of the selected model. A label containing
            ``"Classical"`` selects the ``"classical"`` model type; any other
            label selects ``"transformer"``.

    Returns:
        A ``(message, probability)`` tuple. ``message`` is a Markdown string
        describing the verdict, a validation warning, or an error.
        ``probability`` is whatever ``helper.predict`` reported, or ``0.0``
        when validation fails or the prediction raises.
    """
    q1_clean = (q1 or "").strip()
    q2_clean = (q2 or "").strip()

    # Validate before touching the model so bad input never reaches it.
    if not q1_clean or not q2_clean:
        return "⚠️ Please enter both questions.", 0.0
    if len(q1_clean) < 3 or len(q2_clean) < 3:
        return "⚠️ Questions should be at least 3 characters.", 0.0

    # UI boundary: any failure while selecting or running the model is shown
    # to the user as a message instead of propagating out of the callback.
    try:
        model_type = "classical" if "Classical" in model_name else "transformer"
        pred, proba = helper.predict(q1_clean, q2_clean, model_type)
    except Exception as e:
        return f"❌ Error: {e}", 0.0

    if pred:
        msg = "**Duplicate** — These questions likely have the same meaning."
    else:
        msg = "**Not Duplicate** — These questions appear to be different."
    return msg, proba
# Assemble the dropdown entries: one display name per available model, each
# optionally tagged with its mean inference time.
available = helper.get_available_models()
if not available:
    raise RuntimeError("No models found. Add models to models/ or configure HF Hub download.")

inference_times = helper.get_inference_times()
model_choices = [helper.get_model_display_name(m) for m in available]


def _label_with_latency(display_name):
    """Return the display name, suffixed with its mean latency when one is recorded."""
    timing_key = "classical" if "Classical" in display_name else "transformer"
    mean_ms = inference_times.get(timing_key, {}).get("mean_ms", 0)
    if not mean_ms:
        # No (or zero) timing recorded: show the bare name.
        return f"{display_name}"
    return f"{display_name} (~{mean_ms:.0f} ms)"


model_choices_with_time = [_label_with_latency(name) for name in model_choices]
# Page layout: a two-column row (inputs on the left, results on the right),
# followed by example pairs and an "About" section.
with gr.Blocks(title="Quora Duplicate Detector", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🔍 Quora Duplicate Question Pairs")
    gr.Markdown("Enter two questions to check if they are semantically duplicate.")

    with gr.Row():
        # Left column: the two questions, the model picker and the trigger.
        with gr.Column(scale=2):
            q1 = gr.Textbox(
                label="Question 1",
                placeholder="e.g. What is the capital of India?",
                lines=2,
            )
            q2 = gr.Textbox(
                label="Question 2",
                placeholder="e.g. Which city is India's capital?",
                lines=2,
            )
            model_dropdown = gr.Dropdown(
                label="Model",
                choices=model_choices_with_time,
                value=model_choices_with_time[0],
            )
            check_btn = gr.Button("Check", variant="primary")

        # Right column: verdict text plus a read-only probability display.
        with gr.Column(scale=1):
            result_text = gr.Markdown(value="")
            proba_slider = gr.Slider(
                minimum=0,
                maximum=1,
                value=0,
                label="Probability of Duplicate",
                interactive=False,
            )

    with gr.Accordion("Try example pairs", open=False):
        gr.Examples(
            examples=[
                ["How do I learn Python?", "What is the best way to learn Python programming?"],
                ["What is the capital of France?", "How do I cook pasta?"],
            ],
            inputs=[q1, q2],
            label="",
        )

    # predict_fn returns (message, probability), matching the two outputs.
    check_btn.click(
        fn=predict_fn,
        inputs=[q1, q2, model_dropdown],
        outputs=[result_text, proba_slider],
    )

    gr.Markdown("---")
    with gr.Accordion("About", open=False):
        # Built from adjacent literals so the Markdown carries no source
        # indentation; blank lines separate the paragraphs from the list.
        gr.Markdown(
            "This app predicts whether two Quora questions are duplicates (same meaning).\n"
            "\n"
            "**Models:**\n"
            "\n"
            "- **Classical**: Random Forest or XGBoost on 25 handcrafted features + TF-IDF\n"
            "- **DistilBERT**: Fine-tuned transformer for sentence-pair classification\n"
            "\n"
            "*Built for fun & learning. Results may not always be accurate — use with caution.*\n"
        )

demo.launch()