"""
Gradio app for Quora Duplicate Question Detector.
Deploy to Hugging Face Spaces with Gradio SDK.
"""
| | import sys |
| | from pathlib import Path |
| |
|
| | ROOT = Path(__file__).resolve().parent |
| | sys.path.insert(0, str(ROOT)) |
| | sys.path.insert(0, str(ROOT / "streamlit-app")) |
| |
|
| | import nltk |
| | nltk.download("stopwords", quiet=True) |
| |
|
| | import helper |
| |
|
| | import gradio as gr |
| |
|
| |
|
def predict_fn(q1: str, q2: str, model_name: str):
    """Classify a question pair and format the result for the UI.

    Args:
        q1: First question as typed by the user; ``None`` is treated as empty.
        q2: Second question as typed by the user; ``None`` is treated as empty.
        model_name: Label of the selected model. A label containing
            ``"Classical"`` selects the classical model; any other label
            selects the transformer.

    Returns:
        A ``(message, probability)`` tuple. ``message`` is Markdown text for
        the result panel and ``probability`` is the value shown on the
        slider. Validation failures and prediction errors return a
        warning/error message together with ``0.0``.
    """
    q1_clean = (q1 or "").strip()
    q2_clean = (q2 or "").strip()

    # Guard clauses: reject missing or too-short input before touching a model.
    if not q1_clean or not q2_clean:
        return "⚠️ Please enter both questions.", 0.0
    if len(q1_clean) < 3 or len(q2_clean) < 3:
        return "⚠️ Questions should be at least 3 characters.", 0.0

    try:
        model_type = "classical" if "Classical" in model_name else "transformer"
        pred, proba = helper.predict(q1_clean, q2_clean, model_type)

        if pred:
            msg = "**Duplicate** — These questions likely have the same meaning."
        else:
            msg = "**Not Duplicate** — These questions appear to be different."

        return msg, proba
    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing the
        # callback, but keep the traceback in the server log for debugging.
        import logging

        logging.getLogger(__name__).exception("Prediction failed")
        return f"❌ Error: {e}", 0.0
| |
|
| |
|
| | |
# Discover the models that can be served; starting without any is pointless.
available = helper.get_available_models()
if not available:
    raise RuntimeError("No models found. Add models to models/ or configure HF Hub download.")

inference_times = helper.get_inference_times()
model_choices = [helper.get_model_display_name(m) for m in available]


def _label_with_latency(display_name):
    """Return ``display_name`` with a mean-latency suffix when one is recorded."""
    if "Classical" in display_name:
        timing_key = "classical"
    else:
        timing_key = "transformer"
    mean_ms = inference_times.get(timing_key, {}).get("mean_ms", 0)
    if not mean_ms:
        # No (or zero) timing recorded: show the bare display name.
        return f"{display_name}"
    return f"{display_name} (~{mean_ms:.0f} ms)"


# Dropdown labels: display name plus the approximate latency, if known.
model_choices_with_time = [_label_with_latency(name) for name in model_choices]
| |
|
# Build the UI: inputs on the left, prediction output on the right, then
# example pairs and an "About" section underneath.
with gr.Blocks(title="Quora Duplicate Detector", theme=gr.themes.Soft()) as demo:
    # NOTE(review): the heading emoji was lost to mis-encoding in the source;
    # the magnifying glass is a best guess — confirm against the original.
    gr.Markdown("# 🔍 Quora Duplicate Question Pairs")
    gr.Markdown("Enter two questions to check if they are semantically duplicate.")

    with gr.Row():
        # Left column: the two questions, the model selector and the button.
        with gr.Column(scale=2):
            q1 = gr.Textbox(
                label="Question 1",
                placeholder="e.g. What is the capital of India?",
                lines=2,
            )
            q2 = gr.Textbox(
                label="Question 2",
                placeholder="e.g. Which city is India's capital?",
                lines=2,
            )
            model_dropdown = gr.Dropdown(
                label="Model",
                choices=model_choices_with_time,
                value=model_choices_with_time[0],
            )
            check_btn = gr.Button("Check", variant="primary")
        # Right column: verdict text and a read-only probability slider.
        with gr.Column(scale=1):
            result_text = gr.Markdown(value="")
            proba_slider = gr.Slider(
                minimum=0,
                maximum=1,
                value=0,
                label="Probability of Duplicate",
                interactive=False,
            )

    with gr.Accordion("Try example pairs", open=False):
        gr.Examples(
            examples=[
                ["How do I learn Python?", "What is the best way to learn Python programming?"],
                ["What is the capital of France?", "How do I cook pasta?"],
            ],
            inputs=[q1, q2],
            label="",
        )

    # Clicking "Check" runs the prediction and fills both output widgets.
    check_btn.click(
        fn=predict_fn,
        inputs=[q1, q2, model_dropdown],
        outputs=[result_text, proba_slider],
    )

    gr.Markdown("---")
    with gr.Accordion("About", open=False):
        gr.Markdown("""
        This app predicts whether two Quora questions are duplicates (same meaning).

        **Models:**
        - **Classical**: Random Forest or XGBoost on 25 handcrafted features + TF-IDF
        - **DistilBERT**: Fine-tuned transformer for sentence-pair classification

        *Built for fun & learning. Results may not always be accurate — use with caution.*
        """)

demo.launch()
| |
|