""" Gradio app for Quora Duplicate Question Detector. Deploy to Hugging Face Spaces with Gradio SDK. """ import sys from pathlib import Path ROOT = Path(__file__).resolve().parent sys.path.insert(0, str(ROOT)) sys.path.insert(0, str(ROOT / "streamlit-app")) import nltk nltk.download("stopwords", quiet=True) import helper import gradio as gr def predict_fn(q1: str, q2: str, model_name: str): """Run prediction and return formatted output.""" q1_clean = (q1 or "").strip() q2_clean = (q2 or "").strip() if not q1_clean or not q2_clean: return "⚠️ Please enter both questions.", 0.0 if len(q1_clean) < 3 or len(q2_clean) < 3: return "⚠️ Questions should be at least 3 characters.", 0.0 try: model_type = "classical" if "Classical" in model_name else "transformer" pred, proba = helper.predict(q1_clean, q2_clean, model_type) if pred: msg = "**Duplicate** — These questions likely have the same meaning." else: msg = "**Not Duplicate** — These questions appear to be different." return msg, proba except Exception as e: return f"❌ Error: {str(e)}", 0.0 # Build model options available = helper.get_available_models() if not available: raise RuntimeError("No models found. Add models to models/ or configure HF Hub download.") inference_times = helper.get_inference_times() model_choices = [helper.get_model_display_name(m) for m in available] model_choices_with_time = [] for m in model_choices: key = "classical" if "Classical" in m else "transformer" ms = inference_times.get(key, {}).get("mean_ms", 0) suffix = f" (~{ms:.0f} ms)" if ms else "" model_choices_with_time.append(f"{m}{suffix}") with gr.Blocks(title="Quora Duplicate Detector", theme=gr.themes.Soft()) as demo: gr.Markdown("# 🔍 Quora Duplicate Question Pairs") gr.Markdown("Enter two questions to check if they are semantically duplicate.") with gr.Row(): with gr.Column(scale=2): q1 = gr.Textbox( label="Question 1", placeholder="e.g. What is the capital of India?", lines=2, ) q2 = gr.Textbox( label="Question 2", placeholder="e.g. Which city is India's capital?", lines=2, ) model_dropdown = gr.Dropdown( label="Model", choices=model_choices_with_time, value=model_choices_with_time[0], ) check_btn = gr.Button("Check", variant="primary") with gr.Column(scale=1): result_text = gr.Markdown(value="") proba_slider = gr.Slider( minimum=0, maximum=1, value=0, label="Probability of Duplicate", interactive=False, ) with gr.Accordion("Try example pairs", open=False): gr.Examples( examples=[ ["How do I learn Python?", "What is the best way to learn Python programming?"], ["What is the capital of France?", "How do I cook pasta?"], ], inputs=[q1, q2], label="", ) check_btn.click( fn=predict_fn, inputs=[q1, q2, model_dropdown], outputs=[result_text, proba_slider], ) gr.Markdown("---") with gr.Accordion("About", open=False): gr.Markdown(""" This app predicts whether two Quora questions are duplicates (same meaning). **Models:** - **Classical**: Random Forest or XGBoost on 25 handcrafted features + TF-IDF - **DistilBERT**: Fine-tuned transformer for sentence-pair classification *Built for fun & learning. Results may not always be accurate — use with caution.* """) demo.launch()