import gradio as gr
import os
from charcnn_bylstm import demo_inference, DATA_DIR, Vocab

def _format_mcqs(mcqs):
    """Render extracted MCQ records as numbered plain text.

    Args:
        mcqs: Iterable of dict-like records, read through ``.get`` for the
            keys ``"question"``, ``"options"`` and ``"answer"``.

    Returns:
        A single string holding, for each record, a ``Q<n>`` line, one
        ``Option <k>`` line per option and an ``Answer`` line, followed by a
        blank line.
    """
    lines = []
    for number, mcq in enumerate(mcqs, start=1):
        lines.append(f"Q{number}: {mcq.get('question', '')}")
        # ``or []`` also covers a record whose "options" key is present but
        # None; ``.get("options", [])`` would pass that None to the loop.
        options = mcq.get("options") or []
        lines.extend(
            f"   Option {position}: {option}"
            for position, option in enumerate(options, start=1)
        )
        lines.append(f"   Answer: {mcq.get('answer', '')}")
        lines.append("")
    return "\n".join(lines)


def run_inference(pdf_file):
    """Run the MCQ tagger on an uploaded PDF and return the result as text.

    Args:
        pdf_file: The upload as delivered by Gradio: an object with a
            ``name`` attribute, a dict with a ``"name"`` key, or a plain
            path string.

    Returns:
        The formatted MCQs, or a human-readable message when the input is
        not recognised, a model/vocab file is missing, inference raises, or
        no MCQs are found. Failures are reported as text, not raised.
    """
    # Handle Gradio's NamedString / path / dict input
    if hasattr(pdf_file, "name"):
        pdf_path = pdf_file.name
    elif isinstance(pdf_file, dict) and "name" in pdf_file:
        pdf_path = pdf_file["name"]
    elif isinstance(pdf_file, str):
        pdf_path = pdf_file
    else:
        return "Invalid file input."

    # Bare file names: both artifacts are resolved against the current
    # working directory.
    model_path = "best_mcq_tagger.pt"
    vocab_path = "vocabs.pkl"

    if not os.path.exists(model_path):
        return "Model file missing: best_mcq_tagger.pt"
    if not os.path.exists(vocab_path):
        return "Vocab file missing: vocabs.pkl"

    try:
        # The second returned value is not needed for display.
        mcqs, _ = demo_inference(pdf_path, model_path, vocab_path)
    except Exception as e:  # UI boundary: show the failure instead of raising
        return f"Error during inference: {e}"

    if not mcqs:
        return "No MCQs detected."

    return _format_mcqs(mcqs)

# UI wiring: a single PDF upload feeds run_inference, whose text result is
# shown in a multi-line text box.
_pdf_upload = gr.File(file_types=[".pdf"], label="Upload your PDF")
_mcq_textbox = gr.Textbox(lines=25, label="Predicted MCQs")

demo = gr.Interface(
    fn=run_inference,
    inputs=_pdf_upload,
    outputs=_mcq_textbox,
    title="PDF → MCQ Extractor",
    description="Upload a PDF, and this app extracts multiple-choice questions using your trained model.",
)

# Launch the Gradio interface only when this file is run as a script, so
# importing the module does not start the app.
if __name__ == "__main__":
    demo.launch()