|
|
import gradio as gr |
|
|
import os |
|
|
from charcnn_bylstm import demo_inference, DATA_DIR, Vocab |
|
|
|
|
|
def _format_mcqs(mcqs):
    """Render MCQ records as the plain-text report shown in the UI.

    Each record is read with ``.get`` for the keys ``"question"``,
    ``"options"`` and ``"answer"``; missing keys fall back to an empty
    string (or no options). Questions and options are numbered from 1 and
    every question is followed by a blank line.
    """
    lines = []
    for number, mcq in enumerate(mcqs, start=1):
        question = mcq.get("question", "")
        # ``or []`` also tolerates an explicit ``None`` under "options".
        options = mcq.get("options") or []
        answer = mcq.get("answer", "")

        lines.append(f"Q{number}: {question}")
        lines.extend(
            f" Option {idx}: {opt}" for idx, opt in enumerate(options, start=1)
        )
        lines.append(f" Answer: {answer}")
        lines.append("")
    return "\n".join(lines)


def run_inference(pdf_file):
    """Run the MCQ tagger on an uploaded PDF and return a text report.

    Args:
        pdf_file: The upload payload. Accepted shapes, checked in this
            order: an object with a ``name`` attribute, a dict with a
            ``"name"`` key, or a plain string path.

    Returns:
        A string: the formatted MCQ report on success, otherwise a short
        human-readable message (invalid input, missing model/vocab file,
        inference error, or no MCQs detected). This function does not raise
        for those conditions because its result is displayed directly in
        the UI textbox.
    """
    if hasattr(pdf_file, "name"):
        pdf_path = pdf_file.name
    elif isinstance(pdf_file, dict) and "name" in pdf_file:
        pdf_path = pdf_file["name"]
    elif isinstance(pdf_file, str):
        pdf_path = pdf_file
    else:
        return "Invalid file input."

    # Both artifacts are looked up relative to the current working directory.
    model_path = "best_mcq_tagger.pt"
    vocab_path = "vocabs.pkl"

    if not os.path.exists(model_path):
        return "Model file missing: best_mcq_tagger.pt"
    if not os.path.exists(vocab_path):
        return "Vocab file missing: vocabs.pkl"

    try:
        # demo_inference returns a pair; only the first element (the MCQ
        # records) is used for the report.
        mcqs, _ = demo_inference(pdf_path, model_path, vocab_path)
    except Exception as e:
        # Deliberately broad: this is the UI boundary, so any failure is
        # reported to the user as text instead of crashing the request.
        return f"Error during inference: {e}"

    if not mcqs:
        return "No MCQs detected."

    return _format_mcqs(mcqs)
|
|
|
|
|
# Gradio UI: a single PDF upload wired to run_inference, with the returned
# text shown in a 25-line textbox.
demo = gr.Interface(
    fn=run_inference,
    inputs=gr.File(label="Upload your PDF", file_types=[".pdf"]),
    outputs=gr.Textbox(label="Predicted MCQs", lines=25),
    # The arrow is written as an escape so it survives re-encoding; it had
    # been mis-decoded to a Greek beta in the title.
    title="PDF \u2192 MCQ Extractor",
    description="Upload a PDF, and this app extracts multiple-choice questions using your trained model.",
)


if __name__ == "__main__":
    # Launch the app only when run as a script, not when imported.
    demo.launch()
|
|
|