"""Gradio app that converts a quiz PDF into an HTML or XML payload.

The PDF text is expected to carry three header values ("Total Marks",
"Time", "Cutoff") followed by questions written as::

    Q1: What is 2+2? Options: a) 4 (50) b) 5 (0)

NOTE(review): the markup inside the original string literals was lost
(the literals were left unterminated / empty).  The ``<p>`` and ``<pre>``
wrappers and the XML element layout used below are reconstructions --
confirm them against whatever consumes this output.
"""

import html
import json
import os
import re
from xml.etree.ElementTree import Element, SubElement, tostring

# Compiled once at import time; they are applied to every question block.
_QUESTION_SPLIT_RE = re.compile(r'Q\d+:')
_OPTION_RE = re.compile(r'([a-z]\))\s*(.*?)\s*\((\d+)\)', re.S)
_OPTIONS_MARKER = "Options:"

# Header values that must all be present in the PDF text.
_METADATA_PATTERNS = {
    "totalmarks": re.compile(r'Total\s+Marks[:\s]*(\d+)', re.I),
    "time": re.compile(r'Time[:\s]*(\d+)', re.I),
    "cutoff": re.compile(r'Cutoff[:\s]*(\d+)', re.I),
}

_MISSING_FILE_MESSAGE = "Please upload a PDF file."
_MISSING_METADATA_MESSAGE = "PDF must contain Total Marks, Time, and Cutoff."


def extract_questions(text):
    """Extract questions, options, and scores from text.

    Example input: ``"Q1: What is 2+2? Options: a) 4 (50) b) 5 (0)"``

    Args:
        text: Plain text containing zero or more ``Q<number>:`` blocks.
            Anything before the first ``Q<number>:`` marker is ignored.

    Returns:
        A list with one dict per question.  ``questiontext`` is the
        HTML-escaped question wrapped in ``<p>``; ``options`` is a list of
        ``{"optiontext", "score", "img"}`` dicts where ``score`` stays the
        digit string found in the text; ``marks`` is the highest option
        score as an int (0 when the question has no options).
    """
    questions = []
    # Element 0 of the split is whatever precedes the first question.
    for block in _QUESTION_SPLIT_RE.split(text)[1:]:
        stem, marker, tail = block.partition(_OPTIONS_MARKER)
        # Only scan the part after "Options:" so that parentheses in the
        # question itself (e.g. "f(x) ... (2)") are not mistaken for an
        # option.  Without a marker, fall back to scanning the whole block.
        option_source = tail if marker else block
        options = [
            {"optiontext": option_text.strip(), "score": score, "img": ""}
            for _label, option_text, score in _OPTION_RE.findall(option_source)
        ]
        questions.append({
            # The question comes from an uploaded PDF, so escape it before
            # embedding it in markup.
            "questiontext": f"<p>{html.escape(stem.strip(), quote=False)}</p>",
            "questiontype": "single_select",
            "randomizeopt": False,
            "marks": max((int(option["score"]) for option in options), default=0),
            "options": options,
            "minscore": "",
            "hint": "",
            "numberofoptions": len(options),
        })
    return questions


def _xml_text(value):
    """Return *value* as element text, spelling booleans in lowercase."""
    if isinstance(value, bool):
        return "true" if value else "false"
    return str(value)


def _render_html(data):
    """Return the payload as pretty-printed JSON inside a ``<pre>`` element."""
    return f"<pre>{html.escape(json.dumps(data, indent=2), quote=False)}</pre>"


def _render_xml(data, questions):
    """Return the payload as an XML document string.

    Scalar entries of *data* become child elements of the root; the
    questions are emitted as nested elements instead of the JSON string
    stored under ``data["questions"]``.
    """
    root = Element("certification")
    for key, value in data.items():
        if key != "questions":
            SubElement(root, key).text = _xml_text(value)

    questions_el = SubElement(root, "questions")
    for question in questions:
        question_el = SubElement(questions_el, "question")
        for key, value in question.items():
            if key != "options":
                SubElement(question_el, key).text = _xml_text(value)
        options_el = SubElement(question_el, "options")
        for option in question["options"]:
            option_el = SubElement(options_el, "option")
            for key, value in option.items():
                SubElement(option_el, key).text = _xml_text(value)
    return tostring(root, encoding="unicode")


def _convert_text(text, format_type):
    """Turn extracted PDF text into the HTML or XML output string."""
    metadata = {}
    for key, pattern in _METADATA_PATTERNS.items():
        match = pattern.search(text)
        if match is None:
            return _MISSING_METADATA_MESSAGE
        metadata[key] = match.group(1)

    questions = extract_questions(text)
    data = {
        "title": "Certification Title",
        "totalmarks": metadata["totalmarks"],
        "time": metadata["time"],
        "cutoff": metadata["cutoff"],
        "failurl": "",
        "passurl": "",
        "sendpassemail": True,
        # Kept as a JSON-encoded string inside the payload, as in the
        # original code.
        "questions": json.dumps({"questions": questions}),
    }

    if format_type == "HTML":
        return _render_html(data)
    return _render_xml(data, questions)


def read_pdf(file, format_type):
    """Convert an uploaded quiz PDF to an HTML or XML string.

    Args:
        file: The uploaded file -- either a filesystem path or an object
            exposing the path as ``.name`` -- or ``None`` when nothing was
            uploaded.
        format_type: ``"HTML"`` selects the HTML output; any other value
            selects XML.

    Returns:
        The rendered document, or a plain-text message when no file was
        given or the PDF lacks Total Marks, Time, or Cutoff.
    """
    if file is None:
        return _MISSING_FILE_MESSAGE

    # Imported here so the parsing helpers stay importable (and testable)
    # without the PDF dependency installed.
    import pdfplumber

    path = file if isinstance(file, (str, os.PathLike)) else file.name
    with pdfplumber.open(path) as pdf:
        # Join pages with a newline so tokens on either side of a page
        # break are not fused together.
        text = "\n".join(page.extract_text() or "" for page in pdf.pages)
    return _convert_text(text, format_type)


def build_app():
    """Create the Gradio interface that wraps :func:`read_pdf`."""
    import gradio as gr

    return gr.Interface(
        fn=read_pdf,
        inputs=[
            gr.File(label="Upload PDF"),
            gr.Radio(["HTML", "XML"], label="Output Format"),
        ],
        outputs="text",
        title="PDF to HTML/XML Converter",
    )


if __name__ == "__main__":
    # Start the server only when run as a script, not on import.
    app = build_app()
    app.launch()