| | import gradio as gr |
| | import pdfplumber |
| | import re |
| | import json |
| | from xml.etree.ElementTree import Element, SubElement, tostring |
| |
|
def extract_questions(text):
    """
    Extract questions, options, and scores from text.

    The text is split on ``Q<number>:`` markers; anything before the first
    marker is discarded. Within each question, the text before the first
    ``Options:`` marker is the question stem, and options are parsed from
    the text after it in the form ``<letter>) <option text> (<score>)``.

    Example: "Q1: What is 2+2? Options: a) 4 (50) b) 5 (0)"

    Args:
        text: Raw text containing zero or more ``Q<number>:`` sections.

    Returns:
        A list with one dict per question. ``questiontext`` is the stem
        wrapped in ``<p>`` tags, ``options`` is a list of dicts whose
        ``score`` is kept as the matched digit string, ``marks`` is the
        highest option score as an int (0 when there are no options), and
        ``numberofoptions`` is the option count.
    """
    # Compiled once, outside the loop; re.S lets option text span lines.
    option_pattern = re.compile(r'([a-z]\))\s*(.*?)\s*\((\d+)\)', re.S)
    questions = []

    for block in re.split(r'Q\d+:', text)[1:]:
        stem, marker, tail = block.partition("Options:")

        if marker:
            q_text = stem.strip()
            # Only scan what follows "Options:". Scanning the whole block
            # lets a "letter)" in the stem (e.g. "f(x)") be mistaken for an
            # option marker and swallow the first real option.
            option_source = tail
        else:
            # No marker: keep the historical fallback of treating the whole
            # block as the stem and scanning all of it for options.
            q_text = block.strip()
            option_source = block

        options = [
            {"optiontext": opt_text.strip(), "score": score, "img": ""}
            for _, opt_text, score in option_pattern.findall(option_source)
        ]

        questions.append({
            "questiontext": f"<p>{q_text}</p>",
            "questiontype": "single_select",
            "randomizeopt": False,
            "marks": max((int(opt["score"]) for opt in options), default=0),
            "options": options,
            "minscore": "",
            "hint": "",
            "numberofoptions": len(options),
        })

    return questions
| |
|
def read_pdf(file, format_type):
    """
    Convert an uploaded question-paper PDF into an HTML or XML string.

    The PDF text is scanned for "Total Marks", "Time" and "Cutoff" values
    (case-insensitive, each followed by digits) and for questions in the
    format understood by ``extract_questions``. The result is a JSON
    document embedded either in an HTML ``<pre>`` element or in an XML
    ``<questiondata>`` CDATA section.

    Args:
        file: The uploaded file as delivered by the upload widget: either
            a filesystem path string or an object whose ``name`` attribute
            is the path. ``None`` means nothing was uploaded.
        format_type: ``"HTML"`` for HTML output; any other value yields
            the XML output.

    Returns:
        The rendered document as a string, or a human-readable message
        when no file was supplied or the required metadata is missing.
    """
    # Local import keeps this function self-contained; the module's
    # import block does not bring in the html package.
    from html import escape

    if file is None:
        return "Please upload a PDF file."

    # The upload may arrive as a plain path string or as a file wrapper
    # exposing the path via ``.name``; accept both.
    pdf_path = file if isinstance(file, str) else file.name

    with pdfplumber.open(pdf_path) as pdf:
        # Join pages with a newline so the last token of one page cannot
        # fuse with the first token of the next (e.g. "100" + "2024").
        text = "\n".join(page.extract_text() or "" for page in pdf.pages)

    totalmarks_match = re.search(r'Total Marks[:\s]*(\d+)', text, re.I)
    time_match = re.search(r'Time[:\s]*(\d+)', text, re.I)
    cutoff_match = re.search(r'Cutoff[:\s]*(\d+)', text, re.I)

    if not (totalmarks_match and time_match and cutoff_match):
        return "PDF must contain Total Marks, Time, and Cutoff."

    data = {
        "title": "Certification Title",
        "totalmarks": totalmarks_match.group(1),
        "time": time_match.group(1),
        "cutoff": cutoff_match.group(1),
        "failurl": "",
        "passurl": "",
        "sendpassemail": True,
        # The question list is stored as a nested JSON string.
        "questions": json.dumps({"questions": extract_questions(text)}),
    }

    if format_type == "HTML":
        # The JSON contains "<p>" tags and arbitrary PDF text; escape it so
        # it is displayed as text instead of being parsed as markup.
        payload = escape(json.dumps(data, indent=2), quote=False)
        return f"<html><body><pre>{payload}</pre></body></html>"

    # A literal "]]>" would terminate the CDATA section early; split it
    # across two adjacent CDATA sections so the XML stays well-formed.
    payload = json.dumps(data).replace("]]>", "]]]]><![CDATA[>")
    return f"<questiondata><![CDATA[{payload}]]></questiondata>"
| |
|
# Gradio front end: a PDF upload plus an output-format selector feed
# read_pdf, and the returned string is displayed as plain text.
app = gr.Interface(
    fn=read_pdf,
    inputs=[
        gr.File(label="Upload PDF"),
        gr.Radio(choices=["HTML", "XML"], label="Output Format"),
    ],
    outputs="text",
    title="PDF to HTML/XML Converter",
)

# Start the web server as soon as this module is executed.
app.launch()
| |
|