import gradio as gr
import pdfplumber
import re
import json
from xml.etree.ElementTree import Element, SubElement, tostring
def extract_questions(text):
    """
    Extract questions, options, and scores from text.

    Example: "Q1: What is 2+2? Options: a) 4 (50) b) 5 (0)"

    The text is split on ``Q<digits>:`` markers and anything before the first
    marker is discarded. Inside each block, the part before the first
    ``Options:`` is the question text, and every
    ``<lowercase letter>) <text> (<digits>)`` found after it becomes an option.
    A block without an ``Options:`` marker is used as a whole, both as the
    question text and as the place to look for options.

    Args:
        text: Raw text to parse. Empty or ``None`` yields an empty list.

    Returns:
        A list with one dict per question. Each dict carries the keys
        ``questiontext`` (question wrapped in ``<p>``), ``questiontype``,
        ``randomizeopt``, ``marks`` (highest option score as an int, 0 when
        there are no options), ``options`` (dicts with ``optiontext``,
        ``score`` as the matched digit string, and ``img``), ``minscore``,
        ``hint`` and ``numberofoptions``.
    """
    if not text:
        return []

    # Compiled once; re.S lets an option's text run across line breaks.
    option_pattern = re.compile(r'([a-z]\))\s*(.*?)\s*\((\d+)\)', re.S)

    questions = []
    # Element 0 of the split is whatever precedes the first "Q<n>:" marker.
    for block in re.split(r'Q\d+:', text)[1:]:
        stem, marker, tail = block.partition("Options:")

        # Scan only the text after "Options:" so that parentheticals in the
        # question itself (e.g. "(see b) item (3)") are not mistaken for
        # options. Without a marker, fall back to scanning the whole block.
        option_source = tail if marker else block

        options = [
            {
                "optiontext": opt_text.strip(),
                "score": score,
                "img": "",
            }
            for _, opt_text, score in option_pattern.findall(option_source)
        ]

        questions.append({
            "questiontext": f"<p>{stem.strip()}</p>",
            "questiontype": "single_select",
            "randomizeopt": False,
            "marks": max((int(opt["score"]) for opt in options), default=0),
            "options": options,
            "minscore": "",
            "hint": "",
            "numberofoptions": len(options),
        })
    return questions
def read_pdf(file, format_type):
    """Convert an uploaded exam PDF into an HTML or XML text payload.

    The PDF text is read with pdfplumber, the ``Total Marks``, ``Time`` and
    ``Cutoff`` numbers are pulled out with case-insensitive regexes, and the
    questions are parsed by ``extract_questions``. The result is serialised
    to JSON and wrapped in either an HTML ``<pre>`` page or an XML
    ``<questiondata>`` CDATA element.

    Args:
        file: The uploaded file. Either an object exposing the path as
            ``.name`` or a plain path string; ``None`` means nothing was
            uploaded.
        format_type: ``"HTML"`` selects the HTML wrapper; any other value
            produces the XML wrapper.

    Returns:
        The HTML or XML string, or a plain message when no file was given or
        when any of the three required header values is missing.
    """
    import html  # function-scope import: only the HTML branch needs it

    if file is None:
        return "Please upload a PDF file."

    # Accept both file-like uploads (path in .name) and bare path strings.
    pdf_path = getattr(file, "name", file)

    with pdfplumber.open(pdf_path) as pdf:
        # `or ""` guards against a falsy extract_text() result
        # (presumably a page with no extractable text).
        page_texts = [page.extract_text() or "" for page in pdf.pages]
    # Join with a newline so the last token of one page cannot fuse with the
    # first token of the next (e.g. "Time: 6" + "0 ..." reading as 60).
    text = "\n".join(page_texts)

    # Check for totalmarks, time, cutoff
    totalmarks_match = re.search(r'Total Marks[:\s]*(\d+)', text, re.I)
    time_match = re.search(r'Time[:\s]*(\d+)', text, re.I)
    cutoff_match = re.search(r'Cutoff[:\s]*(\d+)', text, re.I)
    if not (totalmarks_match and time_match and cutoff_match):
        return "PDF must contain Total Marks, Time, and Cutoff."

    data = {
        "title": "Certification Title",
        "totalmarks": totalmarks_match.group(1),
        "time": time_match.group(1),
        "cutoff": cutoff_match.group(1),
        "failurl": "",
        "passurl": "",
        "sendpassemail": True,
        # The question list is embedded as a JSON string inside the outer JSON.
        "questions": json.dumps({"questions": extract_questions(text)}),
    }

    if format_type == "HTML":
        # Escape &, < and > so the JSON (which contains <p> tags and arbitrary
        # PDF text) is displayed verbatim inside <pre> instead of being
        # interpreted as markup.
        pretty = html.escape(json.dumps(data, indent=2), quote=False)
        return f"<html><body><pre>{pretty}</pre></body></html>"

    # A literal "]]>" in the payload would close the CDATA section early;
    # split it across two adjacent CDATA sections instead.
    payload = json.dumps(data).replace("]]>", "]]]]><![CDATA[>")
    return f"<questiondata><![CDATA[{payload}]]></questiondata>"
# Gradio UI: a PDF upload and an output-format selector are passed to
# read_pdf, and the returned string is shown as plain text.
app = gr.Interface(
    title="PDF to HTML/XML Converter",
    fn=read_pdf,
    inputs=[
        gr.File(label="Upload PDF"),
        gr.Radio(["HTML", "XML"], label="Output Format"),
    ],
    outputs="text",
)

# Launch the interface.
app.launch()
|