Update app.py
Browse files
app.py
CHANGED
|
@@ -1,17 +1,85 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import pdfplumber
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
def read_pdf(file, format_type):
|
| 5 |
if file is None:
|
| 6 |
return "Please upload a PDF file."
|
|
|
|
| 7 |
text = ""
|
| 8 |
with pdfplumber.open(file.name) as pdf:
|
| 9 |
for page in pdf.pages:
|
| 10 |
text += page.extract_text() or ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
if format_type == "HTML":
|
| 12 |
-
|
|
|
|
| 13 |
else:
|
| 14 |
-
|
|
|
|
| 15 |
|
| 16 |
app = gr.Interface(
|
| 17 |
fn=read_pdf,
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import pdfplumber
|
| 3 |
+
import re
|
| 4 |
+
import json
|
| 5 |
+
from xml.etree.ElementTree import Element, SubElement, tostring
|
| 6 |
+
|
| 7 |
+
def extract_questions(text):
    """Parse quiz questions, their options, and option scores out of raw text.

    The text is cut at every ``Q<number>:`` marker; whatever precedes the
    first marker is discarded. Inside each question, the part before the
    first ``Options:`` is the question stem and the part after it is scanned
    for options written as ``<letter>) <text> (<score>)``.

    Example input: "Q1: What is 2+2? Options: a) 4 (50) b) 5 (0)"

    Args:
        text: Plain text containing zero or more questions.

    Returns:
        A list with one dict per question, holding the keys
        ``questiontext`` (the stem, HTML-escaped and wrapped in ``<p>``),
        ``questiontype``, ``randomizeopt``, ``marks`` (highest option score
        as an int, 0 when there are no options), ``options`` (dicts with
        ``optiontext``, ``score`` as the matched digit string, and ``img``),
        ``minscore``, ``hint`` and ``numberofoptions``.
    """
    # Local import keeps this function self-contained; the module's import
    # block does not bring in ``html``.
    from html import escape

    option_pattern = r'[a-z]\)\s*(.*?)\s*\((\d+)\)'

    questions = []
    # Slot 0 of the split is the preamble before the first "Qn:" marker.
    for block in re.split(r'Q\d+:', text)[1:]:
        stem, marker, tail = block.partition("Options:")

        # Only look for options after the "Options:" marker, so a fragment
        # such as "(choose a) wisely (1)" in the stem is not misread as an
        # option. Without a marker, fall back to scanning the whole block.
        option_source = tail if marker else block

        options = [
            {"optiontext": option_text.strip(), "score": score, "img": ""}
            for option_text, score in re.findall(option_pattern, option_source, re.S)
        ]

        questions.append({
            # The stem is placed inside HTML markup, so "<", ">" and "&"
            # coming from the source text must be escaped.
            "questiontext": f"<p>{escape(stem.strip(), quote=False)}</p>",
            "questiontype": "single_select",
            "randomizeopt": False,
            "marks": max((int(option["score"]) for option in options), default=0),
            "options": options,
            "minscore": "",
            "hint": "",
            "numberofoptions": len(options),
        })
    return questions
|
| 41 |
|
| 42 |
def read_pdf(file, format_type):
    """Turn an uploaded quiz PDF into an HTML or XML rendering of its data.

    The text of every page is extracted, the header values "Total Marks",
    "Time" and "Cutoff" are located (case-insensitively), and the questions
    are parsed with ``extract_questions``. The collected data is serialized
    as JSON and wrapped in either an HTML ``<pre>`` page or an XML CDATA
    element.

    Args:
        file: The upload handed over by the UI, or ``None`` when nothing was
            uploaded. Either an object exposing the path as ``.name`` or a
            plain path string is accepted.
        format_type: ``"HTML"`` selects the HTML rendering; any other value
            selects the XML rendering.

    Returns:
        The rendered document as a string, or a human-readable message when
        no file was supplied or a required header value is missing.
    """
    # Local import keeps this function self-contained; the module's import
    # block does not bring in ``html``.
    import html

    if file is None:
        return "Please upload a PDF file."

    # Accept a file-like wrapper with ``.name`` as well as a bare path
    # string (which form is passed depends on the UI component's settings).
    pdf_path = getattr(file, "name", file)

    with pdfplumber.open(pdf_path) as pdf:
        # Join pages with a newline so the last token of one page does not
        # fuse with the first token of the next; ``or ""`` covers pages
        # without extractable text.
        text = "\n".join(page.extract_text() or "" for page in pdf.pages)

    # All three header values are mandatory; each is captured as the digit
    # string that follows its label.
    header_patterns = {
        "totalmarks": r'Total Marks[:\s]*(\d+)',
        "time": r'Time[:\s]*(\d+)',
        "cutoff": r'Cutoff[:\s]*(\d+)',
    }
    header = {}
    for key, pattern in header_patterns.items():
        found = re.search(pattern, text, re.I)
        if found is None:
            return "PDF must contain Total Marks, Time, and Cutoff."
        header[key] = found.group(1)

    questions = extract_questions(text)

    data = {
        "title": "Certification Title",
        "totalmarks": header["totalmarks"],
        "time": header["time"],
        "cutoff": header["cutoff"],
        "failurl": "",
        "passurl": "",
        "sendpassemail": True,
        # The question list is embedded as a JSON string inside the outer
        # JSON document.
        "questions": json.dumps({"questions": questions}),
    }

    if format_type == "HTML":
        # The JSON contains markup ("<p>…</p>") and PDF-derived text; escape
        # it so it is displayed literally inside <pre> instead of being
        # interpreted as HTML.
        payload = html.escape(json.dumps(data, indent=2), quote=False)
        return f"<html><body><pre>{payload}</pre></body></html>"

    # A literal "]]>" would terminate the CDATA section early; split it
    # across two adjacent CDATA sections so the XML stays well-formed.
    payload = json.dumps(data).replace("]]>", "]]]]><![CDATA[>")
    return f"<questiondata><![CDATA[{payload}]]></questiondata>"
|
| 83 |
|
| 84 |
app = gr.Interface(
|
| 85 |
fn=read_pdf,
|