Spaces:

prat1003
/

test1

Sleeping

App Files Files Community

prat1003 committed on Oct 12, 2025

Commit

c6daf95

verified ·

1 Parent(s): 7be4f29

Update app.py

Browse files

Files changed (1) hide show

app.py +70 -2

app.py CHANGED Viewed

@@ -1,17 +1,85 @@
 import gradio as gr
 import pdfplumber
 def read_pdf(file, format_type):
     if file is None:
         return "Please upload a PDF file."
     text = ""
     with pdfplumber.open(file.name) as pdf:
         for page in pdf.pages:
             text += page.extract_text() or ""
     if format_type == "HTML":
-        return f"<html><body><p>{text}</p></body></html>"
     else:
-        return f"<xml><content>{text}</content></xml>"
 app = gr.Interface(
     fn=read_pdf,

 import gradio as gr
 import pdfplumber
+import re
+import json
+from xml.etree.ElementTree import Element, SubElement, tostring
def extract_questions(text):
    """Parse question blocks out of plain document text.

    A question starts at a ``Q<number>:`` marker and runs until the next such
    marker (or the end of the text). Inside a block, everything before the
    first ``Options:`` marker is the question text; every option after it is
    written as ``<letter>) <option text> (<score>)``.

    Example input: "Q1: What is 2+2? Options: a) 4 (50) b) 5 (0)"

    Args:
        text: Full document text to scan.

    Returns:
        A list with one dict per question. Each dict holds the question text
        wrapped in ``<p>...</p>`` under ``"questiontext"``, the parsed options
        under ``"options"`` (each with ``"optiontext"``, ``"score"`` as the
        matched digit string, and an empty ``"img"``), ``"marks"`` set to the
        highest option score (0 when there are no options), and
        ``"numberofoptions"``. Returns an empty list when no ``Q<number>:``
        marker is present.
    """
    # An option label is a single lowercase letter followed by ")". The
    # lookbehind rejects letters that merely end a word (e.g. "note)"), while
    # still accepting labels written as "a)" or "(a)".
    option_pattern = re.compile(
        r'(?<![A-Za-z0-9])[a-z]\)\s*(.*?)\s*\((\d+)\)', re.S
    )

    questions = []
    # Element 0 of the split is whatever precedes the first marker.
    for block in re.split(r'Q\d+:', text)[1:]:
        stem, marker, tail = block.partition("Options:")

        # Only scan the part after "Options:" so that parentheses in the
        # question itself (e.g. "f(x)") are not mistaken for option labels.
        # Without a marker the whole block is both question text and option
        # source, as before.
        option_source = tail if marker else block
        options = [
            {"optiontext": opt_text.strip(), "score": score, "img": ""}
            for opt_text, score in option_pattern.findall(option_source)
        ]

        # NOTE(review): the question text is inserted into the <p> element
        # unescaped; confirm whether the consumer expects raw or escaped text.
        questions.append({
            "questiontext": f"<p>{stem.strip()}</p>",
            "questiontype": "single_select",
            "randomizeopt": False,
            "marks": max((int(opt["score"]) for opt in options), default=0),
            "options": options,
            "minscore": "",
            "hint": "",
            "numberofoptions": len(options),
        })
    return questions
 def read_pdf(file, format_type):
     if file is None:
         return "Please upload a PDF file."
     text = ""
     with pdfplumber.open(file.name) as pdf:
         for page in pdf.pages:
             text += page.extract_text() or ""
+    # Check for totalmarks, time, cutoff
+    totalmarks_match = re.search(r'Total Marks[:\s]*(\d+)', text, re.I)
+    time_match = re.search(r'Time[:\s]*(\d+)', text, re.I)
+    cutoff_match = re.search(r'Cutoff[:\s]*(\d+)', text, re.I)
+    if not (totalmarks_match and time_match and cutoff_match):
+        return "PDF must contain Total Marks, Time, and Cutoff."
+    totalmarks = totalmarks_match.group(1)
+    time = time_match.group(1)
+    cutoff = cutoff_match.group(1)
+    # Extract questions
+    questions = extract_questions(text)
+    data = {
+        "title": "Certification Title",
+        "totalmarks": totalmarks,
+        "time": time,
+        "cutoff": cutoff,
+        "failurl": "",
+        "passurl": "",
+        "sendpassemail": True,
+        "questions": json.dumps({"questions": questions})
+    }
     if format_type == "HTML":
+        html = f"<html><body><pre>{json.dumps(data, indent=2)}</pre></body></html>"
+        return html
     else:
+        xml_content = f"<questiondata><![CDATA[{json.dumps(data)}]]></questiondata>"
+        return xml_content
 app = gr.Interface(
     fn=read_pdf,