# Hugging Face Space: prat1003/test1 (status: Sleeping)
# File size: 2,829 Bytes

import html
import json
import re
from xml.etree.ElementTree import Element, SubElement, tostring

import gradio as gr
import pdfplumber

def extract_questions(text):
    """
    Extract questions, options, and scores from text.
    Example: "Q1: What is 2+2? Options: a) 4 (50) b) 5 (0)"
    """
    questions = []
    q_blocks = re.split(r'Q\d+:', text)
    
    for q in q_blocks[1:]:  # skip the first split part
        # Extract question text
        q_text_match = re.search(r'(.*?)Options:', q, re.S)
        q_text = q_text_match.group(1).strip() if q_text_match else q.strip()

        # Extract options
        options = []
        option_matches = re.findall(r'([a-z]\))\s*(.*?)\s*\((\d+)\)', q, re.S)
        for _, opt_text, score in option_matches:
            options.append({
                "optiontext": opt_text.strip(),
                "score": score,
                "img": ""
            })
        
        questions.append({
            "questiontext": f"<p>{q_text}</p>",
            "questiontype": "single_select",
            "randomizeopt": False,
            "marks": max([int(o["score"]) for o in options]) if options else 0,
            "options": options,
            "minscore": "",
            "hint": "",
            "numberofoptions": len(options)
        })
    return questions

def read_pdf(file, format_type):
    if file is None:
        return "Please upload a PDF file."
    
    text = ""
    with pdfplumber.open(file.name) as pdf:
        for page in pdf.pages:
            text += page.extract_text() or ""
    
    # Check for totalmarks, time, cutoff
    totalmarks_match = re.search(r'Total Marks[:\s]*(\d+)', text, re.I)
    time_match = re.search(r'Time[:\s]*(\d+)', text, re.I)
    cutoff_match = re.search(r'Cutoff[:\s]*(\d+)', text, re.I)
    
    if not (totalmarks_match and time_match and cutoff_match):
        return "PDF must contain Total Marks, Time, and Cutoff."

    totalmarks = totalmarks_match.group(1)
    time = time_match.group(1)
    cutoff = cutoff_match.group(1)

    # Extract questions
    questions = extract_questions(text)

    data = {
        "title": "Certification Title",
        "totalmarks": totalmarks,
        "time": time,
        "cutoff": cutoff,
        "failurl": "",
        "passurl": "",
        "sendpassemail": True,
        "questions": json.dumps({"questions": questions})
    }

    if format_type == "HTML":
        html = f"<html><body><pre>{json.dumps(data, indent=2)}</pre></body></html>"
        return html
    else:
        xml_content = f"<questiondata><![CDATA[{json.dumps(data)}]]></questiondata>"
        return xml_content

app = gr.Interface(
    fn=read_pdf,
    inputs=[gr.File(label="Upload PDF"), gr.Radio(["HTML", "XML"], label="Output Format")],
    outputs="text",
    title="PDF to HTML/XML Converter"
)

app.launch()