Spaces:

prat1003
/

project2

Sleeping

App Files Community

prat1003 committed on Oct 13, 2025

Commit

719feee

verified ·

1 Parent(s): 6577daa

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -26

app.py CHANGED Viewed

@@ -1,33 +1,53 @@
 import gradio as gr
-from PyPDF2 import PdfReader
 from transformers import pipeline
 import json
-def generate_questions(pdf_file):
-    # Step 1: Extract text
-    reader = PdfReader(pdf_file.name)
     text = ""
-    for page in reader.pages:
-        text += page.extract_text() + "\n"
-    # Step 2: Hugging Face QG model
-    qg_pipeline = pipeline("text2text-generation", model="valhalla/t5-small-qg-prepend")
-    questions = qg_pipeline(f"generate questions: {text[:2000]}")  # Demo: first 2000 chars
-    # Step 3: Build XML
-    generated_questions = []
-    for q in questions:
-        generated_questions.append({
-            "questiontext": q['generated_text'],
             "questiontype": "single_select",
             "marks": 10,
             "options": [
-                {"optiontext": "Answer1", "score": "10"},
-                {"optiontext": "Answer2", "score": "0"}
             ]
         })
-    questiondata_json = json.dumps({
         "title": "Certification Title",
         "totalmarks": "50",
         "time": "20",
@@ -35,18 +55,20 @@ def generate_questions(pdf_file):
         "failurl": "",
         "passurl": "",
         "sendpassemail": True,
-        "questions": json.dumps({"questions": generated_questions}),
         "maxattempts": 3
-    })
-    xml_output = f'<questiondata><![CDATA[{questiondata_json}]]></questiondata>'
     return xml_output
-# Gradio UI
 iface = gr.Interface(
-    fn=generate_questions,
-    inputs=gr.File(file_types=['.pdf']),
-    outputs=gr.Textbox(label="Generated XML")
 )
 iface.launch()

 import gradio as gr
+from pdf2image import convert_from_path
+import pytesseract
 from transformers import pipeline
 import json
+import tempfile
+import os
+# Load question generation model
+qg_pipeline = pipeline("text2text-generation", model="valhalla/t5-small-qg-prepend")
+# OCR function
+def extract_text_from_scanned_pdf(file_path):
+    pages = convert_from_path(file_path)
     text = ""
+    for page in pages:
+        text += pytesseract.image_to_string(page)
+    return text.strip()
+# Main function
+def process_pdf(pdf_file):
+    # Step 1: Save uploaded PDF
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
+        temp_pdf.write(pdf_file.read())
+        temp_pdf_path = temp_pdf.name
+    # Step 2: OCR extraction
+    extracted_text = extract_text_from_scanned_pdf(temp_pdf_path)
+    os.remove(temp_pdf_path)
+    if not extracted_text.strip():
+        return "❌ Could not extract text. Make sure the PDF has readable content."
+    # Step 3: Generate questions
+    questions_output = qg_pipeline("generate questions: " + extracted_text[:1000], max_length=128, num_return_sequences=3)
+    # Step 4: Convert to <questiondata> XML
+    question_list = []
+    for q in questions_output:
+        question_list.append({
+            "questiontext": q["generated_text"],
             "questiontype": "single_select",
             "marks": 10,
             "options": [
+                {"optiontext": "Option 1", "score": "10"},
+                {"optiontext": "Option 2", "score": "0"}
             ]
         })
+    data = {
         "title": "Certification Title",
         "totalmarks": "50",
         "time": "20",
         "failurl": "",
         "passurl": "",
         "sendpassemail": True,
+        "questions": json.dumps({"questions": question_list}),
         "maxattempts": 3
+    }
+    xml_output = "<questiondata><![CDATA[" + json.dumps(data) + "]]></questiondata>"
     return xml_output
+# Gradio interface
 iface = gr.Interface(
+    fn=process_pdf,
+    inputs=gr.File(label="Upload your scanned PDF"),
+    outputs="text",
+    title="📄 PDF to Question Generator (with OCR)",
+    description="Uploads a scanned PDF, runs OCR, and generates <questiondata> XML output for your quiz system."
 )
 iface.launch()