prat1003 committed on
Commit
c6daf95
·
verified ·
1 Parent(s): 7be4f29

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -2
app.py CHANGED
@@ -1,17 +1,85 @@
1
  import gradio as gr
2
  import pdfplumber
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
  def read_pdf(file, format_type):
5
  if file is None:
6
  return "Please upload a PDF file."
 
7
  text = ""
8
  with pdfplumber.open(file.name) as pdf:
9
  for page in pdf.pages:
10
  text += page.extract_text() or ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  if format_type == "HTML":
12
- return f"<html><body><p>{text}</p></body></html>"
 
13
  else:
14
- return f"<xml><content>{text}</content></xml>"
 
15
 
16
  app = gr.Interface(
17
  fn=read_pdf,
 
1
  import gradio as gr
2
  import pdfplumber
3
+ import re
4
+ import json
5
+ from xml.etree.ElementTree import Element, SubElement, tostring
6
+
7
def extract_questions(text):
    """
    Extract questions, options, and scores from text.

    Expected layout, for example:
        "Q1: What is 2+2? Options: a) 4 (50) b) 5 (0)"

    Each question starts at a "Q<number>:" marker. The part before
    "Options:" is the question text; each option is a lowercase letter
    followed by ")", the option text, and an integer score in parentheses.

    Args:
        text: Raw text to parse.

    Returns:
        A list with one dict per question. "marks" is the highest option
        score as an int (0 when no options were found); each option's
        "score" is kept as the matched digit string.
    """
    option_pattern = re.compile(r'([a-z]\))\s*(.*?)\s*\((\d+)\)', re.S)

    questions = []
    # Everything before the first "Qn:" marker is preamble, not a question.
    for block in re.split(r'Q\d+:', text)[1:]:
        stem, marker, tail = block.partition("Options:")
        if marker:
            q_text = stem.strip()
            # Only scan the text after "Options:"; otherwise a stem such as
            # "f(x) at x=2 (1)" is misread as an option with score 1.
            option_source = tail
        else:
            # No marker: keep the whole block as the question text and
            # still look for options anywhere in it.
            q_text = block.strip()
            option_source = block

        options = [
            {
                "optiontext": opt_text.strip(),
                "score": score,
                "img": "",
            }
            for _label, opt_text, score in option_pattern.findall(option_source)
        ]

        questions.append({
            "questiontext": f"<p>{q_text}</p>",
            "questiontype": "single_select",
            "randomizeopt": False,
            "marks": max((int(o["score"]) for o in options), default=0),
            "options": options,
            "minscore": "",
            "hint": "",
            "numberofoptions": len(options),
        })
    return questions
41
 
42
  def read_pdf(file, format_type):
43
  if file is None:
44
  return "Please upload a PDF file."
45
+
46
  text = ""
47
  with pdfplumber.open(file.name) as pdf:
48
  for page in pdf.pages:
49
  text += page.extract_text() or ""
50
+
51
+ # Check for totalmarks, time, cutoff
52
+ totalmarks_match = re.search(r'Total Marks[:\s]*(\d+)', text, re.I)
53
+ time_match = re.search(r'Time[:\s]*(\d+)', text, re.I)
54
+ cutoff_match = re.search(r'Cutoff[:\s]*(\d+)', text, re.I)
55
+
56
+ if not (totalmarks_match and time_match and cutoff_match):
57
+ return "PDF must contain Total Marks, Time, and Cutoff."
58
+
59
+ totalmarks = totalmarks_match.group(1)
60
+ time = time_match.group(1)
61
+ cutoff = cutoff_match.group(1)
62
+
63
+ # Extract questions
64
+ questions = extract_questions(text)
65
+
66
+ data = {
67
+ "title": "Certification Title",
68
+ "totalmarks": totalmarks,
69
+ "time": time,
70
+ "cutoff": cutoff,
71
+ "failurl": "",
72
+ "passurl": "",
73
+ "sendpassemail": True,
74
+ "questions": json.dumps({"questions": questions})
75
+ }
76
+
77
  if format_type == "HTML":
78
+ html = f"<html><body><pre>{json.dumps(data, indent=2)}</pre></body></html>"
79
+ return html
80
  else:
81
+ xml_content = f"<questiondata><![CDATA[{json.dumps(data)}]]></questiondata>"
82
+ return xml_content
83
 
84
  app = gr.Interface(
85
  fn=read_pdf,