Spaces:
Sleeping
Sleeping
Update question_extractor.py
Browse files - question_extractor.py +6 -8
question_extractor.py
CHANGED
|
@@ -650,13 +650,12 @@ def extract_questions_with_marks(text):
|
|
| 650 |
|
| 651 |
|
| 652 |
def process_question_paper(image_path, output_path):
|
| 653 |
-
"""
|
| 654 |
-
Process a question paper image and save the extracted content to a text file.
|
| 655 |
|
| 656 |
-
|
| 657 |
-
|
| 658 |
-
|
| 659 |
-
|
| 660 |
"""
|
| 661 |
print(f"Processing: {image_path}")
|
| 662 |
|
|
@@ -667,8 +666,7 @@ def process_question_paper(image_path, output_path):
|
|
| 667 |
# Use text-line based generic extraction as the primary method.
|
| 668 |
questions = extract_questions_from_text(text)
|
| 669 |
|
| 670 |
-
# Write out the results
|
| 671 |
-
with open(output_path, 'w', encoding='utf-8') as f:
|
| 672 |
with open(output_path, 'w', encoding='utf-8') as f:
|
| 673 |
f.write(f"Subject: {subject}\\n\\n")
|
| 674 |
f.write(f"Total Questions: {len(questions)}\\n\\n")
|
|
|
|
| 650 |
|
| 651 |
|
| 652 |
def process_question_paper(image_path, output_path):
|
| 653 |
+
"""Process a question paper image and save the extracted content.
|
|
|
|
| 654 |
|
| 655 |
+
This function is fully subject-agnostic: it runs OCR, infers a
|
| 656 |
+
subject line from generic headers, extracts questions using generic
|
| 657 |
+
heuristics, and writes a structured text file (subject, total
|
| 658 |
+
questions, and numbered questions with marks).
|
| 659 |
"""
|
| 660 |
print(f"Processing: {image_path}")
|
| 661 |
|
|
|
|
| 666 |
# Use text-line based generic extraction as the primary method.
|
| 667 |
questions = extract_questions_from_text(text)
|
| 668 |
|
| 669 |
+
# Write out the results in a structured layout
|
|
|
|
| 670 |
with open(output_path, 'w', encoding='utf-8') as f:
|
| 671 |
f.write(f"Subject: {subject}\\n\\n")
|
| 672 |
f.write(f"Total Questions: {len(questions)}\\n\\n")
|