Zeeshan24 committed on
Commit
bfbd7e6
·
verified ·
1 Parent(s): c020812

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -20
app.py CHANGED
@@ -1,27 +1,22 @@
1
  import streamlit as st
2
  from PIL import Image
3
- import pytesseract
4
- from transformers import pipeline
5
  import re
6
- import os
7
 
8
- # Install Tesseract OCR during runtime
9
- os.system("apt-get update && apt-get install -y tesseract-ocr")
 
10
 
11
- # Load pre-trained model for question-answering
12
  qa_pipeline = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
13
 
14
- # Function to extract text using OCR
15
- def extract_text_from_image(image_file):
16
- image = Image.open(image_file)
17
- text = pytesseract.image_to_string(image)
18
- return text
19
-
20
- # Process text to extract questions
21
- def extract_questions_from_text(text):
22
- # Extract lines that look like questions
23
- questions = re.findall(r'(?:[^\n]*\?)', text)
24
- return questions
25
 
26
  # Extract student name and roll number
27
  def extract_student_info(text):
@@ -31,7 +26,12 @@ def extract_student_info(text):
31
  roll_number = roll_no.group(1).strip() if roll_no else "Unknown"
32
  return student_name, roll_number
33
 
34
- # Grade answers using QA model
 
 
 
 
 
35
  def grade_answer(question, context):
36
  result = qa_pipeline(question=question, context=context)
37
  return result['score'], "Correct" if result['score'] > 0.5 else "Incorrect"
@@ -46,8 +46,8 @@ uploaded_image = st.file_uploader("Upload Handwritten Image", type=["png", "jpg"
46
  if uploaded_image:
47
  st.image(uploaded_image, caption="Uploaded Handwritten File", use_container_width=True)
48
 
49
- # Extract text using OCR
50
- extracted_text = extract_text_from_image(uploaded_image)
51
  st.subheader("Extracted Text")
52
  st.text(extracted_text)
53
 
 
1
  import streamlit as st
2
  from PIL import Image
3
+ from transformers import TrOCRProcessor, VisionEncoderDecoderModel, pipeline
 
4
  import re
 
5
 
6
+ # Load TrOCR Model for Handwritten OCR
7
+ processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
8
+ model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")
9
 
10
+ # Load pre-trained QA model
11
  qa_pipeline = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
12
 
13
+ # Function to extract text using TrOCR
14
+ def extract_text_from_handwriting(image_file):
15
+ image = Image.open(image_file).convert("RGB")
16
+ pixel_values = processor(images=image, return_tensors="pt").pixel_values
17
+ generated_ids = model.generate(pixel_values)
18
+ extracted_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
19
+ return extracted_text
 
 
 
 
20
 
21
  # Extract student name and roll number
22
  def extract_student_info(text):
 
26
  roll_number = roll_no.group(1).strip() if roll_no else "Unknown"
27
  return student_name, roll_number
28
 
29
+ # Extract questions from the text
30
+ def extract_questions_from_text(text):
31
+ questions = re.findall(r'(?:[^\n]*\?)', text) # Extract each line's text up to and including its last "?"
32
+ return questions
33
+
34
+ # Grading function using QA model
35
  def grade_answer(question, context):
36
  result = qa_pipeline(question=question, context=context)
37
  return result['score'], "Correct" if result['score'] > 0.5 else "Incorrect"
 
46
  if uploaded_image:
47
  st.image(uploaded_image, caption="Uploaded Handwritten File", use_container_width=True)
48
 
49
+ # Extract text using TrOCR
50
+ extracted_text = extract_text_from_handwriting(uploaded_image)
51
  st.subheader("Extracted Text")
52
  st.text(extracted_text)
53