engrrifatullah committed on
Commit
47e620b
·
verified ·
1 Parent(s): 47b61ab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -8
app.py CHANGED
@@ -1,13 +1,12 @@
1
  import streamlit as st
2
  import pytesseract
3
- import cv2
4
  import numpy as np
5
  from PIL import Image
6
- import pdfkit
7
  from docx import Document
8
  from transformers import pipeline
9
 
10
- # Set up OCR pipeline (you can replace with Hugging Face model)
11
  ocr_pipeline = pipeline("image-to-text", model="microsoft/trocr-base-handwritten")
12
 
13
  # Streamlit UI
@@ -17,14 +16,15 @@ st.title("Handwritten Text Extractor")
17
  uploaded_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
18
 
19
  if uploaded_file is not None:
 
20
  image = Image.open(uploaded_file)
21
  st.image(image, caption="Uploaded Image", use_column_width=True)
22
 
23
- # Convert image to numpy array
24
- image_np = np.array(image)
25
 
26
- # Extract text using OCR model
27
- extracted_text = ocr_pipeline(image_np)[0]['generated_text']
28
 
29
  # Display extracted text
30
  st.subheader("Extracted Text")
@@ -43,4 +43,3 @@ if uploaded_file is not None:
43
  # Download buttons
44
  st.download_button("Download as DOCX", data=open(docx_filename, "rb"), file_name=docx_filename)
45
  st.download_button("Download as PDF", data=open(pdf_filename, "rb"), file_name=pdf_filename)
46
-
 
1
  import streamlit as st
2
  import pytesseract
3
+ import pdfkit
4
  import numpy as np
5
  from PIL import Image
 
6
  from docx import Document
7
  from transformers import pipeline
8
 
9
+ # Set up OCR pipeline from Hugging Face (ensure the correct model is used)
10
  ocr_pipeline = pipeline("image-to-text", model="microsoft/trocr-base-handwritten")
11
 
12
  # Streamlit UI
 
16
  uploaded_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
17
 
18
  if uploaded_file is not None:
19
+ # Open and display the uploaded image
20
  image = Image.open(uploaded_file)
21
  st.image(image, caption="Uploaded Image", use_column_width=True)
22
 
23
+ # Convert the image to RGB (if not already)
24
+ image = image.convert("RGB")
25
 
26
+ # Extract text using Hugging Face OCR model
27
+ extracted_text = ocr_pipeline(image)[0]['generated_text']
28
 
29
  # Display extracted text
30
  st.subheader("Extracted Text")
 
43
  # Download buttons
44
  st.download_button("Download as DOCX", data=open(docx_filename, "rb"), file_name=docx_filename)
45
  st.download_button("Download as PDF", data=open(pdf_filename, "rb"), file_name=pdf_filename)