engrrifatullah committed on
Commit
d73dc88
·
verified ·
1 Parent(s): 348d51a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -15
app.py CHANGED
@@ -1,25 +1,23 @@
1
  import streamlit as st
2
- import pdfkit
3
  from PIL import Image
4
  from docx import Document
5
  from transformers import pipeline
6
-
7
- # Configure pdfkit with wkhtmltopdf (Ensure wkhtmltopdf is installed)
8
- config = pdfkit.configuration(wkhtmltopdf='/usr/bin/wkhtmltopdf') # Adjust path if necessary
9
 
10
  # Set up OCR pipeline from Hugging Face
11
  ocr_pipeline = pipeline("image-to-text", model="microsoft/trocr-base-handwritten")
12
 
13
  # Streamlit UI
14
- st.title("Handwritten Text Extractor")
15
 
16
  # File uploader
17
- uploaded_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
18
 
19
  if uploaded_file is not None:
20
  # Open and display the uploaded image
21
  image = Image.open(uploaded_file)
22
- st.image(image, caption="Uploaded Image", use_column_width=True)
23
 
24
  # Convert the image to RGB format
25
  image = image.convert("RGB")
@@ -28,20 +26,23 @@ if uploaded_file is not None:
28
  extracted_text = ocr_pipeline(image)[0]['generated_text']
29
 
30
  # Display extracted text
31
- st.subheader("Extracted Text")
32
  st.write(extracted_text)
33
 
34
  # Save as DOCX
35
  doc = Document()
36
  doc.add_paragraph(extracted_text)
37
- docx_filename = "extracted_text.docx"
38
- doc.save(docx_filename)
 
39
 
40
  # Save as PDF
41
- pdf_filename = "extracted_text.pdf"
42
- pdfkit.from_string(extracted_text, pdf_filename, configuration=config)
 
 
 
43
 
44
  # Download buttons
45
- st.download_button("Download as DOCX", data=open(docx_filename, "rb"), file_name=docx_filename)
46
- st.download_button("Download as PDF", data=open(pdf_filename, "rb"), file_name=pdf_filename)
47
-
 
1
  import streamlit as st
2
+ import io
3
  from PIL import Image
4
  from docx import Document
5
  from transformers import pipeline
6
+ from reportlab.pdfgen import canvas
 
 
7
 
8
  # Set up OCR pipeline from Hugging Face
9
  ocr_pipeline = pipeline("image-to-text", model="microsoft/trocr-base-handwritten")
10
 
11
  # Streamlit UI
12
+ st.title("📝 Handwritten Text Extractor")
13
 
14
  # File uploader
15
+ uploaded_file = st.file_uploader("📤 Upload an image", type=["jpg", "jpeg", "png"])
16
 
17
  if uploaded_file is not None:
18
  # Open and display the uploaded image
19
  image = Image.open(uploaded_file)
20
+ st.image(image, caption="📷 Uploaded Image", use_column_width=True)
21
 
22
  # Convert the image to RGB format
23
  image = image.convert("RGB")
 
26
  extracted_text = ocr_pipeline(image)[0]['generated_text']
27
 
28
  # Display extracted text
29
+ st.subheader("📜 Extracted Text")
30
  st.write(extracted_text)
31
 
32
  # Save as DOCX
33
  doc = Document()
34
  doc.add_paragraph(extracted_text)
35
+ docx_buffer = io.BytesIO()
36
+ doc.save(docx_buffer)
37
+ docx_buffer.seek(0)
38
 
39
  # Save as PDF
40
+ pdf_buffer = io.BytesIO()
41
+ pdf = canvas.Canvas(pdf_buffer)
42
+ pdf.drawString(100, 750, extracted_text)
43
+ pdf.save()
44
+ pdf_buffer.seek(0)
45
 
46
  # Download buttons
47
+ st.download_button("⬇️ Download as DOCX", data=docx_buffer, file_name="extracted_text.docx")
48
+ st.download_button("⬇️ Download as PDF", data=pdf_buffer, file_name="extracted_text.pdf", mime="application/pdf")