Spaces:

katrinaaaaaaaaaaaaaaaaaa
/

PSquaredOCR_Image_to_docx

Running

App Files Files Community

PSquaredOCR_Image_to_docx / app.py

katrinaaaaaaaaaaaaaaaaaa

Update app.py

b05965f verified 3 months ago

raw

history blame contribute delete

6.89 kB

	"""
	Gradio Web Interface for Chemistry Notes to DOCX Converter
	Deployed on Hugging Face Spaces
	"""

	import gradio as gr
	import tempfile
	import os
	from docx import Document
	from docx.shared import Pt
	from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
	from PIL import Image
	import numpy as np

	# Module-level slot holding the OCR engine so it can be reused across calls.
	ocr_pipeline = None


	def load_model():
	    """Return the shared PaddleOCR engine, building it on first use.

	    The ``paddleocr`` import is deferred until the engine is actually needed.
	    The created instance is stored in the module-level ``ocr_pipeline`` and
	    handed back unchanged on every later call.
	    """
	    global ocr_pipeline

	    # Fast path: an engine has already been created.
	    if ocr_pipeline is not None:
	        return ocr_pipeline

	    from paddleocr import PaddleOCR

	    ocr_pipeline = PaddleOCR(use_angle_cls=True, lang='en', use_gpu=False)
	    return ocr_pipeline

	def create_docx(blocks, output_path):
	    """Write the given content blocks to a Word document and save it.

	    Each block is a dict read through ``.get('label', 'text')`` and
	    ``.get('content', '')``. The label selects the rendering:

	    * ``title``   - level-2 heading.
	    * ``text``    - split on newlines; each line becomes an empty paragraph,
	      a level-2 heading (leading ``#``), a ``List Bullet`` paragraph
	      (leading bullet character), or a plain paragraph that is bolded when
	      the line is upper-case and longer than three characters.
	    * ``formula`` - Courier New paragraph, 11 pt.
	    * ``table``   - Courier New paragraph, 10 pt.
	    * ``figure`` / ``chart`` / ``image`` - centred placeholder paragraph.
	    * anything else - plain paragraph.

	    Returns ``output_path`` after saving.
	    """
	    document = Document()

	    def add_monospace(text, size):
	        # Paragraph whose runs are switched to Courier New at the given size.
	        para = document.add_paragraph(text)
	        for run in para.runs:
	            run.font.name = 'Courier New'
	            run.font.size = Pt(size)

	    def add_text_line(raw):
	        # Render one line of a 'text' block according to its leading marker.
	        stripped = raw.strip()

	        if not stripped:
	            document.add_paragraph("")
	            return

	        if stripped.startswith('#'):
	            document.add_heading(stripped.lstrip('#').strip(), level=2)
	            return

	        if stripped.startswith(('•', '-', '*', '·')):
	            document.add_paragraph(
	                stripped.lstrip('•-*·').strip(), style='List Bullet'
	            )
	            return

	        # Plain paragraph keeps the line as given (not stripped).
	        shout = stripped.isupper() and len(stripped) > 3
	        para = document.add_paragraph(raw)
	        if shout:
	            for run in para.runs:
	                run.bold = True

	    for entry in blocks:
	        kind = entry.get('label', 'text')
	        body = entry.get('content', '')

	        if kind == 'title':
	            document.add_heading(body, level=2)
	        elif kind == 'text':
	            for raw_line in body.split('\n'):
	                add_text_line(raw_line)
	        elif kind == 'formula':
	            add_monospace(body, 11)
	        elif kind == 'table':
	            add_monospace(body, 10)
	        elif kind in ('figure', 'chart', 'image'):
	            placeholder = document.add_paragraph(
	                "[Figure/Diagram - See Original Image]"
	            )
	            placeholder.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
	        else:
	            document.add_paragraph(body)

	    document.save(output_path)
	    return output_path

	def convert_image_to_docx(image):
	    """Run OCR on an uploaded image and export the recognised text as DOCX.

	    Args:
	        image: The uploaded image. A ``PIL.Image.Image`` is converted to a
	            NumPy array; any other non-``None`` value is handed to the OCR
	            engine unchanged. ``None`` means nothing was uploaded.

	    Returns:
	        A ``(path, message)`` tuple. On success ``path`` is the generated
	        ``.docx`` file and ``message`` is a preview of up to 15 recognised
	        blocks with their confidence. Otherwise ``path`` is ``None`` and
	        ``message`` explains the problem; exceptions raised during the
	        conversion are reported this way instead of propagating.
	    """
	    if image is None:
	        return None, "Please upload an image first!"

	    try:
	        pipeline = load_model()

	        # PIL images are converted to an array before being passed to OCR.
	        image_array = np.array(image) if isinstance(image, Image.Image) else image

	        result = pipeline.ocr(image_array, cls=True)

	        # Each entry of result[0] is read as (_, (text, confidence)).
	        all_blocks = []
	        if result and result[0]:
	            all_blocks = [
	                {'label': 'text', 'content': line[1][0], 'confidence': line[1][1]}
	                for line in result[0]
	            ]

	        if not all_blocks:
	            return None, "No text could be extracted from the image! Please try with a clearer image."

	        # tempfile.mktemp() is deprecated: it only returns a name, so another
	        # process could create that path first. NamedTemporaryFile creates
	        # the file itself; delete=False keeps it for the download.
	        with tempfile.NamedTemporaryFile(suffix=".docx", delete=False) as tmp:
	            output_path = tmp.name
	        try:
	            create_docx(all_blocks, output_path)
	        except Exception:
	            # Do not leave an empty placeholder file behind on failure.
	            os.remove(output_path)
	            raise

	        # Preview: header plus the first 15 blocks, each cut to 100 chars.
	        preview_parts = [
	            f"Successfully extracted {len(all_blocks)} text blocks!\n\n",
	            "--- Content Preview ---\n\n",
	        ]
	        for i, block in enumerate(all_blocks[:15], 1):
	            content = block['content']
	            content_preview = content[:100] + ("..." if len(content) > 100 else "")
	            conf = block.get('confidence', 0) * 100
	            preview_parts.append(f"{i}. [{conf:.1f}%] {content_preview}\n")

	        if len(all_blocks) > 15:
	            preview_parts.append(f"\n... and {len(all_blocks) - 15} more blocks")

	        return output_path, "".join(preview_parts)

	    except Exception as e:
	        # UI boundary: show the failure in the preview box.
	        return None, f"Error during conversion: {str(e)}"

	# Stylesheet handed to gr.Blocks: container font and primary-button gradient.
	custom_css = """
	.gradio-container {
	font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
	}
	.gr-button-primary {
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	}
	"""

	# Page layout: intro text, upload/convert column beside a results column,
	# then a collapsible tips section and a footer note.
	with gr.Blocks(css=custom_css, title="Image to DOCX") as demo:
	    gr.Markdown("""
	# Image to DOCX Converter

	Upload an image of your handwritten text and get a formatted Word document!

	Supported formats: JPG, JPEG, PNG, BMP, WEBP
	""")

	    with gr.Row():
	        # Left column: image upload and the trigger button.
	        with gr.Column(scale=1):
	            input_image = gr.Image(label="Upload Chemistry Notes Image", type="pil")
	            convert_btn = gr.Button("Convert to DOCX", variant="primary")

	        # Right column: read-only text preview and the generated file.
	        with gr.Column(scale=1):
	            output_preview = gr.Textbox(
	                label="Extraction Preview", lines=15, max_lines=20, interactive=False
	            )
	            output_file = gr.File(label="Download DOCX", file_count="single")

	    # convert_image_to_docx returns (file path, preview text), in that order.
	    convert_btn.click(
	        fn=convert_image_to_docx,
	        inputs=[input_image],
	        outputs=[output_file, output_preview],
	    )

	    gr.Markdown("---")

	    with gr.Accordion("Tips for Best Results", open=False):
	        gr.Markdown("""
	- Use clear, high-resolution images - Higher quality images produce better results
	- Ensure good lighting - Avoid shadows and uneven lighting
	- Works best with typed or neatly handwritten notes - Clear text is easier to recognize
	- Avoid glare - Reflections can interfere with text recognition
	- Capture the entire page - Make sure all content is visible in the image
	- Keep the camera steady - Blurry images reduce accuracy
	""")

	    gr.Markdown("""
	---
	Note: This app uses PaddleOCR for text extraction. Processing may take a few seconds depending on image complexity.

	Made with ❤️ using Gradio and PaddleOCR
	""")

	# Start the server only when this file is executed as a script: bind on all
	# interfaces at port 7860, no public share link, errors shown in the UI.
	if __name__ == "__main__":
	    demo.launch(server_name="0.0.0.0", server_port=7860, share=False, show_error=True)