Spaces:

AitBAD
/

kab-ocr-tanti

Running

App Files Files Community

kab-ocr-tanti / app.py

AitBAD

Update app.py

49a6789 verified 5 months ago

raw

history blame contribute delete

15.3 kB

	# app.py: converts a PDF or an image (PNG/JPEG) of text into UTF-8 text
	# Kabyle OCR tool
	import streamlit as st
	import pytesseract
	from pdf2image import convert_from_bytes
	from PIL import Image, ImageEnhance, ImageFilter
	import os

	# --- Tesseract language data (Docker deployment) ---
	# pytesseract looks up its .traineddata files through TESSDATA_PREFIX, so the
	# variable is pointed at the tessdata folder that lives inside the container.
	os.environ.update({'TESSDATA_PREFIX': '/app/tessdata/'})

	# --- OCR configuration ---
	# Only the language is selected here; the model is resolved via TESSDATA_PREFIX.
	custom_config = '-l kab'

	# --- Function Definition ---
	def enhance_image(image, for_display=False, contrast_factor=1.5, sharpness_factor=1.2):
	    """Return a grayscale version of ``image`` with boosted contrast and sharpness.

	    Args:
	        image: A PIL image. It is converted to mode 'L' (grayscale) when it is
	            in any other mode.
	        for_display: Kept for backward compatibility with existing callers.
	            Preview and OCR use the same factors, so this flag does not change
	            the result; pass explicit factors to get a different preview look.
	        contrast_factor: Factor handed to the contrast enhancer. Values above
	            1.0 increase contrast.
	        sharpness_factor: Factor handed to the sharpness enhancer. Values above
	            1.0 increase sharpness.

	    Returns:
	        The enhanced grayscale image.
	    """
	    if image.mode != 'L':
	        image = image.convert('L')

	    # Contrast first, then sharpness applied to the contrast-adjusted result.
	    image = ImageEnhance.Contrast(image).enhance(contrast_factor)
	    image = ImageEnhance.Sharpness(image).enhance(sharpness_factor)
	    return image

	def process_image(image):
	    """Enhance one image and run pytesseract on it, returning the text.

	    The image goes through enhance_image() before being handed to
	    pytesseract with the module-level ``custom_config``. Any exception raised
	    on the way is caught and returned as an "Error during OCR: ..." string
	    instead of propagating to the caller.
	    """
	    try:
	        # for_display=False: this pass prepares the image for OCR, not preview.
	        ocr_ready = enhance_image(image, for_display=False)
	        return pytesseract.image_to_string(ocr_ready, config=custom_config)
	    except Exception as exc:
	        return f"Error during OCR: {exc}"

	# --- Page Setup ---
	st.set_page_config(
	    page_title="Kabyle OCR",
	    layout="wide",
	)
	st.title("Asemmezdey Asekdan n Teqbaylit - Kabyle OCR")

	# --- Sidebar ---
	st.sidebar.header("Isefka")

	# File picker: images and PDFs are accepted.
	uploaded_file = st.sidebar.file_uploader(
	    "Ssali-d Afaylu - Smenyif amerkid n 300+ DPI",
	    type=["png", "jpg", "jpeg", "pdf"],
	)

	# Font size (px) applied to the OCR text editor through the CSS below.
	font_size = st.sidebar.slider(
	    "Tiddi n Yisekkilen n Uḍris seg OCR (px)",
	    min_value=10,
	    max_value=30,
	    value=18,
	    step=1,
	)

	# DPI used when rendering PDF pages and when sizing the preview image.
	preview_dpi = st.sidebar.slider(
	    "Amerkid n Uskan (DPI)",
	    min_value=150,
	    max_value=700,
	    value=300,
	    step=50,
	)

	# --- Custom CSS: editor font size / colours and preview image layout ---
	# The slider value is interpolated into the stylesheet; literal CSS braces are
	# doubled because the stylesheet is an f-string.
	st.markdown(
	    f"""
	<style>
	/* --- Text Area Styling --- */
	/* Target the main text area input field */
	textarea[data-testid="stText"] {{
	font-size: {font_size}px;
	color: #000000 !important; /* Force black text */
	background-color: #FFFFFF !important; /* Force white background */
	line-height: 1.5; /* Improve readability with line spacing */
	}}
	/* Target the text area inside stTextArea component */
	.stTextArea textarea {{
	font-size: {font_size}px;
	color: #000000 !important; /* Force black text */
	background-color: #FFFFFF !important; /* Force white background */
	line-height: 1.5;
	}}
	/* Target the container of the text area for potential background issues */
	.stTextArea > div > div {{
	background-color: #FFFFFF !important; /* Ensure container background is white */
	}}
	/* --- Image Styling (Kept as is) --- */
	section[data-testid="stSidebar"] ~ div > div:has(div[data-testid="stColumn"] > div:nth-child(1)) > div:nth-child(1) img {{
	max-width: 100%; /* Ensures image doesn't exceed the column width */
	height: auto; /* Maintains aspect ratio when width is constrained */
	display: block; /* Makes the image a block element, necessary for max-width */
	margin-left: auto; /* Center the image horizontally within its container */
	margin-right: auto; /* Center the image horizontally within its container */
	margin-bottom: 10px; /* Add some space below the image */
	}}
	</style>
	""",
	    unsafe_allow_html=True,
	)

	# --- Main App Logic ---
	# Streamlit re-runs this script on every interaction, so everything that must
	# survive between runs (OCR text, rendered PDF pages, current page) is kept in
	# st.session_state.
	if uploaded_file is not None:
	    # A (name, size, type) tuple identifies the upload; when it differs from the
	    # one remembered in session state, a new file was selected and the state
	    # belonging to the previous file is reset.
	    current_file_info = (uploaded_file.name, uploaded_file.size, uploaded_file.type)
	    if 'current_file_info' not in st.session_state or st.session_state.current_file_info != current_file_info:
	        st.session_state.current_file_info = current_file_info
	        st.session_state.ocr_text = ""
	        st.session_state.display_image = None
	        st.session_state.all_pdf_images = []  # rendered PDF pages for the preview
	        st.session_state.current_page_index = 0  # page shown in the slideshow

	    st.info(f"Afaylu i d-yulin: {uploaded_file.name}")

	    # Make sure every key read further down exists, even if the reset above
	    # did not run.
	    if 'ocr_text' not in st.session_state:
	        st.session_state.ocr_text = ""
	    if 'display_image' not in st.session_state:
	        st.session_state.display_image = None
	    if 'all_pdf_images' not in st.session_state:
	        st.session_state.all_pdf_images = []
	    if 'current_page_index' not in st.session_state:
	        st.session_state.current_page_index = 0

	    is_pdf = "pdf" in uploaded_file.type

	    # Preview width scales with the DPI slider relative to 600 px at 300 DPI.
	    # Computed once; both the PDF and the image preview use it.
	    base_dpi = 300
	    base_width = 600
	    calculated_width = int((preview_dpi / base_dpi) * base_width)

	    # Two equal columns: original preview on the left, OCR editor on the right.
	    col1, col2 = st.columns([1, 1])

	    with col1:
	        st.subheader("Askan n Ufaylu deg Talɣa-s Tamenzut")

	        if is_pdf:
	            if st.button("Sekker PDF (Askan n Yisebtar)"):
	                try:
	                    # getvalue() returns the whole upload regardless of the
	                    # current stream position, unlike read().
	                    pdf_content = uploaded_file.getvalue()
	                    images = convert_from_bytes(pdf_content, dpi=preview_dpi)
	                    if images:
	                        # Un-enhanced copies are stored; enhancement is applied
	                        # only to the page being displayed.
	                        st.session_state.all_pdf_images = [img.copy() for img in images]
	                        st.session_state.current_page_index = 0
	                        st.session_state.display_image = images[0]
	                        st.success(f"Yuli-d uPDF (DPI n Uskan: {preview_dpi}). Tekki ɣef 'Sekker OCR' i Uselket.")
	                    else:
	                        st.error("Ulac isebtare deg ufaylu PDF.")
	                except Exception as e:
	                    st.error(f"Ugul deg uselket n PDF: {e}")
	                    st.warning("Senked ma yuli poppler (MD. 'conda install -c conda-forge poppler').")

	            # Single-page slideshow, shown once pages have been rendered.
	            if st.session_state.all_pdf_images:
	                st.subheader("Askan n Yisebtar n Ufaylu PDF")
	                num_pages = len(st.session_state.all_pdf_images)
	                current_idx = st.session_state.current_page_index

	                # Previous / page counter / next on one row. The callbacks write
	                # the clamped target index into session state.
	                col_nav1, col_nav2, col_nav3 = st.columns([1, 2, 1])
	                with col_nav1:
	                    st.button(
	                        "Ɣer deffir",
	                        disabled=(current_idx == 0),
	                        on_click=lambda: setattr(st.session_state, 'current_page_index', max(0, current_idx - 1)),
	                        key='prev_btn_slideshow',
	                    )
	                with col_nav2:
	                    st.text(f"Asebter {current_idx + 1} n {num_pages}")
	                with col_nav3:
	                    st.button(
	                        "Ɣer zdat",
	                        disabled=(current_idx == num_pages - 1),
	                        on_click=lambda: setattr(st.session_state, 'current_page_index', min(num_pages - 1, current_idx + 1)),
	                        key='next_btn_slideshow',
	                    )

	                # Enhance a copy so the stored page stays untouched.
	                current_img = st.session_state.all_pdf_images[current_idx]
	                display_image_enhanced = enhance_image(current_img.copy(), for_display=True)
	                st.image(display_image_enhanced, caption=f"Asebter {current_idx + 1} ({preview_dpi} DPI)", width=calculated_width)

	        else:  # It's an image file
	            image = Image.open(uploaded_file)
	            # Enhance a copy for the preview; the opened image is stored as-is.
	            display_image_enhanced = enhance_image(image.copy(), for_display=True)
	            st.image(display_image_enhanced, caption=uploaded_file.name, width=calculated_width)
	            st.session_state.display_image = image

	    with col2:
	        st.subheader("Asezṛeg n Uḍris seg OCR")

	        if st.button("Sekker OCR"):
	            full_text = ""
	            ocr_failed = False
	            # Placeholders so the progress message and bar can be removed afterwards.
	            progress_text = st.empty()
	            progress_bar = st.progress(0)

	            with st.spinner("Asekker n OCR..."):
	                if is_pdf:
	                    page_texts = []
	                    try:
	                        # Pages are rendered again at the DPI currently selected
	                        # in the sidebar.
	                        pdf_content = uploaded_file.getvalue()
	                        images = convert_from_bytes(pdf_content, dpi=preview_dpi)
	                        num_pages = len(images)
	                        for i, page_image in enumerate(images):
	                            progress_text.text(f"Yeɣɣar asebter {i+1} n {num_pages}...")
	                            progress_bar.progress((i + 1) / num_pages)
	                            txt = process_image(page_image)
	                            page_texts.append(f"""
	--- Asebter {i+1} ---
	{txt}
	""")
	                    except Exception as e:
	                        ocr_failed = True
	                        st.error(f"Ugul deg uPDF deg OCR: {e}")
	                        st.warning("Ma twalaḍ ugul yeɛnan 'poppler_path', senked ma yuli poppler (MD. 'conda install -c conda-forge poppler').")
	                    # Pages recognised before a failure are still kept.
	                    full_text = "".join(page_texts)
	                else:  # It's an image file
	                    # The uploaded image is used directly; process_image()
	                    # applies the enhancement itself.
	                    progress_text.text("Yeɣɣar tugna...")
	                    image = Image.open(uploaded_file)
	                    full_text = process_image(image)
	                    progress_bar.progress(100)

	            progress_text.empty()
	            progress_bar.empty()

	            st.session_state.ocr_text = full_text
	            # Do not announce success right after reporting a PDF failure.
	            if not ocr_failed:
	                st.success("OCR Yemmed!")

	        # Editor is always visible; font size and colours come from the injected CSS.
	        edited_text = st.text_area("Zṛeg Aḍris, Seɣti Tira-s da", value=st.session_state.ocr_text, height=600, key="text_editor")
	        # Keep manual edits so the download below contains them.
	        if edited_text != st.session_state.ocr_text:
	            st.session_state.ocr_text = edited_text

	        # Download is offered only when there is text to save.
	        if st.session_state.ocr_text:
	            st.download_button(
	                label="Zdem Aḍris",
	                data=st.session_state.ocr_text.encode('utf-8'),
	                file_name=f"{uploaded_file.name.replace('.', '_')}_ocr.txt",
	                mime="text/plain"
	            )
	        else:
	            st.info("Seddu OCR, Selket s Aḍris.")

	else:
	    # No file selected (first load, or the user removed the file): drop the
	    # per-file state so the next upload starts clean.
	    for key in ["ocr_text", "display_image", "current_file_info", "all_pdf_images", "current_page_index"]:
	        if key in st.session_state:
	            del st.session_state[key]
	    st.write("Ldi afaylu PDF, PNG, JPG, or JPEG seg ufeggad n yisefka.")