Spaces:
Running
Running
# app.py - Streamlit script that converts a PDF or an image of text (PNG/JPEG) into UTF-8 text
# Kabyle OCR tool
| import streamlit as st | |
| import pytesseract | |
| from pdf2image import convert_from_bytes | |
| from PIL import Image, ImageEnhance, ImageFilter | |
| import os | |
# --- Tesseract data location (Docker deployment) ---
# TESSDATA_PREFIX tells pytesseract where the .traineddata files live; here it
# is pointed at the tessdata folder shipped inside the container.
os.environ.update(TESSDATA_PREFIX='/app/tessdata/')

# --- OCR configuration ---
# Only the language is selected; the model itself is located through
# TESSDATA_PREFIX set above.
custom_config = '-l kab'
| # --- Function Definition --- | |
def enhance_image(image, for_display=False):
    """Return a grayscale, contrast- and sharpness-boosted version of *image*.

    Args:
        image: A PIL image. It is converted to mode 'L' (grayscale) unless it
            already is in that mode.
        for_display: Accepted so callers can state whether the result is meant
            for on-screen preview or for OCR. It currently has no effect: the
            same enhancement factors are used in both cases.

    Returns:
        The image produced by applying a contrast factor of 1.5 followed by a
        sharpness factor of 1.2 (values above 1.0 increase the effect).
    """
    # One pair of factors serves both OCR and display for now.
    contrast_factor, sharpness_factor = 1.5, 1.2

    grayscale = image if image.mode == 'L' else image.convert('L')
    contrasted = ImageEnhance.Contrast(grayscale).enhance(contrast_factor)
    return ImageEnhance.Sharpness(contrasted).enhance(sharpness_factor)
def process_image(image):
    """OCR a single image after running it through ``enhance_image``.

    Args:
        image: The PIL image to recognise.

    Returns:
        The text returned by ``pytesseract.image_to_string`` (called with
        ``custom_config``). If anything raises, the exception is not
        propagated; a string of the form ``"Error during OCR: <exception>"``
        is returned instead.
    """
    try:
        return pytesseract.image_to_string(
            enhance_image(image, for_display=False),  # enhancement tuned for OCR
            config=custom_config,
        )
    except Exception as exc:
        return f"Error during OCR: {exc}"
# --- Page setup ---
st.set_page_config(page_title="Kabyle OCR", layout="wide")
st.title("Asemmezdey Asekdan n Teqbaylit - Kabyle OCR")

# --- Sidebar: file upload plus the two display settings ---
with st.sidebar:
    st.header("Isefka")
    uploaded_file = st.file_uploader(
        "Ssali-d Afaylu - Smenyif amerkid n 300+ DPI",
        type=["png", "jpg", "jpeg", "pdf"],
    )
    # Font size (px) applied to the OCR text area through the injected CSS.
    font_size = st.slider(
        "Tiddi n Yisekkilen n Uḍris seg OCR (px)",
        min_value=10,
        max_value=30,
        value=18,
        step=1,
    )
    # DPI used when rendering PDF pages and when sizing the preview image.
    preview_dpi = st.slider(
        "Amerkid n Uskan (DPI)",
        min_value=150,
        max_value=700,
        value=300,
        step=50,
    )
# --- Custom CSS: text-area font size / colours and preview image layout ---
# The stylesheet is kept as a str.format template: doubled braces are literal
# CSS braces, {font_size} is filled in from the sidebar slider.
_css_template = """
<style>
/* --- Text Area Styling --- */
/* Target the main text area input field */
textarea[data-testid="stText"] {{
font-size: {font_size}px;
color: #000000 !important; /* Force black text */
background-color: #FFFFFF !important; /* Force white background */
line-height: 1.5; /* Improve readability with line spacing */
}}
/* Target the text area inside stTextArea component */
.stTextArea textarea {{
font-size: {font_size}px;
color: #000000 !important; /* Force black text */
background-color: #FFFFFF !important; /* Force white background */
line-height: 1.5;
}}
/* Target the container of the text area for potential background issues */
.stTextArea > div > div {{
background-color: #FFFFFF !important; /* Ensure container background is white */
}}
/* --- Image Styling (Kept as is) --- */
section[data-testid="stSidebar"] ~ div > div:has(div[data-testid="stColumn"] > div:nth-child(1)) > div:nth-child(1) img {{
max-width: 100%; /* Ensures image doesn't exceed the column width */
height: auto; /* Maintains aspect ratio when width is constrained */
display: block; /* Makes the image a block element, necessary for max-width */
margin-left: auto; /* Center the image horizontally within its container */
margin-right: auto; /* Center the image horizontally within its container */
margin-bottom: 10px; /* Add some space below the image */
}}
</style>
"""
st.markdown(_css_template.format(font_size=font_size), unsafe_allow_html=True)
# --- Main App Logic ---
# Flow when a file is uploaded:
#   1. Reset the per-file session state when a different file is selected.
#   2. Left column: preview (PDF pages as a slideshow, or the image itself).
#   3. Right column: run OCR on demand, edit the text, download it.
# When no file is uploaded the per-file session state is removed.


def _preview_width(dpi, base_dpi=300, base_width=600):
    """Return the preview width in pixels, scaled linearly with *dpi*.

    With the defaults, 300 DPI maps to 600 px.
    """
    return int((dpi / base_dpi) * base_width)


def _go_to_page(index):
    """Button callback: make *index* the page shown in the PDF slideshow."""
    st.session_state.current_page_index = index


if uploaded_file is not None:
    # (name, size, MIME type) is used to notice that a different file was selected.
    current_file_info = (uploaded_file.name, uploaded_file.size, uploaded_file.type)
    if st.session_state.get('current_file_info') != current_file_info:
        # New file detected: start from a clean per-file state.
        st.session_state.current_file_info = current_file_info
        st.session_state.ocr_text = ""
        # The editor is a keyed widget, so its content is reset through its
        # own session-state entry (allowed here: the widget is created later).
        st.session_state.text_editor = ""
        st.session_state.display_image = None
        st.session_state.all_pdf_images = []
        st.session_state.current_page_index = 0
    st.info(f"Afaylu i d-yulin: {uploaded_file.name}")

    # Make sure every per-file key exists, whatever path led here.
    for state_key, default in (
        ("ocr_text", ""),
        ("display_image", None),
        ("current_page_index", 0),
    ):
        if state_key not in st.session_state:
            st.session_state[state_key] = default
    if 'all_pdf_images' not in st.session_state:
        st.session_state.all_pdf_images = []

    is_pdf = "pdf" in uploaded_file.type
    preview_width = _preview_width(preview_dpi)

    # Two equal columns: preview on the left, OCR text on the right.
    col1, col2 = st.columns([1, 1])

    with col1:
        st.subheader("Askan n Ufaylu deg Talɣa-s Tamenzut")
        if is_pdf:
            if st.button("Sekker PDF (Askan n Yisebtar)"):
                try:
                    # getvalue() returns the whole upload regardless of the
                    # stream position, so it is safe to call on every rerun
                    # (read() would return b"" once the stream was consumed).
                    images = convert_from_bytes(uploaded_file.getvalue(), dpi=preview_dpi)
                except Exception as e:
                    st.error(f"Ugul deg uselket n PDF: {e}")
                    st.warning("Senked ma yuli poppler (MD. 'conda install -c conda-forge poppler').")
                else:
                    if images:
                        # Keep un-enhanced copies; enhancement is applied at display time.
                        st.session_state.all_pdf_images = [img.copy() for img in images]
                        st.session_state.current_page_index = 0
                        # First page kept separately for backward compatibility.
                        st.session_state.display_image = images[0]
                        st.success(f"Yuli-d uPDF (DPI n Uskan: {preview_dpi}). Tekki ɣef 'Sekker OCR' i Uselket.")
                    else:
                        st.error("Ulac isebtare deg ufaylu PDF.")

            # Slideshow of the converted pages (only once a PDF was converted).
            pages = st.session_state.all_pdf_images
            if pages:
                st.subheader("Askan n Yisebtar n Ufaylu PDF")
                num_pages = len(pages)
                current_idx = st.session_state.current_page_index

                # Previous / page counter / next on a single row.
                col_nav1, col_nav2, col_nav3 = st.columns([1, 2, 1])
                with col_nav1:
                    st.button(
                        "Ɣer deffir",
                        disabled=(current_idx == 0),
                        on_click=_go_to_page,
                        args=(max(0, current_idx - 1),),
                        key='prev_btn_slideshow',
                    )
                with col_nav2:
                    st.text(f"Asebter {current_idx + 1} n {num_pages}")
                with col_nav3:
                    st.button(
                        "Ɣer zdat",
                        disabled=(current_idx == num_pages - 1),
                        on_click=_go_to_page,
                        args=(min(num_pages - 1, current_idx + 1),),
                        key='next_btn_slideshow',
                    )

                # Enhance a copy so the stored page stays untouched.
                current_img = pages[current_idx]
                display_image_enhanced = enhance_image(current_img.copy(), for_display=True)
                st.image(
                    display_image_enhanced,
                    caption=f"Asebter {current_idx + 1} ({preview_dpi} DPI)",
                    width=preview_width,
                )
        else:
            # Plain image upload: preview an enhanced copy, keep the original.
            image = Image.open(uploaded_file)
            display_image_enhanced = enhance_image(image.copy(), for_display=True)
            st.image(display_image_enhanced, caption=uploaded_file.name, width=preview_width)
            st.session_state.display_image = image

    with col2:
        st.subheader("Asezṛeg n Uḍris seg OCR")
        if st.button("Sekker OCR"):
            page_texts = []
            ocr_failed = False
            # Placeholders so the progress display can be cleared afterwards.
            progress_text = st.empty()
            progress_bar = st.progress(0)
            with st.spinner("Asekker n OCR..."):
                if is_pdf:
                    try:
                        images = convert_from_bytes(uploaded_file.getvalue(), dpi=preview_dpi)
                        num_pages = len(images)
                        if not images:
                            # Same message as the preview path for an empty PDF.
                            ocr_failed = True
                            st.error("Ulac isebtare deg ufaylu PDF.")
                        for page_no, page_image in enumerate(images, start=1):
                            progress_text.text(f"Yeɣɣar asebter {page_no} n {num_pages}...")
                            progress_bar.progress(page_no / num_pages)
                            txt = process_image(page_image)
                            page_texts.append(f"\n--- Asebter {page_no} ---\n{txt}\n")
                    except Exception as e:
                        ocr_failed = True
                        st.error(f"Ugul deg uPDF deg OCR: {e}")
                        st.warning("Ma twalaḍ ugul yeɛnan 'poppler_path', senked ma yuli poppler (MD. 'conda install -c conda-forge poppler').")
                else:
                    # Images are OCR'd as uploaded; process_image enhances internally.
                    progress_text.text("Yeɣɣar tugna...")
                    image = Image.open(uploaded_file)
                    page_texts.append(process_image(image))
                    progress_bar.progress(100)
            progress_text.empty()
            progress_bar.empty()
            if not ocr_failed:
                # Only replace the text and announce success when OCR actually ran;
                # on failure the previous text is left in place.
                full_text = "".join(page_texts)
                st.session_state.ocr_text = full_text
                # Push the result into the keyed editor explicitly (the widget
                # is created below, so assigning its state here is permitted).
                st.session_state.text_editor = full_text
                st.success("OCR Yemmed!")

        # Text editor: always visible. Its content lives under the widget key
        # "text_editor"; font size and colours come from the injected CSS.
        if "text_editor" not in st.session_state:
            st.session_state.text_editor = st.session_state.ocr_text
        edited_text = st.text_area("Zṛeg Aḍris, Seɣti Tira-s da", height=600, key="text_editor")
        # Mirror user edits so the download below always matches the editor.
        st.session_state.ocr_text = edited_text

        # Download is offered only when there is text.
        if st.session_state.ocr_text:
            st.download_button(
                label="Zdem Aḍris",
                data=st.session_state.ocr_text.encode('utf-8'),
                file_name=f"{uploaded_file.name.replace('.', '_')}_ocr.txt",
                mime="text/plain"
            )
        else:
            st.info("Seddu OCR, Selket s Aḍris.")
else:
    # No file (initial load or file removed): drop the per-file state so the
    # next upload starts clean.
    for key in ["ocr_text", "display_image", "current_file_info", "all_pdf_images", "current_page_index"]:
        if key in st.session_state:
            del st.session_state[key]
    st.write("Ldi afaylu PDF, PNG, JPG, or JPEG seg ufeggad n yisefka.")