Spaces:

ranaspark
/

voice

Sleeping

rahulrana0001 committed on May 1

Commit

ac3541e

1 Parent(s): 209c5b0

Upgrade: Hybrid Digital/OCR scanner for 100% accurate ebook reading

Files changed (2) hide show

app.py CHANGED Viewed

@@ -8,7 +8,8 @@ from pipeline.tts import generate_tamil_speech
 from pipeline.document_parser import (
     extract_text_from_document,
     get_pdf_page_as_image,
-    get_pdf_page_count
 )
 # Setup logging
@@ -64,12 +65,18 @@ def load_comic_page(pdf_path, page_num):
     status = f"Page {page_num + 1} of {total_pages}"
     return img_path, status, page_num
-def process_comic_page(img_path, emotion_choice):
     if not img_path:
         return "No page loaded", "", None
-    # OCR on the rendered comic page
-    extracted_text = extract_text_from_image(img_path)
     if not extracted_text.strip():
         return "No text found on this page", "", None
@@ -137,7 +144,7 @@ with gr.Blocks(title="Tamil Comic & Manga Reader AI") as demo:
     read_page_btn.click(
         process_comic_page,
-        inputs=[comic_display, voice_style_comic],
         outputs=[comic_text, comic_tamil, comic_audio]
     )
@@ -172,8 +179,8 @@ with gr.Blocks(title="Tamil Comic & Manga Reader AI") as demo:
         if not img: # End of book
             return gr.update(), status, p_num, gr.update(), gr.update(), gr.update()
-        # 2. Process the new page
-        txt, tam, aud = process_comic_page(img, voice)
         return img, status, p_num, txt, tam, aud
     # The hidden button triggers the actual logic

 from pipeline.document_parser import (
     extract_text_from_document,
     get_pdf_page_as_image,
+    get_pdf_page_count,
+    get_text_from_page
 )
 # Setup logging
     status = f"Page {page_num + 1} of {total_pages}"
     return img_path, status, page_num
+def process_comic_page(img_path, pdf_path, page_num, emotion_choice):
     if not img_path:
         return "No page loaded", "", None
+    # 1. Try Direct Digital Extraction (100% Accuracy for ebooks/EPUBs)
+    extracted_text = get_text_from_page(pdf_path, page_num)
+    # 2. Fallback to AI OCR (For image-based comics)
+    if not extracted_text or len(extracted_text.strip()) < 5:
+        print(f"DEBUG: No digital text found. Falling back to AI OCR...")
+        extracted_text = extract_text_from_image(img_path)
     if not extracted_text.strip():
         return "No text found on this page", "", None
     read_page_btn.click(
         process_comic_page,
+        inputs=[comic_display, comic_pdf_path, current_page, voice_style_comic],
         outputs=[comic_text, comic_tamil, comic_audio]
     )
         if not img: # End of book
             return gr.update(), status, p_num, gr.update(), gr.update(), gr.update()
+        # 2. Process the new page (Using Hybrid Mode)
+        txt, tam, aud = process_comic_page(img, pdf, p_num, voice)
         return img, status, p_num, txt, tam, aud
     # The hidden button triggers the actual logic

pipeline/document_parser.py CHANGED Viewed

@@ -62,6 +62,20 @@ def get_pdf_page_as_image(file_path: str, page_num: int) -> str:
         print(f"ERROR: PDF rendering failed: {e}")
         return None
 def extract_text_from_document(file_path: str) -> str:
     """
     Dispatcher to extract text based on file extension.

         print(f"ERROR: PDF rendering failed: {e}")
         return None
+def get_text_from_page(file_path: str, page_num: int) -> str:
+    """
+    Tries to extract digital text directly from a specific page.
+
+    Args:
+        file_path: Path of the document to open with fitz.
+        page_num: Index of the page to read.
+
+    Returns:
+        The page's text with surrounding whitespace stripped, or "" when
+        page_num is past the last page or the document cannot be read.
+    """
+    doc = None
+    try:
+        doc = fitz.open(file_path)
+        if page_num >= len(doc):
+            return ""
+        return doc[page_num].get_text().strip()
+    except Exception as e:
+        # Best-effort: callers treat "" as "no digital text" and fall back,
+        # but report the failure like the other helpers in this module do.
+        print(f"ERROR: Page text extraction failed: {e}")
+        return ""
+    finally:
+        # Close on every path (early return and errors included) so the
+        # document handle is never leaked.
+        if doc is not None:
+            doc.close()
 def extract_text_from_document(file_path: str) -> str:
     """
     Dispatcher to extract text based on file extension.