Spaces:

oguz7
/

bulk-tesseract-ocr

Sleeping

App Files Community

oguz committed on Oct 29, 2025

Commit

339611b

1 Parent(s): 61c6f21

Fix: packages.txt encoding + app.py syntax

Browse files

Files changed (2) hide show

app.py +27 -17
packages.txt +2 -2

app.py CHANGED Viewed

@@ -8,11 +8,11 @@ from typing import List, Tuple, Optional
 def process_bulk_images(files: List) -> Tuple[str, Optional[str]]:
     """
-    Toplu resim iÅŸleme fonksiyonu
-    70'ten fazla resmi iÅŸleyip tÃ¼m yazÄ±larÄ± tek bir metin olarak birleÅŸtirir
     """
     if not files:
-        return "LÃ¼tfen en az bir resim yÃ¼kleyin.", None
     all_texts = []
     processed_count = 0
@@ -20,12 +20,19 @@ def process_bulk_images(files: List) -> Tuple[str, Optional[str]]:
     for file in files:
         try:
-            image = Image.open(file.name)\n            try:\n                text = pytesseract.image_to_string(image, lang='tur+eng')\n            except TesseractNotFoundError:\n                raise TesseractNotFoundError('Tesseract yÃ¼klÃ¼ deÄŸil veya PATH iÃ§inde deÄŸil. Spaces iÃ§in packages.txt ile kurulur; Windows iÃ§in README\'deki yolu ayarlayÄ±n.')
             image_name = os.path.basename(file.name)
             if text.strip():
                 all_texts.append(f"\n{'='*60}\n[{image_name}]\n{'='*60}\n{text.strip()}\n")
             else:
-                all_texts.append(f"\n{'='*60}\n[{image_name}]\n{'='*60}\n[Bu resimden metin Ã§Ä±karÄ±lamadÄ±]\n")
             processed_count += 1
         except Exception as e:
             errors.append(f"{os.path.basename(file.name)}: {str(e)}")
@@ -33,16 +40,16 @@ def process_bulk_images(files: List) -> Tuple[str, Optional[str]]:
     combined_text = "\n".join(all_texts)
     stats = f"""
-Ä°ÅŸlenen Resim SayÄ±sÄ±: {processed_count}/{len(files)}
-Hata SayÄ±sÄ±: {len(errors)}
-Toplam Karakter SayÄ±sÄ±: {len(combined_text)}
 """
     if errors:
         stats += "\nHatalar:\n" + "\n".join(errors)
     temp_file = tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False, encoding='utf-8')
     temp_file.write(combined_text)
     temp_file.close()
-    result_message = stats + "\n\nTÃ¼m metinler baÅŸarÄ±yla birleÅŸtirildi. AÅŸaÄŸÄ±daki dÃ¼ÄŸmeden indirebilirsiniz."
     return result_message, temp_file.name
@@ -50,19 +57,22 @@ def create_interface():
     with gr.Blocks(title="Bulk Tesseract OCR", theme=gr.themes.Soft()) as demo:
         gr.Markdown(
             """
-            # ðŸ“¸ Bulk Tesseract OCR
-            70'ten fazla resmi toplu iÅŸleyin; metinler tek bir dosyada.
             """
         )
-        file_input = gr.File(file_count="multiple", label="Resimleri SeÃ§in", file_types=["image"])
-        process_btn = gr.Button("ðŸš€ Resimleri Ä°ÅŸle", variant="primary")
-        output_text = gr.Textbox(label="Ä°ÅŸlem SonuÃ§larÄ±", lines=10, interactive=False)
-        output_file = gr.File(label="BirleÅŸtirilmiÅŸ Metin DosyasÄ±nÄ± Ä°ndir", type="filepath")
         process_btn.click(fn=process_bulk_images, inputs=file_input, outputs=[output_text, output_file])
     return demo
 if __name__ == "__main__":
     demo = create_interface()
     demo.launch(share=False, server_name="0.0.0.0", server_port=7860)

 def process_bulk_images(files: List) -> Tuple[str, Optional[str]]:
     """
+    Toplu resim işleme fonksiyonu
+    70'ten fazla resmi işleyip tüm yazıları tek bir metin olarak birleştirir
     """
     if not files:
+        return "Lütfen en az bir resim yükleyin.", None
     all_texts = []
     processed_count = 0
     for file in files:
         try:
+            image = Image.open(file.name)
+            try:
+                text = pytesseract.image_to_string(image, lang='tur+eng')
+            except TesseractNotFoundError:
+                raise TesseractNotFoundError(
+                    'Tesseract yüklü değil veya PATH içinde değil. '
+                    'Spaces için packages.txt ile kurulur; Windows için README\'deki yolu ayarlayın.'
+                )
             image_name = os.path.basename(file.name)
             if text.strip():
                 all_texts.append(f"\n{'='*60}\n[{image_name}]\n{'='*60}\n{text.strip()}\n")
             else:
+                all_texts.append(f"\n{'='*60}\n[{image_name}]\n{'='*60}\n[Bu resimden metin çıkarılamadı]\n")
             processed_count += 1
         except Exception as e:
             errors.append(f"{os.path.basename(file.name)}: {str(e)}")
     combined_text = "\n".join(all_texts)
     stats = f"""
+İşlenen Resim Sayısı: {processed_count}/{len(files)}
+Hata Sayısı: {len(errors)}
+Toplam Karakter Sayısı: {len(combined_text)}
 """
     if errors:
         stats += "\nHatalar:\n" + "\n".join(errors)
     temp_file = tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False, encoding='utf-8')
     temp_file.write(combined_text)
     temp_file.close()
+    result_message = stats + "\n\nTüm metinler başarıyla birleştirildi. Aşağıdaki düğmeden indirebilirsiniz."
     return result_message, temp_file.name
     with gr.Blocks(title="Bulk Tesseract OCR", theme=gr.themes.Soft()) as demo:
         gr.Markdown(
             """
+            # 📸 Bulk Tesseract OCR
+            **70'ten fazla resmi toplu olarak işleyin ve tüm yazıları tek bir metin dosyası olarak alın**
+            - Birden fazla resim yükleyebilirsiniz
+            - Tüm resimlerden çıkarılan metinler tek bir dosyada birleştirilir
+            - Desteklenen formatlar: PNG, JPG, JPEG, TIFF
+            - Türkçe ve İngilizce dil desteği
             """
         )
+        file_input = gr.File(file_count="multiple", label="Resimleri Seçin (70'ten fazla resim yükleyebilirsiniz)", file_types=["image"])
+        process_btn = gr.Button("🚀 Resimleri İşle", variant="primary", size="lg")
+        output_text = gr.Textbox(label="İşlem Sonuçları", lines=10, interactive=False)
+        output_file = gr.File(label="Birleştirilmiş Metin Dosyasını İndir", type="filepath")
         process_btn.click(fn=process_bulk_images, inputs=file_input, outputs=[output_text, output_file])
     return demo
 if __name__ == "__main__":
     demo = create_interface()
     demo.launch(share=False, server_name="0.0.0.0", server_port=7860)

packages.txt CHANGED Viewed

@@ -1,3 +1,3 @@
-tesseract-ocr
 tesseract-ocr-tur
-tesseract-ocr-eng

+tesseract-ocr
 tesseract-ocr-tur
+tesseract-ocr-eng