Image-Text-Detection

Running

App Files Files Community

Update app.py

by K1Z3M1112 - opened Nov 10, 2025

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

+97

-46

Files changed (1) hide show

app.py +97 -46

app.py CHANGED Viewed

@@ -3,62 +3,113 @@ from PIL import ImageDraw
 from PIL import Image
 import streamlit as st
 import os
 def load_image(image_file):
-	img = PIL.Image.open(image_file)
-	return img
 def init_session_states():
-  if 'disp' not in st.session_state:
-    st.session_state['disp'] = st.empty()
-    st.session_state['disp'].text("Setting up environment with latest build of easyocr. This will take about a minute ")
-  if 'init' not in st.session_state:
-    st.session_state['init'] = 1
-      # Not required as they are already installed through requirements and also seems to cause errors
-    # os.system('pip install git+git://github.com/jaidedai/easyocr.git')
-    # os.system('pip install git+https://github.com/huggingface/transformers.git --upgrade')
 init_session_states()
-import easyocr
-from transformers import TrOCRProcessor, VisionEncoderDecoderModel
-def text_recognition(image):
-    processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
-    model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")
-    #processor = TrOCRProcessor.from_pretrained("microsoft/trocr-large-handwritten")
-    #model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-large-handwritten")
-    pixel_values = processor(image, return_tensors="pt").pixel_values
-    generated_ids = model.generate(pixel_values)
-    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-    st.write(generated_text)
 def main():
-    st.session_state['disp'].text("Env setup up Complete")
-    uploaded_file = st.file_uploader("Choose image file to detect text",type=['jpeg','jpg'])
     if uploaded_file is not None:
-        file_details = {"FileName":uploaded_file.name,"FileType":uploaded_file.type,"FileSize":uploaded_file.size}
         st.write(file_details)
         image = load_image(uploaded_file)
-        st.image(image,width=500)
-        st.write("Detecting text bounding box and Take 1 recognition...")
-        reader = easyocr.Reader(['en'],gpu=True)
-        bound = reader.readtext(image)
-        st.write("Bounding box Detection complete")
-        st.write(str(bound))
-        st.write("Recognizing text - Take 2....")
-        text_recognition(image)
-if __name__ == "__main__":
-    main()

 from PIL import Image
 import streamlit as st
 import os
+import easyocr
 def load_image(image_file):
+    """Open an image with Pillow and return it.
+
+    Args:
+        image_file: Path or binary file-like object accepted by
+            ``Image.open`` (``main`` passes the Streamlit upload).
+
+    Returns:
+        The image object produced by ``Image.open``.
+    """
+    # Use the ``Image`` module imported at the top of the file instead of the
+    # bare ``PIL`` package name, which none of the visible imports bind.
+    return Image.open(image_file)
 def init_session_states():
+    """Seed ``st.session_state`` with the keys this app expects.
+
+    Creates the ``'disp'`` status placeholder (showing the setup message) and
+    the ``'init'`` flag, each only when its key is not already present.
+    """
+    state = st.session_state
+    if 'disp' not in state:
+        state['disp'] = st.empty()
+        state['disp'].text("กำลังตั้งค่า Environment...")
+    if 'init' not in state:
+        state['init'] = 1
+# Executed at module level, ahead of main(), which reads st.session_state['disp'].
 init_session_states()
+def draw_bounding_boxes(image, results):
+    """Draw each detected text region and its label onto ``image``.
+
+    Args:
+        image: Pillow image to annotate. It is modified in place, so pass a
+            copy when the original must stay untouched.
+        results: Iterable of ``(bbox, text, confidence)`` tuples, where
+            ``bbox`` is a sequence of ``(x, y)`` corner points.
+
+    Returns:
+        The same ``image`` object, annotated.
+    """
+    draw = ImageDraw.Draw(image)
+    for bbox, text, confidence in results:
+        # Span all corner points instead of trusting bbox[0]/bbox[2] to be the
+        # top-left/bottom-right pair: for tilted text they are not, and recent
+        # Pillow versions reject a rectangle whose second corner precedes the
+        # first.
+        xs = [float(point[0]) for point in bbox]
+        ys = [float(point[1]) for point in bbox]
+        top_left = (min(xs), min(ys))
+        bottom_right = (max(xs), max(ys))
+        # Outline the region.
+        draw.rectangle([top_left, bottom_right], outline="red", width=3)
+        # Label the region with the recognized text and its confidence.
+        label = f"{text} ({confidence:.2f})"
+        try:
+            draw.text(top_left, label, fill="blue")
+        except UnicodeEncodeError:
+            # Pillow's legacy bitmap default font only covers Latin-1; keep the
+            # confidence visible rather than failing on e.g. Thai text.
+            draw.text(top_left, f"({confidence:.2f})", fill="blue")
+    return image
 def main():
+    """Render the OCR page: pick languages, upload an image, show detections.
+
+    Reads the language selection and the uploaded file from Streamlit widgets,
+    runs EasyOCR on the image, and displays the annotated image, the
+    per-region results and the combined text. Returns ``None``.
+    """
+    # Imported locally because the visible top-of-file imports do not include
+    # numpy (EasyOCR itself depends on it, so it is installed alongside).
+    import numpy as np
+
+    st.session_state['disp'].text("Environment พร้อมใช้งานแล้ว!")
+    st.title("📝 Image Text Detection")
+    st.write("ใช้ EasyOCR สำหรับตรวจจับและจดจำข้อความจากภาพ")
+    # Language picker: display name -> EasyOCR language code.
+    st.sidebar.subheader("🌐 เลือกภาษา")
+    languages = {
+        "English": "en",
+        "Thai": "th",
+        "Chinese Simplified": "ch_sim",
+        "Japanese": "ja",
+        "Korean": "ko",
+        "French": "fr",
+        "German": "de"
+    }
+    selected_langs = st.sidebar.multiselect(
+        "เลือกภาษาที่ต้องการตรวจจับ:",
+        options=list(languages.keys()),
+        default=["English", "Thai"]
+    )
+    # Translate the chosen display names into EasyOCR language codes.
+    lang_codes = [languages[lang] for lang in selected_langs]
+    uploaded_file = st.file_uploader("เลือกไฟล์ภาพ", type=['jpeg','jpg','png'])
     if uploaded_file is not None:
+        # The multiselect can be cleared entirely; there is nothing to detect
+        # with in that case, so stop before building a reader.
+        if not lang_codes:
+            st.warning("กรุณาเลือกอย่างน้อยหนึ่งภาษา")
+            return
+        # Show the upload's metadata.
+        file_details = {
+            "ชื่อไฟล์": uploaded_file.name,
+            "ประเภท": uploaded_file.type,
+            "ขนาด": f"{uploaded_file.size / 1024:.2f} KB",
+            "ภาษาที่ตรวจจับ": ", ".join(selected_langs)
+        }
         st.write(file_details)
+        # Load and display the original image.
         image = load_image(uploaded_file)
+        st.image(image, width=500, caption="ภาพต้นฉบับ")
+        # EasyOCR's input handling covers paths, URLs, bytes and numpy arrays
+        # (plus JPEG PIL images only), so PNG uploads must be converted. The
+        # RGB conversion also gives the annotation step real colours to draw.
+        rgb_image = image.convert("RGB")
+        # Detect and recognize text.
+        with st.spinner(f"กำลังตรวจจับข้อความใน {len(selected_langs)} ภาษา..."):
+            try:
+                reader = easyocr.Reader(lang_codes, gpu=False)
+            except ValueError as exc:
+                # EasyOCR raises ValueError for language mixes it cannot load
+                # together (e.g. Thai with anything other than English).
+                st.error(f"ไม่สามารถใช้ภาษาที่เลือกร่วมกันได้: {exc}")
+                return
+            results = reader.readtext(np.array(rgb_image))
+        st.success("ตรวจจับข้อความสำเร็จ!")
+        # Show the results.
+        st.subheader("🔍 ผลลัพธ์การตรวจจับ")
+        if results:
+            # rgb_image is already a separate object from the displayed
+            # original, so it can be annotated in place.
+            annotated_image = draw_bounding_boxes(rgb_image, results)
+            st.image(annotated_image, width=500, caption="ภาพที่มีการวาด Bounding Boxes")
+            # List every detected region with its confidence and position.
+            st.subheader("📄 ข้อความที่ตรวจจับได้")
+            for i, (bbox, text, confidence) in enumerate(results):
+                st.write(f"**ข้อความ {i+1}:**")
+                st.write(f"- ข้อความ: `{text}`")
+                st.write(f"- ความมั่นใจ: `{confidence:.2%}`")
+                st.write(f"- ตำแหน่ง: {bbox}")
+                st.write("---")
+            # Join all recognized fragments into one block of text.
+            all_text = " ".join(text for (_, text, _) in results)
+            st.subheader("📝 ข้อความรวม")
+            st.text_area("ข้อความที่ตรวจจับได้ทั้งหมด:", all_text, height=100)
+        else:
+            st.warning("❌ ไม่พบข้อความในภาพนี้")
+# Call main() only when this file is executed as the main script.
+if __name__ == "__main__":
+    main()