Spaces:

tddf
/

end

Sleeping

App Files Files Community

tddf committed on Mar 29

Commit

2e07297

verified ·

1 Parent(s): f79d9bc

Update Main.py

Browse files

Files changed (1) hide show

Main.py +52 -79

Main.py CHANGED Viewed

@@ -1,10 +1,13 @@
 import io
 import streamlit as st
 import torch
 from transformers import LightOnOcrForConditionalGeneration, LightOnOcrProcessor
 from PIL import Image
-# ==================== Настройки страницы ====================
 st.set_page_config(
     page_title="LightOnOCR • Распознай текст",
     page_icon="📄",
@@ -12,66 +15,53 @@ st.set_page_config(
     initial_sidebar_state="expanded"
 )
-# ==================== Кастомный CSS ====================
 st.markdown("""
-    <style>
-        .main { background: linear-gradient(180deg, #f8f9fa, #e9f0f7); }
-        h1 { color: #1e3a8a; text-align: center; margin-bottom: 0.2rem; }
-        .stButton > button {
-            background: linear-gradient(90deg, #3b82f6, #1e40af);
-            color: white;
-            border-radius: 12px;
-            padding: 12px 32px;
-            font-weight: 600;
-            border: none;
-            box-shadow: 0 4px 15px rgba(59, 130, 246, 0.3);
-        }
-        .stButton > button:hover {
-            transform: translateY(-2px);
-            box-shadow: 0 8px 20px rgba(59, 130, 246, 0.4);
-        }
-        .result-box {
-            background: #ffffff;
-            border-radius: 16px;
-            padding: 24px;
-            box-shadow: 0 10px 30px rgba(0, 0, 0, 0.08);
-            border: 1px solid #e5e7eb;
-            margin-top: 20px;
-        }
-        .header-emoji { font-size: 3.5rem; text-align: center; margin: 10px 0; }
-    </style>
 """, unsafe_allow_html=True)
-# ==================== Загрузка модели ====================
-@st.cache_resource(show_spinner="Загрузка модели LightOnOCR-1B-1025...")
 def load_model():
     model_name = "lightonai/LightOnOCR-1B-1025"
-    if torch.backends.mps.is_available():
-        device = "mps"
-        dtype = torch.float32
-    elif torch.cuda.is_available():
-        device = "cuda"
-        dtype = torch.bfloat16
-    else:
-        device = "cpu"
-        dtype = torch.float32
     model = LightOnOcrForConditionalGeneration.from_pretrained(
         model_name,
         torch_dtype=dtype,
         trust_remote_code=True,
-        device_map=None  # загружаем вручную
     ).to(device)
     processor = LightOnOcrProcessor.from_pretrained(model_name)
     return processor, model, device, dtype
-# ==================== Загрузка изображения ====================
 def load_image():
     uploaded_file = st.file_uploader(
-        "📸 Загрузите изображение (фото, скан, документ)",
         type=['png', 'jpg', 'jpeg', 'webp']
     )
     if uploaded_file is not None:
@@ -80,45 +70,26 @@ def load_image():
         return Image.open(io.BytesIO(image_data)).convert('RGB')
     return None
-# ==================== Основной интерфейс ====================
-st.markdown('<div class="header-emoji">📄✨</div>', unsafe_allow_html=True)
-st.title("LightOnOCR")
-st.markdown("**Мгновенное распознавание текста на английском и других языках**")
-st.caption("Модель LightOnOCR-1B-1025 • Отлично работает с документами, чеками, таблицами и фото")
-# Загружаем модель один раз
-processor, model, device, dtype = load_model()
-# Сайдбар
-with st.sidebar:
-    st.markdown("### 🚀 О модели")
-    st.info("LightOnOCR-1B-1025 — компактная end-to-end модель для OCR и понимания документов.")
-    st.markdown("**Поддержка:** Английский + латиница, таблицы, сложная вёрстка")
-    st.caption(f"Устройство: **{device.upper()}** • dtype: **{dtype}**")
-# Загрузка изображения
 img = load_image()
-# Кнопка распознавания
 if st.button("🔍 Распознать текст", use_container_width=True, type="primary"):
     if img is None:
-        st.error("Пожалуйста, сначала загрузите изображение")
     else:
-        with st.spinner("Распознавание текста… (на CPU может занять 10–30 секунд)"):
-            # Правильный способ работы с этой моделью (chat template)
             conversation = [
                 {
                     "role": "user",
                     "content": [
-                        {"type": "image"},
-                        {"type": "te
-xt", "text": "Extract all the text from this image accurately. Preserve formatting, tables, and line breaks as much as possible."}
                     ]
                 }
             ]
             inputs = processor.apply_chat_template(
                 conversation,
                 add_generation_prompt=True,
@@ -127,9 +98,9 @@ xt", "text": "Extract all the text from this image accurately. Preserve formatti
                 return_tensors="pt"
             )
-            # Переносим на устройство
             inputs = {
-                k: v.to(device=device, dtype=dtype) if v.is_floating_point() else v.to(device)
                 for k, v in inputs.items()
             }
@@ -141,24 +112,26 @@ xt", "text": "Extract all the text from this image accurately. Preserve formatti
                 temperature=0.0
             )
-            # Убираем промпт, оставляем только сгенерированный текст
             generated_ids = output_ids[0, inputs["input_ids"].shape[1]:]
-            generated_text = processor.decode(generated_ids, skip_special_tokens=True)
-            # Вывод результата
-            st.success("✅ Распознавание завершено!")
             st.markdown('<div class="result-box">', unsafe_allow_html=True)
             st.subheader("📝 Распознанный текст")
-            st.markdown(f"```\n{generated_text}\n```")
             st.markdown('</div>', unsafe_allow_html=True)
-            # Кнопка скачивания
             st.download_button(
-                label="💾 Скачать текст (.txt)",
                 data=generated_text,
                 file_name="recognized_text.txt",
                 mime="text/plain"
             )
 st.markdown("---")
-st.markdown("**Сделано на базе [lightonai/LightOnOCR-1B-1025](https://huggingface.co/lightonai/LightOnOCR-1B-1025)**")

+import os
 import io
 import streamlit as st
 import torch
 from transformers import LightOnOcrForConditionalGeneration, LightOnOcrProcessor
 from PIL import Image
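+# Speed up model downloads on HF Spaces. NOTE(review): this is set after transformers is imported; huggingface_hub may read the flag at import time, so confirm it actually takes effect here.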
+os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
 st.set_page_config(
     page_title="LightOnOCR • Распознай текст",
     page_icon="📄",
     initial_sidebar_state="expanded"
 )
+# Простой CSS
 st.markdown("""
+<style>
+    .main { background: linear-gradient(180deg, #f8f9fa, #e9f0f7); }
+    .result-box {
+        background: #ffffff;
+        border-radius: 16px;
+        padding: 24px;
+        box-shadow: 0 10px 30px rgba(0,0,0,0.08);
+        margin-top: 20px;
+    }
+    .header-emoji { font-size: 3.5rem; text-align: center; margin: 15px 0; }
+</style>
 """, unsafe_allow_html=True)
+@st.cache_resource(show_spinner="⏳ Загрузка модели LightOnOCR-1B-1025...\nЭто может занять 2–6 минут при первом запуске на CPU")
 def load_model():
     # Load the LightOnOCR checkpoint and its processor.
     # Returns a 4-tuple: (processor, model, device, dtype).
     # The st.cache_resource decorator is intended to keep the returned objects
     # across Streamlit reruns so the weights are not reloaded every time; its
     # show_spinner text is displayed while the load is running.
     model_name = "lightonai/LightOnOCR-1B-1025"
     # Use CUDA with bfloat16 when a GPU is available; otherwise fall back to
     # CPU with float32.
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
     # NOTE(review): trust_remote_code=True allows code shipped in the model
     # repository to run locally - confirm it is still required for this model.
     model = LightOnOcrForConditionalGeneration.from_pretrained(
         model_name,
         torch_dtype=dtype,
         trust_remote_code=True,
     ).to(device)
     # The processor is loaded from the same checkpoint name as the model.
     processor = LightOnOcrProcessor.from_pretrained(model_name)
     return processor, model, device, dtype
+# ====================== Заголовок ======================
+st.markdown('<div class="header-emoji">📄✨</div>', unsafe_allow_html=True)
+st.title("LightOnOCR")
+st.markdown("**Распознавание текста с изображений**")
+st.caption("Модель: lightonai/LightOnOCR-1B-1025")
+# ====================== Загрузка модели ======================
+processor, model, device, dtype = load_model()
+st.sidebar.success(f"✅ Модель загружена на **{device.upper()}**")
+# ====================== Загрузка изображения ======================
 def load_image():
     uploaded_file = st.file_uploader(
+        "📸 Загрузите изображение (png, jpg, jpeg, webp)",
         type=['png', 'jpg', 'jpeg', 'webp']
     )
     if uploaded_file is not None:
         return Image.open(io.BytesIO(image_data)).convert('RGB')
     return None
 img = load_image()
+# ====================== Распознавание ======================
 if st.button("🔍 Распознать текст", use_container_width=True, type="primary"):
     if img is None:
+        st.error("Сначала загрузите изображение")
     else:
+        with st.spinner("Распознавание текста..."):
+            # Правильный формат для LightOnOCR (по официальному примеру)
             conversation = [
                 {
                     "role": "user",
                     "content": [
+                        {"type": "image"},                    # изображение передаётся отдельно
+                        {"type": "text", "text": "Extract all the text from this image accurately. Preserve original formatting, tables, and line breaks as much as possible."}
                     ]
                 }
             ]
+            # Подготовка inputs
             inputs = processor.apply_chat_template(
                 conversation,
                 add_generation_prompt=True,
                 return_tensors="pt"
             )
+            # Перенос на устройство
             inputs = {
+                k: (v.to(device=device, dtype=dtype) if v.is_floating_point() else v.to(device))
                 for k, v in inputs.items()
             }
                 temperature=0.0
             )
+            # Убираем промпт
             generated_ids = output_ids[0, inputs["input_ids"].shape[1]:]
+            generated_text = processor.decode(generated_ids,
+skip_special_tokens=True)
+            # Результат
+            st.success("✅ Готово!")
             st.markdown('<div class="result-box">', unsafe_allow_html=True)
             st.subheader("📝 Распознанный текст")
+            st.code(generated_text, language=None)
             st.markdown('</div>', unsafe_allow_html=True)
             st.download_button(
+                "💾 Скачать как .txt",
                 data=generated_text,
                 file_name="recognized_text.txt",
                 mime="text/plain"
             )
 st.markdown("---")
+st.caption("Сделано на базе [lightonai/LightOnOCR-1B-1025](https://huggingface.co/lightonai/LightOnOCR-1B-1025)")