Spaces:

tddf
/

end

Sleeping

App Files Community

tddf committed on Mar 30

Commit

de827c2

verified ·

1 Parent(s): a345f45

Update Main.py

Browse files

Files changed (1) hide show

Main.py +31 -26

Main.py CHANGED Viewed

@@ -81,28 +81,35 @@ if st.button("🔍 Распознать текст", use_container_width=True, t
     else:
         with st.spinner("Распознавание текста... (5–30 сек на CPU)"):
-            # Правильный формат разговора (как в официальных примерах)
             conversation = [
                 {
                     "role": "user",
                     "content": [
                         {"type": "image"},
-                        {"type": "text", "text": "Extract all the text from this image as accurately as possible. Preserve line breaks, formatting and tables."}
                     ]
                 }
             ]
-            # Применяем шаблон + передаём само изображение
-            inputs = processor.apply_chat_template(
                 conversation,
-                add_generation_prompt=True,
-                tokenize=True,
-                return_dict=True,
                 return_tensors="pt",
-                images=img   # ← Это ключевой момент
             )
-            # Переносим все тензоры на устройство
             inputs = {
                 k: (v.to(device=device, dtype=dtype) if v.is_floating_point() else v.to(device))
                 for k, v in inputs.items()
@@ -119,7 +126,7 @@ if st.button("🔍 Распознать текст", use_container_width=True, t
                 eos_token_id=processor.tokenizer.eos_token_id,
             )
-            # Убираем промпт, оставляем только сгенерированный текст
             prompt_length = inputs["input_ids"].shape[1]
             generated_ids = output_ids[0, prompt_length:]
@@ -129,21 +136,19 @@ if st.button("🔍 Распознать текст", use_container_width=True, t
                 clean_up_tokenization_spaces=True
             ).strip()
-            if not generated_text:
-                st.warning("Модель не смогла извлечь текст. Попробуйте более чёткое изображение с английским текстом.")
-            else:
-                st.success("✅ Распознавание завершено!")
-                st.markdown('<div class="result-box">', unsafe_allow_html=True)
-                st.subheader("📝 Распознанный текст")
-                st.code(generated_text, language=None)
-                st.markdown('</div>', unsafe_allow_html=True)
-                st.download_button(
-                    label="💾 Скачать как .txt",
-                    data=generated_text,
-                    file_name="recognized_text.txt",
-                    mime="text/plain"
-                )
 st.markdown("---")
 st.caption("Сделано на базе [lightonai/LightOnOCR-1B-1025](https://huggingface.co/lightonai/LightOnOCR-1B-1025)")

     else:
         with st.spinner("Распознавание текста... (5–30 сек на CPU)"):
+            # Промпт для модели
+            prompt = "Extract all the text from this image as accurately as possible. Preserve line breaks, formatting and tables."
+            # 1. Получаем текстовый шаблон чата (без токенизации)
             conversation = [
                 {
                     "role": "user",
                     "content": [
                         {"type": "image"},
+                        {"type": "text", "text": prompt}
                     ]
                 }
             ]
+            text_prompt = processor.apply_chat_template(
                 conversation,
+                tokenize=False,
+                add_generation_prompt=True
+            )
+            # 2. Правильный вызов процессора (ключевой момент!)
+            inputs = processor(
+                text=[text_prompt],
+                images=[[img]],                    # двойной список — обязательно!
                 return_tensors="pt",
+                padding=True,
+                size={"longest_edge": 1540}        # рекомендуемый размер модели
             )
+            # Переносим на устройство
             inputs = {
                 k: (v.to(device=device, dtype=dtype) if v.is_floating_point() else v.to(device))
                 for k, v in inputs.items()
                 eos_token_id=processor.tokenizer.eos_token_id,
             )
+            # Убираем промпт
             prompt_length = inputs["input_ids"].shape[1]
             generated_ids = output_ids[0, prompt_length:]
                 clean_up_tokenization_spaces=True
             ).strip()
+            # Результат
+            st.success("✅ Распознавание завершено!")
+            st.markdown('<div class="result-box">', unsafe_allow_html=True)
+            st.subheader("📝 Распознанный текст")
+            st.code(generated_text, language=None)
+            st.markdown('</div>', unsafe_allow_html=True)
+            st.download_button(
+                label="💾 Скачать как .txt",
+                data=generated_text,
+                file_name="recognized_text.txt",
+                mime="text/plain"
+            )
 st.markdown("---")
 st.caption("Сделано на базе [lightonai/LightOnOCR-1B-1025](https://huggingface.co/lightonai/LightOnOCR-1B-1025)")