Spaces:

astart01
/

hrLesha

Build error

astart01 committed on May 12, 2025

Commit

45eddb7

1 Parent(s): 88a3e54

bubububbu

Files changed (2) hide show

Dockerfile CHANGED Viewed

@@ -11,7 +11,7 @@ RUN apt-get update && apt-get install -y \
     && apt-get clean && rm -rf /var/lib/apt/lists/*
 # Явная установка NumPy перед остальными зависимостями
-RUN pip install --no-cache-dir numpy==1.26.4
 # Установка остальных зависимостей
 RUN pip install --no-cache-dir -r requirements.txt
@@ -25,8 +25,4 @@ RUN python -c "import numpy; print(f'NumPy version: {numpy.__version__}')" \
 EXPOSE 8501
 # Запуск Streamlit
-CMD ["streamlit", "run", "app.py", "--server.address=0.0.0.0"]
-# В конце Dockerfile добавьте:
-# Установка правильных разрешений для временных директорий
-RUN mkdir -p /tmp && chmod 777 /tmp

     && apt-get clean && rm -rf /var/lib/apt/lists/*
 # Явная установка NumPy перед остальными зависимостями
+RUN pip install --no-cache-dir numpy==2.1.3
 # Установка остальных зависимостей
 RUN pip install --no-cache-dir -r requirements.txt
 EXPOSE 8501
 # Запуск Streamlit
+CMD ["streamlit", "run", "app.py", "--server.address=0.0.0.0"]

app.py CHANGED Viewed

@@ -28,23 +28,33 @@ def clean_text(text):
     text = text.translate(str.maketrans('', '', string.punctuation))
     return text
-def extract_text_with_pypdf2(pdf_file):
     text = ""
     try:
-        import io
-        from PyPDF2 import PdfReader
-        # Чтение PDF из памяти
-        pdf_bytes = io.BytesIO(pdf_file.getvalue())
-        reader = PdfReader(pdf_bytes)
-        # Получение текста из всех страниц
-        for page in reader.pages:
-            page_text = page.extract_text() or ""
-            text += page_text
     except Exception as e:
-        st.error(f"Ошибка при чтении PDF через PyPDF2: {e}")
         import traceback
         st.code(traceback.format_exc())

     text = text.translate(str.maketrans('', '', string.punctuation))
     return text
+def extract_text_from_pdf(pdf_file):
     text = ""
     try:
+        # Сохраняем во временный файл
+        import tempfile
+        import os
+        # Создаем временную директорию, если её нет
+        os.makedirs('/tmp', exist_ok=True)
+        # Сохраняем во временный файл
+        temp_path = f"/tmp/{pdf_file.name}"
+        with open(temp_path, 'wb') as f:
+            f.write(pdf_file.getbuffer())
+        st.info(f"Файл сохранен по пути: {temp_path}")
+        # Открываем PDF из файла
+        with fitz.open(temp_path) as doc:
+            for page in doc:
+                text += page.get_text()
+        # Удаляем временный файл
+        os.remove(temp_path)
     except Exception as e:
+        st.error(f"Ошибка при чтении PDF: {e}")
         import traceback
         st.code(traceback.format_exc())