Spaces:

jmparejaz
/

documents_classifyer

Build error

jmparejaz committed on Mar 30

Commit

7e04d95

verified ·

1 Parent(s): 2b593e9

fix: use huggingface/spaces base image with Tesseract

Files changed (4) hide show

Dockerfile ADDED Viewed

+# Base image for the Space's Docker build.
+# NOTE(review): confirm that `huggingface/spaces:latest` exists and provides
+# apt, Python and pip; pinning a specific tag would make builds reproducible.
+FROM huggingface/spaces:latest
+# Install the Tesseract OCR engine and its Spanish language pack, then remove
+# the apt package lists in the same layer so they are not kept in the image.
+RUN apt-get update && apt-get install -y \
+    tesseract-ocr \
+    tesseract-ocr-spa \
+    && rm -rf /var/lib/apt/lists/*
+# Install Python dependencies before copying the app, so this layer can be
+# reused from the build cache when only app.py changes.
+# --no-cache-dir stops pip from keeping its download cache in the image.
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the application entry point.
+# NOTE(review): no WORKDIR is set, so files are copied into the base image's
+# default working directory; confirm that is intended.
+COPY app.py .
+# Have Gradio bind to all interfaces on port 7860.
+# NOTE(review): presumably 7860 is the port the Space expects; confirm.
+ENV GRADIO_SERVER_NAME=0.0.0.0
+ENV GRADIO_SERVER_PORT=7860
+# Launch the app with the image's default Python interpreter.
+CMD ["python", "app.py"]

README.md CHANGED Viewed

@@ -3,9 +3,7 @@ title: LayoutLMv3 Document Classifier
 emoji: 📄
 colorFrom: blue
 colorTo: indigo
-sdk: gradio
-sdk_version: 5.0.0
-app_file: app.py
 pinned: false
 license: apache-2.0
 short_description: Clasificador de documentos legales con LayoutLMv3

 emoji: 📄
 colorFrom: blue
 colorTo: indigo
+sdk: docker
 pinned: false
 license: apache-2.0
 short_description: Clasificador de documentos legales con LayoutLMv3

check_space.py ADDED Viewed

+from playwright.sync_api import sync_playwright
+# Open the Space's page in headless Chromium and print its title plus the
+# first 1500 characters of the body text, as a quick manual status check.
+with sync_playwright() as p:
+    browser = p.chromium.launch(headless=True)
+    # try/finally guarantees the browser is closed even when new_page() fails
+    # or the run is interrupted (e.g. Ctrl-C during the 60 s navigation).
+    try:
+        page = browser.new_page()
+        try:
+            page.goto("https://huggingface.co/spaces/jmparejaz/documents_classifyer", timeout=60000, wait_until="domcontentloaded")
+            # Fixed 3 s pause after DOMContentLoaded before reading the page.
+            page.wait_for_timeout(3000)
+            print("Title:", page.title())
+            # Get page content
+            body_text = page.locator("body").inner_text()
+            print("\n--- Page Content ---")
+            print(body_text[:1500])
+        except Exception as e:
+            # Best-effort diagnostic script: report the failure and continue
+            # to cleanup instead of crashing with a traceback.
+            print(f"Error: {e}")
+    finally:
+        browser.close()

delete_pdfs.py ADDED Viewed

+from huggingface_hub import HfApi
+# One-off cleanup script: remove the listed PDF files from the Space repo.
+api = HfApi()
+repo_id = "jmparejaz/documents_classifyer"
+
+# Paths (relative to the repo root) of the PDFs to remove.
+files_to_delete = [
+    "EP - Grado de Serano, Maria - S&I.pdf",
+    "EP - Hernandez Portillo, Manuel - S&I.pdf",
+    "EP - Ibarra, Guadalupe - S&I.pdf",
+    "EP - Mendoza Reyes, Faustino - S&I.pdf",
+    "MC - Garza, Anna - S&I Notes.pdf",
+    "MC - Garza, Anna - S&I SOCP.pdf",
+    "MC - Garza, Anna - S&I.pdf",
+    "MC - Junez, Juan Jr - S&I.pdf",
+    "MC - Marron, Maria - S&I Notes.pdf",
+    "MC - Marron, Maria - S&I SOCP.pdf",
+    "MC - Marron, Maria - S&I.pdf",
+    "SA - Banda, Nicolas - S&I.pdf",
+    "SA - Benavidez, Ysidro - S&I Demos.pdf",
+    "SA - Benavidez, Ysidro - S&I Notes.pdf",
+    "SA - Benavidez, Ysidro - S&I.pdf",
+    "SA - Fernandez, Hector - S&I.pdf",
+    "SA - Heath, Nelda - S&I Notes.pdf",
+    "SA - Heath, Nelda - S&I.pdf",
+    "SA - Valdez, Rogelio - S&I Demos.pdf",
+    "SA - Valdez, Rogelio - S&I Notes.pdf",
+    "SA - Valdez, Rogelio - S&I.pdf",
+]
+
+# Delete the files one by one; a failure on one file is reported and the loop
+# moves on to the next, so a single bad path does not abort the cleanup.
+# NOTE(review): presumably each call creates its own commit and the files stay
+# reachable in the repo's git history; confirm if they must be fully purged.
+for filename in files_to_delete:
+    try:
+        api.delete_file(path_in_repo=filename, repo_id=repo_id, repo_type="space")
+    except Exception as err:
+        print(f"Error deleting {filename}: {err}")
+    else:
+        print(f"Deleted: {filename}")