Spaces:

GSoumyajit2005
/

invoice-processor-ml

Sleeping

GSoumyajit2005 committed on Jan 12

Commit

7630bcd

1 Parent(s): 6b86b97

Restore full history with LFS images and all fixes

Files changed (8) hide show

.github/workflows/deploy.yml CHANGED Viewed

@@ -1,9 +1,7 @@
 name: Sync to Hugging Face Spaces
 on:
   push:
-    branches: [main]  # Trigger this whenever you push to the main branch
 jobs:
   sync-to-hub:
     runs-on: ubuntu-latest
@@ -11,27 +9,16 @@ jobs:
       - name: Checkout GitHub Code
         uses: actions/checkout@v3
         with:
-          fetch-depth: 0  # Fetch full history to allow merging
       - name: Push to Hugging Face
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
           HF_USERNAME: GSoumyajit2005
           SPACE_NAME: invoice-processor-ml
         run: |
-          # 1. Configure Git
           git config --global user.email "actions@github.com"
           git config --global user.name "GitHub Actions"
-          # 2. Add Hugging Face as a remote
           git remote add space https://$HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/$HF_USERNAME/$SPACE_NAME
-          # 3. Fetch the latest history from HF (to keep your heavy models safe)
           git fetch space main
-          # 4. Merge HF history (models) with GitHub history (code)
-          # We use strategy 'ours' to say "If code conflicts, GitHub wins"
           git merge space/main --allow-unrelated-histories -X ours -m "Sync GitHub code with HF models"
-          # 5. Push the combined result back to Hugging Face
           git push space main

 name: Sync to Hugging Face Spaces
 on:
   push:
+    branches: [main]
 jobs:
   sync-to-hub:
     runs-on: ubuntu-latest
       - name: Checkout GitHub Code
         uses: actions/checkout@v3
         with:
+          fetch-depth: 0
       - name: Push to Hugging Face
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
           HF_USERNAME: GSoumyajit2005
           SPACE_NAME: invoice-processor-ml
         run: |
           git config --global user.email "actions@github.com"
           git config --global user.name "GitHub Actions"
           git remote add space https://$HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/$HF_USERNAME/$SPACE_NAME
           git fetch space main
           git merge space/main --allow-unrelated-histories -X ours -m "Sync GitHub code with HF models"
           git push space main

Dockerfile CHANGED Viewed

@@ -23,4 +23,4 @@ COPY . .
 EXPOSE 7860
 # 3. Run Streamlit
-CMD ["streamlit", "run", "app.py", "--server.port", "7860", "--server.address", "0.0.0.0"]

 EXPOSE 7860
 # 3. Run Streamlit
+CMD ["streamlit", "run", "app.py", "--server.port", "7860", "--server.address", "0.0.0.0", "--server.enableCORS", "false", "--server.enableXsrfProtection", "false"]

README.md CHANGED Viewed

@@ -6,7 +6,7 @@ colorTo: pink
 sdk: docker
 pinned: false
 license: mit
-short_description: A hybrid invoice extraction system using LayoutLMv3 and Regex
 ---
 # 📄 Smart Invoice Processor

 sdk: docker
 pinned: false
 license: mit
+short_description: Hybrid invoice extraction using LayoutLMv3 and Regex
 ---
 # 📄 Smart Invoice Processor

data/samples/sample_invoice.jpg ADDED Viewed

docs/screenshots/format_detection.png ADDED Viewed

docs/screenshots/homepage.png ADDED Viewed

docs/screenshots/success_result.png ADDED Viewed

src/ml_extraction.py CHANGED Viewed

@@ -1,7 +1,9 @@
 # src/ml_extraction.py
 import torch
 from transformers import LayoutLMv3Processor, LayoutLMv3ForTokenClassification
 from PIL import Image
 import pytesseract
 from typing import List, Dict, Any
@@ -15,18 +17,19 @@ HUB_MODEL_ID = "GSoumyajit2005/layoutlmv3-sroie-invoice-extraction"
 # --- Load Model ---
 def load_model_and_processor(model_path, hub_id):
-    try:
-        print(f"Attempting to load model from local path: {model_path}...")
-        processor = LayoutLMv3Processor.from_pretrained(model_path)
-        model = LayoutLMv3ForTokenClassification.from_pretrained(model_path)
-        print("✅ Model loaded successfully from local path.")
-    except OSError:
-        print(f"Model not found locally. Downloading from Hub: {hub_id}...")
-        from huggingface_hub import snapshot_download
         snapshot_download(repo_id=hub_id, local_dir=model_path, local_dir_use_symlinks=False)
-        processor = LayoutLMv3Processor.from_pretrained(model_path)
         model = LayoutLMv3ForTokenClassification.from_pretrained(model_path)
-        print("✅ Model downloaded and loaded successfully.")
     return model, processor
 MODEL, PROCESSOR = load_model_and_processor(LOCAL_MODEL_PATH, HUB_MODEL_ID)

 # src/ml_extraction.py
+import os
 import torch
 from transformers import LayoutLMv3Processor, LayoutLMv3ForTokenClassification
+from huggingface_hub import snapshot_download
 from PIL import Image
 import pytesseract
 from typing import List, Dict, Any
 # --- Load Model ---
 def load_model_and_processor(model_path, hub_id):
+    print("Loading processor from microsoft/layoutlmv3-base...")
+    processor = LayoutLMv3Processor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
+    if not os.path.exists(model_path) or not os.listdir(model_path):
+        print(f"Downloading model from Hub: {hub_id}...")
         snapshot_download(repo_id=hub_id, local_dir=model_path, local_dir_use_symlinks=False)
+    try:
         model = LayoutLMv3ForTokenClassification.from_pretrained(model_path)
+    except Exception:
+        print(f"Fallback: Loading directly from Hub {hub_id}...")
+        model = LayoutLMv3ForTokenClassification.from_pretrained(hub_id)
     return model, processor
 MODEL, PROCESSOR = load_model_and_processor(LOCAL_MODEL_PATH, HUB_MODEL_ID)