GSoumyajit2005 committed on
Commit
7630bcd
·
1 Parent(s): 6b86b97

Restore full history with LFS images and all fixes

Browse files
.github/workflows/deploy.yml CHANGED
@@ -1,9 +1,7 @@
1
  name: Sync to Hugging Face Spaces
2
-
3
  on:
4
  push:
5
- branches: [main] # Trigger this whenever you push to the main branch
6
-
7
  jobs:
8
  sync-to-hub:
9
  runs-on: ubuntu-latest
@@ -11,27 +9,16 @@ jobs:
11
  - name: Checkout GitHub Code
12
  uses: actions/checkout@v3
13
  with:
14
- fetch-depth: 0 # Fetch full history to allow merging
15
-
16
  - name: Push to Hugging Face
17
  env:
18
  HF_TOKEN: ${{ secrets.HF_TOKEN }}
19
  HF_USERNAME: GSoumyajit2005
20
  SPACE_NAME: invoice-processor-ml
21
  run: |
22
- # 1. Configure Git
23
  git config --global user.email "actions@github.com"
24
  git config --global user.name "GitHub Actions"
25
-
26
- # 2. Add Hugging Face as a remote
27
  git remote add space https://$HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/$HF_USERNAME/$SPACE_NAME
28
-
29
- # 3. Fetch the latest history from HF (to keep your heavy models safe)
30
  git fetch space main
31
-
32
- # 4. Merge HF history (models) with GitHub history (code)
33
- # We use strategy 'ours' to say "If code conflicts, GitHub wins"
34
  git merge space/main --allow-unrelated-histories -X ours -m "Sync GitHub code with HF models"
35
-
36
- # 5. Push the combined result back to Hugging Face
37
  git push space main
 
1
  name: Sync to Hugging Face Spaces
 
2
  on:
3
  push:
4
+ branches: [main]
 
5
  jobs:
6
  sync-to-hub:
7
  runs-on: ubuntu-latest
 
9
  - name: Checkout GitHub Code
10
  uses: actions/checkout@v3
11
  with:
12
+ fetch-depth: 0
 
13
  - name: Push to Hugging Face
14
  env:
15
  HF_TOKEN: ${{ secrets.HF_TOKEN }}
16
  HF_USERNAME: GSoumyajit2005
17
  SPACE_NAME: invoice-processor-ml
18
  run: |
 
19
  git config --global user.email "actions@github.com"
20
  git config --global user.name "GitHub Actions"
 
 
21
  git remote add space https://$HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/$HF_USERNAME/$SPACE_NAME
 
 
22
  git fetch space main
 
 
 
23
  git merge space/main --allow-unrelated-histories -X ours -m "Sync GitHub code with HF models"
 
 
24
  git push space main
Dockerfile CHANGED
@@ -23,4 +23,4 @@ COPY . .
23
  EXPOSE 7860
24
 
25
  # 3. Run Streamlit
26
- CMD ["streamlit", "run", "app.py", "--server.port", "7860", "--server.address", "0.0.0.0"]
 
23
  EXPOSE 7860
24
 
25
  # 3. Run Streamlit
26
+ CMD ["streamlit", "run", "app.py", "--server.port", "7860", "--server.address", "0.0.0.0", "--server.enableCORS", "false", "--server.enableXsrfProtection", "false"]
README.md CHANGED
@@ -6,7 +6,7 @@ colorTo: pink
6
  sdk: docker
7
  pinned: false
8
  license: mit
9
- short_description: A hybrid invoice extraction system using LayoutLMv3 and Regex
10
  ---
11
 
12
  # 📄 Smart Invoice Processor
 
6
  sdk: docker
7
  pinned: false
8
  license: mit
9
+ short_description: Hybrid invoice extraction using LayoutLMv3 and Regex
10
  ---
11
 
12
  # 📄 Smart Invoice Processor
data/samples/sample_invoice.jpg ADDED

Git LFS Details

  • SHA256: f9c8699bb1adcfa3a49cd8425057c1818b5b4ec62d003a6f8bd5b0af8d7ccd53
  • Pointer size: 131 Bytes
  • Size of remote file: 157 kB
docs/screenshots/format_detection.png ADDED

Git LFS Details

  • SHA256: a1bc15780a1cd15ed04d67c756be7575066ad6e70f7a879aa1a47fd051ef4398
  • Pointer size: 131 Bytes
  • Size of remote file: 151 kB
docs/screenshots/homepage.png ADDED

Git LFS Details

  • SHA256: 55f5e55df3502f21ce18a98ef3ea107bee46ba76ff7941854675d541a3adbf40
  • Pointer size: 131 Bytes
  • Size of remote file: 134 kB
docs/screenshots/success_result.png ADDED

Git LFS Details

  • SHA256: b7e89be758e79a4d5bf25c04c12e05e2008e4e7e1945a4a2b9848730bf3c1e5d
  • Pointer size: 131 Bytes
  • Size of remote file: 170 kB
src/ml_extraction.py CHANGED
@@ -1,7 +1,9 @@
1
  # src/ml_extraction.py
2
 
 
3
  import torch
4
  from transformers import LayoutLMv3Processor, LayoutLMv3ForTokenClassification
 
5
  from PIL import Image
6
  import pytesseract
7
  from typing import List, Dict, Any
@@ -15,18 +17,19 @@ HUB_MODEL_ID = "GSoumyajit2005/layoutlmv3-sroie-invoice-extraction"
15
 
16
  # --- Load Model ---
17
  def load_model_and_processor(model_path, hub_id):
18
- try:
19
- print(f"Attempting to load model from local path: {model_path}...")
20
- processor = LayoutLMv3Processor.from_pretrained(model_path)
21
- model = LayoutLMv3ForTokenClassification.from_pretrained(model_path)
22
- print(" Model loaded successfully from local path.")
23
- except OSError:
24
- print(f"Model not found locally. Downloading from Hub: {hub_id}...")
25
- from huggingface_hub import snapshot_download
26
  snapshot_download(repo_id=hub_id, local_dir=model_path, local_dir_use_symlinks=False)
27
- processor = LayoutLMv3Processor.from_pretrained(model_path)
 
28
  model = LayoutLMv3ForTokenClassification.from_pretrained(model_path)
29
- print("✅ Model downloaded and loaded successfully.")
 
 
 
30
  return model, processor
31
 
32
  MODEL, PROCESSOR = load_model_and_processor(LOCAL_MODEL_PATH, HUB_MODEL_ID)
 
1
  # src/ml_extraction.py
2
 
3
+ import os
4
  import torch
5
  from transformers import LayoutLMv3Processor, LayoutLMv3ForTokenClassification
6
+ from huggingface_hub import snapshot_download
7
  from PIL import Image
8
  import pytesseract
9
  from typing import List, Dict, Any
 
17
 
18
  # --- Load Model ---
19
  def load_model_and_processor(model_path, hub_id):
20
+ print("Loading processor from microsoft/layoutlmv3-base...")
21
+ processor = LayoutLMv3Processor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
22
+
23
+ if not os.path.exists(model_path) or not os.listdir(model_path):
24
+ print(f"Downloading model from Hub: {hub_id}...")
 
 
 
25
  snapshot_download(repo_id=hub_id, local_dir=model_path, local_dir_use_symlinks=False)
26
+
27
+ try:
28
  model = LayoutLMv3ForTokenClassification.from_pretrained(model_path)
29
+ except Exception:
30
+ print(f"Fallback: Loading directly from Hub {hub_id}...")
31
+ model = LayoutLMv3ForTokenClassification.from_pretrained(hub_id)
32
+
33
  return model, processor
34
 
35
  MODEL, PROCESSOR = load_model_and_processor(LOCAL_MODEL_PATH, HUB_MODEL_ID)