Spaces:
Sleeping
Sleeping
Commit
·
4768ab6
1
Parent(s):
aa4f954
refactor: Reorganize project structure
Browse files
- Moved data loader to src/sroie_loader.py
- Moved training and eval scripts to scripts/ folder
- Updated imports to fix python path
- Removed unused tests/utils.py
- .dockerignore +1 -0
- eval_new_dataset.py → scripts/eval_new_dataset.py +5 -0
- explore_new_dataset.py → scripts/explore_new_dataset.py +5 -0
- train_combined.py → scripts/train_combined.py +6 -1
- train_layoutlm.py → scripts/train_layoutlm.py +6 -1
- load_sroie_dataset.py → src/sroie_loader.py +0 -0
- tests/utils.py +0 -7
.dockerignore
CHANGED
|
@@ -29,6 +29,7 @@ env
|
|
| 29 |
data/
|
| 30 |
outputs/
|
| 31 |
temp/
|
|
|
|
| 32 |
|
| 33 |
# Tests (not needed in production)
|
| 34 |
tests/
|
|
|
|
| 29 |
data/
|
| 30 |
outputs/
|
| 31 |
temp/
|
| 32 |
+
models/
|
| 33 |
|
| 34 |
# Tests (not needed in production)
|
| 35 |
tests/
|
eval_new_dataset.py → scripts/eval_new_dataset.py
RENAMED
|
@@ -1,3 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import torch
|
| 2 |
from src.data_loader import load_unified_dataset
|
| 3 |
from transformers import LayoutLMv3ForTokenClassification, LayoutLMv3Processor, DataCollatorForTokenClassification
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
import os
|
| 3 |
+
|
| 4 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
| 5 |
+
|
| 6 |
import torch
|
| 7 |
from src.data_loader import load_unified_dataset
|
| 8 |
from transformers import LayoutLMv3ForTokenClassification, LayoutLMv3Processor, DataCollatorForTokenClassification
|
explore_new_dataset.py → scripts/explore_new_dataset.py
RENAMED
|
@@ -1,3 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from datasets import load_dataset
|
| 2 |
import json
|
| 3 |
import ast # <--- Added for robust parsing
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
import os
|
| 3 |
+
|
| 4 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
| 5 |
+
|
| 6 |
from datasets import load_dataset
|
| 7 |
import json
|
| 8 |
import ast # <--- Added for robust parsing
|
train_combined.py → scripts/train_combined.py
RENAMED
|
@@ -1,3 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import torch
|
| 2 |
from torch.utils.data import Dataset, DataLoader
|
| 3 |
from transformers import LayoutLMv3ForTokenClassification, LayoutLMv3Processor, DataCollatorForTokenClassification
|
|
@@ -10,7 +15,7 @@ import random
|
|
| 10 |
import os
|
| 11 |
|
| 12 |
# --- IMPORTS ---
|
| 13 |
-
from load_sroie_dataset import load_sroie
|
| 14 |
from src.data_loader import load_unified_dataset
|
| 15 |
|
| 16 |
# --- CONFIGURATION ---
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
import os
|
| 3 |
+
|
| 4 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
| 5 |
+
|
| 6 |
import torch
|
| 7 |
from torch.utils.data import Dataset, DataLoader
|
| 8 |
from transformers import LayoutLMv3ForTokenClassification, LayoutLMv3Processor, DataCollatorForTokenClassification
|
|
|
|
| 15 |
import os
|
| 16 |
|
| 17 |
# --- IMPORTS ---
|
| 18 |
+
from src.sroie_loader import load_sroie
|
| 19 |
from src.data_loader import load_unified_dataset
|
| 20 |
|
| 21 |
# --- CONFIGURATION ---
|
train_layoutlm.py → scripts/train_layoutlm.py
RENAMED
|
@@ -1,7 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import torch
|
| 2 |
from torch.utils.data import Dataset, DataLoader
|
| 3 |
from transformers import LayoutLMv3ForTokenClassification, LayoutLMv3Processor, DataCollatorForTokenClassification
|
| 4 |
-
from load_sroie_dataset import load_sroie
|
| 5 |
from PIL import Image
|
| 6 |
from tqdm import tqdm
|
| 7 |
from seqeval.metrics import f1_score, precision_score, recall_score
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
import os
|
| 3 |
+
|
| 4 |
+
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
|
| 5 |
+
|
| 6 |
import torch
|
| 7 |
from torch.utils.data import Dataset, DataLoader
|
| 8 |
from transformers import LayoutLMv3ForTokenClassification, LayoutLMv3Processor, DataCollatorForTokenClassification
|
| 9 |
+
from src.sroie_loader import load_sroie
|
| 10 |
from PIL import Image
|
| 11 |
from tqdm import tqdm
|
| 12 |
from seqeval.metrics import f1_score, precision_score, recall_score
|
load_sroie_dataset.py → src/sroie_loader.py
RENAMED
|
File without changes
|
tests/utils.py
DELETED
|
@@ -1,7 +0,0 @@
|
|
| 1 |
-
def save_image(image, path):
|
| 2 |
-
|
| 3 |
-
def visualize_boxes(image, boxes, text):
|
| 4 |
-
|
| 5 |
-
def validate_output(data):
|
| 6 |
-
|
| 7 |
-
def format_currency(amount):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|