GSoumyajit2005 committed on
Commit
4768ab6
·
1 Parent(s): aa4f954

refactor: Reorganize project structure

Browse files

- Moved the SROIE data loader (load_sroie_dataset.py) to src/sroie_loader.py
- Moved training and eval scripts to scripts/ folder
- Updated imports and added the project root to sys.path in each script so the src package resolves
- Removed unused tests/utils.py

.dockerignore CHANGED
@@ -29,6 +29,7 @@ env
29
  data/
30
  outputs/
31
  temp/
 
32
 
33
  # Tests (not needed in production)
34
  tests/
 
29
  data/
30
  outputs/
31
  temp/
32
+ models/
33
 
34
  # Tests (not needed in production)
35
  tests/
eval_new_dataset.py → scripts/eval_new_dataset.py RENAMED
@@ -1,3 +1,8 @@
 
 
 
 
 
1
  import torch
2
  from src.data_loader import load_unified_dataset
3
  from transformers import LayoutLMv3ForTokenClassification, LayoutLMv3Processor, DataCollatorForTokenClassification
 
1
+ import sys
2
+ import os
3
+
4
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
5
+
6
  import torch
7
  from src.data_loader import load_unified_dataset
8
  from transformers import LayoutLMv3ForTokenClassification, LayoutLMv3Processor, DataCollatorForTokenClassification
explore_new_dataset.py → scripts/explore_new_dataset.py RENAMED
@@ -1,3 +1,8 @@
 
 
 
 
 
1
  from datasets import load_dataset
2
  import json
3
  import ast # <--- Added for robust parsing
 
1
+ import sys
2
+ import os
3
+
4
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
5
+
6
  from datasets import load_dataset
7
  import json
8
  import ast # <--- Added for robust parsing
train_combined.py → scripts/train_combined.py RENAMED
@@ -1,3 +1,8 @@
 
 
 
 
 
1
  import torch
2
  from torch.utils.data import Dataset, DataLoader
3
  from transformers import LayoutLMv3ForTokenClassification, LayoutLMv3Processor, DataCollatorForTokenClassification
@@ -10,7 +15,7 @@ import random
10
  import os
11
 
12
  # --- IMPORTS ---
13
- from load_sroie_dataset import load_sroie
14
  from src.data_loader import load_unified_dataset
15
 
16
  # --- CONFIGURATION ---
 
1
+ import sys
2
+ import os
3
+
4
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
5
+
6
  import torch
7
  from torch.utils.data import Dataset, DataLoader
8
  from transformers import LayoutLMv3ForTokenClassification, LayoutLMv3Processor, DataCollatorForTokenClassification
 
15
  import os
16
 
17
  # --- IMPORTS ---
18
+ from src.sroie_loader import load_sroie
19
  from src.data_loader import load_unified_dataset
20
 
21
  # --- CONFIGURATION ---
train_layoutlm.py → scripts/train_layoutlm.py RENAMED
@@ -1,7 +1,12 @@
 
 
 
 
 
1
  import torch
2
  from torch.utils.data import Dataset, DataLoader
3
  from transformers import LayoutLMv3ForTokenClassification, LayoutLMv3Processor, DataCollatorForTokenClassification
4
- from load_sroie_dataset import load_sroie # Assumes your helper script is in the root
5
  from PIL import Image
6
  from tqdm import tqdm
7
  from seqeval.metrics import f1_score, precision_score, recall_score
 
1
+ import sys
2
+ import os
3
+
4
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
5
+
6
  import torch
7
  from torch.utils.data import Dataset, DataLoader
8
  from transformers import LayoutLMv3ForTokenClassification, LayoutLMv3Processor, DataCollatorForTokenClassification
9
+ from src.sroie_loader import load_sroie
10
  from PIL import Image
11
  from tqdm import tqdm
12
  from seqeval.metrics import f1_score, precision_score, recall_score
load_sroie_dataset.py → src/sroie_loader.py RENAMED
File without changes
tests/utils.py DELETED
@@ -1,7 +0,0 @@
1
- def save_image(image, path):
2
-
3
- def visualize_boxes(image, boxes, text):
4
-
5
- def validate_output(data):
6
-
7
- def format_currency(amount):