tsaddev committed on
Commit
0a2675c
·
verified ·
1 Parent(s): eff6430

Upload 18 files

Browse files
.gitattributes CHANGED
@@ -1,5 +1,35 @@
1
- models/charrec.pt filter=lfs diff=lfs merge=lfs -text
2
- models/large_LP_YOLOm_best.pt filter=lfs diff=lfs merge=lfs -text
3
- models/yolov8n_lp_det.pt filter=lfs diff=lfs merge=lfs -text
4
- models/yolov8n_lpchar_det.pt filter=lfs diff=lfs merge=lfs -text
5
- models/yolov8n.pt filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -1,5 +1,7 @@
1
  import gradio as gr
2
  import model1 as m1
 
 
3
  cars = []
4
  lps = []
5
  lp_texts = []
@@ -12,6 +14,20 @@ def model1(image):
12
  counter = 0
13
  return cars[0], lps[0], lp_texts[0]
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  # function to go to next detected car licence plate
16
  def next_img():
17
  global counter
@@ -26,6 +42,21 @@ def prev_img():
26
  index = int(counter % len(cars))
27
  return cars[index], lps[index], lp_texts[index]
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  # this code is responsible for the front-end part of the page
30
  with gr.Blocks() as demo:
31
  gr.Markdown("## ANPR Project")
@@ -47,5 +78,24 @@ with gr.Blocks() as demo:
47
  submit.click(model1, inputs=[img], outputs=[car, lp, lp_text])
48
  next.click(next_img, outputs=[car, lp, lp_text])
49
  prev.click(prev_img, outputs=[car, lp, lp_text])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
  demo.launch(share=False)
 
1
  import gradio as gr
2
  import model1 as m1
3
+ import model2 as m2
4
+
5
  cars = []
6
  lps = []
7
  lp_texts = []
 
14
  counter = 0
15
  return cars[0], lps[0], lp_texts[0]
16
 
17
+ # this is the main function for the "Model 2" tab: it passes the image to model 2 and returns the first licence plate and its text
18
+ def model2(image):
19
+ global lps, lp_texts, counter
20
+ (lps, lp_texts) = m2.run([image])
21
+ counter = 0
22
+ return lps[0], lp_texts[0]
23
+
24
+ # this is the main function that passes the image to model 3 (NOTE(review): `m3` is not among the imports shown above, only m1 and m2 — confirm)
25
+ def model3(image):
26
+ global cars, lps, lp_texts, counter
27
+ (cars, lps, lp_texts) = m3.run([image])
28
+ counter = 0
29
+ return cars[0], lps[0], lp_texts[0]
30
+
31
  # function to go to next detected car licence plate
32
  def next_img():
33
  global counter
 
42
  index = int(counter % len(cars))
43
  return cars[index], lps[index], lp_texts[index]
44
 
45
+
46
+ # function to go to next detected licence plate
47
+ def next_img_lp():
48
+ global counter
49
+ counter += 1
50
+ index = int(counter % len(lps))
51
+ return lps[index], lp_texts[index]
52
+
53
+ # function to go to prev detected licence plate
54
+ def prev_img_lp():
55
+ global counter
56
+ counter -= 1
57
+ index = int(counter % len(lps))
58
+ return lps[index], lp_texts[index]
59
+
60
  # this code is responsible for the front-end part of the page
61
  with gr.Blocks() as demo:
62
  gr.Markdown("## ANPR Project")
 
78
  submit.click(model1, inputs=[img], outputs=[car, lp, lp_text])
79
  next.click(next_img, outputs=[car, lp, lp_text])
80
  prev.click(prev_img, outputs=[car, lp, lp_text])
81
+
82
+ with gr.Tab("Model 2"):
83
+ gr.Markdown("Using 2 different ML models")
84
+ gr.Markdown("YOLOv8m for car dection + easy ocr for text detection")
85
+ gr.Markdown("YOLOv8m for car dection is trained on a large dataset of 25K training images")
86
+ img2 = gr.Image(label="Input")
87
+ submit2 = gr.Button(value="submit")
88
+
89
+ with gr.Row():
90
+ lp2 = gr.Image(label="Licence Plate")
91
+ lp_text2 = gr.Text(label="Plate Number")
92
+
93
+ with gr.Row():
94
+ next2 = gr.Button(value="next")
95
+ prev2 = gr.Button(value="prev")
96
+
97
+ submit2.click(model2, inputs=[img2], outputs=[lp2, lp_text2])
98
+ next2.click(next_img_lp, outputs=[lp2, lp_text2])
99
+ prev2.click(prev_img_lp, outputs=[lp2, lp_text2])
100
 
101
  demo.launch(share=False)
model1.py CHANGED
@@ -1,10 +1,12 @@
1
  from ultralytics import YOLO
2
- import easyocr
3
  import numpy as np
 
4
 
5
  car_detection = YOLO("models/yolov8n.pt")
6
  lp_detection = YOLO("models/yolov8n_lp_det.pt")
7
- reader = easyocr.Reader(['en'])
 
 
8
 
9
  # char_dect = YOLO("models/yolov8n_lpchar_det.pt")
10
  # char_rec = torch.load("models/charrec.pt", map_location="cpu")
@@ -54,14 +56,16 @@ def detect_lp_text(inputs):
54
  # iterating through each licence plate
55
  for input in inputs:
56
  # finding the number/text in licence plate
57
- result = reader.readtext(input)
 
 
58
 
59
  # if no text is found in the licence plate, then adding a default text not found
60
- if len(result) == 0:
61
  plate_number.append("not found")
62
  else:
63
  # adding the licence plate number to a list
64
- plate_number.append(result[0][1])
65
 
66
  return plate_number
67
 
 
1
  from ultralytics import YOLO
 
2
  import numpy as np
3
+ from transformers import TrOCRProcessor, VisionEncoderDecoderModel
4
 
5
  car_detection = YOLO("models/yolov8n.pt")
6
  lp_detection = YOLO("models/yolov8n_lp_det.pt")
7
+
8
+ processor = TrOCRProcessor.from_pretrained('models/processor')
9
+ model = VisionEncoderDecoderModel.from_pretrained('models/model')
10
 
11
  # char_dect = YOLO("models/yolov8n_lpchar_det.pt")
12
  # char_rec = torch.load("models/charrec.pt", map_location="cpu")
 
56
  # iterating through each licence plate
57
  for input in inputs:
58
  # finding the number/text in licence plate
59
+ pixel_values = processor(input, return_tensors="pt").pixel_values
60
+ generated_ids = model.generate(pixel_values)
61
+ generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
62
 
63
  # if no text is found in the licence plate, then adding a default text not found
64
+ if len(generated_text) == 0:
65
  plate_number.append("not found")
66
  else:
67
  # adding the licence plate number to a list
68
+ plate_number.append(generated_text)
69
 
70
  return plate_number
71
 
model2.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from model1 import np, YOLO, processor, model
2
+
3
+ lp_detection = YOLO("models/yolov8n_lp_det.pt")
4
+
5
+ # processor = TrOCRProcessor.from_pretrained('models/processor')
6
+ # model = VisionEncoderDecoderModel.from_pretrained('models/model')
7
+
8
+ # set special tokens used for creating the decoder_input_ids from the labels
9
+ model.config.decoder_start_token_id = processor.tokenizer.cls_token_id
10
+ model.config.pad_token_id = processor.tokenizer.pad_token_id
11
+ # make sure vocab size is set correctly
12
+ model.config.vocab_size = model.config.decoder.vocab_size
13
+
14
+ # set beam search parameters
15
+ model.config.eos_token_id = processor.tokenizer.sep_token_id
16
+ model.config.max_length = 20
17
+ model.config.early_stopping = True
18
+ model.config.no_repeat_ngram_size = 3
19
+ model.config.length_penalty = 2.0
20
+ model.config.num_beams = 4
21
+
22
+
23
+ # function to detect licence plates in the given car images
24
+ def detect_lp(inputs):
25
+ lps = []
26
+ # running the license plate detection model with 50% confidence threshold
27
+ lp_results = lp_detection.predict(source=inputs, conf=0.5, verbose=False)
28
+ # iterating through each output (num of outputs will be same as num of inputs)
29
+ for lp_result in lp_results:
30
+ # finding the bounding boxes of the license plate detected
31
+ lp_boxes = lp_result.boxes.xyxy.tolist()
32
+ # iterating through each license plate detected
33
+ for lp_box in lp_boxes:
34
+ # cropping license plate image from the car image
35
+ lp = lp_result.orig_img[int(lp_box[1]):int(lp_box[3]), int(lp_box[0]):int(lp_box[2])]
36
+ lps.append(lp)
37
+ # breaking as we only want to detect one licence plate per car
38
+ break
39
+
40
+ # if no licence plate is detected then we are adding a black image
41
+ if len(lp_boxes) == 0:
42
+ lps.append(np.zeros((100,100,3), np.uint8))
43
+
44
+ return lps
45
+
46
+ # function to detect licence plate number in the given licence plate images
47
+ def detect_lp_text(inputs):
48
+ plate_number = []
49
+ # iterating through each licence plate
50
+ for input in inputs:
51
+ # finding the number/text in licence plate
52
+ pixel_values = processor(input, return_tensors="pt").pixel_values
53
+ generated_ids = model.generate(pixel_values)
54
+ generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
55
+
56
+
57
+ # if no text is found in the licence plate, then adding a default text not found
58
+ if len(generated_text) == 0:
59
+ plate_number.append("not found")
60
+ else:
61
+ # adding the licence plate number to a list
62
+ plate_number.append(generated_text)
63
+
64
+ return plate_number
65
+
66
+ def run(inputs):
67
+
68
+ # for future, to handle multiple inputs
69
+ # currently using just one input
70
+ inputs = inputs[0]
71
+
72
+ # detecting licence plates from the input images
73
+ # returns licence plate images, if it cant find a license plate a black image is returned
74
+ lps = detect_lp(inputs)
75
+
76
+ # detecting licence plate number from licence plate images
77
+ # returns text from the licence plate images, if none is detected "not found" text is returned
78
+ lp_text = detect_lp_text(lps)
79
+
80
+ return lps, lp_text
81
+
model3.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from model1 import reader, np, YOLO, car_detection, lp_detection
2
+ import torch
3
+ from PIL import Image
4
+ import cv2
5
+ from torchvision import transforms
6
+
7
+ char_dect = YOLO("models/yolov8n_lpchar_det.pt")
8
+ char_rec = torch.load("models/charrec.pt", map_location="cpu")
9
+
10
+ # function to detect cars in the given image
11
+ def detect_cars(inputs):
12
+ cars = []
13
+ # running the cars detection model with 50% confidence threshold
14
+ car_results = car_detection.predict(source=inputs, classes=[2], conf=0.5, verbose=False)
15
+ # iterating through each output (num of outputs will be same as num of inputs)
16
+ for car_result in car_results:
17
+ # finding the bounding boxes of the cars detected
18
+ boxes = car_result.boxes.xyxy.tolist()
19
+ # iterating through each car detected
20
+ for box in boxes:
21
+ # cropping car image from the input image
22
+ car = car_result.orig_img[int(box[1]):int(box[3]), int(box[0]):int(box[2])]
23
+ cars.append(car)
24
+ return cars
25
+
26
+ # function to detect licence plates in the given car images
27
+ def detect_lp(inputs):
28
+ lps = []
29
+ # running the license plate detection model with 50% confidence threshold
30
+ lp_results = lp_detection.predict(source=inputs, conf=0.5, verbose=False)
31
+ # iterating through each output (num of outputs will be same as num of inputs)
32
+ for lp_result in lp_results:
33
+ # finding the bounding boxes of the license plate detected
34
+ lp_boxes = lp_result.boxes.xyxy.tolist()
35
+ # iterating through each license plate detected
36
+ for lp_box in lp_boxes:
37
+ # cropping license plate image from the car image
38
+ lp = lp_result.orig_img[int(lp_box[1]):int(lp_box[3]), int(lp_box[0]):int(lp_box[2])]
39
+ lps.append(lp)
40
+ # breaking as we only want to detect one licence plate per car
41
+ break
42
+
43
+ # if no licence plate is detected then we are adding a black image
44
+ if len(lp_boxes) == 0:
45
+ lps.append(np.zeros((100,100,3), np.uint8))
46
+
47
+ return lps
48
+
49
+ # function to detect licence plate characters in the given LP images
50
+ def chars_lp_det(inputs):
51
+ vis_lp = []
52
+ chars = []
53
+ # running the license plate character detection model with 50% confidence threshold
54
+ chars_results = char_dect.predict(source=inputs, conf=0.5, verbose=False)
55
+ # iterating through each output (num of outputs will be same as num of inputs)
56
+ for chars_result in chars_results:
57
+ # finding the bounding boxes of the characters detected
58
+ chars_boxes = chars_result.boxes.xyxy.tolist()
59
+ # copying the plate image to draw on, then iterating through each character detected
60
+ vis = chars_result.orig_img.copy()
61
+ c_list =[]
62
+ for chars_box in chars_boxes:
63
+ # drawing the character's bounding box on the copy and cropping the character from the plate image
64
+ cv2.rectangle(vis, (int(chars_box[0]),int(chars_box[1])), (int(chars_box[2]), int(chars_box[3])), (0,255,0), 1)
65
+ chrs = chars_result.orig_img[int(chars_box[1]):int(chars_box[3]), int(chars_box[0]):int(chars_box[2])]
66
+ c_list.append(chrs)
67
+
68
+ chars.append(c_list)
69
+ vis_lp.append(vis)
70
+ # if no visualisation image was produced then we are adding a black image
71
+ if len(vis_lp) == 0:
72
+ vis_lp.append(np.zeros((100,100,3), np.uint8))
73
+
74
+ return vis_lp, chars
75
+
76
+ # function to detect licence plate number in the given licence plate images
77
+ def detect_lp_text(inputs):
78
+ plate_number = []
79
+ # iterating through each licence plate
80
+ for input in inputs:
81
+ # finding the number/text in licence plate
82
+ result = reader.readtext(input)
83
+
84
+ # if no text is found in the licence plate, then adding a default text not found
85
+ if len(result) == 0:
86
+ plate_number.append("not found")
87
+ else:
88
+ # adding the licence plate number to a list
89
+ plate_number.append(result[0][1])
90
+
91
+ return plate_number
92
+
93
+ def rec_lp_char(inputs):
94
+ m = ['0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z']
95
+ transform = transforms.Compose([
96
+ transforms.Resize((224, 224)),
97
+ transforms.ToTensor(),
98
+ ])
99
+ lptexts = []
100
+ for input in inputs:
101
+ imgs = [transform(Image.fromarray(input[i])) for i in range(len(input))]
102
+ if len(imgs) <= 1:
103
+ lptexts.append("not found")
104
+ continue
105
+ imgs = torch.stack(imgs)
106
+ output = char_rec(imgs)
107
+ preds = torch.argmax(output, dim=1).tolist()
108
+ lptext = ""
109
+ for pred in preds:
110
+ lptext += m[int(pred)]
111
+ lptexts.append(lptext)
112
+ return lptexts
113
+
114
+
115
+ def run(inputs):
116
+
117
+ # for future, to handle multiple inputs
118
+ # currently using just one input
119
+ inputs = inputs[0]
120
+
121
+ # detecting cars, this function returns all detected car images
122
+ cars = detect_cars(inputs)
123
+
124
+ # if no car is detected black images are returned
125
+ if len(cars) == 0:
126
+ return [np.zeros((100,100,3), np.uint8)], [np.zeros((100,100,3), np.uint8)], "not found"
127
+
128
+ # detecting licence plates from the car images
129
+ # returns licence plate images, if it cant find a license plate a black image is returned
130
+ lps = detect_lp(cars)
131
+
132
+ vis_lp, chars_lp = chars_lp_det(lps)
133
+
134
+
135
+ lptexts = rec_lp_char(chars_lp)
136
+
137
+ # detecting licence plate number from licence plate images
138
+ # returns text from the licence plate images, if none is detected "not found" text is returned
139
+ # lp_text = detect_lp_text(lps)
140
+
141
+
142
+ return cars, vis_lp, lptexts
143
+
models/model/config.json ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/trocr-small-printed",
3
+ "architectures": [
4
+ "VisionEncoderDecoderModel"
5
+ ],
6
+ "decoder": {
7
+ "_name_or_path": "",
8
+ "activation_dropout": 0.0,
9
+ "activation_function": "relu",
10
+ "add_cross_attention": true,
11
+ "architectures": null,
12
+ "attention_dropout": 0.0,
13
+ "bad_words_ids": null,
14
+ "begin_suppress_tokens": null,
15
+ "bos_token_id": 0,
16
+ "chunk_size_feed_forward": 0,
17
+ "classifier_dropout": 0.0,
18
+ "cross_attention_hidden_size": 384,
19
+ "d_model": 256,
20
+ "decoder_attention_heads": 8,
21
+ "decoder_ffn_dim": 1024,
22
+ "decoder_layerdrop": 0.0,
23
+ "decoder_layers": 6,
24
+ "decoder_start_token_id": 2,
25
+ "diversity_penalty": 0.0,
26
+ "do_sample": false,
27
+ "dropout": 0.1,
28
+ "early_stopping": false,
29
+ "encoder_no_repeat_ngram_size": 0,
30
+ "eos_token_id": 2,
31
+ "exponential_decay_length_penalty": null,
32
+ "finetuning_task": null,
33
+ "forced_bos_token_id": null,
34
+ "forced_eos_token_id": null,
35
+ "id2label": {
36
+ "0": "LABEL_0",
37
+ "1": "LABEL_1"
38
+ },
39
+ "init_std": 0.02,
40
+ "is_decoder": true,
41
+ "is_encoder_decoder": false,
42
+ "label2id": {
43
+ "LABEL_0": 0,
44
+ "LABEL_1": 1
45
+ },
46
+ "layernorm_embedding": true,
47
+ "length_penalty": 1.0,
48
+ "max_length": 20,
49
+ "max_position_embeddings": 512,
50
+ "min_length": 0,
51
+ "model_type": "trocr",
52
+ "no_repeat_ngram_size": 0,
53
+ "num_beam_groups": 1,
54
+ "num_beams": 1,
55
+ "num_return_sequences": 1,
56
+ "output_attentions": false,
57
+ "output_hidden_states": false,
58
+ "output_scores": false,
59
+ "pad_token_id": 1,
60
+ "prefix": null,
61
+ "problem_type": null,
62
+ "pruned_heads": {},
63
+ "remove_invalid_values": false,
64
+ "repetition_penalty": 1.0,
65
+ "return_dict": true,
66
+ "return_dict_in_generate": false,
67
+ "scale_embedding": true,
68
+ "sep_token_id": null,
69
+ "suppress_tokens": null,
70
+ "task_specific_params": null,
71
+ "temperature": 1.0,
72
+ "tf_legacy_loss": false,
73
+ "tie_encoder_decoder": false,
74
+ "tie_word_embeddings": false,
75
+ "tokenizer_class": null,
76
+ "top_k": 50,
77
+ "top_p": 1.0,
78
+ "torch_dtype": null,
79
+ "torchscript": false,
80
+ "typical_p": 1.0,
81
+ "use_bfloat16": false,
82
+ "use_cache": false,
83
+ "use_learned_position_embeddings": true,
84
+ "vocab_size": 64044
85
+ },
86
+ "decoder_start_token_id": 0,
87
+ "early_stopping": true,
88
+ "encoder": {
89
+ "_name_or_path": "",
90
+ "add_cross_attention": false,
91
+ "architectures": null,
92
+ "attention_probs_dropout_prob": 0.0,
93
+ "bad_words_ids": null,
94
+ "begin_suppress_tokens": null,
95
+ "bos_token_id": null,
96
+ "chunk_size_feed_forward": 0,
97
+ "cross_attention_hidden_size": null,
98
+ "decoder_start_token_id": null,
99
+ "diversity_penalty": 0.0,
100
+ "do_sample": false,
101
+ "early_stopping": false,
102
+ "encoder_no_repeat_ngram_size": 0,
103
+ "encoder_stride": 16,
104
+ "eos_token_id": null,
105
+ "exponential_decay_length_penalty": null,
106
+ "finetuning_task": null,
107
+ "forced_bos_token_id": null,
108
+ "forced_eos_token_id": null,
109
+ "hidden_act": "gelu",
110
+ "hidden_dropout_prob": 0.0,
111
+ "hidden_size": 384,
112
+ "id2label": {
113
+ "0": "LABEL_0",
114
+ "1": "LABEL_1"
115
+ },
116
+ "image_size": 384,
117
+ "initializer_range": 0.02,
118
+ "intermediate_size": 1536,
119
+ "is_decoder": false,
120
+ "is_encoder_decoder": false,
121
+ "label2id": {
122
+ "LABEL_0": 0,
123
+ "LABEL_1": 1
124
+ },
125
+ "layer_norm_eps": 1e-12,
126
+ "length_penalty": 1.0,
127
+ "max_length": 20,
128
+ "min_length": 0,
129
+ "model_type": "deit",
130
+ "no_repeat_ngram_size": 0,
131
+ "num_attention_heads": 6,
132
+ "num_beam_groups": 1,
133
+ "num_beams": 1,
134
+ "num_channels": 3,
135
+ "num_hidden_layers": 12,
136
+ "num_return_sequences": 1,
137
+ "output_attentions": false,
138
+ "output_hidden_states": false,
139
+ "output_scores": false,
140
+ "pad_token_id": null,
141
+ "patch_size": 16,
142
+ "prefix": null,
143
+ "problem_type": null,
144
+ "pruned_heads": {},
145
+ "qkv_bias": true,
146
+ "remove_invalid_values": false,
147
+ "repetition_penalty": 1.0,
148
+ "return_dict": true,
149
+ "return_dict_in_generate": false,
150
+ "sep_token_id": null,
151
+ "suppress_tokens": null,
152
+ "task_specific_params": null,
153
+ "temperature": 1.0,
154
+ "tf_legacy_loss": false,
155
+ "tie_encoder_decoder": false,
156
+ "tie_word_embeddings": true,
157
+ "tokenizer_class": null,
158
+ "top_k": 50,
159
+ "top_p": 1.0,
160
+ "torch_dtype": null,
161
+ "torchscript": false,
162
+ "typical_p": 1.0,
163
+ "use_bfloat16": false
164
+ },
165
+ "eos_token_id": 2,
166
+ "is_encoder_decoder": true,
167
+ "length_penalty": 2.0,
168
+ "model_type": "vision-encoder-decoder",
169
+ "no_repeat_ngram_size": 3,
170
+ "num_beams": 4,
171
+ "pad_token_id": 1,
172
+ "tie_word_embeddings": false,
173
+ "torch_dtype": "float32",
174
+ "transformers_version": "4.37.0",
175
+ "vocab_size": 64044
176
+ }
models/model/generation_config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 0,
3
+ "decoder_start_token_id": 0,
4
+ "early_stopping": true,
5
+ "eos_token_id": 2,
6
+ "length_penalty": 2.0,
7
+ "no_repeat_ngram_size": 3,
8
+ "num_beams": 4,
9
+ "pad_token_id": 1,
10
+ "transformers_version": "4.37.0",
11
+ "use_cache": false
12
+ }
models/model/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:819da9002f0817676c7db10c0ea64c16b17e3c71690be9b3cb1bff95280bee76
3
+ size 246430696
models/processor/preprocessor_config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 224,
4
+ "width": 224
5
+ },
6
+ "do_center_crop": false,
7
+ "do_normalize": true,
8
+ "do_rescale": true,
9
+ "do_resize": true,
10
+ "image_mean": [
11
+ 0.5,
12
+ 0.5,
13
+ 0.5
14
+ ],
15
+ "image_processor_type": "DeiTImageProcessor",
16
+ "image_std": [
17
+ 0.5,
18
+ 0.5,
19
+ 0.5
20
+ ],
21
+ "processor_class": "TrOCRProcessor",
22
+ "resample": 3,
23
+ "rescale_factor": 0.00392156862745098,
24
+ "size": {
25
+ "height": 384,
26
+ "width": 384
27
+ }
28
+ }
models/processor/sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f5e2fefcf793761a76a6bfb8ad35489f9c203b25557673284b6d032f41043f4
3
+ size 1356293
models/processor/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": {
6
+ "content": "<mask>",
7
+ "lstrip": true,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "pad_token": "<pad>",
13
+ "sep_token": "</s>",
14
+ "unk_token": "<unk>"
15
+ }
models/processor/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
models/processor/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "64001": {
36
+ "content": "<mask>",
37
+ "lstrip": true,
38
+ "normalized": true,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "<s>",
45
+ "clean_up_tokenization_spaces": true,
46
+ "cls_token": "<s>",
47
+ "eos_token": "</s>",
48
+ "mask_token": "<mask>",
49
+ "model_max_length": 1000000000000000019884624838656,
50
+ "pad_token": "<pad>",
51
+ "processor_class": "TrOCRProcessor",
52
+ "sep_token": "</s>",
53
+ "sp_model_kwargs": {},
54
+ "tokenizer_class": "XLMRobertaTokenizer",
55
+ "unk_token": "<unk>"
56
+ }