| from model1 import np, YOLO, processor, model | |
# License plate detector, loaded from the local weights file.
lp_detection = YOLO("models/yolov8n_lp_det.pt")

# NOTE: `processor` and `model` are imported from model1; they are no longer
# loaded here from 'models/processor' and 'models/model'.


def _configure_generation(ocr_model, ocr_processor):
    """Set the special token ids and beam-search settings on ``ocr_model.config``.

    The configuration object is modified in place; nothing is returned.
    """
    tokenizer = ocr_processor.tokenizer
    config = ocr_model.config

    # Special tokens used for creating the decoder_input_ids from the labels.
    config.decoder_start_token_id = tokenizer.cls_token_id
    config.pad_token_id = tokenizer.pad_token_id

    # Keep the top-level vocab size in sync with the decoder's.
    config.vocab_size = config.decoder.vocab_size

    # Beam-search parameters.
    config.eos_token_id = tokenizer.sep_token_id
    config.max_length = 20
    config.early_stopping = True
    config.no_repeat_ngram_size = 3
    config.length_penalty = 2.0
    config.num_beams = 4


_configure_generation(model, processor)
def detect_lp(inputs, conf=0.5):
    """Detect and crop one licence plate from each given car image.

    Args:
        inputs: Image source(s), passed unchanged to ``lp_detection.predict``.
        conf: Confidence threshold for the detector. Defaults to 0.5.

    Returns:
        A list with exactly one image per detection result. Each entry is the
        crop of the first bounding box reported for that result, or a black
        100x100x3 ``uint8`` placeholder when no plate was detected or when the
        crop turned out to be empty.
    """
    lps = []
    lp_results = lp_detection.predict(source=inputs, conf=conf, verbose=False)

    # One result per input image.
    for lp_result in lp_results:
        # Boxes are [x1, y1, x2, y2] corner coordinates.
        lp_boxes = lp_result.boxes.xyxy.tolist()

        lp = None
        if lp_boxes:
            # Only one licence plate per car is wanted, so take the first box.
            x1, y1, x2, y2 = (int(coord) for coord in lp_boxes[0])
            lp = lp_result.orig_img[y1:y2, x1:x2]

        # Fall back to a black image when nothing was detected, or when the
        # box truncated to a zero-pixel crop (assumes orig_img is a NumPy
        # array, as the 2-D slicing above suggests).
        if lp is None or lp.size == 0:
            lp = np.zeros((100, 100, 3), np.uint8)

        lps.append(lp)

    return lps
def detect_lp_text(inputs):
    """Read the number/text from each given licence plate image.

    Args:
        inputs: Iterable of licence plate images.

    Returns:
        A list with one string per image: the decoded text, or the default
        ``"not found"`` when the decoded text is empty.
    """
    plate_texts = []
    for plate_image in inputs:
        encoded = processor(plate_image, return_tensors="pt")
        token_ids = model.generate(encoded.pixel_values)
        decoded = processor.batch_decode(token_ids, skip_special_tokens=True)
        # An empty decoded string means no text was found on the plate.
        plate_texts.append(decoded[0] or "not found")
    return plate_texts
def run(inputs):
    """Detect licence plates in the first input and read their text.

    Args:
        inputs: Sequence of inputs. Only ``inputs[0]`` is processed for now;
            the sequence form is kept so multiple inputs can be handled later.

    Returns:
        A ``(lps, lp_text)`` tuple: the licence plate images (a black image
        where no plate could be found) and the text read from each of them
        (``"not found"`` where no text was detected).
    """
    first_input = inputs[0]
    plate_images = detect_lp(first_input)
    plate_texts = detect_lp_text(plate_images)
    return plate_images, plate_texts