| import os |
| import pytesseract |
| from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline |
| from IdentifyModel.cardModel import parse_id_card |
|
|
| |
# Checkpoints for the NER stage. The tokenizer and the token-classification
# model are loaded from two different checkpoint names; both strings are kept
# exactly as configured.
_TOKENIZER_CHECKPOINT = "ckiplab/bert-base-chinese"
_NER_MODEL_CHECKPOINT = "ckiplab/bert-base-chinese-ner"

# Loaded once at import time and shared by every call in this module.
tokenizer = AutoTokenizer.from_pretrained(_TOKENIZER_CHECKPOINT)
model = AutoModelForTokenClassification.from_pretrained(_NER_MODEL_CHECKPOINT)
ner_pipeline = pipeline(
    "ner",
    model=model,
    tokenizer=tokenizer,
)
|
|
|
|
def llm_recognition(image, validation_type, language):
    """Run OCR on an image, tag the text with NER, and parse the result.

    Args:
        image: Image input handed directly to
            ``pytesseract.image_to_string``.
        validation_type: Forwarded unchanged to ``parse_id_card``.
        language: Value passed to Tesseract as its ``lang`` argument.

    Returns:
        Whatever ``parse_id_card`` returns for the OCR text, the validation
        type and the extracted entity mapping.
    """
    ocr_text = pytesseract.image_to_string(image, lang=language)

    # Map each NER label to the slice of OCR text it covers. Predictions are
    # visited in order, so when a label occurs more than once only the last
    # span for that label is kept.
    entities = {}
    for prediction in ner_pipeline(ocr_text):
        label = prediction['entity']
        span = ocr_text[prediction['start']:prediction['end']]
        entities[label] = span

    return parse_id_card(ocr_text, validation_type, entities)
|
|