# -*- coding: utf-8 -*-
"""
KYC OCR Global Recognition (Gradio)

- Dynamic traversal: no fixed field list; every field found in ``card_info``
  is displayed.
- Smart ordering: important fields first, the remaining ones alphabetically.
- Friendly formatting: dates are normalised (18-08-2000 -> 2000-08-18).
- i18n: the UI can be switched between English and Chinese.
"""

import base64
import html
import io
import json
import os
from datetime import datetime

import gradio as gr
import requests
from dotenv import load_dotenv
from PIL import Image

# -----------------------------
# 0) Global configuration
# -----------------------------
os.environ["GRADIO_LANGUAGE"] = "en"
load_dotenv()

API_URL_ID = os.getenv("API_URL1", "")
API_URL_SG = os.getenv("API_URL2", "")
COOP_INTENT_URL = os.getenv("COOP_INTENT_URL", "")

# Upper bound (seconds) for the OCR request so a stalled backend cannot hang
# the UI worker forever.
OCR_TIMEOUT_SECONDS = 30

# -----------------------------
# 1) Field priority and i18n copy
# -----------------------------
# Fields listed here are shown first, in this order.
PRIORITY_FIELDS = [
    "document_forgery_result",
    "name",
    "full_name",
    "nik",
    "id_number",
    "passport_number",
    "gender",
    "birthday",
    "birthplace",
    "nationality",
]

# Optional display names; keys that are missing fall back to Title Case.
FIELD_LABELS_EN = {
    "document_forgery_result": "Document Forgery Result",
    "name": "Name",
    "full_name": "Full Name",
    "nik": "ID Number",
    "id_number": "ID Number",
    "passport_number": "Passport Number",
    "gender": "Gender",
    "birthday": "Birthday",
    "birthplace": "Birthplace",
    "nationality": "Nationality",
    "address": "Address",
    "village": "Village",
    "street": "Street",
    "rt_rw": "RT/RW",
    "city": "City/Regency",
    "province": "Province/State",
    "occupation": "Occupation",
    "marital_status": "Marital Status",
    "religion": "Religion",
    "issue_location": "Issue Location",
    "issue_date": "Issue Date",
    "expiry_date": "Expiry Date",
    "blood_type": "Blood Type",
    "document_type": "Document Type",
}

FIELD_LABELS_ZH = {
    "document_forgery_result": "证件伪造检测结果",
    "name": "姓名",
    "full_name": "全名",
    "nik": "证件号",
    "id_number": "证件号",
    "passport_number": "护照号",
    "gender": "性别",
    "birthday": "出生日期",
    "birthplace": "出生地",
    "nationality": "国籍",
    "address": "地址",
    "village": "村/社区",
    "street": "街道",
    "rt_rw": "RT/RW",
    "city": "城市/县",
    "province": "省/州",
    "occupation": "职业",
    "marital_status": "婚姻状况",
    "religion": "宗教",
    "issue_location": "签发地",
    "issue_date": "签发日期",
    "expiry_date": "到期日期",
    "blood_type": "血型",
    "document_type": "证件类型",
}

INTRO_EN = """
# 🌏 Global ID Recognition (KYC OCR)

Supports global ID card OCR recognition. Upload any ID (passport, driver's license, national ID). We will extract key fields.

✅ High Accuracy (99.7%) ✅ Worldwide documents ✅ More cost-effective ✅ Fast (<1s)

For more details and trial accounts, please visit www.trustdecision.com.
"""

INTRO_ZH = """
# 🌏 全球证件识别(KYC OCR)

上传身份证、护照或驾照等证件图片,自动识别关键信息。

✅ 高准确率(99.7%) ✅ 全球证件支持 ✅ 性价比更高 ✅ 极速响应(<1s)

详情及试用账号申请,请访问 TrustDecision 官网:www.trustdecision.com。
"""

COUNTRIES_EN = {
    "Indonesia 🇮🇩": "ID",
    "Singapore 🇸🇬": "SG",
    "Thailand 🇹🇭": "TH",
    "Mexico 🇲🇽": "MX",
    "Pakistan 🇵🇰": "PK",
    "Vietnam 🇻🇳": "VN",
    "Philippines 🇵🇭": "PH",
    "Malaysia 🇲🇾": "MY",
    "China 🇨🇳": "CN",
    "Japan 🇯🇵": "JP",
    "South Korea 🇰🇷": "KR",
    "India 🇮🇳": "IN",
}

COUNTRIES_ZH = {
    "印尼 🇮🇩": "ID",
    "新加坡 🇸🇬": "SG",
    "泰国 🇹🇭": "TH",
    "墨西哥 🇲🇽": "MX",
    "巴基斯坦 🇵🇰": "PK",
    "越南 🇻🇳": "VN",
    "菲律宾 🇵🇭": "PH",
    "马来西亚 🇲🇾": "MY",
    "中国 🇨🇳": "CN",
    "日本 🇯🇵": "JP",
    "韩国 🇰🇷": "KR",
    "印度 🇮🇳": "IN",
}

OPTIONS_EN = {
    "Document forgery detection": "document_forgery",
}

OPTIONS_ZH = {
    "证件伪造检测": "document_forgery",
}


# -----------------------------
# 2) Utility functions
# -----------------------------
def image_to_base64(image_path: str) -> str:
    """Open the image at *image_path*, re-encode it and return it as base64 text.

    The image is saved in its detected format, or as JPEG when PIL reports
    no format.
    """
    with Image.open(image_path) as img:
        bio = io.BytesIO()
        img.save(bio, format=img.format if img.format else "JPEG")
        return base64.b64encode(bio.getvalue()).decode("utf-8")


def normalize_date(s: str) -> str:
    """Return *s* as ``YYYY-MM-DD`` when it matches a known date layout.

    Example: ``18-08-2000`` -> ``2000-08-18``. Values starting with
    ``SEUMUR`` (case-insensitive) become ``"Lifetime"``. Non-string input is
    returned untouched; unparseable strings are returned stripped.
    """
    if not isinstance(s, str):
        return s
    s = s.strip()
    if not s:
        return s
    if s.upper().startswith("SEUMUR"):
        return "Lifetime"
    for fmt in ("%d-%m-%Y", "%Y-%m-%d", "%d/%m/%Y", "%Y/%m/%d"):
        try:
            return datetime.strptime(s, fmt).strftime("%Y-%m-%d")
        except ValueError:
            continue
    return s


def pick_labels(lang: str):
    """Return the field-label mapping for *lang* (anything but "English" is Chinese)."""
    return FIELD_LABELS_EN if lang == "English" else FIELD_LABELS_ZH


def format_field_name(key: str, lang: str) -> str:
    """Return the display name of field *key* in the given UI language."""
    labels = pick_labels(lang)
    if key in labels:
        return labels[key]
    # No predefined label: prettify the raw key (snake_case -> Title Case).
    return key.replace("_", " ").title()


def should_normalize_date(key: str) -> bool:
    """Return True when the field name suggests that it holds a date."""
    date_keywords = ("date", "birthday", "birth", "expiry", "issue", "valid")
    key_lower = key.lower()
    return any(kw in key_lower for kw in date_keywords)


# -----------------------------
# 3) Dynamic traversal and ordering of fields
# -----------------------------
def sort_fields(card: dict) -> list:
    """Return the keys of *card* in display order.

    1. Keys listed in ``PRIORITY_FIELDS`` come first, in that order.
    2. All remaining keys follow, sorted alphabetically.
    """
    priority = set(PRIORITY_FIELDS)
    priority_keys = [k for k in PRIORITY_FIELDS if k in card]
    other_keys = sorted(k for k in card if k not in priority)
    return priority_keys + other_keys


def build_contact_success_html(lang: str) -> str:
    """Return the HTML banner shown after the contact form was submitted."""
    title = "✅ Submitted Successfully" if lang == "English" else "✅ 提交成功"
    msg = (
        "Thank you! Our team will contact you soon."
        if lang == "English"
        else "感谢您的提交,我们的团队会尽快与您联系。"
    )
    return f"""
<div style="padding:12px 16px;border:1px solid #a7f3d0;border-radius:8px;background:#ecfdf5;">
  <div style="font-weight:600;margin-bottom:4px;">{title}</div>
  <div>{msg}</div>
</div>
"""


def submit_cooperation_form(
    first_name: str,
    last_name: str,
    phone: str,
    email: str,
    country: str,
    industry: str,
    description: str,
    lang: str,
):
    """Post the cooperation-intent form to ``COOP_INTENT_URL``.

    Returns an HTML snippet: the success banner on a 2xx response, otherwise
    an error card (missing configuration, missing email, HTTP error or
    request failure).
    """
    if not COOP_INTENT_URL:
        msg = (
            "Backend URL is not configured. Please set COOP_INTENT_URL in .env."
            if lang == "English"
            else "后端接口地址未配置,请在 .env 中设置 COOP_INTENT_URL。"
        )
        return build_error_html(msg, lang)

    # Minimal required-field validation; whitespace-only input counts as empty.
    email = (email or "").strip()
    if not email:
        msg = "Email is required" if lang == "English" else "邮箱为必填项"
        return build_error_html(msg, lang)

    payload = {
        "firstName": first_name or "",
        "lastName": last_name or "",
        "phone": phone or "",
        "email": email,
        "country": country or "",
        "industry": industry or "",
        "description": description or "",
    }

    try:
        resp = requests.post(
            COOP_INTENT_URL,
            headers={"Content-Type": "application/json"},
            json=payload,
            timeout=10,
        )
    except Exception as e:
        # UI boundary: surface the failure to the user instead of crashing the handler.
        return build_error_html(str(e), lang)

    if 200 <= resp.status_code < 300:
        return build_contact_success_html(lang)
    return build_error_html(f"{resp.status_code}: {resp.text}", lang)


# -----------------------------
# 4) Result rendering
# -----------------------------
def _no_data_html(lang: str) -> str:
    """Return the placeholder shown when there is nothing to display."""
    text = "No data extracted" if lang == "English" else "未提取到数据"
    return f'<div style="padding:12px;color:#6b7280;">{text}</div>'


def build_result_html(card: dict, lang: str) -> str:
    """Render every non-empty field of *card* as an HTML result card.

    Fields are ordered by ``sort_fields``. String values of date-like fields
    are normalised, lists are comma-joined and nested dicts are shown as
    indented JSON. Labels and values are HTML-escaped because they originate
    from the OCR backend.
    """
    if not card:
        return _no_data_html(lang)

    rows = []
    for key in sort_fields(card):
        value = card[key]

        # Skip empty values.
        if value in (None, "", []):
            continue

        if isinstance(value, str):
            # Only strings are date candidates; stringifying containers here
            # would bypass the list/dict formatting below.
            if should_normalize_date(key):
                value = normalize_date(value)
        elif isinstance(value, list):
            value = ", ".join(str(v) for v in value)
        elif isinstance(value, dict):
            value = json.dumps(value, ensure_ascii=False, indent=2)

        label = html.escape(format_field_name(key, lang))
        value_display = html.escape(str(value))
        rows.append(f"""
  <div style="display:flex;gap:12px;padding:6px 0;border-bottom:1px solid #e5e7eb;">
    <div style="flex:0 0 40%;color:#6b7280;">{label}</div>
    <div style="flex:1;font-weight:500;white-space:pre-wrap;word-break:break-word;">{value_display}</div>
  </div>""")

    if not rows:
        # Every field was empty: do not claim success over an empty card.
        return _no_data_html(lang)

    status_text = "✅ Recognition Successful" if lang == "English" else "✅ 识别成功"
    html_items = "".join(rows)
    return f"""
<div style="padding:12px 16px;border:1px solid #e5e7eb;border-radius:8px;">
  <div style="font-weight:600;margin-bottom:8px;">{status_text}</div>
  {html_items}
</div>
"""


def build_error_html(text: str, lang: str) -> str:
    """Return an HTML error card with a localised title and the escaped *text*."""
    title = "⚠️ Request Failed" if lang == "English" else "⚠️ 请求失败"
    # The text may carry upstream response bodies or exception messages.
    safe_text = html.escape(str(text))
    return f"""
<div style="padding:12px 16px;border:1px solid #fecaca;border-radius:8px;background:#fef2f2;">
  <div style="font-weight:600;margin-bottom:4px;">{title}</div>
  <div style="white-space:pre-wrap;word-break:break-word;">{safe_text}</div>
</div>
"""


# -----------------------------
# 5) API call
# -----------------------------
def kyc_ocr(image, country_code: str, option_tokens: list[str]):
    """Send the image to the OCR backend.

    Args:
        image: Path of the uploaded image file.
        country_code: Two-letter country code; "ID" selects ``API_URL_ID``,
            every other code ``API_URL_SG``.
        option_tokens: Option tokens, sent comma-joined.

    Returns:
        ``{"ok": bool, "data": parsed JSON or None, "err": str}``. The
        function never raises; failures are reported through ``err``.
    """
    if not image:
        return {"ok": False, "data": None, "err": "No image provided"}

    api_url = API_URL_ID if country_code == "ID" else API_URL_SG
    if not api_url:
        return {"ok": False, "data": None, "err": "OCR API URL is not configured"}

    try:
        payload = {
            "image": image_to_base64(image),
            "country": country_code,
            "scenario": "Ocr",
            "options": ",".join(option_tokens) if option_tokens else "",
        }
        resp = requests.post(
            api_url,
            headers={"Content-Type": "application/json"},
            json=payload,
            timeout=OCR_TIMEOUT_SECONDS,
        )
        if resp.status_code == 200:
            return {"ok": True, "data": resp.json(), "err": ""}
        return {"ok": False, "data": None, "err": f"{resp.status_code}: {resp.text}"}
    except Exception as e:
        # UI boundary: image decoding, network and JSON errors all end up in "err".
        return {"ok": False, "data": None, "err": str(e)}


# -----------------------------
# 6) Dropdown / checkbox display text <-> token conversion
# -----------------------------
def _display_to_code_country(display: str, lang: str):
    """Map a country display name to its code, or None when unknown."""
    m = COUNTRIES_EN if lang == "English" else COUNTRIES_ZH
    return m.get(display)


def _display_to_tokens_options(selected_displays, lang: str):
    """Map selected option display names to option tokens, dropping unknown ones."""
    m = OPTIONS_EN if lang == "English" else OPTIONS_ZH
    return [m[d] for d in (selected_displays or []) if d in m]


def _tokens_to_displays_options(tokens: list[str], lang: str):
    """Map option tokens to display names in *lang*, dropping unknown ones."""
    m = OPTIONS_EN if lang == "English" else OPTIONS_ZH
    by_token = {v: k for k, v in m.items()}
    return [by_token[t] for t in (tokens or []) if t in by_token]


# -----------------------------
# 7) Main business function
# -----------------------------
def run_ocr(image, country_display, option_displays, lang):
    """Run OCR for the uploaded image and render the outcome.

    Returns:
        ``(html, raw)``: the rendered result/error HTML and the raw backend
        response (``None`` on failure).
    """
    code = _display_to_code_country(country_display, lang)
    if not code:
        err_msg = "Please select a supported country" if lang == "English" else "请选择国家"
        return (build_error_html(err_msg, lang), None)

    if not image:
        err_msg = "Please upload a document image" if lang == "English" else "请先上传证件图片"
        return (build_error_html(err_msg, lang), None)

    tokens = _display_to_tokens_options(option_displays, lang)
    r = kyc_ocr(image, code, tokens)
    if not r["ok"]:
        return (build_error_html(r["err"], lang), None)

    data = r["data"] or {}
    # The backend is expected to answer with a JSON object; tolerate anything else.
    body = data if isinstance(data, dict) else {}

    card = body.get("card_info") or body.get("data") or {}
    # Compatibility: some endpoints return card_info as a list.
    if isinstance(card, list):
        card = card[0] if card else {}
    # Work on a copy so the raw response handed back to the caller stays untouched,
    # and make sure the forgery result below can always be stored.
    card = dict(card) if isinstance(card, dict) else {}

    # Surface the forgery verdict as a regular card field (raw value).
    forgery_info = body.get("document_forgery_info", {})
    if forgery_info and isinstance(forgery_info, dict):
        result = forgery_info.get("result")
        if result:
            card["document_forgery_result"] = result

    result_html = build_result_html(card, lang)
    return (result_html, data)


# -----------------------------
# 8) Gradio UI
# -----------------------------
def build_ui():
    """Build and return the Gradio Blocks application."""
    with gr.Blocks(title="KYC OCR Global Recognition") as demo:
        lang_state = gr.State("English")
        selected_option_tokens = gr.State([])

        ui_lang = gr.Radio(
            choices=["English", "中文"],
            value="English",
            label="UI Language",
            interactive=True,
        )
        intro_md = gr.Markdown(INTRO_EN)

        with gr.Row():
            with gr.Column(scale=6):
                image_input = gr.Image(type="filepath", label="Upload Document")
            with gr.Column(scale=5):
                country_input = gr.Dropdown(
                    choices=list(COUNTRIES_EN.keys()),
                    value="Indonesia 🇮🇩",
                    label="Select Country",
                    interactive=True,
                )
                options_group = gr.CheckboxGroup(
                    choices=list(OPTIONS_EN.keys()),
                    value=[],
                    label="Options",
                    interactive=True,
                )
                submit_btn = gr.Button("Start Recognition", variant="primary")

        # Output: rendered result.
        result_html = gr.HTML()

        # Submit logic.
        def on_submit(image, country_display, option_displays, lang_cur):
            tokens = _display_to_tokens_options(option_displays, lang_cur)
            markup, _raw = run_ocr(image, country_display, option_displays, lang_cur)
            return markup, tokens

        submit_btn.click(
            fn=on_submit,
            inputs=[image_input, country_input, options_group, lang_state],
            outputs=[result_html, selected_option_tokens],
        )

        # Language switch logic.
        def on_lang_change(selected_lang, tokens_state, country_display=None, option_displays=None):
            english = selected_lang == "English"
            lang = "English" if english else "中文"
            countries = COUNTRIES_EN if english else COUNTRIES_ZH
            options = OPTIONS_EN if english else OPTIONS_ZH

            # Keep the chosen country across languages; the current display
            # text is still in the previous language, so look it up in both maps.
            code = COUNTRIES_EN.get(country_display) or COUNTRIES_ZH.get(country_display)
            country_value = next(
                (name for name, c in countries.items() if c == code),
                next(iter(countries)),
            )

            # Prefer the live checkbox selection over the tokens remembered at
            # the last submit, so ticks that were not submitted yet survive.
            if option_displays is None:
                tokens = tokens_state
            else:
                known = {**OPTIONS_EN, **OPTIONS_ZH}
                tokens = [known[d] for d in option_displays if d in known]
            displays = _tokens_to_displays_options(tokens, lang)

            return (
                lang,
                gr.update(value=lang),
                gr.update(value=INTRO_EN if english else INTRO_ZH),
                gr.update(label="Upload Document" if english else "上传图片"),
                gr.update(
                    choices=list(countries.keys()),
                    value=country_value,
                    label="Select Country" if english else "选择证件国家",
                ),
                gr.update(
                    choices=list(options.keys()),
                    value=displays,
                    label="Options" if english else "选项",
                ),
                gr.update(value="Start Recognition" if english else "开始识别"),
            )

        ui_lang.change(
            fn=on_lang_change,
            inputs=[ui_lang, selected_option_tokens, country_input, options_group],
            outputs=[
                lang_state,
                ui_lang,
                intro_md,
                image_input,
                country_input,
                options_group,
                submit_btn,
            ],
        )

        # -----------------------------
        # Contact-us form
        # -----------------------------
        gr.Markdown("## 📩 Contact Us")
        with gr.Row():
            with gr.Column():
                contact_first_name = gr.Textbox(
                    label="First Name", placeholder="e.g. John"
                )
                contact_last_name = gr.Textbox(
                    label="Last Name", placeholder="e.g. Doe"
                )
                contact_phone = gr.Textbox(
                    label="Phone", placeholder="+65 1234 5678"
                )
                contact_email = gr.Textbox(
                    label="Email *", placeholder="your@email.com"
                )
            with gr.Column():
                contact_country = gr.Textbox(
                    label="Country", placeholder="sg / id / cn ..."
                )
                contact_industry = gr.Textbox(
                    label="Industry", placeholder="e.g. Fintech, E-commerce"
                )
                contact_description = gr.Textbox(
                    label="Description",
                    lines=5,
                    placeholder="Briefly describe your scenario",
                )

        contact_submit_btn = gr.Button(
            "Submit Cooperation Intent", variant="secondary"
        )
        contact_result_html = gr.HTML()

        def on_contact_submit(
            first_name,
            last_name,
            phone,
            email,
            country,
            industry,
            description,
            lang_cur,
        ):
            return submit_cooperation_form(
                first_name,
                last_name,
                phone,
                email,
                country,
                industry,
                description,
                lang_cur,
            )

        contact_submit_btn.click(
            fn=on_contact_submit,
            inputs=[
                contact_first_name,
                contact_last_name,
                contact_phone,
                contact_email,
                contact_country,
                contact_industry,
                contact_description,
                lang_state,
            ],
            outputs=[contact_result_html],
        )

    return demo


# -----------------------------
# 9) Entry point
# -----------------------------
if __name__ == "__main__":
    demo = build_ui()
    demo.launch()