Spaces:
Sleeping
Sleeping
| # -*- coding: utf-8 -*- | |
| """ | |
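KYC OCR Global Recognition (Gradio)
- Dynamic traversal: no fixed field list; every field found in card_info is displayed
- Smart ordering: important fields first, the rest sorted alphabetically
- Friendly formatting: dates are normalized (18-08-2000 -> 2000-08-18)
- Internationalization: English/Chinese UI texts switch together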
| """ | |
| import os | |
| import io | |
| import json | |
| import base64 | |
| from datetime import datetime | |
| import gradio as gr | |
| import requests | |
| from PIL import Image | |
| from dotenv import load_dotenv | |
| # ----------------------------- | |
| # 0) 全局配置 | |
| # ----------------------------- | |
# Set GRADIO_LANGUAGE for this process.
# NOTE(review): presumably read by Gradio to pin the UI locale to English - confirm.
os.environ["GRADIO_LANGUAGE"] = "en"
# Must run before the os.getenv() calls below so values from a .env file are visible.
load_dotenv()
# OCR endpoint used by kyc_ocr() when the country code is "ID".
API_URL_ID = os.getenv("API_URL1", "")
# OCR endpoint used by kyc_ocr() for every other country code.
API_URL_SG = os.getenv("API_URL2", "")
# Endpoint that submit_cooperation_form() posts the contact form to.
COOP_INTENT_URL = os.getenv("COOP_INTENT_URL", "")
| # ----------------------------- | |
| # 1) 字段优先级与文案(i18n) | |
| # ----------------------------- | |
| # 定义优先展示的字段顺序(前面的优先显示) | |
# Keys that sort_fields() places first, in exactly this order; any other key
# found in the card is appended afterwards in alphabetical order.
PRIORITY_FIELDS = [
    "document_forgery_result",
    "name",
    "full_name",
    "nik",
    "id_number",
    "passport_number",
    "gender",
    "birthday",
    "birthplace",
    "nationality",
]
| # 字段名映射(可选,用于美化展示) | |
# Display labels for known card_info keys (English UI).
# Keys missing here fall back to a Title Cased version of the raw key
# (see format_field_name).
FIELD_LABELS_EN = {
    # Injected by run_ocr() from document_forgery_info; listed first in PRIORITY_FIELDS.
    "document_forgery_result": "Document Forgery Result",
    "name": "Name",
    "full_name": "Full Name",
    "nik": "ID Number",
    "id_number": "ID Number",
    "passport_number": "Passport Number",
    "gender": "Gender",
    "birthday": "Birthday",
    "birthplace": "Birthplace",
    "nationality": "Nationality",
    "address": "Address",
    "village": "Village",
    "street": "Street",
    "rt_rw": "RT/RW",
    "city": "City/Regency",
    "province": "Province/State",
    "occupation": "Occupation",
    "marital_status": "Marital Status",
    "religion": "Religion",
    "issue_location": "Issue Location",
    "issue_date": "Issue Date",
    "expiry_date": "Expiry Date",
    "blood_type": "Blood Type",
    "document_type": "Document Type",
}
# Display labels for the same keys (Chinese UI). Keep the key set in sync with
# FIELD_LABELS_EN, otherwise the Chinese UI shows the English fallback label.
FIELD_LABELS_ZH = {
    "document_forgery_result": "证件伪造检测结果",
    "name": "姓名",
    "full_name": "全名",
    "nik": "证件号",
    "id_number": "证件号",
    "passport_number": "护照号",
    "gender": "性别",
    "birthday": "出生日期",
    "birthplace": "出生地",
    "nationality": "国籍",
    "address": "地址",
    "village": "村/社区",
    "street": "街道",
    "rt_rw": "RT/RW",
    "city": "城市/县",
    "province": "省/州",
    "occupation": "职业",
    "marital_status": "婚姻状况",
    "religion": "宗教",
    "issue_location": "签发地",
    "issue_date": "签发日期",
    "expiry_date": "到期日期",
    "blood_type": "血型",
    "document_type": "证件类型",
}
# Markdown shown at the top of the page when the UI language is English
# (initial value of the intro component and the English branch of the
# language switch in build_ui).
INTRO_EN = """
# 🌏 Global ID Recognition (KYC OCR)
Supports global ID card OCR recognition.
Upload any ID (passport, driver's license, national ID). We will extract key fields.
✅ High Accuracy (99.7%)
✅ Worldwide documents
✅ More cost-effective
✅ Fast (<1s)
For more details and trial accounts, please visit www.trustdecision.com.
"""
# Chinese counterpart of INTRO_EN, used by the non-English branch of the
# language switch in build_ui.
INTRO_ZH = """
# 🌏 全球证件识别(KYC OCR)
上传身份证、护照或驾照等证件图片,自动识别关键信息。
✅ 高准确率(99.7%)
✅ 全球证件支持
✅ 性价比更高
✅ 极速响应(<1s)
详情及试用账号申请,请访问 TrustDecision 官网:www.trustdecision.com。
"""
# Dropdown display text -> country code sent to the OCR API (English UI).
COUNTRIES_EN = {
    "Indonesia 🇮🇩": "ID",
    "Singapore 🇸🇬": "SG",
    "Thailand 🇹🇭": "TH",
    "Mexico 🇲🇽": "MX",
    "Pakistan 🇵🇰": "PK",
    "Vietnam 🇻🇳": "VN",
    "Philippines 🇵🇭": "PH",
    "Malaysia 🇲🇾": "MY",
    "China 🇨🇳": "CN",
    "Japan 🇯🇵": "JP",
    "South Korea 🇰🇷": "KR",
    "India 🇮🇳": "IN",
}
# Dropdown display text -> country code (Chinese UI); same codes as COUNTRIES_EN.
COUNTRIES_ZH = {
    "印尼 🇮🇩": "ID",
    "新加坡 🇸🇬": "SG",
    "泰国 🇹🇭": "TH",
    "墨西哥 🇲🇽": "MX",
    "巴基斯坦 🇵🇰": "PK",
    "越南 🇻🇳": "VN",
    "菲律宾 🇵🇭": "PH",
    "马来西亚 🇲🇾": "MY",
    "中国 🇨🇳": "CN",
    "日本 🇯🇵": "JP",
    "韩国 🇰🇷": "KR",
    "印度 🇮🇳": "IN",
}
# Checkbox display text -> option token; kyc_ocr() joins the tokens with ","
# into the "options" field of the request payload (English UI).
OPTIONS_EN = {
    "Document forgery detection": "document_forgery"
}
# Checkbox display text -> option token (Chinese UI); same tokens as OPTIONS_EN.
OPTIONS_ZH = {
    "证件伪造检测": "document_forgery"
}
| # ----------------------------- | |
| # 2) 工具函数 | |
| # ----------------------------- | |
def image_to_base64(image_path: str) -> str:
    """Re-encode the image at *image_path* and return it as base64 text.

    The image is opened with Pillow and saved into an in-memory buffer using
    its own detected format, or JPEG when Pillow reports no format. The
    buffer content is then base64-encoded and decoded as UTF-8.
    """
    buffer = io.BytesIO()
    with Image.open(image_path) as picture:
        target_format = picture.format or "JPEG"
        picture.save(buffer, format=target_format)
    encoded = base64.b64encode(buffer.getvalue())
    return encoded.decode("utf-8")
def normalize_date(s: str) -> str:
    """Return *s* as an ISO ``YYYY-MM-DD`` date when it can be parsed.

    Rules, applied in order:
    - non-string input is returned unchanged;
    - surrounding whitespace is stripped; an empty result is returned as is;
    - text starting with "SEUMUR" (case-insensitive) becomes "Lifetime";
    - ``DD-MM-YYYY``, ``YYYY-MM-DD``, ``DD/MM/YYYY`` and ``YYYY/MM/DD`` are
      tried in that order, and the first match is reformatted;
    - anything else is returned as the stripped text.
    """
    if not isinstance(s, str):
        return s

    text = s.strip()
    if text == "":
        return text
    if text.upper().startswith("SEUMUR"):
        return "Lifetime"

    known_patterns = ("%d-%m-%Y", "%Y-%m-%d", "%d/%m/%Y", "%Y/%m/%d")
    for pattern in known_patterns:
        try:
            iso_text = datetime.strptime(text, pattern).strftime("%Y-%m-%d")
        except ValueError:
            # Not this layout; try the next one.
            continue
        return iso_text
    return text
def pick_labels(lang: str):
    """Return the label table for *lang*: English for "English", Chinese otherwise."""
    if lang == "English":
        return FIELD_LABELS_EN
    return FIELD_LABELS_ZH
def format_field_name(key: str, lang: str) -> str:
    """Return the display label for a card field.

    Uses the predefined label for *lang* when one exists. Otherwise the raw
    key is prettified: snake_case -> Title Case.
    """
    known_labels = pick_labels(lang)
    try:
        return known_labels[key]
    except KeyError:
        words = key.replace("_", " ")
        return words.title()
def should_normalize_date(key: str) -> bool:
    """Tell whether a field name looks date-like.

    True when the lower-cased key contains any of the markers below as a
    substring, so keys such as "birthplace" or "issue_location" also match.
    """
    lowered = key.lower()
    for marker in ("date", "birthday", "birth", "expiry", "issue", "valid"):
        if marker in lowered:
            return True
    return False
| # ----------------------------- | |
| # 3) 动态遍历并排序字段 | |
| # ----------------------------- | |
def sort_fields(card: dict) -> list:
    """Return the keys of *card* in display order.

    1. Keys listed in PRIORITY_FIELDS come first, in PRIORITY_FIELDS order.
    2. All remaining keys follow, sorted alphabetically.
    """
    present = list(card.keys())
    leading = [name for name in PRIORITY_FIELDS if name in present]
    trailing = [name for name in present if name not in PRIORITY_FIELDS]
    trailing.sort()
    return leading + trailing
def build_contact_success_html(lang: str) -> str:
    """Return the HTML card confirming that the contact form was submitted.

    English texts are used when *lang* is "English", Chinese texts otherwise.
    """
    if lang == "English":
        title = "✅ Submitted Successfully"
        msg = "Thank you! Our team will contact you soon."
    else:
        title = "✅ 提交成功"
        msg = "感谢您的提交,我们的团队会尽快与您联系。"
    # Colors come from Gradio theme CSS variables so the card follows the theme.
    return f"""
<style>
.contact-success-container {{
background: var(--block-background-fill);
border: 1px solid var(--border-color-accent);
border-radius: 8px;
padding: 20px;
color: var(--body-text-color);
}}
.contact-success-title {{
font-weight: bold;
font-size: 16px;
margin-bottom: 8px;
}}
.contact-success-message {{
font-size: 14px;
}}
</style>
<div class="contact-success-container">
<div class="contact-success-title">{title}</div>
<div class="contact-success-message">{msg}</div>
</div>
"""
def submit_cooperation_form(
    first_name: str,
    last_name: str,
    phone: str,
    email: str,
    country: str,
    industry: str,
    description: str,
    lang: str,
):
    """Post the contact ("cooperation intent") form and return result HTML.

    Args:
        first_name, last_name, phone, email, country, industry, description:
            Raw form values; ``None`` is sent as an empty string.
        lang: "English" selects English messages, anything else Chinese.

    Returns:
        HTML from build_contact_success_html() on a 2xx response, otherwise
        HTML from build_error_html() describing the problem: missing
        configuration, missing email, non-2xx status, or a raised exception.
    """
    if not COOP_INTENT_URL:
        msg = (
            "Backend URL is not configured. Please set COOP_INTENT_URL in .env."
            if lang == "English"
            else "后端接口地址未配置,请在 .env 中设置 COOP_INTENT_URL。"
        )
        return build_error_html(msg, lang)

    # Required-field check. Strip first so a whitespace-only email counts as
    # missing instead of being posted to the backend.
    email_clean = (email or "").strip()
    if not email_clean:
        msg = "Email is required" if lang == "English" else "邮箱为必填项"
        return build_error_html(msg, lang)

    payload = {
        "firstName": first_name or "",
        "lastName": last_name or "",
        "phone": phone or "",
        "email": email_clean,
        "country": country or "",
        "industry": industry or "",
        "description": description or "",
    }
    try:
        resp = requests.post(
            COOP_INTENT_URL,
            headers={"Content-Type": "application/json"},
            json=payload,
            timeout=10,
        )
    except Exception as e:
        # UI boundary: report any failure as an error card instead of
        # letting the Gradio callback raise.
        return build_error_html(str(e), lang)

    if 200 <= resp.status_code < 300:
        return build_contact_success_html(lang)
    return build_error_html(f"{resp.status_code}: {resp.text}", lang)
| # ----------------------------- | |
| # 4) 结果渲染 | |
| # ----------------------------- | |
def build_result_html(card: dict, lang: str) -> str:
    """Render every non-empty field of *card* as an HTML result card.

    Fields are ordered by sort_fields(). Date-like string fields are passed
    through normalize_date(). Lists are joined with ", " and dicts are
    pretty-printed as JSON. Labels and values are HTML-escaped because they
    come from the OCR response.

    Args:
        card: Mapping of field name -> extracted value (the card_info object).
        lang: "English" selects English texts, anything else Chinese.

    Returns:
        An HTML string; a short "no data" placeholder when *card* is empty.
    """
    # Local import: keeps the block self-contained, and avoids the bare name
    # ``html`` that the original body used for a local variable.
    from html import escape

    if not card:
        if lang == "English":
            return '<div style="padding:20px;color:#999;">No data extracted</div>'
        return '<div style="padding:20px;color:#999;">未提取到数据</div>'

    field_blocks = []
    for key in sort_fields(card):
        value = card[key]
        # Skip empty values.
        if value in (None, "", [], {}):
            continue
        if isinstance(value, str):
            # Only strings can be dates. Containers keep their type so the
            # branches below format them instead of showing a Python repr.
            if should_normalize_date(key):
                value = normalize_date(value)
        elif isinstance(value, list):
            value = ", ".join(str(v) for v in value)
        elif isinstance(value, dict):
            # Nested object.
            value = json.dumps(value, ensure_ascii=False, indent=2)

        # Real HTML escaping (&, <, > and quotes) for untrusted OCR output.
        value_display = escape(str(value))
        label = escape(str(format_field_name(key, lang)))
        field_blocks.append(f"""
<div class="kd-field">
<div class="kd-label">{label}</div>
<div class="kd-value">{value_display}</div>
</div>
""")
    html_items = "".join(field_blocks)

    # Success banner.
    status_text = "✅ Recognition Successful" if lang == "English" else "✅ 识别成功"
    page = f"""
<style>
.kd-container {{
background: var(--block-background-fill);
border-radius: 12px;
overflow: hidden;
box-shadow: 0 1px 6px rgba(0,0,0,0.1);
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, "PingFang SC", "Microsoft YaHei", sans-serif;
border: 1px solid var(--block-border-color);
}}
.kd-status {{
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 16px 20px;
font-size: 16px;
font-weight: 600;
}}
.kd-card {{
padding: 20px;
color: var(--body-text-color);
}}
.kd-grid {{
display: grid;
grid-template-columns: 1fr 1fr;
gap: 16px 24px;
align-items: start;
}}
.kd-field {{
display: flex;
flex-direction: column;
padding: 12px;
border-radius: 8px;
background: var(--background-fill-secondary);
transition: background 0.2s;
border: 1px solid var(--border-color-primary);
}}
.kd-field:hover {{
background: var(--background-fill-tertiary);
border-color: var(--border-color-accent);
}}
.kd-label {{
font-size: 12px;
color: var(--body-text-color-subdued);
font-weight: 500;
text-transform: uppercase;
letter-spacing: 0.5px;
margin-bottom: 6px;
}}
.kd-value {{
font-size: 15px;
font-weight: 600;
color: var(--body-text-color);
word-break: break-word;
white-space: pre-wrap;
}}
@media (max-width: 768px) {{
.kd-grid {{ grid-template-columns: 1fr; }}
}}
</style>
<div class="kd-container">
<div class="kd-status">{status_text}</div>
<div class="kd-card">
<div class="kd-grid">
{html_items}
</div>
</div>
</div>
"""
    return page
def build_error_html(text: str, lang: str) -> str:
    """Return an HTML error card with a localized title and *text* as message.

    *text* often carries a backend response body or an exception message, so
    it is HTML-escaped before being embedded in the page.

    Args:
        text: Error message to show; ``None`` is rendered as an empty message.
        lang: "English" selects the English title, anything else Chinese.
    """
    from html import escape  # local import keeps the block self-contained

    title = "⚠️ Request Failed" if lang == "English" else "⚠️ 请求失败"
    safe_text = escape("" if text is None else str(text))
    return f"""
<style>
.error-container {{
background: var(--error-background-fill);
border: 1px solid var(--error-border-color);
border-radius: 8px;
padding: 20px;
color: var(--error-text-color);
}}
.error-title {{
font-weight: bold;
font-size: 16px;
margin-bottom: 8px;
}}
.error-message {{
font-size: 14px;
}}
</style>
<div class="error-container">
<div class="error-title">{title}</div>
<div class="error-message">{safe_text}</div>
</div>
"""
| # ----------------------------- | |
| # 5) API 调用 | |
| # ----------------------------- | |
def kyc_ocr(image, country_code: str, option_tokens: list[str], timeout: float = 30.0):
    """Send one image to the OCR endpoint and return a result envelope.

    Args:
        image: Path of the uploaded image file (passed to image_to_base64).
        country_code: Country code; "ID" uses API_URL_ID, every other code
            uses API_URL_SG.
        option_tokens: Option tokens, sent comma-joined in ``options``.
        timeout: Seconds to wait for the HTTP request. Without a timeout,
            ``requests`` can block indefinitely on an unresponsive server.

    Returns:
        ``{"ok": bool, "data": parsed JSON or None, "err": str}``. This
        function does not raise: every failure is reported through ``err``.
    """
    try:
        img_base64 = image_to_base64(image)
        api_url = API_URL_ID if country_code == "ID" else API_URL_SG
        if not api_url:
            # Fail with a readable message rather than a requests URL error.
            return {
                "ok": False,
                "data": None,
                "err": "OCR API URL is not configured. Please set API_URL1 / API_URL2 in .env.",
            }
        payload = {
            "image": img_base64,
            "country": country_code,
            "scenario": "Ocr",
            "options": ",".join(option_tokens) if option_tokens else "",
        }
        resp = requests.post(
            api_url,
            headers={"Content-Type": "application/json"},
            json=payload,
            timeout=timeout,
        )
        if resp.status_code == 200:
            return {"ok": True, "data": resp.json(), "err": ""}
        return {"ok": False, "data": None, "err": f"{resp.status_code}: {resp.text}"}
    except Exception as e:
        # UI boundary: image decoding, network and JSON errors all become
        # an error envelope so the caller can render them.
        return {"ok": False, "data": None, "err": str(e)}
| # ----------------------------- | |
| # 6) 下拉/复选与 token 互转 | |
| # ----------------------------- | |
def _display_to_code_country(display: str, lang: str):
    """Map a dropdown display string to its country code, or None if unknown.

    The English table is used when *lang* is "English", the Chinese one otherwise.
    """
    if lang == "English":
        return COUNTRIES_EN.get(display)
    return COUNTRIES_ZH.get(display)
def _display_to_tokens_options(selected_displays, lang: str):
    """Convert selected checkbox display strings into option tokens.

    Unknown display strings are dropped; ``None`` is treated as no selection.
    """
    token_for = OPTIONS_EN if lang == "English" else OPTIONS_ZH
    tokens = []
    for shown in selected_displays or []:
        if shown in token_for:
            tokens.append(token_for[shown])
    return tokens
def _tokens_to_displays_options(tokens: list[str], lang: str):
    """Convert option tokens into the display strings of *lang*.

    Unknown tokens are dropped; ``None`` is treated as an empty list.
    """
    token_for = OPTIONS_EN if lang == "English" else OPTIONS_ZH
    # Invert display -> token into token -> display.
    display_for = dict(zip(token_for.values(), token_for.keys()))
    displays = []
    for token in tokens or []:
        if token in display_for:
            displays.append(display_for[token])
    return displays
| # ----------------------------- | |
| # 7) 业务主函数 | |
| # ----------------------------- | |
def run_ocr(image, country_display, option_displays, lang):
    """Run OCR for one uploaded image and build the result HTML.

    Args:
        image: Path of the uploaded image, or ``None`` when nothing was uploaded.
        country_display: Country as shown in the dropdown for *lang*.
        option_displays: Selected option display strings for *lang*.
        lang: "English" or anything else for Chinese.

    Returns:
        ``(html, data)``. *html* is a result or error card. *data* is the
        parsed response dict, or ``None`` when the request was not made or
        failed.
    """
    code = _display_to_code_country(country_display, lang)
    if not code:
        err_msg = "Please select a supported country" if lang == "English" else "请选择国家"
        return (build_error_html(err_msg, lang), None)

    if not image:
        # Without this guard the user sees a raw exception string from
        # trying to open a missing file.
        err_msg = "Please upload a document image" if lang == "English" else "请先上传证件图片"
        return (build_error_html(err_msg, lang), None)

    tokens = _display_to_tokens_options(option_displays, lang)
    r = kyc_ocr(image, code, tokens)
    if not r["ok"]:
        return (build_error_html(r["err"], lang), None)

    # The response is arbitrary JSON: anything but an object is treated as
    # "no data" instead of crashing on .get().
    data = r["data"] if isinstance(r["data"], dict) else {}
    card = data.get("card_info") or data.get("data") or {}
    # Compatibility: some endpoints return card_info as a list.
    if isinstance(card, list) and card:
        card = card[0]
    if not isinstance(card, dict):
        card = {}

    # Surface the raw document_forgery_info result as a card field.
    forgery_info = data.get("document_forgery_info", {})
    if forgery_info and isinstance(forgery_info, dict):
        result = forgery_info.get("result")
        if result:
            card["document_forgery_result"] = result

    return (build_result_html(card, lang), data)
| # ----------------------------- | |
| # 8) Gradio UI | |
| # ----------------------------- | |
def build_ui():
    """Build and return the Gradio Blocks app (OCR panel plus contact form).

    Returns:
        The ``gr.Blocks`` instance; the caller is expected to ``launch()`` it.
    """
    # NOTE(review): the source this was recovered from had lost all
    # indentation, so the nesting of components inside Row/Column below is a
    # reconstruction. Confirm the placement of the result panel and of the
    # contact submit button against the deployed layout.

    # Per-language UI texts used by the language switch.
    ui_text = {
        "English": {
            "intro": INTRO_EN,
            "image_label": "Upload Document",
            "country_label": "Select Country",
            "default_country": "Indonesia 🇮🇩",
            "options_label": "Options",
            "submit_label": "Start Recognition",
        },
        "中文": {
            "intro": INTRO_ZH,
            "image_label": "上传图片",
            "country_label": "选择证件国家",
            "default_country": "印尼 🇮🇩",
            "options_label": "选项",
            "submit_label": "开始识别",
        },
    }

    with gr.Blocks(title="KYC OCR Global Recognition") as demo:
        lang_state = gr.State("English")
        selected_option_tokens = gr.State([])
        ui_lang = gr.Radio(choices=["English", "中文"], value="English",
                           label="UI Language", interactive=True)
        intro_md = gr.Markdown(INTRO_EN)
        with gr.Row():
            with gr.Column(scale=6):
                image_input = gr.Image(type="filepath", label="Upload Document")
            with gr.Column(scale=5):
                country_input = gr.Dropdown(
                    choices=list(COUNTRIES_EN.keys()),
                    value="Indonesia 🇮🇩",
                    label="Select Country",
                    interactive=True
                )
                options_group = gr.CheckboxGroup(
                    choices=list(OPTIONS_EN.keys()),
                    value=[],
                    label="Options",
                    interactive=True
                )
                submit_btn = gr.Button("Start Recognition", variant="primary")
        # Output: recognition result.
        result_html = gr.HTML()

        # Submit handler: run OCR and remember the submitted option tokens.
        def on_submit(image, country_display, option_displays, lang_cur):
            tokens = _display_to_tokens_options(option_displays, lang_cur)
            html, _raw = run_ocr(image, country_display, option_displays, lang_cur)
            return html, tokens

        submit_btn.click(
            fn=on_submit,
            inputs=[image_input, country_input, options_group, lang_state],
            outputs=[result_html, selected_option_tokens]
        )

        # Language switch handler.
        def on_lang_change(selected_lang, option_displays, country_display):
            """Relabel the OCR panel while keeping the user's current choices.

            The current selections are read from the components themselves,
            not from state saved at the last submit, so choices made since
            then survive the switch. Display strings are unique across both
            languages, so they are resolved through merged lookup tables and
            the handler does not need to know the previous language.
            """
            lang = "English" if selected_lang == "English" else "中文"
            text = ui_text[lang]
            countries = COUNTRIES_EN if lang == "English" else COUNTRIES_ZH
            options = OPTIONS_EN if lang == "English" else OPTIONS_ZH

            all_options = {**OPTIONS_EN, **OPTIONS_ZH}
            tokens = [all_options[d] for d in (option_displays or []) if d in all_options]
            displays = _tokens_to_displays_options(tokens, lang)

            all_countries = {**COUNTRIES_EN, **COUNTRIES_ZH}
            code = all_countries.get(country_display)
            country_value = next(
                (shown for shown, c in countries.items() if c == code),
                text["default_country"],
            )

            return (
                lang,
                gr.update(value=lang),
                gr.update(value=text["intro"]),
                gr.update(label=text["image_label"]),
                gr.update(choices=list(countries.keys()),
                          value=country_value,
                          label=text["country_label"]),
                gr.update(choices=list(options.keys()),
                          value=displays,
                          label=text["options_label"]),
                gr.update(value=text["submit_label"]),
            )

        ui_lang.change(
            fn=on_lang_change,
            inputs=[ui_lang, options_group, country_input],
            outputs=[lang_state, ui_lang, intro_md, image_input, country_input, options_group, submit_btn]
        )

        # -----------------------------
        # Contact-us form
        # -----------------------------
        gr.Markdown("## 📩 Contact Us")
        with gr.Row():
            with gr.Column():
                contact_first_name = gr.Textbox(
                    label="First Name",
                    placeholder="e.g. John"
                )
                contact_last_name = gr.Textbox(
                    label="Last Name",
                    placeholder="e.g. Doe"
                )
                contact_phone = gr.Textbox(
                    label="Phone",
                    placeholder="+65 1234 5678"
                )
                contact_email = gr.Textbox(
                    label="Email *",
                    placeholder="your@email.com"
                )
            with gr.Column():
                contact_country = gr.Textbox(
                    label="Country",
                    placeholder="sg / id / cn ..."
                )
                contact_industry = gr.Textbox(
                    label="Industry",
                    placeholder="e.g. Fintech, E-commerce"
                )
                contact_description = gr.Textbox(
                    label="Description",
                    lines=5,
                    placeholder="Briefly describe your scenario"
                )
        contact_submit_btn = gr.Button(
            "Submit Cooperation Intent",
            variant="secondary"
        )
        contact_result_html = gr.HTML()

        # The inputs are listed in submit_cooperation_form's parameter order,
        # so it is wired directly without a pass-through wrapper.
        contact_submit_btn.click(
            fn=submit_cooperation_form,
            inputs=[
                contact_first_name,
                contact_last_name,
                contact_phone,
                contact_email,
                contact_country,
                contact_industry,
                contact_description,
                lang_state,
            ],
            outputs=[contact_result_html],
        )
    return demo
| # ----------------------------- | |
| # 9) 入口 | |
| # ----------------------------- | |
| if __name__ == "__main__": | |
| demo = build_ui() | |
| demo.launch() |