# Global-ID-OCR / app.py
# reeqrreeqr's picture
# Update app.py
# 991f9fd verified
# -*- coding: utf-8 -*-
"""
KYC OCR Global Recognition (Gradio)
- 动态遍历:不依赖固定字段,自动展示 card_info 中所有字段
- 智能排序:重要字段优先,其余按字母排序
- 友好化:日期规范化(18-08-2000 -> 2000-08-18)
- 国际化:中英双语联动
"""
import base64
import html
import io
import json
import os
from datetime import datetime

import gradio as gr
import requests
from dotenv import load_dotenv
from PIL import Image
# -----------------------------
# 0) Global configuration
# -----------------------------
# Set in the process environment before anything else in this section runs.
# NOTE(review): presumably pins Gradio's built-in UI text to English — confirm
# that Gradio actually reads GRADIO_LANGUAGE.
os.environ["GRADIO_LANGUAGE"] = "en"
# Load variables from a .env file so the lookups below can see them.
load_dotenv()
# OCR endpoint used when the selected country code is "ID" (see kyc_ocr).
API_URL_ID = os.getenv("API_URL1", "")
# OCR endpoint used for every other country code (see kyc_ocr).
API_URL_SG = os.getenv("API_URL2", "")
# Endpoint that receives the "Contact Us" form (see submit_cooperation_form).
COOP_INTENT_URL = os.getenv("COOP_INTENT_URL", "")
# -----------------------------
# 1) Field priority and UI copy (i18n)
# -----------------------------
# Field keys that are rendered first, in exactly this order. Keys that are
# not listed here are rendered afterwards in alphabetical order
# (see sort_fields).
PRIORITY_FIELDS = [
    "document_forgery_result",
    "name",
    "full_name",
    "nik",
    "id_number",
    "passport_number",
    "gender",
    "birthday",
    "birthplace",
    "nationality",
]
# Optional display names for known field keys, one table per UI language.
# A key missing from the active table falls back to a Title Case rendering of
# the raw key (see format_field_name). Both tables carry the same key set.
#
# "document_forgery_result" is the key run_ocr adds to the card when forgery
# detection returns a result; it is listed here so the Chinese UI does not
# fall back to an English label for it.
FIELD_LABELS_EN = {
    "document_forgery_result": "Document Forgery Result",
    "name": "Name",
    "full_name": "Full Name",
    "nik": "ID Number",
    "id_number": "ID Number",
    "passport_number": "Passport Number",
    "gender": "Gender",
    "birthday": "Birthday",
    "birthplace": "Birthplace",
    "nationality": "Nationality",
    "address": "Address",
    "village": "Village",
    "street": "Street",
    "rt_rw": "RT/RW",
    "city": "City/Regency",
    "province": "Province/State",
    "occupation": "Occupation",
    "marital_status": "Marital Status",
    "religion": "Religion",
    "issue_location": "Issue Location",
    "issue_date": "Issue Date",
    "expiry_date": "Expiry Date",
    "blood_type": "Blood Type",
    "document_type": "Document Type",
}
FIELD_LABELS_ZH = {
    "document_forgery_result": "证件伪造检测结果",
    "name": "姓名",
    "full_name": "全名",
    "nik": "证件号",
    "id_number": "证件号",
    "passport_number": "护照号",
    "gender": "性别",
    "birthday": "出生日期",
    "birthplace": "出生地",
    "nationality": "国籍",
    "address": "地址",
    "village": "村/社区",
    "street": "街道",
    "rt_rw": "RT/RW",
    "city": "城市/县",
    "province": "省/州",
    "occupation": "职业",
    "marital_status": "婚姻状况",
    "religion": "宗教",
    "issue_location": "签发地",
    "issue_date": "签发日期",
    "expiry_date": "到期日期",
    "blood_type": "血型",
    "document_type": "证件类型",
}
# Markdown intro shown at the top of the page while the UI language is
# English (it is also the initial content of the intro block in build_ui).
INTRO_EN = """
# 🌏 Global ID Recognition (KYC OCR)
Supports global ID card OCR recognition.
Upload any ID (passport, driver's license, national ID). We will extract key fields.
✅ High Accuracy (99.7%)
✅ Worldwide documents
✅ More cost-effective
✅ Fast (<1s)
For more details and trial accounts, please visit www.trustdecision.com.
"""
# Chinese counterpart of INTRO_EN; swapped in by the language switch in
# build_ui.
INTRO_ZH = """
# 🌏 全球证件识别(KYC OCR)
上传身份证、护照或驾照等证件图片,自动识别关键信息。
✅ 高准确率(99.7%)
✅ 全球证件支持
✅ 性价比更高
✅ 极速响应(<1s)
详情及试用账号申请,请访问 TrustDecision 官网:www.trustdecision.com。
"""
# Country dropdown entries per UI language: display text -> country code.
# The code is what kyc_ocr sends as the "country" field of the OCR request.
# Both tables list the same codes in the same order, so a selection can be
# carried across languages through its code.
COUNTRIES_EN = {
    "Indonesia 🇮🇩": "ID",
    "Singapore 🇸🇬": "SG",
    "Thailand 🇹🇭": "TH",
    "Mexico 🇲🇽": "MX",
    "Pakistan 🇵🇰": "PK",
    "Vietnam 🇻🇳": "VN",
    "Philippines 🇵🇭": "PH",
    "Malaysia 🇲🇾": "MY",
    "China 🇨🇳": "CN",
    "Japan 🇯🇵": "JP",
    "South Korea 🇰🇷": "KR",
    "India 🇮🇳": "IN",
}
COUNTRIES_ZH = {
    "印尼 🇮🇩": "ID",
    "新加坡 🇸🇬": "SG",
    "泰国 🇹🇭": "TH",
    "墨西哥 🇲🇽": "MX",
    "巴基斯坦 🇵🇰": "PK",
    "越南 🇻🇳": "VN",
    "菲律宾 🇵🇭": "PH",
    "马来西亚 🇲🇾": "MY",
    "中国 🇨🇳": "CN",
    "日本 🇯🇵": "JP",
    "韩国 🇰🇷": "KR",
    "印度 🇮🇳": "IN",
}
# Checkbox entries per UI language: display text -> option token.
# Selected tokens are comma-joined into the "options" field of the OCR
# request (see kyc_ocr).
OPTIONS_EN = {
    "Document forgery detection": "document_forgery",
}
OPTIONS_ZH = {
    "证件伪造检测": "document_forgery",
}
# -----------------------------
# 2) 工具函数
# -----------------------------
def image_to_base64(image_path: str) -> str:
    """Re-encode the image at *image_path* and return it as base64 text.

    The file is opened with Pillow and saved into an in-memory buffer using
    the format Pillow detected for it, falling back to "JPEG" when no format
    is reported. The buffer content is returned base64-encoded as a str.
    """
    buffer = io.BytesIO()
    with Image.open(image_path) as picture:
        target_format = picture.format or "JPEG"
        picture.save(buffer, format=target_format)
    encoded = base64.b64encode(buffer.getvalue())
    return encoded.decode("utf-8")
def normalize_date(s: str) -> str:
    """Return *s* as an ISO date (YYYY-MM-DD) when it is a recognised date.

    Example: "18-08-2000" -> "2000-08-18".

    - Non-string input is returned untouched.
    - Surrounding whitespace is stripped; an empty result is returned as is.
    - Text starting with "SEUMUR" (any letter case) becomes "Lifetime".
    - Otherwise the text is tried against day-first and year-first layouts
      with "-" or "/" separators; the first layout that parses wins.
    - Text that matches no layout is returned stripped but otherwise unchanged.
    """
    if not isinstance(s, str):
        return s

    text = s.strip()
    if text == "":
        return text
    if text.upper().startswith("SEUMUR"):
        return "Lifetime"

    layouts = ("%d-%m-%Y", "%Y-%m-%d", "%d/%m/%Y", "%Y/%m/%d")
    for layout in layouts:
        try:
            return datetime.strptime(text, layout).strftime("%Y-%m-%d")
        except ValueError:
            # Not this layout; try the next one.
            pass
    return text
def pick_labels(lang: str):
    """Return the field-label table for *lang*.

    "English" selects FIELD_LABELS_EN; any other value selects
    FIELD_LABELS_ZH.
    """
    if lang == "English":
        return FIELD_LABELS_EN
    return FIELD_LABELS_ZH
def format_field_name(key: str, lang: str) -> str:
    """Return the display name of field *key* in UI language *lang*.

    Uses the predefined label when the active label table has one; otherwise
    derives a name from the raw key by turning snake_case into Title Case.
    """
    known_labels = pick_labels(lang)
    try:
        return known_labels[key]
    except KeyError:
        # No predefined label: "some_field" -> "Some Field".
        words = key.replace("_", " ")
        return words.title()
def should_normalize_date(key: str) -> bool:
    """Tell whether the field named *key* should go through date formatting.

    The decision is a case-insensitive substring match of the key against a
    fixed keyword list, so any key merely containing a keyword (for example
    "birthplace" via "birth") also qualifies.
    """
    lowered = key.lower()
    for fragment in ("date", "birthday", "birth", "expiry", "issue", "valid"):
        if fragment in lowered:
            return True
    return False
# -----------------------------
# 3) 动态遍历并排序字段
# -----------------------------
def sort_fields(card: dict) -> list:
    """Return the keys of *card* in display order.

    Keys named in PRIORITY_FIELDS come first, in PRIORITY_FIELDS order; all
    remaining keys follow in ascending sorted order.
    """
    present = list(card.keys())
    leading = [name for name in PRIORITY_FIELDS if name in present]
    trailing = [name for name in present if name not in PRIORITY_FIELDS]
    trailing.sort()
    return leading + trailing
def build_contact_success_html(lang: str) -> str:
    """Return the HTML banner shown after the contact form was submitted.

    The banner text is English when *lang* is "English" and Chinese for any
    other value. The returned fragment carries its own <style> block.
    """
    if lang == "English":
        title = "✅ Submitted Successfully"
        msg = "Thank you! Our team will contact you soon."
    else:
        title = "✅ 提交成功"
        msg = "感谢您的提交,我们的团队会尽快与您联系。"

    # Static stylesheet; kept flush-left so the emitted text stays unchanged.
    stylesheet = """
<style>
.contact-success-container {
background: var(--block-background-fill);
border: 1px solid var(--border-color-accent);
border-radius: 8px;
padding: 20px;
color: var(--body-text-color);
}
.contact-success-title {
font-weight: bold;
font-size: 16px;
margin-bottom: 8px;
}
.contact-success-message {
font-size: 14px;
}
</style>
"""
    banner = (
        '<div class="contact-success-container">\n'
        f'<div class="contact-success-title">{title}</div>\n'
        f'<div class="contact-success-message">{msg}</div>\n'
        "</div>\n"
    )
    return stylesheet + banner
def submit_cooperation_form(
    first_name: str,
    last_name: str,
    phone: str,
    email: str,
    country: str,
    industry: str,
    description: str,
    lang: str,
):
    """Post the contact form to COOP_INTENT_URL and return an HTML status.

    Returns the success banner for a 2xx response. Returns an error panel
    when the endpoint is not configured, when *email* is empty, when the
    endpoint answers with a non-2xx status (status code and body are shown),
    or when the request raises. Messages produced here follow *lang*
    ("English" or, for any other value, Chinese).
    """
    english = lang == "English"

    if not COOP_INTENT_URL:
        if english:
            msg = "Backend URL is not configured. Please set COOP_INTENT_URL in .env."
        else:
            msg = "后端接口地址未配置,请在 .env 中设置 COOP_INTENT_URL。"
        return build_error_html(msg, lang)

    # Minimal required-field validation: only the e-mail is mandatory.
    if not email:
        if english:
            msg = "Email is required"
        else:
            msg = "邮箱为必填项"
        return build_error_html(msg, lang)

    # Missing values are sent as empty strings.
    form_values = {
        "firstName": first_name,
        "lastName": last_name,
        "phone": phone,
        "email": email,
        "country": country,
        "industry": industry,
        "description": description,
    }
    payload = {field: value or "" for field, value in form_values.items()}

    try:
        response = requests.post(
            COOP_INTENT_URL,
            headers={"Content-Type": "application/json"},
            json=payload,
            timeout=10,
        )
        if not 200 <= response.status_code < 300:
            err = f"{response.status_code}: {response.text}"
            return build_error_html(err, lang)
        return build_contact_success_html(lang)
    except Exception as exc:
        # Any failure is reported in the UI instead of propagating.
        return build_error_html(str(exc), lang)
# -----------------------------
# 4) 结果渲染
# -----------------------------
# Static stylesheet of the result card. Kept flush-left so the emitted markup
# is the same text the page has always received.
_RESULT_CARD_STYLE = """
<style>
.kd-container {
background: var(--block-background-fill);
border-radius: 12px;
overflow: hidden;
box-shadow: 0 1px 6px rgba(0,0,0,0.1);
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, "PingFang SC", "Microsoft YaHei", sans-serif;
border: 1px solid var(--block-border-color);
}
.kd-status {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 16px 20px;
font-size: 16px;
font-weight: 600;
}
.kd-card {
padding: 20px;
color: var(--body-text-color);
}
.kd-grid {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 16px 24px;
align-items: start;
}
.kd-field {
display: flex;
flex-direction: column;
padding: 12px;
border-radius: 8px;
background: var(--background-fill-secondary);
transition: background 0.2s;
border: 1px solid var(--border-color-primary);
}
.kd-field:hover {
background: var(--background-fill-tertiary);
border-color: var(--border-color-accent);
}
.kd-label {
font-size: 12px;
color: var(--body-text-color-subdued);
font-weight: 500;
text-transform: uppercase;
letter-spacing: 0.5px;
margin-bottom: 6px;
}
.kd-value {
font-size: 15px;
font-weight: 600;
color: var(--body-text-color);
word-break: break-word;
white-space: pre-wrap;
}
@media (max-width: 768px) {
.kd-grid { grid-template-columns: 1fr; }
}
</style>
"""


def build_result_html(card: dict, lang: str) -> str:
    """Render every non-empty field of *card* as an HTML result card.

    Args:
        card: Mapping of field key -> extracted value. Values may be
            scalars, lists (joined with ", ") or dicts (shown as indented
            JSON).
        lang: "English" for English texts; any other value gives Chinese.

    Returns:
        An HTML fragment. When *card* is empty, or every field in it is
        empty, a short "no data" notice is returned instead of the card.

    Field order comes from sort_fields and labels from format_field_name.
    Labels and values are HTML-escaped because they originate from the OCR
    response, i.e. from text printed on an uploaded document.
    """
    english = lang == "English"
    if english:
        empty_notice = '<div style="padding:20px;color:#999;">No data extracted</div>'
    else:
        empty_notice = '<div style="padding:20px;color:#999;">未提取到数据</div>'
    if not card:
        return empty_notice

    field_blocks = []
    for key in sort_fields(card):
        value = card[key]
        # Skip empty values, including empty containers.
        if value in (None, "", [], {}):
            continue

        if isinstance(value, str):
            # Only text can be a date; containers are formatted below rather
            # than being stringified first.
            if should_normalize_date(key):
                value = normalize_date(value)
        elif isinstance(value, list):
            value = ", ".join(str(item) for item in value)
        elif isinstance(value, dict):
            value = json.dumps(value, ensure_ascii=False, indent=2)

        label = html.escape(format_field_name(key, lang))
        value_display = html.escape(str(value))
        field_blocks.append(
            "\n"
            '<div class="kd-field">\n'
            f'<div class="kd-label">{label}</div>\n'
            f'<div class="kd-value">{value_display}</div>\n'
            "</div>\n"
        )

    if not field_blocks:
        # Nothing displayable: do not claim success over an empty grid.
        return empty_notice

    status_text = "✅ Recognition Successful" if english else "✅ 识别成功"
    items_html = "".join(field_blocks)
    return (
        _RESULT_CARD_STYLE
        + '<div class="kd-container">\n'
        + f'<div class="kd-status">{status_text}</div>\n'
        + '<div class="kd-card">\n'
        + '<div class="kd-grid">\n'
        + f"{items_html}\n"
        + "</div>\n"
        + "</div>\n"
        + "</div>\n"
    )
# Static stylesheet of the error panel. Kept flush-left so the emitted markup
# is the same text the page has always received.
_ERROR_PANEL_STYLE = """
<style>
.error-container {
background: var(--error-background-fill);
border: 1px solid var(--error-border-color);
border-radius: 8px;
padding: 20px;
color: var(--error-text-color);
}
.error-title {
font-weight: bold;
font-size: 16px;
margin-bottom: 8px;
}
.error-message {
font-size: 14px;
}
</style>
"""


def build_error_html(text: str, lang: str) -> str:
    """Return an HTML error panel showing *text* under a localized title.

    Args:
        text: The error message. It is HTML-escaped before being embedded,
            because callers pass server response bodies and exception
            messages through here.
        lang: "English" for an English title; any other value gives Chinese.

    Returns:
        An HTML fragment that carries its own <style> block.
    """
    title = "⚠️ Request Failed" if lang == "English" else "⚠️ 请求失败"
    safe_text = html.escape(str(text))
    return (
        _ERROR_PANEL_STYLE
        + '<div class="error-container">\n'
        + f'<div class="error-title">{title}</div>\n'
        + f'<div class="error-message">{safe_text}</div>\n'
        + "</div>\n"
    )
# -----------------------------
# 5) API 调用
# -----------------------------
# Upper bound, in seconds, for one OCR request. Without a timeout a stalled
# endpoint would block the UI handler indefinitely.
_OCR_REQUEST_TIMEOUT = 30


def kyc_ocr(image, country_code: str, option_tokens: list[str]):
    """Send the image to the OCR endpoint and return a result envelope.

    Args:
        image: Path of the image file to recognise.
        country_code: Country code sent as "country". "ID" is routed to
            API_URL_ID, every other code to API_URL_SG.
        option_tokens: Option tokens, sent comma-joined as "options".

    Returns:
        A dict with keys "ok" (bool), "data" (decoded JSON body on success,
        otherwise None) and "err" (error text, "" on success). This function
        does not raise: a missing image, an unconfigured endpoint, a non-200
        status or any exception is reported through "err".
    """
    if not image:
        return {"ok": False, "data": None, "err": "No image provided"}
    try:
        api_url = API_URL_ID if country_code == "ID" else API_URL_SG
        if not api_url:
            return {"ok": False, "data": None, "err": "OCR API URL is not configured"}
        img_base64 = image_to_base64(image)
        payload = {
            "image": img_base64,
            "country": country_code,
            "scenario": "Ocr",
            "options": ",".join(option_tokens) if option_tokens else "",
        }
        resp = requests.post(
            api_url,
            headers={"Content-Type": "application/json"},
            data=json.dumps(payload),
            timeout=_OCR_REQUEST_TIMEOUT,
        )
        if resp.status_code == 200:
            return {"ok": True, "data": resp.json(), "err": ""}
        return {"ok": False, "data": None, "err": f"{resp.status_code}: {resp.text}"}
    except Exception as e:
        # Deliberate catch-all at the UI boundary: image decoding, network
        # and JSON errors are all surfaced as an error message.
        return {"ok": False, "data": None, "err": str(e)}
# -----------------------------
# 6) 下拉/复选与 token 互转
# -----------------------------
def _display_to_code_country(display: str, lang: str):
    """Map a country dropdown entry to its country code.

    Looks *display* up in the English table when *lang* is "English" and in
    the Chinese table otherwise. Returns None for an unknown entry.
    """
    if lang == "English":
        return COUNTRIES_EN.get(display)
    return COUNTRIES_ZH.get(display)
def _display_to_tokens_options(selected_displays, lang: str):
    """Map selected option labels to their tokens, keeping selection order.

    *selected_displays* may be None or empty; labels that are not in the
    option table of *lang* are dropped.
    """
    table = OPTIONS_EN if lang == "English" else OPTIONS_ZH
    tokens = []
    for label in selected_displays or []:
        if label in table:
            tokens.append(table[label])
    return tokens
def _tokens_to_displays_options(tokens: list[str], lang: str):
    """Map option tokens to their labels in *lang*, keeping token order.

    *tokens* may be None or empty; tokens without a label in the option table
    of *lang* are dropped.
    """
    table = OPTIONS_EN if lang == "English" else OPTIONS_ZH
    # Inverted table: token -> label.
    label_of = dict(zip(table.values(), table.keys()))
    labels = []
    for token in tokens or []:
        if token in label_of:
            labels.append(label_of[token])
    return labels
# -----------------------------
# 7) 业务主函数
# -----------------------------
def run_ocr(image, country_display, option_displays, lang):
    """Run OCR for one upload and return ``(result_html, raw_response)``.

    Args:
        image: Path of the uploaded image; None/empty when nothing was
            uploaded.
        country_display: Selected country dropdown entry, in language *lang*.
        option_displays: Selected option labels, in language *lang*.
        lang: "English" or, for any other value, Chinese.

    Returns:
        A 2-tuple. On failure: an error panel and None. On success: the
        rendered result card and the decoded response body.

    The card is taken from "card_info" (or "data") of the response; when that
    is a list its first element is used. If the response carries a forgery
    result it is shown as the "document_forgery_result" field.
    """
    english = lang == "English"

    code = _display_to_code_country(country_display, lang)
    if not code:
        err_msg = "Please select a supported country" if english else "请选择国家"
        return (build_error_html(err_msg, lang), None)

    if not image:
        err_msg = "Please upload a document image" if english else "请先上传证件图片"
        return (build_error_html(err_msg, lang), None)

    tokens = _display_to_tokens_options(option_displays, lang)
    r = kyc_ocr(image, code, tokens)
    if not r["ok"]:
        return (build_error_html(r["err"], lang), None)

    raw = r["data"] or {}
    # A body that is not a JSON object has no fields to read.
    data = raw if isinstance(raw, dict) else {}

    card = data.get("card_info") or data.get("data") or {}
    # Compatibility: some endpoints return card_info as a list.
    if isinstance(card, list) and card:
        card = card[0]
    # Work on a copy so adding the forgery result below does not modify the
    # raw response returned to the caller; anything that is not a mapping is
    # treated as "no data".
    card = dict(card) if isinstance(card, dict) else {}

    forgery_info = data.get("document_forgery_info")
    if isinstance(forgery_info, dict):
        result = forgery_info.get("result")
        if result:
            # Shown as returned by the endpoint, without translation.
            card["document_forgery_result"] = result

    result_html = build_result_html(card, lang)
    return (result_html, raw)
# -----------------------------
# 8) Gradio UI
# -----------------------------
def build_ui():
    """Assemble the Gradio Blocks application and return it (not launched).

    The page consists of a language switch, the OCR panel (image upload,
    country dropdown, option checkboxes, submit button, HTML result area) and
    a "Contact Us" form that is posted through submit_cooperation_form.

    Switching the language re-labels the OCR panel and carries the user's
    current country and option selections over to the new language by
    translating them through their language-neutral codes/tokens.
    """
    # Per-language texts and choice tables of the OCR panel.
    text_en = {
        "lang": "English",
        "intro": INTRO_EN,
        "upload_label": "Upload Document",
        "countries": COUNTRIES_EN,
        "default_country": "Indonesia 🇮🇩",
        "country_label": "Select Country",
        "options": OPTIONS_EN,
        "options_label": "Options",
        "submit": "Start Recognition",
    }
    text_zh = {
        "lang": "中文",
        "intro": INTRO_ZH,
        "upload_label": "上传图片",
        "countries": COUNTRIES_ZH,
        "default_country": "印尼 🇮🇩",
        "country_label": "选择证件国家",
        "options": OPTIONS_ZH,
        "options_label": "选项",
        "submit": "开始识别",
    }

    # NOTE(review): the nesting of the layout containers below was
    # reconstructed; confirm it matches the intended page layout.
    with gr.Blocks(title="KYC OCR Global Recognition") as demo:
        # Language the page is currently rendered in.
        lang_state = gr.State("English")

        ui_lang = gr.Radio(
            choices=["English", "中文"],
            value="English",
            label="UI Language",
            interactive=True,
        )
        intro_md = gr.Markdown(text_en["intro"])

        with gr.Row():
            with gr.Column(scale=6):
                image_input = gr.Image(type="filepath", label=text_en["upload_label"])
            with gr.Column(scale=5):
                country_input = gr.Dropdown(
                    choices=list(text_en["countries"].keys()),
                    value=text_en["default_country"],
                    label=text_en["country_label"],
                    interactive=True,
                )
                options_group = gr.CheckboxGroup(
                    choices=list(text_en["options"].keys()),
                    value=[],
                    label=text_en["options_label"],
                    interactive=True,
                )
                submit_btn = gr.Button(text_en["submit"], variant="primary")

        # Output: recognition result.
        result_html = gr.HTML()

        # Submit handler: only the rendered HTML is shown; the raw response
        # returned by run_ocr is not displayed.
        def on_submit(image, country_display, option_displays, lang_cur):
            rendered, _raw = run_ocr(image, country_display, option_displays, lang_cur)
            return rendered

        submit_btn.click(
            fn=on_submit,
            inputs=[image_input, country_input, options_group, lang_state],
            outputs=[result_html],
        )

        # Language switch handler.
        def on_lang_change(selected_lang, prev_lang, country_display, option_displays):
            text = text_en if selected_lang == "English" else text_zh

            # Translate the live selections from the previous language into
            # the new one; fall back to the default country when the current
            # entry cannot be resolved.
            code = _display_to_code_country(country_display, prev_lang)
            country_value = next(
                (label for label, c in text["countries"].items() if c == code),
                text["default_country"],
            )
            tokens = _display_to_tokens_options(option_displays, prev_lang)
            displays = _tokens_to_displays_options(tokens, selected_lang)

            return (
                text["lang"],
                gr.update(value=text["lang"]),
                gr.update(value=text["intro"]),
                gr.update(label=text["upload_label"]),
                gr.update(
                    choices=list(text["countries"].keys()),
                    value=country_value,
                    label=text["country_label"],
                ),
                gr.update(
                    choices=list(text["options"].keys()),
                    value=displays,
                    label=text["options_label"],
                ),
                gr.update(value=text["submit"]),
            )

        ui_lang.change(
            fn=on_lang_change,
            inputs=[ui_lang, lang_state, country_input, options_group],
            outputs=[
                lang_state,
                ui_lang,
                intro_md,
                image_input,
                country_input,
                options_group,
                submit_btn,
            ],
        )

        # -----------------------------
        # Contact form
        # -----------------------------
        gr.Markdown("## 📩 Contact Us")
        with gr.Row():
            with gr.Column():
                contact_first_name = gr.Textbox(
                    label="First Name",
                    placeholder="e.g. John",
                )
                contact_last_name = gr.Textbox(
                    label="Last Name",
                    placeholder="e.g. Doe",
                )
                contact_phone = gr.Textbox(
                    label="Phone",
                    placeholder="+65 1234 5678",
                )
                contact_email = gr.Textbox(
                    label="Email *",
                    placeholder="your@email.com",
                )
            with gr.Column():
                contact_country = gr.Textbox(
                    label="Country",
                    placeholder="sg / id / cn ...",
                )
                contact_industry = gr.Textbox(
                    label="Industry",
                    placeholder="e.g. Fintech, E-commerce",
                )
                contact_description = gr.Textbox(
                    label="Description",
                    lines=5,
                    placeholder="Briefly describe your scenario",
                )
        contact_submit_btn = gr.Button(
            "Submit Cooperation Intent",
            variant="secondary",
        )
        contact_result_html = gr.HTML()

        # The inputs are listed in the parameter order of
        # submit_cooperation_form, so it serves as the handler directly.
        contact_submit_btn.click(
            fn=submit_cooperation_form,
            inputs=[
                contact_first_name,
                contact_last_name,
                contact_phone,
                contact_email,
                contact_country,
                contact_industry,
                contact_description,
                lang_state,
            ],
            outputs=[contact_result_html],
        )
    return demo
# -----------------------------
# 9) 入口
# -----------------------------
if __name__ == "__main__":
    # Script entry point: build the app, keep it bound to the module-level
    # name `demo`, then start the Gradio server.
    demo = build_ui()
    demo.launch()