# Spaces: Running on Zero
| import gc | |
| import asyncio | |
| import base64 | |
| import io | |
| import json | |
| import os | |
| import re | |
| import time | |
| import uuid | |
| from datetime import datetime, timezone | |
| from pathlib import Path | |
| from threading import Lock, Thread | |
| from typing import Optional | |
| import gradio as gr | |
| import numpy as np | |
| import torch | |
| from fastapi.responses import HTMLResponse | |
| from pypinyin import Style, lazy_pinyin | |
| from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig | |
# Compatibility shim: when `transformers.utils.import_utils` has no
# `is_torch_fx_available` attribute, install a stub that always returns True.
# NOTE(review): presumably required by remote model code loaded with
# trust_remote_code=True — confirm which import depends on it.
try:
    import transformers.utils.import_utils as transformers_import_utils

    if not hasattr(transformers_import_utils, "is_torch_fx_available"):
        transformers_import_utils.is_torch_fx_available = lambda: True
except Exception:
    # Best effort only: any failure while patching is ignored on purpose.
    pass
try:
    import spaces
except Exception:

    class _SpacesFallback:
        """No-op stand-in used when the `spaces` package cannot be imported."""

        @staticmethod
        def GPU(*args, **kwargs):
            """Mimic `spaces.GPU` without doing anything.

            Works both as a bare decorator (`@spaces.GPU`) and as a decorator
            factory (`@spaces.GPU(duration=...)`); in either form the wrapped
            function is returned unchanged.
            """
            # Bare usage: the decorated function itself is the only argument.
            if len(args) == 1 and not kwargs and callable(args[0]):
                return args[0]

            def decorator(fn):
                return fn

            return decorator

    spaces = _SpacesFallback()
# --- Model and text-to-speech settings, read once from the environment ---
_TRUE_WORDS = {"1", "true", "yes", "y"}


def _env_text(name: str, fallback: str) -> str:
    """Return environment variable *name* (or *fallback*) with whitespace stripped."""
    return os.getenv(name, fallback).strip()


def _env_flag(name: str, fallback: str) -> bool:
    """Read environment variable *name* as an on/off switch."""
    return _env_text(name, fallback).lower() in _TRUE_WORDS


# Correction model.
DEFAULT_MODEL_ID = "Alphaplasti/ToneBridge-MiniCPM4.1-8B"
MODEL_ID = _env_text("MODEL_ID", DEFAULT_MODEL_ID) or DEFAULT_MODEL_ID
HF_TOKEN = (
    (os.getenv("HF_TOKEN") or os.getenv("HUGGING_FACE_HUB_TOKEN") or "").strip()
    or None
)

# Text-to-speech provider selection; server-side TTS is off for the "browser" provider.
TTS_PROVIDER = _env_text("TTS_PROVIDER", "edge").lower() or "edge"
DEFAULT_TTS_MODEL_ID = "openbmb/VoxCPM2"
TTS_MODEL_ID = _env_text("TTS_MODEL_ID", DEFAULT_TTS_MODEL_ID) or DEFAULT_TTS_MODEL_ID
DEFAULT_ENABLE_SERVER_TTS = "true" if TTS_PROVIDER != "browser" else "false"
ENABLE_SERVER_TTS = _env_flag("ENABLE_SERVER_TTS", DEFAULT_ENABLE_SERVER_TTS)
SERVER_TTS_ENABLED = ENABLE_SERVER_TTS and TTS_PROVIDER != "browser"
TTS_MAX_CHARS = int(os.getenv("TTS_MAX_CHARS", "180"))

# Edge TTS voice parameters.
EDGE_TTS_VOICE = _env_text("EDGE_TTS_VOICE", "zh-CN-YunjianNeural")
EDGE_TTS_RATE = _env_text("EDGE_TTS_RATE", "+0%")
EDGE_TTS_PITCH = _env_text("EDGE_TTS_PITCH", "+0Hz")
EDGE_TTS_VOLUME = _env_text("EDGE_TTS_VOLUME", "+0%")
EDGE_TTS_KARAOKE_DURATION_FACTOR = float(os.getenv("EDGE_TTS_KARAOKE_DURATION_FACTOR", "0.86"))

# VoxCPM voice parameters.
VOXCPM_VOICE_STYLE = _env_text(
    "VOXCPM_VOICE_STYLE",
    "A calm adult male Mandarin teacher in his 30s or 40s, warm low-pitched voice, natural conversational speed, clear Standard Mandarin, not childlike, not female",
)
VOXCPM_CFG_VALUE = float(os.getenv("VOXCPM_CFG_VALUE", "2.0"))
VOXCPM_INFERENCE_TIMESTEPS = int(os.getenv("VOXCPM_INFERENCE_TIMESTEPS", "6"))
VOXCPM_RETRY_BADCASE = _env_flag("VOXCPM_RETRY_BADCASE", "false")
VOXCPM_OUTPUT_SAMPLE_RATE = int(os.getenv("VOXCPM_OUTPUT_SAMPLE_RATE", "24000"))

# Generation limits and model loading switches.
MAX_INPUT_CHARS = int(os.getenv("MAX_INPUT_CHARS", "1200"))
MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "220"))
LOAD_IN_4BIT = _env_flag("LOAD_IN_4BIT", "true")
PRELOAD_MODEL = _env_flag("PRELOAD_MODEL", "true")
# --- Usage-metrics storage and optional upload to a Hub repository ---
SPACE_DIR = Path(__file__).resolve().parent
METRICS_FILE = Path(os.getenv("METRICS_FILE", "tonebridge_usage_metrics.jsonl"))
METRICS_REPO_SYNC = os.getenv("METRICS_REPO_SYNC", "false").strip().lower() in {"1", "true", "yes", "y"}

# First non-empty value among the candidate variables, else an empty string.
METRICS_REPO_ID = next(
    (value for value in map(os.getenv, ("METRICS_REPO_ID", "SPACE_ID", "HF_SPACE_ID")) if value),
    "",
).strip()

# An absolute metrics file is uploaded under its bare name; a relative one keeps
# its relative path with forward slashes.
if METRICS_FILE.is_absolute():
    DEFAULT_METRICS_REPO_PATH = METRICS_FILE.name
else:
    DEFAULT_METRICS_REPO_PATH = str(METRICS_FILE).replace("\\", "/")
METRICS_REPO_PATH = os.getenv("METRICS_REPO_PATH", DEFAULT_METRICS_REPO_PATH).strip().lstrip("/")

HF_METRICS_TOKEN = (
    next(
        (value for value in map(os.getenv, ("HF_METRICS_TOKEN", "HF_TOKEN", "HUGGING_FACE_HUB_TOKEN")) if value),
        "",
    ).strip()
    or None
)

# Guards reads and writes of the metrics file.
METRICS_LOCK = Lock()
# Last upload problem, or None when the last sync attempt succeeded or none ran.
metrics_sync_error: Optional[str] = None
# Lazily populated model state; `load_model()` fills tokenizer/model or records
# the failure text in `load_error`.
tokenizer = None
model = None
load_error: Optional[str] = None
# Text-to-speech model state (populated outside the code shown here).
tts_model = None
tts_load_error: Optional[str] = None
# NOTE(review): confirm `gr.Server` exists in the pinned Gradio version.
app = gr.Server()
# System message sent with every correction request.
# NOTE(review): the {context}/{tone}/{correction_style}/{sentence} placeholders
# are never formatted — `_generate_correction_gpu` passes this constant as-is, so
# the model sees them literally. Confirm this is intended.
SYSTEM_PROMPT = """You are ToneBridge, a Mandarin Chinese teacher for beginner learners.
Your task is to correct ONE student Chinese sentence according to the selected context and tone.
Your default behavior is conservative minimal correction.
Do not create a richer new sentence.
Do not improve style just because another phrasing is possible.
Do not shorten, expand, or rewrite a correct sentence.
Preserve the student's meaning, length, intention, and punctuation style as much as possible.
Never add information that is absent from the original sentence.
When in doubt, choose no correction.
Inputs:
Context: {context}
Tone: {tone}
Correction style: {correction_style}
Student sentence: {sentence}
Correction decision rule:
A correction is allowed ONLY if the original sentence has a clear problem:
- wrong character
- wrong word
- missing necessary word
- extra incorrect word
- wrong measure word
- wrong word order
- wrong grammar pattern
- tone/politeness inappropriate for the selected context
If the sentence is understandable, grammatical, and natural enough for the selected context, do NOT correct it.
Acceptable variants are not errors.
A more formal, shorter, smoother, or more common version is NOT a correction if the original is already acceptable.
Important anti-overcorrection rules:
- Do not remove 一 from 有一只猫 only to make it more casual. 有一只猫 and 有只猫 can both be correct.
- Do not add 的 or 色 only to make an adjective-noun phrase sound more standard if the original is already acceptable.
- Do not change basic location patterns such as "A 在 B 的旁边" if they are correct and natural.
- Do not change word order unless the original word order is actually wrong.
- Do not mark "word order" unless the corrected sentence visibly changes the order of words.
- Do not correct punctuation-only issues unless punctuation creates real confusion.
- Do not replace a correct casual sentence with a formal sentence unless the selected context requires formality.
- For a casual or friendly tone, do not use 您 or 您好. Use 你 / 你好.
- For a teacher, client, manager, or very formal context, 您 may be appropriate.
Error type consistency:
- If the corrected sentence is identical to the original, Error type must be "none".
- If Error type is "none", the corrected sentence must be identical to the original.
- If you replace one Chinese character with another that has the same or very close pinyin, Error type should be "character/input-method mistake", not politeness.
- If you cannot explain the correction by pointing to a clear visible problem, return no correction.
Output rules:
Return exactly 5 short lines.
Use exactly these labels in this order.
Do not use markdown.
Do not output pinyin.
Do not output translations.
Do not output hidden reasoning, chain-of-thought, or <think> tags.
Explanations must be only in English.
Why and Tip must be English sentences. Do not explain in Chinese.
You may mention isolated Chinese words or characters inside English explanations only when necessary.
Chinese sentences must stay in Chinese characters.
Give only one corrected sentence.
Add at most one gentle emoji in Why or Tip, never inside Chinese sentences.
Allowed Error type values:
none
character/input-method mistake
wrong character
wrong word
missing word
extra word
measure word
word order
grammar
tone
Required format:
Original sentence: <student sentence>
Corrected sentence: <corrected sentence or identical original>
Error type: <one allowed Error type value>
Why: <short beginner-friendly explanation in English>
Tip: <one short practical tip in English>
For a correct sentence:
Original sentence: <student sentence>
Corrected sentence: <identical student sentence>
Error type: none
Why: This sentence is correct and natural. 😊
Tip: Keep it as it is.
Examples:
Input:
红桌子上有一只猫
Output:
Original sentence: 红桌子上有一只猫
Corrected sentence: 红桌子上有一只猫
Error type: none
Why: This sentence is correct and natural. 😊
Tip: 有只猫 is only a casual variant, not a correction.
Input:
桌子上猫有一只
Output:
Original sentence: 桌子上猫有一只
Corrected sentence: 桌子上有一只猫
Error type: word order
Why: In this location pattern, use place + 有 + object. 😊
Tip: Put 有 before the thing that exists.
Input:
我想喝谁
Output:
Original sentence: 我想喝谁
Corrected sentence: 我想喝水
Error type: character/input-method mistake
Why: 谁 and 水 have close pinyin, but 水 means water. 😊
Tip: Check same-sound characters when typing.
"""
def normalize_space(text: str) -> str:
    """Trim *text* and collapse every whitespace run into one space.

    A falsy value (``None`` or ``""``) yields an empty string.
    """
    return " ".join((text or "").split())
def has_chinese(text: str) -> bool:
    """Report whether *text* holds at least one character in U+4E00–U+9FFF."""
    return bool(re.search(r"[\u4e00-\u9fff]", text or ""))
def to_pinyin(text: str) -> str:
    """Convert *text* to tone-marked pinyin, one space between items."""
    syllables = lazy_pinyin(text or "", style=Style.TONE)
    return " ".join(syllables)
def should_add_pinyin_for_line(line: str) -> bool:
    """Return True when *line* carries the original or the corrected sentence label."""
    haystack = line or ""
    return "Original sentence" in haystack or "Corrected sentence" in haystack
def chinese_segments(text: str) -> list[str]:
    """Extract the runs of Chinese text (with their punctuation) found in *text*.

    A run may contain CJK ideographs, CJK punctuation, full-width punctuation,
    a few ASCII marks and whitespace. Runs without any ideograph are dropped and
    the remaining ones are returned stripped, in order of appearance.
    """
    # The ASCII colon is deliberately NOT part of a run: otherwise the ":" of a
    # label such as "Corrected sentence : ..." is glued to the sentence and ends
    # up at the start of the pinyin line. Full-width , ! ? ; : ( ) are accepted
    # so that normally punctuated Chinese stays in one segment.
    pattern = (
        r"[\u4e00-\u9fff"
        r",。!?、;“”‘’()《》〈〉…—"
        r"\uff0c\uff01\uff1f\uff1b\uff1a\uff08\uff09"
        r"\s]+"
    )
    return [seg.strip() for seg in re.findall(pattern, text or "") if has_chinese(seg)]
def add_pinyin_under_chinese(text: str) -> str:
    """Re-space *text* and add an italic pinyin line under sentence lines.

    Blank input lines are dropped; each remaining line is stripped and followed
    by an empty line. Lines accepted by ``should_add_pinyin_for_line`` also get a
    ``*pinyin*`` line (segments joined with " / ") right below them.
    """
    output: list[str] = []
    for raw_line in (text or "").splitlines():
        stripped = raw_line.strip()
        if stripped:
            output.append(stripped)
            if should_add_pinyin_for_line(raw_line):
                pieces = chinese_segments(raw_line)
                if pieces:
                    output.append("*" + " / ".join(map(to_pinyin, pieces)) + "*")
            # One blank separator after every kept line.
            output.append("")
    return "\n".join(output).strip()
def add_section_emojis(text: str) -> str:
    """Prefix each known section label at the start of a line with its emoji.

    "Label:" becomes "<emoji> Label :"; leading whitespace is preserved.
    """
    decorated_labels = (
        ("Original sentence", "📝 Original sentence"),
        ("Corrected sentence", "✅ Corrected sentence"),
        ("Error type", "🔎 Error type"),
        ("Why", "💡 Why"),
        ("Tip", "🌱 Tip"),
    )
    result = text or ""
    for plain, decorated in decorated_labels:
        line_start = re.compile(rf"^(\s*){re.escape(plain)}\s*:", flags=re.MULTILINE)
        result = line_start.sub(rf"\1{decorated} :", result)
    return result
def normalize_model_markdown(text: str) -> str:
    """Normalize raw model output so every section label starts its own line.

    Literal backslash escapes (``\\r\\n``, ``\\n``, ``\\t``) emitted by the model
    are turned into real characters first; then any whitespace run that precedes
    a known "Label:" is replaced by a single newline.
    """
    section_labels = (
        "Original sentence",
        "Corrected sentence",
        "Error type",
        "Why",
        "Tip",
    )
    result = (text or "").strip()
    for escaped, real in (("\\r\\n", "\n"), ("\\n", "\n"), ("\\t", " ")):
        result = result.replace(escaped, real)
    for name in section_labels:
        before_label = rf"\s+(?={re.escape(name)}\s*:)"
        result = re.sub(before_label, "\n", result)
    return result.strip()
def clean_corrected_sentence_value(value: str) -> str:
    """Reduce a "Corrected sentence" value to a single sentence.

    Asterisks are removed and whitespace is normalized; anything from a stray
    "Original sentence:/Error type:/Why:/Tip:" label onward is dropped; and when
    more text follows the first sentence terminator, only the first sentence is
    kept.
    """
    text = normalize_space((value or "").replace("*", ""))
    text = re.split(r"\s+(?:Original sentence|Error type|Why|Tip)\s*:", text, maxsplit=1)[0].strip()
    # Sentence terminators: ideographic full stop, ASCII ! ?, and the full-width
    # ! ? (U+FF01, U+FF1F) that Chinese text normally uses.
    extra_after_sentence = re.match(
        r"^(.+?[。!?\uff01\uff1f])(?=\s*[\u4e00-\u9fffA-Za-z])",
        text,
    )
    if extra_after_sentence:
        text = extra_after_sentence.group(1)
    return text.strip()
def clean_correction_output(text: str) -> str:
    """Normalize model output and tidy the value of its "Corrected sentence" line.

    Every other line is passed through unchanged.
    """
    corrected_line = re.compile(r"^(Corrected sentence\s*:\s*)(.+)$", flags=re.I)
    result = []
    for raw in normalize_model_markdown(text).splitlines():
        found = corrected_line.match(raw.strip())
        if found is None:
            result.append(raw)
            continue
        prefix, value = found.group(1), found.group(2)
        result.append(prefix + clean_corrected_sentence_value(value))
    return "\n".join(result).strip()
def wrap_result(markdown: str) -> str:
    """Return *markdown* stripped, or a fallback message when nothing is left.

    Empty, ``None`` and whitespace-only inputs all produce the fallback, so the
    caller never receives an empty result.
    """
    trimmed = (markdown or "").strip()
    return trimmed or "No correction was produced."
def final_result(markdown: str) -> str:
    """Run the display pipeline: clean, add label emojis, add pinyin, wrap."""
    cleaned = clean_correction_output(markdown)
    labelled = add_section_emojis(cleaned)
    with_pinyin = add_pinyin_under_chinese(labelled)
    return wrap_result(with_pinyin)
def utc_now_iso() -> str:
    """Return the current UTC time in ISO 8601 form with a trailing "Z"."""
    stamp = datetime.now(timezone.utc).isoformat()
    # A UTC-aware isoformat() always ends with the "+00:00" offset.
    return stamp.removesuffix("+00:00") + "Z"
def extract_result_field(markdown: str, label: str) -> str:
    """Return the value that follows "*label*:" in model output, or "".

    The label is matched case-insensitively and may be preceded by other text on
    its line. Asterisks are removed from the value; the "Corrected sentence"
    field is additionally reduced to a single sentence.
    """
    normalized = normalize_model_markdown(markdown or "")
    field = re.compile(
        rf"^.*?{re.escape(label)}\s*:\s*(.+)$",
        flags=re.IGNORECASE | re.MULTILINE,
    )
    hit = field.search(normalized)
    if hit is None:
        return ""
    raw_value = hit.group(1).replace("*", "").strip()
    if label.lower() != "corrected sentence":
        return normalize_space(raw_value)
    return clean_corrected_sentence_value(raw_value)
def is_mostly_chinese_explanation(text: str) -> bool:
    """Decide whether an explanation is written mainly in Chinese.

    True when it has at least six ideographs (U+4E00–U+9FFF) and more ideographs
    than ASCII letters.
    """
    hanzi = 0
    latin = 0
    for ch in text or "":
        if "\u4e00" <= ch <= "\u9fff":
            hanzi += 1
        elif "A" <= ch <= "Z" or "a" <= ch <= "z":
            latin += 1
    return hanzi >= 6 and hanzi > latin
# (keywords, Why text, Tip text) — checked in order; the first rule with a
# keyword contained in the error type wins. "missing"/"extra" come before the
# generic "word" rule so that "missing word" and "extra word" are not described
# as a word replacement.
_FEEDBACK_FALLBACKS = (
    (
        ("none",),
        "This sentence is correct and natural.",
        "Keep it as it is.",
    ),
    (
        ("character", "input"),
        "One character changes the meaning; the corrected sentence uses the intended word.",
        "When typing, check characters with similar pronunciation.",
    ),
    (
        ("word order", "order"),
        "The correction fixes the word order so the Mandarin pattern is clearer.",
        "Practice the same sentence pattern with one small change at a time.",
    ),
    (
        ("measure",),
        "The correction uses a measure word that fits the noun better.",
        "Pair nouns with their usual measure words.",
    ),
    (
        ("tone", "register", "polite"),
        "The correction makes the tone fit the selected context better.",
        "Match the wording to the relationship and situation.",
    ),
    (
        ("missing",),
        "The correction adds a word that the sentence needs.",
        "Read the sentence again and check that no necessary word is missing.",
    ),
    (
        ("extra",),
        "The correction removes a word that does not belong in the sentence.",
        "Leave out words that the sentence does not need.",
    ),
    (
        ("word",),
        "The correction replaces a word that does not fit the intended meaning.",
        "Check the meaning of each key word before sending.",
    ),
)


def english_feedback_fallback(error_type: str, label: str) -> str:
    """Return a canned English "Why" or "Tip" sentence for *error_type*.

    Args:
        error_type: Error category reported by the model; matched by keyword,
            case-insensitively.
        label: "Tip" (any case) selects the tip text; anything else selects the
            explanation ("Why") text.

    Returns:
        The sentence of the first matching rule, or a generic grammar sentence
        when no keyword matches.
    """
    kind = normalize_space(error_type).lower()
    is_tip = label.lower() == "tip"
    for keywords, why_text, tip_text in _FEEDBACK_FALLBACKS:
        if any(keyword in kind for keyword in keywords):
            return tip_text if is_tip else why_text
    return (
        "Practice the sentence pattern with one small change at a time."
        if is_tip
        else "The correction fixes a grammar issue while keeping the original meaning."
    )
def build_plain_correction_output(
    original_sentence: str,
    corrected_sentence: str,
    error_type: str,
    why: str,
    tip: str,
) -> str:
    """Assemble the five-line "Label: value" correction text.

    An empty *error_type* is written as "none".
    """
    fields = (
        ("Original sentence", original_sentence),
        ("Corrected sentence", corrected_sentence),
        ("Error type", error_type or "none"),
        ("Why", why),
        ("Tip", tip),
    )
    return "\n".join(f"{name}: {value}" for name, value in fields)
def generate_english_feedback_repair(
    original_sentence: str,
    corrected_sentence: str,
    error_type: str,
    why: str,
    tip: str,
) -> str:
    """Ask the loaded model to rewrite the Why/Tip lines in English.

    Returns the model's (thinking-stripped) reply, or "" when the model or
    tokenizer is not loaded or when anything fails during generation.
    """
    if model is None or tokenizer is None:
        return ""
    messages = [
        {
            "role": "system",
            "content": (
                "Rewrite Mandarin correction feedback. Keep Original sentence, Corrected sentence, "
                "and Error type unchanged. Rewrite only Why and Tip in beginner-friendly English. "
                "Do not explain in Chinese. Do not output pinyin. Return exactly the same five labels."
            ),
        },
        {
            "role": "user",
            "content": build_plain_correction_output(
                original_sentence,
                corrected_sentence,
                error_type,
                why,
                tip,
            ),
        },
    ]
    try:
        try:
            text = tokenizer.apply_chat_template(
                messages,
                tokenize=False,
                add_generation_prompt=True,
                enable_thinking=False,
            )
        except TypeError:
            # Retry without `enable_thinking` when the call rejects that keyword.
            text = tokenizer.apply_chat_template(
                messages,
                tokenize=False,
                add_generation_prompt=True,
            )
        inputs = tokenizer([text], return_tensors="pt").to(model.device)
        with torch.inference_mode():
            outputs = model.generate(
                **inputs,
                max_new_tokens=150,
                do_sample=False,
                use_cache=True,
                repetition_penalty=1.05,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id,
            )
        # Keep only the tokens generated after the prompt.
        generated = outputs[0][inputs["input_ids"].shape[-1]:]
        repaired = tokenizer.decode(generated, skip_special_tokens=True).strip()
        # Drop tensor references before asking CUDA to release cached memory.
        del inputs, outputs, generated
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()
        # `strip_thinking` is defined outside the code shown here.
        return strip_thinking(repaired)
    except Exception:
        # Best effort: the caller falls back to canned English text on "".
        return ""
def ensure_english_feedback(answer: str, original_sentence: str) -> str:
    """Guarantee that the Why/Tip lines of *answer* are in English.

    *answer* is returned untouched when neither line is mostly Chinese.
    Otherwise the model is asked for an English rewrite; any line that is still
    missing or mostly Chinese is replaced by a canned sentence, and the five-line
    output is rebuilt.
    """
    why = extract_result_field(answer, "Why")
    tip = extract_result_field(answer, "Tip")
    needs_repair = is_mostly_chinese_explanation(why) or is_mostly_chinese_explanation(tip)
    if not needs_repair:
        return answer

    original = extract_result_field(answer, "Original sentence") or original_sentence
    corrected = extract_result_field(answer, "Corrected sentence") or original
    error_type = extract_result_field(answer, "Error type") or "none"
    repaired = generate_english_feedback_repair(original, corrected, error_type, why, tip)

    english = {}
    for label, previous in (("Why", why), ("Tip", tip)):
        candidate = extract_result_field(repaired, label) or previous
        if not candidate or is_mostly_chinese_explanation(candidate):
            candidate = english_feedback_fallback(error_type, label)
        english[label] = candidate

    return build_plain_correction_output(
        original,
        corrected,
        error_type,
        english["Why"],
        english["Tip"],
    )
def metrics_file_path() -> Path:
    """Resolve the metrics file; a relative METRICS_FILE is placed under SPACE_DIR."""
    if METRICS_FILE.is_absolute():
        return METRICS_FILE
    return SPACE_DIR / METRICS_FILE
def sync_usage_metrics_to_repo(commit_message: str) -> None:
    """Upload the metrics file to the configured Space repository, if enabled.

    Does nothing when syncing is disabled or the file does not exist. The
    outcome is recorded in the module-level ``metrics_sync_error`` (None after a
    successful upload); no exception is raised.
    """
    global metrics_sync_error
    if not METRICS_REPO_SYNC:
        return
    local_file = metrics_file_path()
    if not local_file.exists():
        return

    missing_setting = None
    if not METRICS_REPO_ID:
        missing_setting = "METRICS_REPO_ID or SPACE_ID"
    elif not HF_METRICS_TOKEN:
        missing_setting = "HF_METRICS_TOKEN or HF_TOKEN"
    if missing_setting is not None:
        metrics_sync_error = f"Metrics repo sync is enabled, but {missing_setting} is missing."
        return

    try:
        # Imported lazily so the dependency is only needed when syncing is on.
        from huggingface_hub import upload_file

        upload_file(
            path_or_fileobj=str(local_file),
            path_in_repo=METRICS_REPO_PATH or local_file.name,
            repo_id=METRICS_REPO_ID,
            repo_type="space",
            token=HF_METRICS_TOKEN,
            commit_message=commit_message,
        )
    except Exception as exc:
        metrics_sync_error = f"Metrics repo sync failed: {exc}"
    else:
        metrics_sync_error = None
def read_usage_records_unlocked() -> list[dict]:
    """Load every JSON-object line of the metrics file; takes no lock itself.

    Blank lines, lines that are not valid JSON and JSON values that are not
    objects are skipped. A missing file yields an empty list.
    """
    source = metrics_file_path()
    if not source.exists():
        return []
    parsed: list[dict] = []
    with source.open("r", encoding="utf-8") as stream:
        for raw in stream:
            payload = raw.strip()
            if payload:
                try:
                    item = json.loads(payload)
                except json.JSONDecodeError:
                    continue
                if isinstance(item, dict):
                    parsed.append(item)
    return parsed
def write_usage_records_unlocked(records: list[dict]) -> None:
    """Replace the metrics file with *records*, one JSON object per line.

    The data is written to a sibling temporary file that is then moved over the
    real file, so a failure part-way through leaves the previous metrics intact
    instead of a truncated file. This function takes no lock itself; the visible
    caller (``update_usage_evaluation``) calls it while holding METRICS_LOCK.

    Raises:
        OSError: if the file cannot be written or replaced.
        TypeError / ValueError: if a record is not JSON-serializable.
    """
    path = metrics_file_path()
    path.parent.mkdir(parents=True, exist_ok=True)
    staging = path.with_name(path.name + ".tmp")
    try:
        with staging.open("w", encoding="utf-8") as handle:
            for record in records:
                handle.write(json.dumps(record, ensure_ascii=False, sort_keys=True) + "\n")
        # Same-directory rename: atomic on POSIX.
        staging.replace(path)
    except BaseException:
        # Do not leave a half-written temporary file behind.
        staging.unlink(missing_ok=True)
        raise
def append_usage_record(record: dict) -> None:
    """Append *record* as one JSON line to the metrics file, then trigger a sync."""
    target = metrics_file_path()
    target.parent.mkdir(parents=True, exist_ok=True)
    with METRICS_LOCK, target.open("a", encoding="utf-8") as stream:
        stream.write(json.dumps(record, ensure_ascii=False, sort_keys=True) + "\n")
    # The upload runs after the lock has been released.
    sync_usage_metrics_to_repo("Update ToneBridge usage metrics")
def update_usage_evaluation(request_id: str, evaluation: str) -> Optional[dict]:
    """Store *evaluation* on the first record whose request_id matches.

    Returns the updated record, or None when no record matches (in which case
    nothing is written or synced).
    """
    with METRICS_LOCK:
        all_records = read_usage_records_unlocked()
        target = next(
            (item for item in all_records if item.get("request_id") == request_id),
            None,
        )
        if target is not None:
            target["evaluation"] = evaluation
            target["evaluated_at"] = utc_now_iso()
            write_usage_records_unlocked(all_records)
    if target is None:
        return None
    # The upload runs after the lock has been released.
    sync_usage_metrics_to_repo("Update ToneBridge feedback metrics")
    return target
def metric_public_view(record: dict) -> dict:
    """Project *record* onto the fixed set of fields exposed publicly.

    Missing text fields default to ""; "evaluation" and
    "generation_time_seconds" default to None. Unknown keys are dropped.
    """
    exposed_fields = (
        ("request_id", ""),
        ("created_at", ""),
        ("original_sentence", ""),
        ("corrected_sentence", ""),
        ("evaluation", None),
        ("generation_time_seconds", None),
        ("status", ""),
        ("context", ""),
        ("target_tone", ""),
        ("correction_mode", ""),
        ("error_type", ""),
        ("model_id", ""),
    )
    return {name: record.get(name, default) for name, default in exposed_fields}
def device_label() -> str:
    """Describe the compute device: GPU name with total memory in GB, or a CPU notice."""
    if not torch.cuda.is_available():
        return "CPU: no CUDA GPU detected"
    gpu_name = torch.cuda.get_device_name(0)
    total_bytes = torch.cuda.get_device_properties(0).total_memory
    mem_gb = total_bytes / (1024**3)
    return f"GPU: {gpu_name} ({mem_gb:.1f} GB)"
def load_model() -> None:
    """Load the tokenizer and model into the module globals (no-op if loaded).

    On success ``load_error`` is cleared. On any failure the message is stored
    in ``load_error`` and both ``tokenizer`` and ``model`` are reset to None; no
    exception is raised.
    """
    global tokenizer, model, load_error
    if model is not None and tokenizer is not None:
        return
    try:
        cuda_available = torch.cuda.is_available()
        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True, token=HF_TOKEN)
        load_kwargs = {
            "torch_dtype": "auto",
            "device_map": "auto",
            "trust_remote_code": True,
            "low_cpu_mem_usage": True,
            "token": HF_TOKEN,
        }
        # 4-bit quantization is only requested when a CUDA device is present.
        if LOAD_IN_4BIT and cuda_available:
            load_kwargs["quantization_config"] = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=torch.float16,
                bnb_4bit_use_double_quant=True,
            )
        try:
            # First attempt: ask for the SDPA attention implementation.
            model = AutoModelForCausalLM.from_pretrained(
                MODEL_ID,
                attn_implementation="sdpa",
                **load_kwargs,
            )
        except Exception:
            # Second attempt without the explicit attention implementation.
            model = AutoModelForCausalLM.from_pretrained(
                MODEL_ID,
                **load_kwargs,
            )
        model.eval()
        load_error = None
    except Exception as exc:
        load_error = f"Model load failed: {exc}"
        # Never leave a tokenizer without its model.
        tokenizer = None
        model = None
# Load the model eagerly at import time when PRELOAD_MODEL is enabled.
if PRELOAD_MODEL:
    load_model()
def correction_mode_guidance(correction_mode: str) -> str:
    """Return the instruction text for *correction_mode*.

    Only the exact value "Natural correction" selects the natural-correction
    text; every other value gets the minimal-correction text.
    """
    natural_text = (
        "Natural correction: make the sentence sound natural for the chosen context, "
        "but only if the original is actually unnatural, incorrect, or socially inappropriate. "
        "If the original is already correct and natural, keep it unchanged."
    )
    minimal_text = (
        "Minimal correction: change only the characters, grammar, or word order that are necessary. "
        "Do not rewrite the sentence if a small correction is enough."
    )
    return natural_text if correction_mode == "Natural correction" else minimal_text
def context_tone_guidance(context: str, target_tone: str) -> str:
    """Return the extra prompt rule for a context/tone pair.

    Only the "wechat" context (case-insensitive) has specific rules, with a more
    detailed variant when the tone mentions "friendly".
    """
    if normalize_space(context).lower() != "wechat":
        return "No extra context-specific rule."
    if "friendly" in normalize_space(target_tone).lower():
        return (
            "WeChat + Friendly: treat the sentence like a short instant message. "
            "Be concise, direct, and casual. If the original sounds formal, literary, ceremonial, "
            "or like an invitation letter, correct it as a tone/register issue. "
            "Avoid stiff phrases such as 敬请, 阁下, 拨冗, 莅临, 寒舍 unless the user explicitly wants formal wording. "
            "Prefer everyday wording with 你, 有空, 方便, 一下, 吗, or 吧 when appropriate. "
            "The corrected sentence should usually be short."
        )
    return (
        "WeChat context: prefer concise instant-message wording. "
        "Avoid ceremonial or overly literary phrasing unless the target tone is explicitly formal."
    )
# NOTE: a second `build_user_prompt` is defined further down in this file; being
# defined later, it replaces this one at import time.
def build_user_prompt(context: str, sentence: str, target_tone: str, correction_mode: str) -> str:
    """Build the user message from free-form context, tone and correction mode."""
    context = normalize_space(context)
    target_tone = normalize_space(target_tone)
    sentence = (sentence or "").strip()
    if not context:
        context = "contexte non precise"
    return f"""Social context: {context}
Target tone: {target_tone}
Correction style: {correction_mode}
Correction style instruction: {correction_mode_guidance(correction_mode)}
Context and tone instruction: {context_tone_guidance(context, target_tone)}
Explanation language: English only
Student's Chinese sentence:
{sentence}
Before correcting, decide whether the sentence is already correct, natural, and appropriate for the context.
If it is correct, keep exactly the same sentence in "Corrected sentence".
In that case, use "none" as the error type and explain simply that the sentence is correct.
Correct the sentence while preserving its intention and length.
Prefer the smallest possible correction.
Do not turn a short sentence into a long sentence.
The "Corrected sentence" line must contain only one Chinese sentence. Do not add a second option, leftover characters, notes, vocabulary, or pinyin after it.
Do not add names, emotions, encouragement, or information that was not in the original sentence.
Do not replace a correct sentence with a paraphrase. For example, "火车站在超市的旁边" is correct and natural for "The train station is next to the supermarket"; do not correct it to "火车站旁有超市".
If you replace one Chinese character with another character that has the same or very close pinyin, mention in "Why" that it is probably a character/input-method mistake.
All explanations, titles, and tips must be in English.
Add one line "Error type" with a short category: character/input mistake, grammar, word order, tone/register, naturalness, or none.
Use real line breaks between sections. Do not output escaped newline characters like \\n.
Do not write a long paragraph. Maximum 5 short lines.
/no_think"""
# Canonical context-tone profiles. Each entry supplies the context description,
# tone, correction style and extra instruction inserted into the user prompt.
CONTEXT_TONE_PROFILES = {
    "friendly-informal": {
        "context": "friendly everyday conversation with a friend or close person",
        "tone": "informal friendly",
        "correction_style": "tone-aware",
        "instruction": (
            "Keep the sentence simple, natural, and friendly. Prefer everyday spoken wording. "
            "Use \u4f60 when a pronoun is needed. Avoid \u60a8, \u662f\u5426, ceremonial, literary, or stiff formal wording."
        ),
    },
    "work-informal": {
        "context": "workplace message to a colleague or familiar coworker",
        "tone": "informal professional",
        "correction_style": "tone-aware",
        "instruction": (
            "Keep the sentence clear, polite, and work-appropriate without sounding stiff. "
            "Avoid slang, but do not over-formalize if the original is already natural."
        ),
    },
    "work-formal": {
        "context": "workplace message to a manager, client, teacher, or formal contact",
        "tone": "formal professional",
        "correction_style": "tone-aware",
        "instruction": (
            "Use respectful, professional wording when needed. \u60a8 and \u8bf7 may be appropriate. "
            "Avoid overly casual phrasing if the relationship requires formality."
        ),
    },
    "wechat-informal": {
        "context": "WeChat message to a friend or close contact",
        "tone": "informal instant message",
        "correction_style": "tone-aware",
        "instruction": (
            "Prefer short, direct instant-message wording. Use \u4f60, \u6709\u7a7a, \u65b9\u4fbf, "
            "\u4e00\u4e0b, \u5417, or \u5427 when appropriate. Avoid \u60a8, \u662f\u5426, "
            "\u656c\u8bf7, \u9601\u4e0b, \u62e8\u5197, \u8385\u4e34, and invitation-letter style."
        ),
    },
    "wechat-formal": {
        "context": "WeChat message in a professional or formal relationship",
        "tone": "formal concise instant message",
        "correction_style": "tone-aware",
        "instruction": (
            "Keep the message concise like WeChat, but respectful. \u8bf7 and \u60a8 may be appropriate. "
            "Avoid both casual slang and overly ceremonial letter-style wording."
        ),
    },
}
# Alternate spellings (including French "-informel"/"-formel" forms) and
# shorthand values mapped to a canonical profile key.
CONTEXT_TONE_ALIASES = {
    "amical-informel": "friendly-informal",
    "amis-informel": "friendly-informal",
    "friends": "friendly-informal",
    "family": "friendly-informal",
    "friendly": "friendly-informal",
    "work": "work-formal",
    "work-informel": "work-informal",
    "work-formel": "work-formal",
    "wechat": "wechat-informal",
    "wechat-informel": "wechat-informal",
    "wechat-formel": "wechat-formal",
}
def normalize_context_tone(value: str) -> str:
    """Map a free-form context-tone value to a canonical profile key.

    The value is lower-cased and underscores/whitespace become hyphens. Aliases
    are resolved first, then canonical keys; anything else falls back to
    "friendly-informal".
    """
    slug = re.sub(r"\s+", "-", normalize_space(value).lower().replace("_", "-"))
    if slug in CONTEXT_TONE_ALIASES:
        return CONTEXT_TONE_ALIASES[slug]
    if slug in CONTEXT_TONE_PROFILES:
        return slug
    return "friendly-informal"
def context_tone_profile(value: str) -> dict:
    """Return a copy of the profile selected by *value*, plus its canonical "key"."""
    canonical = normalize_context_tone(value)
    return {**CONTEXT_TONE_PROFILES[canonical], "key": canonical}
def build_user_prompt(context: str, sentence: str, target_tone: str = "", correction_mode: str = "") -> str:
    """Build the user message for one correction request.

    *context* selects a context-tone profile; *target_tone* and
    *correction_mode* are accepted but not used, since tone and correction style
    come from the selected profile.
    """
    chosen = context_tone_profile(context)
    student_text = (sentence or "").strip()
    return f"""Selected context-tone: {chosen["key"]}
Context: {chosen["context"]}
Tone: {chosen["tone"]}
Correction style: {chosen["correction_style"]}
Profile instruction: {chosen["instruction"]}
Explanation language: English only
Student's Chinese sentence:
{student_text}
Before correcting, decide whether the sentence is already correct, natural, and appropriate for the selected context-tone.
If it is correct, keep exactly the same sentence in "Corrected sentence".
In that case, use "none" as the error type and explain simply that the sentence is correct.
Correct the sentence while preserving its intention and length.
Prefer the smallest possible correction.
Do not turn a short sentence into a long sentence.
The "Corrected sentence" line must contain only one Chinese sentence. Do not add a second option, leftover characters, notes, vocabulary, or pinyin after it.
Do not add names, emotions, encouragement, or information that was not in the original sentence.
Do not replace a correct sentence with a paraphrase.
If you replace one Chinese character with another character that has the same or very close pinyin, mention in "Why" that it is probably a character/input-method mistake.
All explanations, titles, and tips must be in English.
Use real line breaks between sections. Do not output escaped newline characters like \\n.
Do not write a long paragraph. Maximum 5 short lines.
Now correct the input sentence.
/no_think"""
def _generate_correction_gpu(
    context: str,
    sentence: str,
    target_tone: str,
    correction_mode: str = "tone-aware",
) -> str:
    """Run one correction through the model and return display-ready text.

    Returns a plain message instead of a correction when the sentence is empty
    or longer than MAX_INPUT_CHARS, or when the model cannot be loaded.
    Exceptions raised during generation are not caught here.
    """
    sentence = (sentence or "").strip()
    if not sentence:
        message = "Add a Chinese sentence first."
        return wrap_result(message)
    if len(sentence) > MAX_INPUT_CHARS:
        return wrap_result(f"The sentence is too long ({len(sentence)} characters). Current limit: {MAX_INPUT_CHARS}.")
    # No-op when the model is already loaded; otherwise (re)tries the load.
    load_model()
    if load_error:
        return wrap_result(load_error)
    if model is None or tokenizer is None:
        message = "The model is not available."
        return wrap_result(message)
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": build_user_prompt(context, sentence, target_tone, correction_mode)},
    ]
    try:
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
            enable_thinking=False,
        )
    except TypeError:
        # Retry without `enable_thinking` when the call rejects that keyword.
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
    inputs = tokenizer([text], return_tensors="pt").to(model.device)
    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=MAX_NEW_TOKENS,
            do_sample=False,
            use_cache=True,
            repetition_penalty=1.05,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )
    # Keep only the tokens generated after the prompt.
    generated = outputs[0][inputs["input_ids"].shape[-1]:]
    answer = tokenizer.decode(generated, skip_special_tokens=True).strip()
    # `strip_thinking` is defined outside the code shown here.
    answer = strip_thinking(answer)
    # Drop tensor references before asking CUDA to release cached memory.
    del inputs, outputs, generated
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    gc.collect()
    if not answer:
        answer = "The model did not produce a response."
    else:
        # Replace mostly-Chinese Why/Tip lines with English text.
        answer = ensure_english_feedback(answer, sentence)
    return final_result(answer)
def generate_correction(
    context: str,
    sentence: str,
    target_tone: str,
    correction_mode: str = "tone-aware",
) -> dict:
    """Run one correction request, time it, and log a usage record.

    A usage record is appended only when the stripped sentence is non-empty and
    within ``MAX_INPUT_CHARS``; a failure while saving it is reported in
    ``metrics_error`` instead of being raised.

    Returns:
        A dict with the rendered ``result``, the parsed ``corrected_sentence``,
        timing, a ``status`` of ``"ok"`` or ``"unparsed_or_error"``, the
        ``request_id`` (empty when nothing was recorded) and metrics errors.
    """
    original_sentence = (sentence or "").strip()
    profile = context_tone_profile(context)

    timer_start = time.perf_counter()
    result = _generate_correction_gpu(context, sentence, target_tone, correction_mode)
    generation_time_seconds = round(time.perf_counter() - timer_start, 3)

    corrected_sentence = extract_result_field(result, "Corrected sentence")
    error_type = extract_result_field(result, "Error type")
    status = "ok" if corrected_sentence else "unparsed_or_error"

    request_id = ""
    metrics_error = ""
    if original_sentence and len(original_sentence) <= MAX_INPUT_CHARS:
        request_id = str(uuid.uuid4())
        # Built outside the try so only the save itself is treated as best-effort.
        record = {
            "request_id": request_id,
            "created_at": utc_now_iso(),
            "model_id": MODEL_ID,
            "context": profile["key"],
            "target_tone": profile["tone"],
            "correction_mode": profile["correction_style"],
            "original_sentence": original_sentence,
            "corrected_sentence": corrected_sentence,
            "evaluation": None,
            "generation_time_seconds": generation_time_seconds,
            "error_type": error_type,
            "status": status,
        }
        try:
            append_usage_record(record)
        except Exception as exc:
            metrics_error = f"Metrics save failed: {exc}"

    response = {
        "ok": bool(result),
        "request_id": request_id,
        "result": result,
        "original_sentence": original_sentence,
        "corrected_sentence": corrected_sentence,
        "evaluation": None,
        "generation_time_seconds": generation_time_seconds,
        "status": status,
        "metrics_error": metrics_error,
        "metrics_sync_error": metrics_sync_error,
    }
    return response
def rate_response(request_id: str, evaluation: str) -> dict:
    """Attach a thumbs-up / thumbs-down evaluation to a recorded request.

    Args:
        request_id: Identifier of the usage record to update.
        evaluation: Free-form rating; case, hyphens and spaces are normalised
            before it is matched against the accepted aliases.

    Returns:
        ``{"ok": True, ...}`` with the public view of the updated record, or
        ``{"ok": False, "error": ...}`` describing why nothing was updated.
    """
    request_id = normalize_space(request_id)
    rating_key = normalize_space(evaluation).lower().replace("-", "_").replace(" ", "_")

    aliases = {
        **dict.fromkeys(("up", "thumb_up", "thumbs_up", "positive"), "thumbs_up"),
        **dict.fromkeys(("down", "thumb_down", "thumbs_down", "negative"), "thumbs_down"),
    }
    normalized_evaluation = aliases.get(rating_key)

    if not request_id:
        return {"ok": False, "error": "Missing request_id."}
    if not normalized_evaluation:
        return {"ok": False, "error": "Evaluation must be thumbs_up or thumbs_down."}

    try:
        record = update_usage_evaluation(request_id, normalized_evaluation)
    except Exception as exc:
        return {"ok": False, "error": f"Metrics update failed: {exc}"}
    else:
        if record is None:
            return {"ok": False, "error": "Metric record not found."}

    return {
        "ok": True,
        "request_id": request_id,
        "evaluation": normalized_evaluation,
        "metrics_sync_error": metrics_sync_error,
        "record": metric_public_view(record),
    }
def usage_metrics(limit: int = 500) -> dict:
    """Return the most recent usage records plus metrics configuration.

    Args:
        limit: Maximum number of records to return. Falsy values and values
            that cannot be converted to ``int`` fall back to 500; the result is
            clamped to the range 1..5000.

    Returns:
        ``{"ok": True, ...}`` with counts, metrics settings and the public view
        of the newest ``limit`` records, or ``{"ok": False, "error": ...,
        "records": []}`` when the records cannot be read.
    """
    try:
        requested = int(limit or 500)
    except Exception:
        limit = 500
    else:
        limit = max(1, min(requested, 5000))

    try:
        with METRICS_LOCK:
            records = read_usage_records_unlocked()
    except Exception as exc:
        return {"ok": False, "error": f"Metrics read failed: {exc}", "records": []}

    recent = records[-limit:]
    summary = {
        "ok": True,
        "count": len(records),
        "returned": len(recent),
        "metrics_file": str(metrics_file_path()),
        "metrics_repo_sync": METRICS_REPO_SYNC,
        "metrics_repo_id": METRICS_REPO_ID,
        "metrics_repo_path": METRICS_REPO_PATH,
        "metrics_sync_error": metrics_sync_error,
        "records": [metric_public_view(entry) for entry in recent],
    }
    return summary
def clean_tts_text(text: str) -> str:
    """Reduce ``text`` to what the TTS engines should read.

    Every character outside the allow-list in the pattern (U+4E00..U+9FFF, the
    listed punctuation marks and whitespace) is removed; the remainder is
    passed through ``normalize_space`` and cut to ``TTS_MAX_CHARS`` characters.
    ``None``/empty input yields the normalised empty string.
    """
    source = text or ""
    readable = re.sub(r"[^\u4e00-\u9fff,。!?、;:\s]", "", source)
    normalized = normalize_space(readable)
    return normalized[:TTS_MAX_CHARS]
def trim_tts_silence(audio, sample_rate: int):
    """Trim leading and trailing near-silence from synthesized audio.

    The input is converted to a float32 array. Arrays with more than two
    dimensions are squeezed, a ``(channels, samples)`` layout with at most two
    channels is transposed to ``(samples, channels)``, and a single-channel
    2-D array is flattened to 1-D.

    A sample counts as voiced when its absolute amplitude (maximum across
    channels) exceeds ``max(peak * 0.025, 0.002)``. The kept region runs from
    60 ms before the first voiced sample to 140 ms after the last one, clamped
    to the array bounds.

    Args:
        audio: Array-like audio samples.
        sample_rate: Samples per second; a falsy value disables trimming.

    Returns:
        ``(trimmed, trim_start_ms, trim_end_ms)``. When nothing can be trimmed
        (no sample rate, empty or silent input) the normalised array is
        returned unchanged with both offsets set to 0.
    """
    arr = np.asarray(audio, dtype=np.float32)
    if arr.ndim > 2:
        arr = np.squeeze(arr)
    # Channel-first layouts (1 or 2 rows, more columns than rows) become sample-first.
    if arr.ndim == 2 and arr.shape[0] <= 2 and arr.shape[0] < arr.shape[1]:
        arr = arr.T
    if arr.ndim == 2 and arr.shape[1] == 1:
        arr = arr[:, 0]
    if not sample_rate or arr.size == 0:
        return arr, 0, 0

    energy = np.max(np.abs(arr), axis=1) if arr.ndim == 2 else np.abs(arr)
    peak = float(np.max(energy)) if energy.size else 0.0
    if peak <= 1e-6:
        return arr, 0, 0

    threshold = max(peak * 0.025, 0.002)
    voiced = np.flatnonzero(energy > threshold)
    if voiced.size == 0:
        return arr, 0, 0

    pad_start = int(sample_rate * 0.06)
    pad_end = int(sample_rate * 0.14)
    start = max(0, int(voiced[0]) - pad_start)
    # Slice ends are exclusive: the +1 keeps the last voiced sample itself, so
    # the trailing pad is counted after it rather than including it.
    end = min(len(energy), int(voiced[-1]) + 1 + pad_end)

    trimmed = arr[start:end]
    trim_start_ms = int(start / sample_rate * 1000)
    trim_end_ms = int((len(energy) - end) / sample_rate * 1000)
    return trimmed, trim_start_ms, trim_end_ms
def resample_audio(audio, source_rate: int, target_rate: int):
    """Resample audio along its first axis with linear interpolation.

    Args:
        audio: Array-like samples, 1-D or ``(samples, channels)``.
        source_rate: Sample rate of ``audio``.
        target_rate: Desired sample rate.

    Returns:
        ``(resampled, target_rate)`` with the result cast back to the input
        dtype. When either rate is falsy or non-positive, or the rates are
        equal, ``(audio, source_rate)`` is returned untouched; empty input
        returns ``(array, source_rate)``.
    """
    if not source_rate or not target_rate or source_rate == target_rate:
        return audio, source_rate
    if target_rate <= 0 or source_rate <= 0:
        return audio, source_rate

    arr = np.asarray(audio)
    if arr.size == 0:
        return arr, source_rate

    source_len = arr.shape[0]
    target_len = max(1, int(round(source_len * target_rate / source_rate)))
    source_positions = np.linspace(0, source_len - 1, num=source_len)
    target_positions = np.linspace(0, source_len - 1, num=target_len)

    if arr.ndim == 1:
        resampled = np.interp(target_positions, source_positions, arr)
    else:
        # np.interp is 1-D only, so each channel is interpolated separately.
        resampled = np.stack(
            [
                np.interp(target_positions, source_positions, arr[:, channel])
                for channel in range(arr.shape[1])
            ],
            axis=1,
        )

    # Integer PCM must be rounded before the cast; astype alone truncates
    # toward zero and biases every interpolated sample.
    if np.issubdtype(arr.dtype, np.integer):
        resampled = np.rint(resampled)
    return resampled.astype(arr.dtype), target_rate
def load_tts_model():
    """Load the VoxCPM model into the module-level ``tts_model`` if needed.

    Does nothing when a model is already loaded. When server TTS is disabled,
    or loading fails, ``tts_load_error`` is set to a message and ``tts_model``
    stays ``None``; on success ``tts_load_error`` is cleared.
    """
    global tts_model, tts_load_error

    if tts_model is not None:
        return
    if not SERVER_TTS_ENABLED:
        tts_load_error = "Server TTS is disabled."
        return

    try:
        from voxcpm import VoxCPM

        try:
            loaded = VoxCPM.from_pretrained(TTS_MODEL_ID, load_denoiser=False)
        except TypeError:
            # from_pretrained variants that reject the load_denoiser keyword.
            loaded = VoxCPM.from_pretrained(TTS_MODEL_ID)
    except Exception as exc:
        tts_model = None
        tts_load_error = f"Server TTS failed: {exc}"
    else:
        tts_model = loaded
        tts_load_error = None
async def _edge_tts_audio_bytes(text: str) -> bytes:
    """Stream ``text`` through edge-tts and return the concatenated audio bytes.

    Only stream chunks whose ``type`` is ``"audio"`` and that carry non-empty
    ``data`` contribute to the result.
    """
    import edge_tts

    communicate = edge_tts.Communicate(
        text=text,
        voice=EDGE_TTS_VOICE,
        rate=EDGE_TTS_RATE,
        pitch=EDGE_TTS_PITCH,
        volume=EDGE_TTS_VOLUME,
    )
    audio_parts = [
        piece["data"]
        async for piece in communicate.stream()
        if piece.get("type") == "audio" and piece.get("data")
    ]
    return b"".join(audio_parts)
def run_async_safely(coro):
    """Run ``coro`` to completion from synchronous code and return its result.

    Without a running event loop in the current thread the coroutine is run
    directly with ``asyncio.run``. When a loop is already running,
    ``asyncio.run`` cannot be used in this thread, so the coroutine is run in a
    helper thread that is joined before returning. An ``Exception`` raised by
    the coroutine in the helper thread is re-raised in the caller.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop in this thread: the simple path is safe.
        return asyncio.run(coro)

    outcome = {}

    def _worker():
        try:
            outcome["value"] = asyncio.run(coro)
        except Exception as exc:
            outcome["error"] = exc

    helper = Thread(target=_worker)
    helper.start()
    helper.join()

    failure = outcome.get("error") if "error" in outcome else None
    if "error" in outcome:
        raise failure
    return outcome.get("value")
def generate_edge_tts(text: str, speaker: str = "edge-tts") -> dict:
    """Synthesize ``text`` with edge-tts and return it as a base64 data URL.

    Args:
        text: Text to read; it is first reduced with ``clean_tts_text``.
        speaker: Label echoed back in the response; falls back to the
            configured voice when falsy.

    Returns:
        ``{"ok": True, "audio": "data:audio/mpeg;base64,...", ...}`` on
        success, otherwise ``{"ok": False, "error": ...}``.
    """
    phrase = clean_tts_text(text)
    if not phrase:
        return {"ok": False, "error": "No Chinese text to read."}
    if not SERVER_TTS_ENABLED:
        return {"ok": False, "error": "Server TTS is disabled."}

    try:
        raw_audio = run_async_safely(_edge_tts_audio_bytes(phrase))
        if not raw_audio:
            return {"ok": False, "error": "Edge TTS returned no audio."}
        encoded = base64.b64encode(raw_audio).decode("ascii")
        response = {
            "ok": True,
            "audio": f"data:audio/mpeg;base64,{encoded}",
            "duration_ms": 0,
            "speaker": speaker or EDGE_TTS_VOICE,
            "voice": EDGE_TTS_VOICE,
            "source": "edge-tts",
        }
        return response
    except Exception as exc:
        return {"ok": False, "error": f"Edge TTS generation failed: {exc}"}
def _generate_tts_gpu(text: str, speaker: str = "VoxCPM2") -> dict:
    """Synthesize ``text`` with the server-side VoxCPM model.

    The generated audio is trimmed with ``trim_tts_silence``, resampled to
    ``VOXCPM_OUTPUT_SAMPLE_RATE`` and returned as a base64 WAV data URL.

    Args:
        text: Text to read; it is first reduced with ``clean_tts_text``.
        speaker: Label echoed back in the response.

    Returns:
        ``{"ok": True, "audio": "data:audio/wav;base64,...", ...}`` with
        duration, sample rate and trim offsets, otherwise
        ``{"ok": False, "error": ...}``.
    """
    phrase = clean_tts_text(text)
    if not phrase:
        return {"ok": False, "error": "No Chinese text to read."}

    load_tts_model()
    if tts_load_error or tts_model is None:
        return {"ok": False, "error": tts_load_error or "Server TTS model is not available."}

    try:
        import soundfile as sf

        # Optional style hint is prepended in parentheses.
        synthesis_text = f"({VOXCPM_VOICE_STYLE}){phrase}" if VOXCPM_VOICE_STYLE else phrase
        core_options = {
            "text": synthesis_text,
            "cfg_value": VOXCPM_CFG_VALUE,
            "inference_timesteps": VOXCPM_INFERENCE_TIMESTEPS,
        }
        try:
            audio = tts_model.generate(
                **core_options,
                normalize=True,
                denoise=False,
                retry_badcase=VOXCPM_RETRY_BADCASE,
                retry_badcase_max_times=1,
            )
        except TypeError:
            # generate() variants that reject the extended keyword set.
            audio = tts_model.generate(**core_options)

        if isinstance(audio, (list, tuple)):
            audio = audio[0]
        if hasattr(audio, "detach"):
            # Tensor-like output: move to host memory as a float numpy array.
            audio = audio.detach().cpu().float().numpy()

        inner_model = getattr(tts_model, "tts_model", None)
        sample_rate = getattr(inner_model, "sample_rate", 48000)
        audio, trim_start_ms, trim_end_ms = trim_tts_silence(audio, sample_rate)
        audio, sample_rate = resample_audio(audio, sample_rate, VOXCPM_OUTPUT_SAMPLE_RATE)

        wav_buffer = io.BytesIO()
        sf.write(wav_buffer, audio, sample_rate, format="WAV")
        wav_bytes = wav_buffer.getvalue()

        if sample_rate:
            duration_ms = int(len(audio) / sample_rate * 1000)
        else:
            duration_ms = 0
        encoded = base64.b64encode(wav_bytes).decode("ascii")
        return {
            "ok": True,
            "audio": f"data:audio/wav;base64,{encoded}",
            "duration_ms": duration_ms,
            "sample_rate": sample_rate,
            "speaker": speaker or "VoxCPM2",
            "source": "server",
            "trim_start_ms": trim_start_ms,
            "trim_end_ms": trim_end_ms,
        }
    except Exception as exc:
        return {"ok": False, "error": f"Server TTS generation failed: {exc}"}
def generate_tts(text: str, speaker: str = "edge-tts") -> dict:
    """Dispatch a TTS request to the backend selected by ``TTS_PROVIDER``.

    Edge aliases go to ``generate_edge_tts`` and VoxCPM/server aliases go to
    ``_generate_tts_gpu``. Any other provider yields an ``ok: False`` response
    naming the unsupported provider.
    """
    edge_names = {"edge", "edge-tts", "microsoft", "microsoft-edge"}
    voxcpm_names = {"voxcpm", "voxcpm2", "server"}

    provider = normalize_space(TTS_PROVIDER).lower()
    if provider in edge_names:
        return generate_edge_tts(text, speaker)
    if provider not in voxcpm_names:
        return {"ok": False, "error": f"Unsupported TTS provider: {TTS_PROVIDER}"}
    return _generate_tts_gpu(text, speaker or "VoxCPM2")
def strip_thinking(text: str) -> str:
    """Remove every ``<think>...</think>`` block from ``text``.

    Matching is case-insensitive, spans newlines and is non-greedy, so each
    block is removed on its own. The result is stripped of surrounding
    whitespace; ``None``/empty input yields ``""``.
    """
    think_block = re.compile(r"<think>.*?</think>", re.IGNORECASE | re.DOTALL)
    cleaned = think_block.sub("", text or "")
    return cleaned.strip()
def runtime_info() -> str:
    """Return a newline-separated summary of the current runtime configuration.

    One ``NAME: value`` line per setting (model, TTS, limits, metrics, VoxCPM
    options), plus the line produced by ``device_label()``.
    """
    model_ready = model is not None and tokenizer is not None
    loaded = "yes" if model_ready else "no"

    lines = [
        f"MODEL_ID: {MODEL_ID}",
        f"TTS_PROVIDER: {TTS_PROVIDER}",
        f"TTS_MODEL_ID: {TTS_MODEL_ID}",
        f"EDGE_TTS_VOICE: {EDGE_TTS_VOICE}",
        f"EDGE_TTS_KARAOKE_DURATION_FACTOR: {EDGE_TTS_KARAOKE_DURATION_FACTOR}",
        f"Model loaded: {loaded}",
        f"Server TTS enabled: {SERVER_TTS_ENABLED}",
        f"LOAD_IN_4BIT: {LOAD_IN_4BIT}",
        device_label(),
        f"MAX_INPUT_CHARS: {MAX_INPUT_CHARS}",
        f"MAX_NEW_TOKENS: {MAX_NEW_TOKENS}",
        f"METRICS_FILE: {metrics_file_path()}",
        f"METRICS_REPO_SYNC: {METRICS_REPO_SYNC}",
        f"METRICS_REPO_ID: {METRICS_REPO_ID or '(not configured)'}",
        f"METRICS_REPO_PATH: {METRICS_REPO_PATH}",
        f"METRICS_SYNC_ERROR: {metrics_sync_error or '(none)'}",
        f"VOXCPM_INFERENCE_TIMESTEPS: {VOXCPM_INFERENCE_TIMESTEPS}",
        f"VOXCPM_OUTPUT_SAMPLE_RATE: {VOXCPM_OUTPUT_SAMPLE_RATE}",
        f"VOXCPM_RETRY_BADCASE: {VOXCPM_RETRY_BADCASE}",
    ]
    return "\n".join(lines)
| FRONTEND_HTML = r""" | |
| <!doctype html> | |
| <html lang="en"> | |
| <head> | |
| <meta charset="utf-8" /> | |
| <meta name="viewport" content="width=device-width, initial-scale=1" /> | |
| <title>ToneBridge Mandarin Coach</title> | |
| <style> | |
| :root { | |
| --bg: #fbf3e7; | |
| --paper: #fffaf1; | |
| --panel: rgba(255, 250, 241, .94); | |
| --text: #25140f; | |
| --muted: #785f4d; | |
| --border: #ead7bd; | |
| --primary: #b42318; | |
| --primary-strong: #8f1c13; | |
| --gold: #d69e2e; | |
| --jade: #0f766e; | |
| --soft: #fff1d6; | |
| --shadow: 0 22px 60px rgba(92, 38, 17, .16); | |
| } | |
| * { box-sizing: border-box; } | |
| body { | |
| margin: 0; | |
| min-height: 100vh; | |
| background: | |
| radial-gradient(circle at 12% 4%, rgba(180, 35, 24, .18), transparent 26rem), | |
| radial-gradient(circle at 88% 10%, rgba(214, 158, 46, .20), transparent 24rem), | |
| linear-gradient(135deg, rgba(180, 35, 24, .04) 25%, transparent 25%) 0 0 / 26px 26px, | |
| linear-gradient(180deg, #fffaf1 0%, var(--bg) 100%); | |
| color: var(--text); | |
| font-family: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif; | |
| } | |
| main { | |
| width: min(1120px, calc(100% - 28px)); | |
| margin: 0 auto; | |
| padding: 28px 0 40px; | |
| } | |
| header { | |
| display: flex; | |
| align-items: flex-end; | |
| justify-content: space-between; | |
| gap: 18px; | |
| margin-bottom: 18px; | |
| } | |
| h1 { | |
| margin: 0; | |
| font-size: clamp(2rem, 5vw, 3.3rem); | |
| line-height: 1; | |
| letter-spacing: 0; | |
| color: #24110c; | |
| } | |
| .subtitle { | |
| margin: 10px 0 0; | |
| color: var(--muted); | |
| font-size: 1rem; | |
| max-width: 46rem; | |
| } | |
| .badge { | |
| white-space: nowrap; | |
| padding: 10px 14px; | |
| border-radius: 999px; | |
| background: #fff3d8; | |
| color: #8f1c13; | |
| border: 1px solid #efc56a; | |
| font-weight: 700; | |
| font-size: .9rem; | |
| box-shadow: inset 0 1px 0 rgba(255, 255, 255, .9); | |
| } | |
| .app { | |
| display: grid; | |
| grid-template-columns: minmax(0, .92fr) minmax(0, 1.08fr); | |
| gap: 18px; | |
| align-items: stretch; | |
| } | |
| .card { | |
| background: var(--panel); | |
| border: 1px solid var(--border); | |
| border-radius: 22px; | |
| box-shadow: var(--shadow); | |
| padding: 20px; | |
| backdrop-filter: blur(10px); | |
| position: relative; | |
| overflow: hidden; | |
| } | |
| .card::before { | |
| content: "春"; | |
| position: absolute; | |
| right: 18px; | |
| top: 10px; | |
| color: rgba(180, 35, 24, .055); | |
| font-size: 5rem; | |
| font-weight: 900; | |
| pointer-events: none; | |
| } | |
| .field { margin-bottom: 16px; } | |
| label { | |
| display: block; | |
| margin: 0 0 7px; | |
| color: #4b271d; | |
| font-size: .92rem; | |
| font-weight: 750; | |
| } | |
| select, textarea { | |
| width: 100%; | |
| min-height: 46px; | |
| border: 1px solid #dec7a7; | |
| border-radius: 13px; | |
| background: #fffdf8; | |
| color: var(--text); | |
| padding: 12px 13px; | |
| font: inherit; | |
| font-size: 16px; | |
| outline: none; | |
| transition: border-color .16s, box-shadow .16s; | |
| } | |
| textarea { | |
| min-height: 142px; | |
| resize: vertical; | |
| line-height: 1.45; | |
| } | |
| select:focus, textarea:focus { | |
| border-color: var(--primary); | |
| box-shadow: 0 0 0 4px rgba(180, 35, 24, .12); | |
| } | |
| .actions { | |
| display: grid; | |
| grid-template-columns: 1fr 1fr; | |
| gap: 12px; | |
| margin-top: 10px; | |
| } | |
| button { | |
| min-height: 48px; | |
| border: 0; | |
| border-radius: 14px; | |
| padding: 0 16px; | |
| cursor: pointer; | |
| font: inherit; | |
| font-weight: 800; | |
| transition: transform .12s, box-shadow .12s, background .12s; | |
| } | |
| button:active { transform: translateY(1px); } | |
| .primary { | |
| color: #fff; | |
| background: linear-gradient(135deg, var(--primary), #d9480f); | |
| box-shadow: 0 14px 28px rgba(180, 35, 24, .26); | |
| } | |
| .primary:hover { background: var(--primary-strong); } | |
| .mic-button { | |
| color: #fffaf1; | |
| background: linear-gradient(135deg, var(--jade), #0d9488); | |
| box-shadow: 0 14px 28px rgba(15, 118, 110, .22); | |
| } | |
| .mic-button.listening { | |
| background: linear-gradient(135deg, #d69e2e, #b45309); | |
| animation: pulseMic 1.1s ease-in-out infinite; | |
| } | |
| @keyframes pulseMic { | |
| 0%, 100% { transform: translateY(0); box-shadow: 0 12px 24px rgba(214, 158, 46, .24); } | |
| 50% { transform: translateY(-1px); box-shadow: 0 18px 34px rgba(214, 158, 46, .38); } | |
| } | |
| .voice-hint { | |
| margin: 8px 0 0; | |
| color: var(--muted); | |
| font-size: .88rem; | |
| } | |
| .field-note { | |
| margin: 7px 0 0; | |
| color: var(--muted); | |
| font-size: .84rem; | |
| line-height: 1.35; | |
| } | |
| .learning-history { | |
| margin-top: 16px; | |
| padding: 14px; | |
| border: 1px solid #efc56a; | |
| border-radius: 16px; | |
| background: rgba(255, 243, 216, .62); | |
| } | |
| .history-title { | |
| color: #8f1c13; | |
| font-weight: 850; | |
| margin-bottom: 8px; | |
| } | |
| .history-summary { | |
| margin-bottom: 9px; | |
| color: #4b271d; | |
| font-size: .9rem; | |
| font-weight: 700; | |
| } | |
| .learning-history ul { | |
| margin: 0; | |
| padding-left: 18px; | |
| color: var(--muted); | |
| font-size: .9rem; | |
| line-height: 1.45; | |
| } | |
| .result-head { | |
| display: flex; | |
| align-items: center; | |
| justify-content: space-between; | |
| gap: 12px; | |
| margin-bottom: 14px; | |
| } | |
| .result-title { | |
| margin: 0; | |
| font-size: 1.1rem; | |
| } | |
| .status { | |
| color: var(--muted); | |
| font-size: .9rem; | |
| } | |
| .result-tools { | |
| display: flex; | |
| align-items: center; | |
| gap: 10px; | |
| } | |
| .copy-button { | |
| min-height: 34px; | |
| border-radius: 999px; | |
| padding: 0 12px; | |
| color: #8f1c13; | |
| background: #fff3d8; | |
| border: 1px solid #efc56a; | |
| font-size: .86rem; | |
| box-shadow: none; | |
| } | |
| .copy-button:disabled { | |
| cursor: not-allowed; | |
| opacity: .55; | |
| } | |
| .result { | |
| min-height: 330px; | |
| border-radius: 18px; | |
| border: 1px solid var(--border); | |
| background: | |
| linear-gradient(180deg, rgba(255, 253, 248, .98) 0%, rgba(255, 247, 231, .98) 100%); | |
| padding: 18px; | |
| line-height: 1.55; | |
| overflow-wrap: anywhere; | |
| display: grid; | |
| align-content: start; | |
| gap: 12px; | |
| } | |
| .result-section { | |
| border: 1px solid rgba(234, 215, 189, .95); | |
| border-radius: 14px; | |
| background: rgba(255, 253, 248, .86); | |
| padding: 13px 14px; | |
| } | |
| .result-section.corrected-section { | |
| border-color: rgba(180, 35, 24, .38); | |
| background: #fff8eb; | |
| box-shadow: 0 12px 30px rgba(180, 35, 24, .10); | |
| } | |
| .result-section.compact-section { | |
| padding: 10px 14px; | |
| background: rgba(255, 243, 216, .58); | |
| } | |
| .result strong { | |
| display: block; | |
| color: #8f1c13; | |
| margin-bottom: 7px; | |
| } | |
| .corrected-section strong { | |
| font-size: .95rem; | |
| } | |
| .corrected-section { | |
| font-size: 1.18rem; | |
| } | |
| .result em { | |
| display: block; | |
| color: var(--jade); | |
| font-style: normal; | |
| margin-top: 6px; | |
| font-size: .96rem; | |
| } | |
| .reading-panel { | |
| display: none; | |
| margin-top: 16px; | |
| border: 1px solid var(--border); | |
| border-radius: 18px; | |
| background: rgba(255, 250, 241, .72); | |
| overflow: hidden; | |
| } | |
| .reading-tab { | |
| width: 100%; | |
| min-height: 42px; | |
| display: flex; | |
| align-items: center; | |
| justify-content: space-between; | |
| gap: 12px; | |
| border-radius: 0; | |
| background: #fff3d8; | |
| color: #8f1c13; | |
| border-bottom: 1px solid #efc56a; | |
| padding: 0 12px; | |
| font-weight: 900; | |
| } | |
| .replay-button { | |
| min-height: 38px; | |
| color: #fffaf1; | |
| background: linear-gradient(135deg, var(--jade), #0d9488); | |
| border-radius: 999px; | |
| padding: 0 14px; | |
| } | |
| .replay-button:disabled { | |
| cursor: wait; | |
| opacity: .72; | |
| filter: saturate(.75); | |
| } | |
| .karaoke-box { | |
| padding: 16px; | |
| } | |
| .karaoke-window { | |
| min-height: 94px; | |
| display: flex; | |
| align-items: center; | |
| justify-content: center; | |
| gap: 6px; | |
| border-radius: 16px; | |
| background: #fffdf8; | |
| border: 1px solid #ead7bd; | |
| overflow: hidden; | |
| } | |
| .karaoke-char { | |
| display: inline-grid; | |
| place-items: center; | |
| min-width: 42px; | |
| height: 58px; | |
| border-radius: 12px; | |
| font-size: clamp(1.8rem, 5vw, 2.7rem); | |
| font-weight: 850; | |
| color: #a78b7a; | |
| background: #fff8eb; | |
| transition: color .18s, background .18s, transform .18s; | |
| } | |
| .karaoke-char.read { | |
| color: #8f1c13; | |
| background: #ffe7b3; | |
| } | |
| .karaoke-char.active { | |
| color: #fffaf1; | |
| background: linear-gradient(135deg, #b42318, #d69e2e); | |
| transform: translateY(-2px) scale(1.04); | |
| box-shadow: 0 10px 22px rgba(180, 35, 24, .22); | |
| } | |
| .karaoke-strip { | |
| display: flex; | |
| gap: 5px; | |
| overflow-x: auto; | |
| padding: 12px 2px 0; | |
| scroll-behavior: smooth; | |
| } | |
| .strip-char { | |
| flex: 0 0 auto; | |
| min-width: 24px; | |
| height: 30px; | |
| display: inline-grid; | |
| place-items: center; | |
| border-radius: 8px; | |
| color: #8c6f5a; | |
| background: rgba(255, 255, 255, .5); | |
| } | |
| .strip-char.read { | |
| color: #8f1c13; | |
| background: #ffe7b3; | |
| } | |
| .strip-char.active { | |
| color: #fffaf1; | |
| background: var(--jade); | |
| } | |
| .karaoke-hint { | |
| margin: 10px 0 0; | |
| color: var(--muted); | |
| font-size: .9rem; | |
| text-align: center; | |
| } | |
| .placeholder { | |
| color: var(--muted); | |
| display: grid; | |
| place-items: center; | |
| min-height: 280px; | |
| text-align: center; | |
| } | |
| .examples { | |
| display: flex; | |
| gap: 8px; | |
| flex-wrap: wrap; | |
| margin-top: 12px; | |
| } | |
| .examples-label { | |
| margin: 12px 0 0; | |
| color: var(--muted); | |
| font-size: .9rem; | |
| font-weight: 700; | |
| } | |
| .example { | |
| min-height: 34px; | |
| padding: 0 11px; | |
| border-radius: 999px; | |
| background: #fffdf8; | |
| border: 1px solid #efc56a; | |
| color: #8f1c13; | |
| font-size: .9rem; | |
| font-weight: 700; | |
| } | |
| .example:hover { | |
| background: #fff3d8; | |
| } | |
| @media (max-width: 820px) { | |
| main { width: min(100% - 18px, 680px); padding-top: 16px; } | |
| header { display: block; } | |
| .badge { display: inline-block; margin-top: 14px; } | |
| .app { grid-template-columns: 1fr; } | |
| .card { border-radius: 18px; padding: 15px; } | |
| .result { min-height: 220px; } | |
| .actions { | |
| grid-template-columns: 1fr; | |
| position: sticky; | |
| bottom: 8px; | |
| z-index: 2; | |
| } | |
| } | |
| /* Premium product layer for the custom gr.Server frontend */ | |
| :root { | |
| --canvas: #fbf0df; | |
| --paper: #fffaf1; | |
| --paper-soft: #fff4df; | |
| --ink: #26110c; | |
| --brown: #5a3024; | |
| --muted: #806755; | |
| --line: #e8cfac; | |
| --line-strong: #d8af70; | |
| --red: #b42318; | |
| --red-strong: #8f1c13; | |
| --gold: #d69e2e; | |
| --jade: #0f766e; | |
| --plum: #663a5d; | |
| --shadow-premium: 0 18px 42px rgba(91, 43, 18, .15); | |
| --shadow-soft: 0 8px 24px rgba(91, 43, 18, .10); | |
| } | |
| body { | |
| background: | |
| linear-gradient(90deg, rgba(180, 35, 24, .06) 1px, transparent 1px) 0 0 / 34px 34px, | |
| linear-gradient(0deg, rgba(214, 158, 46, .06) 1px, transparent 1px) 0 0 / 34px 34px, | |
| repeating-linear-gradient(135deg, rgba(180, 35, 24, .035) 0 1px, transparent 1px 22px), | |
| linear-gradient(180deg, #fffaf2 0%, var(--canvas) 100%); | |
| color: var(--ink); | |
| } | |
| body::before { | |
| content: ""; | |
| position: fixed; | |
| inset: 0; | |
| pointer-events: none; | |
| background: | |
| linear-gradient(90deg, rgba(255, 250, 241, .88), rgba(255, 250, 241, 0) 18%, rgba(255, 250, 241, 0) 82%, rgba(255, 250, 241, .88)), | |
| repeating-linear-gradient(120deg, transparent 0 54px, rgba(143, 28, 19, .045) 54px 64px); | |
| mix-blend-mode: multiply; | |
| } | |
| main { | |
| width: min(1180px, calc(100% - 32px)); | |
| padding: 30px 0 44px; | |
| position: relative; | |
| } | |
| .hero { | |
| display: grid; | |
| grid-template-columns: minmax(0, 1fr) minmax(300px, 360px); | |
| gap: 22px; | |
| align-items: end; | |
| margin-bottom: 20px; | |
| } | |
| .brand-row { | |
| display: flex; | |
| align-items: center; | |
| gap: 10px; | |
| margin-bottom: 12px; | |
| } | |
| .seal { | |
| width: 42px; | |
| height: 42px; | |
| display: inline-grid; | |
| place-items: center; | |
| border-radius: 8px; | |
| background: linear-gradient(135deg, var(--red), #d9480f); | |
| color: #fffaf1; | |
| font-weight: 900; | |
| box-shadow: 0 10px 24px rgba(180, 35, 24, .24); | |
| } | |
| .eyebrow, | |
| .panel-kicker { | |
| color: var(--red-strong); | |
| font-size: .78rem; | |
| font-weight: 900; | |
| text-transform: uppercase; | |
| } | |
| h1 { | |
| max-width: 760px; | |
| font-size: clamp(2.95rem, 6.8vw, 5.35rem); | |
| line-height: .96; | |
| color: var(--ink); | |
| } | |
| .subtitle { | |
| margin-top: 18px; | |
| color: var(--muted); | |
| font-size: clamp(.98rem, 1.5vw, 1.1rem); | |
| line-height: 1.5; | |
| } | |
| .hero-pills { | |
| display: flex; | |
| flex-wrap: wrap; | |
| gap: 8px; | |
| margin-top: 16px; | |
| } | |
| .hero-pills span, | |
| .badge { | |
| display: inline-flex; | |
| align-items: center; | |
| min-height: 34px; | |
| padding: 0 12px; | |
| border-radius: 999px; | |
| border: 1px solid rgba(214, 158, 46, .72); | |
| background: rgba(255, 250, 241, .82); | |
| color: var(--brown); | |
| font-size: .86rem; | |
| font-weight: 800; | |
| box-shadow: inset 0 1px 0 rgba(255, 255, 255, .95); | |
| } | |
| .hero-visual { | |
| min-height: 208px; | |
| border: 1px solid rgba(216, 175, 112, .78); | |
| border-radius: 8px; | |
| background: | |
| linear-gradient(135deg, rgba(255, 250, 241, .95), rgba(255, 244, 223, .84)), | |
| repeating-linear-gradient(90deg, transparent 0 14px, rgba(180, 35, 24, .035) 14px 15px); | |
| box-shadow: var(--shadow-premium); | |
| padding: 16px; | |
| display: grid; | |
| align-content: start; | |
| justify-items: center; | |
| gap: 14px; | |
| position: relative; | |
| overflow: hidden; | |
| } | |
| .hero-visual::after { | |
| content: "\706F"; | |
| position: absolute; | |
| right: -8px; | |
| bottom: -38px; | |
| font-size: 10rem; | |
| font-weight: 950; | |
| color: rgba(180, 35, 24, .10); | |
| line-height: 1; | |
| } | |
| .hero-visual .badge { | |
| width: max-content; | |
| color: var(--red-strong); | |
| background: #fff3d8; | |
| position: relative; | |
| z-index: 1; | |
| } | |
| .lantern-mark { | |
| width: 142px; | |
| height: 142px; | |
| display: grid; | |
| place-items: center; | |
| position: relative; | |
| z-index: 1; | |
| border-radius: 50% 50% 46% 46%; | |
| background: | |
| radial-gradient(circle at 50% 34%, rgba(255, 243, 216, .98), rgba(214, 158, 46, .28) 58%, rgba(180, 35, 24, .16)), | |
| linear-gradient(135deg, rgba(255, 250, 241, .88), rgba(255, 231, 179, .72)); | |
| border: 1px solid rgba(180, 35, 24, .22); | |
| box-shadow: 0 18px 38px rgba(180, 35, 24, .15); | |
| } | |
| .lantern-mark::before, | |
| .lantern-mark::after { | |
| content: ""; | |
| position: absolute; | |
| left: 50%; | |
| width: 52px; | |
| height: 9px; | |
| transform: translateX(-50%); | |
| border-radius: 999px; | |
| background: linear-gradient(90deg, var(--red), #d9480f); | |
| } | |
| .lantern-mark::before { top: -5px; } | |
| .lantern-mark::after { bottom: -5px; } | |
| .lantern-mark span { | |
| color: var(--red-strong); | |
| font-size: 4.4rem; | |
| font-weight: 950; | |
| line-height: 1; | |
| } | |
| .app { | |
| grid-template-columns: minmax(320px, .9fr) minmax(0, 1.1fr); | |
| gap: 20px; | |
| align-items: start; | |
| } | |
| .panel, | |
| .card { | |
| background: rgba(255, 250, 241, .94); | |
| border: 1px solid var(--line); | |
| border-radius: 8px; | |
| box-shadow: var(--shadow-premium); | |
| padding: 20px; | |
| overflow: hidden; | |
| } | |
| .card::before { content: none; } | |
| .input-panel { | |
| position: sticky; | |
| top: 16px; | |
| } | |
| .panel-heading, | |
| .result-head { | |
| display: flex; | |
| align-items: center; | |
| justify-content: space-between; | |
| gap: 14px; | |
| margin-bottom: 16px; | |
| } | |
| .panel-heading h2, | |
| .result-title { | |
| margin: 2px 0 0; | |
| color: var(--ink); | |
| font-size: 1.25rem; | |
| line-height: 1.15; | |
| } | |
| .field-grid { | |
| display: grid; | |
| grid-template-columns: 1fr 1fr; | |
| gap: 12px; | |
| } | |
| label { | |
| color: var(--brown); | |
| font-size: .9rem; | |
| font-weight: 850; | |
| } | |
| select, | |
| textarea { | |
| min-height: 48px; | |
| border-color: #d9bd92; | |
| border-radius: 8px; | |
| background: rgba(255, 253, 248, .96); | |
| color: var(--ink); | |
| transition: border-color .16s, box-shadow .16s, background .16s; | |
| } | |
| textarea { | |
| min-height: 148px; | |
| line-height: 1.5; | |
| } | |
| select:focus, | |
| textarea:focus { | |
| border-color: var(--red); | |
| background: #fffefb; | |
| box-shadow: 0 0 0 4px rgba(180, 35, 24, .12); | |
| } | |
| .field-note, | |
| .voice-hint { | |
| color: var(--muted); | |
| font-size: .84rem; | |
| line-height: 1.4; | |
| } | |
| button { | |
| border-radius: 8px; | |
| font-weight: 900; | |
| line-height: 1.1; | |
| transition: transform .12s, box-shadow .12s, background .12s, border-color .12s; | |
| } | |
| button:disabled { cursor: not-allowed; opacity: .62; } | |
| .primary { | |
| color: #fffaf1; | |
| background: linear-gradient(135deg, var(--red), #de3d16); | |
| box-shadow: 0 14px 28px rgba(180, 35, 24, .25); | |
| } | |
| .primary:hover { background: linear-gradient(135deg, var(--red-strong), #ca350f); } | |
| .mic-button { | |
| background: linear-gradient(135deg, var(--jade), #0d9488); | |
| box-shadow: 0 14px 28px rgba(15, 118, 110, .20); | |
| } | |
| .mic-button.listening { | |
| background: linear-gradient(135deg, var(--gold), #b45309); | |
| } | |
| .examples-label { | |
| margin: 16px 0 8px; | |
| color: var(--brown); | |
| font-size: .9rem; | |
| font-weight: 850; | |
| } | |
| .example, | |
| .copy-button { | |
| min-height: 36px; | |
| border-radius: 999px; | |
| background: rgba(255, 253, 248, .92); | |
| border: 1px solid rgba(214, 158, 46, .70); | |
| color: var(--red-strong); | |
| font-size: .88rem; | |
| font-weight: 850; | |
| box-shadow: none; | |
| } | |
| .example:hover, | |
| .copy-button:hover:not(:disabled) { | |
| background: #fff3d8; | |
| border-color: var(--gold); | |
| } | |
| .learning-history { | |
| border-color: rgba(15, 118, 110, .28); | |
| border-radius: 8px; | |
| background: linear-gradient(180deg, rgba(255, 253, 248, .86), rgba(238, 249, 246, .58)); | |
| } | |
| .history-title { | |
| color: var(--jade); | |
| font-weight: 950; | |
| } | |
| .coach-panel { min-height: 620px; } | |
| .result-tools { | |
| justify-content: flex-end; | |
| flex-wrap: wrap; | |
| } | |
| .status { | |
| display: inline-flex; | |
| align-items: center; | |
| gap: 7px; | |
| color: var(--muted); | |
| font-weight: 750; | |
| } | |
| .status::before { | |
| content: ""; | |
| width: 8px; | |
| height: 8px; | |
| border-radius: 50%; | |
| background: var(--jade); | |
| box-shadow: 0 0 0 4px rgba(15, 118, 110, .12); | |
| } | |
| .result { | |
| min-height: 350px; | |
| border-radius: 8px; | |
| border-color: var(--line); | |
| background: | |
| linear-gradient(180deg, rgba(255, 253, 248, .97), rgba(255, 246, 230, .92)), | |
| linear-gradient(90deg, rgba(180, 35, 24, .045) 1px, transparent 1px) 0 0 / 28px 28px; | |
| box-shadow: inset 0 1px 0 rgba(255, 255, 255, .95); | |
| } | |
| .result-section { | |
| border-color: rgba(232, 207, 172, .94); | |
| border-left: 4px solid rgba(214, 158, 46, .86); | |
| border-radius: 8px; | |
| background: rgba(255, 253, 248, .90); | |
| box-shadow: var(--shadow-soft); | |
| } | |
| .result-section.original-section { border-left-color: var(--plum); } | |
| .result-section.corrected-section { | |
| border-color: rgba(180, 35, 24, .38); | |
| border-left-color: var(--red); | |
| background: linear-gradient(135deg, #fff8eb, #fffdf8); | |
| box-shadow: 0 14px 32px rgba(180, 35, 24, .12); | |
| font-size: 1.16rem; | |
| } | |
| .result-section.tip-section { | |
| border-left-color: var(--jade); | |
| background: linear-gradient(135deg, rgba(239, 250, 247, .95), rgba(255, 253, 248, .92)); | |
| } | |
| .result strong { | |
| color: var(--red-strong); | |
| font-size: .95rem; | |
| } | |
| .result em { | |
| color: var(--jade); | |
| font-weight: 750; | |
| } | |
| .placeholder { | |
| align-content: center; | |
| gap: 8px; | |
| padding: 22px; | |
| } | |
| .placeholder strong { | |
| color: var(--ink); | |
| margin: 0; | |
| font-size: 1.05rem; | |
| } | |
| .placeholder-icon { | |
| width: 62px; | |
| height: 62px; | |
| display: inline-grid; | |
| place-items: center; | |
| border-radius: 8px; | |
| color: #fffaf1; | |
| background: linear-gradient(135deg, var(--red), var(--gold)); | |
| font-size: 1.8rem; | |
| font-weight: 950; | |
| box-shadow: 0 12px 28px rgba(180, 35, 24, .24); | |
| } | |
| .loader-mark { | |
| width: 36px; | |
| height: 36px; | |
| border-radius: 50%; | |
| border: 3px solid rgba(214, 158, 46, .28); | |
| border-top-color: var(--red); | |
| animation: spin .85s linear infinite; | |
| } | |
| @keyframes spin { to { transform: rotate(360deg); } } | |
| .reading-panel { | |
| border-color: rgba(15, 118, 110, .26); | |
| border-radius: 8px; | |
| background: linear-gradient(180deg, rgba(239, 250, 247, .76), rgba(255, 250, 241, .82)); | |
| box-shadow: var(--shadow-soft); | |
| } | |
| .reading-tab { | |
| min-height: 44px; | |
| display: flex; | |
| align-items: center; | |
| justify-content: space-between; | |
| gap: 12px; | |
| background: rgba(15, 118, 110, .10); | |
| color: var(--jade); | |
| border-bottom: 1px solid rgba(15, 118, 110, .22); | |
| padding: 0 12px; | |
| font-weight: 950; | |
| } | |
| .replay-button { | |
| min-height: 34px; | |
| padding: 0 13px; | |
| background: linear-gradient(135deg, var(--jade), #0d9488); | |
| box-shadow: 0 10px 22px rgba(15, 118, 110, .18); | |
| } | |
| .replay-button:disabled { | |
| box-shadow: none; | |
| } | |
| .karaoke-window, | |
| .karaoke-char { | |
| border-radius: 8px; | |
| } | |
| .karaoke-char { | |
| font-weight: 900; | |
| } | |
| .karaoke-char.active { | |
| background: linear-gradient(135deg, var(--red), var(--gold)); | |
| } | |
| @media (max-width: 920px) { | |
| main { width: min(100% - 20px, 720px); padding-top: 18px; } | |
| .hero { grid-template-columns: 1fr; } | |
| .hero-visual { min-height: 140px; } | |
| .app { grid-template-columns: 1fr; } | |
| .input-panel { position: static; } | |
| .coach-panel { min-height: 0; } | |
| } | |
| @media (max-width: 620px) { | |
| main { width: min(100% - 14px, 520px); padding-bottom: 22px; } | |
| .hero { gap: 14px; } | |
| .hero-pills { gap: 6px; } | |
| .hero-pills span { min-height: 32px; font-size: .8rem; } | |
| .panel, | |
| .card { padding: 14px; } | |
| .field-grid, | |
| .actions { grid-template-columns: 1fr; } | |
| textarea { min-height: 132px; } | |
| .result-head, | |
| .panel-heading { align-items: flex-start; } | |
| .result-tools { justify-content: flex-start; } | |
| .result { min-height: 230px; padding: 13px; } | |
| .actions { | |
| position: sticky; | |
| bottom: 8px; | |
| z-index: 2; | |
| } | |
| button { min-height: 46px; } | |
| } | |
| /* V5 lightweight delight layer: no external assets, no framework */ | |
| .coach-illustration { | |
| width: 82px; | |
| height: 82px; | |
| display: inline-grid; | |
| place-items: center; | |
| position: relative; | |
| margin-bottom: 6px; | |
| } | |
| .coach-face { | |
| width: 64px; | |
| height: 64px; | |
| border-radius: 50% 50% 45% 45%; | |
| background: linear-gradient(160deg, #fff3d8, #ffd6a1); | |
| border: 2px solid rgba(180, 35, 24, .24); | |
| box-shadow: 0 12px 24px rgba(91, 43, 18, .14); | |
| position: relative; | |
| } | |
| .coach-face::before, | |
| .coach-face::after { | |
| content: ""; | |
| position: absolute; | |
| top: 27px; | |
| width: 7px; | |
| height: 7px; | |
| border-radius: 50%; | |
| background: var(--ink); | |
| } | |
| .coach-face::before { left: 20px; } | |
| .coach-face::after { right: 20px; } | |
| .coach-smile { | |
| position: absolute; | |
| left: 22px; | |
| top: 39px; | |
| width: 20px; | |
| height: 10px; | |
| border-bottom: 3px solid var(--red-strong); | |
| border-radius: 0 0 999px 999px; | |
| } | |
| .coach-hat { | |
| position: absolute; | |
| top: 8px; | |
| width: 58px; | |
| height: 16px; | |
| border-radius: 999px 999px 6px 6px; | |
| background: linear-gradient(135deg, var(--red), #d9480f); | |
| box-shadow: 0 5px 14px rgba(180, 35, 24, .18); | |
| } | |
| .coach-card { | |
| width: min(100%, 430px); | |
| display: grid; | |
| grid-template-columns: auto 1fr; | |
| gap: 14px; | |
| align-items: center; | |
| padding: 15px; | |
| border: 1px solid rgba(214, 158, 46, .55); | |
| border-radius: 8px; | |
| background: rgba(255, 250, 241, .78); | |
| box-shadow: var(--shadow-soft); | |
| } | |
| .coach-bubble { | |
| text-align: left; | |
| color: var(--brown); | |
| line-height: 1.45; | |
| } | |
| .coach-bubble strong { | |
| display: block; | |
| margin-bottom: 4px; | |
| color: var(--red-strong); | |
| } | |
| .loading-tip { | |
| margin-top: 10px; | |
| color: var(--jade); | |
| font-weight: 800; | |
| } | |
| .loading-steps { | |
| width: min(100%, 430px); | |
| display: grid; | |
| grid-template-columns: repeat(3, 1fr); | |
| gap: 8px; | |
| margin-top: 12px; | |
| } | |
| .loading-step { | |
| min-height: 34px; | |
| display: inline-grid; | |
| place-items: center; | |
| border-radius: 999px; | |
| border: 1px solid rgba(214, 158, 46, .42); | |
| background: rgba(255, 253, 248, .72); | |
| color: var(--muted); | |
| font-size: .82rem; | |
| font-weight: 850; | |
| animation: stepGlow 2.4s ease-in-out infinite; | |
| } | |
| .loading-step:nth-child(2) { animation-delay: .35s; } | |
| .loading-step:nth-child(3) { animation-delay: .7s; } | |
| @keyframes stepGlow { | |
| 0%, 100% { border-color: rgba(214, 158, 46, .42); color: var(--muted); } | |
| 35% { border-color: rgba(180, 35, 24, .55); color: var(--red-strong); background: #fff3d8; } | |
| } | |
| .result-arrived { | |
| animation: resultArrive .34s ease-out; | |
| } | |
| @keyframes resultArrive { | |
| from { opacity: .35; transform: translateY(8px); } | |
| to { opacity: 1; transform: translateY(0); } | |
| } | |
| .focus-button { | |
| min-height: 36px; | |
| border-radius: 999px; | |
| padding: 0 12px; | |
| border: 1px solid rgba(15, 118, 110, .38); | |
| background: rgba(239, 250, 247, .82); | |
| color: var(--jade); | |
| font-size: .88rem; | |
| box-shadow: none; | |
| } | |
| .focus-button:hover { | |
| background: rgba(221, 247, 240, .95); | |
| border-color: var(--jade); | |
| } | |
| .feedback-panel { | |
| min-height: 52px; | |
| margin-top: 12px; | |
| padding: 9px 10px; | |
| display: flex; | |
| align-items: center; | |
| justify-content: space-between; | |
| gap: 10px; | |
| border: 1px solid rgba(232, 207, 172, .94); | |
| border-radius: 8px; | |
| background: rgba(255, 253, 248, .82); | |
| box-shadow: var(--shadow-soft); | |
| } | |
| .feedback-panel[hidden] { | |
| display: none; | |
| } | |
| .feedback-label { | |
| color: var(--brown); | |
| font-size: .88rem; | |
| font-weight: 850; | |
| } | |
| .feedback-actions { | |
| display: inline-flex; | |
| gap: 8px; | |
| } | |
| .feedback-button { | |
| width: 42px; | |
| min-width: 42px; | |
| min-height: 38px; | |
| padding: 0; | |
| border: 1px solid rgba(214, 158, 46, .62); | |
| border-radius: 8px; | |
| background: rgba(255, 250, 241, .95); | |
| color: var(--ink); | |
| font-size: 1rem; | |
| box-shadow: none; | |
| } | |
| .feedback-button:hover:not(:disabled), | |
| .feedback-button.selected { | |
| background: #fff3d8; | |
| border-color: var(--gold); | |
| box-shadow: inset 0 0 0 2px rgba(214, 158, 46, .18); | |
| } | |
| .feedback-button.selected { | |
| color: var(--red-strong); | |
| } | |
| .feedback-status { | |
| min-width: 92px; | |
| color: var(--muted); | |
| font-size: .84rem; | |
| font-weight: 750; | |
| text-align: right; | |
| } | |
| @media (max-width: 620px) { | |
| .feedback-panel { | |
| align-items: flex-start; | |
| flex-direction: column; | |
| } | |
| .feedback-status { | |
| min-width: 0; | |
| text-align: left; | |
| } | |
| } | |
| body.focus-mode { | |
| overflow: auto; | |
| } | |
| body.focus-mode main { | |
| width: min(1180px, calc(100% - 24px)); | |
| padding: 16px 0 24px; | |
| } | |
| body.focus-mode .hero { | |
| display: none; | |
| } | |
| body.focus-mode .app { | |
| grid-template-columns: minmax(320px, .82fr) minmax(0, 1.18fr); | |
| align-items: stretch; | |
| min-height: calc(100vh - 40px); | |
| } | |
| body.focus-mode .input-panel { | |
| position: static; | |
| min-height: calc(100vh - 40px); | |
| display: grid; | |
| align-content: start; | |
| } | |
| body.focus-mode .input-panel .panel-heading h2::after { | |
| content: " · focus"; | |
| color: var(--jade); | |
| font-weight: 800; | |
| } | |
| body.focus-mode .field-grid { | |
| grid-template-columns: 1fr; | |
| } | |
| body.focus-mode textarea { | |
| min-height: clamp(190px, 32vh, 360px); | |
| font-size: clamp(1.1rem, 2vw, 1.35rem); | |
| line-height: 1.65; | |
| } | |
| body.focus-mode .examples-label, | |
| body.focus-mode .examples, | |
| body.focus-mode .learning-history, | |
| body.focus-mode .voice-hint { | |
| display: none; | |
| } | |
| body.focus-mode .actions { | |
| margin-top: 14px; | |
| } | |
| body.focus-mode .coach-panel { | |
| min-height: calc(100vh - 40px); | |
| } | |
| body.focus-mode .result { | |
| min-height: 42vh; | |
| font-size: 1.04rem; | |
| } | |
| body.focus-mode .corrected-section { | |
| font-size: clamp(1.25rem, 3vw, 1.65rem); | |
| } | |
| body.focus-mode .reading-panel { | |
| margin-top: 18px; | |
| } | |
| body.focus-mode .karaoke-window { | |
| min-height: 150px; | |
| } | |
| body.focus-mode .karaoke-char { | |
| min-width: 58px; | |
| height: 76px; | |
| font-size: clamp(2.4rem, 7vw, 4rem); | |
| } | |
| @media (max-width: 920px) { | |
| body.focus-mode .app { | |
| grid-template-columns: 1fr; | |
| } | |
| body.focus-mode .input-panel, | |
| body.focus-mode .coach-panel { | |
| min-height: auto; | |
| } | |
| } | |
| @media (max-width: 620px) { | |
| .coach-card { | |
| grid-template-columns: 1fr; | |
| justify-items: center; | |
| text-align: center; | |
| } | |
| .coach-bubble { | |
| text-align: center; | |
| } | |
| .loading-steps { | |
| grid-template-columns: 1fr; | |
| } | |
| } | |
| </style> | |
| </head> | |
| <body> | |
| <main> | |
| <header class="hero"> | |
| <div> | |
| <div class="brand-row"> | |
| <span class="seal" aria-hidden="true">语</span> | |
| <span class="eyebrow">Mandarin sentence coach</span> | |
| </div> | |
| <h1>ToneBridge</h1> | |
| <p class="subtitle">Build natural Mandarin sentences, one gentle correction at a time. 😊</p> | |
| <div class="hero-pills" aria-label="Highlights"> | |
| <span>Context aware</span> | |
| <span>Natural tone</span> | |
| <span>Reading voice</span> | |
| </div> | |
| </div> | |
| <div class="hero-visual" aria-hidden="true"> | |
| <div class="badge">🏮 中文小助手</div> | |
| <div class="lantern-mark"> | |
| <span>语</span> | |
| </div> | |
| </div> | |
| </header> | |
| <section class="app"> | |
| <form class="panel input-panel" id="form"> | |
| <div class="panel-heading"> | |
| <div> | |
| <span class="panel-kicker">Write</span> | |
| <h2>Your sentence</h2> | |
| </div> | |
| </div> | |
| <div class="field"> | |
| <label for="context">Context & tone</label> | |
| <select id="context"> | |
| <option value="friendly-informal" selected>Friendly-informal</option> | |
| <option value="work-informal">Work-informal</option> | |
| <option value="work-formal">Work-formal</option> | |
| <option value="wechat-informal">Wechat-informal</option> | |
| <option value="wechat-formal">Wechat-formal</option> | |
| </select> | |
| <p class="field-note">ToneBridge applies a conservative tone-aware correction for the selected situation.</p> | |
| </div> | |
| <div class="field"> | |
| <label for="sentence">Chinese sentence to correct</label> | |
| <textarea id="sentence" placeholder="Ex. 我今天想喝书。"></textarea> | |
| </div> | |
| <div class="actions"> | |
| <button class="primary" id="submit" type="submit">✨ Correct my sentence</button> | |
| <button class="mic-button" id="voice" type="button">🎙️ Speak & correct</button> | |
| </div> | |
| <p class="voice-hint">Voice mode listens until you click stop, then corrects the sentence and reads the corrected version aloud.</p> | |
| <p class="examples-label">Examples: tap one to fill the form.</p> | |
| <div class="examples" aria-label="Examples"> | |
| <button class="example" type="button" data-context="friendly-informal" data-text="我今天想喝书。">Wrong word</button> | |
| <button class="example" type="button" data-context="friendly-informal" data-text="火车站在超市的旁边">Already correct</button> | |
| <button class="example" type="button" data-context="work-formal" data-text="我今天迟到,你等我">Formal tone</button> | |
| <button class="example" type="button" data-context="wechat-informal" data-text="您今晚是否方便出来?">Context</button> | |
| </div> | |
| <div class="learning-history"> | |
| <div class="history-title">Learning notes</div> | |
| <div class="history-summary" id="historySummary">Your patterns will appear here.</div> | |
| <ul id="historyList"> | |
| <li>Your last correction types will appear here.</li> | |
| </ul> | |
| </div> | |
| </form> | |
| <section class="panel coach-panel"> | |
| <div class="result-head"> | |
| <div> | |
| <span class="panel-kicker">Coach answer</span> | |
| <h2 class="result-title">Correction 😊</h2> | |
| </div> | |
| <div class="result-tools"> | |
| <button class="focus-button" id="focusMode" type="button">Focus mode</button> | |
| <button class="copy-button" id="copyCorrected" type="button" disabled>Copy sentence</button> | |
| <span class="status" id="status">Ready</span> | |
| </div> | |
| </div> | |
| <div class="result" id="result"> | |
| <div class="placeholder"> | |
| <span class="placeholder-icon">好</span> | |
| <strong>Ready when you are</strong> | |
| <span>Your correction will appear here.</span> | |
| </div> | |
| </div> | |
| <div class="feedback-panel" id="feedbackPanel" hidden> | |
| <span class="feedback-label">Response rating</span> | |
| <div class="feedback-actions" aria-label="Rate this response"> | |
| <button class="feedback-button" id="thumbUp" type="button" title="Thumbs up" aria-label="Thumbs up">👍</button> | |
| <button class="feedback-button" id="thumbDown" type="button" title="Thumbs down" aria-label="Thumbs down">👎</button> | |
| </div> | |
| <span class="feedback-status" id="feedbackStatus"></span> | |
| </div> | |
| <div class="reading-panel" id="readingPanel"> | |
| <div class="reading-tab"> | |
| <span>🎧 Reading</span> | |
| <button class="replay-button" id="replay" type="button">▶️ Replay</button> | |
| </div> | |
| <div class="karaoke-box"> | |
| <audio id="serverAudio" preload="auto"></audio> | |
| <div class="karaoke-window" id="karaokeWindow"> | |
| <span class="karaoke-char">听</span> | |
| <span class="karaoke-char">一</span> | |
| <span class="karaoke-char">听</span> | |
| </div> | |
| <div class="karaoke-strip" id="karaokeStrip"></div> | |
| <p class="karaoke-hint" id="karaokeHint">Replay the corrected sentence and follow the characters.</p> | |
| </div> | |
| </div> | |
| </section> | |
| </section> | |
| </main> | |
| <script type="module"> | |
| import { client } from "https://cdn.jsdelivr.net/npm/@gradio/client/dist/index.min.js"; | |
// ---- DOM references, resolved once when the module is evaluated ----
// `field` is a hoisted function declaration in this module, so it is callable here.
const form = field("#form");
const result = field("#result");
const status = field("#status");
const submit = field("#submit");
const voice = field("#voice");
const readingPanel = field("#readingPanel");
const replay = field("#replay");
const focusMode = field("#focusMode");
const copyCorrected = field("#copyCorrected");
const serverAudio = field("#serverAudio");
const karaokeWindow = field("#karaokeWindow");
const karaokeStrip = field("#karaokeStrip");
const karaokeHint = field("#karaokeHint");
const historyList = field("#historyList");
const historySummary = field("#historySummary");
const feedbackPanel = field("#feedbackPanel");
const feedbackStatus = field("#feedbackStatus");
const thumbUp = field("#thumbUp");
const thumbDown = field("#thumbDown");

// ---- Mutable session state ----
let clientPromise = null;            // memoised Gradio client connection
let recognition = null;
let isListening = false;
let shouldCorrectAfterStop = false;
let karaokeTimer = null;             // handle of the pending karaoke timeout, if any
let boundaryDrivenReading = false;
let currentCorrectedSentence = "";
let currentOriginalSentence = "";
let currentRequestId = "";
let currentEvaluation = "";
let currentGenerationTimeSeconds = null;
let currentTtsKey = "";
let currentTtsPromise = null;
let currentTtsPayload = null;
let replayState = "idle";

// ---- Caches and rolling history ----
const ttsPayloadCache = new Map();
const ttsPromiseCache = new Map();
const recentErrorTypes = [];

// ---- Tunables ----
const TTS_CACHE_LIMIT = 8;
const SERVER_TTS_READING_DELAY_MS = 120;
const SERVER_TTS_FIRST_PLAY_DELAY_MS = 900;
const SERVER_TTS_PRIME_MS = 90;
const BROWSER_TTS_RATE = 1.0;
// NOTE(review): the "__…__" tokens look like placeholders substituted by the server
// before the page is sent — confirm against the Python code that renders this page.
const SERVER_TTS_ENABLED = "__SERVER_TTS_ENABLED__" === "true";
const SERVER_TTS_PROVIDER = "__TTS_PROVIDER__";
const EDGE_TTS_KARAOKE_DURATION_FACTOR = Number("__EDGE_TTS_KARAOKE_DURATION_FACTOR__") || 0.86;

// ---- Tips shown while a correction is being generated ----
// `base` is always available; the other groups are looked up by context.
const loadingTips = {
  base: [
    "Short Mandarin sentences are often natural. Simple is good. 😊",
    "If the pinyin is right but the character is wrong, it may be an input-method mistake.",
    "A tiny word order change can make Mandarin feel much smoother.",
    "吧 can soften a suggestion, a bit like 'shall we?'",
    "请 makes many requests feel smoother, especially outside close relationships."
  ],
  Friends: [
    "With friends, 你好 is usually enough. 您好 can feel too formal.",
    "Friendly Mandarin often sounds better when it stays short and direct."
  ],
  Family: [
    "With family, natural warmth often matters more than formal politeness.",
    "Family sentences can be simple and still sound kind."
  ],
  Work: [
    "At work, 请 can soften a request without making it too long.",
    "For work messages, clear and polite usually beats very formal."
  ],
  WeChat: [
    "On WeChat, shorter sentences usually feel more natural.",
    "A friendly particle like 吧 can make a message feel lighter."
  ]
};
/**
 * Return the first element matching a CSS selector.
 *
 * @param {string} id - Selector handed unchanged to `document.querySelector`
 *   (callers in this file pass "#element-id").
 * @returns {Element|null} The matching element, or null when nothing matches.
 */
function field(id) {
  const match = document.querySelector(id);
  return match;
}
/**
 * Escape a value for safe interpolation into HTML text or attribute values.
 *
 * The five significant characters (& < > " ') are replaced by entities in a
 * single pass, so an ampersand produced by one replacement is never escaped
 * a second time.
 *
 * @param {*} value - Any value; it is coerced with `String(value)`.
 * @returns {string} The HTML-escaped text.
 */
function escapeHtml(value) {
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;"
  };
  return String(value).replace(/[&<>"']/g, (ch) => entities[ch]);
}
/**
 * Render the coach's correction text as a sequence of HTML <section> elements.
 *
 * The text is normalised, split on blank lines, HTML-escaped, and lightly
 * decorated: `*text*` lines become <em>, and an emoji-prefixed label at the
 * start of a block is wrapped in <strong>.
 *
 * Each section's modifier class is chosen from the label that STARTS the
 * block. Matching the label anywhere in the block would mis-style, for
 * example, a "Why" paragraph that merely mentions "original sentence" or "tip".
 *
 * @param {string} markdown - Raw correction text.
 * @returns {string} Concatenated HTML for all sections ("" when there are none).
 */
function renderMarkdown(markdown) {
  // Modifier class per label; "why" has no dedicated class.
  const sectionClassByLabel = {
    "original sentence": " original-section",
    "corrected sentence": " corrected-section",
    "error type": " compact-section",
    "tip": " tip-section"
  };
  // Allow a short non-letter prefix (emoji, asterisks, spaces) before the label.
  const leadingLabel = /^[^A-Za-z\n]{0,8}(original sentence|corrected sentence|error type|why|tip)\s*:/i;

  const blocks = normalizeCorrectionText(markdown)
    .split(/\n{2,}/)
    .map((block) => block.trim())
    .filter(Boolean);

  return blocks.map((block) => {
    // Escape first; only markup added below is trusted.
    let html = escapeHtml(block);
    html = html.replace(/^\*(.+?)\*$/gm, "<em>$1</em>");
    html = html.replace(
      /^(📝\s*Original sentence|✅\s*Corrected sentence|🔎\s*Error type|💡\s*Why|🌱\s*Tip)\s*:\s*/i,
      "<strong>$1 :</strong> "
    );
    const label = block.match(leadingLabel)?.[1].toLowerCase();
    const cls = "result-section" + (sectionClassByLabel[label] || "");
    return `<section class="${cls}">${html.replace(/\n/g, "<br>")}</section>`;
  }).join("");
}
/**
 * Return the shared Gradio client, connecting on first use.
 *
 * The connection promise is memoised so concurrent callers share one
 * connection attempt. If that attempt rejects, the memo is cleared so the next
 * call retries instead of replaying the same failure until the page reloads.
 *
 * @returns {Promise<object>} Resolves to the connected client; rejects if the
 *   connection attempt fails.
 */
async function getClient() {
  if (!clientPromise) {
    const pending = Promise.resolve(client(window.location.origin));
    clientPromise = pending;
    // Side-channel handler: the caller still sees the rejection through the
    // returned promise; this only forgets the failed attempt.
    pending.catch(() => {
      if (clientPromise === pending) clientPromise = null;
    });
  }
  return clientPromise;
}
/**
 * Pull the correction text out of a Gradio response of varying shape.
 *
 * Unwraps, in order: `data`/`output`/`value` on the response, a leading array
 * element, nested `value`, nested `data`, another leading array element, and
 * finally a `result` property. Anything still an object is pretty-printed as
 * JSON. An empty outcome falls back to a fixed message.
 *
 * @param {*} response - Value returned by the client's predict call.
 * @returns {string} Normalised correction text.
 */
function extractResult(response) {
  const isRecord = (candidate) => Boolean(candidate) && typeof candidate === "object";
  const firstIfArray = (candidate) => (Array.isArray(candidate) ? candidate[0] : candidate);

  let payload = firstIfArray(response?.data ?? response?.output ?? response?.value ?? response);
  if (isRecord(payload) && "value" in payload) payload = payload.value;
  if (isRecord(payload) && "data" in payload) payload = payload.data;
  payload = firstIfArray(payload);
  if (isRecord(payload) && "result" in payload) payload = payload.result;
  if (isRecord(payload)) payload = JSON.stringify(payload, null, 2);

  return normalizeCorrectionText(payload || "No correction was returned.");
}
/**
 * Return the structured correction payload from a response.
 *
 * When the unwrapped payload is already an object it is returned untouched.
 * Otherwise (plain text or nothing) an object of the same shape is built
 * around the normalised text, with `ok` reflecting whether any text came back.
 *
 * @param {*} response - Value returned by the client's predict call.
 * @returns {object} Payload with at least `ok`, `result`, `request_id`,
 *   `corrected_sentence` and `generation_time_seconds` in the fallback case.
 */
function extractCorrectionPayload(response) {
  const payload = extractPayload(response);
  const isStructured = Boolean(payload) && typeof payload === "object";
  if (isStructured) return payload;

  const text = payload || "No correction was returned.";
  return {
    ok: Boolean(payload),
    result: normalizeCorrectionText(text),
    request_id: "",
    corrected_sentence: "",
    generation_time_seconds: null
  };
}
/**
 * Normalise correction text so every labelled section starts its own paragraph.
 *
 * Steps:
 *  1. Turn literal backslash escapes (\r\n, \n, \t) into real whitespace.
 *  2. Temporarily glue "emoji + label" together with a marker so step 3 does
 *     not split between the emoji and its label.
 *  3. Replace whitespace immediately before each "Label:" with a blank line.
 *  4. Swap the marker back to a single space and trim.
 *
 * @param {*} text - Raw text; falsy values are treated as "".
 * @returns {string} Normalised text.
 */
function normalizeCorrectionText(text) {
  const sectionLabels = ["Original sentence", "Corrected sentence", "Error type", "Why", "Tip"];

  const unescaped = String(text || "")
    .replace(/\\r\\n/g, "\n")
    .replace(/\\n/g, "\n")
    .replace(/\\t/g, " ");

  const guarded = unescaped.replace(
    /(📝|✅|🔎|💡|🌱)\s+(Original sentence|Corrected sentence|Error type|Why|Tip)\s*:/g,
    "$1__LABEL_SPACE__$2 :"
  );

  const separated = sectionLabels.reduce(
    (acc, label) => acc.replace(new RegExp(`\\s+(?=${label}\\s*:)`, "g"), "\n\n"),
    guarded
  );

  return separated.replace(/__LABEL_SPACE__/g, " ").trim();
}
/**
 * Unwrap a Gradio response down to its first meaningful payload.
 *
 * Takes `data`, `output` or `value` from the response (or the response itself),
 * then peels a leading array element, a nested `value`, a nested `data`, and
 * one more leading array element.
 *
 * @param {*} response - Value returned by the client's predict call.
 * @returns {*} The unwrapped payload (object, string, null, ...).
 */
function extractPayload(response) {
  const isRecord = (candidate) => Boolean(candidate) && typeof candidate === "object";

  let payload = response?.data ?? response?.output ?? response?.value ?? response;
  if (Array.isArray(payload)) payload = payload[0];
  for (const wrapperKey of ["value", "data"]) {
    if (isRecord(payload) && wrapperKey in payload) payload = payload[wrapperKey];
  }
  if (Array.isArray(payload)) payload = payload[0];
  return payload;
}
/**
 * Toggle the form between its idle and "request in flight" appearance.
 *
 * Both action buttons take the flag as their `disabled` value, and the submit
 * label switches. The status text is only written when loading starts; it is
 * left untouched when loading ends.
 *
 * @param {boolean} isLoading - True while a correction request is running.
 */
function setLoading(isLoading) {
  for (const button of [submit, voice]) {
    button.disabled = isLoading;
  }
  if (isLoading) {
    submit.textContent = "Thinking gently...";
    status.textContent = "Working 🌱";
  } else {
    submit.textContent = "✨ Correct my sentence";
  }
}
/**
 * Record the replay state and reflect it on the replay button.
 *
 * The state is stored even when the button is absent. Unknown states use the
 * "ready" label. The button is clickable only outside the idle, preparing and
 * playing states.
 *
 * @param {string} state - One of "idle", "preparing", "ready", "playing".
 */
function setReplayState(state) {
  replayState = state;
  if (!replay) return;

  const labels = {
    idle: "▶️ Replay",
    preparing: "Preparing voice...",
    ready: "▶️ Replay",
    playing: "Reading..."
  };
  const blockingStates = ["idle", "preparing", "playing"];

  replay.textContent = labels[state] || labels.ready;
  replay.disabled = blockingStates.includes(state);
}
/**
 * Collect the loading tips relevant to the currently selected context.
 *
 * The context <select> holds values such as "friendly-informal" or
 * "wechat-formal", while `loadingTips` is keyed by group name ("Friends",
 * "Work", "WeChat", ...). A direct key lookup therefore never matched, and
 * only the base tips were ever shown. The part of the value before the first
 * "-" is now mapped to its tip group; an exact key match is still honoured
 * first.
 *
 * @returns {string[]} Context-specific tips (possibly none) followed by the base tips.
 */
function tipsForCurrentContext() {
  const groupByPrefix = {
    friendly: "Friends",
    friends: "Friends",
    family: "Family",
    work: "Work",
    wechat: "WeChat"
  };
  const context = field("#context")?.value || "";
  const prefix = context.toLowerCase().split("-")[0];

  const direct = loadingTips[context];
  const mapped = loadingTips[groupByPrefix[prefix]];
  // Array checks keep an unexpected value (e.g. an inherited property) from being spread.
  const contextual = Array.isArray(direct) ? direct : Array.isArray(mapped) ? mapped : [];

  return [...contextual, ...loadingTips.base];
}
/**
 * Build the placeholder markup shown while a correction is being generated:
 * a coach card containing the given tip, plus three decorative step pills.
 *
 * @param {string} tip - Tip text; it is HTML-escaped before insertion.
 * @returns {string} HTML for the loading placeholder.
 */
function loadingCoachHtml(tip) {
  const safeTip = escapeHtml(tip);
  return `
<div class="placeholder">
<div class="coach-card">
<span class="coach-illustration" aria-hidden="true">
<span class="coach-hat"></span>
<span class="coach-face"><span class="coach-smile"></span></span>
</span>
<span class="coach-bubble">
<strong>Checking gently...</strong>
<span>The coach is reading your sentence, context, and tone.</span>
<span class="loading-tip" id="loadingTip">${safeTip}</span>
</span>
</div>
<div class="loading-steps" aria-hidden="true">
<span class="loading-step">Meaning</span>
<span class="loading-step">Tone</span>
<span class="loading-step">Naturalness</span>
</div>
</div>`;
}
/**
 * Replace the result area with the loading coach, showing one randomly chosen
 * tip for the current context (first base tip if the pick comes back empty).
 */
function showLoadingCoach() {
  const candidates = tipsForCurrentContext();
  const pick = Math.floor(Math.random() * candidates.length);
  const chosenTip = candidates[pick] || loadingTips.base[0];
  result.innerHTML = loadingCoachHtml(chosenTip);
}
/**
 * Re-apply the "result-arrived" class on the result container.
 *
 * The class is removed, a layout property is read, then the class is added
 * again — presumably so the CSS animation tied to the class restarts on each
 * new result.
 */
function revealResult() {
  const { classList } = result;
  classList.remove("result-arrived");
  void result.offsetWidth; // layout read between remove and add
  classList.add("result-arrived");
}
/**
 * Reduce a captured "Corrected sentence" value to the sentence itself.
 *
 * Removes asterisks, cuts any following labelled section that ended up on the
 * same line, and, when more text follows the first sentence terminator, keeps
 * only that first sentence.
 *
 * The terminator class previously listed ASCII "!" and "?" twice and had no
 * full-width forms, so a sentence ending in "！" or "？" (the usual Chinese
 * punctuation) was never recognised as finished. The full-width forms are
 * written as \u escapes so they survive width-folding of this file.
 *
 * @param {*} value - Captured text; falsy values are treated as "".
 * @returns {string} The cleaned sentence.
 */
function cleanCorrectedSentenceValue(value) {
  let out = String(value || "")
    .replace(/\*/g, "")
    .replace(/\s+(?:Original sentence|Error type|Why|Tip)\s*:.+$/i, "")
    .trim();
  // \u3002 = "。", \uFF01 = full-width "!", \uFF1F = full-width "?"
  const extraAfterSentence = out.match(/^(.+?[\u3002\uFF01\uFF1F!?])(?=\s*[\u4e00-\u9fffA-Za-z])/);
  if (extraAfterSentence) out = extraAfterSentence[1];
  return out.trim();
}
/**
 * Find the "Corrected sentence" line in the correction text and return its
 * cleaned value.
 *
 * @param {string} markdown - Correction text; falsy values are treated as "".
 * @returns {string} The corrected sentence, or "" when no such line exists.
 */
function extractCorrectedSentence(markdown) {
  const source = markdown || "";
  const labelPatterns = [
    /(?:\*\*)?(?:✅\s*)?Corrected sentence\s*:\s*(?:\*\*)?\s*([^\n]+)/i,
    /(?:\*\*)?Corrected sentence\s*:\s*(?:\*\*)?\s*([^\n]+)/i
  ];
  // First pattern that yields a non-empty capture wins.
  const hit = labelPatterns
    .map((pattern) => source.match(pattern))
    .find((match) => match?.[1]);
  return hit ? cleanCorrectedSentenceValue(hit[1]) : "";
}
/**
 * Find the "Error type" line in the correction text and return its value with
 * asterisks removed and surrounding whitespace trimmed.
 *
 * @param {string} markdown - Correction text; falsy values are treated as "".
 * @returns {string} The error type, or "" when no such line exists.
 */
function extractErrorType(markdown) {
  const source = markdown || "";
  const labelPatterns = [
    /(?:\*\*)?(?:🔎\s*)?Error type\s*:\s*(?:\*\*)?\s*([^\n]+)/i,
    /(?:\*\*)?Error type\s*:\s*(?:\*\*)?\s*([^\n]+)/i
  ];
  // First pattern that yields a non-empty capture wins.
  const hit = labelPatterns
    .map((pattern) => source.match(pattern))
    .find((match) => match?.[1]);
  if (!hit) return "";
  return hit[1].replace(/\*/g, "").trim();
}
/**
 * Add the latest correction to the "Learning notes" list and refresh the
 * summary line.
 *
 * The list keeps the four newest entries (error type plus the first 18
 * characters of the typed sentence). The summary reports the most frequent
 * error type among the last six corrections; on a tie the more recent type wins.
 *
 * @param {string} markdown - Correction text from which the error type is read.
 */
function updateLearningHistory(markdown) {
  const errorType = extractErrorType(markdown);
  if (!errorType || !historyList) return;

  // New list entry: "<error type> · <sentence preview>".
  const sentence = field("#sentence").value.trim();
  const preview = sentence.slice(0, 18) + (sentence.length > 18 ? "..." : "");
  const entry = document.createElement("li");
  entry.textContent = `${errorType} · ${preview}`;

  // Drop the initial placeholder row the first time a real entry arrives.
  const onlyChild = historyList.children.length === 1 ? historyList.children[0] : null;
  if (onlyChild && onlyChild.textContent.includes("will appear")) {
    historyList.innerHTML = "";
  }
  historyList.prepend(entry);
  while (historyList.children.length > 4) {
    historyList.removeChild(historyList.lastElementChild);
  }

  // Rolling window of the six most recent error types, newest first.
  recentErrorTypes.unshift(errorType.toLowerCase());
  if (recentErrorTypes.length > 6) recentErrorTypes.splice(6);

  const tally = {};
  for (const type of recentErrorTypes) {
    tally[type] = (tally[type] || 0) + 1;
  }
  // Strict ">" keeps the first entry among equal counts.
  let top;
  let best = 0;
  for (const [type, count] of Object.entries(tally)) {
    if (count > best) {
      best = count;
      top = type;
    }
  }

  if (historySummary && top) {
    historySummary.textContent = top === "none"
      ? "Nice: your recent sentences are often already correct."
      : `Recent pattern: ${top}.`;
  }
}
/**
 * Forget the current response's feedback context and hide the rating panel.
 *
 * Clears the request id, original sentence, evaluation and generation time,
 * hides the panel, empties its status text, and leaves both thumb buttons
 * disabled and unselected.
 */
function resetFeedbackPanel() {
  currentRequestId = "";
  currentOriginalSentence = "";
  currentEvaluation = "";
  currentGenerationTimeSeconds = null;

  if (feedbackPanel) feedbackPanel.hidden = true;
  if (feedbackStatus) feedbackStatus.textContent = "";

  for (const button of [thumbUp, thumbDown].filter(Boolean)) {
    button.disabled = true;
    button.classList.remove("selected");
  }
}
/**
 * Show the rating panel for the current response.
 *
 * Does nothing without a panel element or a current request id. The status
 * text shows the generation time followed by "s" when one is set, otherwise
 * it is emptied. Both thumb buttons become enabled and unselected.
 */
function showFeedbackPanel() {
  if (!feedbackPanel || !currentRequestId) return;

  feedbackPanel.hidden = false;
  if (feedbackStatus) {
    const timing = currentGenerationTimeSeconds ? `${currentGenerationTimeSeconds}s` : "";
    feedbackStatus.textContent = timing;
  }

  for (const button of [thumbUp, thumbDown].filter(Boolean)) {
    button.disabled = false;
    button.classList.remove("selected");
  }
}
/**
 * Store the chosen evaluation and highlight the matching thumb button.
 *
 * @param {string} evaluation - "thumbs_up", "thumbs_down", or a falsy value to
 *   clear the selection. The status reads "Saved" when a value is set.
 */
function markFeedbackSelection(evaluation) {
  const chosen = evaluation || "";
  currentEvaluation = chosen;

  if (thumbUp) thumbUp.classList.toggle("selected", chosen === "thumbs_up");
  if (thumbDown) thumbDown.classList.toggle("selected", chosen === "thumbs_down");
  if (feedbackStatus) feedbackStatus.textContent = chosen ? "Saved" : "";
}
/**
 * Send a thumbs-up / thumbs-down rating for the current response.
 *
 * Both buttons are disabled while the request runs and re-enabled afterwards,
 * whether it succeeded or not. On success the selection returned by the server
 * is highlighted; on any failure the error is logged and the status shows
 * "Save failed".
 *
 * @param {string} evaluation - Rating value forwarded to "/rate_response".
 * @returns {Promise<void>}
 */
async function submitFeedback(evaluation) {
  if (!currentRequestId) return;

  const buttons = [thumbUp, thumbDown].filter(Boolean);
  const lockButtons = (locked) => {
    buttons.forEach((button) => {
      button.disabled = locked;
    });
  };
  const showStatus = (message) => {
    if (feedbackStatus) feedbackStatus.textContent = message;
  };

  lockButtons(true);
  showStatus("Saving...");
  try {
    const app = await getClient();
    const response = await app.predict("/rate_response", {
      request_id: currentRequestId,
      evaluation
    });
    const payload = extractPayload(response);
    if (!payload?.ok) throw new Error(payload?.error || "Feedback save failed");
    markFeedbackSelection(payload.evaluation);
  } catch (error) {
    console.error(error);
    showStatus("Save failed");
  } finally {
    lockButtons(false);
  }
}
/**
 * Split text into the units highlighted during karaoke reading: CJK
 * ideographs (U+4E00–U+9FFF) and sentence punctuation. Everything else
 * (Latin letters, digits, spaces, ...) is dropped.
 *
 * The punctuation class previously held only the ASCII forms of , ! ? ; :,
 * so the full-width forms normally used in Chinese text were silently
 * dropped. Both widths are accepted now; the non-ASCII characters are written
 * as \u escapes so they survive width-folding of this file.
 *
 * @param {string} text - Sentence to split; falsy values are treated as "".
 * @returns {string[]} One entry per readable unit, in order.
 */
function chineseReadingUnits(text) {
  // \u3002 "。"  \u3001 "、"  \uFF0C \uFF01 \uFF1F \uFF1B \uFF1A = full-width , ! ? ; :
  const readable = /[\u4e00-\u9fff,!?;:\u3002\u3001\uFF0C\uFF01\uFF1F\uFF1B\uFF1A]/;
  return Array.from(text || "").filter((char) => readable.test(char));
}
/**
 * Draw the karaoke display for a list of reading units.
 *
 * With no units, the default three-character placeholder and hint are
 * restored. Otherwise a window of 2–4 characters around the active index is
 * shown large, and the full strip below marks every unit as read, active or
 * upcoming, scrolling the active one into view.
 *
 * @param {string[]} chars - Reading units in order.
 * @param {number} [activeIndex=-1] - Index being read; -1 means not started.
 */
function renderKaraoke(chars, activeIndex = -1) {
  if (!chars.length) {
    karaokeWindow.innerHTML = '<span class="karaoke-char">听</span><span class="karaoke-char">一</span><span class="karaoke-char">听</span>';
    karaokeStrip.innerHTML = "";
    karaokeHint.textContent = "Replay the corrected sentence and follow the characters.";
    return;
  }

  // "read" before the cursor, "active" at it, "" after it.
  const stateOf = (index) => {
    if (index < activeIndex) return "read";
    if (index === activeIndex) return "active";
    return "";
  };

  // Centre the window on the active unit, clamped to the ends of the text.
  const windowSize = Math.min(4, Math.max(2, chars.length));
  const centred = Math.max(0, activeIndex - Math.floor(windowSize / 2));
  const lastStart = Math.max(0, chars.length - windowSize);
  const start = Math.min(centred, lastStart);

  karaokeWindow.innerHTML = chars
    .slice(start, start + windowSize)
    .map((char, offset) => `<span class="karaoke-char ${stateOf(start + offset)}">${escapeHtml(char)}</span>`)
    .join("");

  // Rebuild the strip only when its length no longer matches the text.
  if (karaokeStrip.childElementCount !== chars.length) {
    karaokeStrip.innerHTML = chars
      .map((char) => `<span class="strip-char">${escapeHtml(char)}</span>`)
      .join("");
  }
  Array.from(karaokeStrip.children).forEach((node, index) => {
    const state = stateOf(index);
    node.className = state ? `strip-char ${state}` : "strip-char";
    if (index === activeIndex) {
      node.scrollIntoView({ behavior: "smooth", inline: "center", block: "nearest" });
    }
  });

  karaokeHint.textContent = activeIndex >= 0
    ? "Follow the highlighted characters as you listen."
    : "Ready for reading.";
}
/** Cancel the pending karaoke tick, if any, and forget its handle. */
function stopKaraokeTimer() {
  if (!karaokeTimer) return;
  clearTimeout(karaokeTimer);
  karaokeTimer = null;
}
/**
 * Tell whether a reading unit is sentence punctuation.
 *
 * Both the ASCII and the full-width forms of comma, exclamation mark,
 * question mark, semicolon and colon are recognised, together with the
 * ideographic full stop and the enumeration comma.
 *
 * @param {string} char - A single reading unit.
 * @returns {boolean} True when the unit is punctuation.
 */
function isReadingPunctuation(char) {
  return /[,。!?、;:\uff0c\uff01\uff1f\uff1b\uff1a]/.test(char);
}
/**
 * Relative share of the reading time given to one unit.
 *
 * @param {string} char - A single reading unit.
 * @returns {number} 1.9 for punctuation, 1 for any other unit.
 */
function readingWeight(char) {
  if (isReadingPunctuation(char)) return 1.9;
  return 1;
}
/**
 * Estimate how long one reading unit stays highlighted at a speech rate.
 *
 * @param {string} char - A single reading unit.
 * @param {number} rate - Speech rate; non-numeric or zero values fall back
 *   to 0.55, and the result is limited to the range 0.25-1.0.
 * @returns {number} Duration in milliseconds.
 */
function msPerReadingUnit(char, rate) {
  const requested = Number(rate) || 0.55;
  const speed = Math.max(0.25, Math.min(1.0, requested));
  const baseMs = isReadingPunctuation(char) ? 230 : 145;
  return baseMs / speed;
}
/**
 * Run the karaoke highlight on a timer derived from the speech rate.
 *
 * Any running karaoke timer is stopped first. The animation gives up as
 * soon as boundary events take over (boundaryDrivenReading becomes true).
 *
 * @param {string} text - Sentence being read.
 * @param {number} rate - Speech rate passed on to msPerReadingUnit.
 */
function startKaraoke(text, rate) {
  const units = chineseReadingUnits(text);
  stopKaraokeTimer();
  boundaryDrivenReading = false;
  renderKaraoke(units, -1);
  if (units.length === 0) return;

  let cursor = -1;
  const advance = () => {
    if (boundaryDrivenReading) return;
    cursor += 1;
    if (cursor < units.length) {
      renderKaraoke(units, cursor);
      karaokeTimer = setTimeout(advance, msPerReadingUnit(units[cursor], rate));
      return;
    }
    // Past the last unit: keep it highlighted and show the closing hint.
    renderKaraoke(units, units.length - 1);
    karaokeTimer = null;
    karaokeHint.textContent = "Great. Replay with the voice button whenever you want. 👍";
  };
  karaokeTimer = setTimeout(advance, 20);
}
/**
 * Run the karaoke highlight so that it spans a known audio duration.
 *
 * The usable time (duration minus lead-in minus 80 ms, at least 350 ms) is
 * shared between the units in proportion to their reading weight, with a
 * minimum of 55 ms per unit.
 *
 * @param {string} text - Sentence being read.
 * @param {number} durationMs - Audio length in milliseconds; a falsy value
 *   only resets the display.
 * @param {number} [leadInMs=0] - Time already elapsed before reading starts.
 */
function startKaraokeByDuration(text, durationMs, leadInMs = 0) {
  const units = chineseReadingUnits(text);
  stopKaraokeTimer();
  boundaryDrivenReading = false;
  renderKaraoke(units, -1);
  if (!units.length || !durationMs) return;

  const usableDuration = Math.max(350, Number(durationMs) - Number(leadInMs || 0) - 80);
  const weights = units.map(readingWeight);
  const totalWeight = weights.reduce((sum, value) => sum + value, 0) || units.length;

  let cursor = -1;
  const advance = () => {
    cursor += 1;
    if (cursor < units.length) {
      renderKaraoke(units, cursor);
      const share = weights[cursor] / totalWeight;
      karaokeTimer = setTimeout(advance, Math.max(55, usableDuration * share));
      return;
    }
    renderKaraoke(units, units.length - 1);
    karaokeTimer = null;
    karaokeHint.textContent = "Great. Replay whenever you want. 👍";
  };
  karaokeTimer = setTimeout(advance, 30);
}
/**
 * Map a playback progress fraction to the index of the unit being read.
 *
 * Units are laid out along the 0-1 range in proportion to their reading
 * weight; the first unit whose cumulative share reaches the progress wins.
 *
 * @param {string[]} chars - Reading units of the sentence.
 * @param {number} progress - Playback progress, clamped to 0-1.
 * @returns {number} Index into chars, or -1 when chars is empty.
 */
function karaokeIndexFromAudioProgress(chars, progress) {
  if (!chars.length) return -1;
  const fraction = Math.min(1, Math.max(0, Number(progress) || 0));
  const weights = chars.map(readingWeight);
  const totalWeight = weights.reduce((sum, value) => sum + value, 0) || chars.length;

  let cumulative = 0;
  const hit = weights.findIndex((weight) => {
    cumulative += weight;
    return fraction <= cumulative / totalWeight;
  });
  return hit === -1 ? chars.length - 1 : hit;
}
/**
 * Drive the karaoke highlight from the playback position of an audio
 * element, polling every 60 ms until the audio pauses or ends.
 *
 * When the element reports no usable duration, an estimate based on the
 * per-unit reading time at rate 0.85 (at least 650 ms) is used instead. For
 * providers whose name starts with "edge" the duration is scaled by
 * EDGE_TTS_KARAOKE_DURATION_FACTOR.
 *
 * @param {string} text - Sentence being read.
 * @param {HTMLAudioElement} audio - Element that is playing the sentence.
 * @param {number} [leadInMs=0] - Offset subtracted from the playback time.
 */
function startKaraokeFromAudio(text, audio, leadInMs = 0) {
  const units = chineseReadingUnits(text);
  stopKaraokeTimer();
  boundaryDrivenReading = false;
  renderKaraoke(units, -1);
  if (!units.length || !audio) return;

  const summedUnitMs = units.reduce((sum, char) => sum + msPerReadingUnit(char, 0.85), 0);
  const estimatedDurationMs = Math.max(650, summedUnitMs);

  const poll = () => {
    if (audio.paused || audio.ended) {
      karaokeTimer = null;
      return;
    }
    const rawDurationMs = Math.round((audio.duration || 0) * 1000);
    let durationFactor = 1;
    if (SERVER_TTS_PROVIDER.startsWith("edge")) durationFactor = EDGE_TTS_KARAOKE_DURATION_FACTOR;
    const hasRealDuration = Number.isFinite(rawDurationMs) && rawDurationMs > 1;
    const durationMs = hasRealDuration
      ? Math.max(1, rawDurationMs * durationFactor)
      : estimatedDurationMs * durationFactor;
    const elapsedMs = Math.round((audio.currentTime || 0) * 1000) - Number(leadInMs || 0);
    const currentMs = Math.max(0, elapsedMs);
    renderKaraoke(units, karaokeIndexFromAudioProgress(units, currentMs / durationMs));
    karaokeTimer = setTimeout(poll, 60);
  };
  karaokeTimer = setTimeout(poll, 20);
}
/**
 * Move the karaoke highlight to the position reported by a speech
 * synthesis boundary event and switch off the timer-driven animation.
 *
 * @param {string} text - Sentence being spoken.
 * @param {number} charIndex - Offset of the boundary inside text, in UTF-16
 *   code units as reported by the event.
 */
function syncKaraokeFromBoundary(text, charIndex) {
  if (!Number.isFinite(charIndex) || charIndex < 0) return;
  const chars = chineseReadingUnits(text);
  if (!chars.length) return;
  boundaryDrivenReading = true;
  stopKaraokeTimer();
  // charIndex counts UTF-16 code units, so cut the string itself. Slicing
  // an array of code points would drift after any astral character.
  const spokenSoFar = String(text).slice(0, charIndex + 1);
  const before = chineseReadingUnits(spokenSoFar);
  const index = Math.max(0, Math.min(chars.length - 1, before.length - 1));
  renderKaraoke(chars, index);
}
/**
 * List the browser speech-synthesis voices whose language tag starts
 * with "zh".
 *
 * @returns {SpeechSynthesisVoice[]} Possibly empty list of voices.
 */
function chineseVoices() {
  const installed = window.speechSynthesis?.getVoices?.() || [];
  const isChinese = (candidate) => candidate.lang?.toLowerCase().startsWith("zh");
  return installed.filter(isChinese);
}
/**
 * Rank a voice by keywords found in its name and language tag.
 *
 * Each keyword group contributes its points at most once; a voice that is
 * explicitly not a local service gets a further 4 points.
 *
 * @param {{name?: string, lang?: string, localService?: boolean}} item
 * @returns {number} Higher means more preferred.
 */
function voiceScore(item) {
  const label = `${item.name || ""} ${item.lang || ""}`.toLowerCase();
  const bonuses = [
    [["zh-cn", "mandarin"], 20],
    [["google"], 18],
    [["microsoft"], 16],
    [["natural", "premium", "online"], 12],
    [["xiaoxiao", "xiaoyi", "yunjian"], 8]
  ];
  let total = 0;
  for (const [keywords, points] of bonuses) {
    if (keywords.some((keyword) => label.includes(keyword))) total += points;
  }
  if (item.localService === false) total += 4;
  return total;
}
/**
 * Choose the best-scoring Chinese voice offered by the browser.
 *
 * @returns {SpeechSynthesisVoice|null} The preferred voice, or null when
 *   no Chinese voice is available.
 */
function pickChineseVoice() {
  const candidates = chineseVoices();
  if (candidates.length === 0) return null;
  candidates.sort((left, right) => voiceScore(right) - voiceScore(left));
  return candidates[0] || null;
}
/**
 * Build the cache key for a phrase: the trimmed text and the server TTS
 * provider joined by "||".
 *
 * @param {string} text - Phrase to speak; falsy values count as empty.
 * @returns {string} Cache key.
 */
function ttsCacheKey(text) {
  const phrase = (text || "").trim();
  return [phrase, SERVER_TTS_PROVIDER].join("||");
}
/**
 * Store a TTS payload as the most recent cache entry and evict the oldest
 * entries beyond TTS_CACHE_LIMIT.
 *
 * Payloads without audio, and empty keys, are ignored.
 *
 * @param {string} key - Cache key of the phrase.
 * @param {object} payload - Server response carrying an audio property.
 */
function rememberTtsPayload(key, payload) {
  if (!key || !payload?.audio) return;
  // Removing first moves the key to the end of the insertion order.
  ttsPayloadCache.delete(key);
  ttsPayloadCache.set(key, payload);
  while (ttsPayloadCache.size > TTS_CACHE_LIMIT) {
    const [oldestKey] = ttsPayloadCache.keys();
    ttsPayloadCache.delete(oldestKey);
  }
}
/**
 * Look up a cached TTS payload and mark it as most recently used.
 *
 * @param {string} key - Cache key of the phrase.
 * @returns {object|null} The cached payload, or null on a miss.
 */
function cachedTtsPayload(key) {
  const hit = ttsPayloadCache.get(key);
  if (hit) {
    // Re-insert so the entry becomes the newest one.
    ttsPayloadCache.delete(key);
    ttsPayloadCache.set(key, hit);
    return hit;
  }
  return null;
}
/**
 * Forget the prepared voice for the current sentence: clear the TTS state,
 * put the replay button back to "idle", stop the karaoke timer and unload
 * the server audio element when there is one.
 */
function resetPreparedTTS() {
  currentTtsKey = "";
  currentTtsPromise = null;
  currentTtsPayload = null;
  setReplayState("idle");
  stopKaraokeTimer();
  if (!serverAudio) return;

  // Detach the handlers before pausing so they cannot react to the reset.
  serverAudio.onplaying = null;
  serverAudio.onended = null;
  serverAudio.pause();
  serverAudio.muted = false;
  serverAudio.volume = 1;
  delete serverAudio.dataset.ttsKey;
  serverAudio.removeAttribute("src");
  serverAudio.load();
}
/**
 * Promise-based delay.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Resolves once the delay has elapsed.
 */
function wait(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
/**
 * Wait until an audio element can start playing, or until a timeout.
 *
 * Resolves immediately when there is no element or it already has future
 * data. Otherwise resolves on the first of "canplay", "canplaythrough",
 * "loadeddata", "error" or the timeout; the promise never rejects.
 *
 * @param {HTMLMediaElement|null} audio - Element to watch.
 * @param {number} [timeoutMs=1800] - Upper bound for the wait.
 * @returns {Promise<void>}
 */
function waitForAudioReady(audio, timeoutMs = 1800) {
  if (!audio || audio.readyState >= HTMLMediaElement.HAVE_FUTURE_DATA) return Promise.resolve();

  const readyEvents = ["canplay", "canplaythrough", "loadeddata", "error"];
  return new Promise((resolve) => {
    let settled = false;
    let timer = null;
    const finish = () => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      readyEvents.forEach((name) => audio.removeEventListener(name, finish));
      resolve();
    };
    timer = setTimeout(finish, timeoutMs);
    readyEvents.forEach((name) => audio.addEventListener(name, finish, { once: true }));
  });
}
/**
 * Load a TTS payload into the shared audio element and, for non-"edge"
 * providers, prime it with a short muted playback.
 *
 * Nothing is done when there is no audio element, no audio in the payload,
 * or the key is no longer the current one. The payload's primed flag
 * records whether priming succeeded.
 *
 * @param {object} payload - TTS payload with an audio source.
 * @param {string} key - Cache key the payload belongs to.
 * @returns {Promise<object>} The same payload object.
 */
async function prepareAudioElement(payload, key) {
  const stillCurrent = () => currentTtsKey === key;
  if (!serverAudio || !payload?.audio || !stillCurrent()) return payload;

  const needsNewSource = serverAudio.dataset.ttsKey !== key;
  if (needsNewSource) {
    serverAudio.pause();
    serverAudio.onplaying = null;
    serverAudio.onended = null;
    serverAudio.src = payload.audio;
    serverAudio.dataset.ttsKey = key;
    serverAudio.preload = "auto";
    serverAudio.load();
  }
  await waitForAudioReady(serverAudio);
  if (!stillCurrent() || payload.primed) return payload;

  if (SERVER_TTS_PROVIDER.startsWith("edge")) {
    // Edge audio is marked as primed without the silent playback.
    payload.primed = true;
    return payload;
  }

  try {
    // Play silently for a moment, then rewind.
    serverAudio.muted = true;
    serverAudio.volume = 0;
    serverAudio.currentTime = 0;
    await serverAudio.play();
    await wait(SERVER_TTS_PRIME_MS);
    serverAudio.pause();
    serverAudio.currentTime = 0;
    payload.primed = true;
  } catch (primeError) {
    payload.primed = false;
  } finally {
    serverAudio.muted = false;
    serverAudio.volume = 1;
  }
  return payload;
}
/**
 * Make sure server-side speech for a phrase is fetched and loaded.
 *
 * Sources are tried in this order: the payload cache, the payload or
 * promise already current for the same key, a request in flight for the
 * key, and finally a new call to the "/tts" endpoint. The replay state and
 * the karaoke hint are updated along the way, but only while the phrase is
 * still the current one.
 *
 * @param {string} text - Phrase to synthesise.
 * @returns {Promise<object|null>} The TTS payload, or null when server TTS
 *   is disabled, the phrase is empty, or the server voice is unavailable.
 */
async function prepareServerTTS(text) {
  if (!SERVER_TTS_ENABLED) return null;
  const phrase = (text || "").trim();
  if (!phrase || !serverAudio) return null;
  const key = ttsCacheKey(phrase);

  const announceReady = () => {
    karaokeHint.textContent = "Voice ready. Press Replay to read it.";
    setReplayState("ready");
  };
  const fallBackToBrowserVoice = () => {
    if (currentTtsKey !== key) return;
    currentTtsPayload = null;
    karaokeHint.textContent = "Server voice unavailable. Browser voice will be used.";
    setReplayState("ready");
  };

  const cachedPayload = cachedTtsPayload(key);
  if (cachedPayload) {
    currentTtsKey = key;
    currentTtsPayload = cachedPayload;
    currentTtsPromise = null;
    await prepareAudioElement(cachedPayload, key);
    announceReady();
    return cachedPayload;
  }

  if (currentTtsKey === key) {
    if (currentTtsPayload) {
      setReplayState("ready");
      return currentTtsPayload;
    }
    if (currentTtsPromise) {
      setReplayState("preparing");
      return currentTtsPromise;
    }
  }

  if (ttsPromiseCache.has(key)) {
    // Another call is already fetching this phrase: join it.
    currentTtsKey = key;
    currentTtsPromise = ttsPromiseCache.get(key);
    setReplayState("preparing");
    return currentTtsPromise;
  }

  currentTtsKey = key;
  currentTtsPayload = null;
  karaokeHint.textContent = "Preparing a natural Mandarin voice...";
  setReplayState("preparing");

  const request = (async () => {
    const client = await getClient();
    const reply = await client.predict("/tts", {
      text: phrase,
      speaker: SERVER_TTS_PROVIDER
    });
    const payload = extractPayload(reply);
    if (!payload?.ok || !payload?.audio) {
      console.warn("Server TTS fallback", payload?.error || payload);
      fallBackToBrowserVoice();
      return null;
    }
    if (currentTtsKey === key) {
      await prepareAudioElement(payload, key);
    }
    rememberTtsPayload(key, payload);
    if (currentTtsKey === key) {
      currentTtsPayload = payload;
      announceReady();
    }
    return payload;
  })();
  currentTtsPromise = request;
  ttsPromiseCache.set(key, request);

  try {
    return await request;
  } catch (error) {
    console.warn("Server TTS unavailable, using browser voice.", error);
    fallBackToBrowserVoice();
    return null;
  } finally {
    ttsPromiseCache.delete(key);
    if (currentTtsKey === key) currentTtsPromise = null;
  }
}
/**
 * Speak a phrase with the server voice and start the matching karaoke.
 *
 * For "edge" providers the karaoke follows the audio position; for other
 * providers it is spread over the reported duration after a reading delay.
 *
 * @param {string} text - Phrase to speak.
 * @returns {Promise<boolean>} True when server audio started playing,
 *   false when the caller should fall back to the browser voice.
 */
async function speakWithServerTTS(text) {
  if (!SERVER_TTS_ENABLED) return false;
  const phrase = (text || "").trim();
  if (!phrase || !serverAudio) return false;

  try {
    const payload = await prepareServerTTS(phrase);
    if (!payload?.audio) return false;

    window.speechSynthesis?.cancel?.();
    await prepareAudioElement(payload, ttsCacheKey(phrase));
    serverAudio.pause();
    serverAudio.currentTime = 0;

    const usesEdgeVoice = SERVER_TTS_PROVIDER.startsWith("edge");
    const isFirstUserPlay = !payload.playedOnce;
    let readingDelay = 0;
    if (!usesEdgeVoice) {
      readingDelay = payload.primed || !isFirstUserPlay
        ? SERVER_TTS_READING_DELAY_MS
        : SERVER_TTS_FIRST_PLAY_DELAY_MS;
    }

    let readingStarted = false;
    serverAudio.onplaying = () => {
      // "playing" can fire more than once; only the first one counts.
      if (readingStarted) return;
      readingStarted = true;
      setReplayState("playing");
      payload.playedOnce = true;
      if (usesEdgeVoice) {
        startKaraokeFromAudio(phrase, serverAudio, readingDelay);
        return;
      }
      const durationMs = payload.duration_ms || Math.round((serverAudio.duration || 0) * 1000);
      setTimeout(
        () => startKaraokeByDuration(phrase, durationMs, readingDelay),
        readingDelay
      );
    };
    serverAudio.onended = () => {
      stopKaraokeTimer();
      const units = chineseReadingUnits(phrase);
      if (units.length) renderKaraoke(units, units.length - 1);
      setReplayState("ready");
    };

    await serverAudio.play();
    return true;
  } catch (error) {
    console.warn("Server TTS unavailable, using browser voice.", error);
    setReplayState("ready");
    return false;
  }
}
/**
 * Speak a phrase with the browser's speech synthesis and animate the
 * karaoke alongside it.
 *
 * The karaoke starts 120 ms after the utterance starts and is re-synced by
 * boundary events when the browser emits them. The delayed start is
 * cancelled, and any running karaoke timer stopped, when the utterance
 * ends or fails, so the highlight never keeps moving without speech.
 *
 * @param {string} text - Phrase to speak.
 * @returns {boolean} False when the phrase is empty or speech synthesis is
 *   not available; true once the utterance has been queued.
 */
function speakWithBrowserTTS(text) {
  const phrase = (text || "").trim();
  if (!phrase || !("speechSynthesis" in window)) return false;
  serverAudio?.pause?.();
  window.speechSynthesis.cancel();

  const utterance = new SpeechSynthesisUtterance(phrase);
  utterance.lang = "zh-CN";
  const rate = BROWSER_TTS_RATE;
  utterance.rate = rate;
  utterance.pitch = 1;
  const voice = pickChineseVoice();
  if (voice) utterance.voice = voice;

  // Handle of the delayed karaoke start, kept so it can be cancelled.
  let karaokeStartTimer = null;
  const cancelPendingKaraokeStart = () => {
    if (karaokeStartTimer === null) return;
    clearTimeout(karaokeStartTimer);
    karaokeStartTimer = null;
  };

  utterance.onstart = () => {
    setReplayState("playing");
    cancelPendingKaraokeStart();
    karaokeStartTimer = setTimeout(() => {
      karaokeStartTimer = null;
      startKaraoke(phrase, rate);
    }, 120);
  };
  utterance.onboundary = (event) => syncKaraokeFromBoundary(phrase, event.charIndex);
  utterance.onend = () => {
    cancelPendingKaraokeStart();
    stopKaraokeTimer();
    const chars = chineseReadingUnits(phrase);
    if (chars.length) renderKaraoke(chars, chars.length - 1);
    setReplayState("ready");
  };
  utterance.onerror = () => {
    // Speech failed or was interrupted: stop animating as well.
    cancelPendingKaraokeStart();
    stopKaraokeTimer();
    setReplayState("ready");
  };
  window.speechSynthesis.speak(utterance);
  return true;
}
/**
 * Speak a phrase, preferring the server voice and falling back to the
 * browser voice; the replay state returns to "ready" when neither works.
 *
 * @param {string} text - Phrase to speak.
 */
async function speakChinese(text) {
  if (await speakWithServerTTS(text)) return;
  karaokeHint.textContent = SERVER_TTS_ENABLED ? "Using browser voice fallback." : "Using browser voice.";
  const spoken = speakWithBrowserTTS(text);
  if (!spoken) setReplayState("ready");
}
/**
 * Send the sentence in the form to the "/corriger" endpoint and render the
 * correction, the feedback panel and the reading panel.
 *
 * @param {{speak?: boolean}} [options] - When speak is true the corrected
 *   sentence is read aloud once the result is displayed.
 * @returns {Promise<string>} The normalised correction text, or an empty
 *   string when the field was empty or the request failed.
 */
async function runCorrection({ speak = false } = {}) {
  const sentence = field("#sentence").value.trim();
  if (!sentence) {
    result.innerHTML = '<div class="placeholder"><span class="placeholder-icon">写</span><strong>Add a sentence first</strong><span>Paste or speak one Chinese sentence.</span></div>';
    return "";
  }

  // Reset everything tied to the previous correction before asking again.
  setLoading(true);
  currentCorrectedSentence = "";
  if (copyCorrected) copyCorrected.disabled = true;
  resetFeedbackPanel();
  resetPreparedTTS();
  readingPanel.style.display = "none";
  showLoadingCoach();

  try {
    const client = await getClient();
    const response = await client.predict("/corriger", {
      context: field("#context").value,
      sentence,
      target_tone: "",
      correction_mode: "tone-aware"
    });
    console.log("Gradio response", response);

    const payload = extractCorrectionPayload(response);
    const data = normalizeCorrectionText(payload.result || extractResult(response));
    result.innerHTML = renderMarkdown(data);
    revealResult();
    updateLearningHistory(data);

    const seconds = Number(payload.generation_time_seconds);
    currentGenerationTimeSeconds = Number.isFinite(seconds) ? seconds : null;
    status.textContent = currentGenerationTimeSeconds ? `Done (${currentGenerationTimeSeconds}s)` : "Done";
    if (payload.metrics_error) console.warn(payload.metrics_error);

    const corrected = payload.corrected_sentence || extractCorrectedSentence(data);
    currentCorrectedSentence = corrected;
    currentOriginalSentence = payload.original_sentence || sentence;
    currentRequestId = payload.request_id || "";
    if (currentRequestId) showFeedbackPanel();
    if (copyCorrected) copyCorrected.disabled = !corrected;

    if (corrected) {
      readingPanel.style.display = "block";
      renderKaraoke(chineseReadingUnits(corrected), -1);
      if (SERVER_TTS_ENABLED) {
        // Started without awaiting so the voice loads in the background.
        prepareServerTTS(corrected);
      } else {
        karaokeHint.textContent = "Browser voice ready. Press Replay to read it.";
        setReplayState("ready");
      }
    } else {
      readingPanel.style.display = "none";
    }

    if (speak) {
      await speakChinese(corrected);
    }
    return data;
  } catch (error) {
    console.error(error);
    currentCorrectedSentence = "";
    if (copyCorrected) copyCorrected.disabled = true;
    readingPanel.style.display = "none";
    result.innerHTML = '<div class="placeholder"><span class="placeholder-icon">!</span><strong>Correction failed</strong><span>Please try again in a moment.</span></div>';
    status.textContent = "Error";
    return "";
  } finally {
    setLoading(false);
  }
}
// Submitting the form runs a silent correction instead of a page reload.
form.addEventListener("submit", async (event) => {
  event.preventDefault();
  await runCorrection({ speak: false });
});

// Replay reads the last corrected sentence unless a voice is already being
// prepared or played.
replay.addEventListener("click", async () => {
  const busy = replayState === "preparing" || replayState === "playing";
  if (!currentCorrectedSentence || busy) return;
  setReplayState("preparing");
  await speakChinese(currentCorrectedSentence);
});

// Copy the corrected sentence and show the outcome on the button for 1.2 s.
copyCorrected?.addEventListener("click", async () => {
  if (!currentCorrectedSentence) return;
  let label = "Copied";
  try {
    await navigator.clipboard.writeText(currentCorrectedSentence);
  } catch (error) {
    console.error(error);
    label = "Copy failed";
  }
  copyCorrected.textContent = label;
  setTimeout(() => {
    copyCorrected.textContent = "Copy sentence";
  }, 1200);
});

// Each thumb button submits its own rating.
[[thumbUp, "thumbs_up"], [thumbDown, "thumbs_down"]].forEach(([button, rating]) => {
  button?.addEventListener("click", () => submitFeedback(rating));
});
/**
 * Switch the page's focus-mode class and the label of the toggle button.
 *
 * @param {boolean} enabled - Whether focus mode should be on.
 */
function applyFocusMode(enabled) {
  document.body.classList.toggle("focus-mode", enabled);
  if (!focusMode) return;
  focusMode.textContent = enabled ? "Exit focus" : "Focus mode";
}
// The focus button toggles focus mode and, where supported, fullscreen.
// dataset.fullscreen remembers that fullscreen was requested by this button.
focusMode?.addEventListener("click", async () => {
  const enabled = !document.body.classList.contains("focus-mode");
  applyFocusMode(enabled);
  const root = document.documentElement;
  try {
    if (enabled && root.requestFullscreen) {
      focusMode.dataset.fullscreen = "on";
      await root.requestFullscreen();
    } else if (!enabled && document.fullscreenElement) {
      focusMode.dataset.fullscreen = "";
      await document.exitFullscreen();
    }
  } catch (error) {
    // A refused fullscreen request leaves focus mode on without fullscreen.
    focusMode.dataset.fullscreen = "";
  }
});

// Leaving fullscreen by other means also leaves focus mode, but only when
// fullscreen had been entered through the focus button.
document.addEventListener("fullscreenchange", () => {
  const enteredViaButton = focusMode?.dataset.fullscreen === "on";
  if (document.fullscreenElement || !enteredViaButton) return;
  focusMode.dataset.fullscreen = "";
  applyFocusMode(false);
});
/**
 * Create the speech-recognition object used by the voice button.
 *
 * Recognition is continuous: when the browser ends a session on its own
 * and some text has been captured, listening restarts automatically until
 * the user clicks the button again, at which point the captured sentence
 * is corrected and read aloud. After any recognition error other than
 * "no-speech" the automatic restart is skipped, so a denied microphone or
 * a network failure cannot cause an endless start/error/end loop.
 *
 * @returns {SpeechRecognition|null} The configured recogniser, or null
 *   when the browser has no speech-recognition support.
 */
function setupRecognition() {
  const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
  if (!SpeechRecognition) return null;
  const recog = new SpeechRecognition();
  recog.lang = "zh-CN";
  recog.continuous = true;
  recog.interimResults = true;
  recog.maxAlternatives = 1;

  let finalTranscript = "";
  // Set by onerror for errors after which restarting cannot help.
  let restartBlocked = false;

  recog.onstart = () => {
    isListening = true;
    shouldCorrectAfterStop = false;
    restartBlocked = false;
    voice.classList.add("listening");
    voice.textContent = "⏹️ Click to stop";
    status.textContent = "Listening 🎙️";
    result.innerHTML = '<div class="placeholder">Speak at your rhythm. Click the button again when your sentence is finished. 🎙️</div>';
  };

  recog.onresult = (event) => {
    let interim = "";
    for (let i = event.resultIndex; i < event.results.length; i += 1) {
      const chunk = event.results[i][0].transcript;
      if (event.results[i].isFinal) finalTranscript += chunk;
      else interim += chunk;
    }
    const heard = (finalTranscript + interim).trim();
    field("#sentence").value = heard;
  };

  recog.onerror = (event) => {
    console.error("Speech recognition error", event);
    // Silence is expected while the user thinks; anything else is fatal
    // for this listening session.
    if (event?.error !== "no-speech") restartBlocked = true;
    result.innerHTML = '<div class="placeholder">I could not hear clearly. Please try again, slowly. 🌱</div>';
  };

  recog.onend = async () => {
    isListening = false;
    voice.classList.remove("listening");
    voice.textContent = "🎙️ Speak & correct";
    const captured = field("#sentence").value.trim();
    const mayRestart = !restartBlocked;
    restartBlocked = false;
    if (captured && shouldCorrectAfterStop) await runCorrection({ speak: true });
    else if (!shouldCorrectAfterStop && captured && mayRestart) {
      // The browser stopped by itself: keep listening and keep the
      // transcript gathered so far.
      try {
        recog.start();
        return;
      } catch (error) {
        console.error(error);
        status.textContent = "Ready";
      }
    } else status.textContent = "Ready";
    shouldCorrectAfterStop = false;
    finalTranscript = "";
  };

  return recog;
}
// The voice button starts listening; a second click stops listening and
// requests the correction of what was captured.
voice.addEventListener("click", () => {
  recognition = recognition || setupRecognition();
  if (!recognition) {
    result.innerHTML = '<div class="placeholder">Voice mode is not supported in this browser. Chrome or Edge usually work best. 🌱</div>';
    return;
  }
  if (!isListening) {
    try {
      recognition.start();
    } catch (error) {
      console.error(error);
    }
    return;
  }
  shouldCorrectAfterStop = true;
  status.textContent = "Correcting...";
  recognition.stop();
});

// Example buttons copy their data-context and data-text into the form.
for (const button of document.querySelectorAll(".example")) {
  button.addEventListener("click", () => {
    const { context, text } = button.dataset;
    field("#context").value = context;
    field("#sentence").value = text;
  });
}
| </script> | |
| </body> | |
| </html> | |
| """ | |
async def index():
    """Return the frontend HTML with its runtime placeholders filled in."""
    substitutions = {
        "__SERVER_TTS_ENABLED__": "true" if SERVER_TTS_ENABLED else "false",
        "__TTS_PROVIDER__": TTS_PROVIDER,
        "__EDGE_TTS_KARAOKE_DURATION_FACTOR__": str(EDGE_TTS_KARAOKE_DURATION_FACTOR),
    }
    page = FRONTEND_HTML
    # Dict order is insertion order, so placeholders are replaced in the
    # sequence listed above.
    for placeholder, value in substitutions.items():
        page = page.replace(placeholder, value)
    return page
# Make the application object available under the name ``demo`` as well.
demo = app

# Launch only when the file is executed directly, not when it is imported.
if __name__ == "__main__":
    demo.launch(ssr_mode=False)