# Tone-Bridge / app.py
# Uploaded by Alphaplasti ("Upload 3 files", commit c03e26c, verified).
import gc
import asyncio
import base64
import io
import json
import os
import re
import time
import uuid
from datetime import datetime, timezone
from pathlib import Path
from threading import Lock, Thread
from typing import Optional
import gradio as gr
import numpy as np
import torch
from fastapi.responses import HTMLResponse
from pypinyin import Style, lazy_pinyin
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
# Compatibility shim: if this transformers build does not expose
# ``is_torch_fx_available`` in ``transformers.utils.import_utils``, install a
# stub that always returns True.
# NOTE(review): presumably needed by remote model code (loaded with
# trust_remote_code=True) that still imports this helper — confirm.
try:
    import transformers.utils.import_utils as transformers_import_utils

    if not hasattr(transformers_import_utils, "is_torch_fx_available"):
        transformers_import_utils.is_torch_fx_available = lambda: True
except Exception:
    # Best-effort patch: any failure while patching is deliberately ignored.
    pass
try:
    import spaces
except Exception:

    class _SpacesFallback:
        """No-op stand-in used when the ``spaces`` package cannot be imported.

        Only the ``GPU`` decorator is provided; it leaves the decorated
        function untouched so the app can run without the package.
        """

        @staticmethod
        def GPU(*args, **kwargs):
            """Return the decorated function unchanged.

            Works both as a bare decorator (``@spaces.GPU``) and as a
            decorator factory (``@spaces.GPU(duration=90)``).
            """
            # Bare usage passes the function itself as the only positional
            # argument; hand it back directly instead of wrapping it in the
            # factory (which would replace the function with ``decorator``).
            if len(args) == 1 and callable(args[0]) and not kwargs:
                return args[0]

            def decorator(fn):
                return fn

            return decorator

    spaces = _SpacesFallback()
# ---------------------------------------------------------------------------
# Runtime configuration. Every setting is read once, at import time, from an
# environment variable with the default shown here.
# ---------------------------------------------------------------------------

# Correction model; an empty/blank MODEL_ID falls back to the default.
DEFAULT_MODEL_ID = "Alphaplasti/ToneBridge-MiniCPM4.1-8B"
MODEL_ID = os.getenv("MODEL_ID", DEFAULT_MODEL_ID).strip() or DEFAULT_MODEL_ID
# Hub token: HF_TOKEN wins over HUGGING_FACE_HUB_TOKEN; blank becomes None.
HF_TOKEN = (os.getenv("HF_TOKEN") or os.getenv("HUGGING_FACE_HUB_TOKEN") or "").strip() or None

# Text-to-speech settings.
TTS_PROVIDER = os.getenv("TTS_PROVIDER", "edge").strip().lower() or "edge"
DEFAULT_TTS_MODEL_ID = "openbmb/VoxCPM2"
TTS_MODEL_ID = os.getenv("TTS_MODEL_ID", DEFAULT_TTS_MODEL_ID).strip() or DEFAULT_TTS_MODEL_ID
# Server-side TTS defaults to on unless the provider is "browser"; even when
# explicitly enabled it stays off for the "browser" provider.
DEFAULT_ENABLE_SERVER_TTS = "false" if TTS_PROVIDER == "browser" else "true"
ENABLE_SERVER_TTS = os.getenv("ENABLE_SERVER_TTS", DEFAULT_ENABLE_SERVER_TTS).strip().lower() in {"1", "true", "yes", "y"}
SERVER_TTS_ENABLED = ENABLE_SERVER_TTS and TTS_PROVIDER != "browser"
# Maximum number of characters kept by clean_tts_text().
TTS_MAX_CHARS = int(os.getenv("TTS_MAX_CHARS", "180"))
EDGE_TTS_VOICE = os.getenv("EDGE_TTS_VOICE", "zh-CN-YunjianNeural").strip()
EDGE_TTS_RATE = os.getenv("EDGE_TTS_RATE", "+0%").strip()
EDGE_TTS_PITCH = os.getenv("EDGE_TTS_PITCH", "+0Hz").strip()
EDGE_TTS_VOLUME = os.getenv("EDGE_TTS_VOLUME", "+0%").strip()
EDGE_TTS_KARAOKE_DURATION_FACTOR = float(os.getenv("EDGE_TTS_KARAOKE_DURATION_FACTOR", "0.86"))
VOXCPM_VOICE_STYLE = os.getenv(
    "VOXCPM_VOICE_STYLE",
    "A calm adult male Mandarin teacher in his 30s or 40s, warm low-pitched voice, natural conversational speed, clear Standard Mandarin, not childlike, not female",
).strip()
VOXCPM_CFG_VALUE = float(os.getenv("VOXCPM_CFG_VALUE", "2.0"))
VOXCPM_INFERENCE_TIMESTEPS = int(os.getenv("VOXCPM_INFERENCE_TIMESTEPS", "6"))
VOXCPM_RETRY_BADCASE = os.getenv("VOXCPM_RETRY_BADCASE", "false").strip().lower() in {"1", "true", "yes", "y"}
VOXCPM_OUTPUT_SAMPLE_RATE = int(os.getenv("VOXCPM_OUTPUT_SAMPLE_RATE", "24000"))

# Generation limits and model loading options.
MAX_INPUT_CHARS = int(os.getenv("MAX_INPUT_CHARS", "1200"))
MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "220"))
LOAD_IN_4BIT = os.getenv("LOAD_IN_4BIT", "true").strip().lower() in {"1", "true", "yes", "y"}
PRELOAD_MODEL = os.getenv("PRELOAD_MODEL", "true").strip().lower() in {"1", "true", "yes", "y"}

# Usage-metrics storage (JSON Lines file) and optional upload to a Hub repo.
SPACE_DIR = Path(__file__).resolve().parent
METRICS_FILE = Path(os.getenv("METRICS_FILE", "tonebridge_usage_metrics.jsonl"))
METRICS_REPO_SYNC = os.getenv("METRICS_REPO_SYNC", "false").strip().lower() in {"1", "true", "yes", "y"}
METRICS_REPO_ID = (
    os.getenv("METRICS_REPO_ID")
    or os.getenv("SPACE_ID")
    or os.getenv("HF_SPACE_ID")
    or ""
).strip()
# Default path inside the repo: the bare file name for an absolute
# METRICS_FILE, otherwise the relative path with forward slashes.
DEFAULT_METRICS_REPO_PATH = (
    METRICS_FILE.name if METRICS_FILE.is_absolute() else str(METRICS_FILE).replace("\\", "/")
)
METRICS_REPO_PATH = os.getenv("METRICS_REPO_PATH", DEFAULT_METRICS_REPO_PATH).strip().lstrip("/")
HF_METRICS_TOKEN = (
    os.getenv("HF_METRICS_TOKEN")
    or os.getenv("HF_TOKEN")
    or os.getenv("HUGGING_FACE_HUB_TOKEN")
    or ""
).strip() or None
# Guards reads and writes of the metrics file.
METRICS_LOCK = Lock()
# Last metrics-sync failure message, or None after a successful sync.
metrics_sync_error: Optional[str] = None

# Lazily populated model state (see load_model) and TTS state.
tokenizer = None
model = None
load_error: Optional[str] = None
tts_model = None
tts_load_error: Optional[str] = None

# Application object; the API endpoints below register on it via @app.api.
app = gr.Server()
# System message sent with every correction request. It fixes the five-line
# output format that the parsing helpers in this file rely on.
# NOTE(review): the text contains {context}, {tone}, {correction_style} and
# {sentence} placeholders, but the visible code passes SYSTEM_PROMPT to the
# chat template as-is (no .format call), so the model sees them literally —
# confirm this is intentional before changing it.
SYSTEM_PROMPT = """You are ToneBridge, a Mandarin Chinese teacher for beginner learners.
Your task is to correct ONE student Chinese sentence according to the selected context and tone.
Your default behavior is conservative minimal correction.
Do not create a richer new sentence.
Do not improve style just because another phrasing is possible.
Do not shorten, expand, or rewrite a correct sentence.
Preserve the student's meaning, length, intention, and punctuation style as much as possible.
Never add information that is absent from the original sentence.
When in doubt, choose no correction.
Inputs:
Context: {context}
Tone: {tone}
Correction style: {correction_style}
Student sentence: {sentence}
Correction decision rule:
A correction is allowed ONLY if the original sentence has a clear problem:
- wrong character
- wrong word
- missing necessary word
- extra incorrect word
- wrong measure word
- wrong word order
- wrong grammar pattern
- tone/politeness inappropriate for the selected context
If the sentence is understandable, grammatical, and natural enough for the selected context, do NOT correct it.
Acceptable variants are not errors.
A more formal, shorter, smoother, or more common version is NOT a correction if the original is already acceptable.
Important anti-overcorrection rules:
- Do not remove 一 from 有一只猫 only to make it more casual. 有一只猫 and 有只猫 can both be correct.
- Do not add 的 or 色 only to make an adjective-noun phrase sound more standard if the original is already acceptable.
- Do not change basic location patterns such as "A 在 B 的旁边" if they are correct and natural.
- Do not change word order unless the original word order is actually wrong.
- Do not mark "word order" unless the corrected sentence visibly changes the order of words.
- Do not correct punctuation-only issues unless punctuation creates real confusion.
- Do not replace a correct casual sentence with a formal sentence unless the selected context requires formality.
- For a casual or friendly tone, do not use 您 or 您好. Use 你 / 你好.
- For a teacher, client, manager, or very formal context, 您 may be appropriate.
Error type consistency:
- If the corrected sentence is identical to the original, Error type must be "none".
- If Error type is "none", the corrected sentence must be identical to the original.
- If you replace one Chinese character with another that has the same or very close pinyin, Error type should be "character/input-method mistake", not politeness.
- If you cannot explain the correction by pointing to a clear visible problem, return no correction.
Output rules:
Return exactly 5 short lines.
Use exactly these labels in this order.
Do not use markdown.
Do not output pinyin.
Do not output translations.
Do not output hidden reasoning, chain-of-thought, or <think> tags.
Explanations must be only in English.
Why and Tip must be English sentences. Do not explain in Chinese.
You may mention isolated Chinese words or characters inside English explanations only when necessary.
Chinese sentences must stay in Chinese characters.
Give only one corrected sentence.
Add at most one gentle emoji in Why or Tip, never inside Chinese sentences.
Allowed Error type values:
none
character/input-method mistake
wrong character
wrong word
missing word
extra word
measure word
word order
grammar
tone
Required format:
Original sentence: <student sentence>
Corrected sentence: <corrected sentence or identical original>
Error type: <one allowed Error type value>
Why: <short beginner-friendly explanation in English>
Tip: <one short practical tip in English>
For a correct sentence:
Original sentence: <student sentence>
Corrected sentence: <identical student sentence>
Error type: none
Why: This sentence is correct and natural. 😊
Tip: Keep it as it is.
Examples:
Input:
红桌子上有一只猫
Output:
Original sentence: 红桌子上有一只猫
Corrected sentence: 红桌子上有一只猫
Error type: none
Why: This sentence is correct and natural. 😊
Tip: 有只猫 is only a casual variant, not a correction.
Input:
桌子上猫有一只
Output:
Original sentence: 桌子上猫有一只
Corrected sentence: 桌子上有一只猫
Error type: word order
Why: In this location pattern, use place + 有 + object. 😊
Tip: Put 有 before the thing that exists.
Input:
我想喝谁
Output:
Original sentence: 我想喝谁
Corrected sentence: 我想喝水
Error type: character/input-method mistake
Why: 谁 and 水 have close pinyin, but 水 means water. 😊
Tip: Check same-sound characters when typing.
"""
def normalize_space(text: str) -> str:
    """Trim *text* and collapse every whitespace run into one space.

    ``None`` and other falsy values are treated as the empty string.
    """
    trimmed = (text or "").strip()
    return re.sub(r"\s+", " ", trimmed)
def has_chinese(text: str) -> bool:
    """Tell whether *text* holds at least one character in U+4E00-U+9FFF."""
    candidate = text or ""
    return bool(re.search(r"[\u4e00-\u9fff]", candidate))
def to_pinyin(text: str) -> str:
    """Convert *text* to tone-marked pinyin, one space between syllables."""
    syllables = lazy_pinyin(text or "", style=Style.TONE)
    return " ".join(syllables)
def should_add_pinyin_for_line(line: str) -> bool:
    """Return True for lines that mention one of the two sentence labels.

    Only the "Original sentence" and "Corrected sentence" lines receive a
    pinyin gloss.
    """
    haystack = line or ""
    for label in ("Original sentence", "Corrected sentence"):
        if label in haystack:
            return True
    return False
def chinese_segments(text: str):
    """Return the runs of Chinese text found in *text*.

    A run is a maximal stretch of CJK ideographs (U+4E00-U+9FFF), Chinese
    punctuation and whitespace. Runs without any ideograph are dropped and
    the remaining ones are stripped of surrounding whitespace.

    The fullwidth punctuation is spelled with ``\\uXXXX`` escapes so the
    pattern survives re-encoding or Unicode normalisation of this file. The
    ASCII colon and parentheses are deliberately left out of the class:
    otherwise the ``:`` that ends a label such as "Corrected sentence:"
    would be captured as the start of the following Chinese run.
    """
    run_pattern = (
        r"[\u4e00-\u9fff"
        r"\uff0c\u3002\uff01\uff1f\u3001\uff1b\uff1a"  # fullwidth , . ! ? ; : and the enumeration comma
        r"\u201c\u201d\u2018\u2019\uff08\uff09"  # curly quotes, fullwidth parentheses
        r"\u300a\u300b\u3008\u3009\u2026\u2014"  # angle brackets, ellipsis, em dash
        r",!?;\s]+"  # tolerate ASCII punctuation typed inside a Chinese sentence
    )
    segments = []
    for run in re.findall(run_pattern, text or ""):
        # Keep only runs that really contain Chinese characters.
        if re.search(r"[\u4e00-\u9fff]", run):
            segments.append(run.strip())
    return segments
def add_pinyin_under_chinese(text: str) -> str:
    """Insert an italic pinyin line under each sentence line of *text*.

    Blank input lines are dropped. Every kept line is followed by an empty
    line; sentence lines (see ``should_add_pinyin_for_line``) additionally
    get ``*pinyin / pinyin*`` for their Chinese segments right below them.
    """
    output_lines = []
    for raw_line in (text or "").splitlines():
        stripped = raw_line.strip()
        if not stripped:
            continue
        output_lines.append(stripped)
        if should_add_pinyin_for_line(raw_line):
            glosses = [to_pinyin(segment) for segment in chinese_segments(raw_line)]
            if glosses:
                output_lines.append("*" + " / ".join(glosses) + "*")
        # Blank separator so each entry renders as its own paragraph.
        output_lines.append("")
    return "\n".join(output_lines).strip()
def add_section_emojis(text: str) -> str:
    """Prefix each known section label at the start of a line with an emoji.

    ``Label:`` (optionally indented, optional spaces before the colon)
    becomes ``<emoji> Label :`` with the original indentation kept.
    """
    decorated_labels = (
        ("Original sentence", "📝 Original sentence"),
        ("Corrected sentence", "✅ Corrected sentence"),
        ("Error type", "🔎 Error type"),
        ("Why", "💡 Why"),
        ("Tip", "🌱 Tip"),
    )
    result = text or ""
    for plain, decorated in decorated_labels:
        label_at_line_start = re.compile(rf"(?m)^(\s*){re.escape(plain)}\s*:")
        result = label_at_line_start.sub(
            lambda hit, decorated=decorated: f"{hit.group(1)}{decorated} :",
            result,
        )
    return result
def normalize_model_markdown(text: str) -> str:
    """Normalise raw model output so each section label starts its own line.

    Literal escape sequences (backslash followed by ``r``/``n``/``t``) are
    turned into real characters, then any whitespace run that directly
    precedes a section label is replaced by a single newline.
    """
    result = (text or "").strip()
    # Order matters: the combined "\r\n" escape must go before the bare "\n".
    for escaped, actual in (("\\r\\n", "\n"), ("\\n", "\n"), ("\\t", " ")):
        result = result.replace(escaped, actual)
    for label in ("Original sentence", "Corrected sentence", "Error type", "Why", "Tip"):
        gap_before_label = re.compile(rf"\s+(?={re.escape(label)}\s*:)")
        result = gap_before_label.sub("\n", result)
    return result.strip()
def clean_corrected_sentence_value(value: str) -> str:
    """Reduce the value of a "Corrected sentence" line to a single sentence.

    Steps: drop markdown asterisks and normalise whitespace, cut everything
    from a following section label onwards, and, when more text follows the
    first sentence terminator, keep only that first sentence.

    Sentence terminators are the ideographic full stop, the fullwidth
    ``!`` / ``?`` (written as ``\\uXXXX`` escapes so they survive re-encoding
    of this file) and their ASCII counterparts.
    """
    text = normalize_space((value or "").replace("*", ""))
    text = re.split(r"\s+(?:Original sentence|Error type|Why|Tip)\s*:", text, maxsplit=1)[0].strip()
    # Only truncate when the terminator is followed by further Chinese or
    # Latin text, i.e. the model appended a second sentence or a note.
    first_sentence = re.match(
        r"^(.+?[\u3002\uff01\uff1f!?])(?=\s*[\u4e00-\u9fffA-Za-z])",
        text,
    )
    if first_sentence:
        text = first_sentence.group(1)
    return text.strip()
def clean_correction_output(text: str) -> str:
    """Normalise model output and sanitise its "Corrected sentence" line.

    All other lines are passed through unchanged.
    """
    corrected_line = re.compile(r"^(Corrected sentence\s*:\s*)(.+)$", flags=re.I)
    result_lines = []
    for raw in normalize_model_markdown(text).splitlines():
        hit = corrected_line.match(raw.strip())
        if hit is None:
            result_lines.append(raw)
            continue
        prefix, sentence = hit.groups()
        result_lines.append(prefix + clean_corrected_sentence_value(sentence))
    return "\n".join(result_lines).strip()
def wrap_result(markdown: str) -> str:
    """Return *markdown* stripped, or a placeholder message when it is empty/None."""
    if not markdown:
        return "No correction was produced."
    return markdown.strip()
def final_result(markdown: str) -> str:
    """Turn raw model output into the user-facing result text.

    Pipeline: clean the output, add section emojis, add pinyin lines, then
    apply the empty-result placeholder.
    """
    cleaned = clean_correction_output(markdown)
    with_emojis = add_section_emojis(cleaned)
    with_pinyin = add_pinyin_under_chinese(with_emojis)
    return wrap_result(with_pinyin)
def utc_now_iso() -> str:
    """Return the current UTC time in ISO 8601 form with a trailing ``Z``."""
    stamp = datetime.now(timezone.utc).isoformat()
    return stamp.replace("+00:00", "Z")
def extract_result_field(markdown: str, label: str) -> str:
    """Return the value following ``label:`` in *markdown*, or "" if absent.

    Matching is case-insensitive and uses the first line that contains the
    label. Asterisks are removed; the "Corrected sentence" value is further
    reduced by ``clean_corrected_sentence_value``, other values only have
    their whitespace normalised.
    """
    normalized = normalize_model_markdown(markdown or "")
    field_line = re.search(rf"(?im)^.*?{re.escape(label)}\s*:\s*(.+)$", normalized)
    if field_line is None:
        return ""
    raw_value = field_line.group(1).replace("*", "").strip()
    if label.lower() != "corrected sentence":
        return normalize_space(raw_value)
    return clean_corrected_sentence_value(raw_value)
def is_mostly_chinese_explanation(text: str) -> bool:
    """Detect an explanation written mainly in Chinese.

    True when *text* has at least six characters in U+4E00-U+9FFF and more
    of them than ASCII letters.
    """
    sample = text or ""
    hanzi = sum(1 for ch in sample if "\u4e00" <= ch <= "\u9fff")
    latin = sum(1 for ch in sample if "A" <= ch <= "Z" or "a" <= ch <= "z")
    return hanzi >= 6 and hanzi > latin
def english_feedback_fallback(error_type: str, label: str) -> str:
    """Return a canned English "Why" or "Tip" sentence for *error_type*.

    *label* selects the tip when it equals "tip" (case-insensitive) and the
    explanation otherwise. Rules are tried in order and the first whose
    keyword occurs in the normalised, lower-cased error type wins; a generic
    grammar message is used when none matches.
    """
    kind = normalize_space(error_type).lower()
    pick = 1 if label.lower() == "tip" else 0
    # (keywords, (why, tip)) — order is significant: "word order" must be
    # tested before the plain "word" rule.
    rules = (
        (
            ("none",),
            ("This sentence is correct and natural.", "Keep it as it is."),
        ),
        (
            ("character", "input"),
            (
                "One character changes the meaning; the corrected sentence uses the intended word.",
                "When typing, check characters with similar pronunciation.",
            ),
        ),
        (
            ("word order", "order"),
            (
                "The correction fixes the word order so the Mandarin pattern is clearer.",
                "Practice the same sentence pattern with one small change at a time.",
            ),
        ),
        (
            ("measure",),
            (
                "The correction uses a measure word that fits the noun better.",
                "Pair nouns with their usual measure words.",
            ),
        ),
        (
            ("tone", "register", "polite"),
            (
                "The correction makes the tone fit the selected context better.",
                "Match the wording to the relationship and situation.",
            ),
        ),
        (
            ("word",),
            (
                "The correction replaces a word that does not fit the intended meaning.",
                "Check the meaning of each key word before sending.",
            ),
        ),
    )
    for keywords, messages in rules:
        if any(keyword in kind for keyword in keywords):
            return messages[pick]
    generic = (
        "The correction fixes a grammar issue while keeping the original meaning.",
        "Practice the sentence pattern with one small change at a time.",
    )
    return generic[pick]
def build_plain_correction_output(
    original_sentence: str,
    corrected_sentence: str,
    error_type: str,
    why: str,
    tip: str,
) -> str:
    """Assemble the five-line ``Label: value`` correction text.

    An empty *error_type* is rendered as ``none``.
    """
    fields = (
        ("Original sentence", original_sentence),
        ("Corrected sentence", corrected_sentence),
        ("Error type", error_type or "none"),
        ("Why", why),
        ("Tip", tip),
    )
    return "\n".join(f"{name}: {value}" for name, value in fields)
def generate_english_feedback_repair(
    original_sentence: str,
    corrected_sentence: str,
    error_type: str,
    why: str,
    tip: str,
) -> str:
    """Ask the loaded model to rewrite "Why" and "Tip" in English.

    The five fields are sent back to the model in the plain five-label
    format with an instruction to keep the first three unchanged.

    Returns:
        The model's reply after ``strip_thinking`` (defined outside this
        view), or "" when no model/tokenizer is loaded or any exception
        occurs during generation.
    """
    if model is None or tokenizer is None:
        return ""
    messages = [
        {
            "role": "system",
            "content": (
                "Rewrite Mandarin correction feedback. Keep Original sentence, Corrected sentence, "
                "and Error type unchanged. Rewrite only Why and Tip in beginner-friendly English. "
                "Do not explain in Chinese. Do not output pinyin. Return exactly the same five labels."
            ),
        },
        {
            "role": "user",
            "content": build_plain_correction_output(
                original_sentence,
                corrected_sentence,
                error_type,
                why,
                tip,
            ),
        },
    ]
    try:
        try:
            text = tokenizer.apply_chat_template(
                messages,
                tokenize=False,
                add_generation_prompt=True,
                enable_thinking=False,
            )
        except TypeError:
            # Retry without enable_thinking when the call rejects that keyword.
            text = tokenizer.apply_chat_template(
                messages,
                tokenize=False,
                add_generation_prompt=True,
            )
        inputs = tokenizer([text], return_tensors="pt").to(model.device)
        with torch.inference_mode():
            outputs = model.generate(
                **inputs,
                max_new_tokens=150,
                do_sample=False,
                use_cache=True,
                repetition_penalty=1.05,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id,
            )
        # Keep only the newly generated tokens (drop the prompt prefix).
        generated = outputs[0][inputs["input_ids"].shape[-1]:]
        repaired = tokenizer.decode(generated, skip_special_tokens=True).strip()
        # Release tensors before returning; note this cleanup only runs on
        # the success path.
        del inputs, outputs, generated
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()
        return strip_thinking(repaired)
    except Exception:
        # Best-effort repair: callers fall back to canned English feedback.
        return ""
def ensure_english_feedback(answer: str, original_sentence: str) -> str:
    """Guarantee that the "Why" and "Tip" lines of *answer* are in English.

    *answer* is returned untouched when neither field looks mostly Chinese.
    Otherwise the model is asked for an English rewrite; any field that is
    still missing or mostly Chinese afterwards is replaced by a canned
    sentence, and the answer is rebuilt in the plain five-label format.
    """
    why = extract_result_field(answer, "Why")
    tip = extract_result_field(answer, "Tip")
    needs_repair = is_mostly_chinese_explanation(why) or is_mostly_chinese_explanation(tip)
    if not needs_repair:
        return answer

    original = extract_result_field(answer, "Original sentence") or original_sentence
    corrected = extract_result_field(answer, "Corrected sentence") or original
    error_type = extract_result_field(answer, "Error type") or "none"
    repaired = generate_english_feedback_repair(original, corrected, error_type, why, tip)

    english = {}
    for label, previous in (("Why", why), ("Tip", tip)):
        candidate = extract_result_field(repaired, label) or previous
        if not candidate or is_mostly_chinese_explanation(candidate):
            candidate = english_feedback_fallback(error_type, label)
        english[label] = candidate

    return build_plain_correction_output(
        original,
        corrected,
        error_type,
        english["Why"],
        english["Tip"],
    )
def metrics_file_path() -> Path:
    """Resolve the metrics file location.

    An absolute ``METRICS_FILE`` is used as-is; a relative one is placed
    under ``SPACE_DIR``.
    """
    if METRICS_FILE.is_absolute():
        return METRICS_FILE
    return SPACE_DIR / METRICS_FILE
def sync_usage_metrics_to_repo(commit_message: str) -> None:
    """Upload the metrics file to the configured Hub Space repository.

    Does nothing when syncing is disabled or the file does not exist. The
    outcome is recorded in the module-level ``metrics_sync_error``: a
    message on missing configuration or upload failure, ``None`` after a
    successful upload. Never raises for upload errors.
    """
    global metrics_sync_error
    if not METRICS_REPO_SYNC:
        return
    local_file = metrics_file_path()
    if not local_file.exists():
        return

    missing_config = None
    if not METRICS_REPO_ID:
        missing_config = "Metrics repo sync is enabled, but METRICS_REPO_ID or SPACE_ID is missing."
    elif not HF_METRICS_TOKEN:
        missing_config = "Metrics repo sync is enabled, but HF_METRICS_TOKEN or HF_TOKEN is missing."
    if missing_config is not None:
        metrics_sync_error = missing_config
        return

    try:
        # Imported lazily so the dependency is only needed when syncing.
        from huggingface_hub import upload_file

        upload_file(
            path_or_fileobj=str(local_file),
            path_in_repo=METRICS_REPO_PATH or local_file.name,
            repo_id=METRICS_REPO_ID,
            repo_type="space",
            token=HF_METRICS_TOKEN,
            commit_message=commit_message,
        )
    except Exception as exc:
        metrics_sync_error = f"Metrics repo sync failed: {exc}"
    else:
        metrics_sync_error = None
def read_usage_records_unlocked() -> list[dict]:
    """Load every JSON-object line from the metrics file.

    Blank lines, lines that are not valid JSON and JSON values that are not
    objects are skipped. A missing file yields an empty list. This function
    does not acquire ``METRICS_LOCK`` itself.
    """
    source = metrics_file_path()
    if not source.exists():
        return []
    parsed = []
    with source.open("r", encoding="utf-8") as handle:
        for raw in handle:
            payload = raw.strip()
            if not payload:
                continue
            try:
                entry = json.loads(payload)
            except json.JSONDecodeError:
                continue
            if isinstance(entry, dict):
                parsed.append(entry)
    return parsed
def write_usage_records_unlocked(records: list[dict]) -> None:
    """Replace the metrics file with *records*, one JSON object per line.

    The data is first written to a sibling temporary file and then moved
    over the real file with ``os.replace``. The previous implementation
    truncated the file before writing, so an exception while serialising a
    record (or a crash mid-write) destroyed all stored metrics; with the
    atomic swap the old file stays intact until the new one is complete.

    This function does not acquire ``METRICS_LOCK`` itself.

    Raises:
        Whatever ``json.dumps`` or the file operations raise; in that case
        the existing metrics file is left untouched.
    """
    path = metrics_file_path()
    path.parent.mkdir(parents=True, exist_ok=True)
    # Same directory as the target so os.replace stays on one filesystem.
    tmp_path = path.with_name(path.name + ".tmp")
    try:
        with tmp_path.open("w", encoding="utf-8") as handle:
            handle.writelines(
                json.dumps(record, ensure_ascii=False, sort_keys=True) + "\n"
                for record in records
            )
        os.replace(tmp_path, path)
    finally:
        # No-op after a successful replace; removes the partial file on error.
        tmp_path.unlink(missing_ok=True)
def append_usage_record(record: dict) -> None:
    """Append *record* as one JSON line to the metrics file, then sync.

    The write happens under ``METRICS_LOCK``; the repository sync is
    triggered after the lock has been released.
    """
    target = metrics_file_path()
    target.parent.mkdir(parents=True, exist_ok=True)
    with METRICS_LOCK, target.open("a", encoding="utf-8") as handle:
        serialized = json.dumps(record, ensure_ascii=False, sort_keys=True)
        handle.write(serialized + "\n")
    sync_usage_metrics_to_repo("Update ToneBridge usage metrics")
def update_usage_evaluation(request_id: str, evaluation: str) -> Optional[dict]:
    """Store *evaluation* on the first record whose id equals *request_id*.

    The record also receives an ``evaluated_at`` timestamp and the whole
    file is rewritten under ``METRICS_LOCK``. A repository sync follows
    once the lock is released.

    Returns:
        The updated record, or ``None`` when no record matches.
    """
    with METRICS_LOCK:
        all_records = read_usage_records_unlocked()
        target = next(
            (entry for entry in all_records if entry.get("request_id") == request_id),
            None,
        )
        if target is not None:
            target["evaluation"] = evaluation
            target["evaluated_at"] = utc_now_iso()
            write_usage_records_unlocked(all_records)
    if target is None:
        return None
    sync_usage_metrics_to_repo("Update ToneBridge feedback metrics")
    return target
def metric_public_view(record: dict) -> dict:
    """Project *record* onto the fixed set of fields exposed by the API.

    Missing text fields default to "", while ``evaluation`` and
    ``generation_time_seconds`` default to ``None``. Keys not listed here
    are dropped.
    """
    exposed_fields = (
        ("request_id", ""),
        ("created_at", ""),
        ("original_sentence", ""),
        ("corrected_sentence", ""),
        ("evaluation", None),
        ("generation_time_seconds", None),
        ("status", ""),
        ("context", ""),
        ("target_tone", ""),
        ("correction_mode", ""),
        ("error_type", ""),
        ("model_id", ""),
    )
    return {name: record.get(name, default) for name, default in exposed_fields}
def device_label() -> str:
    """Describe the compute device: GPU 0 name and total memory, or a CPU notice."""
    if not torch.cuda.is_available():
        return "CPU: no CUDA GPU detected"
    gpu_name = torch.cuda.get_device_name(0)
    total_bytes = torch.cuda.get_device_properties(0).total_memory
    mem_gb = total_bytes / (1024**3)
    return f"GPU: {gpu_name} ({mem_gb:.1f} GB)"
def load_model():
    """Load the tokenizer and causal LM into the module-level globals.

    Returns immediately when both are already loaded. On success
    ``load_error`` is cleared; on any failure it receives a message and
    ``tokenizer``/``model`` are reset to ``None`` (the exception is not
    re-raised), so a later call tries again.
    """
    global tokenizer, model, load_error
    if model is not None and tokenizer is not None:
        return
    try:
        cuda_available = torch.cuda.is_available()
        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True, token=HF_TOKEN)
        load_kwargs = {
            "torch_dtype": "auto",
            "device_map": "auto",
            "trust_remote_code": True,
            "low_cpu_mem_usage": True,
            "token": HF_TOKEN,
        }
        # 4-bit NF4 quantisation is only requested when CUDA is available.
        if LOAD_IN_4BIT and cuda_available:
            load_kwargs["quantization_config"] = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=torch.float16,
                bnb_4bit_use_double_quant=True,
            )
        try:
            # First attempt: explicitly request the SDPA attention implementation.
            model = AutoModelForCausalLM.from_pretrained(
                MODEL_ID,
                attn_implementation="sdpa",
                **load_kwargs,
            )
        except Exception:
            # Any failure of the first attempt triggers a retry without the
            # explicit attention implementation.
            model = AutoModelForCausalLM.from_pretrained(
                MODEL_ID,
                **load_kwargs,
            )
        model.eval()
        load_error = None
    except Exception as exc:
        load_error = f"Model load failed: {exc}"
        tokenizer = None
        model = None


# Optionally load the model at import time so the first request does not
# have to do it.
if PRELOAD_MODEL:
    load_model()
def correction_mode_guidance(correction_mode: str) -> str:
    """Return the prompt instruction for *correction_mode*.

    Only the exact value "Natural correction" selects the natural-correction
    text; every other value gets the minimal-correction text.
    """
    if correction_mode != "Natural correction":
        return (
            "Minimal correction: change only the characters, grammar, or word order that are necessary. "
            "Do not rewrite the sentence if a small correction is enough."
        )
    return (
        "Natural correction: make the sentence sound natural for the chosen context, "
        "but only if the original is actually unnatural, incorrect, or socially inappropriate. "
        "If the original is already correct and natural, keep it unchanged."
    )
def context_tone_guidance(context: str, target_tone: str) -> str:
    """Return the extra prompt rule for a context/tone pair.

    Only the "wechat" context (case-insensitive) has specific rules, with a
    dedicated variant when the tone mentions "friendly".
    """
    context_key = normalize_space(context).lower()
    tone_key = normalize_space(target_tone).lower()
    if context_key != "wechat":
        return "No extra context-specific rule."
    if "friendly" not in tone_key:
        return (
            "WeChat context: prefer concise instant-message wording. "
            "Avoid ceremonial or overly literary phrasing unless the target tone is explicitly formal."
        )
    return (
        "WeChat + Friendly: treat the sentence like a short instant message. "
        "Be concise, direct, and casual. If the original sounds formal, literary, ceremonial, "
        "or like an invitation letter, correct it as a tone/register issue. "
        "Avoid stiff phrases such as 敬请, 阁下, 拨冗, 莅临, 寒舍 unless the user explicitly wants formal wording. "
        "Prefer everyday wording with 你, 有空, 方便, 一下, 吗, or 吧 when appropriate. "
        "The corrected sentence should usually be short."
    )
# NOTE(review): a second ``build_user_prompt`` is defined further down in this
# file and replaces this one at import time, so this version is effectively
# unused — confirm before relying on it.
def build_user_prompt(context: str, sentence: str, target_tone: str, correction_mode: str) -> str:
    """Build the user message from free-form context, tone and mode strings."""
    context = normalize_space(context) or "contexte non precise"
    target_tone = normalize_space(target_tone)
    sentence = (sentence or "").strip()
    style_instruction = correction_mode_guidance(correction_mode)
    tone_instruction = context_tone_guidance(context, target_tone)
    return f"""Social context: {context}
Target tone: {target_tone}
Correction style: {correction_mode}
Correction style instruction: {style_instruction}
Context and tone instruction: {tone_instruction}
Explanation language: English only
Student's Chinese sentence:
{sentence}
Before correcting, decide whether the sentence is already correct, natural, and appropriate for the context.
If it is correct, keep exactly the same sentence in "Corrected sentence".
In that case, use "none" as the error type and explain simply that the sentence is correct.
Correct the sentence while preserving its intention and length.
Prefer the smallest possible correction.
Do not turn a short sentence into a long sentence.
The "Corrected sentence" line must contain only one Chinese sentence. Do not add a second option, leftover characters, notes, vocabulary, or pinyin after it.
Do not add names, emotions, encouragement, or information that was not in the original sentence.
Do not replace a correct sentence with a paraphrase. For example, "火车站在超市的旁边" is correct and natural for "The train station is next to the supermarket"; do not correct it to "火车站旁有超市".
If you replace one Chinese character with another character that has the same or very close pinyin, mention in "Why" that it is probably a character/input-method mistake.
All explanations, titles, and tips must be in English.
Add one line "Error type" with a short category: character/input mistake, grammar, word order, tone/register, naturalness, or none.
Use real line breaks between sections. Do not output escaped newline characters like \\n.
Do not write a long paragraph. Maximum 5 short lines.
/no_think"""
# Canonical context-tone profiles. Each key maps to the context description,
# tone, correction style and instruction text that are inserted into the user
# prompt; context/tone/correction_style are also written to the usage metrics.
CONTEXT_TONE_PROFILES = {
    "friendly-informal": {
        "context": "friendly everyday conversation with a friend or close person",
        "tone": "informal friendly",
        "correction_style": "tone-aware",
        "instruction": (
            "Keep the sentence simple, natural, and friendly. Prefer everyday spoken wording. "
            "Use \u4f60 when a pronoun is needed. Avoid \u60a8, \u662f\u5426, ceremonial, literary, or stiff formal wording."
        ),
    },
    "work-informal": {
        "context": "workplace message to a colleague or familiar coworker",
        "tone": "informal professional",
        "correction_style": "tone-aware",
        "instruction": (
            "Keep the sentence clear, polite, and work-appropriate without sounding stiff. "
            "Avoid slang, but do not over-formalize if the original is already natural."
        ),
    },
    "work-formal": {
        "context": "workplace message to a manager, client, teacher, or formal contact",
        "tone": "formal professional",
        "correction_style": "tone-aware",
        "instruction": (
            "Use respectful, professional wording when needed. \u60a8 and \u8bf7 may be appropriate. "
            "Avoid overly casual phrasing if the relationship requires formality."
        ),
    },
    "wechat-informal": {
        "context": "WeChat message to a friend or close contact",
        "tone": "informal instant message",
        "correction_style": "tone-aware",
        "instruction": (
            "Prefer short, direct instant-message wording. Use \u4f60, \u6709\u7a7a, \u65b9\u4fbf, "
            "\u4e00\u4e0b, \u5417, or \u5427 when appropriate. Avoid \u60a8, \u662f\u5426, "
            "\u656c\u8bf7, \u9601\u4e0b, \u62e8\u5197, \u8385\u4e34, and invitation-letter style."
        ),
    },
    "wechat-formal": {
        "context": "WeChat message in a professional or formal relationship",
        "tone": "formal concise instant message",
        "correction_style": "tone-aware",
        "instruction": (
            "Keep the message concise like WeChat, but respectful. \u8bf7 and \u60a8 may be appropriate. "
            "Avoid both casual slang and overly ceremonial letter-style wording."
        ),
    },
}
# Alternative spellings (including French-style "-informel"/"-formel" keys)
# and short names mapped to canonical profile keys. Bare "work" resolves to
# the formal profile and bare "wechat" to the informal one.
CONTEXT_TONE_ALIASES = {
    "amical-informel": "friendly-informal",
    "amis-informel": "friendly-informal",
    "friends": "friendly-informal",
    "family": "friendly-informal",
    "friendly": "friendly-informal",
    "work": "work-formal",
    "work-informel": "work-informal",
    "work-formel": "work-formal",
    "wechat": "wechat-informal",
    "wechat-informel": "wechat-informal",
    "wechat-formel": "wechat-formal",
}
def normalize_context_tone(value: str) -> str:
    """Map a user-supplied context-tone string to a canonical profile key.

    The value is lower-cased, underscores and whitespace become hyphens,
    aliases are resolved first, then canonical keys; anything unknown falls
    back to "friendly-informal".
    """
    hyphenated = normalize_space(value).lower().replace("_", "-")
    key = re.sub(r"\s+", "-", hyphenated)
    if key in CONTEXT_TONE_ALIASES:
        return CONTEXT_TONE_ALIASES[key]
    if key in CONTEXT_TONE_PROFILES:
        return key
    return "friendly-informal"
def context_tone_profile(value: str) -> dict:
    """Return a copy of the profile for *value* with its canonical ``key`` added."""
    canonical = normalize_context_tone(value)
    return {**CONTEXT_TONE_PROFILES[canonical], "key": canonical}
def build_user_prompt(context: str, sentence: str, target_tone: str = "", correction_mode: str = "") -> str:
    """Build the user message for a correction request.

    *context* is resolved to a context-tone profile and the prompt is filled
    from that profile. *target_tone* and *correction_mode* are accepted for
    call compatibility but are not used here.
    """
    profile = context_tone_profile(context)
    profile_key = profile["key"]
    profile_context = profile["context"]
    profile_tone = profile["tone"]
    profile_style = profile["correction_style"]
    profile_instruction = profile["instruction"]
    sentence = (sentence or "").strip()
    return f"""Selected context-tone: {profile_key}
Context: {profile_context}
Tone: {profile_tone}
Correction style: {profile_style}
Profile instruction: {profile_instruction}
Explanation language: English only
Student's Chinese sentence:
{sentence}
Before correcting, decide whether the sentence is already correct, natural, and appropriate for the selected context-tone.
If it is correct, keep exactly the same sentence in "Corrected sentence".
In that case, use "none" as the error type and explain simply that the sentence is correct.
Correct the sentence while preserving its intention and length.
Prefer the smallest possible correction.
Do not turn a short sentence into a long sentence.
The "Corrected sentence" line must contain only one Chinese sentence. Do not add a second option, leftover characters, notes, vocabulary, or pinyin after it.
Do not add names, emotions, encouragement, or information that was not in the original sentence.
Do not replace a correct sentence with a paraphrase.
If you replace one Chinese character with another character that has the same or very close pinyin, mention in "Why" that it is probably a character/input-method mistake.
All explanations, titles, and tips must be in English.
Use real line breaks between sections. Do not output escaped newline characters like \\n.
Do not write a long paragraph. Maximum 5 short lines.
Now correct the input sentence.
/no_think"""
@spaces.GPU(duration=90)
def _generate_correction_gpu(
    context: str,
    sentence: str,
    target_tone: str,
    correction_mode: str = "tone-aware",
) -> str:
    """Run one correction request and return the formatted result text.

    Validates the sentence (non-empty, at most MAX_INPUT_CHARS characters),
    makes sure the model is loaded, generates deterministically (no
    sampling), then post-processes the answer. Validation and load problems
    are returned as plain messages rather than raised.

    Exceptions raised during tokenisation or generation are not caught here
    and propagate to the caller.
    """
    sentence = (sentence or "").strip()
    if not sentence:
        message = "Add a Chinese sentence first."
        return wrap_result(message)
    if len(sentence) > MAX_INPUT_CHARS:
        return wrap_result(f"The sentence is too long ({len(sentence)} characters). Current limit: {MAX_INPUT_CHARS}.")
    load_model()
    if load_error:
        return wrap_result(load_error)
    if model is None or tokenizer is None:
        message = "The model is not available."
        return wrap_result(message)
    # NOTE(review): SYSTEM_PROMPT is passed without formatting although it
    # contains {context}-style placeholders — confirm this is intended.
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": build_user_prompt(context, sentence, target_tone, correction_mode)},
    ]
    try:
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
            enable_thinking=False,
        )
    except TypeError:
        # Retry without enable_thinking when the call rejects that keyword.
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
    inputs = tokenizer([text], return_tensors="pt").to(model.device)
    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=MAX_NEW_TOKENS,
            do_sample=False,
            use_cache=True,
            repetition_penalty=1.05,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )
    # Keep only the newly generated tokens (drop the prompt prefix).
    generated = outputs[0][inputs["input_ids"].shape[-1]:]
    answer = tokenizer.decode(generated, skip_special_tokens=True).strip()
    # strip_thinking is defined outside the portion of the file shown here.
    answer = strip_thinking(answer)
    # Release tensors and cached GPU memory before the optional second
    # generation performed by ensure_english_feedback.
    del inputs, outputs, generated
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    gc.collect()
    if not answer:
        answer = "The model did not produce a response."
    else:
        answer = ensure_english_feedback(answer, sentence)
    return final_result(answer)
@app.api(name="corriger")
def generate_correction(
    context: str,
    sentence: str,
    target_tone: str,
    correction_mode: str = "tone-aware",
) -> dict:
    """API endpoint: correct one sentence and log the request.

    Runs the correction, measures its wall-clock duration, extracts the
    corrected sentence and error type from the formatted result, and appends
    a usage record when the input is non-empty and within MAX_INPUT_CHARS.
    Failures while saving metrics are reported in ``metrics_error`` instead
    of being raised.
    """
    original_sentence = (sentence or "").strip()
    profile = context_tone_profile(context)

    t0 = time.perf_counter()
    result = _generate_correction_gpu(context, sentence, target_tone, correction_mode)
    generation_time_seconds = round(time.perf_counter() - t0, 3)

    corrected_sentence = extract_result_field(result, "Corrected sentence")
    error_type = extract_result_field(result, "Error type")
    status = "ok" if corrected_sentence else "unparsed_or_error"

    recordable = bool(original_sentence) and len(original_sentence) <= MAX_INPUT_CHARS
    request_id = ""
    metrics_error = ""
    if recordable:
        request_id = str(uuid.uuid4())
        usage_entry = {
            "request_id": request_id,
            "created_at": utc_now_iso(),
            "model_id": MODEL_ID,
            "context": profile["key"],
            "target_tone": profile["tone"],
            "correction_mode": profile["correction_style"],
            "original_sentence": original_sentence,
            "corrected_sentence": corrected_sentence,
            "evaluation": None,
            "generation_time_seconds": generation_time_seconds,
            "error_type": error_type,
            "status": status,
        }
        try:
            append_usage_record(usage_entry)
        except Exception as exc:
            metrics_error = f"Metrics save failed: {exc}"

    return {
        "ok": bool(result),
        "request_id": request_id,
        "result": result,
        "original_sentence": original_sentence,
        "corrected_sentence": corrected_sentence,
        "evaluation": None,
        "generation_time_seconds": generation_time_seconds,
        "status": status,
        "metrics_error": metrics_error,
        "metrics_sync_error": metrics_sync_error,
    }
@app.api(name="rate_response")
def rate_response(request_id: str, evaluation: str) -> dict:
    """API endpoint: attach a thumbs-up/down rating to a logged request.

    Several spellings of the rating are accepted (case, hyphens and spaces
    are ignored). Every failure is reported as ``{"ok": False, "error": ...}``
    instead of being raised.
    """
    request_id = normalize_space(request_id)
    evaluation_key = normalize_space(evaluation).lower().replace("-", "_").replace(" ", "_")
    accepted = {
        **dict.fromkeys(("up", "thumb_up", "thumbs_up", "positive"), "thumbs_up"),
        **dict.fromkeys(("down", "thumb_down", "thumbs_down", "negative"), "thumbs_down"),
    }
    normalized_evaluation = accepted.get(evaluation_key)

    if not request_id:
        return {"ok": False, "error": "Missing request_id."}
    if not normalized_evaluation:
        return {"ok": False, "error": "Evaluation must be thumbs_up or thumbs_down."}

    try:
        updated = update_usage_evaluation(request_id, normalized_evaluation)
    except Exception as exc:
        return {"ok": False, "error": f"Metrics update failed: {exc}"}
    if updated is None:
        return {"ok": False, "error": "Metric record not found."}

    return {
        "ok": True,
        "request_id": request_id,
        "evaluation": normalized_evaluation,
        "metrics_sync_error": metrics_sync_error,
        "record": metric_public_view(updated),
    }
@app.api(name="usage_metrics")
def usage_metrics(limit: int = 500) -> dict:
    """Return the most recent usage records together with metrics settings.

    Args:
        limit: Maximum number of trailing records to return. A falsy value
            becomes 500, the result is clamped to the range 1..5000, and a
            value that cannot be converted with ``int`` falls back to 500.

    Returns:
        On a read failure, a dict with ``ok`` False, an ``error`` message and
        an empty ``records`` list. Otherwise a dict with the total record
        count, the number returned, the metrics file/repo settings, the
        current ``metrics_sync_error`` value and the public view of each
        returned record.
    """
    try:
        requested = int(limit or 500)
    except Exception:
        window = 500
    else:
        window = max(1, min(requested, 5000))

    try:
        with METRICS_LOCK:
            everything = read_usage_records_unlocked()
    except Exception as exc:
        return {"ok": False, "error": f"Metrics read failed: {exc}", "records": []}

    tail = everything[-window:]
    summary = {
        "ok": True,
        "count": len(everything),
        "returned": len(tail),
        "metrics_file": str(metrics_file_path()),
        "metrics_repo_sync": METRICS_REPO_SYNC,
        "metrics_repo_id": METRICS_REPO_ID,
        "metrics_repo_path": METRICS_REPO_PATH,
        "metrics_sync_error": metrics_sync_error,
    }
    summary["records"] = [metric_public_view(entry) for entry in tail]
    return summary
def clean_tts_text(text: str) -> str:
    """Reduce *text* to the characters the TTS path is allowed to read.

    Everything except code points U+4E00..U+9FFF, the punctuation listed in
    the pattern and whitespace is removed. The remainder is passed through
    ``normalize_space`` and cut to at most ``TTS_MAX_CHARS`` characters.
    A falsy *text* is treated as the empty string.
    """
    source = text or ""
    kept = re.sub(r"[^\u4e00-\u9fff,。!?、;:\s]", "", source)
    tidy = normalize_space(kept)
    return tidy[:TTS_MAX_CHARS]
def trim_tts_silence(audio, sample_rate: int):
    """Trim leading and trailing near-silence from an audio buffer.

    The input is converted to a float32 array. Arrays with more than two
    dimensions are squeezed, a 2-D array with at most two rows and more
    columns than rows is transposed so samples run along axis 0, and a
    single-column 2-D array is flattened to 1-D.

    A sample counts as voiced when its absolute amplitude (maximum across
    channels for 2-D input) exceeds ``max(peak * 0.025, 0.002)``. The kept
    region spans the first to the last voiced sample, extended by 60 ms
    before and 140 ms after, clipped to the buffer.

    Args:
        audio: Array-like audio samples.
        sample_rate: Samples per second, used to size the padding and to
            convert the trimmed amounts to milliseconds.

    Returns:
        ``(trimmed, trim_start_ms, trim_end_ms)``. The normalised array is
        returned untouched with ``0, 0`` when the sample rate is falsy or
        not positive, the array is empty, it is not 1-D or 2-D after
        normalisation, its peak is at most 1e-6, or nothing is voiced.
    """
    arr = np.asarray(audio, dtype=np.float32)
    if arr.ndim > 2:
        arr = np.squeeze(arr)
    if arr.ndim == 2 and arr.shape[0] <= 2 and arr.shape[0] < arr.shape[1]:
        arr = arr.T
    if arr.ndim == 2 and arr.shape[1] == 1:
        arr = arr[:, 0]

    # Nothing sensible can be trimmed without a positive rate or without a
    # (samples,) / (samples, channels) layout, so hand the data back as is.
    if not sample_rate or sample_rate <= 0:
        return arr, 0, 0
    if arr.size == 0 or arr.ndim not in (1, 2):
        return arr, 0, 0

    energy = np.max(np.abs(arr), axis=1) if arr.ndim == 2 else np.abs(arr)
    peak = float(np.max(energy))
    if peak <= 1e-6:
        return arr, 0, 0

    threshold = max(peak * 0.025, 0.002)
    voiced = np.flatnonzero(energy > threshold)
    if voiced.size == 0:
        return arr, 0, 0

    total = len(energy)
    pad_start = int(sample_rate * 0.06)
    pad_end = int(sample_rate * 0.14)
    start = max(0, int(voiced[0]) - pad_start)
    # voiced[-1] is an inclusive index while a slice end is exclusive, so add
    # one before padding; otherwise the last voiced sample is cut whenever
    # the padding rounds down to zero.
    end = min(total, int(voiced[-1]) + 1 + pad_end)

    trimmed = arr[start:end]
    trim_start_ms = int(start / sample_rate * 1000)
    trim_end_ms = int((total - end) / sample_rate * 1000)
    return trimmed, trim_start_ms, trim_end_ms
def resample_audio(audio, source_rate: int, target_rate: int):
    """Resample *audio* along axis 0 using linear interpolation.

    Args:
        audio: Array-like samples, 1-D or 2-D with channels on axis 1.
        source_rate: Sample rate of *audio*.
        target_rate: Desired sample rate.

    Returns:
        ``(resampled, rate)``. When either rate is falsy or not positive, or
        both rates are equal, the original *audio* object is returned
        together with ``source_rate``. Empty input is returned as an array
        with ``source_rate``. Otherwise the result has
        ``max(1, round(len * target_rate / source_rate))`` samples, keeps
        the input dtype and is paired with ``target_rate``.
    """
    nothing_to_do = (
        not source_rate
        or not target_rate
        or source_rate == target_rate
        or target_rate <= 0
        or source_rate <= 0
    )
    if nothing_to_do:
        return audio, source_rate

    samples = np.asarray(audio)
    if not samples.size:
        return samples, source_rate

    n_in = samples.shape[0]
    n_out = max(1, int(round(n_in * target_rate / source_rate)))
    last_index = n_in - 1
    grid_in = np.linspace(0, last_index, num=n_in)
    grid_out = np.linspace(0, last_index, num=n_out)

    def _interpolate(column):
        # Both grids span the same index range, so endpoints map to endpoints.
        return np.interp(grid_out, grid_in, column)

    if samples.ndim == 1:
        resampled = _interpolate(samples)
    else:
        per_channel = [_interpolate(samples[:, ch]) for ch in range(samples.shape[1])]
        resampled = np.stack(per_channel, axis=1)
    return resampled.astype(samples.dtype), target_rate
def load_tts_model():
    """Populate the module-level ``tts_model`` on first use.

    Does nothing when ``tts_model`` is already set. When server TTS is
    disabled only ``tts_load_error`` is set. Otherwise ``VoxCPM`` is imported
    and loaded from ``TTS_MODEL_ID``, first with ``load_denoiser=False`` and,
    if that raises ``TypeError``, again without the keyword. Any failure
    leaves ``tts_model`` as ``None`` and stores a message in
    ``tts_load_error``; success clears ``tts_load_error``.
    """
    global tts_model, tts_load_error

    if tts_model is not None:
        return
    if not SERVER_TTS_ENABLED:
        tts_load_error = "Server TTS is disabled."
        return

    try:
        from voxcpm import VoxCPM

        try:
            loaded = VoxCPM.from_pretrained(TTS_MODEL_ID, load_denoiser=False)
        except TypeError:
            # presumably an older/newer VoxCPM without this keyword - retry bare
            loaded = VoxCPM.from_pretrained(TTS_MODEL_ID)
    except Exception as exc:
        tts_model, tts_load_error = None, f"Server TTS failed: {exc}"
    else:
        tts_model, tts_load_error = loaded, None
async def _edge_tts_audio_bytes(text: str) -> bytes:
    """Stream *text* through ``edge_tts`` and return the joined audio bytes.

    The voice, rate, pitch and volume come from the ``EDGE_TTS_*`` module
    settings. Only stream chunks whose ``type`` is ``"audio"`` and whose
    ``data`` is non-empty are kept; an empty ``bytes`` is returned when
    there are none.
    """
    import edge_tts

    options = {
        "text": text,
        "voice": EDGE_TTS_VOICE,
        "rate": EDGE_TTS_RATE,
        "pitch": EDGE_TTS_PITCH,
        "volume": EDGE_TTS_VOLUME,
    }
    communicate = edge_tts.Communicate(**options)
    audio_parts = [
        piece["data"]
        async for piece in communicate.stream()
        if piece.get("type") == "audio" and piece.get("data")
    ]
    return b"".join(audio_parts)
def run_async_safely(coro):
    """Run *coro* to completion from synchronous code and return its result.

    When no event loop is running in the calling thread the coroutine is
    executed directly with ``asyncio.run``. Otherwise it is executed with
    ``asyncio.run`` on a separate thread and this call blocks until that
    thread has finished. An ``Exception`` raised by the coroutine on the
    worker thread is re-raised in the caller.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        loop_is_running = False
    else:
        loop_is_running = True

    if not loop_is_running:
        return asyncio.run(coro)

    outcome = None
    failure = None

    def _worker():
        # asyncio.run cannot be nested in a running loop, hence the thread.
        nonlocal outcome, failure
        try:
            outcome = asyncio.run(coro)
        except Exception as exc:
            failure = exc

    worker = Thread(target=_worker)
    worker.start()
    worker.join()

    if failure is not None:
        raise failure
    return outcome
def generate_edge_tts(text: str, speaker: str = "edge-tts") -> dict:
    """Synthesize *text* with Edge TTS and return it as a base64 data URL.

    Args:
        text: Raw text; it is cleaned with ``clean_tts_text`` first.
        speaker: Label echoed back in the payload; ``EDGE_TTS_VOICE`` is
            used when it is falsy.

    Returns:
        A dict with ``ok`` False and an ``error`` message when the cleaned
        text is empty, server TTS is disabled, no audio comes back or
        synthesis raises. Otherwise ``ok`` True with an ``audio/mpeg`` data
        URL, ``duration_ms`` reported as 0, the speaker label, the voice and
        the source tag.
    """
    phrase = clean_tts_text(text)
    if not phrase:
        return {"ok": False, "error": "No Chinese text to read."}
    if not SERVER_TTS_ENABLED:
        return {"ok": False, "error": "Server TTS is disabled."}

    try:
        mp3_bytes = run_async_safely(_edge_tts_audio_bytes(phrase))
        if not mp3_bytes:
            return {"ok": False, "error": "Edge TTS returned no audio."}
        encoded = base64.b64encode(mp3_bytes).decode("ascii")
        response = {"ok": True, "audio": f"data:audio/mpeg;base64,{encoded}"}
        response["duration_ms"] = 0
        response["speaker"] = speaker or EDGE_TTS_VOICE
        response["voice"] = EDGE_TTS_VOICE
        response["source"] = "edge-tts"
        return response
    except Exception as exc:
        return {"ok": False, "error": f"Edge TTS generation failed: {exc}"}
@spaces.GPU(duration=60)
def _generate_tts_gpu(text: str, speaker: str = "VoxCPM2") -> dict:
    """Synthesize *text* with the server-side TTS model as a WAV data URL.

    The text is cleaned with ``clean_tts_text``, the model is loaded on
    demand through ``load_tts_model``, and the generated audio is trimmed
    with ``trim_tts_silence`` and resampled to ``VOXCPM_OUTPUT_SAMPLE_RATE``
    before being written as WAV via ``soundfile``.

    Args:
        text: Raw text to read.
        speaker: Label echoed back in the payload; ``"VoxCPM2"`` when falsy.

    Returns:
        A dict with ``ok`` False and an ``error`` message when the cleaned
        text is empty, the model is unavailable or anything in synthesis
        raises. Otherwise ``ok`` True with the ``audio/wav`` data URL, the
        duration in milliseconds, the sample rate, the speaker label, the
        source tag and the trimmed lead/tail amounts in milliseconds.
    """
    phrase = clean_tts_text(text)
    if not phrase:
        return {"ok": False, "error": "No Chinese text to read."}

    load_tts_model()
    if tts_load_error or tts_model is None:
        return {"ok": False, "error": tts_load_error or "Server TTS model is not available."}

    try:
        import soundfile as sf

        if VOXCPM_VOICE_STYLE:
            synthesis_text = f"({VOXCPM_VOICE_STYLE}){phrase}"
        else:
            synthesis_text = phrase

        core_args = {
            "text": synthesis_text,
            "cfg_value": VOXCPM_CFG_VALUE,
            "inference_timesteps": VOXCPM_INFERENCE_TIMESTEPS,
        }
        try:
            waveform = tts_model.generate(
                **core_args,
                normalize=True,
                denoise=False,
                retry_badcase=VOXCPM_RETRY_BADCASE,
                retry_badcase_max_times=1,
            )
        except TypeError:
            # presumably the installed generate() rejects the extra keywords;
            # retry with the three core arguments only
            waveform = tts_model.generate(**core_args)

        if isinstance(waveform, (list, tuple)):
            waveform = waveform[0]
        if hasattr(waveform, "detach"):
            # tensor-like result: move to CPU and convert to a NumPy array
            waveform = waveform.detach().cpu().float().numpy()

        inner_model = getattr(tts_model, "tts_model", None)
        native_rate = getattr(inner_model, "sample_rate", 48000)
        waveform, trim_start_ms, trim_end_ms = trim_tts_silence(waveform, native_rate)
        waveform, sample_rate = resample_audio(waveform, native_rate, VOXCPM_OUTPUT_SAMPLE_RATE)

        buffer = io.BytesIO()
        sf.write(buffer, waveform, sample_rate, format="WAV")
        wav_bytes = buffer.getvalue()

        if sample_rate:
            duration_ms = int(len(waveform) / sample_rate * 1000)
        else:
            duration_ms = 0

        encoded = base64.b64encode(wav_bytes).decode("ascii")
        return {
            "ok": True,
            "audio": f"data:audio/wav;base64,{encoded}",
            "duration_ms": duration_ms,
            "sample_rate": sample_rate,
            "speaker": speaker or "VoxCPM2",
            "source": "server",
            "trim_start_ms": trim_start_ms,
            "trim_end_ms": trim_end_ms,
        }
    except Exception as exc:
        return {"ok": False, "error": f"Server TTS generation failed: {exc}"}
@app.api(name="tts")
def generate_tts(text: str, speaker: str = "edge-tts") -> dict:
    """Dispatch a TTS request to the backend named by ``TTS_PROVIDER``.

    Edge aliases go to ``generate_edge_tts``; VoxCPM/server aliases go to
    ``_generate_tts_gpu`` (with ``"VoxCPM2"`` substituted for a falsy
    speaker). Any other provider yields a dict with ``ok`` False and an
    ``error`` message naming the provider.
    """
    edge_names = {"edge", "edge-tts", "microsoft", "microsoft-edge"}
    server_names = {"voxcpm", "voxcpm2", "server"}

    provider = normalize_space(TTS_PROVIDER).lower()
    if provider in edge_names:
        return generate_edge_tts(text, speaker)
    if provider in server_names:
        return _generate_tts_gpu(text, speaker or "VoxCPM2")
    return {"ok": False, "error": f"Unsupported TTS provider: {TTS_PROVIDER}"}
def strip_thinking(text: str) -> str:
    """Remove ``<think>...</think>`` spans and surrounding whitespace.

    Matching ignores case and spans newlines; each span ends at the nearest
    closing tag. An opening tag without a closing tag is left in place. A
    falsy *text* yields the empty string.
    """
    raw = text if text else ""
    without_reasoning = re.sub(r"(?is)<think>.*?</think>", "", raw)
    return without_reasoning.strip()
def runtime_info() -> str:
    """Return a newline-joined summary of the current runtime settings.

    The summary lists the model and TTS settings, whether both ``model`` and
    ``tokenizer`` are set, the ``device_label()`` line, input/token limits,
    the metrics settings and the VoxCPM tuning values.
    """
    model_ready = not (model is None or tokenizer is None)
    loaded = "yes" if model_ready else "no"

    report = [
        f"MODEL_ID: {MODEL_ID}",
        f"TTS_PROVIDER: {TTS_PROVIDER}",
        f"TTS_MODEL_ID: {TTS_MODEL_ID}",
        f"EDGE_TTS_VOICE: {EDGE_TTS_VOICE}",
        f"EDGE_TTS_KARAOKE_DURATION_FACTOR: {EDGE_TTS_KARAOKE_DURATION_FACTOR}",
        f"Model loaded: {loaded}",
        f"Server TTS enabled: {SERVER_TTS_ENABLED}",
        f"LOAD_IN_4BIT: {LOAD_IN_4BIT}",
    ]
    report.append(device_label())
    report += [
        f"MAX_INPUT_CHARS: {MAX_INPUT_CHARS}",
        f"MAX_NEW_TOKENS: {MAX_NEW_TOKENS}",
        f"METRICS_FILE: {metrics_file_path()}",
        f"METRICS_REPO_SYNC: {METRICS_REPO_SYNC}",
        f"METRICS_REPO_ID: {METRICS_REPO_ID or '(not configured)'}",
        f"METRICS_REPO_PATH: {METRICS_REPO_PATH}",
        f"METRICS_SYNC_ERROR: {metrics_sync_error or '(none)'}",
        f"VOXCPM_INFERENCE_TIMESTEPS: {VOXCPM_INFERENCE_TIMESTEPS}",
        f"VOXCPM_OUTPUT_SAMPLE_RATE: {VOXCPM_OUTPUT_SAMPLE_RATE}",
        f"VOXCPM_RETRY_BADCASE: {VOXCPM_RETRY_BADCASE}",
    ]
    return "\n".join(report)
FRONTEND_HTML = r"""
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>ToneBridge Mandarin Coach</title>
<style>
:root {
--bg: #fbf3e7;
--paper: #fffaf1;
--panel: rgba(255, 250, 241, .94);
--text: #25140f;
--muted: #785f4d;
--border: #ead7bd;
--primary: #b42318;
--primary-strong: #8f1c13;
--gold: #d69e2e;
--jade: #0f766e;
--soft: #fff1d6;
--shadow: 0 22px 60px rgba(92, 38, 17, .16);
}
* { box-sizing: border-box; }
body {
margin: 0;
min-height: 100vh;
background:
radial-gradient(circle at 12% 4%, rgba(180, 35, 24, .18), transparent 26rem),
radial-gradient(circle at 88% 10%, rgba(214, 158, 46, .20), transparent 24rem),
linear-gradient(135deg, rgba(180, 35, 24, .04) 25%, transparent 25%) 0 0 / 26px 26px,
linear-gradient(180deg, #fffaf1 0%, var(--bg) 100%);
color: var(--text);
font-family: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
}
main {
width: min(1120px, calc(100% - 28px));
margin: 0 auto;
padding: 28px 0 40px;
}
header {
display: flex;
align-items: flex-end;
justify-content: space-between;
gap: 18px;
margin-bottom: 18px;
}
h1 {
margin: 0;
font-size: clamp(2rem, 5vw, 3.3rem);
line-height: 1;
letter-spacing: 0;
color: #24110c;
}
.subtitle {
margin: 10px 0 0;
color: var(--muted);
font-size: 1rem;
max-width: 46rem;
}
.badge {
white-space: nowrap;
padding: 10px 14px;
border-radius: 999px;
background: #fff3d8;
color: #8f1c13;
border: 1px solid #efc56a;
font-weight: 700;
font-size: .9rem;
box-shadow: inset 0 1px 0 rgba(255, 255, 255, .9);
}
.app {
display: grid;
grid-template-columns: minmax(0, .92fr) minmax(0, 1.08fr);
gap: 18px;
align-items: stretch;
}
.card {
background: var(--panel);
border: 1px solid var(--border);
border-radius: 22px;
box-shadow: var(--shadow);
padding: 20px;
backdrop-filter: blur(10px);
position: relative;
overflow: hidden;
}
.card::before {
content: "春";
position: absolute;
right: 18px;
top: 10px;
color: rgba(180, 35, 24, .055);
font-size: 5rem;
font-weight: 900;
pointer-events: none;
}
.field { margin-bottom: 16px; }
label {
display: block;
margin: 0 0 7px;
color: #4b271d;
font-size: .92rem;
font-weight: 750;
}
select, textarea {
width: 100%;
min-height: 46px;
border: 1px solid #dec7a7;
border-radius: 13px;
background: #fffdf8;
color: var(--text);
padding: 12px 13px;
font: inherit;
font-size: 16px;
outline: none;
transition: border-color .16s, box-shadow .16s;
}
textarea {
min-height: 142px;
resize: vertical;
line-height: 1.45;
}
select:focus, textarea:focus {
border-color: var(--primary);
box-shadow: 0 0 0 4px rgba(180, 35, 24, .12);
}
.actions {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 12px;
margin-top: 10px;
}
button {
min-height: 48px;
border: 0;
border-radius: 14px;
padding: 0 16px;
cursor: pointer;
font: inherit;
font-weight: 800;
transition: transform .12s, box-shadow .12s, background .12s;
}
button:active { transform: translateY(1px); }
.primary {
color: #fff;
background: linear-gradient(135deg, var(--primary), #d9480f);
box-shadow: 0 14px 28px rgba(180, 35, 24, .26);
}
.primary:hover { background: var(--primary-strong); }
.mic-button {
color: #fffaf1;
background: linear-gradient(135deg, var(--jade), #0d9488);
box-shadow: 0 14px 28px rgba(15, 118, 110, .22);
}
.mic-button.listening {
background: linear-gradient(135deg, #d69e2e, #b45309);
animation: pulseMic 1.1s ease-in-out infinite;
}
@keyframes pulseMic {
0%, 100% { transform: translateY(0); box-shadow: 0 12px 24px rgba(214, 158, 46, .24); }
50% { transform: translateY(-1px); box-shadow: 0 18px 34px rgba(214, 158, 46, .38); }
}
.voice-hint {
margin: 8px 0 0;
color: var(--muted);
font-size: .88rem;
}
.field-note {
margin: 7px 0 0;
color: var(--muted);
font-size: .84rem;
line-height: 1.35;
}
.learning-history {
margin-top: 16px;
padding: 14px;
border: 1px solid #efc56a;
border-radius: 16px;
background: rgba(255, 243, 216, .62);
}
.history-title {
color: #8f1c13;
font-weight: 850;
margin-bottom: 8px;
}
.history-summary {
margin-bottom: 9px;
color: #4b271d;
font-size: .9rem;
font-weight: 700;
}
.learning-history ul {
margin: 0;
padding-left: 18px;
color: var(--muted);
font-size: .9rem;
line-height: 1.45;
}
.result-head {
display: flex;
align-items: center;
justify-content: space-between;
gap: 12px;
margin-bottom: 14px;
}
.result-title {
margin: 0;
font-size: 1.1rem;
}
.status {
color: var(--muted);
font-size: .9rem;
}
.result-tools {
display: flex;
align-items: center;
gap: 10px;
}
.copy-button {
min-height: 34px;
border-radius: 999px;
padding: 0 12px;
color: #8f1c13;
background: #fff3d8;
border: 1px solid #efc56a;
font-size: .86rem;
box-shadow: none;
}
.copy-button:disabled {
cursor: not-allowed;
opacity: .55;
}
.result {
min-height: 330px;
border-radius: 18px;
border: 1px solid var(--border);
background:
linear-gradient(180deg, rgba(255, 253, 248, .98) 0%, rgba(255, 247, 231, .98) 100%);
padding: 18px;
line-height: 1.55;
overflow-wrap: anywhere;
display: grid;
align-content: start;
gap: 12px;
}
.result-section {
border: 1px solid rgba(234, 215, 189, .95);
border-radius: 14px;
background: rgba(255, 253, 248, .86);
padding: 13px 14px;
}
.result-section.corrected-section {
border-color: rgba(180, 35, 24, .38);
background: #fff8eb;
box-shadow: 0 12px 30px rgba(180, 35, 24, .10);
}
.result-section.compact-section {
padding: 10px 14px;
background: rgba(255, 243, 216, .58);
}
.result strong {
display: block;
color: #8f1c13;
margin-bottom: 7px;
}
.corrected-section strong {
font-size: .95rem;
}
.corrected-section {
font-size: 1.18rem;
}
.result em {
display: block;
color: var(--jade);
font-style: normal;
margin-top: 6px;
font-size: .96rem;
}
.reading-panel {
display: none;
margin-top: 16px;
border: 1px solid var(--border);
border-radius: 18px;
background: rgba(255, 250, 241, .72);
overflow: hidden;
}
.reading-tab {
width: 100%;
min-height: 42px;
display: flex;
align-items: center;
justify-content: space-between;
gap: 12px;
border-radius: 0;
background: #fff3d8;
color: #8f1c13;
border-bottom: 1px solid #efc56a;
padding: 0 12px;
font-weight: 900;
}
.replay-button {
min-height: 38px;
color: #fffaf1;
background: linear-gradient(135deg, var(--jade), #0d9488);
border-radius: 999px;
padding: 0 14px;
}
.replay-button:disabled {
cursor: wait;
opacity: .72;
filter: saturate(.75);
}
.karaoke-box {
padding: 16px;
}
.karaoke-window {
min-height: 94px;
display: flex;
align-items: center;
justify-content: center;
gap: 6px;
border-radius: 16px;
background: #fffdf8;
border: 1px solid #ead7bd;
overflow: hidden;
}
.karaoke-char {
display: inline-grid;
place-items: center;
min-width: 42px;
height: 58px;
border-radius: 12px;
font-size: clamp(1.8rem, 5vw, 2.7rem);
font-weight: 850;
color: #a78b7a;
background: #fff8eb;
transition: color .18s, background .18s, transform .18s;
}
.karaoke-char.read {
color: #8f1c13;
background: #ffe7b3;
}
.karaoke-char.active {
color: #fffaf1;
background: linear-gradient(135deg, #b42318, #d69e2e);
transform: translateY(-2px) scale(1.04);
box-shadow: 0 10px 22px rgba(180, 35, 24, .22);
}
.karaoke-strip {
display: flex;
gap: 5px;
overflow-x: auto;
padding: 12px 2px 0;
scroll-behavior: smooth;
}
.strip-char {
flex: 0 0 auto;
min-width: 24px;
height: 30px;
display: inline-grid;
place-items: center;
border-radius: 8px;
color: #8c6f5a;
background: rgba(255, 255, 255, .5);
}
.strip-char.read {
color: #8f1c13;
background: #ffe7b3;
}
.strip-char.active {
color: #fffaf1;
background: var(--jade);
}
.karaoke-hint {
margin: 10px 0 0;
color: var(--muted);
font-size: .9rem;
text-align: center;
}
.placeholder {
color: var(--muted);
display: grid;
place-items: center;
min-height: 280px;
text-align: center;
}
.examples {
display: flex;
gap: 8px;
flex-wrap: wrap;
margin-top: 12px;
}
.examples-label {
margin: 12px 0 0;
color: var(--muted);
font-size: .9rem;
font-weight: 700;
}
.example {
min-height: 34px;
padding: 0 11px;
border-radius: 999px;
background: #fffdf8;
border: 1px solid #efc56a;
color: #8f1c13;
font-size: .9rem;
font-weight: 700;
}
.example:hover {
background: #fff3d8;
}
@media (max-width: 820px) {
main { width: min(100% - 18px, 680px); padding-top: 16px; }
header { display: block; }
.badge { display: inline-block; margin-top: 14px; }
.app { grid-template-columns: 1fr; }
.card { border-radius: 18px; padding: 15px; }
.result { min-height: 220px; }
.actions {
grid-template-columns: 1fr;
position: sticky;
bottom: 8px;
z-index: 2;
}
}
/* Premium product layer for the custom gr.Server frontend */
:root {
--canvas: #fbf0df;
--paper: #fffaf1;
--paper-soft: #fff4df;
--ink: #26110c;
--brown: #5a3024;
--muted: #806755;
--line: #e8cfac;
--line-strong: #d8af70;
--red: #b42318;
--red-strong: #8f1c13;
--gold: #d69e2e;
--jade: #0f766e;
--plum: #663a5d;
--shadow-premium: 0 18px 42px rgba(91, 43, 18, .15);
--shadow-soft: 0 8px 24px rgba(91, 43, 18, .10);
}
body {
background:
linear-gradient(90deg, rgba(180, 35, 24, .06) 1px, transparent 1px) 0 0 / 34px 34px,
linear-gradient(0deg, rgba(214, 158, 46, .06) 1px, transparent 1px) 0 0 / 34px 34px,
repeating-linear-gradient(135deg, rgba(180, 35, 24, .035) 0 1px, transparent 1px 22px),
linear-gradient(180deg, #fffaf2 0%, var(--canvas) 100%);
color: var(--ink);
}
body::before {
content: "";
position: fixed;
inset: 0;
pointer-events: none;
background:
linear-gradient(90deg, rgba(255, 250, 241, .88), rgba(255, 250, 241, 0) 18%, rgba(255, 250, 241, 0) 82%, rgba(255, 250, 241, .88)),
repeating-linear-gradient(120deg, transparent 0 54px, rgba(143, 28, 19, .045) 54px 64px);
mix-blend-mode: multiply;
}
main {
width: min(1180px, calc(100% - 32px));
padding: 30px 0 44px;
position: relative;
}
.hero {
display: grid;
grid-template-columns: minmax(0, 1fr) minmax(300px, 360px);
gap: 22px;
align-items: end;
margin-bottom: 20px;
}
.brand-row {
display: flex;
align-items: center;
gap: 10px;
margin-bottom: 12px;
}
.seal {
width: 42px;
height: 42px;
display: inline-grid;
place-items: center;
border-radius: 8px;
background: linear-gradient(135deg, var(--red), #d9480f);
color: #fffaf1;
font-weight: 900;
box-shadow: 0 10px 24px rgba(180, 35, 24, .24);
}
.eyebrow,
.panel-kicker {
color: var(--red-strong);
font-size: .78rem;
font-weight: 900;
text-transform: uppercase;
}
h1 {
max-width: 760px;
font-size: clamp(2.95rem, 6.8vw, 5.35rem);
line-height: .96;
color: var(--ink);
}
.subtitle {
margin-top: 18px;
color: var(--muted);
font-size: clamp(.98rem, 1.5vw, 1.1rem);
line-height: 1.5;
}
.hero-pills {
display: flex;
flex-wrap: wrap;
gap: 8px;
margin-top: 16px;
}
.hero-pills span,
.badge {
display: inline-flex;
align-items: center;
min-height: 34px;
padding: 0 12px;
border-radius: 999px;
border: 1px solid rgba(214, 158, 46, .72);
background: rgba(255, 250, 241, .82);
color: var(--brown);
font-size: .86rem;
font-weight: 800;
box-shadow: inset 0 1px 0 rgba(255, 255, 255, .95);
}
.hero-visual {
min-height: 208px;
border: 1px solid rgba(216, 175, 112, .78);
border-radius: 8px;
background:
linear-gradient(135deg, rgba(255, 250, 241, .95), rgba(255, 244, 223, .84)),
repeating-linear-gradient(90deg, transparent 0 14px, rgba(180, 35, 24, .035) 14px 15px);
box-shadow: var(--shadow-premium);
padding: 16px;
display: grid;
align-content: start;
justify-items: center;
gap: 14px;
position: relative;
overflow: hidden;
}
.hero-visual::after {
content: "\706F";
position: absolute;
right: -8px;
bottom: -38px;
font-size: 10rem;
font-weight: 950;
color: rgba(180, 35, 24, .10);
line-height: 1;
}
.hero-visual .badge {
width: max-content;
color: var(--red-strong);
background: #fff3d8;
position: relative;
z-index: 1;
}
.lantern-mark {
width: 142px;
height: 142px;
display: grid;
place-items: center;
position: relative;
z-index: 1;
border-radius: 50% 50% 46% 46%;
background:
radial-gradient(circle at 50% 34%, rgba(255, 243, 216, .98), rgba(214, 158, 46, .28) 58%, rgba(180, 35, 24, .16)),
linear-gradient(135deg, rgba(255, 250, 241, .88), rgba(255, 231, 179, .72));
border: 1px solid rgba(180, 35, 24, .22);
box-shadow: 0 18px 38px rgba(180, 35, 24, .15);
}
.lantern-mark::before,
.lantern-mark::after {
content: "";
position: absolute;
left: 50%;
width: 52px;
height: 9px;
transform: translateX(-50%);
border-radius: 999px;
background: linear-gradient(90deg, var(--red), #d9480f);
}
.lantern-mark::before { top: -5px; }
.lantern-mark::after { bottom: -5px; }
.lantern-mark span {
color: var(--red-strong);
font-size: 4.4rem;
font-weight: 950;
line-height: 1;
}
.app {
grid-template-columns: minmax(320px, .9fr) minmax(0, 1.1fr);
gap: 20px;
align-items: start;
}
.panel,
.card {
background: rgba(255, 250, 241, .94);
border: 1px solid var(--line);
border-radius: 8px;
box-shadow: var(--shadow-premium);
padding: 20px;
overflow: hidden;
}
.card::before { content: none; }
.input-panel {
position: sticky;
top: 16px;
}
.panel-heading,
.result-head {
display: flex;
align-items: center;
justify-content: space-between;
gap: 14px;
margin-bottom: 16px;
}
.panel-heading h2,
.result-title {
margin: 2px 0 0;
color: var(--ink);
font-size: 1.25rem;
line-height: 1.15;
}
.field-grid {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 12px;
}
label {
color: var(--brown);
font-size: .9rem;
font-weight: 850;
}
select,
textarea {
min-height: 48px;
border-color: #d9bd92;
border-radius: 8px;
background: rgba(255, 253, 248, .96);
color: var(--ink);
transition: border-color .16s, box-shadow .16s, background .16s;
}
textarea {
min-height: 148px;
line-height: 1.5;
}
select:focus,
textarea:focus {
border-color: var(--red);
background: #fffefb;
box-shadow: 0 0 0 4px rgba(180, 35, 24, .12);
}
.field-note,
.voice-hint {
color: var(--muted);
font-size: .84rem;
line-height: 1.4;
}
button {
border-radius: 8px;
font-weight: 900;
line-height: 1.1;
transition: transform .12s, box-shadow .12s, background .12s, border-color .12s;
}
button:disabled { cursor: not-allowed; opacity: .62; }
.primary {
color: #fffaf1;
background: linear-gradient(135deg, var(--red), #de3d16);
box-shadow: 0 14px 28px rgba(180, 35, 24, .25);
}
.primary:hover { background: linear-gradient(135deg, var(--red-strong), #ca350f); }
.mic-button {
background: linear-gradient(135deg, var(--jade), #0d9488);
box-shadow: 0 14px 28px rgba(15, 118, 110, .20);
}
.mic-button.listening {
background: linear-gradient(135deg, var(--gold), #b45309);
}
.examples-label {
margin: 16px 0 8px;
color: var(--brown);
font-size: .9rem;
font-weight: 850;
}
.example,
.copy-button {
min-height: 36px;
border-radius: 999px;
background: rgba(255, 253, 248, .92);
border: 1px solid rgba(214, 158, 46, .70);
color: var(--red-strong);
font-size: .88rem;
font-weight: 850;
box-shadow: none;
}
.example:hover,
.copy-button:hover:not(:disabled) {
background: #fff3d8;
border-color: var(--gold);
}
.learning-history {
border-color: rgba(15, 118, 110, .28);
border-radius: 8px;
background: linear-gradient(180deg, rgba(255, 253, 248, .86), rgba(238, 249, 246, .58));
}
.history-title {
color: var(--jade);
font-weight: 950;
}
.coach-panel { min-height: 620px; }
.result-tools {
justify-content: flex-end;
flex-wrap: wrap;
}
.status {
display: inline-flex;
align-items: center;
gap: 7px;
color: var(--muted);
font-weight: 750;
}
.status::before {
content: "";
width: 8px;
height: 8px;
border-radius: 50%;
background: var(--jade);
box-shadow: 0 0 0 4px rgba(15, 118, 110, .12);
}
.result {
min-height: 350px;
border-radius: 8px;
border-color: var(--line);
background:
linear-gradient(180deg, rgba(255, 253, 248, .97), rgba(255, 246, 230, .92)),
linear-gradient(90deg, rgba(180, 35, 24, .045) 1px, transparent 1px) 0 0 / 28px 28px;
box-shadow: inset 0 1px 0 rgba(255, 255, 255, .95);
}
.result-section {
border-color: rgba(232, 207, 172, .94);
border-left: 4px solid rgba(214, 158, 46, .86);
border-radius: 8px;
background: rgba(255, 253, 248, .90);
box-shadow: var(--shadow-soft);
}
.result-section.original-section { border-left-color: var(--plum); }
.result-section.corrected-section {
border-color: rgba(180, 35, 24, .38);
border-left-color: var(--red);
background: linear-gradient(135deg, #fff8eb, #fffdf8);
box-shadow: 0 14px 32px rgba(180, 35, 24, .12);
font-size: 1.16rem;
}
.result-section.tip-section {
border-left-color: var(--jade);
background: linear-gradient(135deg, rgba(239, 250, 247, .95), rgba(255, 253, 248, .92));
}
.result strong {
color: var(--red-strong);
font-size: .95rem;
}
.result em {
color: var(--jade);
font-weight: 750;
}
.placeholder {
align-content: center;
gap: 8px;
padding: 22px;
}
.placeholder strong {
color: var(--ink);
margin: 0;
font-size: 1.05rem;
}
.placeholder-icon {
width: 62px;
height: 62px;
display: inline-grid;
place-items: center;
border-radius: 8px;
color: #fffaf1;
background: linear-gradient(135deg, var(--red), var(--gold));
font-size: 1.8rem;
font-weight: 950;
box-shadow: 0 12px 28px rgba(180, 35, 24, .24);
}
.loader-mark {
width: 36px;
height: 36px;
border-radius: 50%;
border: 3px solid rgba(214, 158, 46, .28);
border-top-color: var(--red);
animation: spin .85s linear infinite;
}
@keyframes spin { to { transform: rotate(360deg); } }
.reading-panel {
border-color: rgba(15, 118, 110, .26);
border-radius: 8px;
background: linear-gradient(180deg, rgba(239, 250, 247, .76), rgba(255, 250, 241, .82));
box-shadow: var(--shadow-soft);
}
.reading-tab {
min-height: 44px;
display: flex;
align-items: center;
justify-content: space-between;
gap: 12px;
background: rgba(15, 118, 110, .10);
color: var(--jade);
border-bottom: 1px solid rgba(15, 118, 110, .22);
padding: 0 12px;
font-weight: 950;
}
.replay-button {
min-height: 34px;
padding: 0 13px;
background: linear-gradient(135deg, var(--jade), #0d9488);
box-shadow: 0 10px 22px rgba(15, 118, 110, .18);
}
.replay-button:disabled {
box-shadow: none;
}
.karaoke-window,
.karaoke-char {
border-radius: 8px;
}
.karaoke-char {
font-weight: 900;
}
.karaoke-char.active {
background: linear-gradient(135deg, var(--red), var(--gold));
}
@media (max-width: 920px) {
main { width: min(100% - 20px, 720px); padding-top: 18px; }
.hero { grid-template-columns: 1fr; }
.hero-visual { min-height: 140px; }
.app { grid-template-columns: 1fr; }
.input-panel { position: static; }
.coach-panel { min-height: 0; }
}
@media (max-width: 620px) {
main { width: min(100% - 14px, 520px); padding-bottom: 22px; }
.hero { gap: 14px; }
.hero-pills { gap: 6px; }
.hero-pills span { min-height: 32px; font-size: .8rem; }
.panel,
.card { padding: 14px; }
.field-grid,
.actions { grid-template-columns: 1fr; }
textarea { min-height: 132px; }
.result-head,
.panel-heading { align-items: flex-start; }
.result-tools { justify-content: flex-start; }
.result { min-height: 230px; padding: 13px; }
.actions {
position: sticky;
bottom: 8px;
z-index: 2;
}
button { min-height: 46px; }
}
/* V5 lightweight delight layer: no external assets, no framework */
.coach-illustration {
width: 82px;
height: 82px;
display: inline-grid;
place-items: center;
position: relative;
margin-bottom: 6px;
}
.coach-face {
width: 64px;
height: 64px;
border-radius: 50% 50% 45% 45%;
background: linear-gradient(160deg, #fff3d8, #ffd6a1);
border: 2px solid rgba(180, 35, 24, .24);
box-shadow: 0 12px 24px rgba(91, 43, 18, .14);
position: relative;
}
.coach-face::before,
.coach-face::after {
content: "";
position: absolute;
top: 27px;
width: 7px;
height: 7px;
border-radius: 50%;
background: var(--ink);
}
.coach-face::before { left: 20px; }
.coach-face::after { right: 20px; }
.coach-smile {
position: absolute;
left: 22px;
top: 39px;
width: 20px;
height: 10px;
border-bottom: 3px solid var(--red-strong);
border-radius: 0 0 999px 999px;
}
.coach-hat {
position: absolute;
top: 8px;
width: 58px;
height: 16px;
border-radius: 999px 999px 6px 6px;
background: linear-gradient(135deg, var(--red), #d9480f);
box-shadow: 0 5px 14px rgba(180, 35, 24, .18);
}
.coach-card {
width: min(100%, 430px);
display: grid;
grid-template-columns: auto 1fr;
gap: 14px;
align-items: center;
padding: 15px;
border: 1px solid rgba(214, 158, 46, .55);
border-radius: 8px;
background: rgba(255, 250, 241, .78);
box-shadow: var(--shadow-soft);
}
.coach-bubble {
text-align: left;
color: var(--brown);
line-height: 1.45;
}
.coach-bubble strong {
display: block;
margin-bottom: 4px;
color: var(--red-strong);
}
.loading-tip {
margin-top: 10px;
color: var(--jade);
font-weight: 800;
}
.loading-steps {
width: min(100%, 430px);
display: grid;
grid-template-columns: repeat(3, 1fr);
gap: 8px;
margin-top: 12px;
}
.loading-step {
min-height: 34px;
display: inline-grid;
place-items: center;
border-radius: 999px;
border: 1px solid rgba(214, 158, 46, .42);
background: rgba(255, 253, 248, .72);
color: var(--muted);
font-size: .82rem;
font-weight: 850;
animation: stepGlow 2.4s ease-in-out infinite;
}
.loading-step:nth-child(2) { animation-delay: .35s; }
.loading-step:nth-child(3) { animation-delay: .7s; }
@keyframes stepGlow {
0%, 100% { border-color: rgba(214, 158, 46, .42); color: var(--muted); }
35% { border-color: rgba(180, 35, 24, .55); color: var(--red-strong); background: #fff3d8; }
}
.result-arrived {
animation: resultArrive .34s ease-out;
}
@keyframes resultArrive {
from { opacity: .35; transform: translateY(8px); }
to { opacity: 1; transform: translateY(0); }
}
.focus-button {
min-height: 36px;
border-radius: 999px;
padding: 0 12px;
border: 1px solid rgba(15, 118, 110, .38);
background: rgba(239, 250, 247, .82);
color: var(--jade);
font-size: .88rem;
box-shadow: none;
}
.focus-button:hover {
background: rgba(221, 247, 240, .95);
border-color: var(--jade);
}
.feedback-panel {
min-height: 52px;
margin-top: 12px;
padding: 9px 10px;
display: flex;
align-items: center;
justify-content: space-between;
gap: 10px;
border: 1px solid rgba(232, 207, 172, .94);
border-radius: 8px;
background: rgba(255, 253, 248, .82);
box-shadow: var(--shadow-soft);
}
.feedback-panel[hidden] {
display: none;
}
.feedback-label {
color: var(--brown);
font-size: .88rem;
font-weight: 850;
}
.feedback-actions {
display: inline-flex;
gap: 8px;
}
.feedback-button {
width: 42px;
min-width: 42px;
min-height: 38px;
padding: 0;
border: 1px solid rgba(214, 158, 46, .62);
border-radius: 8px;
background: rgba(255, 250, 241, .95);
color: var(--ink);
font-size: 1rem;
box-shadow: none;
}
.feedback-button:hover:not(:disabled),
.feedback-button.selected {
background: #fff3d8;
border-color: var(--gold);
box-shadow: inset 0 0 0 2px rgba(214, 158, 46, .18);
}
.feedback-button.selected {
color: var(--red-strong);
}
.feedback-status {
min-width: 92px;
color: var(--muted);
font-size: .84rem;
font-weight: 750;
text-align: right;
}
@media (max-width: 620px) {
.feedback-panel {
align-items: flex-start;
flex-direction: column;
}
.feedback-status {
min-width: 0;
text-align: left;
}
}
body.focus-mode {
overflow: auto;
}
body.focus-mode main {
width: min(1180px, calc(100% - 24px));
padding: 16px 0 24px;
}
body.focus-mode .hero {
display: none;
}
body.focus-mode .app {
grid-template-columns: minmax(320px, .82fr) minmax(0, 1.18fr);
align-items: stretch;
min-height: calc(100vh - 40px);
}
body.focus-mode .input-panel {
position: static;
min-height: calc(100vh - 40px);
display: grid;
align-content: start;
}
body.focus-mode .input-panel .panel-heading h2::after {
content: " · focus";
color: var(--jade);
font-weight: 800;
}
body.focus-mode .field-grid {
grid-template-columns: 1fr;
}
body.focus-mode textarea {
min-height: clamp(190px, 32vh, 360px);
font-size: clamp(1.1rem, 2vw, 1.35rem);
line-height: 1.65;
}
body.focus-mode .examples-label,
body.focus-mode .examples,
body.focus-mode .learning-history,
body.focus-mode .voice-hint {
display: none;
}
body.focus-mode .actions {
margin-top: 14px;
}
body.focus-mode .coach-panel {
min-height: calc(100vh - 40px);
}
body.focus-mode .result {
min-height: 42vh;
font-size: 1.04rem;
}
body.focus-mode .corrected-section {
font-size: clamp(1.25rem, 3vw, 1.65rem);
}
body.focus-mode .reading-panel {
margin-top: 18px;
}
body.focus-mode .karaoke-window {
min-height: 150px;
}
body.focus-mode .karaoke-char {
min-width: 58px;
height: 76px;
font-size: clamp(2.4rem, 7vw, 4rem);
}
@media (max-width: 920px) {
body.focus-mode .app {
grid-template-columns: 1fr;
}
body.focus-mode .input-panel,
body.focus-mode .coach-panel {
min-height: auto;
}
}
/* Up to 620px wide: the loading coach card becomes a single centred column
   and the loading steps are stacked one per row. */
@media (max-width: 620px) {
.coach-card {
grid-template-columns: 1fr;
justify-items: center;
text-align: center;
}
.coach-bubble {
text-align: center;
}
.loading-steps {
grid-template-columns: 1fr;
}
}
</style>
</head>
<body>
<main>
<header class="hero">
<div>
<div class="brand-row">
<span class="seal" aria-hidden="true">语</span>
<span class="eyebrow">Mandarin sentence coach</span>
</div>
<h1>ToneBridge</h1>
<p class="subtitle">Build natural Mandarin sentences, one gentle correction at a time. 😊</p>
<div class="hero-pills" aria-label="Highlights">
<span>Context aware</span>
<span>Natural tone</span>
<span>Reading voice</span>
</div>
</div>
<div class="hero-visual" aria-hidden="true">
<div class="badge">🏮 中文小助手</div>
<div class="lantern-mark">
<span>语</span>
</div>
</div>
</header>
<section class="app">
<form class="panel input-panel" id="form">
<div class="panel-heading">
<div>
<span class="panel-kicker">Write</span>
<h2>Your sentence</h2>
</div>
</div>
<div class="field">
<label for="context">Context &amp; tone</label>
<select id="context">
<option value="friendly-informal" selected>Friendly-informal</option>
<option value="work-informal">Work-informal</option>
<option value="work-formal">Work-formal</option>
<option value="wechat-informal">WeChat-informal</option>
<option value="wechat-formal">WeChat-formal</option>
</select>
<p class="field-note">ToneBridge applies a conservative tone-aware correction for the selected situation.</p>
</div>
<div class="field">
<label for="sentence">Chinese sentence to correct</label>
<textarea id="sentence" placeholder="Ex. 我今天想喝书。"></textarea>
</div>
<div class="actions">
<button class="primary" id="submit" type="submit">✨ Correct my sentence</button>
<button class="mic-button" id="voice" type="button">🎙️ Speak &amp; correct</button>
</div>
<p class="voice-hint">Voice mode listens until you click stop, then corrects the sentence and reads the corrected version aloud.</p>
<p class="examples-label">Examples: tap one to fill the form.</p>
<div class="examples" aria-label="Examples">
<button class="example" type="button" data-context="friendly-informal" data-text="我今天想喝书。">Wrong word</button>
<button class="example" type="button" data-context="friendly-informal" data-text="火车站在超市的旁边">Already correct</button>
<button class="example" type="button" data-context="work-formal" data-text="我今天迟到,你等我">Formal tone</button>
<button class="example" type="button" data-context="wechat-informal" data-text="您今晚是否方便出来?">Context</button>
</div>
<div class="learning-history">
<div class="history-title">Learning notes</div>
<div class="history-summary" id="historySummary">Your patterns will appear here.</div>
<ul id="historyList">
<li>Your last correction types will appear here.</li>
</ul>
</div>
</form>
<section class="panel coach-panel">
<div class="result-head">
<div>
<span class="panel-kicker">Coach answer</span>
<h2 class="result-title">Correction 😊</h2>
</div>
<div class="result-tools">
<button class="focus-button" id="focusMode" type="button">Focus mode</button>
<button class="copy-button" id="copyCorrected" type="button" disabled>Copy sentence</button>
<span class="status" id="status">Ready</span>
</div>
</div>
<div class="result" id="result">
<div class="placeholder">
<span class="placeholder-icon">好</span>
<strong>Ready when you are</strong>
<span>Your correction will appear here.</span>
</div>
</div>
<div class="feedback-panel" id="feedbackPanel" hidden>
<span class="feedback-label">Response rating</span>
<div class="feedback-actions" aria-label="Rate this response">
<button class="feedback-button" id="thumbUp" type="button" title="Thumbs up" aria-label="Thumbs up">&#128077;</button>
<button class="feedback-button" id="thumbDown" type="button" title="Thumbs down" aria-label="Thumbs down">&#128078;</button>
</div>
<span class="feedback-status" id="feedbackStatus"></span>
</div>
<div class="reading-panel" id="readingPanel">
<div class="reading-tab">
<span>🎧 Reading</span>
<button class="replay-button" id="replay" type="button">▶️ Replay</button>
</div>
<div class="karaoke-box">
<audio id="serverAudio" preload="auto"></audio>
<div class="karaoke-window" id="karaokeWindow">
<span class="karaoke-char">听</span>
<span class="karaoke-char">一</span>
<span class="karaoke-char">听</span>
</div>
<div class="karaoke-strip" id="karaokeStrip"></div>
<p class="karaoke-hint" id="karaokeHint">Replay the corrected sentence and follow the characters.</p>
</div>
</div>
</section>
</section>
</main>
<script type="module">
import { client } from "https://cdn.jsdelivr.net/npm/@gradio/client/dist/index.min.js";
// --- DOM handles (looked up once when the module loads) ---
const form = document.querySelector("#form");
const result = document.querySelector("#result");
const status = document.querySelector("#status");
const submit = document.querySelector("#submit");
const voice = document.querySelector("#voice");
const readingPanel = document.querySelector("#readingPanel");
const replay = document.querySelector("#replay");
const focusMode = document.querySelector("#focusMode");
const copyCorrected = document.querySelector("#copyCorrected");
const serverAudio = document.querySelector("#serverAudio");
const karaokeWindow = document.querySelector("#karaokeWindow");
const karaokeStrip = document.querySelector("#karaokeStrip");
const karaokeHint = document.querySelector("#karaokeHint");
const historyList = document.querySelector("#historyList");
const historySummary = document.querySelector("#historySummary");
const feedbackPanel = document.querySelector("#feedbackPanel");
const feedbackStatus = document.querySelector("#feedbackStatus");
const thumbUp = document.querySelector("#thumbUp");
const thumbDown = document.querySelector("#thumbDown");
// --- Mutable session state ---
// Lazily created Gradio client promise (see getClient).
let clientPromise = null;
// Speech-recognition state; the code that uses these is outside this section.
let recognition = null;
let isListening = false;
// Handle of the pending karaoke setTimeout, or null when no highlight loop is scheduled.
let karaokeTimer = null;
// True once speech "boundary" events drive the highlight instead of the timer.
let boundaryDrivenReading = false;
// Data about the correction currently displayed.
let currentCorrectedSentence = "";
let currentOriginalSentence = "";
let currentRequestId = "";
let currentEvaluation = "";
let currentGenerationTimeSeconds = null;
// Server TTS bookkeeping for the sentence currently prepared or being prepared.
let currentTtsKey = "";
let currentTtsPromise = null;
let currentTtsPayload = null;
// One of "idle", "preparing", "ready", "playing" (see setReplayState).
let replayState = "idle";
// Finished TTS payloads (bounded by TTS_CACHE_LIMIT) and in-flight TTS requests, both keyed by ttsCacheKey().
const ttsPayloadCache = new Map();
const ttsPromiseCache = new Map();
const TTS_CACHE_LIMIT = 8;
// Timing constants (milliseconds) used when starting server-audio playback and its karaoke highlight.
const SERVER_TTS_READING_DELAY_MS = 120;
const SERVER_TTS_FIRST_PLAY_DELAY_MS = 900;
const SERVER_TTS_PRIME_MS = 90;
const BROWSER_TTS_RATE = 1.0;
// NOTE(review): the double-underscore strings below look like template tokens that are
// substituted before the page is served - TODO confirm against the server-side renderer.
const SERVER_TTS_ENABLED = "__SERVER_TTS_ENABLED__" === "true";
const SERVER_TTS_PROVIDER = "__TTS_PROVIDER__";
// Falls back to 0.86 when the value does not parse to a non-zero number.
const EDGE_TTS_KARAOKE_DURATION_FACTOR = Number("__EDGE_TTS_KARAOKE_DURATION_FACTOR__") || 0.86;
let shouldCorrectAfterStop = false;
// Most recent error types, newest first (trimmed to 6 entries in updateLearningHistory).
const recentErrorTypes = [];
// Short learning tips shown while a correction is being generated.
// "base" tips are always eligible; the other arrays are looked up by key in tipsForCurrentContext().
// NOTE(review): the #context select offers values such as "friendly-informal", "work-formal" and
// "wechat-informal", which are not identical to the keys used here - verify the lookup matches.
const loadingTips = {
base: [
"Short Mandarin sentences are often natural. Simple is good. 😊",
"If the pinyin is right but the character is wrong, it may be an input-method mistake.",
"A tiny word order change can make Mandarin feel much smoother.",
"吧 can soften a suggestion, a bit like 'shall we?'",
"请 makes many requests feel smoother, especially outside close relationships."
],
Friends: [
"With friends, 你好 is usually enough. 您好 can feel too formal.",
"Friendly Mandarin often sounds better when it stays short and direct."
],
Family: [
"With family, natural warmth often matters more than formal politeness.",
"Family sentences can be simple and still sound kind."
],
Work: [
"At work, 请 can soften a request without making it too long.",
"For work messages, clear and polite usually beats very formal."
],
WeChat: [
"On WeChat, shorter sentences usually feel more natural.",
"A friendly particle like 吧 can make a message feel lighter."
]
};
/**
 * Look up the first element matching a CSS selector.
 * @param {string} id - Selector string (callers pass values such as "#sentence").
 * @returns {Element|null} The matching element, or null when nothing matches.
 */
function field(id) {
  const element = document.querySelector(id);
  return element;
}
/**
 * Escape the five HTML-significant characters so a value can be placed in markup.
 * @param {*} value - Any value; it is converted with String() first.
 * @returns {string} The escaped text.
 */
function escapeHtml(value) {
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#039;"
  };
  // Single pass: every special character is swapped for its entity.
  return String(value).replace(/[&<>"']/g, (character) => entities[character]);
}
/**
 * Convert the coach's labelled text into HTML sections.
 *
 * The text is normalised, split into blocks on blank lines, HTML-escaped, and each
 * block is wrapped in a <section>. The extra CSS class of a section is chosen from the
 * first "Label :" occurrence inside the block. A label word that merely appears in
 * running text (for example "the original sentence" inside a Why paragraph, or "tip"
 * inside "multiple") no longer changes the styling of the block.
 *
 * @param {string} markdown - Raw correction text.
 * @returns {string} HTML markup, one <section> per non-empty block.
 */
function renderMarkdown(markdown) {
  const sectionClassByLabel = {
    "original sentence": " original-section",
    "corrected sentence": " corrected-section",
    "error type": " compact-section",
    "tip": " tip-section"
  };
  // A label only counts when it is followed by a colon and is not the tail of a longer word.
  const labelPattern = /(?:^|[^A-Za-z])(original sentence|corrected sentence|error type|why|tip)\s*:/i;
  const blocks = normalizeCorrectionText(markdown)
    .split(/\n{2,}/)
    .map((block) => block.trim())
    .filter(Boolean);
  return blocks.map((block) => {
    let html = escapeHtml(block);
    // A whole line wrapped in single asterisks is rendered as emphasis.
    html = html.replace(/^\*(.+?)\*$/gm, "<em>$1</em>");
    // Bold the leading emoji + label of the block.
    html = html.replace(
      /^(📝\s*Original sentence|✅\s*Corrected sentence|🔎\s*Error type|💡\s*Why|🌱\s*Tip)\s*:\s*/i,
      "<strong>$1 :</strong> "
    );
    const label = (block.match(labelPattern)?.[1] || "").toLowerCase();
    // "why" and unlabelled blocks keep the plain section class.
    const cls = "result-section" + (sectionClassByLabel[label] || "");
    return `<section class="${cls}">${html.replace(/\n/g, "<br>")}</section>`;
  }).join("");
}
/**
 * Return the shared Gradio client, connecting on first use.
 *
 * The connection promise is cached so concurrent callers share one connection attempt.
 * If that attempt fails, the cache is cleared so the next call retries instead of
 * returning the same rejected promise for the rest of the page's life.
 *
 * @returns {Promise<object>} Resolves to the connected client.
 */
async function getClient() {
  if (!clientPromise) {
    const attempt = Promise.resolve(client(window.location.origin)).catch((error) => {
      // Only clear the cache if it still holds this failed attempt.
      if (clientPromise === attempt) clientPromise = null;
      throw error;
    });
    clientPromise = attempt;
  }
  return clientPromise;
}
/**
 * Pull a displayable correction text out of a prediction response.
 *
 * Unwraps, in order: the data/output/value envelope, a leading array, nested "value"
 * and "data" keys, another leading array, and a "result" key. Anything that is still
 * an object is pretty-printed as JSON.
 *
 * @param {*} response - Response returned by the client.
 * @returns {string} Normalised text, or a fallback message when nothing was returned.
 */
function extractResult(response) {
  const hasKey = (candidate, key) =>
    Boolean(candidate) && typeof candidate === "object" && key in candidate;
  let data = response?.data ?? response?.output ?? response?.value ?? response;
  if (Array.isArray(data)) data = data[0];
  for (const key of ["value", "data"]) {
    if (hasKey(data, key)) data = data[key];
  }
  if (Array.isArray(data)) data = data[0];
  if (hasKey(data, "result")) data = data.result;
  if (data && typeof data === "object") data = JSON.stringify(data, null, 2);
  return normalizeCorrectionText(data || "No correction was returned.");
}
/**
 * Return the correction payload of a response as an object.
 *
 * An object payload is returned untouched. Any other payload (string, empty value)
 * is wrapped in an object with the same field names and empty metadata.
 *
 * @param {*} response - Response returned by the client.
 * @returns {object} The payload object.
 */
function extractCorrectionPayload(response) {
  const payload = extractPayload(response);
  const isStructured = Boolean(payload) && typeof payload === "object";
  if (isStructured) return payload;
  const fallbackText = payload || "No correction was returned.";
  return {
    ok: Boolean(payload),
    result: normalizeCorrectionText(fallbackText),
    request_id: "",
    corrected_sentence: "",
    generation_time_seconds: null
  };
}
/**
 * Clean up correction text before it is rendered or parsed.
 *
 * Steps: turn literal escape sequences into real whitespace, protect the space between
 * an emoji and its label, start a new paragraph before every bare label that follows
 * whitespace, then restore the protected spaces and trim.
 *
 * @param {*} text - Raw text; falsy values are treated as an empty string.
 * @returns {string} The normalised text.
 */
function normalizeCorrectionText(text) {
  const labels = ["Original sentence", "Corrected sentence", "Error type", "Why", "Tip"];
  const unescaped = String(text || "")
    .replace(/\\r\\n/g, "\n")
    .replace(/\\n/g, "\n")
    .replace(/\\t/g, " ");
  // Swap the emoji/label gap for a marker so the paragraph split below cannot separate them.
  const guarded = unescaped.replace(
    /(📝|✅|🔎|💡|🌱)\s+(Original sentence|Corrected sentence|Error type|Why|Tip)\s*:/g,
    "$1__LABEL_SPACE__$2 :"
  );
  const separated = labels.reduce(
    (current, label) => current.replace(new RegExp(`\\s+(?=${label}\\s*:)`, "g"), "\n\n"),
    guarded
  );
  return separated.replace(/__LABEL_SPACE__/g, " ").trim();
}
/**
 * Unwrap the first payload value from a prediction response.
 *
 * Unwraps, in order: the data/output/value envelope, a leading array, nested "value"
 * and "data" keys, and one more leading array.
 *
 * @param {*} response - Response returned by the client.
 * @returns {*} The innermost payload (object, string, or whatever was returned).
 */
function extractPayload(response) {
  const firstIfArray = (candidate) => (Array.isArray(candidate) ? candidate[0] : candidate);
  const unwrapKey = (candidate, key) =>
    candidate && typeof candidate === "object" && key in candidate ? candidate[key] : candidate;
  let data = firstIfArray(response?.data ?? response?.output ?? response?.value ?? response);
  data = unwrapKey(data, "value");
  data = unwrapKey(data, "data");
  return firstIfArray(data);
}
/**
 * Toggle the busy state of the form controls.
 * @param {boolean} isLoading - True while a correction request is running.
 */
function setLoading(isLoading) {
  [submit, voice].forEach((button) => {
    button.disabled = isLoading;
  });
  if (isLoading) {
    submit.textContent = "Thinking gently...";
    status.textContent = "Working 🌱";
  } else {
    // The status text is left alone here; callers set the final status themselves.
    submit.textContent = "✨ Correct my sentence";
  }
}
/**
 * Record the replay state and reflect it on the Replay button.
 * @param {string} state - "idle", "preparing", "ready" or "playing".
 */
function setReplayState(state) {
  replayState = state;
  if (!replay) return;
  const labels = {
    idle: "▶️ Replay",
    preparing: "Preparing voice...",
    ready: "▶️ Replay",
    playing: "Reading..."
  };
  // The button is only clickable in the "ready" state (or any unrecognised state).
  const lockedStates = ["idle", "preparing", "playing"];
  replay.textContent = labels[state] || labels.ready;
  replay.disabled = lockedStates.includes(state);
}
/**
 * Collect the loading tips that fit the context currently selected in the form.
 *
 * The select values ("friendly-informal", "work-formal", "wechat-informal", ...) are not
 * the keys of loadingTips ("Friends", "Work", "WeChat", ...), so a direct lookup never
 * matched. An exact key match is still tried first; otherwise the value is mapped to a
 * tip group by its prefix.
 *
 * @returns {string[]} Context-specific tips (possibly none) followed by the base tips.
 */
function tipsForCurrentContext() {
  const context = String(field("#context")?.value || "");
  const normalized = context.trim().toLowerCase();
  const groupByPrefix = [
    ["friend", "Friends"],
    ["family", "Family"],
    ["work", "Work"],
    ["wechat", "WeChat"]
  ];
  const matched = groupByPrefix.find(([prefix]) => normalized.startsWith(prefix));
  const contextual =
    loadingTips[context] || (matched ? loadingTips[matched[1]] : null) || [];
  return [...contextual, ...loadingTips.base];
}
/**
 * Build the markup shown while a correction is being generated: the coach
 * illustration, a short status bubble containing the given tip, and three step labels.
 * @param {string} tip - Tip text; it is HTML-escaped before being inserted.
 * @returns {string} HTML string for the loading placeholder.
 */
function loadingCoachHtml(tip) {
return `
<div class="placeholder">
<div class="coach-card">
<span class="coach-illustration" aria-hidden="true">
<span class="coach-hat"></span>
<span class="coach-face"><span class="coach-smile"></span></span>
</span>
<span class="coach-bubble">
<strong>Checking gently...</strong>
<span>The coach is reading your sentence, context, and tone.</span>
<span class="loading-tip" id="loadingTip">${escapeHtml(tip)}</span>
</span>
</div>
<div class="loading-steps" aria-hidden="true">
<span class="loading-step">Meaning</span>
<span class="loading-step">Tone</span>
<span class="loading-step">Naturalness</span>
</div>
</div>`;
}
/**
 * Replace the result area with the loading coach and one randomly chosen tip.
 */
function showLoadingCoach() {
  const candidates = tipsForCurrentContext();
  const pickedIndex = Math.floor(Math.random() * candidates.length);
  // Falls back to the first base tip when the candidate list yields nothing.
  const chosenTip = candidates[pickedIndex] || loadingTips.base[0];
  result.innerHTML = loadingCoachHtml(chosenTip);
}
/**
 * Re-apply the "result-arrived" class on the result container.
 * NOTE(review): presumably this restarts a CSS animation tied to that class; the
 * matching style rule is not in this section.
 */
function revealResult() {
result.classList.remove("result-arrived");
// Reading offsetWidth forces a layout pass between removing and re-adding the class,
// so the browser treats the re-added class as a fresh change.
void result.offsetWidth;
result.classList.add("result-arrived");
}
/**
 * Reduce a captured "Corrected sentence" value to the sentence itself.
 *
 * Removes asterisks, cuts off a trailing "Original sentence/Error type/Why/Tip:" part
 * on the same line, and keeps only the first sentence when more Chinese or Latin text
 * follows its terminator. Full-width "！" and "？" (written as escapes below) count as
 * terminators alongside "。" and the ASCII "!" and "?".
 *
 * @param {*} value - Captured text; falsy values are treated as an empty string.
 * @returns {string} The cleaned sentence.
 */
function cleanCorrectedSentenceValue(value) {
  let out = String(value || "")
    .replace(/\*/g, "")
    .replace(/\s+(?:Original sentence|Error type|Why|Tip)\s*:.+$/i, "")
    .trim();
  const extraAfterSentence = out.match(
    /^(.+?[。!?\uFF01\uFF1F])(?=\s*[\u4e00-\u9fffA-Za-z])/
  );
  if (extraAfterSentence) out = extraAfterSentence[1];
  return out.trim();
}
/**
 * Find the value of the "Corrected sentence" line in the correction text.
 * @param {string} markdown - Correction text; falsy values are treated as empty.
 * @returns {string} The cleaned corrected sentence, or "" when no such line exists.
 */
function extractCorrectedSentence(markdown) {
  const source = markdown || "";
  const candidates = [
    /(?:\*\*)?(?:✅\s*)?Corrected sentence\s*:\s*(?:\*\*)?\s*([^\n]+)/i,
    /(?:\*\*)?Corrected sentence\s*:\s*(?:\*\*)?\s*([^\n]+)/i
  ];
  // The first pattern that captures something wins.
  const captured = candidates
    .map((candidate) => source.match(candidate)?.[1])
    .find(Boolean);
  return captured ? cleanCorrectedSentenceValue(captured) : "";
}
/**
 * Find the value of the "Error type" line in the correction text.
 * @param {string} markdown - Correction text; falsy values are treated as empty.
 * @returns {string} The error type without asterisks, or "" when no such line exists.
 */
function extractErrorType(markdown) {
  const source = markdown || "";
  const candidates = [
    /(?:\*\*)?(?:🔎\s*)?Error type\s*:\s*(?:\*\*)?\s*([^\n]+)/i,
    /(?:\*\*)?Error type\s*:\s*(?:\*\*)?\s*([^\n]+)/i
  ];
  // The first pattern that captures something wins.
  const captured = candidates
    .map((candidate) => source.match(candidate)?.[1])
    .find(Boolean);
  if (!captured) return "";
  return captured.replace(/\*/g, "").trim();
}
/**
 * Add the latest error type to the "Learning notes" list and refresh the summary line.
 *
 * Keeps at most 4 list items and the 6 most recent error types. The summary names the
 * most frequent recent type ("none" is reported as already-correct sentences).
 *
 * @param {string} markdown - Correction text containing an "Error type" line.
 */
function updateLearningHistory(markdown) {
  const errorType = extractErrorType(markdown);
  if (!errorType || !historyList) return;

  // New list entry: error type plus a preview of the sentence currently in the form.
  const sentence = field("#sentence").value.trim();
  const preview = sentence.slice(0, 18);
  const ellipsis = sentence.length > 18 ? "..." : "";
  const item = document.createElement("li");
  item.textContent = `${errorType} · ${preview}${ellipsis}`;

  // Drop the initial placeholder entry before the first real one is added.
  const onlyEntry = historyList.children.length === 1 ? historyList.children[0] : null;
  if (onlyEntry && onlyEntry.textContent.includes("will appear")) {
    historyList.innerHTML = "";
  }
  historyList.prepend(item);
  while (historyList.children.length > 4) {
    historyList.removeChild(historyList.lastElementChild);
  }

  recentErrorTypes.unshift(errorType.toLowerCase());
  if (recentErrorTypes.length > 6) recentErrorTypes.splice(6);

  // Tally the recent types and pick the most frequent one.
  const counts = {};
  for (const type of recentErrorTypes) {
    counts[type] = (counts[type] || 0) + 1;
  }
  const ranked = Object.entries(counts).sort((a, b) => b[1] - a[1]);
  const top = ranked[0]?.[0];
  if (!historySummary || !top) return;
  if (top === "none") {
    historySummary.textContent = "Nice: your recent sentences are often already correct.";
  } else {
    historySummary.textContent = `Recent pattern: ${top}.`;
  }
}
/**
 * Forget the current response and hide/disable the rating controls.
 */
function resetFeedbackPanel() {
  currentRequestId = "";
  currentOriginalSentence = "";
  currentEvaluation = "";
  currentGenerationTimeSeconds = null;
  if (feedbackPanel) feedbackPanel.hidden = true;
  if (feedbackStatus) feedbackStatus.textContent = "";
  for (const button of [thumbUp, thumbDown]) {
    if (!button) continue;
    button.disabled = true;
    button.classList.remove("selected");
  }
}
/**
 * Show the rating controls for the current response.
 * Does nothing when the panel is missing or no request id is known.
 */
function showFeedbackPanel() {
  if (!feedbackPanel || !currentRequestId) return;
  feedbackPanel.hidden = false;
  if (feedbackStatus) {
    // The status slot shows the generation time until a rating is saved.
    feedbackStatus.textContent = currentGenerationTimeSeconds
      ? `${currentGenerationTimeSeconds}s`
      : "";
  }
  for (const button of [thumbUp, thumbDown]) {
    if (!button) continue;
    button.disabled = false;
    button.classList.remove("selected");
  }
}
/**
 * Remember the saved rating and highlight the matching button.
 * @param {string} evaluation - "thumbs_up", "thumbs_down", or a falsy value to clear.
 */
function markFeedbackSelection(evaluation) {
  currentEvaluation = evaluation || "";
  const choices = [
    [thumbUp, "thumbs_up"],
    [thumbDown, "thumbs_down"]
  ];
  choices.forEach(([button, value]) => {
    button?.classList.toggle("selected", currentEvaluation === value);
  });
  if (feedbackStatus) {
    feedbackStatus.textContent = currentEvaluation ? "Saved" : "";
  }
}
/**
 * Send a rating for the response currently displayed.
 *
 * The request id is captured before the first await. If a new correction is started
 * while the rating is being saved, the late response is ignored: it must not mark the
 * new answer as rated, overwrite its status text, or re-enable buttons that the reset
 * of the panel disabled.
 *
 * @param {string} evaluation - "thumbs_up" or "thumbs_down".
 * @returns {Promise<void>}
 */
async function submitFeedback(evaluation) {
  const requestId = currentRequestId;
  if (!requestId) return;
  const buttons = [thumbUp, thumbDown].filter(Boolean);
  const isStale = () => currentRequestId !== requestId;
  buttons.forEach((button) => {
    button.disabled = true;
  });
  if (feedbackStatus) feedbackStatus.textContent = "Saving...";
  try {
    const app = await getClient();
    const response = await app.predict("/rate_response", {
      request_id: requestId,
      evaluation
    });
    const payload = extractPayload(response);
    if (!payload?.ok) throw new Error(payload?.error || "Feedback save failed");
    if (!isStale()) markFeedbackSelection(payload.evaluation);
  } catch (error) {
    console.error(error);
    if (!isStale() && feedbackStatus) feedbackStatus.textContent = "Save failed";
  } finally {
    // Only hand the buttons back if they still belong to the response that was rated.
    if (!isStale()) {
      buttons.forEach((button) => {
        button.disabled = false;
      });
    }
  }
}
/**
 * Split text into the units highlighted while reading: CJK ideographs in the range
 * U+4E00..U+9FFF plus sentence punctuation. Both ASCII and full-width forms of the
 * comma, exclamation mark, question mark, semicolon and colon are kept (the full-width
 * forms are written as escapes), so typical Chinese punctuation is not dropped.
 *
 * @param {string} text - Sentence to split; falsy values give an empty list.
 * @returns {string[]} One entry per reading unit, in order.
 */
function chineseReadingUnits(text) {
  const readingUnit = /[\u4e00-\u9fff,。!?、;:\uFF0C\uFF01\uFF1F\uFF1B\uFF1A]/;
  return Array.from(text || "").filter((char) => readingUnit.test(char));
}
/**
 * Draw the karaoke view: a small window of characters around the active one and a
 * full-sentence strip underneath.
 *
 * The strip is rebuilt whenever its characters differ from the requested ones. Comparing
 * only the number of characters left the previous sentence on screen when a new
 * sentence happened to have the same length.
 *
 * @param {string[]} chars - Reading units of the sentence.
 * @param {number} [activeIndex=-1] - Index of the unit being read; -1 for none.
 */
function renderKaraoke(chars, activeIndex = -1) {
  if (!chars.length) {
    karaokeWindow.innerHTML = '<span class="karaoke-char">听</span><span class="karaoke-char">一</span><span class="karaoke-char">听</span>';
    karaokeStrip.innerHTML = "";
    karaokeHint.textContent = "Replay the corrected sentence and follow the characters.";
    return;
  }
  // Window of 2 to 4 characters, roughly centred on the active one and clamped to the sentence.
  const windowSize = Math.min(4, Math.max(2, chars.length));
  let start = Math.max(0, activeIndex - Math.floor(windowSize / 2));
  start = Math.min(start, Math.max(0, chars.length - windowSize));
  const visible = chars.slice(start, start + windowSize);
  karaokeWindow.innerHTML = visible.map((char, offset) => {
    const index = start + offset;
    const state = index < activeIndex ? "read" : index === activeIndex ? "active" : "";
    return `<span class="karaoke-char ${state}">${escapeHtml(char)}</span>`;
  }).join("");
  const stripNodes = [...karaokeStrip.children];
  const stripIsCurrent =
    stripNodes.length === chars.length &&
    stripNodes.every((node, index) => node.textContent === chars[index]);
  if (!stripIsCurrent) {
    karaokeStrip.innerHTML = chars.map((char) => `<span class="strip-char">${escapeHtml(char)}</span>`).join("");
  }
  [...karaokeStrip.children].forEach((node, index) => {
    node.className = "strip-char" + (index < activeIndex ? " read" : index === activeIndex ? " active" : "");
    if (index === activeIndex) node.scrollIntoView({ behavior: "smooth", inline: "center", block: "nearest" });
  });
  karaokeHint.textContent = activeIndex >= 0 ? "Follow the highlighted characters as you listen." : "Ready for reading.";
}
/**
 * Cancel the pending karaoke timeout, if there is one.
 */
function stopKaraokeTimer() {
  if (!karaokeTimer) return;
  clearTimeout(karaokeTimer);
  karaokeTimer = null;
}
/**
 * Tell whether a reading unit is sentence punctuation. Both ASCII and full-width forms
 * of the comma, exclamation mark, question mark, semicolon and colon are recognised
 * (the full-width forms are written as escapes), together with "。" and "、".
 *
 * @param {string} char - A single reading unit.
 * @returns {boolean} True for punctuation, false otherwise.
 */
function isReadingPunctuation(char) {
  return /[,。!?、;:\uFF0C\uFF01\uFF1F\uFF1B\uFF1A]/.test(char);
}
/**
 * Relative share of reading time given to a unit: punctuation counts 1.9, anything else 1.
 * @param {string} char - A single reading unit.
 * @returns {number} The weight.
 */
function readingWeight(char) {
  if (isReadingPunctuation(char)) return 1.9;
  return 1;
}
/**
 * Estimate how long one reading unit is highlighted at a given speech rate.
 * @param {string} char - A single reading unit.
 * @param {number} rate - Speech rate; unusable values fall back to 0.55, and the
 *   result is limited to the range 0.25..1.0.
 * @returns {number} Milliseconds for this unit.
 */
function msPerReadingUnit(char, rate) {
  const requestedRate = Number(rate) || 0.55;
  const boundedRate = Math.max(0.25, Math.min(1.0, requestedRate));
  const baseMs = isReadingPunctuation(char) ? 230 : 145;
  return baseMs / boundedRate;
}
/**
 * Highlight the sentence unit by unit using estimated per-unit timings.
 * The loop stops on its own as soon as boundary events take over the highlight.
 *
 * @param {string} text - Sentence being read.
 * @param {number} rate - Speech rate passed to msPerReadingUnit.
 */
function startKaraoke(text, rate) {
  const units = chineseReadingUnits(text);
  stopKaraokeTimer();
  boundaryDrivenReading = false;
  renderKaraoke(units, -1);
  if (units.length === 0) return;

  let cursor = 0;
  const advance = () => {
    if (boundaryDrivenReading) return;
    if (cursor >= units.length) {
      // Past the last unit: leave it highlighted and show the closing hint.
      renderKaraoke(units, units.length - 1);
      karaokeTimer = null;
      karaokeHint.textContent = "Great. Replay with the voice button whenever you want. 👍";
      return;
    }
    const current = cursor;
    cursor += 1;
    renderKaraoke(units, current);
    karaokeTimer = setTimeout(advance, msPerReadingUnit(units[current], rate));
  };
  karaokeTimer = setTimeout(advance, 20);
}
/**
 * Highlight the sentence unit by unit, spreading a known audio duration over the
 * units in proportion to their reading weights.
 *
 * @param {string} text - Sentence being read.
 * @param {number} durationMs - Total audio duration; nothing is scheduled when falsy.
 * @param {number} [leadInMs=0] - Time already spent before the highlight starts.
 */
function startKaraokeByDuration(text, durationMs, leadInMs = 0) {
  const units = chineseReadingUnits(text);
  stopKaraokeTimer();
  boundaryDrivenReading = false;
  renderKaraoke(units, -1);
  if (units.length === 0 || !durationMs) return;

  // Time available for highlighting, never less than 350 ms.
  const budgetMs = Math.max(350, Number(durationMs) - Number(leadInMs || 0) - 80);
  const weights = units.map(readingWeight);
  const weightSum = weights.reduce((sum, value) => sum + value, 0) || units.length;

  let cursor = 0;
  const advance = () => {
    if (cursor >= units.length) {
      renderKaraoke(units, units.length - 1);
      karaokeTimer = null;
      karaokeHint.textContent = "Great. Replay whenever you want. 👍";
      return;
    }
    const current = cursor;
    cursor += 1;
    renderKaraoke(units, current);
    // Each unit gets its weighted share of the budget, with a 55 ms floor.
    const share = budgetMs * (weights[current] / weightSum);
    karaokeTimer = setTimeout(advance, Math.max(55, share));
  };
  karaokeTimer = setTimeout(advance, 30);
}
/**
 * Map a playback progress fraction to the index of the unit being read, using the
 * cumulative reading weights of the units.
 *
 * @param {string[]} chars - Reading units of the sentence.
 * @param {number} progress - Playback progress; limited to 0..1, unusable values count as 0.
 * @returns {number} Index of the active unit, or -1 when there are no units.
 */
function karaokeIndexFromAudioProgress(chars, progress) {
  if (!chars.length) return -1;
  const fraction = Math.min(1, Math.max(0, Number(progress) || 0));
  const weights = chars.map(readingWeight);
  const weightSum = weights.reduce((sum, value) => sum + value, 0) || chars.length;
  let cumulative = 0;
  const hit = weights.findIndex((weight) => {
    cumulative += weight;
    return fraction <= cumulative / weightSum;
  });
  return hit === -1 ? chars.length - 1 : hit;
}
/**
 * Keep the highlight in step with an audio element by polling its playback position.
 * Polling ends when the audio is paused or has ended.
 *
 * @param {string} text - Sentence being read.
 * @param {HTMLMediaElement} audio - Element that is playing the sentence.
 * @param {number} [leadInMs=0] - Offset subtracted from the playback position.
 */
function startKaraokeFromAudio(text, audio, leadInMs = 0) {
  const units = chineseReadingUnits(text);
  stopKaraokeTimer();
  boundaryDrivenReading = false;
  renderKaraoke(units, -1);
  if (units.length === 0 || !audio) return;

  // Used only while the element does not report a usable duration.
  const fallbackDurationMs = Math.max(
    650,
    units.reduce((sum, unit) => sum + msPerReadingUnit(unit, 0.85), 0)
  );
  // For "edge" providers the duration is scaled by the configured karaoke factor.
  const durationFactor = SERVER_TTS_PROVIDER.startsWith("edge")
    ? EDGE_TTS_KARAOKE_DURATION_FACTOR
    : 1;
  const leadIn = Number(leadInMs || 0);

  const poll = () => {
    if (audio.paused || audio.ended) {
      karaokeTimer = null;
      return;
    }
    const reportedMs = Math.round((audio.duration || 0) * 1000);
    const hasReportedDuration = Number.isFinite(reportedMs) && reportedMs > 1;
    const durationMs = hasReportedDuration
      ? Math.max(1, reportedMs * durationFactor)
      : fallbackDurationMs * durationFactor;
    const positionMs = Math.max(0, Math.round((audio.currentTime || 0) * 1000) - leadIn);
    renderKaraoke(units, karaokeIndexFromAudioProgress(units, positionMs / durationMs));
    karaokeTimer = setTimeout(poll, 60);
  };
  karaokeTimer = setTimeout(poll, 20);
}
/**
 * Move the highlight to the unit reported by a speech "boundary" event and switch the
 * karaoke into boundary-driven mode (the timer loop is stopped).
 *
 * charIndex is an index into the utterance string, that is a UTF-16 offset, so the text
 * is cut with String.prototype.slice. Cutting a code-point array at that offset drifts
 * as soon as the text contains a character outside the Basic Multilingual Plane.
 *
 * @param {string} text - Sentence being spoken.
 * @param {number} charIndex - Offset reported by the boundary event.
 */
function syncKaraokeFromBoundary(text, charIndex) {
  if (!Number.isFinite(charIndex) || charIndex < 0) return;
  const chars = chineseReadingUnits(text);
  if (!chars.length) return;
  boundaryDrivenReading = true;
  stopKaraokeTimer();
  // Units up to and including the spoken position; the last one is the active unit.
  const before = chineseReadingUnits(String(text).slice(0, charIndex + 1));
  const index = Math.max(0, Math.min(chars.length - 1, before.length - 1));
  renderKaraoke(chars, index);
}
/**
 * List the browser speech-synthesis voices whose language tag starts with "zh".
 * @returns {SpeechSynthesisVoice[]} Matching voices; empty when synthesis is unavailable.
 */
function chineseVoices() {
  const available = window.speechSynthesis?.getVoices?.() || [];
  const isChinese = (candidate) => candidate.lang?.toLowerCase().startsWith("zh");
  return available.filter(isChinese);
}
/**
 * Score a speech-synthesis voice; higher scores are preferred.
 * Points are awarded once per keyword group found in the voice name or language tag,
 * plus a bonus when the voice reports localService === false.
 *
 * @param {{name?: string, lang?: string, localService?: boolean}} item - Voice to score.
 * @returns {number} The score.
 */
function voiceScore(item) {
  const haystack = `${item.name || ""} ${item.lang || ""}`.toLowerCase();
  const rules = [
    [["zh-cn", "mandarin"], 20],
    [["google"], 18],
    [["microsoft"], 16],
    [["natural", "premium", "online"], 12],
    [["xiaoxiao", "xiaoyi", "yunjian"], 8]
  ];
  const keywordScore = rules.reduce(
    (total, [keywords, points]) =>
      keywords.some((keyword) => haystack.includes(keyword)) ? total + points : total,
    0
  );
  return item.localService === false ? keywordScore + 4 : keywordScore;
}
/**
 * Choose the best-scoring Chinese voice offered by the browser.
 * @returns {SpeechSynthesisVoice|null} The preferred voice, or null when none exists.
 */
function pickChineseVoice() {
  const candidates = chineseVoices();
  if (candidates.length === 0) return null;
  candidates.sort((left, right) => voiceScore(right) - voiceScore(left));
  return candidates[0] || null;
}
/**
 * Build the cache key for a sentence: the trimmed text and the TTS provider.
 * @param {string} text - Sentence to be spoken.
 * @returns {string} The cache key.
 */
function ttsCacheKey(text) {
  const phrase = (text || "").trim();
  return `${phrase}||${SERVER_TTS_PROVIDER}`;
}
/**
 * Store a TTS payload in the bounded cache as the most recent entry.
 * Entries without a key or without audio are ignored.
 *
 * @param {string} key - Cache key from ttsCacheKey.
 * @param {object} payload - TTS payload containing an "audio" value.
 */
function rememberTtsPayload(key, payload) {
  if (!key || !payload?.audio) return;
  // Re-inserting moves the key to the end of the Map's insertion order.
  ttsPayloadCache.delete(key);
  ttsPayloadCache.set(key, payload);
  while (ttsPayloadCache.size > TTS_CACHE_LIMIT) {
    const [oldestKey] = ttsPayloadCache.keys();
    ttsPayloadCache.delete(oldestKey);
  }
}
/**
 * Fetch a cached TTS payload and mark it as most recently used.
 * @param {string} key - Cache key from ttsCacheKey.
 * @returns {object|null} The payload, or null when it is not cached.
 */
function cachedTtsPayload(key) {
  const hit = ttsPayloadCache.get(key);
  if (hit) {
    // Re-inserting moves the key to the end of the Map's insertion order.
    ttsPayloadCache.delete(key);
    ttsPayloadCache.set(key, hit);
    return hit;
  }
  return null;
}
/**
 * Drop everything prepared for the previous sentence: the current TTS key, promise and
 * payload, the replay state, the karaoke timer, and the source loaded in the audio element.
 * The payload cache itself is not touched.
 */
function resetPreparedTTS() {
currentTtsKey = "";
currentTtsPromise = null;
currentTtsPayload = null;
setReplayState("idle");
stopKaraokeTimer();
if (serverAudio) {
// Detach the handlers before pausing so handlers of the old sentence cannot run again.
serverAudio.onplaying = null;
serverAudio.onended = null;
serverAudio.pause();
// Restore audible output in case a muted warm-up play was interrupted.
serverAudio.muted = false;
serverAudio.volume = 1;
delete serverAudio.dataset.ttsKey;
// Removing src and calling load() resets the element to an empty media state.
serverAudio.removeAttribute("src");
serverAudio.load();
}
}
/**
 * Resolve after a delay.
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Promise that resolves once the delay has passed.
 */
function wait(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
/**
 * Wait until an audio element can start playing, or until a timeout passes.
 * Resolves immediately when there is no element or it already has enough data.
 * Never rejects: a media error also resolves the promise.
 *
 * @param {HTMLMediaElement|null} audio - Element to watch.
 * @param {number} [timeoutMs=1800] - Maximum time to wait.
 * @returns {Promise<void>}
 */
function waitForAudioReady(audio, timeoutMs = 1800) {
  if (!audio) return Promise.resolve();
  if (audio.readyState >= HTMLMediaElement.HAVE_FUTURE_DATA) return Promise.resolve();
  const readyEvents = ["canplay", "canplaythrough", "loadeddata", "error"];
  return new Promise((resolve) => {
    let settled = false;
    let timer = null;
    const finish = () => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      readyEvents.forEach((eventName) => audio.removeEventListener(eventName, finish));
      resolve();
    };
    timer = setTimeout(finish, timeoutMs);
    readyEvents.forEach((eventName) => audio.addEventListener(eventName, finish, { once: true }));
  });
}
/**
 * Load a TTS payload into the shared audio element and, for providers other than
 * "edge", run a short muted warm-up play.
 *
 * Returns the payload unchanged when there is no audio element, the payload has no
 * audio, or the key is no longer the current one. Sets payload.primed to record whether
 * the warm-up has been done.
 *
 * @param {object} payload - TTS payload with an "audio" source.
 * @param {string} key - Cache key the payload belongs to.
 * @returns {Promise<object>} The same payload object.
 */
async function prepareAudioElement(payload, key) {
if (!serverAudio || !payload?.audio || currentTtsKey !== key) return payload;
// Only (re)load the source when the element is not already holding this sentence.
if (serverAudio.dataset.ttsKey !== key) {
serverAudio.pause();
serverAudio.onplaying = null;
serverAudio.onended = null;
serverAudio.src = payload.audio;
serverAudio.dataset.ttsKey = key;
serverAudio.preload = "auto";
serverAudio.load();
}
await waitForAudioReady(serverAudio);
// Re-check after the await: another sentence may have become current in the meantime.
if (currentTtsKey !== key || payload.primed) return payload;
// "edge" providers are marked as primed without the muted warm-up play.
if (SERVER_TTS_PROVIDER.startsWith("edge")) {
payload.primed = true;
return payload;
}
try {
// Muted play for SERVER_TTS_PRIME_MS, then rewind.
// NOTE(review): presumably this warms up playback so the first audible play starts promptly - confirm.
serverAudio.muted = true;
serverAudio.volume = 0;
serverAudio.currentTime = 0;
await serverAudio.play();
await wait(SERVER_TTS_PRIME_MS);
serverAudio.pause();
serverAudio.currentTime = 0;
payload.primed = true;
} catch (error) {
// A failed warm-up is not fatal; the payload is simply left unprimed.
payload.primed = false;
} finally {
serverAudio.muted = false;
serverAudio.volume = 1;
}
return payload;
}
/**
 * Make sure server-generated audio for a sentence is available and loaded.
 *
 * Lookup order: payload cache, payload already current, request already in flight for
 * the current key, request in flight in the promise cache, then a new "/tts" request.
 * Failures never throw: the function resolves to null and the UI is told that the
 * browser voice will be used.
 *
 * UI updates (hint text, replay state) are only applied while the sentence is still
 * the current one. This now also holds for the cache-hit path, which previously updated
 * the UI after an await without re-checking the key.
 *
 * @param {string} text - Sentence to synthesise.
 * @returns {Promise<object|null>} The TTS payload, or null when unavailable or disabled.
 */
async function prepareServerTTS(text) {
  if (!SERVER_TTS_ENABLED) return null;
  const phrase = (text || "").trim();
  if (!phrase || !serverAudio) return null;
  const key = ttsCacheKey(phrase);
  const cachedPayload = cachedTtsPayload(key);
  if (cachedPayload) {
    currentTtsKey = key;
    currentTtsPayload = cachedPayload;
    currentTtsPromise = null;
    await prepareAudioElement(cachedPayload, key);
    // A reset or another sentence may have taken over while the audio was loading.
    if (currentTtsKey === key) {
      karaokeHint.textContent = "Voice ready. Press Replay to read it.";
      setReplayState("ready");
    }
    return cachedPayload;
  }
  if (currentTtsKey === key && currentTtsPayload) {
    setReplayState("ready");
    return currentTtsPayload;
  }
  if (currentTtsKey === key && currentTtsPromise) {
    setReplayState("preparing");
    return currentTtsPromise;
  }
  if (ttsPromiseCache.has(key)) {
    // Join a request for the same sentence that is already in flight.
    currentTtsKey = key;
    currentTtsPromise = ttsPromiseCache.get(key);
    setReplayState("preparing");
    return currentTtsPromise;
  }
  currentTtsKey = key;
  currentTtsPayload = null;
  karaokeHint.textContent = "Preparing a natural Mandarin voice...";
  setReplayState("preparing");
  currentTtsPromise = (async () => {
    const app = await getClient();
    const response = await app.predict("/tts", {
      text: phrase,
      speaker: SERVER_TTS_PROVIDER
    });
    const payload = extractPayload(response);
    if (!payload?.ok || !payload?.audio) {
      console.warn("Server TTS fallback", payload?.error || payload);
      if (currentTtsKey === key) {
        currentTtsPayload = null;
        karaokeHint.textContent = "Server voice unavailable. Browser voice will be used.";
        setReplayState("ready");
      }
      return null;
    }
    if (currentTtsKey === key) {
      await prepareAudioElement(payload, key);
    }
    // The payload is cached even when the sentence is no longer current.
    rememberTtsPayload(key, payload);
    if (currentTtsKey === key) {
      currentTtsPayload = payload;
      karaokeHint.textContent = "Voice ready. Press Replay to read it.";
      setReplayState("ready");
    }
    return payload;
  })();
  ttsPromiseCache.set(key, currentTtsPromise);
  try {
    return await currentTtsPromise;
  } catch (error) {
    console.warn("Server TTS unavailable, using browser voice.", error);
    if (currentTtsKey === key) {
      currentTtsPayload = null;
      karaokeHint.textContent = "Server voice unavailable. Browser voice will be used.";
      setReplayState("ready");
    }
    return null;
  } finally {
    ttsPromiseCache.delete(key);
    if (currentTtsKey === key) currentTtsPromise = null;
  }
}
/**
 * Play a sentence with server-generated audio and drive the karaoke highlight.
 *
 * @param {string} text - Sentence to read aloud.
 * @returns {Promise<boolean>} True when playback was started on the audio element;
 *   false when server TTS is disabled, nothing can be played, or an error occurred
 *   (callers fall back to the browser voice).
 */
async function speakWithServerTTS(text) {
if (!SERVER_TTS_ENABLED) return false;
const phrase = (text || "").trim();
if (!phrase || !serverAudio) return false;
try {
const payload = await prepareServerTTS(phrase);
if (!payload?.audio) return false;
// Stop any browser speech so the two voices never overlap.
window.speechSynthesis?.cancel?.();
const key = ttsCacheKey(phrase);
await prepareAudioElement(payload, key);
serverAudio.pause();
serverAudio.currentTime = 0;
// The playing event can fire more than once; the karaoke is started only on the first one.
let readingStarted = false;
const isFirstUserPlay = !payload.playedOnce;
// Delay before the highlight starts: none for "edge" providers, the short delay when
// the audio was primed or already played once, otherwise the longer first-play delay.
const readingDelay = SERVER_TTS_PROVIDER.startsWith("edge")
? 0
: payload.primed || !isFirstUserPlay
? SERVER_TTS_READING_DELAY_MS
: SERVER_TTS_FIRST_PLAY_DELAY_MS;
serverAudio.onplaying = () => {
if (readingStarted) return;
readingStarted = true;
setReplayState("playing");
payload.playedOnce = true;
// Prefer the duration reported in the payload; otherwise use the element's own duration.
const durationMs = payload.duration_ms || Math.round((serverAudio.duration || 0) * 1000);
if (SERVER_TTS_PROVIDER.startsWith("edge")) {
// "edge" audio: follow the live playback position.
startKaraokeFromAudio(phrase, serverAudio, readingDelay);
} else {
// Other providers: spread the known duration over the characters after the delay.
setTimeout(
() => startKaraokeByDuration(phrase, durationMs, readingDelay),
readingDelay
);
}
};
serverAudio.onended = () => {
stopKaraokeTimer();
// Leave the last character highlighted once the audio has finished.
const chars = chineseReadingUnits(phrase);
if (chars.length) renderKaraoke(chars, chars.length - 1);
setReplayState("ready");
};
await serverAudio.play();
return true;
} catch (error) {
console.warn("Server TTS unavailable, using browser voice.", error);
setReplayState("ready");
return false;
}
}
/**
 * Read a sentence aloud with the browser's speech synthesis and drive the karaoke.
 *
 * @param {string} text - Sentence to read aloud.
 * @returns {boolean} True when an utterance was queued; false when the text is empty
 *   or speech synthesis is not available.
 */
function speakWithBrowserTTS(text) {
  const phrase = (text || "").trim();
  if (!phrase) return false;
  if (!("speechSynthesis" in window)) return false;

  // Silence the server audio and any utterance that is still being spoken.
  serverAudio?.pause?.();
  const synthesis = window.speechSynthesis;
  synthesis.cancel();

  const utterance = new SpeechSynthesisUtterance(phrase);
  utterance.lang = "zh-CN";
  utterance.rate = BROWSER_TTS_RATE;
  utterance.pitch = 1;
  const preferredVoice = pickChineseVoice();
  if (preferredVoice) utterance.voice = preferredVoice;

  const finishReading = () => {
    stopKaraokeTimer();
    const units = chineseReadingUnits(phrase);
    if (units.length) renderKaraoke(units, units.length - 1);
    setReplayState("ready");
  };
  utterance.onstart = () => {
    setReplayState("playing");
    // Timer-based highlight; boundary events take over when the voice emits them.
    setTimeout(() => startKaraoke(phrase, BROWSER_TTS_RATE), 120);
  };
  utterance.onboundary = (event) => syncKaraokeFromBoundary(phrase, event.charIndex);
  utterance.onend = finishReading;
  utterance.onerror = () => setReplayState("ready");

  synthesis.speak(utterance);
  return true;
}
/**
 * Read a sentence aloud, preferring the server voice and falling back to the browser voice.
 * @param {string} text - Sentence to read aloud.
 * @returns {Promise<void>}
 */
async function speakChinese(text) {
  if (await speakWithServerTTS(text)) return;
  karaokeHint.textContent = SERVER_TTS_ENABLED
    ? "Using browser voice fallback."
    : "Using browser voice.";
  const started = speakWithBrowserTTS(text);
  // Nothing could be spoken at all: release the Replay button again.
  if (!started) setReplayState("ready");
}
/**
 * Send the sentence in the form to the "/corriger" endpoint and render the answer.
 *
 * Resets the previous result state (feedback panel, prepared audio, reading panel),
 * shows the loading coach, then renders the correction, updates the learning notes,
 * and prepares the reading panel for the corrected sentence.
 *
 * @param {{speak?: boolean}} [options] - When speak is true, the corrected sentence is
 *   read aloud after rendering.
 * @returns {Promise<string>} The normalised correction text, or "" when the input is
 *   empty or the request failed.
 */
async function runCorrection({ speak = false } = {}) {
const sentence = field("#sentence").value.trim();
if (!sentence) {
result.innerHTML = '<div class="placeholder"><span class="placeholder-icon">写</span><strong>Add a sentence first</strong><span>Paste or speak one Chinese sentence.</span></div>';
return "";
}
// Clear everything that belongs to the previous answer before the request starts.
setLoading(true);
currentCorrectedSentence = "";
if (copyCorrected) copyCorrected.disabled = true;
resetFeedbackPanel();
resetPreparedTTS();
readingPanel.style.display = "none";
showLoadingCoach();
try {
const app = await getClient();
const response = await app.predict("/corriger", {
context: field("#context").value,
sentence,
target_tone: "",
correction_mode: "tone-aware"
});
console.log("Gradio response", response);
const payload = extractCorrectionPayload(response);
// Prefer the structured result; otherwise derive the text from the raw response.
const data = normalizeCorrectionText(payload.result || extractResult(response));
result.innerHTML = renderMarkdown(data);
revealResult();
updateLearningHistory(data);
currentGenerationTimeSeconds = Number.isFinite(Number(payload.generation_time_seconds))
? Number(payload.generation_time_seconds)
: null;
status.textContent = currentGenerationTimeSeconds ? `Done (${currentGenerationTimeSeconds}s)` : "Done";
if (payload.metrics_error) console.warn(payload.metrics_error);
// Prefer the corrected sentence from the payload; otherwise parse it out of the text.
const corrected = payload.corrected_sentence || extractCorrectedSentence(data);
currentCorrectedSentence = corrected;
currentOriginalSentence = payload.original_sentence || sentence;
currentRequestId = payload.request_id || "";
// Rating is only offered when a request id is available.
if (currentRequestId) showFeedbackPanel();
if (copyCorrected) copyCorrected.disabled = !corrected;
if (corrected) {
readingPanel.style.display = "block";
renderKaraoke(chineseReadingUnits(corrected), -1);
if (SERVER_TTS_ENABLED) {
// Not awaited: the audio is prepared in the background while the result is already visible.
prepareServerTTS(corrected);
} else {
karaokeHint.textContent = "Browser voice ready. Press Replay to read it.";
setReplayState("ready");
}
} else {
readingPanel.style.display = "none";
}
if (speak) {
await speakChinese(corrected);
}
return data;
} catch (error) {
console.error(error);
currentCorrectedSentence = "";
if (copyCorrected) copyCorrected.disabled = true;
readingPanel.style.display = "none";
result.innerHTML = '<div class="placeholder"><span class="placeholder-icon">!</span><strong>Correction failed</strong><span>Please try again in a moment.</span></div>';
status.textContent = "Error";
return "";
} finally {
setLoading(false);
}
}
// Submitting the form runs a silent correction instead of a page navigation.
form.addEventListener("submit", async (submitEvent) => {
  submitEvent.preventDefault();
  const options = { speak: false };
  await runCorrection(options);
});
// Replay reads the last corrected sentence again. Clicks are ignored while
// there is nothing to read or while audio is already being prepared or played.
replay.addEventListener("click", async () => {
  if (!currentCorrectedSentence || replayState === "preparing" || replayState === "playing") return;
  setReplayState("preparing");
  try {
    await speakChinese(currentCorrectedSentence);
  } catch (error) {
    // Without this reset the state would stay "preparing" and the guard above
    // would reject every later click.
    console.error(error);
    setReplayState("ready");
  }
});
// Copy the corrected sentence to the clipboard and show the outcome on the
// button for 1.2 seconds before restoring its normal label.
copyCorrected?.addEventListener("click", async () => {
  if (!currentCorrectedSentence) return;

  let outcomeLabel = "Copied";
  try {
    await navigator.clipboard.writeText(currentCorrectedSentence);
  } catch (error) {
    console.error(error);
    outcomeLabel = "Copy failed";
  }

  copyCorrected.textContent = outcomeLabel;
  setTimeout(() => {
    copyCorrected.textContent = "Copy sentence";
  }, 1200);
});
// Each feedback button, when present, submits its own rating.
for (const [feedbackButton, rating] of [[thumbUp, "thumbs_up"], [thumbDown, "thumbs_down"]]) {
  feedbackButton?.addEventListener("click", () => submitFeedback(rating));
}
/**
 * Switch the "focus-mode" class on the body and relabel the toggle button.
 *
 * @param {boolean} enabled Whether focus mode should be active.
 */
function applyFocusMode(enabled) {
  document.body.classList.toggle("focus-mode", enabled);
  if (!focusMode) return;
  focusMode.textContent = enabled ? "Exit focus" : "Focus mode";
}
// Toggle focus mode and mirror it with browser fullscreen where available.
// data-fullscreen="on" records that fullscreen was entered by this button so
// the fullscreenchange handler can tell it apart from other fullscreen exits.
focusMode?.addEventListener("click", async () => {
  const rootElement = document.documentElement;
  const turningOn = !document.body.classList.contains("focus-mode");
  applyFocusMode(turningOn);

  try {
    if (turningOn) {
      if (rootElement.requestFullscreen) {
        // Mark before awaiting so the marker is already set when fullscreenchange fires.
        focusMode.dataset.fullscreen = "on";
        await rootElement.requestFullscreen();
      }
      return;
    }
    if (document.fullscreenElement) {
      focusMode.dataset.fullscreen = "";
      await document.exitFullscreen();
    }
  } catch (error) {
    // Fullscreen request or exit was rejected: keep focus mode, drop the marker.
    focusMode.dataset.fullscreen = "";
  }
});
// When fullscreen that the focus button started ends by other means,
// leave focus mode too so the page and the button stay in sync.
document.addEventListener("fullscreenchange", () => {
  const leftFullscreen = !document.fullscreenElement;
  const startedByFocusButton = focusMode?.dataset.fullscreen === "on";
  if (!(leftFullscreen && startedByFocusButton)) return;

  focusMode.dataset.fullscreen = "";
  applyFocusMode(false);
});
/**
 * Build a speech recognizer configured for continuous Mandarin dictation.
 *
 * The recognizer writes what it hears into the "#sentence" field. When a
 * session ends without the user having asked to stop, it is restarted so
 * dictation continues; when the user did ask to stop, the captured sentence is
 * corrected and read aloud.
 *
 * @returns {object|null} The configured recognizer, or null when the browser
 *   exposes neither SpeechRecognition nor webkitSpeechRecognition.
 */
function setupRecognition() {
  const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
  if (!SpeechRecognition) return null;

  const recog = new SpeechRecognition();
  recog.lang = "zh-CN";
  recog.continuous = true;
  recog.interimResults = true;
  recog.maxAlternatives = 1;

  // Finalized text accumulated across automatic restarts of one dictation.
  let finalTranscript = "";
  // Error code reported during the current session, "" when none occurred.
  let lastError = "";

  recog.onstart = () => {
    isListening = true;
    shouldCorrectAfterStop = false;
    voice.classList.add("listening");
    voice.textContent = "⏹️ Click to stop";
    status.textContent = "Listening 🎙️";
    result.innerHTML = '<div class="placeholder">Speak at your rhythm. Click the button again when your sentence is finished. 🎙️</div>';
  };

  recog.onresult = (event) => {
    let interim = "";
    for (let i = event.resultIndex; i < event.results.length; i += 1) {
      const chunk = event.results[i][0].transcript;
      if (event.results[i].isFinal) finalTranscript += chunk;
      else interim += chunk;
    }
    field("#sentence").value = (finalTranscript + interim).trim();
  };

  recog.onerror = (event) => {
    console.error("Speech recognition error", event);
    // Remember the code so onend can decide whether a restart makes sense.
    lastError = event.error || "unknown";
    result.innerHTML = '<div class="placeholder">I could not hear clearly. Please try again, slowly. 🌱</div>';
  };

  recog.onend = async () => {
    isListening = false;
    voice.classList.remove("listening");
    voice.textContent = "🎙️ Speak & correct";
    const captured = field("#sentence").value.trim();

    // Only a silence timeout ("no-speech") is worth an automatic restart.
    // Restarting after errors such as "not-allowed" or "network" fails again
    // immediately, and because the field may hold typed text that would loop
    // start -> error -> end without ever stopping.
    const restartAllowed = lastError === "" || lastError === "no-speech";
    lastError = "";

    if (captured && shouldCorrectAfterStop) {
      await runCorrection({ speak: true });
    } else if (captured && restartAllowed) {
      try {
        recog.start();
        // Keep finalTranscript so the restarted session appends to it.
        return;
      } catch (error) {
        console.error(error);
        status.textContent = "Ready";
      }
    } else {
      status.textContent = "Ready";
    }
    shouldCorrectAfterStop = false;
    finalTranscript = "";
  };

  return recog;
}
// The voice button starts dictation, or, while listening, stops it and asks
// for the captured sentence to be corrected once the recognizer has ended.
voice.addEventListener("click", () => {
  // Create the recognizer on first use.
  recognition = recognition || setupRecognition();
  if (!recognition) {
    result.innerHTML = '<div class="placeholder">Voice mode is not supported in this browser. Chrome or Edge usually work best. 🌱</div>';
    return;
  }

  if (!isListening) {
    try {
      recognition.start();
    } catch (error) {
      console.error(error);
    }
    return;
  }

  shouldCorrectAfterStop = true;
  status.textContent = "Correcting...";
  recognition.stop();
});
// Clicking an example copies its data-context and data-text into the form fields.
for (const exampleButton of document.querySelectorAll(".example")) {
  exampleButton.addEventListener("click", () => {
    const { context, text } = exampleButton.dataset;
    field("#context").value = context;
    field("#sentence").value = text;
  });
}
</script>
</body>
</html>
"""
@app.get("/", response_class=HTMLResponse)
async def index():
    """Return the frontend page with its runtime placeholders filled in."""
    # Applied in this order, one placeholder at a time.
    substitutions = (
        ("__SERVER_TTS_ENABLED__", "true" if SERVER_TTS_ENABLED else "false"),
        ("__TTS_PROVIDER__", TTS_PROVIDER),
        ("__EDGE_TTS_KARAOKE_DURATION_FACTOR__", str(EDGE_TTS_KARAOKE_DURATION_FACTOR)),
    )
    page = FRONTEND_HTML
    for placeholder, value in substitutions:
        page = page.replace(placeholder, value)
    return page
# Second name for the same application object.
# NOTE(review): presumably `demo` is the name the hosting runtime looks up - confirm.
demo = app
# Launch only when this file is executed directly; server-side rendering is turned off.
if __name__ == "__main__":
    demo.launch(ssr_mode=False)