Alphaplasti committed on
Commit
cf90e02
·
verified ·
1 Parent(s): aa34582

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +142 -49
app.py CHANGED
@@ -88,7 +88,7 @@ tts_load_error: Optional[str] = None
88
  app = gr.Server()
89
 
90
 
91
- SYSTEM_PROMPT = """You are a Mandarin Chinese teacher for beginner learners.
92
 
93
  Your task is to correct ONE student Chinese sentence according to the selected context and tone.
94
  Your default behavior is conservative minimal correction.
@@ -152,10 +152,22 @@ Chinese sentences must stay in Chinese characters.
152
  Give only one corrected sentence.
153
  Add at most one gentle emoji in Why or Tip, never inside Chinese sentences.
154
 
 
 
 
 
 
 
 
 
 
 
 
 
155
  Required format:
156
  Original sentence: <student sentence>
157
  Corrected sentence: <corrected sentence or identical original>
158
- Error type: <none / wrong character / wrong word / missing word / extra word / measure word / word order / grammar / tone>
159
  Why: <short beginner-friendly explanation in English>
160
  Tip: <one short practical tip in English>
161
 
@@ -705,14 +717,117 @@ Add one line "Error type" with a short category: character/input mistake, gramma
705
  Use real line breaks between sections. Do not output escaped newline characters like \\n.
706
  Do not write a long paragraph. Maximum 5 short lines.
707
  /no_think"""
708
-
709
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
710
  @spaces.GPU(duration=90)
711
  def _generate_correction_gpu(
712
  context: str,
713
  sentence: str,
714
  target_tone: str,
715
- correction_mode: str = "Minimal correction",
716
  ) -> str:
717
  sentence = (sentence or "").strip()
718
  if not sentence:
@@ -781,9 +896,10 @@ def generate_correction(
781
  context: str,
782
  sentence: str,
783
  target_tone: str,
784
- correction_mode: str = "Minimal correction",
785
  ) -> dict:
786
  original_sentence = (sentence or "").strip()
 
787
  started = time.perf_counter()
788
  result = _generate_correction_gpu(context, sentence, target_tone, correction_mode)
789
  generation_time_seconds = round(time.perf_counter() - started, 3)
@@ -800,9 +916,9 @@ def generate_correction(
800
  "request_id": request_id,
801
  "created_at": utc_now_iso(),
802
  "model_id": MODEL_ID,
803
- "context": normalize_space(context),
804
- "target_tone": normalize_space(target_tone),
805
- "correction_mode": normalize_space(correction_mode),
806
  "original_sentence": original_sentence,
807
  "corrected_sentence": corrected_sentence,
808
  "evaluation": None,
@@ -2413,37 +2529,16 @@ FRONTEND_HTML = r"""
2413
  </div>
2414
  </div>
2415
 
2416
- <div class="field-grid">
2417
- <div class="field">
2418
- <label for="context">Context</label>
2419
- <select id="context">
2420
- <option>Friends</option>
2421
- <option>Family</option>
2422
- <option>Work</option>
2423
- <option>WeChat</option>
2424
- </select>
2425
- </div>
2426
-
2427
- <div class="field">
2428
- <label for="tone">Tone</label>
2429
- <select id="tone">
2430
- <option>Neutral polite</option>
2431
- <option>Friendly</option>
2432
- <option>Polite</option>
2433
- <option>Very polite</option>
2434
- <option>Formal</option>
2435
- <option>Natural spoken</option>
2436
- </select>
2437
- </div>
2438
- </div>
2439
-
2440
  <div class="field">
2441
- <label for="mode">Correction style</label>
2442
- <select id="mode">
2443
- <option selected>Minimal correction</option>
2444
- <option>Natural correction</option>
 
 
 
2445
  </select>
2446
- <p class="field-note">Minimal fixes only what is needed. Natural keeps your meaning but smooths the sentence for the chosen context.</p>
2447
  </div>
2448
 
2449
  <div class="field">
@@ -2459,10 +2554,10 @@ FRONTEND_HTML = r"""
2459
 
2460
  <p class="examples-label">Examples: tap one to fill the form.</p>
2461
  <div class="examples" aria-label="Examples">
2462
- <button class="example" type="button" data-context="Friends" data-tone="Natural spoken" data-mode="Minimal correction" data-text="我今天想喝书。">Wrong word</button>
2463
- <button class="example" type="button" data-context="Family" data-tone="Natural spoken" data-mode="Minimal correction" data-text="火车站在超市的旁边">Already correct</button>
2464
- <button class="example" type="button" data-context="Friends" data-tone="Friendly" data-mode="Natural correction" data-text="桌子的上有一个猫">Measure word</button>
2465
- <button class="example" type="button" data-context="WeChat" data-tone="Friendly" data-mode="Natural correction" data-text="我可以借你的笔吗?">Context</button>
2466
  </div>
2467
 
2468
  <div class="learning-history">
@@ -3288,8 +3383,8 @@ FRONTEND_HTML = r"""
3288
  const response = await app.predict("/corriger", {
3289
  context: field("#context").value,
3290
  sentence,
3291
- target_tone: field("#tone").value,
3292
- correction_mode: field("#mode").value
3293
  });
3294
  console.log("Gradio response", response);
3295
  const payload = extractCorrectionPayload(response);
@@ -3470,14 +3565,12 @@ FRONTEND_HTML = r"""
3470
  }
3471
  });
3472
 
3473
- document.querySelectorAll(".example").forEach((button) => {
3474
- button.addEventListener("click", () => {
3475
  field("#context").value = button.dataset.context;
3476
- field("#tone").value = button.dataset.tone;
3477
- field("#mode").value = button.dataset.mode || "Minimal correction";
3478
  field("#sentence").value = button.dataset.text;
3479
  });
3480
- });
3481
 
3482
  </script>
3483
  </body>
 
88
  app = gr.Server()
89
 
90
 
91
+ SYSTEM_PROMPT = """You are ToneBridge, a Mandarin Chinese teacher for beginner learners.
92
 
93
  Your task is to correct ONE student Chinese sentence according to the selected context and tone.
94
  Your default behavior is conservative minimal correction.
 
152
  Give only one corrected sentence.
153
  Add at most one gentle emoji in Why or Tip, never inside Chinese sentences.
154
 
155
+ Allowed Error type values:
156
+ none
157
+ character/input-method mistake
158
+ wrong character
159
+ wrong word
160
+ missing word
161
+ extra word
162
+ measure word
163
+ word order
164
+ grammar
165
+ tone
166
+
167
  Required format:
168
  Original sentence: <student sentence>
169
  Corrected sentence: <corrected sentence or identical original>
170
+ Error type: <one allowed Error type value>
171
  Why: <short beginner-friendly explanation in English>
172
  Tip: <one short practical tip in English>
173
 
 
717
  Use real line breaks between sections. Do not output escaped newline characters like \\n.
718
  Do not write a long paragraph. Maximum 5 short lines.
719
  /no_think"""
720
+
721
+
722
+ CONTEXT_TONE_PROFILES = {
723
+ "amical-informel": {
724
+ "context": "friendly everyday conversation with a friend or close person",
725
+ "tone": "informal friendly",
726
+ "correction_style": "tone-aware",
727
+ "instruction": (
728
+ "Keep the sentence simple, natural, and friendly. Prefer everyday spoken wording. "
729
+ "Use \u4f60 when a pronoun is needed. Avoid \u60a8, \u662f\u5426, ceremonial, literary, or stiff formal wording."
730
+ ),
731
+ },
732
+ "work-informel": {
733
+ "context": "workplace message to a colleague or familiar coworker",
734
+ "tone": "informal professional",
735
+ "correction_style": "tone-aware",
736
+ "instruction": (
737
+ "Keep the sentence clear, polite, and work-appropriate without sounding stiff. "
738
+ "Avoid slang, but do not over-formalize if the original is already natural."
739
+ ),
740
+ },
741
+ "work-formel": {
742
+ "context": "workplace message to a manager, client, teacher, or formal contact",
743
+ "tone": "formal professional",
744
+ "correction_style": "tone-aware",
745
+ "instruction": (
746
+ "Use respectful, professional wording when needed. \u60a8 and \u8bf7 may be appropriate. "
747
+ "Avoid overly casual phrasing if the relationship requires formality."
748
+ ),
749
+ },
750
+ "wechat-informel": {
751
+ "context": "WeChat message to a friend or close contact",
752
+ "tone": "informal instant message",
753
+ "correction_style": "tone-aware",
754
+ "instruction": (
755
+ "Prefer short, direct instant-message wording. Use \u4f60, \u6709\u7a7a, \u65b9\u4fbf, "
756
+ "\u4e00\u4e0b, \u5417, or \u5427 when appropriate. Avoid \u60a8, \u662f\u5426, "
757
+ "\u656c\u8bf7, \u9601\u4e0b, \u62e8\u5197, \u8385\u4e34, and invitation-letter style."
758
+ ),
759
+ },
760
+ "wechat-formel": {
761
+ "context": "WeChat message in a professional or formal relationship",
762
+ "tone": "formal concise instant message",
763
+ "correction_style": "tone-aware",
764
+ "instruction": (
765
+ "Keep the message concise like WeChat, but respectful. \u8bf7 and \u60a8 may be appropriate. "
766
+ "Avoid both casual slang and overly ceremonial letter-style wording."
767
+ ),
768
+ },
769
+ }
770
+
771
+
772
+ CONTEXT_TONE_ALIASES = {
773
+ "friends": "amical-informel",
774
+ "family": "amical-informel",
775
+ "friendly": "amical-informel",
776
+ "work": "work-formel",
777
+ "wechat": "wechat-informel",
778
+ }
779
+
780
+
781
+ def normalize_context_tone(value: str) -> str:
782
+ key = normalize_space(value).lower().replace("_", "-")
783
+ key = re.sub(r"\s+", "-", key)
784
+ return CONTEXT_TONE_ALIASES.get(key, key if key in CONTEXT_TONE_PROFILES else "amical-informel")
785
+
786
+
787
+ def context_tone_profile(value: str) -> dict:
788
+ key = normalize_context_tone(value)
789
+ profile = dict(CONTEXT_TONE_PROFILES[key])
790
+ profile["key"] = key
791
+ return profile
792
+
793
+
794
+ def build_user_prompt(context: str, sentence: str, target_tone: str = "", correction_mode: str = "") -> str:
795
+ profile = context_tone_profile(context)
796
+ sentence = (sentence or "").strip()
797
+
798
+ return f"""Selected context-tone: {profile["key"]}
799
+ Context: {profile["context"]}
800
+ Tone: {profile["tone"]}
801
+ Correction style: {profile["correction_style"]}
802
+ Profile instruction: {profile["instruction"]}
803
+ Explanation language: English only
804
+
805
+ Student's Chinese sentence:
806
+ {sentence}
807
+
808
+ Before correcting, decide whether the sentence is already correct, natural, and appropriate for the selected context-tone.
809
+ If it is correct, keep exactly the same sentence in "Corrected sentence".
810
+ In that case, use "none" as the error type and explain simply that the sentence is correct.
811
+ Correct the sentence while preserving its intention and length.
812
+ Prefer the smallest possible correction.
813
+ Do not turn a short sentence into a long sentence.
814
+ The "Corrected sentence" line must contain only one Chinese sentence. Do not add a second option, leftover characters, notes, vocabulary, or pinyin after it.
815
+ Do not add names, emotions, encouragement, or information that was not in the original sentence.
816
+ Do not replace a correct sentence with a paraphrase.
817
+ If you replace one Chinese character with another character that has the same or very close pinyin, mention in "Why" that it is probably a character/input-method mistake.
818
+ All explanations, titles, and tips must be in English.
819
+ Use real line breaks between sections. Do not output escaped newline characters like \\n.
820
+ Do not write a long paragraph. Maximum 5 short lines.
821
+ Now correct the input sentence.
822
+ /no_think"""
823
+
824
+
825
  @spaces.GPU(duration=90)
826
  def _generate_correction_gpu(
827
  context: str,
828
  sentence: str,
829
  target_tone: str,
830
+ correction_mode: str = "tone-aware",
831
  ) -> str:
832
  sentence = (sentence or "").strip()
833
  if not sentence:
 
896
  context: str,
897
  sentence: str,
898
  target_tone: str,
899
+ correction_mode: str = "tone-aware",
900
  ) -> dict:
901
  original_sentence = (sentence or "").strip()
902
+ profile = context_tone_profile(context)
903
  started = time.perf_counter()
904
  result = _generate_correction_gpu(context, sentence, target_tone, correction_mode)
905
  generation_time_seconds = round(time.perf_counter() - started, 3)
 
916
  "request_id": request_id,
917
  "created_at": utc_now_iso(),
918
  "model_id": MODEL_ID,
919
+ "context": profile["key"],
920
+ "target_tone": profile["tone"],
921
+ "correction_mode": profile["correction_style"],
922
  "original_sentence": original_sentence,
923
  "corrected_sentence": corrected_sentence,
924
  "evaluation": None,
 
2529
  </div>
2530
  </div>
2531
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2532
  <div class="field">
2533
+ <label for="context">Context & tone</label>
2534
+ <select id="context">
2535
+ <option selected>amical-informel</option>
2536
+ <option>work-informel</option>
2537
+ <option>work-formel</option>
2538
+ <option>wechat-informel</option>
2539
+ <option>wechat-formel</option>
2540
  </select>
2541
+ <p class="field-note">ToneBridge applies a conservative tone-aware correction for the selected situation.</p>
2542
  </div>
2543
 
2544
  <div class="field">
 
2554
 
2555
  <p class="examples-label">Examples: tap one to fill the form.</p>
2556
  <div class="examples" aria-label="Examples">
2557
+ <button class="example" type="button" data-context="amical-informel" data-text="我今天想喝书。">Wrong word</button>
2558
+ <button class="example" type="button" data-context="amical-informel" data-text="火车站在超市的旁边">Already correct</button>
2559
+ <button class="example" type="button" data-context="work-formel" data-text="桌子的上有一个猫">Measure word</button>
2560
+ <button class="example" type="button" data-context="wechat-informel" data-text="您今晚是否方便出来?">Context</button>
2561
  </div>
2562
 
2563
  <div class="learning-history">
 
3383
  const response = await app.predict("/corriger", {
3384
  context: field("#context").value,
3385
  sentence,
3386
+ target_tone: "",
3387
+ correction_mode: "tone-aware"
3388
  });
3389
  console.log("Gradio response", response);
3390
  const payload = extractCorrectionPayload(response);
 
3565
  }
3566
  });
3567
 
3568
+ document.querySelectorAll(".example").forEach((button) => {
3569
+ button.addEventListener("click", () => {
3570
  field("#context").value = button.dataset.context;
 
 
3571
  field("#sentence").value = button.dataset.text;
3572
  });
3573
+ });
3574
 
3575
  </script>
3576
  </body>