maya-voice-agent / src /gujarati_processor.py
rudyByte
feat(E5): complete Gujarati perfection — Sarvam STT, fixed TTS, code-mix prompts
ebf9701
"""
gujarati_processor.py — Gujarati Language Post-Processor for Maya
FOUR FUNCTIONS:
1. correct_transcript(text) — fixes common STT errors in Gujarati
2. add_natural_fillers(text) — adds human-like filler words to TTS output
3. build_gujarati_system_prompt_addon() — returns the code-mixing
instruction block to append to Maya's system prompt
4. build_hindi_system_prompt_addon() — returns the Hindi style
instruction block
"""
import re
import random
from typing import Optional
# ── TRANSCRIPT CORRECTIONS ────────────────────────────────────────────────────
# Ordered table of regex pattern -> replacement text for romanized
# transcripts. Insertion order matters: correct_transcript() applies the
# entries one after another, each on the output of the previous one.
TRANSCRIPT_CORRECTIONS = {
    # --- Words misheard in a phone-call context ---
    # "shu" (what) transcribed as the English word "shoe".
    r"\bshoe\b": "shu",
    # Spelling variants of the greeting "kem cho".
    r"\bkhem cho\b": "kem cho",
    r"\bchem cho\b": "kem cho",
    # "kal" (tomorrow) followed by another word: the replacement equals the
    # match, so only the letter case can change.
    r"\bkal\b(?= [a-z])": "kal",
    # "kal" sometimes transcribed as "call" when another word follows.
    r"\bcall\b(?= [a-z])": "kal",

    # --- Appointment / dental vocabulary ---
    r"\bapoinment\b": "appointment",
    r"\bapointment\b": "appointment",
    r"\bdocter\b": "doctor",
    r"\bdenter\b": "dentist",
    r"\bcleeaning\b": "cleaning",
    r"\bclining\b": "cleaning",

    # --- Time expressions ---
    # "baje" (o'clock).
    r"\bbaze\b": "baje",
    r"\bbaj\b(?=\s)": "baje",
    # "vage" (o'clock variant).
    r"\bvagye\b": "vage",
    r"\bvaagye\b": "vage",

    # --- Ahmedabad locality names ---
    r"\bvastrar\b": "Vastrapur",
    r"\bsatelite\b": "Satellite",
    r"\bnavrang\b": "Navrangpura",
    r"\bnaranpur\b": "Naranpura",

    # --- Gujarati filler words ---
    # "haa" (yes/okay).
    r"\bhan\b": "haa",
    # "theek" (okay).
    r"\bthik\b": "theek",
}
def correct_transcript(text: str) -> str:
    """Apply every TRANSCRIPT_CORRECTIONS rule to a transcript.

    Each pattern in the module-level TRANSCRIPT_CORRECTIONS table is
    substituted case-insensitively, in table order, and each rule runs on
    the output of the previous one.

    Args:
        text: Transcript text to clean up.

    Returns:
        The corrected transcript. A falsy input (empty string or None) is
        returned unchanged.
    """
    if text:
        for wrong, right in TRANSCRIPT_CORRECTIONS.items():
            text = re.sub(wrong, right, text, flags=re.IGNORECASE)
    return text
# ── NATURAL GUJARATI FILLERS ──────────────────────────────────────────────────
# Openers that add_natural_fillers() may prepend to a Gujarati response.
# Every entry carries its own trailing ", " so it can be concatenated as-is.
GUJARATI_FILLERS = [
    "Juo, ",          # "Look, "
    "Haa, ",          # "Yes, " (affirmative opener)
    "Acha, ",         # "Okay, " (acknowledgment)
    "Bilkul, ",       # "Absolutely, "
    "Theek chhe, ",   # "That's fine, "
    "Samjhi gayi, ",  # "I understand, " (Maya referring to herself)
]

# Hindi counterparts of the openers above.
HINDI_FILLERS = [
    "Jee, ",          # "Yes, "
    "Haan, ",         # "Yes/Okay, "
    "Achha, ",        # "Okay, "
    "Bilkul, ",       # "Absolutely, "
    "Theek hai, ",    # "That's fine, "
    "Samajh gayi, ",  # "I understand, "
]

# English openers.
ENGLISH_FILLERS = [
    "Sure, ",
    "Of course, ",
    "Absolutely, ",
    "Got it, ",
    "I see, ",
]

# Lookup from language name to its filler list; the values are the list
# objects defined above, not copies.
FILLER_MAP = dict(
    gujarati=GUJARATI_FILLERS,
    hindi=HINDI_FILLERS,
    english=ENGLISH_FILLERS,
)
# Words/phrases that already open a response naturally — no filler is added
# in front of a response that starts with one of these.
#
# The list covers every opener found in the filler lists of this module
# ("Jee", "Samajh", "Absolutely", "Got it" were previously missing), so a
# response that already begins with a filler is not given a second one.
# Duplicate entries were dropped; they had no effect on matching.
NATURAL_STARTERS = [
    # Gujarati
    "Namaste", "Haa", "Juo", "Acha", "Bilkul", "Theek", "Samjhi",
    # Don't add filler before times/numbers
    "3", "4", "5", "6",
    "kal", "aaj", "parso",
    # Hindi
    "Jee", "Haan", "Achha", "Samajh",
    # English
    "Hello", "Sure", "Of course", "Absolutely", "Got it", "Yes", "No", "I",
    # Empathy openers from E2 — already natural
    "Mane", "Hu samjhi", "Maaf", "Juo samjho",
]
def add_natural_fillers(
    text: str,
    language: str = "gujarati",
    probability: float = 0.35,
) -> str:
    """Randomly prepend a conversational filler word to a response.

    The text is returned unchanged when any of these holds:
      * it is empty or shorter than 15 characters;
      * it already begins with an entry of NATURAL_STARTERS;
      * it is a short question (ends with "?" and has fewer than 8 words);
      * the random draw is not below ``probability``.

    Otherwise a filler is picked at random from the list for ``language``
    and placed in front of the text.

    Args:
        text: Response text about to be spoken.
        language: Key into FILLER_MAP. Matched case-insensitively and
            ignoring surrounding whitespace; unknown values fall back to
            the Gujarati fillers.
        probability: Chance, compared against ``random.random()``, that a
            filler is added once the other checks pass.

    Returns:
        The original text, or ``filler + text`` with the first letter of
        the text lowercased where applicable.
    """
    if not text or len(text) < 15:
        return text

    # Starter detection is case-insensitive and word-aware. A plain prefix
    # test let "I" swallow "It ..." and "No" swallow "Now ...", while the
    # lowercase entries ("kal", "aaj") never matched a capitalised response.
    lowered = text.lower()
    for starter in NATURAL_STARTERS:
        opener = starter.lower()
        if not lowered.startswith(opener):
            continue
        following = lowered[len(opener):len(opener) + 1]
        # Reject the match only when a letter-ending starter runs straight
        # into another letter. Numeric starters keep plain prefix matching
        # ("3" still covers "3:30" and "30").
        if opener[-1:].isalpha() and following.isalpha():
            continue
        return text

    # Don't add filler before short, pure questions.
    if text.strip().endswith("?") and len(text.split()) < 8:
        return text

    # Probabilistic application.
    if random.random() >= probability:
        return text

    # Normalise the key so "Hindi" or " english " select the right list
    # instead of silently falling back to Gujarati.
    language_key = language.strip().lower() if isinstance(language, str) else language
    fillers = FILLER_MAP.get(language_key, FILLER_MAP["gujarati"])
    chosen = random.choice(fillers)

    # Lowercase the first letter now that it follows the filler, unless the
    # first two characters are upper-case per str.isupper() (e.g. "OK").
    if text[0].isupper() and not text[:2].isupper():
        text = text[0].lower() + text[1:]
    return chosen + text
# ── SYSTEM PROMPT ADDON ───────────────────────────────────────────────────────
def build_gujarati_system_prompt_addon() -> str:
return """
GUJARATI LANGUAGE STYLE β€” MANDATORY RULES:
You are speaking CONVERSATIONAL Gujarati on a phone call.
Real Gujarati speakers mix Gujarati grammar with English nouns.
This is called "code-mixing" and it sounds completely natural.
CORRECT examples of how to respond:
βœ… "Haa, appointment available chhe. Tamaro naam shu chhe?"
βœ… "3 baje slot chhe. Chaleshe?"
βœ… "Ek minute, check karu chhu."
βœ… "Doctor sathe consultation β‚Ή200 chhe."
βœ… "OK, booking confirm thi gayi."
βœ… "Maafi maango, 3 baje busy chhe. 4 baje available chhe?"
WRONG β€” never say these (too formal, nobody speaks like this):
❌ "Haa, niyuktisthan upalabdh chhe. Tamaro naam shu chhe?"
❌ "Trann vage ni jagya chhe. Svikar karso?"
❌ "Ek pal, parischay karu chhu."
❌ "Vaidya sathe paramarsh β‚Ή200 chhe."
❌ "Theek, pratishtha nischit thi gayi."
GRAMMAR RULES for natural Gujarati:
- Use Gujarati verb endings: "chhe", "chhu", "karu", "karso", "thi gayi"
- Use English nouns directly: "appointment", "doctor", "booking", "slot"
- Use English numbers with Gujarati time: "3 baje", "4 vage", "10 minute"
- Short sentences only. Maximum 10 words per sentence.
- Never use long Sanskrit-derived Gujarati compound words.
- Say "check karu chhu" not "thaapan karu chhu"
- Say "confirm thi gayi" not "nischit thi gayi"
- Say "available chhe" not "upalabdh chhe"
RESPONSE LENGTH:
Phone call responses must be SHORT.
Maximum 2 sentences per response.
If the answer is one sentence β€” use ONE sentence.
Do not add pleasantries or padding.
FILLER SOUNDS (already handled by code β€” do NOT add these in your text):
The system adds "Juo,", "Haa,", "Acha," automatically.
Do not start responses with filler words yourself.
""".strip()
def build_hindi_system_prompt_addon() -> str:
    """Return the Hindi conversational-style instruction block.

    The block is a fixed string with leading and trailing whitespace
    stripped. It holds correct/wrong example sentences and the
    response-length limits.

    The literal previously contained mis-decoded UTF-8 sequences in place of
    the em dash, the check-mark emoji and the rupee sign; they are restored
    here so the prompt no longer carries garbled characters.

    Returns:
        The instruction text.
    """
    return """
HINDI LANGUAGE STYLE — MANDATORY RULES:
Conversational Hindi on a phone call. Mix Hindi grammar with English nouns.
CORRECT examples:
✅ "Haan, appointment available hai. Aapka naam kya hai?"
✅ "3 baje slot hai. Chalega?"
✅ "Ek minute, check karti hoon."
✅ "Doctor ke saath consultation ₹200 hai."
✅ "OK, booking confirm ho gayi."
WRONG (too formal):
❌ "Haan, niyukti upalabdh hai."
❌ "Tin baje ka sthan hai."
SHORT RESPONSES ONLY. Maximum 2 sentences. Max 10 words per sentence.
""".strip()