Mitchell Kilpatrick SE2022 committed on
Commit
b24a61c
·
1 Parent(s): 2b6b06d

Prompt engineering

Browse files
Files changed (1) hide show
  1. app.py +40 -5
app.py CHANGED
@@ -18,16 +18,51 @@ print("Model loaded.")
18
  class TextRequest(BaseModel):
19
  text: str
20
 
 
 
 
 
 
 
21
 
22
- @app.post("/predict")
23
- def predict(request: TextRequest):
24
- prompt = f"Text: {request.text}\nIPA:"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
27
 
28
  with torch.no_grad():
29
- outputs = model.generate(**inputs, max_new_tokens=64)
 
 
 
 
30
 
 
31
  result = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 
 
 
 
 
 
32
 
33
- return {"ipa": result}
 
18
class TextRequest(BaseModel):
    """Request body for the /predict endpoint.

    Carries the Scottish Gaelic text to be transcribed into IPA.
    """

    # Raw input text; validated/coerced to str by pydantic.
    text: str
20
 
21
def text_to_ipa(text: str) -> str:
    """Convert Scottish Gaelic *text* into an IPA transcription.

    Builds a few-shot prompt, runs a deterministic (greedy) generation on
    the module-level ``model``/``tokenizer``, and returns only the IPA
    string produced for *text*.

    Returns:
        The IPA transcription (first generated line), stripped of
        surrounding whitespace; may be ``""`` if the model emits nothing.
    """
    # Few-shot examples for better IPA predictions
    prompt = f"""
You are a Scottish Gaelic teacher.
Convert Scottish Gaelic text into the International Phonetic Alphabet (IPA).
Only return the IPA transcription.

Examples:
Text: halò
IPA: /haˈloː/

Text: là
IPA: /l̪ˠaː/

Text: uisge
IPA: /ˈɯʃkʲə/

Text: bàta
IPA: /ˈpaːt̪ə/

Text: ceòl
IPA: /kʲɔːl̪ˠ/

Now convert:

Text: {text}
IPA:
"""

    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=64,
            do_sample=False,  # deterministic output
        )

    # BUG FIX: the previous code decoded the whole sequence and took
    # split("IPA:")[-1] — the LAST occurrence. A causal LM given a few-shot
    # prompt often keeps generating extra "Text:/IPA:" pairs after the real
    # answer, so the last occurrence is the runoff's IPA, not the answer for
    # the submitted text. Strip the echoed prompt at token level instead.
    prompt_len = inputs["input_ids"].shape[1]
    if outputs.shape[-1] > prompt_len and torch.equal(
        outputs[0][:prompt_len], inputs["input_ids"][0]
    ):
        # Decoder-only model: the output sequence echoes the prompt tokens,
        # so everything after prompt_len is the actual generation.
        generated = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
    else:
        # NOTE(review): fallback assumes an encoder-decoder model whose
        # output contains only the generation — confirm which architecture
        # `model` is loaded as.
        generated = tokenizer.decode(outputs[0], skip_special_tokens=True)
        generated = generated.split("IPA:")[-1]

    # Keep only the first line — anything after a newline is model runoff
    # (e.g. a continued "Text: ..." example). split() never raises, so an
    # empty generation safely yields "".
    return generated.strip().split("\n", 1)[0].strip()
62
+
63
+
64
@app.post("/predict")
def predict(request: TextRequest):
    """Transcribe the submitted Scottish Gaelic text into IPA.

    Delegates the model work to ``text_to_ipa`` and wraps the result in
    a JSON-serializable dict: ``{"ipa": <transcription>}``.
    """
    return {"ipa": text_to_ipa(request.text)}
68