Mitchell Kilpatrick SE2022 committed on
Commit
b24a61c
·
1 Parent(s): 2b6b06d

Prompt engineering

Browse files
Files changed (1) hide show
  1. app.py +40 -5
app.py CHANGED
@@ -18,16 +18,51 @@ print("Model loaded.")
18
  class TextRequest(BaseModel):
19
  text: str
20
 
 
 
 
 
 
 
21
 
22
- @app.post("/predict")
23
- def predict(request: TextRequest):
24
- prompt = f"Text: {request.text}\nIPA:"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
27
 
28
  with torch.no_grad():
29
- outputs = model.generate(**inputs, max_new_tokens=64)
 
 
 
 
30
 
 
31
  result = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 
 
 
 
 
 
32
 
33
- return {"ipa": result}
 
18
class TextRequest(BaseModel):
    """Request body for the /predict endpoint.

    Carries the Scottish Gaelic text to be transcribed into IPA.
    """

    # Raw input text; validated/coerced to str by pydantic.
    text: str
20
 
21
def text_to_ipa(text: str) -> str:
    """Convert Scottish Gaelic *text* into an IPA transcription.

    Builds a few-shot prompt, runs a deterministic (greedy) generation on
    the module-level ``model``/``tokenizer``, and returns only the IPA
    string produced for *text*.

    Returns:
        The IPA transcription (first generated line), stripped of
        surrounding whitespace; may be ``""`` if the model emits nothing.
    """
    # Few-shot examples for better IPA predictions
    prompt = f"""
You are a Scottish Gaelic teacher.
Convert Scottish Gaelic text into the International Phonetic Alphabet (IPA).
Only return the IPA transcription.

Examples:
Text: halò
IPA: /haˈloː/

Text: là
IPA: /l̪ˠaː/

Text: uisge
IPA: /ˈɯʃkʲə/

Text: bàta
IPA: /ˈpaːt̪ə/

Text: ceòl
IPA: /kʲɔːl̪ˠ/

Now convert:

Text: {text}
IPA:
"""

    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=64,
            do_sample=False,  # deterministic output
        )

    # BUG FIX: the previous code decoded the whole sequence and took
    # split("IPA:")[-1] — the LAST occurrence. A causal LM given a few-shot
    # prompt often keeps generating extra "Text:/IPA:" pairs after the real
    # answer, so the last occurrence is the runoff's IPA, not the answer for
    # the submitted text. Strip the echoed prompt at token level instead.
    prompt_len = inputs["input_ids"].shape[1]
    if outputs.shape[-1] > prompt_len and torch.equal(
        outputs[0][:prompt_len], inputs["input_ids"][0]
    ):
        # Decoder-only model: the output sequence echoes the prompt tokens,
        # so everything after prompt_len is the actual generation.
        generated = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
    else:
        # NOTE(review): fallback assumes an encoder-decoder model whose
        # output contains only the generation — confirm which architecture
        # `model` is loaded as.
        generated = tokenizer.decode(outputs[0], skip_special_tokens=True)
        generated = generated.split("IPA:")[-1]

    # Keep only the first line — anything after a newline is model runoff
    # (e.g. a continued "Text: ..." example). split() never raises, so an
    # empty generation safely yields "".
    return generated.strip().split("\n", 1)[0].strip()
62
+
63
+
64
@app.post("/predict")
def predict(request: TextRequest):
    """Transcribe the submitted Scottish Gaelic text into IPA.

    Delegates the model work to ``text_to_ipa`` and wraps the result in
    a JSON-serializable dict: ``{"ipa": <transcription>}``.
    """
    return {"ipa": text_to_ipa(request.text)}
68