Rogaton Claude committed on
Commit
9ebe8e4
·
1 Parent(s): d7507b9

Load models directly instead of using pipeline API

Browse files

- Use AutoTokenizer and AutoModelForSeq2SeqLM directly
- Implement Coptic-Greek character mapping from handler.py
- Add dialect tags (з for Sahidic, б for Bohairic)
- Properly preprocess input text before translation
- Fixes pipeline_tag and task type errors

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +90 -31
app.py CHANGED
@@ -5,7 +5,8 @@ Supports Coptic↔English translation using megalaa models
5
  """
6
 
7
  import gradio as gr
8
- from transformers import pipeline
 
9
 
10
  # Coptic alphabet for virtual keyboard
11
  COPTIC_LETTERS = [
@@ -14,31 +15,61 @@ COPTIC_LETTERS = [
14
  'ϣ', 'ϥ', 'ϧ', 'ϩ', 'ϫ', 'ϭ', 'ϯ'
15
  ]
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  # Model caching
18
- coptic_to_english_pipe = None
19
- english_to_coptic_pipe = None
 
20
 
21
  def load_coptic_to_english():
22
- """Load Coptic → English translation pipeline"""
23
- global coptic_to_english_pipe
24
- if coptic_to_english_pipe is None:
25
- coptic_to_english_pipe = pipeline(
26
- task="text2text-generation",
27
- model="megalaa/coptic-english-translator",
28
- trust_remote_code=True
29
- )
30
- return coptic_to_english_pipe
31
 
32
  def load_english_to_coptic():
33
- """Load English → Coptic translation pipeline"""
34
- global english_to_coptic_pipe
35
- if english_to_coptic_pipe is None:
36
- english_to_coptic_pipe = pipeline(
37
- task="text2text-generation",
38
- model="megalaa/english-coptic-translator",
39
- trust_remote_code=True
40
- )
41
- return english_to_coptic_pipe
42
 
43
  def translate_coptic_to_english(text, dialect):
44
  """Translate Coptic to English"""
@@ -46,15 +77,30 @@ def translate_coptic_to_english(text, dialect):
46
  return "Please enter Coptic text to translate."
47
 
48
  try:
49
- pipe = load_coptic_to_english()
 
 
 
50
 
51
- # Use from_bohairic parameter if Bohairic dialect selected
52
  if dialect == "Bohairic":
53
- result = pipe(text, from_bohairic=True)
54
  else:
55
- result = pipe(text)
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
- return result['translation']
58
  except Exception as e:
59
  return f"Translation error: {str(e)}"
60
 
@@ -64,15 +110,28 @@ def translate_english_to_coptic(text, dialect):
64
  return "Please enter English text to translate."
65
 
66
  try:
67
- pipe = load_english_to_coptic()
68
 
69
- # Use to_bohairic parameter if Bohairic dialect selected
70
  if dialect == "Bohairic":
71
- result = pipe(text, to_bohairic=True)
72
  else:
73
- result = pipe(text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
- return result['translation']
76
  except Exception as e:
77
  return f"Translation error: {str(e)}"
78
 
 
5
  """
6
 
7
  import gradio as gr
8
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
9
+ import torch
10
 
11
  # Coptic alphabet for virtual keyboard
12
  COPTIC_LETTERS = [
 
15
  'ϣ', 'ϥ', 'ϧ', 'ϩ', 'ϫ', 'ϭ', 'ϯ'
16
  ]
17
 
18
+ # Coptic-Greek character mappings (from handler.py)
19
+ COPTIC_TO_GREEK = {
20
+ "ⲁ": "α", "ⲃ": "β", "ⲅ": "γ", "ⲇ": "δ", "ⲉ": "ε", "ⲋ": "ϛ",
21
+ "ⲍ": "ζ", "ⲏ": "η", "ⲑ": "θ", "ⲓ": "ι", "ⲕ": "κ", "ⲗ": "λ",
22
+ "ⲙ": "μ", "ⲛ": "ν", "ⲝ": "ξ", "ⲟ": "ο", "ⲡ": "π", "ⲣ": "ρ",
23
+ "ⲥ": "σ", "ⲧ": "τ", "ⲩ": "υ", "ⲫ": "φ", "ⲭ": "χ", "ⲯ": "ψ", "ⲱ": "ω",
24
+ "ϣ": "ʃ", "ϥ": "f", "ϧ": "x", "ϩ": "h", "ϫ": "ɟ", "ϭ": "c", "ϯ": "ti"
25
+ }
26
+
27
+ GREEK_TO_COPTIC = {v: k for k, v in COPTIC_TO_GREEK.items()}
28
+
29
+ def greekify(coptic_text):
30
+ """Convert Coptic Unicode to Greek transcription"""
31
+ result = []
32
+ for char in coptic_text:
33
+ result.append(COPTIC_TO_GREEK.get(char.lower(), char.lower()))
34
+ return "".join(result)
35
+
36
+ def degreekify(greek_text):
37
+ """Convert Greek transcription back to Coptic Unicode"""
38
+ result = []
39
+ i = 0
40
+ while i < len(greek_text):
41
+ if i < len(greek_text) - 1 and greek_text[i:i+2].lower() == 'ti':
42
+ result.append(GREEK_TO_COPTIC.get('ti', greek_text[i:i+2]))
43
+ i += 2
44
+ else:
45
+ result.append(GREEK_TO_COPTIC.get(greek_text[i], greek_text[i]))
46
+ i += 1
47
+ return ''.join(result)
48
+
49
  # Model caching
50
+ coptic_to_english_model = None
51
+ english_to_coptic_model = None
52
+ device = "cuda" if torch.cuda.is_available() else "cpu"
53
 
54
  def load_coptic_to_english():
55
+ """Load Coptic → English translation model"""
56
+ global coptic_to_english_model
57
+ if coptic_to_english_model is None:
58
+ tokenizer = AutoTokenizer.from_pretrained("megalaa/coptic-english-translator")
59
+ model = AutoModelForSeq2SeqLM.from_pretrained("megalaa/coptic-english-translator")
60
+ model = model.to(device)
61
+ coptic_to_english_model = (tokenizer, model)
62
+ return coptic_to_english_model
 
63
 
64
  def load_english_to_coptic():
65
+ """Load English → Coptic translation model"""
66
+ global english_to_coptic_model
67
+ if english_to_coptic_model is None:
68
+ tokenizer = AutoTokenizer.from_pretrained("megalaa/english-coptic-translator")
69
+ model = AutoModelForSeq2SeqLM.from_pretrained("megalaa/english-coptic-translator")
70
+ model = model.to(device)
71
+ english_to_coptic_model = (tokenizer, model)
72
+ return english_to_coptic_model
 
73
 
74
  def translate_coptic_to_english(text, dialect):
75
  """Translate Coptic to English"""
 
77
  return "Please enter Coptic text to translate."
78
 
79
  try:
80
+ tokenizer, model = load_coptic_to_english()
81
+
82
+ # Preprocess: convert Coptic to Greek transcription
83
+ greek_text = greekify(text)
84
 
85
+ # Add dialect tag (from handler.py)
86
  if dialect == "Bohairic":
87
+ greek_text = "б " + greek_text # Bohairic tag
88
  else:
89
+ greek_text = "з " + greek_text # Sahidic tag
90
+
91
+ # Tokenize and generate
92
+ inputs = tokenizer(greek_text, return_tensors="pt", padding=True).to(device)
93
+ outputs = model.generate(
94
+ **inputs,
95
+ max_new_tokens=128,
96
+ num_beams=5,
97
+ early_stopping=True
98
+ )
99
+
100
+ # Decode
101
+ translation = tokenizer.decode(outputs[0], skip_special_tokens=True)
102
+ return translation
103
 
 
104
  except Exception as e:
105
  return f"Translation error: {str(e)}"
106
 
 
110
  return "Please enter English text to translate."
111
 
112
  try:
113
+ tokenizer, model = load_english_to_coptic()
114
 
115
+ # Add dialect tag
116
  if dialect == "Bohairic":
117
+ input_text = "б " + text # Bohairic tag
118
  else:
119
+ input_text = "з " + text # Sahidic tag
120
+
121
+ # Tokenize and generate
122
+ inputs = tokenizer(input_text, return_tensors="pt", padding=True).to(device)
123
+ outputs = model.generate(
124
+ **inputs,
125
+ max_new_tokens=128,
126
+ num_beams=5,
127
+ early_stopping=True
128
+ )
129
+
130
+ # Decode and convert back to Coptic
131
+ greek_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
132
+ coptic_output = degreekify(greek_output)
133
+ return coptic_output
134
 
 
135
  except Exception as e:
136
  return f"Translation error: {str(e)}"
137