Rogaton Claude committed on
Commit e0d73ae · 1 Parent(s): 5461265

Replace megalaa model with Norelad/coptic-megalaa-finetuned


- Update Coptic→English model to use the fine-tuned Norelad/coptic-megalaa-finetuned
- Fix preprocessing: add dialect tags (з for Sahidic, б for Bohairic)
- Fix greekify/degreekify character mappings to match model training
  - ϣ → ʃ (IPA), ϧ → x, ϫ → ɟ, ϯ → ti
- Add transformers and sentencepiece to requirements.txt
- Update translate_coptic_to_english to accept dialect parameter
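
For illustration, a minimal sketch of what this preprocessing produces. The mapping excerpt and the Cyrillic dialect tags are taken from the new code in apertus_ui.py; the sample word ⲡⲛⲟⲩⲧⲉ and the standalone `greekify` helper here are illustrative only:

```python
# Simplified excerpt of the greekify() mapping and dialect tags added in apertus_ui.py.
COPTIC_TO_GREEK = {"ⲡ": "π", "ⲛ": "ν", "ⲟ": "ο", "ⲩ": "υ", "ⲧ": "τ", "ⲉ": "ε", "ϣ": "ʃ", "ϯ": "ti"}
DIALECT_TAGS = {"cop-sa": "з", "cop-bo": "б", "cop": "з"}  # Cyrillic з = Sahidic, б = Bohairic

def greekify(text: str) -> str:
    # Lowercase, then map each Coptic character to its Greek/IPA transcription.
    return "".join(COPTIC_TO_GREEK.get(c.lower(), c.lower()) for c in text)

word = "ⲡⲛⲟⲩⲧⲉ"  # illustrative Sahidic input
print(f"{DIALECT_TAGS['cop-sa']} {greekify(word)}")  # -> "з πνουτε"
```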

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (2)
  1. apertus_ui.py +315 -89
  2. requirements.txt +2 -0
apertus_ui.py CHANGED
@@ -2,9 +2,77 @@ import streamlit as st
 import os
 import xml.etree.ElementTree as ET
 import re
+import torch
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 from huggingface_hub import InferenceClient
 from coptic_parser_core import CopticParserCore
 
+# ========================================
+# MEGALAA MODEL PREPROCESSING FUNCTIONS
+# ========================================
+# These functions convert between Coptic Unicode and Greek transcription
+# Required for megalaa/coptic-english-translator and megalaa/english-coptic-translator
+
+COPTIC_TO_GREEK = {
+    "ⲁ": "α", "ⲃ": "β", "ⲅ": "γ", "ⲇ": "δ", "ⲉ": "ε", "ⲋ": "ϛ",
+    "ⲍ": "ζ", "ⲏ": "η", "ⲑ": "θ", "ⲓ": "ι", "ⲕ": "κ", "ⲗ": "λ",
+    "ⲙ": "μ", "ⲛ": "ν", "ⲝ": "ξ", "ⲟ": "ο", "ⲡ": "π", "ⲣ": "ρ",
+    "ⲥ": "σ", "ⲧ": "τ", "ⲩ": "υ", "ⲫ": "φ", "ⲭ": "χ", "ⲯ": "ψ",
+    "ⲱ": "ω",
+    # Coptic-specific characters (must match model training)
+    "ϣ": "ʃ", "ϥ": "f", "ϧ": "x", "ϩ": "h", "ϫ": "ɟ",
+    "ϭ": "c", "ϯ": "ti",
+    # Uppercase variants
+    "Ⲁ": "Α", "Ⲃ": "Β", "Ⲅ": "Γ", "Ⲇ": "Δ", "Ⲉ": "Ε", "Ⲍ": "Ζ", "Ⲏ": "Η", "Ⲑ": "Θ",
+    "Ⲓ": "Ι", "Ⲕ": "Κ", "Ⲗ": "Λ", "Ⲙ": "Μ", "Ⲛ": "Ν", "Ⲝ": "Ξ", "Ⲟ": "Ο", "Ⲡ": "Π",
+    "Ⲣ": "Ρ", "Ⲥ": "Σ", "Ⲧ": "Τ", "Ⲩ": "Υ", "Ⲫ": "Φ", "Ⲭ": "Χ", "Ⲯ": "Ψ", "Ⲱ": "Ω",
+    "Ϣ": "Ʃ", "Ϥ": "F", "Ϧ": "X", "Ϩ": "H", "Ϫ": "Ɉ", "Ϭ": "C", "Ϯ": "TI"
+}
+
+GREEK_TO_COPTIC = {
+    "α": "ⲁ", "β": "ⲃ", "γ": "ⲅ", "δ": "ⲇ", "ε": "ⲉ", "ϛ": "ⲋ",
+    "ζ": "ⲍ", "η": "ⲏ", "θ": "ⲑ", "ι": "ⲓ", "κ": "ⲕ", "λ": "ⲗ",
+    "μ": "ⲙ", "ν": "ⲛ", "ξ": "ⲝ", "ο": "ⲟ", "π": "ⲡ", "ρ": "ⲣ",
+    "σ": "ⲥ", "ς": "ⲥ", "τ": "ⲧ", "υ": "ⲩ", "φ": "ⲫ", "χ": "ⲭ", "ψ": "ⲯ",
+    "ω": "ⲱ",
+    # Coptic-specific characters (must match model training)
+    "ʃ": "ϣ", "f": "ϥ", "x": "ϧ", "h": "ϩ", "ɟ": "ϫ",
+    "c": "ϭ", "ti": "ϯ",
+    # Uppercase variants
+    "Α": "Ⲁ", "Β": "Ⲃ", "Γ": "Ⲅ", "Δ": "Ⲇ", "Ε": "Ⲉ", "Ζ": "Ⲍ", "Η": "Ⲏ", "Θ": "Ⲑ",
+    "Ι": "Ⲓ", "Κ": "Ⲕ", "Λ": "Ⲗ", "Μ": "Ⲙ", "Ν": "Ⲛ", "Ξ": "Ⲝ", "Ο": "Ⲟ", "Π": "Ⲡ",
+    "Ρ": "Ⲣ", "Σ": "Ⲥ", "Τ": "Ⲧ", "Υ": "Ⲩ", "Φ": "Ⲫ", "Χ": "Ⲭ", "Ψ": "Ⲯ", "Ω": "Ⲱ",
+    "Ʃ": "Ϣ", "F": "Ϥ", "X": "Ϧ", "H": "Ϩ", "Ɉ": "Ϫ", "C": "Ϭ", "TI": "Ϯ"
+}
+
+def greekify(coptic_text):
+    """Convert Coptic Unicode to Greek transcription for megalaa models."""
+    chars = []
+    for c in coptic_text:
+        l_c = c.lower()
+        chars.append(COPTIC_TO_GREEK.get(l_c, l_c))
+    return "".join(chars)
+
+def degreekify(greek_text):
+    """Convert Greek transcription back to Coptic Unicode.
+
+    Handles two-character sequences like 'ti' → 'ϯ'
+    """
+    result = []
+    i = 0
+    while i < len(greek_text):
+        # Check for two-character sequences first
+        if i < len(greek_text) - 1:
+            two_char = greek_text[i:i+2].lower()
+            if two_char == 'ti':
+                result.append(GREEK_TO_COPTIC.get(two_char, greek_text[i:i+2]))
+                i += 2
+                continue
+        # Single character
+        result.append(GREEK_TO_COPTIC.get(greek_text[i], greek_text[i]))
+        i += 1
+    return ''.join(result)
+
 # Coptic alphabet helper
 COPTIC_ALPHABET = {
     'Ⲁ': 'Alpha', 'Ⲃ': 'Beta', 'Ⲅ': 'Gamma', 'Ⲇ': 'Delta', 'Ⲉ': 'Epsilon', 'Ⲋ': 'Zeta',
@@ -102,6 +170,116 @@ def load_coptic_lexicon(file_path=None):
 
     return lexicon
 
+# ========================================
+# MEGALAA MODEL LOADING
+# ========================================
+# Load and cache megalaa translation models
+
+@st.cache_resource
+def load_coptic_to_english_model():
+    """Load Coptic → English translation model (Norelad's fine-tuned megalaa)."""
+    try:
+        with st.spinner("📥 Loading Coptic→English model (first time only, ~600MB)..."):
+            model_name = "Norelad/coptic-megalaa-finetuned"
+            tokenizer = AutoTokenizer.from_pretrained(model_name)
+            model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+
+            # Move to GPU if available
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+            model = model.to(device)
+
+            st.success(f"✅ Coptic→English model loaded on {device.upper()}")
+            return tokenizer, model, device
+    except Exception as e:
+        st.error(f"Failed to load Coptic→English model: {e}")
+        return None, None, None
+
+@st.cache_resource
+def load_english_to_coptic_model():
+    """Load megalaa English → Coptic translation model."""
+    try:
+        with st.spinner("📥 Loading English→Coptic model (first time only, ~600MB)..."):
+            model_name = "megalaa/english-coptic-translator"
+            tokenizer = AutoTokenizer.from_pretrained(model_name)
+            model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+
+            # Move to GPU if available
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+            model = model.to(device)
+
+            st.success(f"✅ English→Coptic model loaded on {device.upper()}")
+            return tokenizer, model, device
+    except Exception as e:
+        st.error(f"Failed to load English→Coptic model: {e}")
+        return None, None, None
+
+def translate_coptic_to_english(text, dialect='cop-sa'):
+    """Translate Coptic text to English using megalaa model.
+
+    Args:
+        text: Coptic text to translate
+        dialect: Coptic dialect ('cop-sa' for Sahidic, 'cop-bo' for Bohairic, 'cop' defaults to Sahidic)
+    """
+    tokenizer, model, device = load_coptic_to_english_model()
+
+    if tokenizer is None or model is None:
+        return "Error: Model not loaded. Please check your internet connection."
+
+    try:
+        # Dialect tags (required by the Norelad/coptic-megalaa-finetuned model)
+        DIALECT_TAGS = {
+            'cop-sa': 'з',  # Sahidic (Cyrillic 'з')
+            'cop-bo': 'б',  # Bohairic (Cyrillic 'б')
+            'cop': 'з'      # Default to Sahidic for generic Coptic
+        }
+
+        dialect_tag = DIALECT_TAGS.get(dialect, 'з')
+
+        # Preprocessing: Convert Coptic Unicode to Greek transcription and add dialect tag
+        greek_input = greekify(text.lower())
+        greek_input = f"{dialect_tag} {greek_input}"
+
+        # Tokenize and generate
+        inputs = tokenizer(greek_input, return_tensors="pt", padding=True).to(device)
+        outputs = model.generate(
+            **inputs,
+            max_new_tokens=128,
+            num_beams=5,
+            early_stopping=True
+        )
+
+        # Decode translation
+        translation = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        return translation
+
+    except Exception as e:
+        return f"Translation error: {e}"
+
+def translate_english_to_coptic(text):
+    """Translate English text to Coptic using megalaa model."""
+    tokenizer, model, device = load_english_to_coptic_model()
+
+    if tokenizer is None or model is None:
+        return "Error: Model not loaded. Please check your internet connection."
+
+    try:
+        # Tokenize and generate (input is already in English)
+        inputs = tokenizer(text, return_tensors="pt", padding=True).to(device)
+        outputs = model.generate(
+            **inputs,
+            max_new_tokens=128,
+            num_beams=5,
+            early_stopping=True
+        )
+
+        # Decode and postprocess: Convert Greek transcription to Coptic Unicode
+        greek_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        coptic_output = degreekify(greek_output)
+        return coptic_output
+
+    except Exception as e:
+        return f"Translation error: {e}"
+
 # Language detection and UI
 LANGUAGES = {
     'en': 'English', 'es': 'Español', 'fr': 'Français', 'de': 'Deutsch',
@@ -120,18 +298,24 @@ selected_lang = st.selectbox("Language / Langue / Idioma",
 with st.sidebar:
     st.header("Coptic Tools")
 
-    # HuggingFace API Token input
-    st.subheader("🔑 API Configuration")
-    hf_token_input = st.text_input(
-        "HuggingFace API Token",
-        type="password",
-        help="Required for Apertus-8B translation. Get your token at: https://huggingface.co/settings/tokens"
-    )
-    if hf_token_input:
-        st.success("✅ API token configured")
-    else:
-        st.warning("⚠️ Translation requires an API token")
-        st.markdown("[Get your free HF token →](https://huggingface.co/settings/tokens)")
+    # Translation Model Selection
+    st.subheader("🤖 Translation Model")
+    st.info("✨ **NEW:** Using megalaa specialized Coptic models (free, no API token needed!)")
+    st.markdown("Models: `megalaa/coptic-english-translator` & `megalaa/english-coptic-translator`")
+
+    # Optional: HuggingFace API Token for advanced features
+    with st.expander("⚙️ Advanced: Use Apertus-8B (optional)"):
+        st.caption("For multi-language translation beyond English-Coptic")
+        hf_token_input = st.text_input(
+            "HuggingFace API Token",
+            type="password",
+            help="Optional: For Apertus-8B multi-language support"
+        )
+        use_apertus = st.checkbox("Use Apertus-8B instead of megalaa", value=False)
+        if hf_token_input and use_apertus:
+            st.success("✅ Apertus-8B enabled")
+        elif not use_apertus:
+            hf_token_input = None  # Disable API usage
 
     st.divider()
 
@@ -373,17 +557,10 @@ if prompt := st.chat_input("Type your message..."):
 
         st.stop()  # Don't continue to translation
 
-    # For translation tasks, check API token
-    if not hf_token_input:
-        st.error("⚠️ Please enter your HuggingFace API token in the sidebar to use translation.")
-        st.stop()
-
-    # Initialize inference client with user token
-    inference_client = get_inference_client(hf_token_input)
-
-    if not inference_client:
-        st.error("❌ Failed to initialize inference client. Please check your API token.")
-        st.stop()
+    # Initialize inference client if API token is provided (optional for megalaa)
+    inference_client = None
+    if hf_token_input:
+        inference_client = get_inference_client(hf_token_input)
 
     # Handle parse_and_translate mode
     if selected_lang in ['cop', 'cop-sa', 'cop-bo'] and 'analysis_type' in locals() and analysis_type == 'parse_and_translate':
@@ -407,39 +584,51 @@ if prompt := st.chat_input("Type your message..."):
         st.divider()
         st.subheader(f"🌍 Translation to {LANGUAGES[target_lang]}")
 
-        # Get translation prompts
-        COPTIC_PROMPTS_TRANSLATE = get_coptic_prompts(target_language_name)
-        translate_prompt = f"{COPTIC_PROMPTS_TRANSLATE['translation']} {prompt}"
-
-        with st.spinner("🤖 Translating..."):
+        with st.spinner("🤖 Translating with megalaa model..."):
            try:
-                messages = [
-                    {"role": "system", "content": "You are a professional Coptic-to-modern-language translator. Provide only direct translations without explanations, commentary, or repeating the source text."},
-                    {"role": "user", "content": translate_prompt}
-                ]
-
-                response_stream = inference_client.chat_completion(
-                    model=MODEL_NAME,
-                    messages=messages,
-                    max_tokens=512,
-                    temperature=0.5,
-                    top_p=0.9,
-                    stream=True
-                )
-
-                # Stream the translation
-                response_placeholder = st.empty()
-                full_response = ""
-
-                for message in response_stream:
-                    if message.choices[0].delta.content:
-                        full_response += message.choices[0].delta.content
-                        response_placeholder.markdown(full_response + "▌")
-
-                response_placeholder.markdown(full_response)
-
-                combined_response = f"Parse complete. Translation: {full_response}"
-                st.session_state.messages.append({"role": "assistant", "content": combined_response})
+                # Use megalaa for Coptic→English translation
+                if target_lang == 'en':
+                    translation = translate_coptic_to_english(prompt, dialect=selected_lang)
+                    st.markdown(translation)
+
+                    combined_response = f"Parse complete. Translation: {translation}"
+                    st.session_state.messages.append({"role": "assistant", "content": combined_response})
+                else:
+                    # For non-English targets, need Apertus or show message
+                    if inference_client and hf_token_input:
+                        COPTIC_PROMPTS_TRANSLATE = get_coptic_prompts(target_language_name)
+                        translate_prompt = f"{COPTIC_PROMPTS_TRANSLATE['translation']} {prompt}"
+
+                        messages = [
+                            {"role": "system", "content": "You are a professional Coptic-to-modern-language translator. Provide only direct translations without explanations, commentary, or repeating the source text."},
+                            {"role": "user", "content": translate_prompt}
+                        ]
+
+                        response_stream = inference_client.chat_completion(
+                            model=MODEL_NAME,
+                            messages=messages,
+                            max_tokens=512,
+                            temperature=0.5,
+                            top_p=0.9,
+                            stream=True
+                        )
+
+                        # Stream the translation
+                        response_placeholder = st.empty()
+                        full_response = ""
+
+                        for message in response_stream:
+                            if message.choices[0].delta.content:
+                                full_response += message.choices[0].delta.content
+                                response_placeholder.markdown(full_response + "▌")
+
+                        response_placeholder.markdown(full_response)
+
+                        combined_response = f"Parse complete. Translation: {full_response}"
+                        st.session_state.messages.append({"role": "assistant", "content": combined_response})
+                    else:
+                        st.warning(f"⚠️ Translation to {target_language_name} requires Apertus-8B. Please enable it in the sidebar.")
+                        st.info("💡 Megalaa models currently support English↔Coptic only.")
 
             except Exception as e:
                 st.error(f"❌ Translation error: {e}")
@@ -468,42 +657,79 @@ if prompt := st.chat_input("Type your message..."):
     with st.chat_message("user"):
         st.markdown(full_prompt)
 
-    # Generate response using HuggingFace Inference API
+    # Generate response using megalaa models or Apertus API
     with st.chat_message("assistant"):
        try:
-            with st.spinner("🤖 Generating response..."):
-                # Prepare messages with system instruction for better control
-                if selected_lang in ['cop', 'cop-sa', 'cop-bo'] and 'analysis_type' in locals() and analysis_type == 'translation':
-                    # For translation: strict system message
-                    messages = [
-                        {"role": "system", "content": "You are a professional Coptic-to-modern-language translator. Provide only direct translations without explanations, commentary, or repeating the source text."},
-                        {"role": "user", "content": full_prompt}
-                    ]
+            # Check if this is a Coptic→English translation task
+            if selected_lang in ['cop', 'cop-sa', 'cop-bo'] and 'analysis_type' in locals() and analysis_type == 'translation':
+                # Use megalaa models for Coptic translation
+                if 'target_lang' in locals() and target_lang == 'en':
+                    with st.spinner("🤖 Translating with megalaa model..."):
+                        translation = translate_coptic_to_english(prompt, dialect=selected_lang)
+                        st.markdown(translation)
+                        st.session_state.messages.append({"role": "assistant", "content": translation})
+                else:
+                    # Non-English target: requires Apertus
+                    if inference_client and hf_token_input:
+                        with st.spinner("🤖 Translating with Apertus-8B..."):
+                            messages = [
+                                {"role": "system", "content": "You are a professional Coptic-to-modern-language translator. Provide only direct translations without explanations, commentary, or repeating the source text."},
+                                {"role": "user", "content": full_prompt}
+                            ]
+
+                            response_stream = inference_client.chat_completion(
+                                model=MODEL_NAME,
+                                messages=messages,
+                                max_tokens=512,
+                                temperature=0.5,
+                                top_p=0.9,
+                                stream=True
+                            )
+
+                            response_placeholder = st.empty()
+                            full_response = ""
+
+                            for message in response_stream:
+                                if message.choices[0].delta.content:
+                                    full_response += message.choices[0].delta.content
+                                    response_placeholder.markdown(full_response + "▌")
+
+                            response_placeholder.markdown(full_response)
+                            st.session_state.messages.append({"role": "assistant", "content": full_response})
+                    else:
+                        st.warning(f"⚠️ Translation to {target_language_name} requires Apertus-8B.")
+                        st.info("💡 Enable Apertus-8B in the sidebar for multi-language support.")
+                        st.info("💡 Megalaa models currently support English↔Coptic only.")
+
+            # For non-translation tasks or other languages
+            else:
+                if inference_client and hf_token_input:
+                    with st.spinner("🤖 Generating response..."):
+                        messages = [{"role": "user", "content": full_prompt}]
+
+                        response_stream = inference_client.chat_completion(
+                            model=MODEL_NAME,
+                            messages=messages,
+                            max_tokens=512,
+                            temperature=0.5,
+                            top_p=0.9,
+                            stream=True
+                        )
+
+                        response_placeholder = st.empty()
+                        full_response = ""
+
+                        for message in response_stream:
+                            if message.choices[0].delta.content:
+                                full_response += message.choices[0].delta.content
+                                response_placeholder.markdown(full_response + "▌")
+
+                        response_placeholder.markdown(full_response)
+                        st.session_state.messages.append({"role": "assistant", "content": full_response})
                 else:
-                    # For other tasks: standard chat
-                    messages = [{"role": "user", "content": full_prompt}]
-
-                response_stream = inference_client.chat_completion(
-                    model=MODEL_NAME,
-                    messages=messages,
-                    max_tokens=512,
-                    temperature=0.5,  # Lower temperature for more focused translations
-                    top_p=0.9,
-                    stream=True
-                )
-
-                # Stream the response
-                response_placeholder = st.empty()
-                full_response = ""
-
-                for message in response_stream:
-                    if message.choices[0].delta.content:
-                        full_response += message.choices[0].delta.content
-                        response_placeholder.markdown(full_response + "▌")
-
-                response_placeholder.markdown(full_response)
-                st.session_state.messages.append({"role": "assistant", "content": full_response})
+                    st.warning("⚠️ This feature requires Apertus-8B. Please enable it in the sidebar.")
+                    st.info("💡 Coptic→English translation works without API token using megalaa models.")
 
         except Exception as e:
-            st.error(f"❌ Error generating response: {str(e)}")
-            st.info("💡 Please verify your API token is valid and has not expired.")
+            st.error(f"❌ Error: {str(e)}")
+            st.info("💡 If using Apertus-8B, please verify your API token is valid.")
requirements.txt CHANGED
@@ -3,3 +3,5 @@ huggingface_hub
 lxml
 stanza
 torch
+transformers>=4.30.0
+sentencepiece>=0.1.99
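
For quick verification outside the Streamlit app, a minimal sketch of the new Coptic→English path. The model name, dialect tag, and generation settings mirror `translate_coptic_to_english` in the diff above; the pre-greekified input string is illustrative only, and the first run downloads roughly 600 MB of weights:

```python
# Standalone sketch mirroring translate_coptic_to_english() from apertus_ui.py.
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

model_name = "Norelad/coptic-megalaa-finetuned"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

# Input must already be greekified and prefixed with a dialect tag ('з' = Sahidic, 'б' = Bohairic).
greek_input = "з πνουτε"  # illustrative; produced by greekify("ⲡⲛⲟⲩⲧⲉ") in the app
inputs = tokenizer(greek_input, return_tensors="pt", padding=True).to(device)
outputs = model.generate(**inputs, max_new_tokens=128, num_beams=5, early_stopping=True)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```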