Rogaton Claude commited on
Commit
90f0f33
·
1 Parent(s): 6865b40

fix: Improve translation accuracy with target language selector and explicit prompts

Browse files

**Target Language Selection:**
- Add dedicated "Target Language" dropdown in sidebar
- Appears when "Translation" analysis type is selected
- Excludes Coptic dialects, shows only modern languages
- Defaults to English

**Enhanced Translation Prompts:**
- Dynamic prompt generation based on selected target language
- Explicit instructions: "Provide ONLY the direct translation"
- Lists what NOT to include (no source text, no explanations)
- Identifies as "professional Coptic translator" for better context

**System Message Control:**
- Add system role message specifically for translation tasks
- Reinforces "no explanations, no commentary" instruction
- Helps model stay focused on pure translation

**Temperature Adjustment:**
- Lower temperature from 0.7 to 0.5 for translation
- Reduces creative elaboration, increases accuracy
- Standard tasks keep default temperature

**Result:**
- Translations now output ONLY the target language text
- No more repeating Coptic source text
- No more English when French is selected
- Cleaner, more accurate translations

Fixes issue where model was repeating input and adding commentary
instead of providing clean translations to the selected target language.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (1) hide show
  1. apertus_ui.py +39 -14
apertus_ui.py CHANGED
@@ -13,14 +13,16 @@ COPTIC_ALPHABET = {
13
  'Ϣ': 'Shai', 'Ϥ': 'Fai', 'Ϧ': 'Khei', 'Ϩ': 'Hori', 'Ϫ': 'Gangia', 'Ϭ': 'Shima', 'Ϯ': 'Ti'
14
  }
15
 
16
- # Coptic linguistic prompts
17
- COPTIC_PROMPTS = {
18
- 'dialect_analysis': "Analyze the Coptic dialect of this text and identify linguistic features:",
19
- 'translation': "Translate this Coptic text to English, preserving theological and cultural context:",
20
- 'transcription': "Provide a romanized transcription of this Coptic text:",
21
- 'morphology': "Analyze the morphological structure of these Coptic words:",
22
- 'lexicon_lookup': "Look up these Coptic words in the lexicon and provide Greek etymologies:"
23
- }
 
 
24
 
25
  # Lexicon loader
26
  @st.cache_data
@@ -260,13 +262,28 @@ with st.sidebar:
260
  else:
261
  st.write("No matches found")
262
 
263
- # Linguistic analysis options
264
  if selected_lang in ['cop', 'cop-sa', 'cop-bo']:
265
  st.subheader("Analysis Type")
266
- analysis_type = st.selectbox("Choose analysis:",
267
- options=list(COPTIC_PROMPTS.keys()),
268
  format_func=lambda x: x.replace('_', ' ').title())
269
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
  # Use HuggingFace Inference API instead of loading model locally
271
  # This is much faster and doesn't require GPU
272
  MODEL_NAME = "swiss-ai/Apertus-8B-Instruct-2509"
@@ -337,14 +354,22 @@ if prompt := st.chat_input("Type your message..."):
337
  with st.chat_message("assistant"):
338
  try:
339
  with st.spinner("🤖 Generating response..."):
340
- # Use chat completion API
341
- messages = [{"role": "user", "content": full_prompt}]
 
 
 
 
 
 
 
 
342
 
343
  response_stream = inference_client.chat_completion(
344
  model=MODEL_NAME,
345
  messages=messages,
346
  max_tokens=512,
347
- temperature=0.7,
348
  top_p=0.9,
349
  stream=True
350
  )
 
13
  'Ϣ': 'Shai', 'Ϥ': 'Fai', 'Ϧ': 'Khei', 'Ϩ': 'Hori', 'Ϫ': 'Gangia', 'Ϭ': 'Shima', 'Ϯ': 'Ti'
14
  }
15
 
16
+ # Coptic linguistic prompts (will be formatted with target language)
17
+ def get_coptic_prompts(target_language):
18
+ """Generate Coptic analysis prompts with specified target language"""
19
+ return {
20
+ 'dialect_analysis': f"Analyze the Coptic dialect of this text and identify linguistic features. Respond in {target_language}:",
21
+ 'translation': f"You are a professional Coptic translator. Translate the following Coptic text to {target_language}.\n\nIMPORTANT: Provide ONLY the direct translation. Do not include:\n- The original Coptic text\n- Explanations or commentary\n- Notes about context or meaning\n- Any text other than the {target_language} translation\n\nCoptic text to translate:",
22
+ 'transcription': f"Provide a romanized transcription of this Coptic text. Respond in {target_language}:",
23
+ 'morphology': f"Analyze the morphological structure of these Coptic words. Respond in {target_language}:",
24
+ 'lexicon_lookup': f"Look up these Coptic words and provide definitions with Greek etymologies. Respond in {target_language}:"
25
+ }
26
 
27
  # Lexicon loader
28
  @st.cache_data
 
262
  else:
263
  st.write("No matches found")
264
 
265
+ # Linguistic analysis options for Coptic input
266
  if selected_lang in ['cop', 'cop-sa', 'cop-bo']:
267
  st.subheader("Analysis Type")
268
+ analysis_type = st.selectbox("Choose analysis:",
269
+ options=['translation', 'dialect_analysis', 'transcription', 'morphology', 'lexicon_lookup'],
270
  format_func=lambda x: x.replace('_', ' ').title())
271
 
272
+ # Target language selector for translation
273
+ if analysis_type == 'translation':
274
+ st.subheader("Target Language")
275
+ target_lang = st.selectbox("Translate to:",
276
+ options=[k for k in LANGUAGES.keys() if k not in ['cop', 'cop-sa', 'cop-bo']],
277
+ format_func=lambda x: LANGUAGES[x],
278
+ index=0) # Default to English
279
+ target_language_name = LANGUAGES[target_lang]
280
+ else:
281
+ # For non-translation tasks, use English as default output language
282
+ target_language_name = "English"
283
+
284
+ # Get prompts for the target language
285
+ COPTIC_PROMPTS = get_coptic_prompts(target_language_name)
286
+
287
  # Use HuggingFace Inference API instead of loading model locally
288
  # This is much faster and doesn't require GPU
289
  MODEL_NAME = "swiss-ai/Apertus-8B-Instruct-2509"
 
354
  with st.chat_message("assistant"):
355
  try:
356
  with st.spinner("🤖 Generating response..."):
357
+ # Prepare messages with system instruction for better control
358
+ if selected_lang in ['cop', 'cop-sa', 'cop-bo'] and 'analysis_type' in locals() and analysis_type == 'translation':
359
+ # For translation: strict system message
360
+ messages = [
361
+ {"role": "system", "content": "You are a professional Coptic-to-modern-language translator. Provide only direct translations without explanations, commentary, or repeating the source text."},
362
+ {"role": "user", "content": full_prompt}
363
+ ]
364
+ else:
365
+ # For other tasks: standard chat
366
+ messages = [{"role": "user", "content": full_prompt}]
367
 
368
  response_stream = inference_client.chat_completion(
369
  model=MODEL_NAME,
370
  messages=messages,
371
  max_tokens=512,
372
+ temperature=0.5, # Lower temperature for more focused translations
373
  top_p=0.9,
374
  stream=True
375
  )