Spaces:
Update app.py
app.py CHANGED
@@ -87,26 +87,39 @@ def load_model():
         trust_remote_code=True
     )
 
-    # Create quantization config for fp8
+    # Create quantization config for fp8 - must use the actual class
     try:
-        from
-        quantization_config =
+        from compressed_tensors import CompressedTensorsConfig
+        quantization_config = CompressedTensorsConfig(
             quantization_method="fp8",
             ignore=[]
         )
+        print("Using CompressedTensorsConfig")
     except ImportError:
-
-
-
-
-
+        try:
+            from transformers.quantizers import CompressedTensorsQuantizationConfig
+            quantization_config = CompressedTensorsQuantizationConfig(
+                quantization_method="fp8",
+                ignore=[]
+            )
+            print("Using CompressedTensorsQuantizationConfig")
+        except ImportError:
+            # If both fail, load without custom quantization config
+            print("Loading model without custom quantization config")
+            quantization_config = None
 
     # Load model with quantization config
+    model_kwargs = {
+        "trust_remote_code": True,
+        "dtype": torch.float16 if torch.cuda.is_available() else torch.float32,
+    }
+
+    if quantization_config is not None:
+        model_kwargs["quantization_config"] = quantization_config
+
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_NAME,
-
-        quantization_config=quantization_config,
-        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+        **model_kwargs
    )
 
     return tokenizer, model
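The effect of this hunk is that `from_pretrained` only receives a `quantization_config` when one of the imports actually succeeded. A minimal standalone sketch of that import-fallback plus conditional-kwargs pattern; the `compressed_tensors` import path and the `quantization_method`/`ignore` keywords are copied from the commit as-is and are not verified against any installed library:

```python
# Sketch of the fallback pattern in the hunk above: try an import, fall back
# to None, and only add quantization_config to the kwargs when one exists.
import torch

def resolve_quantization_config():
    try:
        from compressed_tensors import CompressedTensorsConfig  # unverified, as in the commit
        return CompressedTensorsConfig(quantization_method="fp8", ignore=[])
    except ImportError:
        return None  # degrade to an unquantized load instead of crashing

def build_model_kwargs(quantization_config):
    kwargs = {
        "trust_remote_code": True,
        "dtype": torch.float16 if torch.cuda.is_available() else torch.float32,
    }
    if quantization_config is not None:
        kwargs["quantization_config"] = quantization_config
    return kwargs
```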
@@ -141,14 +154,17 @@ def chunk_text_by_tokens(text, tokenizer, max_tokens):
             chunks.append(current_chunk.strip())
 
         # If single sentence is too long, split it forcefully
-
-
-
-
-
-
-
-
+        try:
+            sentence_tokens = tokenizer.encode(sentence, add_special_tokens=False)
+            if len(sentence_tokens) > max_tokens:
+                for i in range(0, len(sentence_tokens), max_tokens):
+                    chunk_tokens = sentence_tokens[i:i + max_tokens]
+                    chunk_text = tokenizer.decode(chunk_tokens, skip_special_tokens=True)
+                    chunks.append(chunk_text)
+                current_chunk = ""
+            else:
+                current_chunk = sentence
+        except:
             current_chunk = sentence
 
     if current_chunk:
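The forced split added here is the standard encode–slice–decode windowing. A self-contained version that works with any Hugging Face tokenizer; note that `decode(encode(s))` is not guaranteed to round-trip byte-for-byte, so window boundaries can shift slightly, which is acceptable for translation chunking:

```python
# Encode once, slice into windows of max_tokens ids, decode each window back
# to text.
def split_long_sentence(sentence, tokenizer, max_tokens):
    ids = tokenizer.encode(sentence, add_special_tokens=False)
    return [
        tokenizer.decode(ids[i:i + max_tokens], skip_special_tokens=True)
        for i in range(0, len(ids), max_tokens)
    ]
```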
@@ -171,12 +187,16 @@ def translate_text_chunk(text, target_lang, source_lang, tokenizer, model):
     prompt = f"Translate the following segment into {target_lang}, without additional explanation.\n\n{text}"
 
     # Apply chat template
-
-
-
-
-
-
+    try:
+        messages = [{"role": "user", "content": prompt}]
+        input_text = tokenizer.apply_chat_template(
+            messages,
+            tokenize=False,
+            add_generation_prompt=True
+        )
+    except:
+        # Fallback if chat template fails
+        input_text = f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
 
     # Tokenize
     inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
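Two caveats on this hunk: a bare `except:` also swallows `KeyboardInterrupt` and `SystemExit`, and the hard-coded fallback assumes Hunyuan-MT uses a ChatML-style template, which is the commit's assumption rather than something verified here. A sketch with the narrower handler:

```python
def build_input_text(prompt, tokenizer):
    messages = [{"role": "user", "content": prompt}]
    try:
        return tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
    except Exception:  # narrower than bare except:
        # ChatML-style fallback, mirroring the commit's template assumption
        return f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
```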
@@ -186,7 +206,7 @@ def translate_text_chunk(text, target_lang, source_lang, tokenizer, model):
     outputs = model.generate(
         **inputs,
         **GEN_KW,
-        pad_token_id=tokenizer.eos_token_id
+        pad_token_id=tokenizer.eos_token_id if tokenizer.eos_token_id else tokenizer.pad_token_id
     )
 
     # Decode
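One edge case in the new conditional expression: it tests truthiness, so an `eos_token_id` of 0 (a valid id in some vocabularies) would be treated as missing. An explicit `None` check avoids that:

```python
def resolve_pad_token_id(tokenizer):
    # `x if x else y` treats id 0 as absent; compare against None instead
    if tokenizer.eos_token_id is not None:
        return tokenizer.eos_token_id
    return tokenizer.pad_token_id
```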
@@ -245,9 +265,21 @@ def translate_batch(text_lines, target_lang, source_lang, tokenizer, model):
 
 # Load model and tokenizer
 print("Initializing model...")
-
-
-
+try:
+    tokenizer, model = load_model()
+    device = model.device
+    print(f"Model loaded successfully on device: {device}")
+except Exception as e:
+    print(f"Error loading model: {e}")
+    # Create dummy functions for interface
+    tokenizer = None
+    model = None
+
+    def dummy_translate(text, target_lang, source_lang):
+        return f"Model loading failed: {e}"
+
+    translate_single = dummy_translate
+    translate_batch = lambda text_lines, target_lang, source_lang, *args: dummy_translate(text_lines, target_lang, source_lang)
 
 # Create Gradio interface
 with gr.Blocks(title="Hunyuan-MT Multi-language Translation") as demo:
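Note that Python unbinds the `except ... as e` name when the handler block exits, so the `dummy_translate` above would itself raise a `NameError` by the time a user clicks the button. A sketch that captures the message into an ordinary variable first (reusing `load_model` as defined earlier in app.py):

```python
try:
    tokenizer, model = load_model()
except Exception as e:
    error_msg = str(e)  # a plain variable survives after the except block
    tokenizer = model = None

    def dummy_translate(text, target_lang, source_lang):
        return f"Model loading failed: {error_msg}"
```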
@@ -282,12 +314,20 @@ with gr.Blocks(title="Hunyuan-MT Multi-language Translation") as demo:
                 interactive=False
             )
 
-
-
-
-
-
-
+            if tokenizer and model:
+                translate_btn.click(
+                    fn=lambda text, tgt, src: translate_single(text, tgt, src, tokenizer, model),
+                    inputs=[input_text, target_lang, source_lang],
+                    outputs=output_text,
+                    api_name="translate_text"
+                )
+            else:
+                translate_btn.click(
+                    fn=lambda text, tgt, src: translate_single(text, tgt, src),
+                    inputs=[input_text, target_lang, source_lang],
+                    outputs=output_text,
+                    api_name="translate_text"
+                )
 
         with gr.TabItem("Batch Translation"):
             with gr.Row():
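The two `click` registrations in this hunk differ only in which callable is bound; the inputs, outputs, and `api_name` are identical. An equivalent single registration, using the same widget names as the diff:

```python
# Choose the bound function up front, then register once.
fn = (
    (lambda text, tgt, src: translate_single(text, tgt, src, tokenizer, model))
    if tokenizer and model
    else (lambda text, tgt, src: translate_single(text, tgt, src))
)
translate_btn.click(
    fn=fn,
    inputs=[input_text, target_lang, source_lang],
    outputs=output_text,
    api_name="translate_text",
)
```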
@@ -315,12 +355,20 @@ with gr.Blocks(title="Hunyuan-MT Multi-language Translation") as demo:
                 interactive=False
             )
 
-
-
-
-
-
-
+            if tokenizer and model:
+                batch_translate_btn.click(
+                    fn=lambda text, tgt, src: translate_batch(text, tgt, src, tokenizer, model),
+                    inputs=[batch_input, batch_target_lang, batch_source_lang],
+                    outputs=batch_output,
+                    api_name="translate_batch"
+                )
+            else:
+                batch_translate_btn.click(
+                    fn=lambda text, tgt, src: translate_batch(text, tgt, src),
+                    inputs=[batch_input, batch_target_lang, batch_source_lang],
+                    outputs=batch_output,
+                    api_name="translate_batch"
+                )
 
     gr.Markdown("### API Usage")
     gr.Markdown("""
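Since both handlers register an `api_name`, the Space's endpoints are callable programmatically via `gradio_client`. A usage sketch; the Space id is a placeholder (not taken from this page), and the language values must match the dropdown choices defined in app.py:

```python
from gradio_client import Client

client = Client("owner/space-name")  # placeholder Space id
result = client.predict(
    "Hello world\nHow are you?",  # batch_input: one segment per line
    "Chinese",                    # batch_target_lang (must match a dropdown choice)
    "English",                    # batch_source_lang (must match a dropdown choice)
    api_name="/translate_batch",
)
print(result)
```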