Update app.py
app.py CHANGED
@@ -88,23 +88,55 @@ def translate(text):
 
     try:
         # Note: apply_chat_template returns input_ids tensor directly if tokenize=True and return_tensors="pt"
-        input_ids = trans_tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt")
+        input_ids = trans_tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt")
+
+        # Debug: Check devices
+        print(f"[DEBUG] Input device: {input_ids.device}")
+        print(f"[DEBUG] Model device: {trans_model.device}")
+        print(f"[DEBUG] Input shape: {input_ids.shape}")
+        print(f"[DEBUG] Input tokens: {input_ids.shape[1]}")
+
+        # Move input to model's device
+        input_ids = input_ids.to(trans_model.device)
+        print(f"[DEBUG] Input moved to: {input_ids.device}")
+
     except Exception as e:
         print(f"Chat template error: {e}")
+        traceback.print_exc()
         return "Error in translation template."
 
-
-
-
-
-
-
-
-
-
+    try:
+        import time
+        start_time = time.time()
+        print("[DEBUG] Starting generation...")
+
+        with torch.no_grad():
+            # Use greedy decoding (do_sample=False) to avoid NaN/Inf issues with float16 sampling
+            outputs = trans_model.generate(
+                input_ids,
+                max_new_tokens=128,  # Reduced for faster generation
+                do_sample=False,  # Greedy decoding avoids multinomial NaN errors
+                pad_token_id=trans_tokenizer.pad_token_id,
+                eos_token_id=trans_tokenizer.eos_token_id,
+            )
+
+        elapsed = time.time() - start_time
+        print(f"[DEBUG] Generation completed in {elapsed:.2f}s")
+        print(f"[DEBUG] Output shape: {outputs.shape}")
+        print(f"[DEBUG] New tokens generated: {outputs.shape[1] - input_ids.shape[1]}")
+
+    except Exception as e:
+        print(f"Generation error: {e}")
+        traceback.print_exc()
+        return "Error during translation generation."
 
     # Slice reusing the input length
-
+    new_tokens = outputs[0][input_ids.shape[1]:]
+    print(f"[DEBUG] New tokens to decode: {len(new_tokens)}")
+
+    decoded = trans_tokenizer.decode(new_tokens, skip_special_tokens=True)
+    print(f"[DEBUG] Decoded output: '{decoded}'")
+
     return decoded.strip()
 
 
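For reference, below is a minimal self-contained sketch of the translation path this commit converges on, with the debug prints stripped out. It assumes trans_model and trans_tokenizer are a causal LM and its chat-capable tokenizer loaded elsewhere in app.py; the model id, device selection, and messages construction here are placeholders, not part of this commit:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "your-translation-model"  # hypothetical; app.py defines its own model

device = "cuda" if torch.cuda.is_available() else "cpu"
trans_tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# float16 matches the commit's comments about sampling instability; use float32 on CPU
trans_model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.float16).to(device)

def translate(text):
    # Prompt format assumed for illustration; app.py builds its own messages
    messages = [{"role": "user", "content": f"Translate to English: {text}"}]

    # apply_chat_template returns the input_ids tensor directly with tokenize=True
    # and return_tensors="pt"; move it to the model's device before generating
    input_ids = trans_tokenizer.apply_chat_template(
        messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
    ).to(trans_model.device)

    with torch.no_grad():
        # Greedy decoding (do_sample=False) sidesteps the NaN/Inf multinomial
        # errors the commit notes can occur when sampling under float16
        outputs = trans_model.generate(
            input_ids,
            max_new_tokens=128,
            do_sample=False,
            pad_token_id=trans_tokenizer.pad_token_id,
            eos_token_id=trans_tokenizer.eos_token_id,
        )

    # generate() returns prompt + continuation, so slice off the prompt
    # by reusing the input length before decoding
    new_tokens = outputs[0][input_ids.shape[1]:]
    return trans_tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

# Example usage:
# print(translate("Bonjour le monde"))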