dev-mode-orpheus

Paused

Tomtom84 committed on Apr 21, 2025

Commit

06a62cb

verified ·

1 Parent(s): 53012c3

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -247,6 +247,13 @@ async def load_models_startup():
     print("StoppingCriteria initialized.")
     print("✅ Modelle geladen und bereit!", flush=True)
 @app.get("/")
 def hello():
@@ -294,15 +301,20 @@ async def tts(ws: WebSocket):
         print("Starting generation in background thread...")
         await asyncio.to_thread(
-            model.generate,
-            input_ids=ids,
-            attention_mask=attn,
-            max_new_tokens=1500,
-            logits_processor=[masker],
-            stopping_criteria=stopping_criteria,
-            do_sample=False, # Using greedy decoding
-            use_cache=True,
-            streamer=streamer
         )
         print("Generation thread finished.")

     print("StoppingCriteria initialized.")
     print("✅ Modelle geladen und bereit!", flush=True)
+    print(f"Tokenizer EOS ID: {tok.eos_token_id}")
+    print(f"Model Config EOS ID: {model.config.eos_token_id}")
+    print(f"Constant EOS_TOKEN: {EOS_TOKEN}")
+    if tok.eos_token_id != EOS_TOKEN or model.config.eos_token_id != EOS_TOKEN:
+    print("⚠️ WARNING: EOS_TOKEN constant might not match model/tokenizer configuration!")
+    # Consider updating EOS_TOKEN if they differ, e.g.:
+    # EOS_TOKEN = model.config.eos_token_id
 @app.get("/")
 def hello():
         print("Starting generation in background thread...")
         await asyncio.to_thread(
+        model.generate,
+        input_ids=ids,
+        attention_mask=attn,
+        max_new_tokens=2500, # Keep or increase later if needed
+        logits_processor=[masker],
+        stopping_criteria=stopping_criteria,
+        # --- Changes ---
+        do_sample=True,      # Enable sampling
+        temperature=0.6,     # Introduce some randomness (adjust as needed)
+        top_p=0.9,           # Focus sampling on more likely tokens (adjust as needed)
+        repetition_penalty=1.15, # Penalize recently generated tokens (adjust > 1.0)
+        # --- End Changes ---
+        use_cache=True,
+        streamer=streamer
         )
         print("Generation thread finished.")