Spaces:

yasserrmd
/

SinaReason

Sleeping

yasserrmd committed on Sep 22, 2025

Commit

b195830

verified ·

1 Parent(s): 3473b4d

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -97,19 +97,11 @@ class SinaReasonMedicalChat:
         # Add current message
         messages.append({"role": "user", "content": message})
-        # Apply chat template
-        prompt = self.tokenizer.apply_chat_template(
-            messages,
-            tokenize=False,
-            add_generation_prompt=True,
-        )
-        # Tokenize input and move to the same device as the model
-        inputs = self.tokenizer(
-            text=prompt,
-            return_tensors="pt"
-        ).to(DEVICE)
         # Setup streamer
         streamer = TextIteratorStreamer(
             self.tokenizer,
@@ -120,7 +112,7 @@ class SinaReasonMedicalChat:
         # Generation parameters optimized for medical reasoning
         generation_kwargs = {
-            **inputs,
             "max_new_tokens": max_tokens,
             "temperature": temperature,
             "top_p": top_p,

         # Add current message
         messages.append({"role": "user", "content": message})
+        tokenized = tokenizer.apply_chat_template(messages, return_dict=True)
+        input_ids = torch.tensor(tokenized.input_ids, device="cuda").unsqueeze(0)
+        attention_mask = torch.tensor(tokenized.attention_mask, device="cuda").unsqueeze(0)
         # Setup streamer
         streamer = TextIteratorStreamer(
             self.tokenizer,
         # Generation parameters optimized for medical reasoning
         generation_kwargs = {
+            "input_ids" :input_ids,
             "max_new_tokens": max_tokens,
             "temperature": temperature,
             "top_p": top_p,