yasserrmd committed on
Commit
b195830
·
verified ·
1 Parent(s): 3473b4d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -13
app.py CHANGED
@@ -97,19 +97,11 @@ class SinaReasonMedicalChat:
97
  # Add current message
98
  messages.append({"role": "user", "content": message})
99
 
100
- # Apply chat template
101
- prompt = self.tokenizer.apply_chat_template(
102
- messages,
103
- tokenize=False,
104
- add_generation_prompt=True,
105
- )
106
-
107
- # Tokenize input and move to the same device as the model
108
- inputs = self.tokenizer(
109
- text=prompt,
110
- return_tensors="pt"
111
- ).to(DEVICE)
112
 
 
 
 
113
  # Setup streamer
114
  streamer = TextIteratorStreamer(
115
  self.tokenizer,
@@ -120,7 +112,7 @@ class SinaReasonMedicalChat:
120
 
121
  # Generation parameters optimized for medical reasoning
122
  generation_kwargs = {
123
- **inputs,
124
  "max_new_tokens": max_tokens,
125
  "temperature": temperature,
126
  "top_p": top_p,
 
97
  # Add current message
98
  messages.append({"role": "user", "content": message})
99
 
100
+ tokenized = tokenizer.apply_chat_template(messages, return_dict=True)
 
 
 
 
 
 
 
 
 
 
 
101
 
102
+ input_ids = torch.tensor(tokenized.input_ids, device="cuda").unsqueeze(0)
103
+ attention_mask = torch.tensor(tokenized.attention_mask, device="cuda").unsqueeze(0)
104
+
105
  # Setup streamer
106
  streamer = TextIteratorStreamer(
107
  self.tokenizer,
 
112
 
113
  # Generation parameters optimized for medical reasoning
114
  generation_kwargs = {
115
+ "input_ids" :input_ids,
116
  "max_new_tokens": max_tokens,
117
  "temperature": temperature,
118
  "top_p": top_p,