MaziyarPanahi committed on
Commit
f6fba2f
·
verified ·
1 Parent(s): dd73ef2

wrap Python snippets in python-tagged code fences

Browse files
Files changed (1) hide show
  1. README.md +66 -63
README.md CHANGED
@@ -173,39 +173,41 @@ This configuration:
173
 
174
  **Extracting reasoning content from the API response:**
175
 
176
- from openai import OpenAI
177
-
178
- client = OpenAI(api_key="EMPTY", base_url="http://localhost:8000/v1")
179
-
180
- response = client.chat.completions.create(
181
- model="arcee-ai/Trinity-Large-Thinking",
182
- messages=[
183
- {"role": "user", "content": "What's the weather like in Paris?"}
184
- ],
185
- tools=[ # your tool definitions here
186
- {
187
- "type": "function",
188
- "function": {
189
- "name": "get_weather",
190
- "description": "Get current weather for a location",
191
- "parameters": {
192
- "type": "object",
193
- "properties": {
194
- "location": {"type": "string"}
195
- },
196
- "required": ["location"]
197
- }
198
  }
199
  }
200
- ],
201
- )
 
202
 
203
- # Access reasoning (thinking) content
204
- reasoning = response.choices[0].message.reasoning_content
205
 
206
- # Access final response or tool calls
207
- content = response.choices[0].message.content
208
- tool_calls = response.choices[0].message.tool_calls
 
209
 
210
  **Note on thinking-in-context with vLLM**: When building multi-turn agentic loops, include both `reasoning_content` and `content` in the conversation history you send back to the model. The reasoning content should be re-wrapped in `<think>...</think>` tags within the assistant message.
211
 
@@ -213,40 +215,41 @@ This configuration:
213
 
214
  Use the `main` transformers branch or pass `trust_remote_code=True` with a released version.
215
 
216
- from transformers import AutoTokenizer, AutoModelForCausalLM
217
- import torch
218
-
219
- model_id = "arcee-ai/Trinity-Large-Thinking"
220
- tokenizer = AutoTokenizer.from_pretrained(model_id)
221
- model = AutoModelForCausalLM.from_pretrained(
222
- model_id,
223
- torch_dtype=torch.bfloat16,
224
- device_map="auto",
225
- trust_remote_code=True
226
- )
227
-
228
- messages = [
229
- {"role": "user", "content": "Who are you?"},
230
- ]
231
-
232
- input_ids = tokenizer.apply_chat_template(
233
- messages,
234
- add_generation_prompt=True,
235
- return_tensors="pt"
236
- ).to(model.device)
237
-
238
- outputs = model.generate(
239
- input_ids,
240
- max_new_tokens=4096,
241
- do_sample=True,
242
- temperature=0.6,
243
- top_k=50,
244
- top_p=0.95
245
- )
246
-
247
- response = tokenizer.decode(outputs[0], skip_special_tokens=True)
248
- print(response)
249
-
 
250
 
251
  ### API
252
 
 
173
 
174
  **Extracting reasoning content from the API response:**
175
 
176
+ ```python
177
+ from openai import OpenAI
178
+
179
+ client = OpenAI(api_key="EMPTY", base_url="http://localhost:8000/v1")
180
+
181
+ response = client.chat.completions.create(
182
+ model="arcee-ai/Trinity-Large-Thinking",
183
+ messages=[
184
+ {"role": "user", "content": "What's the weather like in Paris?"}
185
+ ],
186
+ tools=[ # your tool definitions here
187
+ {
188
+ "type": "function",
189
+ "function": {
190
+ "name": "get_weather",
191
+ "description": "Get current weather for a location",
192
+ "parameters": {
193
+ "type": "object",
194
+ "properties": {
195
+ "location": {"type": "string"}
196
+ },
197
+ "required": ["location"]
198
  }
199
  }
200
+ }
201
+ ],
202
+ )
203
 
204
+ # Access reasoning (thinking) content
205
+ reasoning = response.choices[0].message.reasoning_content
206
 
207
+ # Access final response or tool calls
208
+ content = response.choices[0].message.content
209
+ tool_calls = response.choices[0].message.tool_calls
210
+ ```
211
 
212
  **Note on thinking-in-context with vLLM**: When building multi-turn agentic loops, include both `reasoning_content` and `content` in the conversation history you send back to the model. The reasoning content should be re-wrapped in `<think>...</think>` tags within the assistant message.
213
 
 
215
 
216
  Use the `main` transformers branch or pass `trust_remote_code=True` with a released version.
217
 
218
+ ```python
219
+ from transformers import AutoTokenizer, AutoModelForCausalLM
220
+ import torch
221
+
222
+ model_id = "arcee-ai/Trinity-Large-Thinking"
223
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
224
+ model = AutoModelForCausalLM.from_pretrained(
225
+ model_id,
226
+ torch_dtype=torch.bfloat16,
227
+ device_map="auto",
228
+ trust_remote_code=True
229
+ )
230
+
231
+ messages = [
232
+ {"role": "user", "content": "Who are you?"},
233
+ ]
234
+
235
+ input_ids = tokenizer.apply_chat_template(
236
+ messages,
237
+ add_generation_prompt=True,
238
+ return_tensors="pt"
239
+ ).to(model.device)
240
+
241
+ outputs = model.generate(
242
+ input_ids,
243
+ max_new_tokens=4096,
244
+ do_sample=True,
245
+ temperature=0.6,
246
+ top_k=50,
247
+ top_p=0.95
248
+ )
249
+
250
+ response = tokenizer.decode(outputs[0], skip_special_tokens=True)
251
+ print(response)
252
+ ```
253
 
254
  ### API
255