andrewzamai commited on
Commit
6a9e84a
·
verified ·
1 Parent(s): 1e12620

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +18 -8
README.md CHANGED
@@ -310,9 +310,8 @@ An inverse trend can be observed, with SLIMER emerging as the most effective in
310
  <div class="description">JSON SLIMER prompt</div>
311
  <div class="template">
312
  <pre>{
313
- "description": "SLIMER prompt",
314
- "prompt_input": "<|start_header_id|>system<|end_header_id|>\n\nYou are an expert in Named Entity Recognition designed to output JSON only.<|eot_id|>\n<|start_header_id|>user<|end_header_id|>\n\nYou are given a text chunk (delimited by triple quotes) and an instruction.\nRead the text and answer to the instruction in the end.\n\"\"\"\n{<span class="highlight-orange">input</span>}\n\"\"\"\nInstruction: Extract the Named Entities of type {<span class="highlight-orange">NE_name</span>} from the text chunk you have read. You are given a DEFINITION and some GUIDELINES.\nDEFINITION: {<span class="highlight-orange">definition</span>}\nGUIDELINES: {<span class="highlight-orange">guidelines</span>}\nReturn a JSON list of instances of this Named Entity type (for example [\"text_span_1\", \"text_span_2\"]. Return an empty list [] if no instances are present. Return only the JSON list, no further motivations or introduction to the answer.<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>\n\n"
315
- }</pre>
316
  </div>
317
  </body>
318
  </html>
@@ -321,14 +320,25 @@ An inverse trend can be observed, with SLIMER emerging as the most effective in
321
  ```python
322
  from vllm import LLM, SamplingParams
323
 
324
- vllm_model = LLM(model="expertai/SLIMER-LLaMA3")
 
325
 
326
- sampling_params = SamplingParams(temperature=0, max_tokens=128)
327
 
328
- prompts = [prompter.generate_prompt(instruction, input) for instruction, input in instruction_input_pairs]
329
- responses = vllm_model.generate(prompts, sampling_params)
330
- ```
 
 
 
 
 
 
 
 
331
 
 
 
332
 
333
  ## Citation
334
 
 
310
  <div class="description">JSON SLIMER prompt</div>
311
  <div class="template">
312
  <pre>{
313
+ "description": "SLIMER PARALLEL 3 prompt",
314
+ "prompt_input": "<|start_header_id|>system<|end_header_id|>\n\nYou are a helpful NER assistant designed to output JSON.<|eot_id|>\n<|start_header_id|>user<|end_header_id|>\n\nYou are given a text chunk (delimited by triple quotes) and an instruction.\nRead the text and answer to the instruction in the end.\n\"\"\"\n{<span class="highlight-orange">input</span>}\n\"\"\"\nInstruction: Extract the entities of type {ne_tags} from the text chunk you have read. Be aware that not all of these entities are necessarily present. Do not extract entities that do not exist in the text, return an empty list for that tag. Ensure each entity is assigned to only one appropriate class.\nTo help you, here are dedicated Definition and Guidelines for each entity tag.\n{Def_and_Guidelines}\nReturn only a JSON object. The JSON should strictly follow this format:\n{expected_json_format}.\nDO NOT output anything else, just the JSON itself."}</pre>
 
315
  </div>
316
  </body>
317
  </html>
 
320
  ```python
321
  from vllm import LLM, SamplingParams
322
 
323
+ vllm_model = LLM(model="expertai/SLIMER-PARALLEL-LLaMA3")
324
+ tokenizer = vllm_model.get_tokenizer()
325
 
326
+ sampling_params = SamplingParams(temperature=0, max_tokens=1000, stop=tokenizer.eos_token)
327
 
328
+ # create a dictionary of dictionaries
329
+ # each NE_type as key should have a {Definition: str, Guidelines: str} value
330
+ # this prompter formats the input text to analyze with the SLIMER instruction
331
+ input_instruction_prompter = Prompter('LLaMA3-chat-NOheaders', template_path='./src/SFT_finetuning/templates')
332
+
333
+ system_message = "You are a helpful NER assistant designed to output JSON."
334
+ conversation = [
335
+ {"role": "system", "content": system_message},
336
+ {"role": "user", "content": input_instruction_prompter.generate_prompt(input=row["input"], instruction=row["instruction"])}, # the input_text + instruction
337
+ ]
338
+ prompt = tokenizer.apply_chat_template(conversation, tokenize=False, truncation=True, max_length=cutoff_len, add_generation_prompt=True)
339
 
340
+ responses = vllm_model.generate(prompt, sampling_params)
341
+ ```
342
 
343
  ## Citation
344