smrstep committed on
Commit
0ba2ad4
·
verified ·
1 Parent(s): 44b1dfd

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +36 -2
README.md CHANGED
@@ -44,6 +44,40 @@ As is, CARROT supports routing to the following collection of large language mod
44
  </p>
45
 
46
  ```python
47
- your_code = do_some_stuff
48
- ```
 
 
 
 
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  </p>
45
 
46
  ```python
47
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
48
+ import numpy as np
49
+
50
# Hugging Face access token — needed to download gated models (e.g. Llama-3.1).
token = 'YOUR HF TOKEN'

# Per-model [input, output] prices, in USD per million tokens.
COSTS = {'aws-claude-3-5-sonnet-v1':[3, 15], 'aws-titan-text-premier-v1': [.5, 1.5], 'openai-gpt-4o': [2.5, 10], 'openai-gpt-4o-mini': [0.15,0.6], 'wxai-granite-3-2b-instruct-8k-max-tokens':[0.1, 0.1], 'wxai-granite-3-8b-instruct-8k-max-tokens':[0.2, 0.2], 'wxai-llama-3-1-70b-instruct':[.9,.9], 'wxai-llama-3-1-8b-instruct': [.2,.2], 'wxai-llama-3-2-1b-instruct':[.06,.06], 'wxai-llama-3-2-3b-instruct':[.06, .06],'wxai-llama-3-3-70b-instruct':[.9,.9], 'wxai-mixtral-8x7b-instruct-v01':[.6,.6], 'wxai-llama-3-405b-instruct':[3.5, 3.5]}

# Tokenizer used only to count input tokens for cost estimation.
# BUG FIX: pass the user's token (was a hard-coded empty string, which makes
# downloading this gated tokenizer fail for everyone following the example).
input_counter = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-70B", token=token)

# Tokenizer matching the RoBERTa-based router heads below.
tokenizer = AutoTokenizer.from_pretrained('roberta-base')

# Multi-label head: per-model probability that the model answers the prompt well.
score_predictor = AutoModelForSequenceClassification.from_pretrained(
    'CARROT-LLM-Routing/Performance',
    problem_type="multi_label_classification",
    num_labels=len(COSTS),
)
# Regression head: predicted number of output tokens per model.
output_counter = AutoModelForSequenceClassification.from_pretrained(
    'CARROT-LLM-Routing/Cost',
    problem_type="regression",
    num_labels=len(COSTS),
)
65
def CARROT(prompts, mu, input_counter=input_counter, predictors=[score_predictor, output_counter], tokenizer=tokenizer, costs=COSTS):
    """Route each prompt to one model, trading off quality against price.

    Args:
        prompts: list of prompt strings to route.
        mu: scalar weight; 0 routes purely on predicted quality, larger
            values penalize expensive models more heavily.
        input_counter: tokenizer used to count input tokens for pricing.
        predictors: pair ``[score_predictor, output_counter]`` of HF heads.
        tokenizer: tokenizer matching the predictor heads.
        costs: mapping model name -> [input price, output price] per 1M tokens.

    Returns:
        A list of model names (keys of ``costs``), one per prompt.
    """
    tokenized_text = tokenizer(prompts,
                               truncation=True,
                               padding=True,
                               is_split_into_words=False,
                               return_tensors='pt')
    input_counter.pad_token = tokenizer.eos_token
    # BUG FIX: quality scores must come from the performance predictor
    # (predictors[0]); the original sigmoided predictors[1], the token counter.
    scores = 1 / (1 + np.exp(-predictors[0](tokenized_text["input_ids"]).logits.detach().numpy()))
    # Predicted output-token counts, one column per model.
    output_tokens = predictors[1](tokenized_text["input_ids"]).logits.detach().numpy()
    input_tokens = np.array([input_counter(prompt, return_tensors="pt")["input_ids"].shape[1]
                             for prompt in prompts])
    # Estimated dollar cost per (prompt, model). BUG FIX: read prices from the
    # ``costs`` parameter (the original hard-coded the global COSTS, ignoring
    # caller-supplied pricing) and avoid shadowing it with the local list.
    # NOTE(review): the input price is divided by 1e6 but the output price by
    # 1e3, as in the original — confirm the regression head's output units.
    price_rows = []
    for i, name in enumerate(costs.keys()):
        price_rows.append((input_tokens * costs[name][0] / 1000000
                           + output_tokens[:, i] * costs[name][1] / 1000).tolist())
    price = np.array(price_rows).T
    # Per prompt, pick the model maximizing weighted quality minus cost.
    model_idx = ((1 - mu) * scores - mu * price * 100).argmax(axis=1, keepdims=True)
    # BUG FIX: the original referenced an undefined ``id2label``; map the
    # argmax column index back to a model name via the ordering of ``costs``.
    model_names = list(costs)
    return [model_names[idx[0]] for idx in model_idx]
83
+ ```