Update README.md
Browse files
README.md
CHANGED
|
@@ -1,3 +1,147 @@
|
|
| 1 |
---
|
| 2 |
license: apache-2.0
|
| 3 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
license: apache-2.0
|
| 3 |
---
|
| 4 |
+
# Polyglot-math-4x7b-24b
|
| 5 |
+
|
| 6 |
+

|
| 7 |
+
|
| 8 |
+
Polyglot-math-4x7b is a Mixture of Experts approach to a multilingual model.
|
| 9 |
+
|
| 10 |
+
This project is an experiment to see whether each expert can specialize in a different language. The answer is yes.
|
| 11 |
+
|
| 12 |
+
The model is a merge of models that cover math reasoning as well as Chinese and Japanese output.
|
| 13 |
+
|
| 14 |
+
+ meta-math/MetaMath-Mistral-7B
|
| 15 |
+
+ oshizo/japanese-e5-mistral-7b_slerp
|
| 16 |
+
+ cognitivecomputations/dolphin-2.6-mistral-7b-dpo-laser
|
| 17 |
+
+ s3nh/Mistral-7B-Evol-Instruct-Chinese
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
# Code Example
|
| 21 |
+
|
| 22 |
+
Inference [Colab](https://colab.research.google.com/drive/1tYSb63IKZDsiQ5BIJU8Oc92phxugAmB3?usp=sharing)
|
| 23 |
+
|
| 24 |
+
```python
|
| 25 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 26 |
+
|
| 27 |
+
def generate_response(prompt):
|
| 28 |
+
"""
|
| 29 |
+
Generate a response from the model based on the input prompt.
|
| 30 |
+
|
| 31 |
+
Args:
|
| 32 |
+
prompt (str): Prompt for the model.
|
| 33 |
+
|
| 34 |
+
Returns:
|
| 35 |
+
str: The generated response from the model.
|
| 36 |
+
"""
|
| 37 |
+
# Tokenize the input prompt
|
| 38 |
+
inputs = tokenizer(prompt, return_tensors="pt")
|
| 39 |
+
|
| 40 |
+
# Generate output tokens
|
| 41 |
+
outputs = model.generate(**inputs, max_new_tokens=256, eos_token_id=tokenizer.eos_token_id, pad_token_id=tokenizer.pad_token_id)
|
| 42 |
+
|
| 43 |
+
# Decode the generated tokens to a string
|
| 44 |
+
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 45 |
+
|
| 46 |
+
return response
|
| 47 |
+
|
| 48 |
+
# Load the model and tokenizer
|
| 49 |
+
model_id = "macadeliccc/polyglot-math-4x7b"
|
| 50 |
+
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
| 51 |
+
model = AutoModelForCausalLM.from_pretrained(model_id, load_in_4bit=True)
|
| 52 |
+
|
| 53 |
+
# Math prompts in different languages
|
| 54 |
+
english_math_prompt = "Explain the proof of Fermat's Last Theorem and its implications in number theory."
|
| 55 |
+
chinese_math_prompt = "解释费马大定理的证明及其在数论中的意义。"
|
| 56 |
+
japanese_math_prompt = "フェルマーの最終定理の証明と数論におけるその意義について説明してください。"
|
| 57 |
+
|
| 58 |
+
# Generate and print responses for each math prompt
|
| 59 |
+
print("English Math Response:")
|
| 60 |
+
print(generate_response(english_math_prompt), "\n")
|
| 61 |
+
|
| 62 |
+
print("Chinese Math Response:")
|
| 63 |
+
print(generate_response(chinese_math_prompt), "\n")
|
| 64 |
+
|
| 65 |
+
print("Japanese Math Response:")
|
| 66 |
+
print(generate_response(japanese_math_prompt), "\n")
|
| 67 |
+
|
| 68 |
+
```
|
| 69 |
+
|
| 70 |
+
## Example Output
|
| 71 |
+
English Response:
|
| 72 |
+
|
| 73 |
+
Write a quicksort algorithm in python.
|
| 74 |
+
|
| 75 |
+
```python
|
| 76 |
+
def quicksort(arr):
|
| 77 |
+
if len(arr) <= 1:
|
| 78 |
+
return arr
|
| 79 |
+
else:
|
| 80 |
+
pivot = arr[0]
|
| 81 |
+
less = [i for i in arr[1:] if i <= pivot]
|
| 82 |
+
greater = [i for i in arr[1:] if i > pivot]
|
| 83 |
+
return quicksort(less) + [pivot] + quicksort(greater)
|
| 84 |
+
|
| 85 |
+
arr = [5, 2, 9, 1, 5, 7, 4, 8, 6, 3]
|
| 86 |
+
print(quicksort(arr))
|
| 87 |
+
```
|
| 88 |
+
|
| 89 |
+
This is a simple implementation of the quicksort algorithm in python. The function `quicksort` takes an array as input and returns a sorted array. The algorithm works by selecting a pivot element from the array and partitioning the other elements into two sub-arrays, according to whether they are less than or greater than the pivot. The process is then repeated recursively on the sub-arrays until the entire array is sorted.
|
| 90 |
+
|
| 91 |
+
Chinese Response:
|
| 92 |
+
|
| 93 |
+
用Python写一个快速排序算法
|
| 94 |
+
|
| 95 |
+
```python
|
| 96 |
+
def quick_sort(arr):
|
| 97 |
+
if len(arr) <= 1:
|
| 98 |
+
return arr
|
| 99 |
+
else:
|
| 100 |
+
pivot = arr[0]
|
| 101 |
+
less = [i for i in arr[1:] if i <= pivot]
|
| 102 |
+
greater = [i for i in arr[1:] if i > pivot]
|
| 103 |
+
return quick_sort(less) + [pivot] + quick_sort(greater)
|
| 104 |
+
|
| 105 |
+
arr = [3, 5, 2, 1, 4, 6, 8, 7]
|
| 106 |
+
print(quick_sort(arr))
|
| 107 |
+
```
|
| 108 |
+
这个程序的时间复杂度为O(nlogn),空间复杂度为O(n)。
|
| 109 |
+
|
| 110 |
+
Japanese Response:
|
| 111 |
+
|
| 112 |
+
Pythonでクイックソートアルゴリズムを書いてください。
|
| 113 |
+
|
| 114 |
+
```python
|
| 115 |
+
def quicksort(arr):
|
| 116 |
+
if len(arr) <= 1:
|
| 117 |
+
return arr
|
| 118 |
+
pivot = arr[0]
|
| 119 |
+
left = [x for x in arr[1:] if x < pivot]
|
| 120 |
+
right = [x for x in arr[1:] if x >= pivot]
|
| 121 |
+
return quicksort(left) + [pivot] + quicksort(right)
|
| 122 |
+
|
| 123 |
+
print(quicksort([3,6,8,10,1,5,9,2,4,7]))
|
| 124 |
+
```
|
| 125 |
+
|
| 126 |
+
このコードはクイックソートアルゴリズムを実装しています。クイックソートは一種の分割統治（divide-and-conquer）アルゴリズムで、配列を分割し、それぞれの部分配列を再帰的にソートします。
|
| 127 |
+
|
| 128 |
+
この実装では、配列の最初の要素をピボットとして使用します。そして、配列を2つの
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
# Evaluations
|
| 133 |
+
|
| 134 |
+
| Tasks |Version|Filter|n-shot| Metric |Value | |Stderr|
|
| 135 |
+
|-------------|-------|------|-----:|--------|-----:|---|-----:|
|
| 136 |
+
|arc_challenge|Yaml |none | 0|acc |0.5495|± |0.0145|
|
| 137 |
+
| | |none | 0|acc_norm|0.5794|± |0.0144|
|
| 138 |
+
|arc_easy |Yaml |none | 0|acc |0.8304|± |0.0077|
|
| 139 |
+
| | |none | 0|acc_norm|0.8068|± |0.0081|
|
| 140 |
+
|boolq |Yaml |none | 0|acc |0.8749|± |0.0058|
|
| 141 |
+
|hellaswag |Yaml |none | 0|acc |0.6276|± |0.0048|
|
| 142 |
+
| | |none | 0|acc_norm|0.8157|± |0.0039|
|
| 143 |
+
|openbookqa |Yaml |none | 0|acc |0.3180|± |0.0208|
|
| 144 |
+
| | |none | 0|acc_norm|0.4460|± |0.0223|
|
| 145 |
+
|piqa |Yaml |none | 0|acc |0.8139|± |0.0091|
|
| 146 |
+
| | |none | 0|acc_norm|0.8237|± |0.0089|
|
| 147 |
+
|winogrande |Yaml |none | 0|acc |0.7419|± |0.0123|
|