Spaces:
Runtime error
Runtime error
Update llama2.py
Browse files
llama2.py
CHANGED
|
@@ -93,4 +93,41 @@ async def gen_text(
|
|
| 93 |
|
| 94 |
client = sseclient.SSEClient(r)
|
| 95 |
for event in client.events():
|
| 96 |
-
yield json.loads(event.data)['token']['text']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
|
| 94 |
client = sseclient.SSEClient(r)
|
| 95 |
for event in client.events():
|
| 96 |
+
yield json.loads(event.data)['token']['text']
|
| 97 |
+
|
def gen_text_none_stream(
    prompt,
    hf_model='meta-llama/Llama-2-70b-chat-hf',
    hf_token=None,
    *,
    timeout=60.0,
):
    """Generate a completion for *prompt* via the HuggingFace Inference API
    (non-streaming variant of ``gen_text``).

    Args:
        prompt: Text prompt sent as the model input.
        hf_model: HF model repo id to query.
        hf_token: HF API token; sent as a Bearer credential. May be None,
            in which case the request is unauthenticated.
        timeout: Seconds to wait for connect/read before raising
            ``requests.Timeout`` (keyword-only; previously the call could
            hang indefinitely).

    Returns:
        The generated text (``return_full_text`` is False, so the prompt is
        not echoed back).

    Raises:
        requests.HTTPError: On a non-2xx HTTP response.
        RuntimeError: If the API returns a JSON error payload instead of
            a generation result.
    """
    # Sampling configuration mirrors the streaming endpoint's settings.
    parameters = {
        'max_new_tokens': 64,
        'do_sample': True,
        'return_full_text': False,
        'temperature': 0.7,
        'top_k': 10,
        # 'top_p': 1.0,
        'repetition_penalty': 1.2
    }

    url = f'https://api-inference.huggingface.co/models/{hf_model}'
    headers = {
        'Authorization': f'Bearer {hf_token}',
    }
    data = {
        'inputs': prompt,
        'stream': False,
        'options': {
            'use_cache': False,
        },
        'parameters': parameters
    }

    # json= serializes the body and sets the Content-Type header for us;
    # the explicit timeout prevents an unresponsive endpoint from hanging.
    r = requests.post(
        url,
        headers=headers,
        json=data,
        timeout=timeout,
    )
    r.raise_for_status()

    payload = r.json()
    # HF returns {"error": "..."} (e.g. model loading) instead of a list;
    # surface that clearly rather than failing with an opaque KeyError.
    if isinstance(payload, dict) and 'error' in payload:
        raise RuntimeError(f"HuggingFace API error: {payload['error']}")
    return payload[0]["generated_text"]
|