# Hugging Face Space status banner captured with the page scrape ("Spaces: Sleeping").
| import gradio as gr | |
| from transformers import pipeline | |
| import requests | |
| import os | |
# Prompt template tokens for a Mistral/Llama-style "[INST] ... [/INST]" chat format.
start_token = "<s>"
start_instruction_token = "[INST] "
end_instruction_token = " [/INST]"
# Italian system prompt: "You are a helpful and reliable assistant. Answer the
# following question appropriately."
system_prompt = "Sei un assistente utile ed affidabile. Rispondi in maniera adeguata alla domanda seguente:\n"
start_completion = "\nRisposta:"

# Dedicated Hugging Face Inference Endpoint serving the model.
API_URL = "https://jadvy64czlx56190.us-east-1.aws.endpoints.huggingface.cloud"
# Default to "" instead of crashing: os.getenv(...) returns None when ITACA_TOKEN
# is unset, and "Bearer " + None raises TypeError at import time. With "" the
# endpoint instead rejects requests with 401, which is easier to diagnose.
token = "Bearer " + os.getenv("ITACA_TOKEN", "")
headers = {
    "Accept": "application/json",
    "Authorization": token,
    "Content-Type": "application/json",
}
def query(payload):
    """POST *payload* as JSON to the inference endpoint and return the decoded JSON reply.

    Raises:
        requests.exceptions.RequestException: on network failure or timeout.
        ValueError: if the endpoint returns a non-JSON body.
    """
    # A timeout keeps the Gradio worker from hanging indefinitely when the
    # endpoint is cold-starting or unreachable; 120 s allows for model warm-up.
    response = requests.post(API_URL, headers=headers, json=payload, timeout=120)
    return response.json()
def predict(message, history):
    """Gradio chat callback: wrap *message* in the [INST] prompt template and
    return the model's completion text.

    Args:
        message: The user's latest message (str).
        history: Prior chat turns; required by gr.ChatInterface but unused —
            each request is answered without conversational context.

    Returns:
        The generated answer, or an error description if the endpoint
        reported a failure (e.g. while scaling up from zero).
    """
    prompt = (
        start_token
        + start_instruction_token
        + system_prompt
        + message
        + end_instruction_token
        + start_completion
    )
    print(prompt)  # debug: log the fully rendered prompt
    output = query({
        "inputs": prompt,
        "parameters": {
            "temperature": 0.7,
            "max_new_tokens": 512,
            "return_full_text": False,
        },
    })
    # A scaled-to-zero endpoint answers {"error": ...} while cold-starting;
    # surface that message instead of crashing with KeyError/TypeError on
    # output[0]["generated_text"].
    if isinstance(output, dict) and "error" in output:
        return "Errore dal modello: " + str(output["error"])
    return output[0]["generated_text"]
# Wire predict() into a standard chat UI and start the web server.
# NOTE(review): share=True requests a temporary public gradio.live link; inside
# a Hugging Face Space this flag is not needed — confirm it is intentional.
iface = gr.ChatInterface(predict)
iface.launch(share=True)