# Hugging Face Space status banner captured with the page scrape ("Spaces: Sleeping").
| import gradio as gr | |
| from transformers import pipeline | |
| import requests | |
| import os | |
# Prompt template tokens for a Mistral/Llama-style "[INST] ... [/INST]" chat format.
start_token = "<s>"
start_instruction_token = "[INST] "
end_instruction_token = " [/INST]"
# Italian system prompt: "You are a helpful and reliable assistant. Answer the
# following question appropriately."
system_prompt = "Sei un assistente utile ed affidabile. Rispondi in maniera adeguata alla domanda seguente:\n"
start_completion = "\nRisposta:"

# Dedicated Hugging Face Inference Endpoint serving the model.
API_URL = "https://jadvy64czlx56190.us-east-1.aws.endpoints.huggingface.cloud"
# Default to "" instead of crashing: os.getenv(...) returns None when ITACA_TOKEN
# is unset, and "Bearer " + None raises TypeError at import time. With "" the
# endpoint instead rejects requests with 401, which is easier to diagnose.
token = "Bearer " + os.getenv("ITACA_TOKEN", "")
headers = {
    "Accept": "application/json",
    "Authorization": token,
    "Content-Type": "application/json",
}
def query(payload):
    """POST *payload* as JSON to the inference endpoint and return the decoded JSON reply.

    Raises:
        requests.exceptions.RequestException: on network failure or timeout.
        ValueError: if the endpoint returns a non-JSON body.
    """
    # A timeout keeps the Gradio worker from hanging indefinitely when the
    # endpoint is cold-starting or unreachable; 120 s allows for model warm-up.
    response = requests.post(API_URL, headers=headers, json=payload, timeout=120)
    return response.json()
def predict(message, history):
    """Gradio chat callback: wrap *message* in the [INST] prompt template and
    return the model's completion text.

    Args:
        message: The user's latest message (str).
        history: Prior chat turns; required by gr.ChatInterface but unused —
            each request is answered without conversational context.

    Returns:
        The generated answer, or an error description if the endpoint
        reported a failure (e.g. while scaling up from zero).
    """
    prompt = (
        start_token
        + start_instruction_token
        + system_prompt
        + message
        + end_instruction_token
        + start_completion
    )
    print(prompt)  # debug: log the fully rendered prompt
    output = query({
        "inputs": prompt,
        "parameters": {
            "temperature": 0.7,
            "max_new_tokens": 512,
            "return_full_text": False,
        },
    })
    # A scaled-to-zero endpoint answers {"error": ...} while cold-starting;
    # surface that message instead of crashing with KeyError/TypeError on
    # output[0]["generated_text"].
    if isinstance(output, dict) and "error" in output:
        return "Errore dal modello: " + str(output["error"])
    return output[0]["generated_text"]
# Wire predict() into a standard chat UI and start the web server.
# NOTE(review): share=True requests a temporary public gradio.live link; inside
# a Hugging Face Space this flag is not needed — confirm it is intentional.
iface = gr.ChatInterface(predict)
iface.launch(share=True)