Spaces:

daniloedu
/

chat_llm_v2

Build error

File size: 1,807 Bytes

246ff61
 
c43707f
246ff61
3cca5d2
c43707f
 
 
3cca5d2
 
 
c43707f
22ef894
c43707f
246ff61
 
 
044cc98
246ff61
 
 
abaf1a6
246ff61
38124bc
246ff61
38124bc
 
 
 
 
 
246ff61
 
fa3d7a5
 
246ff61
044cc98
246ff61
 
 
 
 
 
 
 
 
 
044cc98
246ff61

import os
import requests
import gradio as gr
from dotenv import load_dotenv
from transformers import AutoTokenizer

# Load settings from a local .env file (if any) into the environment so that
# HF_API_KEY can be read below.
load_dotenv()

# Hugging Face model id, shared by the local tokenizer and the hosted endpoint.
model_name = "tiiuae/falcon-7b-instruct"
# The tokenizer is used locally only, to count and truncate prompt tokens.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Derive the endpoint from model_name so the tokenizer and the hosted model
# cannot drift apart when the model id is changed.
API_URL = f"https://api-inference.huggingface.co/models/{model_name}"
# NOTE(review): os.getenv returns None when HF_API_KEY is unset, which makes
# this header the literal "Bearer None" — confirm the key is always configured.
headers = {"Authorization": f"Bearer {os.getenv('HF_API_KEY')}"}

def format_chat_prompt(message, instruction):
    """Build the single-turn prompt text sent to the model.

    The result is three newline-separated lines: the system instruction,
    the user's message, and a trailing ``Assistant:`` cue for the model to
    complete.

    Args:
        message: The user's question.
        instruction: The system message placed on the first line.

    Returns:
        The assembled prompt string.
    """
    turns = (
        f"System:{instruction}",
        f"User: {message}",
        "Assistant:",
    )
    return "\n".join(turns)

def query(payload, timeout=60):
    """POST *payload* as JSON to ``API_URL`` and return the decoded JSON reply.

    Args:
        payload: JSON-serialisable request body, e.g. ``{"inputs": prompt}``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would hang the
            caller if the endpoint stalls.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            the timeout expires.
        ValueError: If the response body is not valid JSON.
    """
    response = requests.post(
        API_URL,
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    # The HTTP status is intentionally left unchecked so callers receive the
    # decoded body even for error responses.
    return response.json()
    
def _extract_reply(generated_text, prompt):
    """Return the assistant's first reply contained in *generated_text*."""
    if generated_text.startswith(prompt):
        # The prompt was echoed back verbatim: everything after it is new text.
        completion = generated_text[len(prompt):]
    else:
        # No verbatim echo: fall back to the text after the first
        # "Assistant:" cue, or the whole text when no cue is present.
        _, cue, tail = generated_text.partition("Assistant:")
        completion = tail if cue else generated_text
    # Drop any follow-up turns the model invented after its first answer.
    return completion.split("User:")[0].strip()


def respond(message, instruction="A conversation between a user and an AI assistant. The assistant gives helpful and honest answers."):
    """Send one user message to the model and return the assistant's answer.

    Args:
        message: The user's question.
        instruction: System message placed at the top of the prompt.

    Returns:
        The assistant's reply with surrounding whitespace removed and any
        text from a subsequent ``User:`` turn discarded.

    Raises:
        RuntimeError: If the API reply does not contain generated text
            (for example an error payload or an empty list).
    """
    MAX_TOKENS = 1024  # limit for the model
    prompt = format_chat_prompt(message, instruction)

    # Tokenize once; the ids are reused for both the length check and the cut.
    token_ids = tokenizer.encode(prompt)
    if len(token_ids) > MAX_TOKENS:
        # Keep the tail so the prompt still ends with the "Assistant:" cue.
        prompt = tokenizer.decode(token_ids[-MAX_TOKENS:])

    response = query({"inputs": prompt})
    try:
        generated_text = response[0]["generated_text"]
    except (KeyError, IndexError, TypeError) as err:
        # Surface the API's own message instead of an opaque lookup error.
        if isinstance(response, dict):
            detail = response.get("error", response)
        else:
            detail = response
        raise RuntimeError(
            f"Inference API returned no generated text: {detail}"
        ) from err

    return _extract_reply(generated_text, prompt)

# Web UI: two text inputs (question, system message) are passed positionally
# to respond(message, instruction); its return value fills the single output.
# gr.Textbox with value= replaces the gr.inputs / gr.outputs namespaces and
# the default= keyword, which are deprecated in Gradio 3 and absent in Gradio 4.
iface = gr.Interface(
    fn=respond,
    inputs=[
        gr.Textbox(label="Your question"),
        gr.Textbox(
            label="System message",
            lines=2,
            value="A conversation between a user and an AI assistant. The assistant gives helpful and honest answers.",
        ),
    ],
    outputs=[
        gr.Textbox(label="AI's response"),
    ],
)

# Start the app when this file is executed.
iface.launch()