Spaces:
Build error
Build error
import os
import requests
import gradio as gr
from dotenv import load_dotenv
from transformers import AutoTokenizer

# Pull HF_API_KEY (and any other settings) from a local .env file into os.environ.
load_dotenv()

# Tokenizer is only used locally to measure/truncate prompt length; generation
# itself happens remotely via the Inference API below.
model_name = "tiiuae/falcon-7b-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Hosted inference endpoint for the same model the tokenizer belongs to.
API_URL = "https://api-inference.huggingface.co/models/tiiuae/falcon-7b-instruct"
# NOTE(review): if HF_API_KEY is unset this silently becomes "Bearer None" and
# every request will fail with 401 — consider failing fast here.
headers = {"Authorization": f"Bearer {os.getenv('HF_API_KEY')}"}
def format_chat_prompt(message, instruction):
    """Assemble the single-turn System/User/Assistant prompt for the instruct model."""
    return f"System:{instruction}\nUser: {message}\nAssistant:"
def query(payload):
    """POST *payload* to the HF Inference API and return the decoded JSON response."""
    resp = requests.post(API_URL, headers=headers, json=payload)
    return resp.json()
def respond(message, instruction="A conversation between a user and an AI assistant. The assistant gives helpful and honest answers."):
    """Generate an assistant reply for *message* via the hosted Falcon model.

    Builds the System/User/Assistant prompt, truncates it to the model's
    context budget, queries the Inference API, and returns only the
    assistant's turn from the generated continuation.
    """
    MAX_TOKENS = 1024  # context budget for falcon-7b-instruct

    prompt = format_chat_prompt(message, instruction)

    # Encode once and reuse for both the length check and the truncation
    # (the original encoded the prompt twice).
    token_ids = tokenizer.encode(prompt)
    if len(token_ids) > MAX_TOKENS:
        # Keep the most recent MAX_TOKENS tokens. NOTE(review): this can drop
        # the "System:" prefix for very long messages — confirm acceptable.
        prompt = tokenizer.decode(token_ids[-MAX_TOKENS:])

    response = query({"inputs": prompt})

    # On failure (model cold-start, rate limit, bad token) the Inference API
    # returns a dict like {"error": "..."} instead of a list of generations;
    # surface that to the UI instead of crashing on response[0].
    if isinstance(response, dict) and "error" in response:
        return f"API error: {response['error']}"

    generated_text = response[0]['generated_text']
    # Keep only the assistant's turn: text after the last "Assistant:" marker
    # and before any follow-up "User:" turn the model hallucinates.
    assistant_message = generated_text.split("Assistant:")[-1]
    return assistant_message.split("User:")[0].strip()
# Gradio 3+/4 removed the gr.inputs / gr.outputs namespaces and renamed the
# Textbox "default" kwarg to "value"; the old API fails at import time (the
# likely cause of the Space's build error). Components now live directly on gr.
iface = gr.Interface(
    respond,
    inputs=[
        gr.Textbox(label="Your question"),
        gr.Textbox(
            label="System message",
            lines=2,
            value="A conversation between a user and an AI assistant. The assistant gives helpful and honest answers.",
        ),
    ],
    outputs=[
        gr.Textbox(label="AI's response"),
    ],
)

iface.launch()