|
|
import gradio as gr |
|
|
from transformers import AutoModelForCausalLM, AutoTokenizer |
|
|
import torch |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Hugging Face Hub id of the instruction-tuned Falcon-7B checkpoint.
MODEL_NAME = "tiiuae/falcon-7b-instruct"


print("Loading model...")

# Tokenizer and model are downloaded/cached on first run.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# NOTE(review): loads in the default dtype (float32) — roughly 28 GB of
# weights for a 7B model; consider torch_dtype=torch.float16 when a GPU
# is available. TODO confirm memory budget of the deployment target.
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)

# Prefer GPU when present; inference inputs are moved to this same
# device before generation.
device = "cuda" if torch.cuda.is_available() else "cpu"

model.to(device)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# System instruction prepended to every prompt sent to the model.
SYSTEM_PROMPT = "You are a helpful, creative AI assistant. Your creator is Austin. Answer clearly and politely."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def chat_with_ai(user_input, history=None):
    """Generate a model reply to *user_input* given the conversation so far.

    Parameters
    ----------
    user_input : str
        The latest user message.
    history : list[tuple[str, str]] | None
        Prior (user, ai) exchanges. Defaults to a fresh empty list.

    Returns
    -------
    tuple[list, list]
        The updated history, twice: once for the ``gr.Chatbot`` display
        and once for the ``gr.State`` that persists between clicks.
    """
    # Bug fix: the original used `history=[]`, a mutable default shared
    # across every call, so separate sessions leaked into each other.
    history = [] if history is None else history

    # Rebuild the running transcript as a plain-text prompt.
    prompt = SYSTEM_PROMPT + "\n"
    for user_turn, ai_turn in history:
        prompt += f"User: {user_turn}\nAI: {ai_turn}\n"
    prompt += f"User: {user_input}\nAI:"

    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    # Inference only — disable autograd bookkeeping.
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=200)
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # generate() echoes the prompt, so keep only the text after the
    # final "AI:" marker.
    response = decoded.split("AI:")[-1].strip()

    history.append((user_input, response))
    # Bug fix: the first output is wired to gr.Chatbot, which expects the
    # list of (user, ai) pairs — the original returned a bare string,
    # which the Chatbot component cannot render as a conversation.
    return history, history
|
|
|
|
|
|
|
|
|
|
|
|
|
|
with gr.Blocks() as demo:
    # Header text shown above the chat widget.
    gr.Markdown("# Austin's AI Chatbot")
    gr.Markdown("This chatbot was created by **Austin**. Chat with it below!")

    # Conversation display, message entry, send trigger, and the
    # server-side history carried between clicks.
    chat_display = gr.Chatbot()
    message_box = gr.Textbox(placeholder="Type your message here...")
    send_button = gr.Button("Send")
    conversation = gr.State([])

    # Clicking "Send" passes the typed message plus the stored history to
    # the handler; its two return values refresh the chat view and state.
    send_button.click(
        chat_with_ai,
        inputs=[message_box, conversation],
        outputs=[chat_display, conversation],
    )

demo.launch()
|
|
|