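"""Gradio chat app for a LoRA fine-tuned Nigerian Pidgin LLaMA model.

Resolves the base model from the adapter config of
"Ephraimmm/pdgn_llama_model", applies the LoRA adapter with PEFT,
and serves a simple chat UI with preset system prompts.
"""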
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel, PeftConfig
import torch
# Model paths
ADAPTER_MODEL = "Ephraimmm/pdgn_llama_model"

# Resolve the base model name from the adapter's PEFT config, then load the tokenizer
peft_config = PeftConfig.from_pretrained(ADAPTER_MODEL)
BASE_MODEL = peft_config.base_model_name_or_path

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # LLaMA-family tokenizers often ship without a pad token
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    # use torch_dtype (the long-supported kwarg name); plain `dtype` is only
    # recognized by recent transformers releases
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto" if torch.cuda.is_available() else None,
    low_cpu_mem_usage=True,
    trust_remote_code=True,
)
model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL)
model.eval()
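# Optionally, the adapter could be merged into the base weights for slightly
# faster inference (model = model.merge_and_unload()); it is kept separate
# here so the adapter stays swappable.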
# Chat generation
def chat_with_pidgin_bot(message, history, system_prompt, max_length=512, temperature=0.7, top_p=0.9):
    # Flatten the chat history into a single plain-text prompt
    conversation = f"System: {system_prompt}\n\n" if system_prompt else ""
    for user_msg, bot_msg in history:
        conversation += f"User: {user_msg}\nAssistant: {bot_msg}\n"
    conversation += f"User: {message}\nAssistant:"

    inputs = tokenizer(conversation, return_tensors="pt", truncation=True, max_length=2048)
    if torch.cuda.is_available():
        inputs = inputs.to("cuda")

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_length,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # Decode the full sequence, then keep only the newest assistant turn
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    response = response.split("Assistant:")[-1].split("User:")[0].strip()
    return response
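# Example (direct call, outside the UI):
#   chat_with_pidgin_bot("How you dey?", [], "You speak Nigerian Pidgin English.")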
# Gradio UI
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # Pidgin LLaMA Chatbot
        Chat with an AI trained on Nigerian Pidgin English.
        """
    )
    chatbot = gr.Chatbot(label="Pidgin Chat", elem_id="chatbot")
    with gr.Row():  # scale= only takes effect inside a Row
        msg = gr.Textbox(label="Your Message", placeholder="Type your message here...", scale=4)
        send_btn = gr.Button("Send", scale=1, variant="primary")
    with gr.Accordion("System Prompt", open=True):
        system_prompt = gr.Textbox(
            label="System Instructions",
            value="You are a helpful AI assistant that speaks Nigerian Pidgin English. Be friendly and respectful.",
            lines=4
        )
        with gr.Row():
            preset1 = gr.Button("Comedian")
            preset2 = gr.Button("Teacher")
            preset3 = gr.Button("Friend")
            preset4 = gr.Button("Professional")
    with gr.Accordion("Advanced Settings", open=False):
        max_length = gr.Slider(50, 1024, 512, step=50, label="Max Response Length")
        temperature = gr.Slider(0.1, 2.0, 0.7, step=0.1, label="Temperature")
        top_p = gr.Slider(0.1, 1.0, 0.9, step=0.05, label="Top P")
    clear = gr.Button("Clear Chat")
    # Respond: generate a reply and append the (user, bot) pair to the history
    def respond(message, chat_history, sys_prompt, max_len, temp, top_p_val):
        # Sliders return floats; max_new_tokens expects an int
        bot_message = chat_with_pidgin_bot(message, chat_history, sys_prompt, int(max_len), temp, top_p_val)
        chat_history.append((message, bot_message))
        return "", chat_history
    # Preset system prompts
    def set_preset(preset_type):
        presets = {
            "comedian": "You are a Nigerian comedian who speaks Pidgin. Make people laugh with witty responses.",
            "teacher": "You are a patient teacher who speaks Pidgin. Explain things clearly.",
            "friend": "You are a caring friend who speaks Pidgin. Give good advice.",
            "professional": "You are a professional consultant who speaks Pidgin. Provide practical advice."
        }
        return presets.get(preset_type, "")
    # Events
    msg.submit(respond, [msg, chatbot, system_prompt, max_length, temperature, top_p], [msg, chatbot])
    send_btn.click(respond, [msg, chatbot, system_prompt, max_length, temperature, top_p], [msg, chatbot])
    preset1.click(lambda: set_preset("comedian"), None, system_prompt)
    preset2.click(lambda: set_preset("teacher"), None, system_prompt)
    preset3.click(lambda: set_preset("friend"), None, system_prompt)
    preset4.click(lambda: set_preset("professional"), None, system_prompt)
    clear.click(lambda: None, None, chatbot, queue=False)
if __name__ == "__main__":
    # share=True is ignored on Hugging Face Spaces; it creates a public link when run locally
    demo.launch(share=True)