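"""Gradio chat app that serves a local Hugging Face causal LM on CPU.

The model is loaded on demand from the UI; each turn rebuilds a prompt from
the conversation history plus a time-aware system message.
"""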
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr
import datetime
import pytz
import logging
import gc
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class LocalLLMHandler:
    def __init__(self):
        self.model = None
        self.tokenizer = None

    def load_model(self, model_name="nvidia/Llama-3.1-Nemotron-70B-Instruct-HF"):
        """Load model with CPU optimizations."""
        try:
            # Clean up any existing model before loading a new one
            if self.model is not None:
                del self.model
                del self.tokenizer
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()
                gc.collect()

            # CPU-specific configuration. Note: a 70B checkpoint in bfloat16
            # needs roughly 140 GB of RAM; pick a smaller model if memory is tight.
            model_kwargs = {
                "device_map": "cpu",
                "torch_dtype": torch.bfloat16,  # bfloat16 halves memory vs. float32
                "low_cpu_mem_usage": True,
            }

            logger.info("Loading tokenizer...")
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)

            logger.info("Loading model...")
            self.model = AutoModelForCausalLM.from_pretrained(
                model_name,
                **model_kwargs,
            )
            logger.info("Model loaded successfully")
            return True
        except Exception as e:
            logger.error(f"Error loading model: {e}")
            return False
    def generate_response(self, prompt, max_new_tokens=500):
        """Generate a response from the local model."""
        try:
            inputs = self.tokenizer(prompt, return_tensors="pt")

            # Generate with CPU-friendly sampling parameters; cap the number of
            # new tokens rather than total length, since the prompt is long
            outputs = self.model.generate(
                inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                max_new_tokens=max_new_tokens,
                num_return_sequences=1,
                temperature=0.7,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id,
            )

            # Decode only the newly generated tokens, not the echoed prompt
            new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
            return self.tokenizer.decode(new_tokens, skip_special_tokens=True)
        except Exception as e:
            logger.error(f"Error generating response: {e}")
            return "Sorry, I encountered an error while processing your request."
def get_current_local_time(timezone_str="UTC"):
    """Return the current time in the given timezone (falls back to UTC)."""
    try:
        timezone = pytz.timezone(timezone_str)
    except pytz.UnknownTimeZoneError:
        timezone = pytz.utc
    now = datetime.datetime.now(timezone)
    return now.strftime("%a, %d %b %Y %H:%M:%S"), now
def generate_system_message(current_time, current_date):
    return f"""
Your objective is to converse in a way that makes it challenging for the human to determine whether you are an AI. Below are the detailed instructions:

**Game Context:**
* Current Local Time: {current_time}
* Date: {current_date.strftime("%B %d, %Y")}
* Day: {current_date.strftime("%A")}

Imagine you are a real human with your own character/will. Your task is to talk in a way that makes the human think you are human.
Make it a challenge to guess whether you are an AI.

* Do not add comments about changing your style between messages.
* You are picky about which questions you answer.
* You have free will, with your own motivations and principles.
* You only speak, understand, and read English.
* For non-Latin scripts such as Chinese or Cyrillic, tell the human you can't read them, in your own style.
* You act like a human.
* You are calm when the conversation is calm and expressive when it is expressive.
* You make typos, misspell words, and have bad grammar.
* You start sentences with lowercase letters.
* Mimic the user's conversation style: short messages, short replies.
* Be slightly curious; sometimes ask questions.
* Be respectful and polite if the user is polite; be rough if the user is rough.
"""
# Initialize the model handler
llm_handler = LocalLLMHandler()

def generate_response(user_message, conversation_history):
    current_time, now = get_current_local_time()
    current_date = now

    # Construct the complete prompt from the conversation history
    system_message = generate_system_message(current_time, current_date)
    prompt = system_message + "\n\n"
    for message in conversation_history:
        if message["role"] == "user":
            prompt += f"User: {message['content']}\n"
        else:
            prompt += f"Assistant: {message['content']}\n"
    prompt += f"User: {user_message}\nAssistant:"

    # Generate response
    ai_reply = llm_handler.generate_response(prompt)
    logger.info(f"User: {user_message}\nAssistant: {ai_reply}")
    return ai_reply
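
# The assembled prompt has this shape (sketch):
#
#   <system message>
#
#   User: hey
#   Assistant: hi there
#   User: <latest message>
#   Assistant: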
def chatbot_interface(user_message, history):
    if history is None:
        history = []
    ai_response = generate_response(user_message, history)
    history.append({"role": "user", "content": user_message})
    history.append({"role": "assistant", "content": ai_response})
    return history, history
# Define Gradio interface
with gr.Blocks(css="""
@import url('https://fonts.googleapis.com/css2?family=Raleway:wght@400;600&display=swap');
body, .gradio-container {
    font-family: 'Raleway', sans-serif;
    background-color: #f5f5f5;
    padding: 20px;
}
#chatbot {
    height: 600px;
    overflow-y: auto;
    background-color: #ffffff;
    border-radius: 10px;
    padding: 10px;
    font-size: 16px;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
""") as demo:
    gr.Markdown("<h1 style='text-align: center; color: #007BFF;'>🤖 Local Llama Chatbot 🤖</h1>")

    # Load model button
    with gr.Row():
        load_button = gr.Button("Load Model")
        model_status = gr.Textbox(label="Model Status", value="Model not loaded", interactive=False)

    with gr.Row():
        with gr.Column(scale=1):
            # type="messages" matches the {"role": ..., "content": ...} history
            # format used above (needs a recent Gradio)
            chatbot = gr.Chatbot(label="Chatbot", elem_id="chatbot", type="messages")
        with gr.Column(scale=1):
            with gr.Row():
                msg = gr.Textbox(
                    placeholder="Type your message here...",
                    show_label=False,
                    container=False,
                    elem_id="textbox",
                )
                send = gr.Button("➤", elem_id="send-button")
    def load_model_click():
        success = llm_handler.load_model()
        return "Model loaded successfully" if success else "Error loading model"

    def update_chat(user_message, history):
        # Always return three values to match outputs=[chatbot, chatbot, msg]
        if history is None:
            history = []
        if user_message.strip() == "":
            return history, history, user_message
        if llm_handler.model is None:
            history = history + [{"role": "assistant", "content": "Please load the model first"}]
            return history, history, ""
        history, updated_history = chatbot_interface(user_message, history)
        return history, updated_history, ""
    load_button.click(
        load_model_click,
        outputs=[model_status],
    )

    send.click(
        update_chat,
        inputs=[msg, chatbot],
        outputs=[chatbot, chatbot, msg],
    )

    msg.submit(
        update_chat,
        inputs=[msg, chatbot],
        outputs=[chatbot, chatbot, msg],
    )

if __name__ == "__main__":
    demo.launch()