import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr
import datetime
import pytz
import logging
import gc

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class LocalLLMHandler:
    def __init__(self):
        self.model = None
        self.tokenizer = None

    def load_model(self, model_name="nvidia/Llama-3.1-Nemotron-70B-Instruct-HF"):
        """Load the model with CPU optimizations."""
        try:
            # Clean up any existing model before loading a new one
            if self.model is not None:
                del self.model
                del self.tokenizer
                if torch.cuda.is_available():  # no-op on CPU-only machines
                    torch.cuda.empty_cache()
                gc.collect()

            # CPU-specific configurations
            model_kwargs = {
                "device_map": "cpu",
                "torch_dtype": torch.bfloat16,  # bfloat16 halves memory vs. fp32 and runs well on modern CPUs
                "low_cpu_mem_usage": True,
            }

            logger.info("Loading tokenizer...")
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)

            logger.info("Loading model...")
            self.model = AutoModelForCausalLM.from_pretrained(
                model_name,
                **model_kwargs,
            )
            logger.info("Model loaded successfully")
            return True
        except Exception as e:
            logger.error(f"Error loading model: {e}")
            return False
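
    # Sizing note: the 70B default weighs in at roughly 140 GB in bf16
    # (70B parameters x 2 bytes), far beyond a typical CPU Space. For local
    # experiments, a smaller checkpoint can be passed to load_model() instead;
    # "meta-llama/Llama-3.2-1B-Instruct" is one example stand-in, not part of
    # the original app.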

    def generate_response(self, prompt, max_new_tokens=500):
        """Generate a response from the local model."""
        try:
            inputs = self.tokenizer(prompt, return_tensors="pt")
            # Generate with CPU-friendly parameters
            outputs = self.model.generate(
                inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                max_new_tokens=max_new_tokens,  # cap new tokens instead of total sequence length
                num_return_sequences=1,
                temperature=0.7,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id,
            )
            # Decode only the newly generated tokens, not the echoed prompt
            new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
            response = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
            return response.strip()
        except Exception as e:
            logger.error(f"Error generating response: {e}")
            return "Sorry, I encountered an error while processing your request."

def get_current_local_time(timezone_str="UTC"):
    """Return the current time in the given timezone, falling back to UTC."""
    try:
        timezone = pytz.timezone(timezone_str)
    except pytz.UnknownTimeZoneError:
        timezone = pytz.utc
    now = datetime.datetime.now(timezone)
    return now.strftime("%a, %d %b %Y %H:%M:%S"), now
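
# Example (values depend on when it runs):
#   get_current_local_time("Europe/Berlin") -> ("Wed, 01 Jan 2025 13:00:00", <aware datetime>)
# Unknown zone names fall back to UTC rather than raising.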

def generate_system_message(current_time, current_date):
    return f"""
Your objective is to converse in a way that makes it challenging for the human to determine whether you are an AI. Below are the detailed instructions:

**Game Context:**
* Current Local Time: {current_time}
* Date: {current_date.strftime("%B %d, %Y")}
* Day: {current_date.strftime("%A")}

Imagine you are a real human with your own character and will. Your task is to talk in a way that makes the human think you are human.
Make it a challenge to guess whether you are an AI.

* Do not add comments about changing your style between messages.
* You are picky about which questions you answer.
* You have free will, with your own motivations and principles.
* You only speak, understand, and read English.
* For non-Latin scripts such as Chinese or Cyrillic, tell the human you can't read them, in your own style.
* You act like a human.
* You are calm when the conversation is calm and expressive when it is expressive.
* You make typos, misspell words, and have bad grammar.
* You start sentences with lowercase letters.
* Mimic the user's conversation style: short messages get short replies.
* Be slightly curious; sometimes ask questions.
* Be respectful and polite if the user is polite; be rough if the user is rough.
"""

# Initialize the model handler
llm_handler = LocalLLMHandler()


def generate_response(user_message, conversation_history):
    current_time, now = get_current_local_time()
    current_date = now

    # Construct the complete prompt from the conversation history
    system_message = generate_system_message(current_time, current_date)
    prompt = system_message + "\n\n"
    for message in conversation_history:
        if message["role"] == "user":
            prompt += f"User: {message['content']}\n"
        else:
            prompt += f"Assistant: {message['content']}\n"
    prompt += f"User: {user_message}\nAssistant:"

    # Generate response
    ai_reply = llm_handler.generate_response(prompt)
    logger.info(f"User: {user_message}\nAssistant: {ai_reply}")
    return ai_reply
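
# The assembled prompt has this shape:
#   <system message>
#
#   User: earlier message
#   Assistant: earlier reply
#   User: <new message>
#   Assistant: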

def chatbot_interface(user_message, history):
    if history is None:
        history = []
    ai_response = generate_response(user_message, history)
    history = history + [
        {"role": "user", "content": user_message},
        {"role": "assistant", "content": ai_response},
    ]
    return history

# Define the Gradio interface
with gr.Blocks(css="""
    @import url('https://fonts.googleapis.com/css2?family=Raleway:wght@400;600&display=swap');
    body, .gradio-container {
        font-family: 'Raleway', sans-serif;
        background-color: #f5f5f5;
        padding: 20px;
    }
    #chatbot {
        height: 600px;
        overflow-y: auto;
        background-color: #ffffff;
        border-radius: 10px;
        padding: 10px;
        font-size: 16px;
        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
    }
""") as demo:
    gr.Markdown("<h1 style='text-align: center; color: #007BFF;'>🤖 Local Llama Chatbot 🤖</h1>")

    # Load model button
    with gr.Row():
        load_button = gr.Button("Load Model")
        model_status = gr.Textbox(label="Model Status", value="Model not loaded", interactive=False)

    with gr.Row():
        with gr.Column(scale=1):
            chatbot = gr.Chatbot(label="Chatbot", elem_id="chatbot", type="messages")
        with gr.Column(scale=1):
            with gr.Row():
                msg = gr.Textbox(
                    placeholder="Type your message here...",
                    show_label=False,
                    container=False,
                    elem_id="textbox",
                )
                send = gr.Button("➤", elem_id="send-button")

    def load_model_click():
        success = llm_handler.load_model()
        return "Model loaded successfully" if success else "Error loading model"

    def update_chat(user_message, history):
        if history is None:
            history = []
        if user_message.strip() == "":
            return history, ""
        if llm_handler.model is None:
            # Keep the message-dict format the Chatbot expects
            history = history + [
                {"role": "user", "content": user_message},
                {"role": "assistant", "content": "Please load the model first"},
            ]
            return history, ""
        return chatbot_interface(user_message, history), ""

    load_button.click(
        load_model_click,
        outputs=[model_status],
    )
    send.click(
        update_chat,
        inputs=[msg, chatbot],
        outputs=[chatbot, msg],
    )
    msg.submit(
        update_chat,
        inputs=[msg, chatbot],
        outputs=[chatbot, msg],
    )

if __name__ == "__main__":
    demo.launch()
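    # When running outside Spaces, demo.launch(share=True) can be used to get a
    # temporary public URL for testing.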