Spaces: Solarum Asteridion committed: Update app.py

app.py CHANGED
@@ -1,36 +1,72 @@
-import
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import gradio as gr
 import datetime
 import pytz
-import gradio as gr
 import logging
-import
-
-
-grqapi = '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'
+import gc
 
 # Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
-[19 removed lines not captured]
+class LocalLLMHandler:
+    def __init__(self):
+        self.model = None
+        self.tokenizer = None
+
+    def load_model(self, model_name="nvidia/Llama-3.1-Nemotron-70B-Instruct-HF"):
+        """Load model with CPU optimizations"""
+        try:
+            # Clean up any existing model
+            if self.model is not None:
+                del self.model
+                del self.tokenizer
+                torch.cuda.empty_cache()
+                gc.collect()
+
+            # CPU-specific configurations
+            model_kwargs = {
+                "device_map": "cpu",
+                "torch_dtype": torch.bfloat16,  # Use bfloat16 for better CPU performance
+                "low_cpu_mem_usage": True,
+            }
+
+            logger.info("Loading tokenizer...")
+            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+            logger.info("Loading model...")
+            self.model = AutoModelForCausalLM.from_pretrained(
+                model_name,
+                **model_kwargs
+            )
+
+            logger.info("Model loaded successfully")
+            return True
+        except Exception as e:
+            logger.error(f"Error loading model: {e}")
+            return False
+
+    def generate_response(self, prompt, max_length=500):
+        """Generate response from the local model"""
+        try:
+            inputs = self.tokenizer(prompt, return_tensors="pt")
+
+            # Generate with CPU-friendly parameters
+            outputs = self.model.generate(
+                inputs["input_ids"],
+                max_length=max_length,
+                num_return_sequences=1,
+                temperature=0.7,
+                do_sample=True,
+                pad_token_id=self.tokenizer.eos_token_id
+            )
+
+            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+            return response
+        except Exception as e:
+            logger.error(f"Error generating response: {e}")
+            return "Sorry, I encountered an error while processing your request."
 
 def get_current_local_time(timezone_str='UTC'):
     try:
@@ -64,148 +100,107 @@ Make it a challenge to guess if you are an AI.
 * Be slightly curious, sometimes ask questions
 * Be respectful and polite if the user is polite; be rough if the user is rough
 """
+
+# Initialize the model handler
+llm_handler = LocalLLMHandler()
 
-def generate_response(user_message, conversation_history
+def generate_response(user_message, conversation_history):
     current_time, now = get_current_local_time()
     current_date = now
-
+
+    # Construct the complete prompt from conversation history
     system_message = generate_system_message(current_time, current_date)
-
-
+    prompt = system_message + "\n\n"
+
     for message in conversation_history:
-[13 removed lines not captured]
-        logger.info("------------------------------" + user_message + ", " + ai_reply)
-        return ai_reply
-    except Exception as e:
-        logger.error(f"Error generating response: {e}")
-        return "Sorry, I encountered an error while processing your request."
-
-def chatbot_interface(user_message, history, model_name):
+        if message["role"] == "user":
+            prompt += f"User: {message['content']}\n"
+        else:
+            prompt += f"Assistant: {message['content']}\n"
+
+    prompt += f"User: {user_message}\nAssistant:"
+
+    # Generate response
+    ai_reply = llm_handler.generate_response(prompt)
+    logger.info(f"User: {user_message}\nAssistant: {ai_reply}")
+    return ai_reply
+
+def chatbot_interface(user_message, history):
     if history is None:
         history = []
-
-    ai_response = generate_response(user_message, history
+
+    ai_response = generate_response(user_message, history)
     history.append({"role": "user", "content": user_message})
     history.append({"role": "assistant", "content": ai_response})
-    logger.info("Chat history: %s", history)  # Corrected logging
     return history, history
 
 # Define Gradio Interface
 with gr.Blocks(css="""
-/* Import Raleway font from Google Fonts */
 @import url('https://fonts.googleapis.com/css2?family=Raleway:wght@400;600&display=swap');
 
 body, .gradio-container {
-    font-family: 'Raleway', sans-serif;
-    background-color: #f5f5f5;
-    padding: 20px;
+    font-family: 'Raleway', sans-serif;
+    background-color: #f5f5f5;
+    padding: 20px;
 }
 #chatbot {
-    height: 600px;
-    overflow-y: auto;
-    background-color: #ffffff;
-    border-radius: 10px;
-    padding: 10px;
-    font-size: 16px;
-    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
-}
-#textbox {
-    width: 100%;
-    border-radius: 25px;
-    border: 1px solid #ccc;
-    outline: none;
-    font-size: 16px;
-    padding: 10px 20px;
-    box-sizing: border-box;
-}
-#send-button {
-    background-color: #007BFF;
-    color: white;
-    border: none;
-    cursor: pointer;
-    font-size: 20px;
-}
-#send-button:hover {
-    background-color: #0056b3;
-}
-.message {
-    margin-bottom: 10px;
-}
-/* Scrollbar Styling */
-#chatbot::-webkit-scrollbar {
-    width: 8px;
-}
-#chatbot::-webkit-scrollbar-track {
-    background: #f1f1f1;
-}
-#chatbot::-webkit-scrollbar-thumb {
-    background: #888;
-    border-radius: 4px;
-}
-#chatbot::-webkit-scrollbar-thumb:hover {
-    background: #555;
-}
-/* Responsive Design */
-@media (max-width: 600px) {
-    #send-button {
-        width: 40px;
-        height: 40px;
-        font-size: 18px;
-    }
-    #textbox {
-        padding: 8px 16px;
-    }
+    height: 600px;
+    overflow-y: auto;
+    background-color: #ffffff;
+    border-radius: 10px;
+    padding: 10px;
+    font-size: 16px;
+    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
 }
 """) as demo:
-    gr.Markdown("<h1 style='text-align: center; color: #007BFF;'>🤖
-[3 removed lines not captured]
-    gr.
-[3 removed lines not captured]
-    with gr.
-[2 removed lines not captured]
-    with gr.
-[28 removed lines not captured]
+    gr.Markdown("<h1 style='text-align: center; color: #007BFF;'>🤖 Local Llama Chatbot 🤖</h1>")
+
+    # Load model button
+    with gr.Row():
+        load_button = gr.Button("Load Model")
+        model_status = gr.Textbox(label="Model Status", value="Model not loaded", interactive=False)
+
+    with gr.Row():
+        with gr.Column(scale=1):
+            chatbot = gr.Chatbot(label="Chatbot", elem_id="chatbot")
+        with gr.Column(scale=1):
+            with gr.Row():
+                msg = gr.Textbox(
+                    placeholder="Type your message here...",
+                    show_label=False,
+                    container=False,
+                    elem_id="textbox"
+                )
+                send = gr.Button("➤", elem_id="send-button")
+
+    def load_model_click():
+        success = llm_handler.load_model()
+        return "Model loaded successfully" if success else "Error loading model"
+
+    def update_chat(user_message, history):
+        if user_message.strip() == "":
+            return history, history
+        if llm_handler.model is None:
+            return history + [("Error", "Please load the model first")], history
+        history, updated_history = chatbot_interface(user_message, history)
+        return history, updated_history, ""
+
+    load_button.click(
+        load_model_click,
+        outputs=[model_status]
+    )
+
+    send.click(
+        update_chat,
+        inputs=[msg, chatbot],
+        outputs=[chatbot, chatbot, msg]
+    )
+
+    msg.submit(
+        update_chat,
+        inputs=[msg, chatbot],
+        outputs=[chatbot, chatbot, msg]
+    )
+
+if __name__ == "__main__":
+    demo.launch()
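A note on scale: the checkpoint named in load_model is far larger than a CPU-only Space can hold, since a 70B-parameter model in bfloat16 needs about 140 GB of RAM for the weights alone (70e9 parameters at 2 bytes each). A minimal sketch of driving the new handler outside Gradio, assuming a small instruct checkpoint is substituted; the model id below is an illustrative stand-in, not the commit's choice:

# Sketch: exercises LocalLLMHandler as defined in this commit, but with a small
# model ("Qwen/Qwen2.5-0.5B-Instruct" is an illustrative substitution).
handler = LocalLLMHandler()
if handler.load_model("Qwen/Qwen2.5-0.5B-Instruct"):
    print(handler.generate_response("User: What time is it in Tokyo?\nAssistant:"))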
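One behavior of LocalLLMHandler.generate_response worth flagging: for decoder-only models, model.generate returns the prompt tokens followed by the completion, so decoding outputs[0] in full echoes the entire prompt (system message and history included) back into the chat, and max_length caps prompt plus completion rather than the reply itself. A sketch of decoding only the newly generated tokens; generate_reply is a hypothetical helper, not part of the commit:

def generate_reply(model, tokenizer, prompt, max_new_tokens=256):
    """Sketch: return only the new completion, not the echoed prompt."""
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        inputs["input_ids"],
        max_new_tokens=max_new_tokens,  # caps generated tokens, unlike max_length
        do_sample=True,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id,
    )
    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)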
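Also worth flagging in the event wiring: send.click and msg.submit declare three outputs ([chatbot, chatbot, msg]), but update_chat returns only two values on its early-exit branches, so Gradio will raise when the message is empty or the model is not loaded. A sketch that returns three values on every branch (same names as the commit; note too that chatbot_interface appends dict-style messages while the error branch appends a tuple, and which form renders depends on how the gr.Chatbot is configured):

def update_chat(user_message, history):
    # Sketch: every branch returns three values to match [chatbot, chatbot, msg].
    if user_message.strip() == "":
        return history, history, ""
    if llm_handler.model is None:
        return history + [("Error", "Please load the model first")], history, ""
    history, updated_history = chatbot_interface(user_message, history)
    return history, updated_history, ""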