- app.py +17 -94
- requirements.txt +3 -2
app.py CHANGED

@@ -4,7 +4,6 @@ import torch
 import logging
 import html
 import signal
-from functools import lru_cache
 
 # Setup logging
 logging.basicConfig(level=logging.INFO)
@@ -20,16 +19,6 @@ def shutdown_handler(signum, frame):
 signal.signal(signal.SIGINT, shutdown_handler)
 
 def system_message_selector(choice, custom_message):
-    """
-    Selects the system message based on the user's choice or custom input.
-
-    Parameters:
-        choice (str): The persona choice selected by the user.
-        custom_message (str): A custom persona or system message provided by the user.
-
-    Returns:
-        str: The system message to be used in the conversation.
-    """
     if custom_message:
         return custom_message
     elif choice == "Friendly Chatbot":
@@ -42,29 +31,9 @@ def system_message_selector(choice, custom_message):
     return "You are a helpful assistant."
 
 def sanitize_input(text):
-    """
-    Sanitizes user input to prevent code injection or XSS attacks.
-
-    Parameters:
-        text (str): The user input text.
-
-    Returns:
-        str: The sanitized text.
-    """
     return html.escape(text)
 
 def validate_parameters(max_tokens, temperature, top_p):
-    """
-    Validates input parameters.
-
-    Parameters:
-        max_tokens (int): Maximum number of tokens for the response.
-        temperature (float): Sampling temperature.
-        top_p (float): Top-p (nucleus) sampling parameter.
-
-    Returns:
-        tuple: (bool, str) indicating validity and an error message if invalid.
-    """
     if not (1 <= max_tokens <= 2048):
         return False, "Error: 'Max new tokens' must be between 1 and 2048."
     if not (0.1 <= temperature <= 4.0):
@@ -74,101 +43,55 @@ def validate_parameters(max_tokens, temperature, top_p):
     return True, ""
 
 # Load the model and tokenizer
-model_name = "HuggingFaceH4/…
+model_name = "HuggingFaceH4/mistral-7b-instruct"  # Update with the correct model name
 
 try:
-    …
-    …
+    from transformers import AutoTokenizer, MistralForCausalLM
+
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = MistralForCausalLM.from_pretrained(
         model_name,
         torch_dtype=torch.float16,
-        device_map="auto"
+        device_map="auto",
     )
     model.eval()
 except Exception as e:
     logging.error(f"Failed to load model {model_name}: {e}")
     exit(1)
 
-@lru_cache(maxsize=32)
-def generate_response(prompt, max_tokens, temperature, top_p):
-    """
-    Generates a response using the loaded language model.
-
-    Parameters:
-        prompt (str): The input prompt for the model.
-        max_tokens (int): Maximum number of tokens for the response.
-        temperature (float): Sampling temperature.
-        top_p (float): Top-p (nucleus) sampling parameter.
-
-    Returns:
-        str: The generated response from the model.
-    """
-    input_ids = tokenizer.encode(prompt, return_tensors="pt")
-    input_ids = input_ids.to(model.device)
-
-    with torch.no_grad():
-        output_ids = model.generate(
-            input_ids,
-            max_length=input_ids.shape[1] + max_tokens,
-            temperature=temperature,
-            top_p=top_p,
-            do_sample=True,
-            pad_token_id=tokenizer.eos_token_id,
-            eos_token_id=tokenizer.eos_token_id,
-        )
-
-    generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
-    return generated_text[len(prompt):].strip()
-
 def respond(message, history, persona_choice, custom_persona, max_tokens, temperature, top_p):
-    """
-    Generates a response using the loaded language model.
-
-    Parameters:
-        message (str): User's current input.
-        history (list[tuple[str, str]]): Previous conversation history.
-        persona_choice (str): The selected persona.
-        custom_persona (str): Custom persona or system message.
-        max_tokens (int): Maximum tokens allowed for the response.
-        temperature (float): Sampling temperature.
-        top_p (float): Top-p (nucleus sampling) parameter.
-
-    Returns:
-        str: The generated chatbot response.
-    """
-    # Validate parameters
     is_valid, error_message = validate_parameters(max_tokens, temperature, top_p)
     if not is_valid:
         return error_message
 
-    # Sanitize user input
     safe_message = sanitize_input(message)
     safe_history = [(sanitize_input(u), sanitize_input(b)) for u, b in history]
-
-    # Limit the history to the most recent exchanges
     truncated_history = safe_history[-MAX_HISTORY_LENGTH:]
-
-    # Select system message
     system_message = system_message_selector(persona_choice, custom_persona)
 
-    # Build the conversation prompt
     conversation = system_message + "\n\n"
     for user_msg, bot_msg in truncated_history:
         conversation += f"User: {user_msg}\n"
         conversation += f"Assistant: {bot_msg}\n"
     conversation += f"User: {safe_message}\nAssistant:"
 
-    # Log the request
     logging.info(f"Received message: {safe_message}")
 
     try:
-        …
-        …
-        …
-        …
+        input_ids = tokenizer.encode(conversation, return_tensors="pt").to(model.device)
+
+        output_ids = model.generate(
+            input_ids,
+            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
+            do_sample=True,
+            pad_token_id=tokenizer.eos_token_id,
+            eos_token_id=tokenizer.eos_token_id,
        )
-        …
+
+        generated_text = tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True)
+        return generated_text.strip()
     except Exception as e:
         logging.error(f"An error occurred: {e}")
         return "I'm sorry, but something went wrong. Please try again."
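The substantive change is in how generation happens: the @lru_cache-wrapped generate_response() helper is gone (caching is a poor fit once do_sample=True is set, since repeated prompts would get the same frozen sample back), and respond() now generates inline, bounding output with max_new_tokens instead of max_length arithmetic and slicing the reply out of the output by token count rather than by len(prompt) characters. A minimal standalone sketch of the new decoding path; the checkpoint name here is a stand-in (the name in the diff is itself a placeholder), everything else is the stock transformers API:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Stand-in checkpoint: "HuggingFaceH4/mistral-7b-instruct" in the diff is a placeholder.
model_name = "mistralai/Mistral-7B-Instruct-v0.2"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name, torch_dtype=torch.float16, device_map="auto"
)
model.eval()

prompt = "You are a helpful assistant.\n\nUser: Hello!\nAssistant:"
input_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.device)

output_ids = model.generate(
    input_ids,
    max_new_tokens=64,  # bounds only the reply; the old max_length bounded prompt + reply
    temperature=0.7,
    top_p=0.9,
    do_sample=True,
    pad_token_id=tokenizer.eos_token_id,
    eos_token_id=tokenizer.eos_token_id,
)

# Slice by token count before decoding. The removed helper sliced the decoded string
# with generated_text[len(prompt):], which assumes decode() reproduces the prompt
# character-for-character -- tokenizers do not guarantee that.
reply = tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True)
print(reply.strip())

Dropping the deleted helper's explicit torch.no_grad() wrapper is also harmless: recent transformers releases run generate() under no-grad mode internally.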
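Also worth noting, since the diff leaves sanitize_input() untouched: html.escape() runs before the text is stitched into the prompt, so the model sees entity-escaped input. A quick illustration:

import html

# With the default quote=True, html.escape rewrites &, <, >, " and '
print(html.escape("I'm <b>bold</b> & curious"))
# -> I&#x27;m &lt;b&gt;bold&lt;/b&gt; &amp; curious

Escaping is the right defense for text that Gradio renders back into the chat pane, but it means apostrophes and angle brackets reach the model as entities; escaping at display time instead would keep the prompt text clean.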
requirements.txt CHANGED

@@ -1,3 +1,4 @@
-transformers
+transformers>=4.34.0
 gradio==3.40.1
-torch
+torch>=2.0.1
+xformers
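The pins line up with the code change: Mistral support landed in transformers 4.34.0, so the floor matches the new model class, gradio stays hard-pinned, and xformers comes in as an optional attention accelerator. A small sanity check for the environment, assuming only the packages listed above plus packaging (which is almost always present alongside pip):

# Verify the environment matches the pins in requirements.txt.
import torch
import transformers
from packaging.version import Version

assert Version(transformers.__version__) >= Version("4.34.0"), \
    "MistralForCausalLM needs transformers>=4.34.0"
print("torch", torch.__version__, "| CUDA available:", torch.cuda.is_available())

try:
    import xformers  # optional: memory-efficient attention kernels
    print("xformers", xformers.__version__)
except ImportError:
    print("xformers not installed; attention falls back to the default kernels")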
|