ItsMeDevRoland committed
Commit 56b07de · verified · 1 Parent(s): 761a03d

Update app.py

Files changed (1): app.py +10 -29
app.py CHANGED
@@ -45,7 +45,7 @@ def download_and_load_model(
         filename (str): Specific GGUF model filename
 
     Returns:
-        str: Path to downloaded model
+        tuple: Loaded model and model path
     """
     try:
         # Try to import llama-cpp directly to ensure it's available
@@ -96,27 +96,9 @@ except Exception as e:
     print(f"Fatal error initializing model: {e}")
     sys.exit(1)
 
-def format_history(history):
-    """
-    Format chat history into a list of messages
-
-    Args:
-        history (list): Chat history of (user, assistant) tuples
-
-    Returns:
-        list: Formatted messages for model input
-    """
-    messages = []
-    for user, assistant in history:
-        if user:
-            messages.append({"role": "user", "content": user})
-        if assistant:
-            messages.append({"role": "assistant", "content": assistant})
-    return messages
-
 def respond(
     message,
-    history: list[tuple[str, str]],
+    history,
     system_message="You are a friendly Chatbot.",
     max_tokens=512,
     temperature=0.7,
@@ -133,8 +115,8 @@ def respond(
         temperature (float): Sampling temperature
         top_p (float): Nucleus sampling probability threshold
 
-    Yields:
-        str: Streaming response
+    Returns:
+        str: Generated response
     """
     # Prepare the full prompt with system message and history
    full_prompt = system_message + "\n\n"
@@ -150,9 +132,9 @@ def respond(
     full_prompt += f"User: {message}\n"
     full_prompt += "Assistant: "
 
-    # Generate response with streaming
-    response = ""
+    # Generate response
     try:
+        response = ""
         for chunk in llm_model.generate(
             full_prompt,
             max_tokens=max_tokens,
@@ -162,10 +144,11 @@ def respond(
             stream=True
         ):
             response += chunk
-            yield response
+
+        return response
     except Exception as e:
         print(f"Error generating response: {e}")
-        yield f"An error occurred: {e}"
+        return f"An error occurred: {e}"
 
 # Create Gradio interface with updated configuration
 demo = gr.ChatInterface(
@@ -181,9 +164,7 @@ demo = gr.ChatInterface(
             step=0.05,
             label="Top-p (nucleus sampling)",
         ),
-    ],
-    # Explicitly set chatbot type to messages
-    chatbot=gr.Chatbot(type="messages")
+    ]
 )
 
 if __name__ == "__main__":