Spaces:

jdesiree
/

Mimir

Sleeping

jdesiree committed on Aug 15, 2025

Commit

067dac2

verified ·

1 Parent(s): 0d36e61

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -8,11 +8,9 @@ import os
 llm = HuggingFaceEndpoint(
     repo_id="HuggingFaceH4/zephyr-7b-beta",
     temperature=0.7,
     model_kwargs={
-        "max_length": 256,  # Reduced from 512
-        "max_new_tokens": 256,  # Add explicit limit
-        "do_sample": True,
-        "top_p": 0.9
     },
     huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN")
 )
@@ -86,27 +84,21 @@ def respond_with_langchain(
     temperature,
     top_p,
 ):
-    # Select the appropriate template
-    template, mode = detect_subject(message)
-    # Format the prompt
-    formatted_prompt = template.format_messages(question=message)
-    # Get response from LangChain
     try:
-        # Convert to string format for the HuggingFace model
         prompt_text = f"{formatted_prompt[0].content}\n\nHuman: {formatted_prompt[1].content}\n\nAssistant:"
         response = llm.invoke(prompt_text)
-        # Add mode indicator to response
-        full_response = f"*{mode}*\n\n{response}"
-        # Yield the response (for streaming effect)
         yield full_response
     except Exception as e:
-        yield f"Sorry, I encountered an error: {str(e)}"
 # Create the Gradio interface
 demo = gr.ChatInterface(

 llm = HuggingFaceEndpoint(
     repo_id="HuggingFaceH4/zephyr-7b-beta",
     temperature=0.7,
+    top_p=0.9,
     model_kwargs={
+        "max_length": 1024
     },
     huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN")
 )
     temperature,
     top_p,
 ):
     try:
+        template, mode = detect_subject(message)
+        formatted_prompt = template.format_messages(question=message)
         prompt_text = f"{formatted_prompt[0].content}\n\nHuman: {formatted_prompt[1].content}\n\nAssistant:"
         response = llm.invoke(prompt_text)
+        if len(response) > 3000:
+            response = response[:3000] + "... [Response truncated for length]"
+        full_response = f"*{mode}*\n\n{response}"
         yield full_response
     except Exception as e:
+        yield f"Sorry, I encountered an error: {str(e)[:200]}"
 # Create the Gradio interface
 demo = gr.ChatInterface(