jdesiree committed on
Commit
067dac2
·
verified ·
1 Parent(s): 0d36e61

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -16
app.py CHANGED
@@ -8,11 +8,9 @@ import os
8
  llm = HuggingFaceEndpoint(
9
  repo_id="HuggingFaceH4/zephyr-7b-beta",
10
  temperature=0.7,
 
11
  model_kwargs={
12
- "max_length": 256, # Reduced from 512
13
- "max_new_tokens": 256, # Add explicit limit
14
- "do_sample": True,
15
- "top_p": 0.9
16
  },
17
  huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN")
18
  )
@@ -86,27 +84,21 @@ def respond_with_langchain(
86
  temperature,
87
  top_p,
88
  ):
89
- # Select the appropriate template
90
- template, mode = detect_subject(message)
91
-
92
- # Format the prompt
93
- formatted_prompt = template.format_messages(question=message)
94
-
95
- # Get response from LangChain
96
  try:
97
- # Convert to string format for the HuggingFace model
 
98
  prompt_text = f"{formatted_prompt[0].content}\n\nHuman: {formatted_prompt[1].content}\n\nAssistant:"
99
 
100
  response = llm.invoke(prompt_text)
101
 
102
- # Add mode indicator to response
103
- full_response = f"*{mode}*\n\n{response}"
104
 
105
- # Yield the response (for streaming effect)
106
  yield full_response
107
 
108
  except Exception as e:
109
- yield f"Sorry, I encountered an error: {str(e)}"
110
 
111
  # Create the Gradio interface
112
  demo = gr.ChatInterface(
 
8
  llm = HuggingFaceEndpoint(
9
  repo_id="HuggingFaceH4/zephyr-7b-beta",
10
  temperature=0.7,
11
+ top_p=0.9,
12
  model_kwargs={
13
+ "max_length": 1024
 
 
 
14
  },
15
  huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN")
16
  )
 
84
  temperature,
85
  top_p,
86
  ):
 
 
 
 
 
 
 
87
  try:
88
+ template, mode = detect_subject(message)
89
+ formatted_prompt = template.format_messages(question=message)
90
  prompt_text = f"{formatted_prompt[0].content}\n\nHuman: {formatted_prompt[1].content}\n\nAssistant:"
91
 
92
  response = llm.invoke(prompt_text)
93
 
94
+ if len(response) > 3000:
95
+ response = response[:3000] + "... [Response truncated for length]"
96
 
97
+ full_response = f"*{mode}*\n\n{response}"
98
  yield full_response
99
 
100
  except Exception as e:
101
+ yield f"Sorry, I encountered an error: {str(e)[:200]}"
102
 
103
  # Create the Gradio interface
104
  demo = gr.ChatInterface(