KGSAGAR committed on
Commit
6cb1f28
·
verified ·
1 Parent(s): 3c49ccb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -14
app.py CHANGED
@@ -24,7 +24,7 @@ except Exception as e:
24
  tokenizer = None
25
  peft_model = None
26
 
27
- async def respond(message, history, system_message, max_tokens, temperature, top_p):
28
  """
29
  Generates a response based on the user message and history using the provided PEFT model.
30
  Args:
@@ -34,13 +34,12 @@ async def respond(message, history, system_message, max_tokens, temperature, top
34
  max_tokens (int): The maximum number of tokens to generate.
35
  temperature (float): The temperature parameter for generation.
36
  top_p (float): The top_p parameter for nucleus sampling.
37
- Yields:
38
- str: The generated response up to the current token.
39
  """
40
  global tokenizer, peft_model # Access global variables
41
  if tokenizer is None or peft_model is None:
42
- yield "Model loading failed. Please check the logs."
43
- return
44
 
45
  # Construct the prompt
46
  prompt = system_message
@@ -56,7 +55,7 @@ async def respond(message, history, system_message, max_tokens, temperature, top
56
 
57
  # Generate the output
58
  try:
59
- outputs = await peft_model.generate(
60
  **inputs,
61
  max_new_tokens=max_tokens,
62
  temperature=temperature,
@@ -64,8 +63,7 @@ async def respond(message, history, system_message, max_tokens, temperature, top
64
  do_sample=True # Enable sampling for more diverse outputs
65
  )
66
  except Exception as e:
67
- yield f"Generation error: {e}"
68
- return
69
 
70
  # Decode the generated tokens
71
  generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
@@ -78,17 +76,13 @@ async def respond(message, history, system_message, max_tokens, temperature, top
78
  """
79
  pattern = re.compile(r'<user>(.*?)</user>|output:', re.IGNORECASE)
80
  matches = re.findall(pattern, text, re.DOTALL)
81
- extracted_content = '\n'.join(match.strip() for match in matches)
82
  return extracted_content
83
 
84
  # Extract the normalized text
85
  normalized_text = extract_user_content(generated_text)
86
 
87
- # Stream the response token by token
88
- response = ""
89
- for token in normalized_text.split():
90
- response += token + " "
91
- yield response.strip()
92
 
93
  # Gradio interface setup
94
  demo = gr.ChatInterface(
 
24
  tokenizer = None
25
  peft_model = None
26
 
27
+ def respond(message, history, system_message, max_tokens, temperature, top_p):
28
  """
29
  Generates a response based on the user message and history using the provided PEFT model.
30
  Args:
 
34
  max_tokens (int): The maximum number of tokens to generate.
35
  temperature (float): The temperature parameter for generation.
36
  top_p (float): The top_p parameter for nucleus sampling.
37
+ Returns:
38
+ str: The generated response.
39
  """
40
  global tokenizer, peft_model # Access global variables
41
  if tokenizer is None or peft_model is None:
42
+ return "Model loading failed. Please check the logs."
 
43
 
44
  # Construct the prompt
45
  prompt = system_message
 
55
 
56
  # Generate the output
57
  try:
58
+ outputs = peft_model.generate(
59
  **inputs,
60
  max_new_tokens=max_tokens,
61
  temperature=temperature,
 
63
  do_sample=True # Enable sampling for more diverse outputs
64
  )
65
  except Exception as e:
66
+ return f"Generation error: {e}"
 
67
 
68
  # Decode the generated tokens
69
  generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
76
  """
77
  pattern = re.compile(r'<user>(.*?)</user>|output:', re.IGNORECASE)
78
  matches = re.findall(pattern, text, re.DOTALL)
79
+ extracted_content = '\n'.join(match.strip() for match in matches if match)
80
  return extracted_content
81
 
82
  # Extract the normalized text
83
  normalized_text = extract_user_content(generated_text)
84
 
85
+ return normalized_text
 
 
 
 
86
 
87
  # Gradio interface setup
88
  demo = gr.ChatInterface(