mmargg committed on
Commit
22fbb3d
·
verified ·
1 Parent(s): d88068f

added yield response

Browse files
Files changed (1) hide show
  1. app.py +6 -4
app.py CHANGED
@@ -121,7 +121,7 @@ cleaned_chunks = preprocess_text(poverty_and_education)
121
  chunk_embeddings = create_embeddings(cleaned_chunks)
122
  #AI API being used
123
  client= InferenceClient("Qwen/Qwen2.5-7B-Instruct-1M")
124
-
125
  #defining role of AI and user
126
  def respond(message,history):
127
  information = get_top_chunks(message, chunk_embeddings, cleaned_chunks)
@@ -134,9 +134,11 @@ def respond(message,history):
134
 
135
  messages.append({"role":"user", "content": message})
136
 
137
- response=client.chat_completion(messages, max_tokens=100) #capping how many words the LLM is allowed to generate as a respond (100 words)
138
-
139
- return response['choices'][0]['message']['content'].strip() #storing value of response in a readable format to display
 
 
140
 
141
  ### STEP 6
142
  # Call the preprocess_text function and store the result in a cleaned_chunks variable
 
121
  chunk_embeddings = create_embeddings(cleaned_chunks)
122
  #AI API being used
123
  client= InferenceClient("Qwen/Qwen2.5-7B-Instruct-1M")
124
+ response=""
125
  #defining role of AI and user
126
  def respond(message,history):
127
  information = get_top_chunks(message, chunk_embeddings, cleaned_chunks)
 
134
 
135
  messages.append({"role":"user", "content": message})
136
 
137
+ response=client.chat_completion(messages, stream=True, max_tokens=100) #capping how many words the LLM is allowed to generate as a respond (100 words)
138
+ for message in client.chat_completion(messages):
139
+ token = message.choices[0].delta.content
140
+ response+=token
141
+ yield response['choices'][0]['message']['content'].strip() #storing value of response in a readable format to display
142
 
143
  ### STEP 6
144
  # Call the preprocess_text function and store the result in a cleaned_chunks variable