Shreyas094 committed on
Commit
a6cb479
·
verified ·
1 Parent(s): f800b56

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -69
app.py CHANGED
@@ -85,7 +85,7 @@ def get_embeddings():
85
 
86
  def duckduckgo_search(query):
87
  with DDGS() as ddgs:
88
- results = list(ddgs.text(query, max_results=5))
89
  return results
90
 
91
  class CitingSources(BaseModel):
@@ -104,10 +104,11 @@ def chatbot_interface(message, history, model, temperature, num_calls, use_embed
104
  for response in respond(message, history, model, temperature, num_calls, use_embeddings, system_prompt):
105
  history[-1] = (message, response)
106
  yield history
 
 
107
  except Exception as e:
108
- logging.error(f"Error in chatbot_interface: {str(e)}")
109
- error_message = f"An error occurred: {str(e)}. Please try again."
110
- history[-1] = (message, error_message)
111
  yield history
112
 
113
  def retry_last_response(history, model, temperature, num_calls, use_embeddings, system_prompt):
@@ -126,8 +127,10 @@ def respond(message, history, model, temperature, num_calls, use_embeddings, sys
126
  logging.info(f"System Prompt: {system_prompt}")
127
 
128
  try:
129
- for main_content, _ in get_response_with_search(message, model, num_calls=num_calls, temperature=temperature, use_embeddings=use_embeddings, system_prompt=system_prompt):
130
- yield main_content
 
 
131
  except Exception as e:
132
  logging.error(f"Error with {model}: {str(e)}")
133
  yield f"An error occurred with the {model} model: {str(e)}. Please try again or select a different model."
@@ -143,22 +146,31 @@ def create_web_search_vectors(search_results):
143
 
144
  return FAISS.from_documents(documents, embed)
145
 
146
- def summarize_article(article, content, model, system_prompt, user_query, client, temperature=0.2):
147
- prompt = f"""Summarize the following article in the context of broader web search results:
148
-
149
- Article:
150
- Title: {article['title']}
151
- URL: {article['href']}
152
- Content: {article['body'][:1000]}... # Truncate to avoid extremely long prompts
153
-
154
- Additional Context:
155
- {content[:1000]}... # Truncate additional context as well
156
-
157
- User Query: {user_query}
 
 
 
 
158
 
159
- Write a detailed and complete research document which addresses the User Query, incorporating both the specific article and the broader context. Focus on the most relevant information.
160
- """
 
 
161
 
 
 
 
162
  # Calculate input tokens (this is an approximation, you might need a more accurate method)
163
  input_tokens = len(prompt.split()) // 4
164
 
@@ -166,54 +178,46 @@ User Query: {user_query}
166
  model_token_limit = MODEL_TOKEN_LIMITS.get(model, 8192) # Default to 8192 if model not found
167
 
168
  # Calculate max_new_tokens
169
- max_new_tokens = min(model_token_limit - input_tokens, 6500) # Cap at 6500 to be safe
170
-
171
- try:
172
- response = client.chat_completion(
173
- messages=[
174
- {"role": "system", "content": system_prompt},
175
- {"role": "user", "content": prompt}
176
- ],
177
- max_tokens=max_new_tokens,
178
- temperature=temperature,
179
- stream=False,
180
- top_p=0.8,
181
- )
182
-
183
- if hasattr(response, 'choices') and response.choices:
184
- for choice in response.choices:
185
- if hasattr(choice, 'message') and hasattr(choice.message, 'content'):
186
- return choice.message.content.strip()
187
- except Exception as e:
188
- logging.error(f"Error summarizing article: {str(e)}")
189
- return f"Error summarizing article: {str(e)}"
190
-
191
- return "Unable to generate summary."
192
-
193
- def get_response_with_search(query, model, num_calls=3, temperature=0.2, use_embeddings=True, system_prompt=DEFAULT_SYSTEM_PROMPT):
194
- search_results = duckduckgo_search(query)
195
- client = InferenceClient(model, token=huggingface_token)
196
-
197
- # Prepare overall context
198
- overall_context = "\n".join([f"{result['title']}\n{result['body']}" for result in search_results])
199
-
200
- summaries = []
201
- for result in search_results:
202
- summary = summarize_article(result, overall_context, model, system_prompt, query, client, temperature)
203
- summaries.append({
204
- "title": result['title'],
205
- "url": result['href'],
206
- "summary": summary
207
- })
208
- yield format_output(summaries), ""
209
-
210
- def format_output(summaries):
211
- output = "Here are the summarized search results:\n\n"
212
- for item in summaries:
213
- output += f"News Title: {item['title']}\n"
214
- output += f"URL: {item['url']}\n"
215
- output += f"Summary: {item['summary']}\n\n"
216
- return output
217
 
218
  def vote(data: gr.LikeData):
219
  if data.liked:
@@ -277,4 +281,5 @@ with gr.Blocks() as demo:
277
  )
278
 
279
  if __name__ == "__main__":
280
- demo.launch(share=True)
 
 
85
 
86
  def duckduckgo_search(query):
87
  with DDGS() as ddgs:
88
+ results = ddgs.text(query, max_results=5)
89
  return results
90
 
91
  class CitingSources(BaseModel):
 
104
  for response in respond(message, history, model, temperature, num_calls, use_embeddings, system_prompt):
105
  history[-1] = (message, response)
106
  yield history
107
+ except gr.CancelledError:
108
+ yield history
109
  except Exception as e:
110
+ logging.error(f"Unexpected error in chatbot_interface: {str(e)}")
111
+ history[-1] = (message, f"An unexpected error occurred: {str(e)}")
 
112
  yield history
113
 
114
  def retry_last_response(history, model, temperature, num_calls, use_embeddings, system_prompt):
 
127
  logging.info(f"System Prompt: {system_prompt}")
128
 
129
  try:
130
+ for main_content, sources in get_response_with_search(message, model, num_calls=num_calls, temperature=temperature, use_embeddings=use_embeddings, system_prompt=system_prompt):
131
+ response = f"{main_content}\n\n{sources}"
132
+ first_line = response.split('\n')[0] if response else ''
133
+ yield response
134
  except Exception as e:
135
  logging.error(f"Error with {model}: {str(e)}")
136
  yield f"An error occurred with the {model} model: {str(e)}. Please try again or select a different model."
 
146
 
147
  return FAISS.from_documents(documents, embed)
148
 
149
+ def get_response_with_search(query, model, num_calls=3, temperature=0.2, use_embeddings=True, system_prompt=DEFAULT_SYSTEM_PROMPT):
150
+ search_results = duckduckgo_search(query)
151
+
152
+ if use_embeddings:
153
+ web_search_database = create_web_search_vectors(search_results)
154
+
155
+ if not web_search_database:
156
+ yield "No web search results available. Please try again.", ""
157
+ return
158
+
159
+ retriever = web_search_database.as_retriever(search_kwargs={"k": 5})
160
+ relevant_docs = retriever.get_relevant_documents(query)
161
+
162
+ context = "\n".join([doc.page_content for doc in relevant_docs])
163
+ else:
164
+ context = "\n".join([f"{result['title']}\n{result['body']}\nSource: {result['href']}" for result in search_results])
165
 
166
+ prompt = f"""Using the following context from web search results:
167
+ {context}
168
+ Write a detailed and complete research document that fulfills the following user request: '{query}'
169
+ After writing the document, please provide a list of sources with their URLs used in your response."""
170
 
171
+ # Use Hugging Face API
172
+ client = InferenceClient(model, token=huggingface_token)
173
+
174
  # Calculate input tokens (this is an approximation, you might need a more accurate method)
175
  input_tokens = len(prompt.split()) // 4
176
 
 
178
  model_token_limit = MODEL_TOKEN_LIMITS.get(model, 8192) # Default to 8192 if model not found
179
 
180
  # Calculate max_new_tokens
181
+ max_new_tokens = min(model_token_limit - input_tokens, 6500) # Cap at 6500 to be safe
182
+
183
+ main_content = ""
184
+ for i in range(num_calls):
185
+ try:
186
+ response = client.chat_completion(
187
+ messages=[
188
+ {"role": "system", "content": system_prompt},
189
+ {"role": "user", "content": prompt}
190
+ ],
191
+ max_tokens=max_new_tokens,
192
+ temperature=temperature,
193
+ stream=False,
194
+ top_p=0.8,
195
+ )
196
+
197
+ # Log the raw response for debugging
198
+ logging.info(f"Raw API response: {response}")
199
+
200
+ # Check if the response is a string (which might be an error message)
201
+ if isinstance(response, str):
202
+ logging.error(f"API returned an unexpected string response: {response}")
203
+ yield f"An error occurred: {response}", ""
204
+ return
205
+
206
+ # If it's not a string, assume it's the expected object structure
207
+ if hasattr(response, 'choices') and response.choices:
208
+ for choice in response.choices:
209
+ if hasattr(choice, 'message') and hasattr(choice.message, 'content'):
210
+ chunk = choice.message.content
211
+ main_content += chunk
212
+ yield main_content, "" # Yield partial main content without sources
213
+ else:
214
+ logging.error(f"Unexpected response structure: {response}")
215
+ yield "An unexpected error occurred. Please try again.", ""
216
+
217
+ except Exception as e:
218
+ logging.error(f"Error in API call: {str(e)}")
219
+ yield f"An error occurred: {str(e)}", ""
220
+ return
 
 
 
 
 
 
 
 
221
 
222
  def vote(data: gr.LikeData):
223
  if data.liked:
 
281
  )
282
 
283
  if __name__ == "__main__":
284
+ demo.launch(share=True)
285
+ Enhancing Web Search Summarization - Claude