Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -117,7 +117,7 @@ class RaindropSearchBot:
|
|
| 117 |
logger.error(f"Error extracting content from {url}: {e}")
|
| 118 |
return None
|
| 119 |
|
| 120 |
-
def get_content_and_summary(self, item: Dict, source_type: str) -> Dict:
|
| 121 |
"""Get content and generate summary for a single item."""
|
| 122 |
try:
|
| 123 |
# Get URL based on source type
|
|
@@ -140,16 +140,18 @@ class RaindropSearchBot:
|
|
| 140 |
# Generate summary focused on the query topic
|
| 141 |
try:
|
| 142 |
prompt = f"""
|
| 143 |
-
Analyze this content and provide a detailed summary focusing on key points
|
|
|
|
| 144 |
|
| 145 |
Content: {content[:4000]} # Limit content length for token constraints
|
| 146 |
|
| 147 |
Requirements:
|
| 148 |
-
1. Focus on the most important facts and findings
|
| 149 |
2. Include specific data points and quotes if relevant
|
| 150 |
3. Organize the information logically
|
| 151 |
4. Keep the summary to 2-3 paragraphs
|
| 152 |
5. Highlight any unique insights from this source
|
|
|
|
| 153 |
"""
|
| 154 |
|
| 155 |
response = self.client.chat.completions.create(
|
|
@@ -220,26 +222,26 @@ class RaindropSearchBot:
|
|
| 220 |
logger.error(f"Search error: {e}")
|
| 221 |
return []
|
| 222 |
|
| 223 |
-
def process_all_results(self, raindrop_results: List[Dict],
|
| 224 |
google_results: List[Dict],
|
| 225 |
news_results: List[Dict]) -> Tuple[List[Dict], List[Dict], List[Dict]]:
|
| 226 |
"""Process and enrich all results with content and summaries."""
|
| 227 |
|
| 228 |
processed_raindrop = []
|
| 229 |
for item in raindrop_results:
|
| 230 |
-
processed_item = self.get_content_and_summary(item, 'raindrop')
|
| 231 |
if processed_item.get('detailed_summary'):
|
| 232 |
processed_raindrop.append(processed_item)
|
| 233 |
|
| 234 |
processed_google = []
|
| 235 |
for item in google_results:
|
| 236 |
-
processed_item = self.get_content_and_summary(item, 'google')
|
| 237 |
if processed_item.get('detailed_summary'):
|
| 238 |
processed_google.append(processed_item)
|
| 239 |
|
| 240 |
processed_news = []
|
| 241 |
for item in news_results:
|
| 242 |
-
processed_item = self.get_content_and_summary(item, 'news')
|
| 243 |
if processed_item.get('detailed_summary'):
|
| 244 |
processed_news.append(processed_item)
|
| 245 |
|
|
@@ -291,7 +293,7 @@ class RaindropSearchBot:
|
|
| 291 |
prompt = f"""
|
| 292 |
Create a comprehensive essay-style analysis about: {user_query}
|
| 293 |
|
| 294 |
-
Use this content as your source material:
|
| 295 |
{all_content}
|
| 296 |
|
| 297 |
Requirements:
|
|
@@ -303,7 +305,7 @@ class RaindropSearchBot:
|
|
| 303 |
6. Address any contradictions or gaps
|
| 304 |
7. Use markdown formatting for better readability
|
| 305 |
|
| 306 |
-
Format the response as a proper academic essay with sections.
|
| 307 |
"""
|
| 308 |
|
| 309 |
response = self.client.chat.completions.create(
|
|
@@ -384,15 +386,16 @@ class RaindropSearchBot:
|
|
| 384 |
|
| 385 |
# Generate search query
|
| 386 |
search_query = self.generate_search_query(user_request)
|
|
|
|
| 387 |
logger.info(f"Using search query: {search_query}")
|
| 388 |
|
| 389 |
# Get results from all sources
|
| 390 |
raindrop_results = self.search_raindrop(search_query)
|
| 391 |
-
google_results = self.get_google_results(
|
| 392 |
-
news_results = self.get_news_results(
|
| 393 |
|
| 394 |
# Process all results to get content and summaries
|
| 395 |
-
processed_results = self.process_all_results(
|
| 396 |
raindrop_results, google_results, news_results
|
| 397 |
)
|
| 398 |
|
|
@@ -411,7 +414,7 @@ class RaindropSearchBot:
|
|
| 411 |
logger.info(f"Generating search query for: {user_request}")
|
| 412 |
|
| 413 |
prompt = f"""
|
| 414 |
-
You are a search expert. Create a search query to find relevant documents
|
| 415 |
{user_request}
|
| 416 |
|
| 417 |
Guidelines:
|
|
@@ -423,10 +426,11 @@ class RaindropSearchBot:
|
|
| 423 |
- use the formatting authorised in raindrop search:
|
| 424 |
o use " for exact search (ex: "artificial intelligence")
|
| 425 |
o use - to exclude some terms (ex: -math) // Do not exclude terms that are potentially relevant
|
| 426 |
-
o use match:OR for alternatives (ex: apple
|
| 427 |
-
o use
|
| 428 |
-
o use
|
| 429 |
-
|
|
|
|
| 430 |
Use your judgement, think step by steps.
|
| 431 |
Return only the search query terms.
|
| 432 |
"""
|
|
|
|
| 117 |
logger.error(f"Error extracting content from {url}: {e}")
|
| 118 |
return None
|
| 119 |
|
| 120 |
+
def get_content_and_summary(self, request: str, item: Dict, source_type: str) -> Dict:
|
| 121 |
"""Get content and generate summary for a single item."""
|
| 122 |
try:
|
| 123 |
# Get URL based on source type
|
|
|
|
| 140 |
# Generate summary focused on the query topic
|
| 141 |
try:
|
| 142 |
prompt = f"""
|
| 143 |
+
Analyze this content and provide a detailed summary focusing on key points related to the user request:
|
| 144 |
+
{request}
|
| 145 |
|
| 146 |
Content: {content[:4000]} # Limit content length for token constraints
|
| 147 |
|
| 148 |
Requirements:
|
| 149 |
+
1. Focus on the most important facts and findings related to the topic
|
| 150 |
2. Include specific data points and quotes if relevant
|
| 151 |
3. Organize the information logically
|
| 152 |
4. Keep the summary to 2-3 paragraphs
|
| 153 |
5. Highlight any unique insights from this source
|
| 154 |
+
6. No need to add a conclusion
|
| 155 |
"""
|
| 156 |
|
| 157 |
response = self.client.chat.completions.create(
|
|
|
|
| 222 |
logger.error(f"Search error: {e}")
|
| 223 |
return []
|
| 224 |
|
| 225 |
+
def process_all_results(self, userquest: str, raindrop_results: List[Dict],
|
| 226 |
google_results: List[Dict],
|
| 227 |
news_results: List[Dict]) -> Tuple[List[Dict], List[Dict], List[Dict]]:
|
| 228 |
"""Process and enrich all results with content and summaries."""
|
| 229 |
|
| 230 |
processed_raindrop = []
|
| 231 |
for item in raindrop_results:
|
| 232 |
+
processed_item = self.get_content_and_summary(userquest, item, 'raindrop')
|
| 233 |
if processed_item.get('detailed_summary'):
|
| 234 |
processed_raindrop.append(processed_item)
|
| 235 |
|
| 236 |
processed_google = []
|
| 237 |
for item in google_results:
|
| 238 |
+
processed_item = self.get_content_and_summary(userquest, item, 'google')
|
| 239 |
if processed_item.get('detailed_summary'):
|
| 240 |
processed_google.append(processed_item)
|
| 241 |
|
| 242 |
processed_news = []
|
| 243 |
for item in news_results:
|
| 244 |
+
processed_item = self.get_content_and_summary(userquest, item, 'news')
|
| 245 |
if processed_item.get('detailed_summary'):
|
| 246 |
processed_news.append(processed_item)
|
| 247 |
|
|
|
|
| 293 |
prompt = f"""
|
| 294 |
Create a comprehensive essay-style analysis about: {user_query}
|
| 295 |
|
| 296 |
+
Use this content as your reference source material:
|
| 297 |
{all_content}
|
| 298 |
|
| 299 |
Requirements:
|
|
|
|
| 305 |
6. Address any contradictions or gaps
|
| 306 |
7. Use markdown formatting for better readability
|
| 307 |
|
| 308 |
+
Format the response as a proper academic essay with sections and sources.
|
| 309 |
"""
|
| 310 |
|
| 311 |
response = self.client.chat.completions.create(
|
|
|
|
| 386 |
|
| 387 |
# Generate search query
|
| 388 |
search_query = self.generate_search_query(user_request)
|
| 389 |
+
search_query_adjusted = search_query.replace("match:OR", "OR").replace("match:AND", "AND")
|
| 390 |
logger.info(f"Using search query: {search_query}")
|
| 391 |
|
| 392 |
# Get results from all sources
|
| 393 |
raindrop_results = self.search_raindrop(search_query)
|
| 394 |
+
google_results = self.get_google_results(search_query_adjusted)
|
| 395 |
+
news_results = self.get_news_results(search_query_adjusted)
|
| 396 |
|
| 397 |
# Process all results to get content and summaries
|
| 398 |
+
processed_results = self.process_all_results(user_request,
|
| 399 |
raindrop_results, google_results, news_results
|
| 400 |
)
|
| 401 |
|
|
|
|
| 414 |
logger.info(f"Generating search query for: {user_request}")
|
| 415 |
|
| 416 |
prompt = f"""
|
| 417 |
+
You are a search expert. Create a search query to find relevant documents about:
|
| 418 |
{user_request}
|
| 419 |
|
| 420 |
Guidelines:
|
|
|
|
| 426 |
- use the formatting authorised in raindrop search:
|
| 427 |
o use " for exact search (ex: "artificial intelligence")
|
| 428 |
o use - to exclude some terms (ex: -math) // Do not exclude terms that are potentially relevant
|
| 429 |
+
o use match:OR for alternatives (ex: apple match:OR banana )
|
| 430 |
+
o use match:AND for inclusion of both cases systematically (ex: apple match:AND banana )
|
| 431 |
+
o use parenthesis for combinations ( ex: sugar match:AND (banana match:OR apple) )
|
| 432 |
+
|
| 433 |
+
Example elaborate request: ("artificial intelligence" match:OR AI) -"machine learning"
|
| 434 |
Use your judgement, think step by steps.
|
| 435 |
Return only the search query terms.
|
| 436 |
"""
|