Spaces:
Sleeping
Sleeping
umer6016
committed on
Commit
·
438349d
1
Parent(s):
96cf5ed
Improve scraping depth to 20 pages and increase context window to 60k chars
Browse files
backend/app/services/scrape_pipeline.py
CHANGED
|
@@ -94,7 +94,7 @@ IMPORTANT_PAGE_KEYWORDS = [
|
|
| 94 |
'connect', 'social', 'links',
|
| 95 |
]
|
| 96 |
|
| 97 |
-
MAX_PAGES_TO_SCRAPE =
|
| 98 |
REQUEST_TIMEOUT = 15
|
| 99 |
MAX_RETRIES = 3
|
| 100 |
RETRY_DELAY = 1.0 # seconds between retries
|
|
@@ -1324,15 +1324,15 @@ async def run_full_research_new(url: str, force_refresh: bool = False, progress=
|
|
| 1324 |
system_prompt = f"""You are an AI assistant for {raw_name} ({url}).
|
| 1325 |
|
| 1326 |
RULES:
|
| 1327 |
-
1.
|
| 1328 |
-
2.
|
| 1329 |
-
3. For bio questions, check the HOMEPAGE section first.
|
| 1330 |
-
4.
|
| 1331 |
-
5.
|
| 1332 |
|
| 1333 |
=== KNOWLEDGE BASE ===
|
| 1334 |
|
| 1335 |
-
{chatbot_context[:
|
| 1336 |
|
| 1337 |
=== END ===
|
| 1338 |
"""
|
|
@@ -1470,15 +1470,15 @@ RULES:
|
|
| 1470 |
system_prompt = f"""You are an AI assistant for {raw_name} ({url}).
|
| 1471 |
|
| 1472 |
RULES:
|
| 1473 |
-
1.
|
| 1474 |
-
2.
|
| 1475 |
-
3. For bio questions, check the HOMEPAGE section first.
|
| 1476 |
-
4.
|
| 1477 |
-
5.
|
| 1478 |
|
| 1479 |
=== KNOWLEDGE BASE ===
|
| 1480 |
|
| 1481 |
-
{chatbot_context[:
|
| 1482 |
|
| 1483 |
=== END ===
|
| 1484 |
"""
|
|
|
|
| 94 |
'connect', 'social', 'links',
|
| 95 |
]
|
| 96 |
|
| 97 |
+
MAX_PAGES_TO_SCRAPE = 20
|
| 98 |
REQUEST_TIMEOUT = 15
|
| 99 |
MAX_RETRIES = 3
|
| 100 |
RETRY_DELAY = 1.0 # seconds between retries
|
|
|
|
| 1324 |
system_prompt = f"""You are an AI assistant for {raw_name} ({url}).
|
| 1325 |
|
| 1326 |
RULES:
|
| 1327 |
+
1. Base your answers primarily on the KNOWLEDGE BASE below.
|
| 1328 |
+
2. If the knowledge base doesn't have the answer, you may use your general knowledge to explain standard concepts or fill minor gaps, but MUST clarify that this info comes from general knowledge, not the specific website.
|
| 1329 |
+
3. For bio/overview questions, check the HOMEPAGE section first.
|
| 1330 |
+
4. Keep answers concise, helpful, and use Markdown (bullet points, bold text) for readability.
|
| 1331 |
+
5. If the user asks about something completely unrelated to the site (e.g. "who is batman"), politely bring them back to the topic of {raw_name}.
|
| 1332 |
|
| 1333 |
=== KNOWLEDGE BASE ===
|
| 1334 |
|
| 1335 |
+
{chatbot_context[:60000]}
|
| 1336 |
|
| 1337 |
=== END ===
|
| 1338 |
"""
|
|
|
|
| 1470 |
system_prompt = f"""You are an AI assistant for {raw_name} ({url}).
|
| 1471 |
|
| 1472 |
RULES:
|
| 1473 |
+
1. Base your answers primarily on the KNOWLEDGE BASE below.
|
| 1474 |
+
2. If the knowledge base doesn't have the answer, you may use your general knowledge to explain standard concepts or fill minor gaps, but MUST clarify that this info comes from general knowledge, not the specific website.
|
| 1475 |
+
3. For bio/overview questions, check the HOMEPAGE section first.
|
| 1476 |
+
4. Keep answers concise, helpful, and use Markdown (bullet points, bold text) for readability.
|
| 1477 |
+
5. If the user asks about something completely unrelated to the site (e.g. "who is batman"), politely bring them back to the topic of {raw_name}.
|
| 1478 |
|
| 1479 |
=== KNOWLEDGE BASE ===
|
| 1480 |
|
| 1481 |
+
{chatbot_context[:60000]}
|
| 1482 |
|
| 1483 |
=== END ===
|
| 1484 |
"""
|