Spaces:
Sleeping
Sleeping
Prompt changes
Browse files
llm.py
CHANGED
|
@@ -31,20 +31,10 @@ def extract_https_links(chunks):
|
|
| 31 |
def fetch_all_links(links, timeout=10, max_workers=10):
|
| 32 |
"""
|
| 33 |
Fetch all HTTPS links in parallel, with per-link timing.
|
| 34 |
-
Skips banned links.
|
| 35 |
Returns a dict {link: content or error}.
|
| 36 |
"""
|
| 37 |
fetched_data = {}
|
| 38 |
|
| 39 |
-
banned_links = [
|
| 40 |
-
"https://register.hackrx.in/teams/public/flights/getFirstCityFlightNumber",
|
| 41 |
-
"https://register.hackrx.in/teams/public/flights/getSecondCityFlightNumber",
|
| 42 |
-
"https://register.hackrx.in/teams/public/flights/getFourthCityFlightNumber",
|
| 43 |
-
"https://register.hackrx.in/teams/public/flights/getFifthCityFlightNumber",
|
| 44 |
-
]
|
| 45 |
-
|
| 46 |
-
special_url = "https://register.hackrx.in/submissions/myFavouriteCity"
|
| 47 |
-
|
| 48 |
def fetch(link):
|
| 49 |
start = time.perf_counter()
|
| 50 |
try:
|
|
@@ -58,29 +48,18 @@ def fetch_all_links(links, timeout=10, max_workers=10):
|
|
| 58 |
print(f"❌ {link} — {elapsed:.2f}s — ERROR: {e}")
|
| 59 |
return link, f"ERROR: {e}"
|
| 60 |
|
| 61 |
-
#
|
| 62 |
-
links_to_fetch = [l for l in links if l not in banned_links]
|
| 63 |
-
for banned in set(links) - set(links_to_fetch):
|
| 64 |
-
print(f"⛔ Skipped banned link: {banned}")
|
| 65 |
-
fetched_data[banned] = "BANNED"
|
| 66 |
-
|
| 67 |
-
# Fetch special_url first if present
|
| 68 |
-
if special_url in links_to_fetch:
|
| 69 |
-
link, content = fetch(special_url)
|
| 70 |
-
fetched_data[link] = content
|
| 71 |
-
links_to_fetch.remove(special_url)
|
| 72 |
-
|
| 73 |
-
# Fetch the rest in parallel
|
| 74 |
t0 = time.perf_counter()
|
| 75 |
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
| 76 |
-
future_to_link = {executor.submit(fetch, link): link for link in links_to_fetch}
|
| 77 |
for future in as_completed(future_to_link):
|
| 78 |
link, content = future.result()
|
| 79 |
fetched_data[link] = content
|
| 80 |
-
print(f"[TIMER] Total link fetching: {time.perf_counter() - t0:.2f}s")
|
| 81 |
|
|
|
|
| 82 |
return fetched_data
|
| 83 |
|
|
|
|
| 84 |
def query_gemini(questions, contexts, max_retries=3):
|
| 85 |
import itertools
|
| 86 |
|
|
|
|
| 31 |
def fetch_all_links(links, timeout=10, max_workers=10):
|
| 32 |
"""
|
| 33 |
Fetch all HTTPS links in parallel, with per-link timing.
|
|
|
|
| 34 |
Returns a dict {link: content or error}.
|
| 35 |
"""
|
| 36 |
fetched_data = {}
|
| 37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
def fetch(link):
|
| 39 |
start = time.perf_counter()
|
| 40 |
try:
|
|
|
|
| 48 |
print(f"❌ {link} — {elapsed:.2f}s — ERROR: {e}")
|
| 49 |
return link, f"ERROR: {e}"
|
| 50 |
|
| 51 |
+
# Fetch all links in parallel (no banned filtering, no special prioritization)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
t0 = time.perf_counter()
|
| 53 |
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
| 54 |
+
future_to_link = {executor.submit(fetch, link): link for link in links}
|
| 55 |
for future in as_completed(future_to_link):
|
| 56 |
link, content = future.result()
|
| 57 |
fetched_data[link] = content
|
|
|
|
| 58 |
|
| 59 |
+
print(f"[TIMER] Total link fetching: {time.perf_counter() - t0:.2f}s")
|
| 60 |
return fetched_data
|
| 61 |
|
| 62 |
+
|
| 63 |
def query_gemini(questions, contexts, max_retries=3):
|
| 64 |
import itertools
|
| 65 |
|