TharanJ committed on
Commit
e4c6be7
·
1 Parent(s): f9fa975

Prompt changes

Browse files
Files changed (1) hide show
  1. llm.py +4 -25
llm.py CHANGED
@@ -31,20 +31,10 @@ def extract_https_links(chunks):
31
  def fetch_all_links(links, timeout=10, max_workers=10):
32
  """
33
  Fetch all HTTPS links in parallel, with per-link timing.
34
- Skips banned links.
35
  Returns a dict {link: content or error}.
36
  """
37
  fetched_data = {}
38
 
39
- banned_links = [
40
- "https://register.hackrx.in/teams/public/flights/getFirstCityFlightNumber",
41
- "https://register.hackrx.in/teams/public/flights/getSecondCityFlightNumber",
42
- "https://register.hackrx.in/teams/public/flights/getFourthCityFlightNumber",
43
- "https://register.hackrx.in/teams/public/flights/getFifthCityFlightNumber",
44
- ]
45
-
46
- special_url = "https://register.hackrx.in/submissions/myFavouriteCity"
47
-
48
  def fetch(link):
49
  start = time.perf_counter()
50
  try:
@@ -58,29 +48,18 @@ def fetch_all_links(links, timeout=10, max_workers=10):
58
  print(f"❌ {link} — {elapsed:.2f}s — ERROR: {e}")
59
  return link, f"ERROR: {e}"
60
 
61
- # Filter banned links first
62
- links_to_fetch = [l for l in links if l not in banned_links]
63
- for banned in set(links) - set(links_to_fetch):
64
- print(f"⛔ Skipped banned link: {banned}")
65
- fetched_data[banned] = "BANNED"
66
-
67
- # Fetch special_url first if present
68
- if special_url in links_to_fetch:
69
- link, content = fetch(special_url)
70
- fetched_data[link] = content
71
- links_to_fetch.remove(special_url)
72
-
73
- # Fetch the rest in parallel
74
  t0 = time.perf_counter()
75
  with ThreadPoolExecutor(max_workers=max_workers) as executor:
76
- future_to_link = {executor.submit(fetch, link): link for link in links_to_fetch}
77
  for future in as_completed(future_to_link):
78
  link, content = future.result()
79
  fetched_data[link] = content
80
- print(f"[TIMER] Total link fetching: {time.perf_counter() - t0:.2f}s")
81
 
 
82
  return fetched_data
83
 
 
84
  def query_gemini(questions, contexts, max_retries=3):
85
  import itertools
86
 
 
31
  def fetch_all_links(links, timeout=10, max_workers=10):
32
  """
33
  Fetch all HTTPS links in parallel, with per-link timing.
 
34
  Returns a dict {link: content or error}.
35
  """
36
  fetched_data = {}
37
 
 
 
 
 
 
 
 
 
 
38
  def fetch(link):
39
  start = time.perf_counter()
40
  try:
 
48
  print(f"❌ {link} — {elapsed:.2f}s — ERROR: {e}")
49
  return link, f"ERROR: {e}"
50
 
51
+ # Fetch all links in parallel (no banned filtering, no special prioritization)
 
 
 
 
 
 
 
 
 
 
 
 
52
  t0 = time.perf_counter()
53
  with ThreadPoolExecutor(max_workers=max_workers) as executor:
54
+ future_to_link = {executor.submit(fetch, link): link for link in links}
55
  for future in as_completed(future_to_link):
56
  link, content = future.result()
57
  fetched_data[link] = content
 
58
 
59
+ print(f"[TIMER] Total link fetching: {time.perf_counter() - t0:.2f}s")
60
  return fetched_data
61
 
62
+
63
  def query_gemini(questions, contexts, max_retries=3):
64
  import itertools
65