Soham Waghmare committed on
Commit
51f3191
·
1 Parent(s): f7da48c

feat: evaluation using TF-IDF and cosine

Browse files
backend/knet.py CHANGED
@@ -64,6 +64,10 @@ class Prompt:
64
  Return only decision: true/false""")
65
 
66
  self.search_query = dedent("""Based on the following findings on topic {vertical}, create google search queries
 
 
 
 
67
  <Global Research Plan>
68
  {research_plan}
69
  </Global Research Plan>
@@ -236,7 +240,7 @@ class KNet:
236
  # Generate initial search query
237
  query = self.generate_content(
238
  self.prompt.search_query.format(
239
- vertical=self.research_plan[self.idx_research_plan], research_plan="None", past_queries="None", ctx_manager="None", n=1
240
  ),
241
  schema=self.schema.search_query,
242
  temp=1.5,
@@ -293,11 +297,6 @@ class KNet:
293
  self.logger.error("Research failed", exc_info=True)
294
  raise
295
 
296
- def _check_cancelled(self):
297
- """Check if the current task has been cancelled and raise CancelledError if so"""
298
- if asyncio.current_task() and asyncio.current_task().cancelled():
299
- raise asyncio.CancelledError("Research task was cancelled")
300
-
301
  async def _generate_final_report(self, topic: str, retry_count: int = 1) -> Dict[str, Any]:
302
  try:
303
  self._check_cancelled()
@@ -384,6 +383,7 @@ class KNet:
384
 
385
  prompt = self.prompt.search_query.format(
386
  vertical=self.research_plan[self.idx_research_plan],
 
387
  research_plan="\n".join([f"[done] {step}" for i, step in enumerate(self.research_plan) if i < self.idx_research_plan]),
388
  past_queries="\n".join([f"[done] {query}" for query in node.get_path_to_root()[1:]]),
389
  ctx_manager="\n\n---\n\n".join(self.ctx_manager),
@@ -420,9 +420,11 @@ class KNet:
420
 
421
  # Generate summary of key findings into the manager's context
422
  if node.data:
423
- findings = ("\n" + "-" * 10 + "Next data" + "-" * 10 + "\n").join([json.dumps(d, indent=2) for d in node.data])
424
- response = self.generate_content(self.prompt.site_summary.format(query=node.query, findings=findings), temp=0.2)
425
- self.ctx_manager.append(response) if isinstance(response, str) else None
 
 
426
 
427
  # Research manager takes decision to proceed or not
428
  prompt = self.prompt.continue_branch.format(
@@ -473,6 +475,11 @@ class KNet:
473
  raise Exception("GEMINI_RECITATION")
474
  raise
475
 
 
 
 
 
 
476
  async def test(self, topic: str, progress_callback):
477
  self.progress = ResearchProgress(progress_callback, self.master_node)
478
  try:
 
64
  Return only decision: true/false""")
65
 
66
  self.search_query = dedent("""Based on the following findings on topic {vertical}, create google search queries
67
+ <Original user query>
68
+ {topic}
69
+ </Original user query>
70
+
71
  <Global Research Plan>
72
  {research_plan}
73
  </Global Research Plan>
 
240
  # Generate initial search query
241
  query = self.generate_content(
242
  self.prompt.search_query.format(
243
+ vertical=self.research_plan[self.idx_research_plan], topic=topic, research_plan="None", past_queries="None", ctx_manager="None", n=1
244
  ),
245
  schema=self.schema.search_query,
246
  temp=1.5,
 
297
  self.logger.error("Research failed", exc_info=True)
298
  raise
299
 
 
 
 
 
 
300
  async def _generate_final_report(self, topic: str, retry_count: int = 1) -> Dict[str, Any]:
301
  try:
302
  self._check_cancelled()
 
383
 
384
  prompt = self.prompt.search_query.format(
385
  vertical=self.research_plan[self.idx_research_plan],
386
+ topic=topic,
387
  research_plan="\n".join([f"[done] {step}" for i, step in enumerate(self.research_plan) if i < self.idx_research_plan]),
388
  past_queries="\n".join([f"[done] {query}" for query in node.get_path_to_root()[1:]]),
389
  ctx_manager="\n\n---\n\n".join(self.ctx_manager),
 
420
 
421
  # Generate summary of key findings into the manager's context
422
  if node.data:
423
+ for idx in range(0, len(node.data), 3):
424
+ data = node.data[idx : idx + 3]
425
+ findings = ("\n" + "-" * 10 + "Next data" + "-" * 10 + "\n").join([json.dumps(d, indent=2) for d in data])
426
+ response = self.generate_content(self.prompt.site_summary.format(query=node.query, findings=findings), temp=0.2)
427
+ self.ctx_manager.append(response) if isinstance(response, str) else None
428
 
429
  # Research manager takes decision to proceed or not
430
  prompt = self.prompt.continue_branch.format(
 
475
  raise Exception("GEMINI_RECITATION")
476
  raise
477
 
478
+ def _check_cancelled(self):
479
+ """Check if the current task has been cancelled and raise CancelledError if so"""
480
+ if asyncio.current_task() and asyncio.current_task().cancelled():
481
+ raise asyncio.CancelledError("Research task was cancelled")
482
+
483
  async def test(self, topic: str, progress_callback):
484
  self.progress = ResearchProgress(progress_callback, self.master_node)
485
  try:
backend/pyproject.toml CHANGED
@@ -113,6 +113,7 @@ dependencies = [
113
  "rich==13.9.4",
114
  "rpds-py==0.22.3",
115
  "rsa==4.9",
 
116
  "selenium==4.28.1",
117
  "sgmllib3k==1.0.0",
118
  "simple-websocket==1.1.0",
 
113
  "rich==13.9.4",
114
  "rpds-py==0.22.3",
115
  "rsa==4.9",
116
+ "scikit-learn>=1.6.1",
117
  "selenium==4.28.1",
118
  "sgmllib3k==1.0.0",
119
  "simple-websocket==1.1.0",
backend/similarity_metrics.py ADDED
@@ -0,0 +1,258 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import re
3
+ from sklearn.feature_extraction.text import TfidfVectorizer
4
+ from sklearn.metrics.pairwise import cosine_similarity
5
+ import numpy as np
6
+ import csv # For CSV output
7
+ import os # To get filename for report
8
+
9
+ # --- 1. Load Data --- (No changes from previous version)
10
+ def load_data(filepath="output.log.json"):
11
+ try:
12
+ with open(filepath, 'r', encoding='utf-8') as f:
13
+ data = json.load(f)
14
+ return data
15
+ except FileNotFoundError:
16
+ print(f"Error: File '{filepath}' not found.")
17
+ return None
18
+ except json.JSONDecodeError:
19
+ print(f"Error: Could not decode JSON from '{filepath}'.")
20
+ return None
21
+
22
+ # --- 2. Text Extraction --- (No changes from previous version)
23
+ def extract_main_content(data):
24
+ return data.get("content", "")
25
+
26
+ def extract_source_texts_from_tree(current_node):
27
+ sources_list = []
28
+ if isinstance(current_node, dict):
29
+ if "sources" in current_node and isinstance(current_node["sources"], dict):
30
+ for url, raw_content in current_node["sources"].items():
31
+ if isinstance(raw_content, str) and raw_content.strip():
32
+ sources_list.append({"id": url, "text": raw_content})
33
+ for key, value in current_node.items():
34
+ if key != "sources":
35
+ sources_list.extend(extract_source_texts_from_tree(value))
36
+ elif isinstance(current_node, list):
37
+ for item in current_node:
38
+ sources_list.extend(extract_source_texts_from_tree(item))
39
+ return sources_list
40
+
41
+ # --- 3. Text Preprocessing --- (No changes from previous version)
42
+ def preprocess_text(text):
43
+ if not text or not isinstance(text, str):
44
+ return ""
45
+ text = text.lower()
46
+ text = re.sub(r'\W+', ' ', text)
47
+ text = re.sub(r'\s+', ' ', text).strip()
48
+ return text
49
+
50
+ # --- 4. Similarity Calculation ---
51
+ # Modified to return both similarity_scores and the count of valid_source_info_for_results
52
+ def calculate_similarity(main_text, source_texts_list):
53
+ if not main_text.strip():
54
+ # print("Main report content is empty. Cannot calculate similarity.") # Will be handled by caller
55
+ return {}, 0
56
+ if not source_texts_list:
57
+ # print("No source texts extracted. Cannot calculate similarity.") # Will be handled by caller
58
+ return {}, 0
59
+
60
+ preprocessed_main_text = preprocess_text(main_text)
61
+
62
+ all_texts_for_tfidf = [preprocessed_main_text]
63
+ valid_source_info_for_results = []
64
+
65
+ for source_info in source_texts_list:
66
+ source_text = source_info.get("text", "")
67
+ preprocessed_s_text = preprocess_text(source_text)
68
+ if preprocessed_s_text:
69
+ all_texts_for_tfidf.append(preprocessed_s_text)
70
+ valid_source_info_for_results.append(source_info)
71
+ # else:
72
+ # print(f"Info: Source '{source_info.get('id', 'unknown')}' resulted in empty text after preprocessing, skipping.")
73
+
74
+ if len(all_texts_for_tfidf) < 2:
75
+ # print("Not enough text content (main text + at least one valid source after preprocessing) to compare.")
76
+ return {}, len(valid_source_info_for_results)
77
+
78
+ try:
79
+ vectorizer = TfidfVectorizer()
80
+ tfidf_matrix = vectorizer.fit_transform(all_texts_for_tfidf)
81
+ except ValueError as e:
82
+ print(f"Error during TF-IDF vectorization: {e}.")
83
+ return {}, len(valid_source_info_for_results)
84
+
85
+ main_text_vector = tfidf_matrix[0]
86
+ source_vectors = tfidf_matrix[1:]
87
+
88
+ if source_vectors.shape[0] == 0:
89
+ return {}, len(valid_source_info_for_results)
90
+
91
+ similarities = cosine_similarity(main_text_vector, source_vectors)
92
+
93
+ results = {}
94
+ for i, source_info in enumerate(valid_source_info_for_results):
95
+ results[source_info["id"]] = similarities[0, i]
96
+
97
+ return results, len(valid_source_info_for_results)
98
+
99
+ # --- 5. Aggregate Metrics Calculation --- (New Function)
100
+ def calculate_aggregate_metrics(similarity_scores, total_sources_extracted_from_tree, sources_used_in_similarity_calc):
101
+ """Calculates aggregate metrics from individual similarity scores."""
102
+ metrics = {
103
+ "total_sources_in_tree": total_sources_extracted_from_tree,
104
+ "sources_used_for_sim": sources_used_in_similarity_calc,
105
+ "average_similarity": 0.0,
106
+ "max_similarity": 0.0,
107
+ "top_source_url": "N/A",
108
+ "highly_similar_sources_gt_0_3": 0,
109
+ "highly_similar_sources_gt_0_5": 0
110
+ }
111
+
112
+ if not similarity_scores: # No scores to aggregate
113
+ return metrics
114
+
115
+ scores = list(similarity_scores.values())
116
+ metrics["average_similarity"] = np.mean(scores) if scores else 0.0
117
+ metrics["max_similarity"] = np.max(scores) if scores else 0.0
118
+
119
+ if scores:
120
+ for url, score in similarity_scores.items():
121
+ if score == metrics["max_similarity"]:
122
+ metrics["top_source_url"] = url
123
+ break # Take the first one if multiple have max score
124
+
125
+ metrics["highly_similar_sources_gt_0_3"] = sum(1 for score in scores if score > 0.3)
126
+ metrics["highly_similar_sources_gt_0_5"] = sum(1 for score in scores if score > 0.5)
127
+
128
+ return metrics
129
+
130
+ # --- Main Execution ---
131
+ if __name__ == "__main__":
132
+ # --- IMPORTANT: Update this list with paths to your JSON files ---
133
+ sample_files = [
134
+ "output.log.json",
135
+ # "output_sample2.log.json", # Example: add more files
136
+ # "output_sample3.log.json"
137
+ ]
138
+ # You can also use glob to find all *.json files in a directory if needed:
139
+ # import glob
140
+ # sample_files = glob.glob("path/to/your/json_files/*.json")
141
+
142
+ if not sample_files or not os.path.exists(sample_files[0]): # Basic check
143
+ print("Please update the 'sample_files' list with valid paths to your JSON files.")
144
+ print(f"Currently looking for: {sample_files}")
145
+ exit()
146
+
147
+ all_samples_aggregated_metrics = []
148
+
149
+ print("Processing multiple sample files...\n")
150
+
151
+ for filepath in sample_files:
152
+ print(f"--- Processing file: {filepath} ---")
153
+ data = load_data(filepath)
154
+
155
+ if not data:
156
+ print(f"Skipping file {filepath} due to loading error.\n")
157
+ # Add a placeholder entry or skip
158
+ all_samples_aggregated_metrics.append({
159
+ "sample_file": os.path.basename(filepath),
160
+ "error": "Failed to load or parse JSON"
161
+ })
162
+ continue
163
+
164
+ main_report_content = extract_main_content(data)
165
+ research_tree_data = data.get("research_tree")
166
+
167
+ if not main_report_content:
168
+ print("Could not extract main report content.")
169
+ all_samples_aggregated_metrics.append({
170
+ "sample_file": os.path.basename(filepath),
171
+ "error": "No main content"
172
+ })
173
+ continue # Or add more default values
174
+
175
+ if research_tree_data is None:
176
+ print("Could not find 'research_tree' in the data or it is null.")
177
+ all_samples_aggregated_metrics.append({
178
+ "sample_file": os.path.basename(filepath),
179
+ "total_sources_in_tree": 0, "sources_used_for_sim":0,
180
+ "error": "No research tree"
181
+ })
182
+ continue # Or add more default values
183
+
184
+ source_texts_from_tree = extract_source_texts_from_tree(research_tree_data)
185
+ total_extracted_count = len(source_texts_from_tree)
186
+ print(f"Extracted {total_extracted_count} source entries from the tree.")
187
+
188
+ if not source_texts_from_tree:
189
+ print("No source texts could be extracted from the research_tree.")
190
+ agg_metrics = calculate_aggregate_metrics({}, total_extracted_count, 0)
191
+ else:
192
+ similarity_scores, num_sources_used_in_calc = calculate_similarity(main_report_content, source_texts_from_tree)
193
+ if similarity_scores:
194
+ print(f"Calculated similarities for {len(similarity_scores)} sources (out of {num_sources_used_in_calc} with text).")
195
+ else:
196
+ print(f"Could not calculate similarity scores (used {num_sources_used_in_calc} sources with text).")
197
+ agg_metrics = calculate_aggregate_metrics(similarity_scores, total_extracted_count, num_sources_used_in_calc)
198
+
199
+ agg_metrics["sample_file"] = os.path.basename(filepath) # Add filename to the metrics
200
+ all_samples_aggregated_metrics.append(agg_metrics)
201
+ print(f"Aggregated metrics for {filepath}: {agg_metrics}\n")
202
+
203
+ # --- Outputting the Combined Metrics ---
204
+ if not all_samples_aggregated_metrics:
205
+ print("No data processed to generate a report.")
206
+ else:
207
+ print("\n--- Combined Metrics Table ---")
208
+
209
+ # Define header based on keys from the first valid entry (if any)
210
+ # Fallback headers if all entries had errors
211
+ header = ["sample_file", "total_sources_in_tree", "sources_used_for_sim",
212
+ "average_similarity", "max_similarity", "top_source_url",
213
+ "highly_similar_sources_gt_0_3", "highly_similar_sources_gt_0_5", "error"]
214
+
215
+ # Try to get header from a successful processing attempt
216
+ first_successful_metric = next((m for m in all_samples_aggregated_metrics if "error" not in m), None)
217
+ if first_successful_metric:
218
+ header = list(first_successful_metric.keys())
219
+
220
+
221
+ # Print to console
222
+ # Dynamically create format string for cleaner table printing
223
+ col_widths = {h: len(h) for h in header}
224
+ for row_metrics in all_samples_aggregated_metrics:
225
+ for h in header:
226
+ col_widths[h] = max(col_widths[h], len(str(row_metrics.get(h, ""))))
227
+
228
+ header_fmt = " | ".join([f"{{:<{col_widths[h]}}}" for h in header])
229
+ row_fmt = " | ".join([f"{{:<{col_widths[h]}}}" for h in header])
230
+
231
+ print(header_fmt.format(*header))
232
+ print("-+-".join(['-' * col_widths[h] for h in header])) # Separator line
233
+
234
+ for sample_metrics in all_samples_aggregated_metrics:
235
+ # Ensure all keys are present for formatting, defaulting to "N/A" or empty
236
+ row_values = [sample_metrics.get(h, "N/A") for h in header]
237
+ # Format floats
238
+ for i, h_key in enumerate(header):
239
+ if isinstance(row_values[i], float):
240
+ row_values[i] = f"{row_values[i]:.4f}"
241
+ print(row_fmt.format(*row_values))
242
+
243
+ # Write to CSV
244
+ csv_filename = "similarity_metrics_report.csv"
245
+ try:
246
+ with open(csv_filename, 'w', newline='', encoding='utf-8') as csvfile:
247
+ writer = csv.DictWriter(csvfile, fieldnames=header)
248
+ writer.writeheader()
249
+ reader = csv.DictReader(csvfile, fieldnames=header)
250
+ old_rows = list(reader)
251
+ writer.writerows(old_rows)
252
+ for sample_metrics in all_samples_aggregated_metrics:
253
+ # Create a full dict for DictWriter, ensuring all header fields are present
254
+ row_to_write = {h: sample_metrics.get(h, "N/A") for h in header}
255
+ writer.writerow(row_to_write)
256
+ print(f"\nMetrics report saved to: {csv_filename}")
257
+ except IOError:
258
+ print(f"\nError: Could not write metrics report to {csv_filename}")
backend/uv.lock CHANGED
@@ -269,6 +269,7 @@ dependencies = [
269
  { name = "rich" },
270
  { name = "rpds-py" },
271
  { name = "rsa" },
 
272
  { name = "selenium" },
273
  { name = "sgmllib3k" },
274
  { name = "simple-websocket" },
@@ -414,6 +415,7 @@ requires-dist = [
414
  { name = "rich", specifier = "==13.9.4" },
415
  { name = "rpds-py", specifier = "==0.22.3" },
416
  { name = "rsa", specifier = "==4.9" },
 
417
  { name = "selenium", specifier = "==4.28.1" },
418
  { name = "sgmllib3k", specifier = "==1.0.0" },
419
  { name = "simple-websocket", specifier = "==1.1.0" },
@@ -2308,6 +2310,86 @@ wheels = [
2308
  { url = "https://files.pythonhosted.org/packages/49/97/fa78e3d2f65c02c8e1268b9aba606569fe97f6c8f7c2d74394553347c145/rsa-4.9-py3-none-any.whl", hash = "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7", size = 34315 },
2309
  ]
2310
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2311
  [[package]]
2312
  name = "selenium"
2313
  version = "4.28.1"
@@ -2459,6 +2541,15 @@ wheels = [
2459
  { url = "https://files.pythonhosted.org/packages/06/1b/4a2dcfedf88923b8b022f566042006a6d871235d9c6f633866a9a640cc51/tf_playwright_stealth-1.1.1-py3-none-any.whl", hash = "sha256:7cc4785bcafca17c8987919591eab569c180b0d6159ce058807398ba616d6696", size = 32998 },
2460
  ]
2461
 
 
 
 
 
 
 
 
 
 
2462
  [[package]]
2463
  name = "tiktoken"
2464
  version = "0.8.0"
 
269
  { name = "rich" },
270
  { name = "rpds-py" },
271
  { name = "rsa" },
272
+ { name = "scikit-learn" },
273
  { name = "selenium" },
274
  { name = "sgmllib3k" },
275
  { name = "simple-websocket" },
 
415
  { name = "rich", specifier = "==13.9.4" },
416
  { name = "rpds-py", specifier = "==0.22.3" },
417
  { name = "rsa", specifier = "==4.9" },
418
+ { name = "scikit-learn", specifier = ">=1.6.1" },
419
  { name = "selenium", specifier = "==4.28.1" },
420
  { name = "sgmllib3k", specifier = "==1.0.0" },
421
  { name = "simple-websocket", specifier = "==1.1.0" },
 
2310
  { url = "https://files.pythonhosted.org/packages/49/97/fa78e3d2f65c02c8e1268b9aba606569fe97f6c8f7c2d74394553347c145/rsa-4.9-py3-none-any.whl", hash = "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7", size = 34315 },
2311
  ]
2312
 
2313
+ [[package]]
2314
+ name = "scikit-learn"
2315
+ version = "1.6.1"
2316
+ source = { registry = "https://pypi.org/simple" }
2317
+ dependencies = [
2318
+ { name = "joblib" },
2319
+ { name = "numpy" },
2320
+ { name = "scipy" },
2321
+ { name = "threadpoolctl" },
2322
+ ]
2323
+ sdist = { url = "https://files.pythonhosted.org/packages/9e/a5/4ae3b3a0755f7b35a280ac90b28817d1f380318973cff14075ab41ef50d9/scikit_learn-1.6.1.tar.gz", hash = "sha256:b4fc2525eca2c69a59260f583c56a7557c6ccdf8deafdba6e060f94c1c59738e", size = 7068312 }
2324
+ wheels = [
2325
+ { url = "https://files.pythonhosted.org/packages/6c/2a/e291c29670795406a824567d1dfc91db7b699799a002fdaa452bceea8f6e/scikit_learn-1.6.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:72abc587c75234935e97d09aa4913a82f7b03ee0b74111dcc2881cba3c5a7b33", size = 12102620 },
2326
+ { url = "https://files.pythonhosted.org/packages/25/92/ee1d7a00bb6b8c55755d4984fd82608603a3cc59959245068ce32e7fb808/scikit_learn-1.6.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:b3b00cdc8f1317b5f33191df1386c0befd16625f49d979fe77a8d44cae82410d", size = 11116234 },
2327
+ { url = "https://files.pythonhosted.org/packages/30/cd/ed4399485ef364bb25f388ab438e3724e60dc218c547a407b6e90ccccaef/scikit_learn-1.6.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dc4765af3386811c3ca21638f63b9cf5ecf66261cc4815c1db3f1e7dc7b79db2", size = 12592155 },
2328
+ { url = "https://files.pythonhosted.org/packages/a8/f3/62fc9a5a659bb58a03cdd7e258956a5824bdc9b4bb3c5d932f55880be569/scikit_learn-1.6.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:25fc636bdaf1cc2f4a124a116312d837148b5e10872147bdaf4887926b8c03d8", size = 13497069 },
2329
+ { url = "https://files.pythonhosted.org/packages/a1/a6/c5b78606743a1f28eae8f11973de6613a5ee87366796583fb74c67d54939/scikit_learn-1.6.1-cp311-cp311-win_amd64.whl", hash = "sha256:fa909b1a36e000a03c382aade0bd2063fd5680ff8b8e501660c0f59f021a6415", size = 11139809 },
2330
+ { url = "https://files.pythonhosted.org/packages/0a/18/c797c9b8c10380d05616db3bfb48e2a3358c767affd0857d56c2eb501caa/scikit_learn-1.6.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:926f207c804104677af4857b2c609940b743d04c4c35ce0ddc8ff4f053cddc1b", size = 12104516 },
2331
+ { url = "https://files.pythonhosted.org/packages/c4/b7/2e35f8e289ab70108f8cbb2e7a2208f0575dc704749721286519dcf35f6f/scikit_learn-1.6.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:2c2cae262064e6a9b77eee1c8e768fc46aa0b8338c6a8297b9b6759720ec0ff2", size = 11167837 },
2332
+ { url = "https://files.pythonhosted.org/packages/a4/f6/ff7beaeb644bcad72bcfd5a03ff36d32ee4e53a8b29a639f11bcb65d06cd/scikit_learn-1.6.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1061b7c028a8663fb9a1a1baf9317b64a257fcb036dae5c8752b2abef31d136f", size = 12253728 },
2333
+ { url = "https://files.pythonhosted.org/packages/29/7a/8bce8968883e9465de20be15542f4c7e221952441727c4dad24d534c6d99/scikit_learn-1.6.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e69fab4ebfc9c9b580a7a80111b43d214ab06250f8a7ef590a4edf72464dd86", size = 13147700 },
2334
+ { url = "https://files.pythonhosted.org/packages/62/27/585859e72e117fe861c2079bcba35591a84f801e21bc1ab85bce6ce60305/scikit_learn-1.6.1-cp312-cp312-win_amd64.whl", hash = "sha256:70b1d7e85b1c96383f872a519b3375f92f14731e279a7b4c6cfd650cf5dffc52", size = 11110613 },
2335
+ { url = "https://files.pythonhosted.org/packages/2e/59/8eb1872ca87009bdcdb7f3cdc679ad557b992c12f4b61f9250659e592c63/scikit_learn-1.6.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2ffa1e9e25b3d93990e74a4be2c2fc61ee5af85811562f1288d5d055880c4322", size = 12010001 },
2336
+ { url = "https://files.pythonhosted.org/packages/9d/05/f2fc4effc5b32e525408524c982c468c29d22f828834f0625c5ef3d601be/scikit_learn-1.6.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:dc5cf3d68c5a20ad6d571584c0750ec641cc46aeef1c1507be51300e6003a7e1", size = 11096360 },
2337
+ { url = "https://files.pythonhosted.org/packages/c8/e4/4195d52cf4f113573fb8ebc44ed5a81bd511a92c0228889125fac2f4c3d1/scikit_learn-1.6.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c06beb2e839ecc641366000ca84f3cf6fa9faa1777e29cf0c04be6e4d096a348", size = 12209004 },
2338
+ { url = "https://files.pythonhosted.org/packages/94/be/47e16cdd1e7fcf97d95b3cb08bde1abb13e627861af427a3651fcb80b517/scikit_learn-1.6.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8ca8cb270fee8f1f76fa9bfd5c3507d60c6438bbee5687f81042e2bb98e5a97", size = 13171776 },
2339
+ { url = "https://files.pythonhosted.org/packages/34/b0/ca92b90859070a1487827dbc672f998da95ce83edce1270fc23f96f1f61a/scikit_learn-1.6.1-cp313-cp313-win_amd64.whl", hash = "sha256:7a1c43c8ec9fde528d664d947dc4c0789be4077a3647f232869f41d9bf50e0fb", size = 11071865 },
2340
+ { url = "https://files.pythonhosted.org/packages/12/ae/993b0fb24a356e71e9a894e42b8a9eec528d4c70217353a1cd7a48bc25d4/scikit_learn-1.6.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a17c1dea1d56dcda2fac315712f3651a1fea86565b64b48fa1bc090249cbf236", size = 11955804 },
2341
+ { url = "https://files.pythonhosted.org/packages/d6/54/32fa2ee591af44507eac86406fa6bba968d1eb22831494470d0a2e4a1eb1/scikit_learn-1.6.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:6a7aa5f9908f0f28f4edaa6963c0a6183f1911e63a69aa03782f0d924c830a35", size = 11100530 },
2342
+ { url = "https://files.pythonhosted.org/packages/3f/58/55856da1adec655bdce77b502e94a267bf40a8c0b89f8622837f89503b5a/scikit_learn-1.6.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0650e730afb87402baa88afbf31c07b84c98272622aaba002559b614600ca691", size = 12433852 },
2343
+ { url = "https://files.pythonhosted.org/packages/ff/4f/c83853af13901a574f8f13b645467285a48940f185b690936bb700a50863/scikit_learn-1.6.1-cp313-cp313t-win_amd64.whl", hash = "sha256:3f59fe08dc03ea158605170eb52b22a105f238a5d512c4470ddeca71feae8e5f", size = 11337256 },
2344
+ ]
2345
+
2346
+ [[package]]
2347
+ name = "scipy"
2348
+ version = "1.15.3"
2349
+ source = { registry = "https://pypi.org/simple" }
2350
+ dependencies = [
2351
+ { name = "numpy" },
2352
+ ]
2353
+ sdist = { url = "https://files.pythonhosted.org/packages/0f/37/6964b830433e654ec7485e45a00fc9a27cf868d622838f6b6d9c5ec0d532/scipy-1.15.3.tar.gz", hash = "sha256:eae3cf522bc7df64b42cad3925c876e1b0b6c35c1337c93e12c0f366f55b0eaf", size = 59419214 }
2354
+ wheels = [
2355
+ { url = "https://files.pythonhosted.org/packages/96/ab/5cc9f80f28f6a7dff646c5756e559823614a42b1939d86dd0ed550470210/scipy-1.15.3-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:993439ce220d25e3696d1b23b233dd010169b62f6456488567e830654ee37a6b", size = 38714255 },
2356
+ { url = "https://files.pythonhosted.org/packages/4a/4a/66ba30abe5ad1a3ad15bfb0b59d22174012e8056ff448cb1644deccbfed2/scipy-1.15.3-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:34716e281f181a02341ddeaad584205bd2fd3c242063bd3423d61ac259ca7eba", size = 30111035 },
2357
+ { url = "https://files.pythonhosted.org/packages/4b/fa/a7e5b95afd80d24313307f03624acc65801846fa75599034f8ceb9e2cbf6/scipy-1.15.3-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:3b0334816afb8b91dab859281b1b9786934392aa3d527cd847e41bb6f45bee65", size = 22384499 },
2358
+ { url = "https://files.pythonhosted.org/packages/17/99/f3aaddccf3588bb4aea70ba35328c204cadd89517a1612ecfda5b2dd9d7a/scipy-1.15.3-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:6db907c7368e3092e24919b5e31c76998b0ce1684d51a90943cb0ed1b4ffd6c1", size = 25152602 },
2359
+ { url = "https://files.pythonhosted.org/packages/56/c5/1032cdb565f146109212153339f9cb8b993701e9fe56b1c97699eee12586/scipy-1.15.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:721d6b4ef5dc82ca8968c25b111e307083d7ca9091bc38163fb89243e85e3889", size = 35503415 },
2360
+ { url = "https://files.pythonhosted.org/packages/bd/37/89f19c8c05505d0601ed5650156e50eb881ae3918786c8fd7262b4ee66d3/scipy-1.15.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39cb9c62e471b1bb3750066ecc3a3f3052b37751c7c3dfd0fd7e48900ed52982", size = 37652622 },
2361
+ { url = "https://files.pythonhosted.org/packages/7e/31/be59513aa9695519b18e1851bb9e487de66f2d31f835201f1b42f5d4d475/scipy-1.15.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:795c46999bae845966368a3c013e0e00947932d68e235702b5c3f6ea799aa8c9", size = 37244796 },
2362
+ { url = "https://files.pythonhosted.org/packages/10/c0/4f5f3eeccc235632aab79b27a74a9130c6c35df358129f7ac8b29f562ac7/scipy-1.15.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:18aaacb735ab38b38db42cb01f6b92a2d0d4b6aabefeb07f02849e47f8fb3594", size = 40047684 },
2363
+ { url = "https://files.pythonhosted.org/packages/ab/a7/0ddaf514ce8a8714f6ed243a2b391b41dbb65251affe21ee3077ec45ea9a/scipy-1.15.3-cp311-cp311-win_amd64.whl", hash = "sha256:ae48a786a28412d744c62fd7816a4118ef97e5be0bee968ce8f0a2fba7acf3bb", size = 41246504 },
2364
+ { url = "https://files.pythonhosted.org/packages/37/4b/683aa044c4162e10ed7a7ea30527f2cbd92e6999c10a8ed8edb253836e9c/scipy-1.15.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6ac6310fdbfb7aa6612408bd2f07295bcbd3fda00d2d702178434751fe48e019", size = 38766735 },
2365
+ { url = "https://files.pythonhosted.org/packages/7b/7e/f30be3d03de07f25dc0ec926d1681fed5c732d759ac8f51079708c79e680/scipy-1.15.3-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:185cd3d6d05ca4b44a8f1595af87f9c372bb6acf9c808e99aa3e9aa03bd98cf6", size = 30173284 },
2366
+ { url = "https://files.pythonhosted.org/packages/07/9c/0ddb0d0abdabe0d181c1793db51f02cd59e4901da6f9f7848e1f96759f0d/scipy-1.15.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:05dc6abcd105e1a29f95eada46d4a3f251743cfd7d3ae8ddb4088047f24ea477", size = 22446958 },
2367
+ { url = "https://files.pythonhosted.org/packages/af/43/0bce905a965f36c58ff80d8bea33f1f9351b05fad4beaad4eae34699b7a1/scipy-1.15.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:06efcba926324df1696931a57a176c80848ccd67ce6ad020c810736bfd58eb1c", size = 25242454 },
2368
+ { url = "https://files.pythonhosted.org/packages/56/30/a6f08f84ee5b7b28b4c597aca4cbe545535c39fe911845a96414700b64ba/scipy-1.15.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c05045d8b9bfd807ee1b9f38761993297b10b245f012b11b13b91ba8945f7e45", size = 35210199 },
2369
+ { url = "https://files.pythonhosted.org/packages/0b/1f/03f52c282437a168ee2c7c14a1a0d0781a9a4a8962d84ac05c06b4c5b555/scipy-1.15.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:271e3713e645149ea5ea3e97b57fdab61ce61333f97cfae392c28ba786f9bb49", size = 37309455 },
2370
+ { url = "https://files.pythonhosted.org/packages/89/b1/fbb53137f42c4bf630b1ffdfc2151a62d1d1b903b249f030d2b1c0280af8/scipy-1.15.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6cfd56fc1a8e53f6e89ba3a7a7251f7396412d655bca2aa5611c8ec9a6784a1e", size = 36885140 },
2371
+ { url = "https://files.pythonhosted.org/packages/2e/2e/025e39e339f5090df1ff266d021892694dbb7e63568edcfe43f892fa381d/scipy-1.15.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0ff17c0bb1cb32952c09217d8d1eed9b53d1463e5f1dd6052c7857f83127d539", size = 39710549 },
2372
+ { url = "https://files.pythonhosted.org/packages/e6/eb/3bf6ea8ab7f1503dca3a10df2e4b9c3f6b3316df07f6c0ded94b281c7101/scipy-1.15.3-cp312-cp312-win_amd64.whl", hash = "sha256:52092bc0472cfd17df49ff17e70624345efece4e1a12b23783a1ac59a1b728ed", size = 40966184 },
2373
+ { url = "https://files.pythonhosted.org/packages/73/18/ec27848c9baae6e0d6573eda6e01a602e5649ee72c27c3a8aad673ebecfd/scipy-1.15.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2c620736bcc334782e24d173c0fdbb7590a0a436d2fdf39310a8902505008759", size = 38728256 },
2374
+ { url = "https://files.pythonhosted.org/packages/74/cd/1aef2184948728b4b6e21267d53b3339762c285a46a274ebb7863c9e4742/scipy-1.15.3-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:7e11270a000969409d37ed399585ee530b9ef6aa99d50c019de4cb01e8e54e62", size = 30109540 },
2375
+ { url = "https://files.pythonhosted.org/packages/5b/d8/59e452c0a255ec352bd0a833537a3bc1bfb679944c4938ab375b0a6b3a3e/scipy-1.15.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:8c9ed3ba2c8a2ce098163a9bdb26f891746d02136995df25227a20e71c396ebb", size = 22383115 },
2376
+ { url = "https://files.pythonhosted.org/packages/08/f5/456f56bbbfccf696263b47095291040655e3cbaf05d063bdc7c7517f32ac/scipy-1.15.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:0bdd905264c0c9cfa74a4772cdb2070171790381a5c4d312c973382fc6eaf730", size = 25163884 },
2377
+ { url = "https://files.pythonhosted.org/packages/a2/66/a9618b6a435a0f0c0b8a6d0a2efb32d4ec5a85f023c2b79d39512040355b/scipy-1.15.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79167bba085c31f38603e11a267d862957cbb3ce018d8b38f79ac043bc92d825", size = 35174018 },
2378
+ { url = "https://files.pythonhosted.org/packages/b5/09/c5b6734a50ad4882432b6bb7c02baf757f5b2f256041da5df242e2d7e6b6/scipy-1.15.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9deabd6d547aee2c9a81dee6cc96c6d7e9a9b1953f74850c179f91fdc729cb7", size = 37269716 },
2379
+ { url = "https://files.pythonhosted.org/packages/77/0a/eac00ff741f23bcabd352731ed9b8995a0a60ef57f5fd788d611d43d69a1/scipy-1.15.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:dde4fc32993071ac0c7dd2d82569e544f0bdaff66269cb475e0f369adad13f11", size = 36872342 },
2380
+ { url = "https://files.pythonhosted.org/packages/fe/54/4379be86dd74b6ad81551689107360d9a3e18f24d20767a2d5b9253a3f0a/scipy-1.15.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f77f853d584e72e874d87357ad70f44b437331507d1c311457bed8ed2b956126", size = 39670869 },
2381
+ { url = "https://files.pythonhosted.org/packages/87/2e/892ad2862ba54f084ffe8cc4a22667eaf9c2bcec6d2bff1d15713c6c0703/scipy-1.15.3-cp313-cp313-win_amd64.whl", hash = "sha256:b90ab29d0c37ec9bf55424c064312930ca5f4bde15ee8619ee44e69319aab163", size = 40988851 },
2382
+ { url = "https://files.pythonhosted.org/packages/1b/e9/7a879c137f7e55b30d75d90ce3eb468197646bc7b443ac036ae3fe109055/scipy-1.15.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3ac07623267feb3ae308487c260ac684b32ea35fd81e12845039952f558047b8", size = 38863011 },
2383
+ { url = "https://files.pythonhosted.org/packages/51/d1/226a806bbd69f62ce5ef5f3ffadc35286e9fbc802f606a07eb83bf2359de/scipy-1.15.3-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:6487aa99c2a3d509a5227d9a5e889ff05830a06b2ce08ec30df6d79db5fcd5c5", size = 30266407 },
2384
+ { url = "https://files.pythonhosted.org/packages/e5/9b/f32d1d6093ab9eeabbd839b0f7619c62e46cc4b7b6dbf05b6e615bbd4400/scipy-1.15.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:50f9e62461c95d933d5c5ef4a1f2ebf9a2b4e83b0db374cb3f1de104d935922e", size = 22540030 },
2385
+ { url = "https://files.pythonhosted.org/packages/e7/29/c278f699b095c1a884f29fda126340fcc201461ee8bfea5c8bdb1c7c958b/scipy-1.15.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:14ed70039d182f411ffc74789a16df3835e05dc469b898233a245cdfd7f162cb", size = 25218709 },
2386
+ { url = "https://files.pythonhosted.org/packages/24/18/9e5374b617aba742a990581373cd6b68a2945d65cc588482749ef2e64467/scipy-1.15.3-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a769105537aa07a69468a0eefcd121be52006db61cdd8cac8a0e68980bbb723", size = 34809045 },
2387
+ { url = "https://files.pythonhosted.org/packages/e1/fe/9c4361e7ba2927074360856db6135ef4904d505e9b3afbbcb073c4008328/scipy-1.15.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9db984639887e3dffb3928d118145ffe40eff2fa40cb241a306ec57c219ebbbb", size = 36703062 },
2388
+ { url = "https://files.pythonhosted.org/packages/b7/8e/038ccfe29d272b30086b25a4960f757f97122cb2ec42e62b460d02fe98e9/scipy-1.15.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:40e54d5c7e7ebf1aa596c374c49fa3135f04648a0caabcb66c52884b943f02b4", size = 36393132 },
2389
+ { url = "https://files.pythonhosted.org/packages/10/7e/5c12285452970be5bdbe8352c619250b97ebf7917d7a9a9e96b8a8140f17/scipy-1.15.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5e721fed53187e71d0ccf382b6bf977644c533e506c4d33c3fb24de89f5c3ed5", size = 38979503 },
2390
+ { url = "https://files.pythonhosted.org/packages/81/06/0a5e5349474e1cbc5757975b21bd4fad0e72ebf138c5592f191646154e06/scipy-1.15.3-cp313-cp313t-win_amd64.whl", hash = "sha256:76ad1fb5f8752eabf0fa02e4cc0336b4e8f021e2d5f061ed37d6d264db35e3ca", size = 40308097 },
2391
+ ]
2392
+
2393
  [[package]]
2394
  name = "selenium"
2395
  version = "4.28.1"
 
2541
  { url = "https://files.pythonhosted.org/packages/06/1b/4a2dcfedf88923b8b022f566042006a6d871235d9c6f633866a9a640cc51/tf_playwright_stealth-1.1.1-py3-none-any.whl", hash = "sha256:7cc4785bcafca17c8987919591eab569c180b0d6159ce058807398ba616d6696", size = 32998 },
2542
  ]
2543
 
2544
+ [[package]]
2545
+ name = "threadpoolctl"
2546
+ version = "3.6.0"
2547
+ source = { registry = "https://pypi.org/simple" }
2548
+ sdist = { url = "https://files.pythonhosted.org/packages/b7/4d/08c89e34946fce2aec4fbb45c9016efd5f4d7f24af8e5d93296e935631d8/threadpoolctl-3.6.0.tar.gz", hash = "sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e", size = 21274 }
2549
+ wheels = [
2550
+ { url = "https://files.pythonhosted.org/packages/32/d5/f9a850d79b0851d1d4ef6456097579a9005b31fea68726a4ae5f2d82ddd9/threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb", size = 18638 },
2551
+ ]
2552
+
2553
  [[package]]
2554
  name = "tiktoken"
2555
  version = "0.8.0"