Soham Waghmare committed on
Commit
51f3191
·
1 Parent(s): f7da48c

feat: evaluation using TF-IDF and cosine

Browse files
backend/knet.py CHANGED
@@ -64,6 +64,10 @@ class Prompt:
64
  Return only decision: true/false""")
65
 
66
  self.search_query = dedent("""Based on the following findings on topic {vertical}, create google search queries
 
 
 
 
67
  <Global Research Plan>
68
  {research_plan}
69
  </Global Research Plan>
@@ -236,7 +240,7 @@ class KNet:
236
  # Generate initial search query
237
  query = self.generate_content(
238
  self.prompt.search_query.format(
239
- vertical=self.research_plan[self.idx_research_plan], research_plan="None", past_queries="None", ctx_manager="None", n=1
240
  ),
241
  schema=self.schema.search_query,
242
  temp=1.5,
@@ -293,11 +297,6 @@ class KNet:
293
  self.logger.error("Research failed", exc_info=True)
294
  raise
295
 
296
- def _check_cancelled(self):
297
- """Check if the current task has been cancelled and raise CancelledError if so"""
298
- if asyncio.current_task() and asyncio.current_task().cancelled():
299
- raise asyncio.CancelledError("Research task was cancelled")
300
-
301
  async def _generate_final_report(self, topic: str, retry_count: int = 1) -> Dict[str, Any]:
302
  try:
303
  self._check_cancelled()
@@ -384,6 +383,7 @@ class KNet:
384
 
385
  prompt = self.prompt.search_query.format(
386
  vertical=self.research_plan[self.idx_research_plan],
 
387
  research_plan="\n".join([f"[done] {step}" for i, step in enumerate(self.research_plan) if i < self.idx_research_plan]),
388
  past_queries="\n".join([f"[done] {query}" for query in node.get_path_to_root()[1:]]),
389
  ctx_manager="\n\n---\n\n".join(self.ctx_manager),
@@ -420,9 +420,11 @@ class KNet:
420
 
421
  # Generate summary of key findings into the manager's context
422
  if node.data:
423
- findings = ("\n" + "-" * 10 + "Next data" + "-" * 10 + "\n").join([json.dumps(d, indent=2) for d in node.data])
424
- response = self.generate_content(self.prompt.site_summary.format(query=node.query, findings=findings), temp=0.2)
425
- self.ctx_manager.append(response) if isinstance(response, str) else None
 
 
426
 
427
  # Research manager takes decision to proceed or not
428
  prompt = self.prompt.continue_branch.format(
@@ -473,6 +475,11 @@ class KNet:
473
  raise Exception("GEMINI_RECITATION")
474
  raise
475
 
 
 
 
 
 
476
  async def test(self, topic: str, progress_callback):
477
  self.progress = ResearchProgress(progress_callback, self.master_node)
478
  try:
 
64
  Return only decision: true/false""")
65
 
66
  self.search_query = dedent("""Based on the following findings on topic {vertical}, create google search queries
67
+ <Original user query>
68
+ {topic}
69
+ </Original user query>
70
+
71
  <Global Research Plan>
72
  {research_plan}
73
  </Global Research Plan>
 
240
  # Generate initial search query
241
  query = self.generate_content(
242
  self.prompt.search_query.format(
243
+ vertical=self.research_plan[self.idx_research_plan], topic=topic, research_plan="None", past_queries="None", ctx_manager="None", n=1
244
  ),
245
  schema=self.schema.search_query,
246
  temp=1.5,
 
297
  self.logger.error("Research failed", exc_info=True)
298
  raise
299
 
 
 
 
 
 
300
  async def _generate_final_report(self, topic: str, retry_count: int = 1) -> Dict[str, Any]:
301
  try:
302
  self._check_cancelled()
 
383
 
384
  prompt = self.prompt.search_query.format(
385
  vertical=self.research_plan[self.idx_research_plan],
386
+ topic=topic,
387
  research_plan="\n".join([f"[done] {step}" for i, step in enumerate(self.research_plan) if i < self.idx_research_plan]),
388
  past_queries="\n".join([f"[done] {query}" for query in node.get_path_to_root()[1:]]),
389
  ctx_manager="\n\n---\n\n".join(self.ctx_manager),
 
420
 
421
  # Generate summary of key findings into the manager's context
422
  if node.data:
423
+ for idx in range(0, len(node.data), 3):
424
+ data = node.data[idx : idx + 3]
425
+ findings = ("\n" + "-" * 10 + "Next data" + "-" * 10 + "\n").join([json.dumps(d, indent=2) for d in data])
426
+ response = self.generate_content(self.prompt.site_summary.format(query=node.query, findings=findings), temp=0.2)
427
+ self.ctx_manager.append(response) if isinstance(response, str) else None
428
 
429
  # Research manager takes decision to proceed or not
430
  prompt = self.prompt.continue_branch.format(
 
475
  raise Exception("GEMINI_RECITATION")
476
  raise
477
 
478
+ def _check_cancelled(self):
479
+ """Check if the current task has been cancelled and raise CancelledError if so"""
480
+ if asyncio.current_task() and asyncio.current_task().cancelled():
481
+ raise asyncio.CancelledError("Research task was cancelled")
482
+
483
  async def test(self, topic: str, progress_callback):
484
  self.progress = ResearchProgress(progress_callback, self.master_node)
485
  try:
backend/pyproject.toml CHANGED
@@ -113,6 +113,7 @@ dependencies = [
113
  "rich==13.9.4",
114
  "rpds-py==0.22.3",
115
  "rsa==4.9",
 
116
  "selenium==4.28.1",
117
  "sgmllib3k==1.0.0",
118
  "simple-websocket==1.1.0",
 
113
  "rich==13.9.4",
114
  "rpds-py==0.22.3",
115
  "rsa==4.9",
116
+ "scikit-learn>=1.6.1",
117
  "selenium==4.28.1",
118
  "sgmllib3k==1.0.0",
119
  "simple-websocket==1.1.0",
backend/similarity_metrics.py ADDED
@@ -0,0 +1,258 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import re
3
+ from sklearn.feature_extraction.text import TfidfVectorizer
4
+ from sklearn.metrics.pairwise import cosine_similarity
5
+ import numpy as np
6
+ import csv # For CSV output
7
+ import os # To get filename for report
8
+
9
+ # --- 1. Load Data --- (No changes from previous version)
10
+ def load_data(filepath="output.log.json"):
11
+ try:
12
+ with open(filepath, 'r', encoding='utf-8') as f:
13
+ data = json.load(f)
14
+ return data
15
+ except FileNotFoundError:
16
+ print(f"Error: File '{filepath}' not found.")
17
+ return None
18
+ except json.JSONDecodeError:
19
+ print(f"Error: Could not decode JSON from '{filepath}'.")
20
+ return None
21
+
22
+ # --- 2. Text Extraction --- (No changes from previous version)
23
+ def extract_main_content(data):
24
+ return data.get("content", "")
25
+
26
+ def extract_source_texts_from_tree(current_node):
27
+ sources_list = []
28
+ if isinstance(current_node, dict):
29
+ if "sources" in current_node and isinstance(current_node["sources"], dict):
30
+ for url, raw_content in current_node["sources"].items():
31
+ if isinstance(raw_content, str) and raw_content.strip():
32
+ sources_list.append({"id": url, "text": raw_content})
33
+ for key, value in current_node.items():
34
+ if key != "sources":
35
+ sources_list.extend(extract_source_texts_from_tree(value))
36
+ elif isinstance(current_node, list):
37
+ for item in current_node:
38
+ sources_list.extend(extract_source_texts_from_tree(item))
39
+ return sources_list
40
+
41
+ # --- 3. Text Preprocessing --- (No changes from previous version)
42
+ def preprocess_text(text):
43
+ if not text or not isinstance(text, str):
44
+ return ""
45
+ text = text.lower()
46
+ text = re.sub(r'\W+', ' ', text)
47
+ text = re.sub(r'\s+', ' ', text).strip()
48
+ return text
49
+
50
+ # --- 4. Similarity Calculation ---
51
+ # Modified to return both similarity_scores and the count of valid_source_info_for_results
52
+ def calculate_similarity(main_text, source_texts_list):
53
+ if not main_text.strip():
54
+ # print("Main report content is empty. Cannot calculate similarity.") # Will be handled by caller
55
+ return {}, 0
56
+ if not source_texts_list:
57
+ # print("No source texts extracted. Cannot calculate similarity.") # Will be handled by caller
58
+ return {}, 0
59
+
60
+ preprocessed_main_text = preprocess_text(main_text)
61
+
62
+ all_texts_for_tfidf = [preprocessed_main_text]
63
+ valid_source_info_for_results = []
64
+
65
+ for source_info in source_texts_list:
66
+ source_text = source_info.get("text", "")
67
+ preprocessed_s_text = preprocess_text(source_text)
68
+ if preprocessed_s_text:
69
+ all_texts_for_tfidf.append(preprocessed_s_text)
70
+ valid_source_info_for_results.append(source_info)
71
+ # else:
72
+ # print(f"Info: Source '{source_info.get('id', 'unknown')}' resulted in empty text after preprocessing, skipping.")
73
+
74
+ if len(all_texts_for_tfidf) < 2:
75
+ # print("Not enough text content (main text + at least one valid source after preprocessing) to compare.")
76
+ return {}, len(valid_source_info_for_results)
77
+
78
+ try:
79
+ vectorizer = TfidfVectorizer()
80
+ tfidf_matrix = vectorizer.fit_transform(all_texts_for_tfidf)
81
+ except ValueError as e:
82
+ print(f"Error during TF-IDF vectorization: {e}.")
83
+ return {}, len(valid_source_info_for_results)
84
+
85
+ main_text_vector = tfidf_matrix[0]
86
+ source_vectors = tfidf_matrix[1:]
87
+
88
+ if source_vectors.shape[0] == 0:
89
+ return {}, len(valid_source_info_for_results)
90
+
91
+ similarities = cosine_similarity(main_text_vector, source_vectors)
92
+
93
+ results = {}
94
+ for i, source_info in enumerate(valid_source_info_for_results):
95
+ results[source_info["id"]] = similarities[0, i]
96
+
97
+ return results, len(valid_source_info_for_results)
98
+
99
+ # --- 5. Aggregate Metrics Calculation --- (New Function)
100
+ def calculate_aggregate_metrics(similarity_scores, total_sources_extracted_from_tree, sources_used_in_similarity_calc):
101
+ """Calculates aggregate metrics from individual similarity scores."""
102
+ metrics = {
103
+ "total_sources_in_tree": total_sources_extracted_from_tree,
104
+ "sources_used_for_sim": sources_used_in_similarity_calc,
105
+ "average_similarity": 0.0,
106
+ "max_similarity": 0.0,
107
+ "top_source_url": "N/A",
108
+ "highly_similar_sources_gt_0_3": 0,
109
+ "highly_similar_sources_gt_0_5": 0
110
+ }
111
+
112
+ if not similarity_scores: # No scores to aggregate
113
+ return metrics
114
+
115
+ scores = list(similarity_scores.values())
116
+ metrics["average_similarity"] = np.mean(scores) if scores else 0.0
117
+ metrics["max_similarity"] = np.max(scores) if scores else 0.0
118
+
119
+ if scores:
120
+ for url, score in similarity_scores.items():
121
+ if score == metrics["max_similarity"]:
122
+ metrics["top_source_url"] = url
123
+ break # Take the first one if multiple have max score
124
+
125
+ metrics["highly_similar_sources_gt_0_3"] = sum(1 for score in scores if score > 0.3)
126
+ metrics["highly_similar_sources_gt_0_5"] = sum(1 for score in scores if score > 0.5)
127
+
128
+ return metrics
129
+
130
+ # --- Main Execution ---
131
+ if __name__ == "__main__":
132
+ # --- IMPORTANT: Update this list with paths to your JSON files ---
133
+ sample_files = [
134
+ "output.log.json",
135
+ # "output_sample2.log.json", # Example: add more files
136
+ # "output_sample3.log.json"
137
+ ]
138
+ # You can also use glob to find all *.json files in a directory if needed:
139
+ # import glob
140
+ # sample_files = glob.glob("path/to/your/json_files/*.json")
141
+
142
+ if not sample_files or not os.path.exists(sample_files[0]): # Basic check
143
+ print("Please update the 'sample_files' list with valid paths to your JSON files.")
144
+ print(f"Currently looking for: {sample_files}")
145
+ exit()
146
+
147
+ all_samples_aggregated_metrics = []
148
+
149
+ print("Processing multiple sample files...\n")
150
+
151
+ for filepath in sample_files:
152
+ print(f"--- Processing file: {filepath} ---")
153
+ data = load_data(filepath)
154
+
155
+ if not data:
156
+ print(f"Skipping file {filepath} due to loading error.\n")
157
+ # Add a placeholder entry or skip
158
+ all_samples_aggregated_metrics.append({
159
+ "sample_file": os.path.basename(filepath),
160
+ "error": "Failed to load or parse JSON"
161
+ })
162
+ continue
163
+
164
+ main_report_content = extract_main_content(data)
165
+ research_tree_data = data.get("research_tree")
166
+
167
+ if not main_report_content:
168
+ print("Could not extract main report content.")
169
+ all_samples_aggregated_metrics.append({
170
+ "sample_file": os.path.basename(filepath),
171
+ "error": "No main content"
172
+ })
173
+ continue # Or add more default values
174
+
175
+ if research_tree_data is None:
176
+ print("Could not find 'research_tree' in the data or it is null.")
177
+ all_samples_aggregated_metrics.append({
178
+ "sample_file": os.path.basename(filepath),
179
+ "total_sources_in_tree": 0, "sources_used_for_sim":0,
180
+ "error": "No research tree"
181
+ })
182
+ continue # Or add more default values
183
+
184
+ source_texts_from_tree = extract_source_texts_from_tree(research_tree_data)
185
+ total_extracted_count = len(source_texts_from_tree)
186
+ print(f"Extracted {total_extracted_count} source entries from the tree.")
187
+
188
+ if not source_texts_from_tree:
189
+ print("No source texts could be extracted from the research_tree.")
190
+ agg_metrics = calculate_aggregate_metrics({}, total_extracted_count, 0)
191
+ else:
192
+ similarity_scores, num_sources_used_in_calc = calculate_similarity(main_report_content, source_texts_from_tree)
193
+ if similarity_scores:
194
+ print(f"Calculated similarities for {len(similarity_scores)} sources (out of {num_sources_used_in_calc} with text).")
195
+ else:
196
+ print(f"Could not calculate similarity scores (used {num_sources_used_in_calc} sources with text).")
197
+ agg_metrics = calculate_aggregate_metrics(similarity_scores, total_extracted_count, num_sources_used_in_calc)
198
+
199
+ agg_metrics["sample_file"] = os.path.basename(filepath) # Add filename to the metrics
200
+ all_samples_aggregated_metrics.append(agg_metrics)
201
+ print(f"Aggregated metrics for {filepath}: {agg_metrics}\n")
202
+
203
+ # --- Outputting the Combined Metrics ---
204
+ if not all_samples_aggregated_metrics:
205
+ print("No data processed to generate a report.")
206
+ else:
207
+ print("\n--- Combined Metrics Table ---")
208
+
209
+ # Define header based on keys from the first valid entry (if any)
210
+ # Fallback headers if all entries had errors
211
+ header = ["sample_file", "total_sources_in_tree", "sources_used_for_sim",
212
+ "average_similarity", "max_similarity", "top_source_url",
213
+ "highly_similar_sources_gt_0_3", "highly_similar_sources_gt_0_5", "error"]
214
+
215
+ # Try to get header from a successful processing attempt
216
+ first_successful_metric = next((m for m in all_samples_aggregated_metrics if "error" not in m), None)
217
+ if first_successful_metric:
218
+ header = list(first_successful_metric.keys())
219
+
220
+
221
+ # Print to console
222
+ # Dynamically create format string for cleaner table printing
223
+ col_widths = {h: len(h) for h in header}
224
+ for row_metrics in all_samples_aggregated_metrics:
225
+ for h in header:
226
+ col_widths[h] = max(col_widths[h], len(str(row_metrics.get(h, ""))))
227
+
228
+ header_fmt = " | ".join([f"{{:<{col_widths[h]}}}" for h in header])
229
+ row_fmt = " | ".join([f"{{:<{col_widths[h]}}}" for h in header])
230
+
231
+ print(header_fmt.format(*header))
232
+ print("-+-".join(['-' * col_widths[h] for h in header])) # Separator line
233
+
234
+ for sample_metrics in all_samples_aggregated_metrics:
235
+ # Ensure all keys are present for formatting, defaulting to "N/A" or empty
236
+ row_values = [sample_metrics.get(h, "N/A") for h in header]
237
+ # Format floats
238
+ for i, h_key in enumerate(header):
239
+ if isinstance(row_values[i], float):
240
+ row_values[i] = f"{row_values[i]:.4f}"
241
+ print(row_fmt.format(*row_values))
242
+
243
+ # Write to CSV
244
+ csv_filename = "similarity_metrics_report.csv"
245
+ try:
246
+ with open(csv_filename, 'w', newline='', encoding='utf-8') as csvfile:
247
+ writer = csv.DictWriter(csvfile, fieldnames=header)
248
+ writer.writeheader()
249
+ reader = csv.DictReader(csvfile, fieldnames=header)
250
+ old_rows = list(reader)
251
+ writer.writerows(old_rows)
252
+ for sample_metrics in all_samples_aggregated_metrics:
253
+ # Create a full dict for DictWriter, ensuring all header fields are present
254
+ row_to_write = {h: sample_metrics.get(h, "N/A") for h in header}
255
+ writer.writerow(row_to_write)
256
+ print(f"\nMetrics report saved to: {csv_filename}")
257
+ except IOError:
258
+ print(f"\nError: Could not write metrics report to {csv_filename}")
backend/uv.lock CHANGED
@@ -269,6 +269,7 @@ dependencies = [
269
  { name = "rich" },
270
  { name = "rpds-py" },
271
  { name = "rsa" },
 
272
  { name = "selenium" },
273
  { name = "sgmllib3k" },
274
  { name = "simple-websocket" },
@@ -414,6 +415,7 @@ requires-dist = [
414
  { name = "rich", specifier = "==13.9.4" },
415
  { name = "rpds-py", specifier = "==0.22.3" },
416
  { name = "rsa", specifier = "==4.9" },
 
417
  { name = "selenium", specifier = "==4.28.1" },
418
  { name = "sgmllib3k", specifier = "==1.0.0" },
419
  { name = "simple-websocket", specifier = "==1.1.0" },
@@ -2308,6 +2310,86 @@ wheels = [
2308
  { url = "https://files.pythonhosted.org/packages/49/97/fa78e3d2f65c02c8e1268b9aba606569fe97f6c8f7c2d74394553347c145/rsa-4.9-py3-none-any.whl", hash = "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7", size = 34315 },
2309
  ]
2310
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2311
  [[package]]
2312
  name = "selenium"
2313
  version = "4.28.1"
@@ -2459,6 +2541,15 @@ wheels = [
2459
  { url = "https://files.pythonhosted.org/packages/06/1b/4a2dcfedf88923b8b022f566042006a6d871235d9c6f633866a9a640cc51/tf_playwright_stealth-1.1.1-py3-none-any.whl", hash = "sha256:7cc4785bcafca17c8987919591eab569c180b0d6159ce058807398ba616d6696", size = 32998 },
2460
  ]
2461
 
 
 
 
 
 
 
 
 
 
2462
  [[package]]
2463
  name = "tiktoken"
2464
  version = "0.8.0"
 
269
  { name = "rich" },
270
  { name = "rpds-py" },
271
  { name = "rsa" },
272
+ { name = "scikit-learn" },
273
  { name = "selenium" },
274
  { name = "sgmllib3k" },
275
  { name = "simple-websocket" },
 
415
  { name = "rich", specifier = "==13.9.4" },
416
  { name = "rpds-py", specifier = "==0.22.3" },
417
  { name = "rsa", specifier = "==4.9" },
418
+ { name = "scikit-learn", specifier = ">=1.6.1" },
419
  { name = "selenium", specifier = "==4.28.1" },
420
  { name = "sgmllib3k", specifier = "==1.0.0" },
421
  { name = "simple-websocket", specifier = "==1.1.0" },
 
2310
  { url = "https://files.pythonhosted.org/packages/49/97/fa78e3d2f65c02c8e1268b9aba606569fe97f6c8f7c2d74394553347c145/rsa-4.9-py3-none-any.whl", hash = "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7", size = 34315 },
2311
  ]
2312
 
2313
+ [[package]]
2314
+ name = "scikit-learn"
2315
+ version = "1.6.1"
2316
+ source = { registry = "https://pypi.org/simple" }
2317
+ dependencies = [
2318
+ { name = "joblib" },
2319
+ { name = "numpy" },
2320
+ { name = "scipy" },
2321
+ { name = "threadpoolctl" },
2322
+ ]
2323
+ sdist = { url = "https://files.pythonhosted.org/packages/9e/a5/4ae3b3a0755f7b35a280ac90b28817d1f380318973cff14075ab41ef50d9/scikit_learn-1.6.1.tar.gz", hash = "sha256:b4fc2525eca2c69a59260f583c56a7557c6ccdf8deafdba6e060f94c1c59738e", size = 7068312 }
2324
+ wheels = [
2325
+ { url = "https://files.pythonhosted.org/packages/6c/2a/e291c29670795406a824567d1dfc91db7b699799a002fdaa452bceea8f6e/scikit_learn-1.6.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:72abc587c75234935e97d09aa4913a82f7b03ee0b74111dcc2881cba3c5a7b33", size = 12102620 },
2326
+ { url = "https://files.pythonhosted.org/packages/25/92/ee1d7a00bb6b8c55755d4984fd82608603a3cc59959245068ce32e7fb808/scikit_learn-1.6.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:b3b00cdc8f1317b5f33191df1386c0befd16625f49d979fe77a8d44cae82410d", size = 11116234 },
2327
+ { url = "https://files.pythonhosted.org/packages/30/cd/ed4399485ef364bb25f388ab438e3724e60dc218c547a407b6e90ccccaef/scikit_learn-1.6.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dc4765af3386811c3ca21638f63b9cf5ecf66261cc4815c1db3f1e7dc7b79db2", size = 12592155 },
2328
+ { url = "https://files.pythonhosted.org/packages/a8/f3/62fc9a5a659bb58a03cdd7e258956a5824bdc9b4bb3c5d932f55880be569/scikit_learn-1.6.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:25fc636bdaf1cc2f4a124a116312d837148b5e10872147bdaf4887926b8c03d8", size = 13497069 },
2329
+ { url = "https://files.pythonhosted.org/packages/a1/a6/c5b78606743a1f28eae8f11973de6613a5ee87366796583fb74c67d54939/scikit_learn-1.6.1-cp311-cp311-win_amd64.whl", hash = "sha256:fa909b1a36e000a03c382aade0bd2063fd5680ff8b8e501660c0f59f021a6415", size = 11139809 },
2330
+ { url = "https://files.pythonhosted.org/packages/0a/18/c797c9b8c10380d05616db3bfb48e2a3358c767affd0857d56c2eb501caa/scikit_learn-1.6.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:926f207c804104677af4857b2c609940b743d04c4c35ce0ddc8ff4f053cddc1b", size = 12104516 },
2331
+ { url = "https://files.pythonhosted.org/packages/c4/b7/2e35f8e289ab70108f8cbb2e7a2208f0575dc704749721286519dcf35f6f/scikit_learn-1.6.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:2c2cae262064e6a9b77eee1c8e768fc46aa0b8338c6a8297b9b6759720ec0ff2", size = 11167837 },
2332
+ { url = "https://files.pythonhosted.org/packages/a4/f6/ff7beaeb644bcad72bcfd5a03ff36d32ee4e53a8b29a639f11bcb65d06cd/scikit_learn-1.6.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1061b7c028a8663fb9a1a1baf9317b64a257fcb036dae5c8752b2abef31d136f", size = 12253728 },
2333
+ { url = "https://files.pythonhosted.org/packages/29/7a/8bce8968883e9465de20be15542f4c7e221952441727c4dad24d534c6d99/scikit_learn-1.6.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e69fab4ebfc9c9b580a7a80111b43d214ab06250f8a7ef590a4edf72464dd86", size = 13147700 },
2334
+ { url = "https://files.pythonhosted.org/packages/62/27/585859e72e117fe861c2079bcba35591a84f801e21bc1ab85bce6ce60305/scikit_learn-1.6.1-cp312-cp312-win_amd64.whl", hash = "sha256:70b1d7e85b1c96383f872a519b3375f92f14731e279a7b4c6cfd650cf5dffc52", size = 11110613 },
2335
+ { url = "https://files.pythonhosted.org/packages/2e/59/8eb1872ca87009bdcdb7f3cdc679ad557b992c12f4b61f9250659e592c63/scikit_learn-1.6.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2ffa1e9e25b3d93990e74a4be2c2fc61ee5af85811562f1288d5d055880c4322", size = 12010001 },
2336
+ { url = "https://files.pythonhosted.org/packages/9d/05/f2fc4effc5b32e525408524c982c468c29d22f828834f0625c5ef3d601be/scikit_learn-1.6.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:dc5cf3d68c5a20ad6d571584c0750ec641cc46aeef1c1507be51300e6003a7e1", size = 11096360 },
2337
+ { url = "https://files.pythonhosted.org/packages/c8/e4/4195d52cf4f113573fb8ebc44ed5a81bd511a92c0228889125fac2f4c3d1/scikit_learn-1.6.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c06beb2e839ecc641366000ca84f3cf6fa9faa1777e29cf0c04be6e4d096a348", size = 12209004 },
2338
+ { url = "https://files.pythonhosted.org/packages/94/be/47e16cdd1e7fcf97d95b3cb08bde1abb13e627861af427a3651fcb80b517/scikit_learn-1.6.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8ca8cb270fee8f1f76fa9bfd5c3507d60c6438bbee5687f81042e2bb98e5a97", size = 13171776 },
2339
+ { url = "https://files.pythonhosted.org/packages/34/b0/ca92b90859070a1487827dbc672f998da95ce83edce1270fc23f96f1f61a/scikit_learn-1.6.1-cp313-cp313-win_amd64.whl", hash = "sha256:7a1c43c8ec9fde528d664d947dc4c0789be4077a3647f232869f41d9bf50e0fb", size = 11071865 },
2340
+ { url = "https://files.pythonhosted.org/packages/12/ae/993b0fb24a356e71e9a894e42b8a9eec528d4c70217353a1cd7a48bc25d4/scikit_learn-1.6.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a17c1dea1d56dcda2fac315712f3651a1fea86565b64b48fa1bc090249cbf236", size = 11955804 },
2341
+ { url = "https://files.pythonhosted.org/packages/d6/54/32fa2ee591af44507eac86406fa6bba968d1eb22831494470d0a2e4a1eb1/scikit_learn-1.6.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:6a7aa5f9908f0f28f4edaa6963c0a6183f1911e63a69aa03782f0d924c830a35", size = 11100530 },
2342
+ { url = "https://files.pythonhosted.org/packages/3f/58/55856da1adec655bdce77b502e94a267bf40a8c0b89f8622837f89503b5a/scikit_learn-1.6.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0650e730afb87402baa88afbf31c07b84c98272622aaba002559b614600ca691", size = 12433852 },
2343
+ { url = "https://files.pythonhosted.org/packages/ff/4f/c83853af13901a574f8f13b645467285a48940f185b690936bb700a50863/scikit_learn-1.6.1-cp313-cp313t-win_amd64.whl", hash = "sha256:3f59fe08dc03ea158605170eb52b22a105f238a5d512c4470ddeca71feae8e5f", size = 11337256 },
2344
+ ]
2345
+
2346
+ [[package]]
2347
+ name = "scipy"
2348
+ version = "1.15.3"
2349
+ source = { registry = "https://pypi.org/simple" }
2350
+ dependencies = [
2351
+ { name = "numpy" },
2352
+ ]
2353
+ sdist = { url = "https://files.pythonhosted.org/packages/0f/37/6964b830433e654ec7485e45a00fc9a27cf868d622838f6b6d9c5ec0d532/scipy-1.15.3.tar.gz", hash = "sha256:eae3cf522bc7df64b42cad3925c876e1b0b6c35c1337c93e12c0f366f55b0eaf", size = 59419214 }
2354
+ wheels = [
2355
+ { url = "https://files.pythonhosted.org/packages/96/ab/5cc9f80f28f6a7dff646c5756e559823614a42b1939d86dd0ed550470210/scipy-1.15.3-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:993439ce220d25e3696d1b23b233dd010169b62f6456488567e830654ee37a6b", size = 38714255 },
2356
+ { url = "https://files.pythonhosted.org/packages/4a/4a/66ba30abe5ad1a3ad15bfb0b59d22174012e8056ff448cb1644deccbfed2/scipy-1.15.3-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:34716e281f181a02341ddeaad584205bd2fd3c242063bd3423d61ac259ca7eba", size = 30111035 },
2357
+ { url = "https://files.pythonhosted.org/packages/4b/fa/a7e5b95afd80d24313307f03624acc65801846fa75599034f8ceb9e2cbf6/scipy-1.15.3-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:3b0334816afb8b91dab859281b1b9786934392aa3d527cd847e41bb6f45bee65", size = 22384499 },
2358
+ { url = "https://files.pythonhosted.org/packages/17/99/f3aaddccf3588bb4aea70ba35328c204cadd89517a1612ecfda5b2dd9d7a/scipy-1.15.3-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:6db907c7368e3092e24919b5e31c76998b0ce1684d51a90943cb0ed1b4ffd6c1", size = 25152602 },
2359
+ { url = "https://files.pythonhosted.org/packages/56/c5/1032cdb565f146109212153339f9cb8b993701e9fe56b1c97699eee12586/scipy-1.15.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:721d6b4ef5dc82ca8968c25b111e307083d7ca9091bc38163fb89243e85e3889", size = 35503415 },
2360
+ { url = "https://files.pythonhosted.org/packages/bd/37/89f19c8c05505d0601ed5650156e50eb881ae3918786c8fd7262b4ee66d3/scipy-1.15.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39cb9c62e471b1bb3750066ecc3a3f3052b37751c7c3dfd0fd7e48900ed52982", size = 37652622 },
2361
+ { url = "https://files.pythonhosted.org/packages/7e/31/be59513aa9695519b18e1851bb9e487de66f2d31f835201f1b42f5d4d475/scipy-1.15.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:795c46999bae845966368a3c013e0e00947932d68e235702b5c3f6ea799aa8c9", size = 37244796 },
2362
+ { url = "https://files.pythonhosted.org/packages/10/c0/4f5f3eeccc235632aab79b27a74a9130c6c35df358129f7ac8b29f562ac7/scipy-1.15.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:18aaacb735ab38b38db42cb01f6b92a2d0d4b6aabefeb07f02849e47f8fb3594", size = 40047684 },
2363
+ { url = "https://files.pythonhosted.org/packages/ab/a7/0ddaf514ce8a8714f6ed243a2b391b41dbb65251affe21ee3077ec45ea9a/scipy-1.15.3-cp311-cp311-win_amd64.whl", hash = "sha256:ae48a786a28412d744c62fd7816a4118ef97e5be0bee968ce8f0a2fba7acf3bb", size = 41246504 },
2364
+ { url = "https://files.pythonhosted.org/packages/37/4b/683aa044c4162e10ed7a7ea30527f2cbd92e6999c10a8ed8edb253836e9c/scipy-1.15.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6ac6310fdbfb7aa6612408bd2f07295bcbd3fda00d2d702178434751fe48e019", size = 38766735 },
2365
+ { url = "https://files.pythonhosted.org/packages/7b/7e/f30be3d03de07f25dc0ec926d1681fed5c732d759ac8f51079708c79e680/scipy-1.15.3-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:185cd3d6d05ca4b44a8f1595af87f9c372bb6acf9c808e99aa3e9aa03bd98cf6", size = 30173284 },
2366
+ { url = "https://files.pythonhosted.org/packages/07/9c/0ddb0d0abdabe0d181c1793db51f02cd59e4901da6f9f7848e1f96759f0d/scipy-1.15.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:05dc6abcd105e1a29f95eada46d4a3f251743cfd7d3ae8ddb4088047f24ea477", size = 22446958 },
2367
+ { url = "https://files.pythonhosted.org/packages/af/43/0bce905a965f36c58ff80d8bea33f1f9351b05fad4beaad4eae34699b7a1/scipy-1.15.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:06efcba926324df1696931a57a176c80848ccd67ce6ad020c810736bfd58eb1c", size = 25242454 },
2368
+ { url = "https://files.pythonhosted.org/packages/56/30/a6f08f84ee5b7b28b4c597aca4cbe545535c39fe911845a96414700b64ba/scipy-1.15.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c05045d8b9bfd807ee1b9f38761993297b10b245f012b11b13b91ba8945f7e45", size = 35210199 },
2369
+ { url = "https://files.pythonhosted.org/packages/0b/1f/03f52c282437a168ee2c7c14a1a0d0781a9a4a8962d84ac05c06b4c5b555/scipy-1.15.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:271e3713e645149ea5ea3e97b57fdab61ce61333f97cfae392c28ba786f9bb49", size = 37309455 },
2370
+ { url = "https://files.pythonhosted.org/packages/89/b1/fbb53137f42c4bf630b1ffdfc2151a62d1d1b903b249f030d2b1c0280af8/scipy-1.15.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6cfd56fc1a8e53f6e89ba3a7a7251f7396412d655bca2aa5611c8ec9a6784a1e", size = 36885140 },
2371
+ { url = "https://files.pythonhosted.org/packages/2e/2e/025e39e339f5090df1ff266d021892694dbb7e63568edcfe43f892fa381d/scipy-1.15.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0ff17c0bb1cb32952c09217d8d1eed9b53d1463e5f1dd6052c7857f83127d539", size = 39710549 },
2372
+ { url = "https://files.pythonhosted.org/packages/e6/eb/3bf6ea8ab7f1503dca3a10df2e4b9c3f6b3316df07f6c0ded94b281c7101/scipy-1.15.3-cp312-cp312-win_amd64.whl", hash = "sha256:52092bc0472cfd17df49ff17e70624345efece4e1a12b23783a1ac59a1b728ed", size = 40966184 },
2373
+ { url = "https://files.pythonhosted.org/packages/73/18/ec27848c9baae6e0d6573eda6e01a602e5649ee72c27c3a8aad673ebecfd/scipy-1.15.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2c620736bcc334782e24d173c0fdbb7590a0a436d2fdf39310a8902505008759", size = 38728256 },
2374
+ { url = "https://files.pythonhosted.org/packages/74/cd/1aef2184948728b4b6e21267d53b3339762c285a46a274ebb7863c9e4742/scipy-1.15.3-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:7e11270a000969409d37ed399585ee530b9ef6aa99d50c019de4cb01e8e54e62", size = 30109540 },
2375
+ { url = "https://files.pythonhosted.org/packages/5b/d8/59e452c0a255ec352bd0a833537a3bc1bfb679944c4938ab375b0a6b3a3e/scipy-1.15.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:8c9ed3ba2c8a2ce098163a9bdb26f891746d02136995df25227a20e71c396ebb", size = 22383115 },
2376
+ { url = "https://files.pythonhosted.org/packages/08/f5/456f56bbbfccf696263b47095291040655e3cbaf05d063bdc7c7517f32ac/scipy-1.15.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:0bdd905264c0c9cfa74a4772cdb2070171790381a5c4d312c973382fc6eaf730", size = 25163884 },
2377
+ { url = "https://files.pythonhosted.org/packages/a2/66/a9618b6a435a0f0c0b8a6d0a2efb32d4ec5a85f023c2b79d39512040355b/scipy-1.15.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79167bba085c31f38603e11a267d862957cbb3ce018d8b38f79ac043bc92d825", size = 35174018 },
2378
+ { url = "https://files.pythonhosted.org/packages/b5/09/c5b6734a50ad4882432b6bb7c02baf757f5b2f256041da5df242e2d7e6b6/scipy-1.15.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9deabd6d547aee2c9a81dee6cc96c6d7e9a9b1953f74850c179f91fdc729cb7", size = 37269716 },
2379
+ { url = "https://files.pythonhosted.org/packages/77/0a/eac00ff741f23bcabd352731ed9b8995a0a60ef57f5fd788d611d43d69a1/scipy-1.15.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:dde4fc32993071ac0c7dd2d82569e544f0bdaff66269cb475e0f369adad13f11", size = 36872342 },
2380
+ { url = "https://files.pythonhosted.org/packages/fe/54/4379be86dd74b6ad81551689107360d9a3e18f24d20767a2d5b9253a3f0a/scipy-1.15.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f77f853d584e72e874d87357ad70f44b437331507d1c311457bed8ed2b956126", size = 39670869 },
2381
+ { url = "https://files.pythonhosted.org/packages/87/2e/892ad2862ba54f084ffe8cc4a22667eaf9c2bcec6d2bff1d15713c6c0703/scipy-1.15.3-cp313-cp313-win_amd64.whl", hash = "sha256:b90ab29d0c37ec9bf55424c064312930ca5f4bde15ee8619ee44e69319aab163", size = 40988851 },
2382
+ { url = "https://files.pythonhosted.org/packages/1b/e9/7a879c137f7e55b30d75d90ce3eb468197646bc7b443ac036ae3fe109055/scipy-1.15.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3ac07623267feb3ae308487c260ac684b32ea35fd81e12845039952f558047b8", size = 38863011 },
2383
+ { url = "https://files.pythonhosted.org/packages/51/d1/226a806bbd69f62ce5ef5f3ffadc35286e9fbc802f606a07eb83bf2359de/scipy-1.15.3-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:6487aa99c2a3d509a5227d9a5e889ff05830a06b2ce08ec30df6d79db5fcd5c5", size = 30266407 },
2384
+ { url = "https://files.pythonhosted.org/packages/e5/9b/f32d1d6093ab9eeabbd839b0f7619c62e46cc4b7b6dbf05b6e615bbd4400/scipy-1.15.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:50f9e62461c95d933d5c5ef4a1f2ebf9a2b4e83b0db374cb3f1de104d935922e", size = 22540030 },
2385
+ { url = "https://files.pythonhosted.org/packages/e7/29/c278f699b095c1a884f29fda126340fcc201461ee8bfea5c8bdb1c7c958b/scipy-1.15.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:14ed70039d182f411ffc74789a16df3835e05dc469b898233a245cdfd7f162cb", size = 25218709 },
2386
+ { url = "https://files.pythonhosted.org/packages/24/18/9e5374b617aba742a990581373cd6b68a2945d65cc588482749ef2e64467/scipy-1.15.3-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a769105537aa07a69468a0eefcd121be52006db61cdd8cac8a0e68980bbb723", size = 34809045 },
2387
+ { url = "https://files.pythonhosted.org/packages/e1/fe/9c4361e7ba2927074360856db6135ef4904d505e9b3afbbcb073c4008328/scipy-1.15.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9db984639887e3dffb3928d118145ffe40eff2fa40cb241a306ec57c219ebbbb", size = 36703062 },
2388
+ { url = "https://files.pythonhosted.org/packages/b7/8e/038ccfe29d272b30086b25a4960f757f97122cb2ec42e62b460d02fe98e9/scipy-1.15.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:40e54d5c7e7ebf1aa596c374c49fa3135f04648a0caabcb66c52884b943f02b4", size = 36393132 },
2389
+ { url = "https://files.pythonhosted.org/packages/10/7e/5c12285452970be5bdbe8352c619250b97ebf7917d7a9a9e96b8a8140f17/scipy-1.15.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5e721fed53187e71d0ccf382b6bf977644c533e506c4d33c3fb24de89f5c3ed5", size = 38979503 },
2390
+ { url = "https://files.pythonhosted.org/packages/81/06/0a5e5349474e1cbc5757975b21bd4fad0e72ebf138c5592f191646154e06/scipy-1.15.3-cp313-cp313t-win_amd64.whl", hash = "sha256:76ad1fb5f8752eabf0fa02e4cc0336b4e8f021e2d5f061ed37d6d264db35e3ca", size = 40308097 },
2391
+ ]
2392
+
2393
  [[package]]
2394
  name = "selenium"
2395
  version = "4.28.1"
 
2541
  { url = "https://files.pythonhosted.org/packages/06/1b/4a2dcfedf88923b8b022f566042006a6d871235d9c6f633866a9a640cc51/tf_playwright_stealth-1.1.1-py3-none-any.whl", hash = "sha256:7cc4785bcafca17c8987919591eab569c180b0d6159ce058807398ba616d6696", size = 32998 },
2542
  ]
2543
 
2544
+ [[package]]
2545
+ name = "threadpoolctl"
2546
+ version = "3.6.0"
2547
+ source = { registry = "https://pypi.org/simple" }
2548
+ sdist = { url = "https://files.pythonhosted.org/packages/b7/4d/08c89e34946fce2aec4fbb45c9016efd5f4d7f24af8e5d93296e935631d8/threadpoolctl-3.6.0.tar.gz", hash = "sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e", size = 21274 }
2549
+ wheels = [
2550
+ { url = "https://files.pythonhosted.org/packages/32/d5/f9a850d79b0851d1d4ef6456097579a9005b31fea68726a4ae5f2d82ddd9/threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb", size = 18638 },
2551
+ ]
2552
+
2553
  [[package]]
2554
  name = "tiktoken"
2555
  version = "0.8.0"