{
  "bm25": {
    "summary": {
      "method": "bm25",
      "total": 33,
      "verifiable": 33,
      "passed": 30,
      "failed": 3,
      "unverifiable": 0,
      "pass_rate": 0.9090909090909091,
      "avg_kw_score": 0.8545454545454545,
      "avg_mr_score": 0.28205128205128205
    },
    "per_query": [
      {
        "query_id": "T1-001",
        "tier": 1,
        "repos": [
          "numpy"
        ],
        "query": "How does numpy implement the clip function and what are its boundary conditions?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "clip",
          "min",
          "max",
          "out"
        ],
        "kw_missed": [],
        "mr_score": 1.0,
        "mr_hits": [
          "clip"
        ],
        "mr_misses": [],
        "anti_hits": [],
        "retrieved_functions": [
          "resize",
          "ravel_multi_index",
          "argrelextrema",
          "take",
          "argrelmax",
          "argrelmin",
          "_boolrelextrema",
          "clip",
          "whosmat",
          "put"
        ],
        "retrieved_repos": [
          "numpy",
          "numpy",
          "scipy",
          "numpy",
          "scipy",
          "scipy",
          "scipy",
          "pandas",
          "scipy",
          "numpy"
        ]
      },
      {
        "query_id": "T1-002",
        "tier": 1,
        "repos": [
          "numpy"
        ],
        "query": "What is the purpose of numpy's _wrapreduction function and when is it called?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "reduction",
          "axis",
          "ufunc",
          "dtype"
        ],
        "kw_missed": [],
        "mr_score": 0.0,
        "mr_hits": [],
        "mr_misses": [
          "_wrapreduction"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "clean_column_name",
          "_xp_copy_to_numpy",
          "amin",
          "amax",
          "__RandomState_ctor",
          "_convert_to_numpy",
          "_asarray_with_order",
          "lazy_xp_function",
          "xp_result_device",
          "_clean_nans"
        ],
        "retrieved_repos": [
          "pandas",
          "scipy",
          "numpy",
          "numpy",
          "numpy",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scipy",
          "scikit-learn"
        ]
      },
      {
        "query_id": "T1-003",
        "tier": 1,
        "repos": [
          "numpy"
        ],
        "query": "How does numpy handle broadcasting when array shapes are incompatible?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "broadcast",
          "shape",
          "dimension"
        ],
        "kw_missed": [],
        "mr_score": null,
        "mr_hits": [],
        "mr_misses": [],
        "anti_hits": [],
        "retrieved_functions": [
          "_argcheck_rvs",
          "broadcast_shapes",
          "broadcast_shapes",
          "_broadcast_shape",
          "xp_promote",
          "take",
          "matmul",
          "broadcast_symbolic_shapes",
          "clip",
          "expected_freq"
        ],
        "retrieved_repos": [
          "scipy",
          "numpy",
          "scipy",
          "numpy",
          "scipy",
          "pandas",
          "numpy",
          "pytorch",
          "scikit-learn",
          "scipy"
        ]
      },
      {
        "query_id": "T1-004",
        "tier": 1,
        "repos": [
          "scipy"
        ],
        "query": "How does scipy.optimize.minimize handle convergence criteria internally?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "tol",
          "maxiter",
          "convergence",
          "optimize"
        ],
        "kw_missed": [],
        "mr_score": 0.0,
        "mr_hits": [],
        "mr_misses": [
          "minimize"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "show_options",
          "_check_optimize_result",
          "fallback_lbfgs_solve",
          "fmin",
          "brent",
          "fmin_ncg",
          "fminbound",
          "fixed_point",
          "update_converged_count",
          "_constrained_optimization"
        ],
        "retrieved_repos": [
          "scipy",
          "scikit-learn",
          "scikit-learn",
          "scipy",
          "scipy",
          "scipy",
          "scipy",
          "scipy",
          "pytorch",
          "scikit-learn"
        ]
      },
      {
        "query_id": "T1-005",
        "tier": 1,
        "repos": [
          "scipy"
        ],
        "query": "What is the implementation of scipy's fft and how does it differ from numpy's fft?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "fft",
          "workers",
          "plan"
        ],
        "kw_missed": [],
        "mr_score": 0.0,
        "mr_hits": [],
        "mr_misses": [
          "fft"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "set_global_backend",
          "register_backend",
          "_backend_from_arg",
          "skip_backend",
          "set_backend",
          "prev_fast_len",
          "fftfreq",
          "set_workers",
          "rfft2",
          "rfftfreq"
        ],
        "retrieved_repos": [
          "scipy",
          "scipy",
          "scipy",
          "scipy",
          "scipy",
          "scipy",
          "scipy",
          "scipy",
          "scipy",
          "scipy"
        ]
      },
      {
        "query_id": "T1-006",
        "tier": 1,
        "repos": [
          "pandas"
        ],
        "query": "How does pandas implement groupby aggregation internally?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "groupby",
          "aggregate",
          "apply"
        ],
        "kw_missed": [],
        "mr_score": 0.5,
        "mr_hits": [
          "aggregate"
        ],
        "mr_misses": [
          "agg"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "_groupby_op",
          "_groupby_op",
          "_groupby_and_aggregate",
          "aggregate",
          "_groupby_op",
          "groupby",
          "groupby",
          "_groupby_op",
          "get_resampler_for_grouping",
          "retrieve_const_key"
        ],
        "retrieved_repos": [
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pytorch"
        ]
      },
      {
        "query_id": "T1-007",
        "tier": 1,
        "repos": [
          "pandas"
        ],
        "query": "What happens to NaN values in the output of pandas merge() \u2014 are they propagated, dropped, or filled by default?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "merge",
          "NaN",
          "join",
          "how"
        ],
        "kw_missed": [],
        "mr_score": 0.0,
        "mr_hits": [],
        "mr_misses": [
          "merge"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "ffill",
          "merge_ordered",
          "stack",
          "fillna",
          "bfill",
          "construct_1d_arraylike_from_scalar",
          "groupby",
          "map",
          "_sort_tuples",
          "_drop_from_level"
        ],
        "retrieved_repos": [
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas"
        ]
      },
      {
        "query_id": "T1-008",
        "tier": 1,
        "repos": [
          "scikit-learn"
        ],
        "query": "How does scikit-learn's StandardScaler compute mean and variance during fit()?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "mean",
          "var",
          "scale",
          "fit"
        ],
        "kw_missed": [],
        "mr_score": 1.0,
        "mr_hits": [
          "fit"
        ],
        "mr_misses": [],
        "anti_hits": [],
        "retrieved_functions": [
          "make_pipeline",
          "fit",
          "check_f_contiguous_array_estimator",
          "_is_constant_feature",
          "predict",
          "partial_fit",
          "fit",
          "_patch_raw_predict",
          "fit",
          "fit"
        ],
        "retrieved_repos": [
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn"
        ]
      },
      {
        "query_id": "T1-009",
        "tier": 1,
        "repos": [
          "scikit-learn"
        ],
        "query": "What is the implementation of train_test_split in scikit-learn?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "train_test_split",
          "shuffle",
          "stratify"
        ],
        "kw_missed": [],
        "mr_score": 1.0,
        "mr_hits": [
          "train_test_split"
        ],
        "mr_misses": [],
        "anti_hits": [],
        "retrieved_functions": [
          "is_usable_for",
          "evaluate",
          "from_estimator",
          "from_predictions",
          "_clone_parametrized",
          "from_estimator",
          "train_test_split",
          "from_estimator",
          "from_predictions",
          "from_predictions"
        ],
        "retrieved_repos": [
          "scikit-learn",
          "pandas",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn"
        ]
      },
      {
        "query_id": "T1-010",
        "tier": 1,
        "repos": [
          "pytorch"
        ],
        "query": "How does PyTorch implement the Adam optimizer update step?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "adam",
          "lr",
          "beta",
          "grad"
        ],
        "kw_missed": [],
        "mr_score": 0.5,
        "mr_hits": [
          "step"
        ],
        "mr_misses": [
          "Adam"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "_get_adamw_apex_fused",
          "_register_fused_optim",
          "set_optimizer",
          "_get_adamw_torch",
          "step",
          "get_state_dict",
          "__init__",
          "dim",
          "_apply_optimizer_in_backward",
          "deepspeed_optim_sched"
        ],
        "retrieved_repos": [
          "transformers",
          "pytorch",
          "transformers",
          "transformers",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "transformers"
        ]
      },
      {
        "query_id": "T1-011",
        "tier": 1,
        "repos": [
          "pytorch"
        ],
        "query": "How does torch.autograd track gradients through operations?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "grad",
          "backward",
          "autograd",
          "requires_grad"
        ],
        "kw_missed": [],
        "mr_score": 0.0,
        "mr_hits": [],
        "mr_misses": [
          "grad_fn",
          "backward"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "handle_autograd_grad",
          "_aot_stage2a_partition",
          "recompute_mean_var",
          "first_slice_copy_with_grad",
          "register_autograd",
          "get_gradient_edge",
          "method_backward",
          "register_autograd",
          "check_undefined_grad_support",
          "check"
        ],
        "retrieved_repos": [
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch"
        ]
      },
      {
        "query_id": "T1-012",
        "tier": 1,
        "repos": [
          "transformers"
        ],
        "query": "How does the HuggingFace tokenizer handle out-of-vocabulary tokens?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "tokenize",
          "unk",
          "vocab",
          "token"
        ],
        "kw_missed": [],
        "mr_score": 0.0,
        "mr_hits": [],
        "mr_misses": [
          "tokenize",
          "encode"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "add_tokens",
          "get_missing_alphabet_tokens",
          "from_pretrained",
          "from_pretrained",
          "_add_tokens",
          "save_vocabulary",
          "_add_tokens",
          "vocab_size",
          "get_vocab",
          "save_vocabulary"
        ],
        "retrieved_repos": [
          "transformers",
          "transformers",
          "transformers",
          "transformers",
          "transformers",
          "transformers",
          "transformers",
          "transformers",
          "transformers",
          "transformers"
        ]
      },
      {
        "query_id": "T1-013",
        "tier": 1,
        "repos": [
          "transformers"
        ],
        "query": "What happens inside the forward pass of BertModel?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "attention",
          "hidden",
          "encoder",
          "forward"
        ],
        "kw_missed": [],
        "mr_score": 1.0,
        "mr_hits": [
          "forward"
        ],
        "mr_misses": [],
        "anti_hits": [],
        "retrieved_functions": [
          "setup_context",
          "clean_column_name",
          "keys",
          "no_sync",
          "get_cb_kwargs",
          "get_cb_kwargs",
          "forward",
          "forward",
          "forward",
          "forward"
        ],
        "retrieved_repos": [
          "pytorch",
          "pandas",
          "pytorch",
          "pytorch",
          "transformers",
          "transformers",
          "transformers",
          "transformers",
          "transformers",
          "transformers"
        ]
      },
      {
        "query_id": "T2-001",
        "tier": 2,
        "repos": [
          "numpy",
          "pandas"
        ],
        "query": "How does pandas use numpy arrays internally to store DataFrame column data?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "ndarray",
          "dtype",
          "numpy",
          "block"
        ],
        "kw_missed": [],
        "mr_score": 0.0,
        "mr_hits": [],
        "mr_misses": [
          "NDFrame",
          "Block",
          "array"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "_from_arrays",
          "from_spmatrix",
          "explode",
          "_liac_arff_parser",
          "create_dataframe_from_blocks",
          "_pandas_arff_parser",
          "merge",
          "merge",
          "load_arff_from_gzip_file",
          "itertuples"
        ],
        "retrieved_repos": [
          "pandas",
          "pandas",
          "pandas",
          "scikit-learn",
          "pandas",
          "scikit-learn",
          "pandas",
          "pandas",
          "scikit-learn",
          "pandas"
        ]
      },
      {
        "query_id": "T2-002",
        "tier": 2,
        "repos": [
          "numpy",
          "scikit-learn"
        ],
        "query": "How does scikit-learn validate that input arrays are numpy-compatible before fitting?",
        "kw_score": 0.75,
        "kw_passed": true,
        "kw_found": [
          "dtype",
          "ndarray",
          "validate"
        ],
        "kw_missed": [
          "check_array"
        ],
        "mr_score": 0.0,
        "mr_hits": [],
        "mr_misses": [
          "check_array",
          "validate_data"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "broadcast_shapes",
          "_get_adapter_from_container",
          "_single_array_device",
          "_from_arrays",
          "get_config",
          "_asarray_with_order",
          "fit",
          "raise_build_error",
          "fit",
          "_assert_all_finite_element_wise"
        ],
        "retrieved_repos": [
          "numpy",
          "scikit-learn",
          "scikit-learn",
          "pandas",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn"
        ]
      },
      {
        "query_id": "T2-003",
        "tier": 2,
        "repos": [
          "numpy",
          "scipy"
        ],
        "query": "When scipy computes a matrix inverse, how does it use numpy's linear algebra routines?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "inv",
          "solve",
          "lapack",
          "linalg"
        ],
        "kw_missed": [],
        "mr_score": 1.0,
        "mr_hits": [
          "inv",
          "solve"
        ],
        "mr_misses": [],
        "anti_hits": [],
        "retrieved_functions": [
          "solve",
          "inv",
          "pinv",
          "_logpdf",
          "_mode",
          "vec",
          "from_eigendecomposition",
          "_mean",
          "pinv",
          "_var"
        ],
        "retrieved_repos": [
          "numpy",
          "scipy",
          "numpy",
          "scipy",
          "scipy",
          "scipy",
          "scipy",
          "scipy",
          "scipy",
          "scipy"
        ]
      },
      {
        "query_id": "T2-004",
        "tier": 2,
        "repos": [
          "pytorch",
          "numpy"
        ],
        "query": "How does PyTorch's tensor.numpy() method convert a tensor to a numpy array and what are the constraints?",
        "kw_score": 0.75,
        "kw_passed": true,
        "kw_found": [
          "numpy",
          "cpu",
          "detach"
        ],
        "kw_missed": [
          "contiguous"
        ],
        "mr_score": 0.0,
        "mr_hits": [],
        "mr_misses": [
          "numpy"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "_to_tensor",
          "to_numpy",
          "process_images",
          "process_images",
          "process_images",
          "as_tensor",
          "_from_numpy_array",
          "make_np",
          "_get_is_as_tensor_fns",
          "__call__"
        ],
        "retrieved_repos": [
          "transformers",
          "transformers",
          "transformers",
          "transformers",
          "transformers",
          "transformers",
          "pytorch",
          "pytorch",
          "transformers",
          "transformers"
        ]
      },
      {
        "query_id": "T2-005",
        "tier": 2,
        "repos": [
          "transformers",
          "pytorch"
        ],
        "query": "How does HuggingFace's Trainer class use PyTorch DataLoader for batching?",
        "kw_score": 0.75,
        "kw_passed": true,
        "kw_found": [
          "dataloader",
          "batch",
          "sampler"
        ],
        "kw_missed": [
          "collate"
        ],
        "mr_score": 0.3333333333333333,
        "mr_hits": [
          "get_train_dataloader"
        ],
        "mr_misses": [
          "Trainer",
          "DataLoader"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "get_train_dataloader",
          "get_steps_per_epoch",
          "create_rng",
          "thread_safe_generator",
          "num_examples",
          "tpu_spmd_dataloader",
          "get_worker_info",
          "__init__",
          "set_initial_training_values",
          "extract_hyperparameters_from_trainer"
        ],
        "retrieved_repos": [
          "transformers",
          "transformers",
          "transformers",
          "pytorch",
          "transformers",
          "transformers",
          "pytorch",
          "pytorch",
          "transformers",
          "transformers"
        ]
      },
      {
        "query_id": "T2-006",
        "tier": 2,
        "repos": [
          "pandas",
          "numpy"
        ],
        "query": "How does pandas rolling() use numpy operations under the hood?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "rolling",
          "window",
          "apply",
          "numpy"
        ],
        "kw_missed": [],
        "mr_score": 0.0,
        "mr_hits": [],
        "mr_misses": [
          "Rolling",
          "apply"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "_groupby_op",
          "aggregate",
          "mean",
          "_groupby_op",
          "sum",
          "std",
          "var",
          "quantile",
          "aggregate",
          "pipe"
        ],
        "retrieved_repos": [
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas"
        ]
      },
      {
        "query_id": "T2-007",
        "tier": 2,
        "repos": [
          "scikit-learn",
          "numpy"
        ],
        "query": "How does scikit-learn's PCA implementation use numpy's SVD?",
        "kw_score": 0.0,
        "kw_passed": false,
        "kw_found": [],
        "kw_missed": [
          "svd",
          "components",
          "explained_variance",
          "singular"
        ],
        "mr_score": 0.0,
        "mr_hits": [],
        "mr_misses": [
          "PCA",
          "fit",
          "_fit_full"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "_single_array_device",
          "__setstate__",
          "_assert_all_finite_element_wise",
          "_asarray_with_order",
          "is_usable_for",
          "_get_adapter_from_container",
          "check_f_contiguous_array_estimator",
          "get_data_home",
          "_use_interchange_protocol",
          "get_config"
        ],
        "retrieved_repos": [
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn"
        ]
      },
      {
        "query_id": "T2-008",
        "tier": 2,
        "repos": [
          "transformers",
          "numpy"
        ],
        "query": "Where does the transformers library convert between PyTorch tensors and numpy arrays for metric computation?",
        "kw_score": 0.75,
        "kw_passed": true,
        "kw_found": [
          "numpy",
          "metrics",
          "cpu"
        ],
        "kw_missed": [
          "predictions"
        ],
        "mr_score": 0.0,
        "mr_hits": [],
        "mr_misses": [
          "compute_metrics"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "as_tensor",
          "_get_is_as_tensor_fns",
          "to_cpu_and_numpy",
          "_to_tensor",
          "cond",
          "fill",
          "pairwise_distances_argmin",
          "pdist",
          "paired_distances",
          "_preprocess_input"
        ],
        "retrieved_repos": [
          "transformers",
          "transformers",
          "transformers",
          "transformers",
          "pytorch",
          "pytorch",
          "scikit-learn",
          "scipy",
          "scikit-learn",
          "transformers"
        ]
      },
      {
        "query_id": "T3-001",
        "tier": 3,
        "repos": [
          "transformers",
          "pytorch",
          "numpy"
        ],
        "query": "What sampling strategies does HuggingFace model.generate() use and how does it call into PyTorch operations?",
        "kw_score": 0.25,
        "kw_passed": false,
        "kw_found": [
          "generate"
        ],
        "kw_missed": [
          "sample",
          "logits",
          "beam"
        ],
        "mr_score": 0.0,
        "mr_hits": [],
        "mr_misses": [
          "generate"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "load_custom_generate",
          "is_torch_array",
          "repro_load_args",
          "call_function",
          "repro_common",
          "run_load_args",
          "call_function",
          "_get_dict",
          "can_generate",
          "default_data_collator"
        ],
        "retrieved_repos": [
          "transformers",
          "scikit-learn",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "transformers",
          "transformers"
        ]
      },
      {
        "query_id": "T3-002",
        "tier": 3,
        "repos": [
          "pandas",
          "numpy",
          "scipy"
        ],
        "query": "How does pandas.DataFrame.corr() ultimately compute correlation \u2014 trace through to the underlying math?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "corr",
          "pearson",
          "cov",
          "std"
        ],
        "kw_missed": [],
        "mr_score": 0.5,
        "mr_hits": [
          "corr"
        ],
        "mr_misses": [
          "nancorr"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "corr",
          "autocorr",
          "corr",
          "corr",
          "corr",
          "corr",
          "corrwith",
          "corr",
          "corrwith",
          "corr"
        ],
        "retrieved_repos": [
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas"
        ]
      },
      {
        "query_id": "T3-003",
        "tier": 3,
        "repos": [
          "scikit-learn",
          "scipy",
          "numpy"
        ],
        "query": "How does scikit-learn's SVM implementation use scipy's sparse matrices and what numpy operations are at the core?",
        "kw_score": 0.5,
        "kw_passed": true,
        "kw_found": [
          "sparse",
          "support"
        ],
        "kw_missed": [
          "svm",
          "kernel"
        ],
        "mr_score": 0.0,
        "mr_hits": [],
        "mr_misses": [
          "SVC",
          "fit"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "is_usable_for",
          "set_config",
          "_check_large_sparse",
          "config_context",
          "_asarray_with_order",
          "pairwise_distances_argmin",
          "is_valid_sparse_matrix",
          "_fit_full",
          "__setstate__",
          "_assert_all_finite_element_wise"
        ],
        "retrieved_repos": [
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn"
        ]
      },
      {
        "query_id": "T3-004",
        "tier": 3,
        "repos": [
          "transformers",
          "pytorch"
        ],
        "query": "How does HuggingFace's attention mechanism implement scaled dot-product attention at the PyTorch level?",
        "kw_score": 0.2,
        "kw_passed": false,
        "kw_found": [
          "attention"
        ],
        "kw_missed": [
          "query",
          "key",
          "value",
          "softmax"
        ],
        "mr_score": 0.0,
        "mr_hits": [],
        "mr_misses": [
          "attention",
          "forward"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "can_produce",
          "enable_flash_sdp",
          "enable_math_sdp",
          "enable_cudnn_sdp",
          "enable_mem_efficient_sdp",
          "cudnn_sdp_enabled",
          "flash_sdp_enabled",
          "math_sdp_enabled",
          "mem_efficient_sdp_enabled",
          "is_ck_sdpa_available"
        ],
        "retrieved_repos": [
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch"
        ]
      },
      {
        "query_id": "T3-005",
        "tier": 3,
        "repos": [
          "numpy",
          "scipy",
          "scikit-learn"
        ],
        "query": "Trace how scikit-learn's KMeans uses numpy and scipy for distance computation.",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "kmeans",
          "euclidean",
          "centroid",
          "distance"
        ],
        "kw_missed": [],
        "mr_score": 0.0,
        "mr_hits": [],
        "mr_misses": [
          "KMeans",
          "_lloyd_iter",
          "euclidean_distances"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "pairwise_distances_argmin",
          "pairwise_distances_argmin_min",
          "__setstate__",
          "is_usable_for",
          "pairwise_distances",
          "silhouette_samples",
          "kmeans",
          "_get_expected_failed_checks",
          "set_config",
          "check_f_contiguous_array_estimator"
        ],
        "retrieved_repos": [
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scipy",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn"
        ]
      },
      {
        "query_id": "T4-001",
        "tier": 4,
        "repos": [
          "numpy",
          "transformers",
          "pytorch",
          "pandas",
          "scikit-learn"
        ],
        "query": "What functions in transformers would break if numpy changed the default dtype of np.float_ from float64 to float32?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "float64",
          "float32",
          "dtype",
          "numpy"
        ],
        "kw_missed": [],
        "mr_score": null,
        "mr_hits": [],
        "mr_misses": [],
        "anti_hits": [],
        "retrieved_functions": [
          "set_default_dtype",
          "fit",
          "obj2sctype",
          "_transform",
          "_promote",
          "_prep_values",
          "_validate_array_cls",
          "getdtype",
          "fill_value",
          "as_float_array"
        ],
        "retrieved_repos": [
          "pytorch",
          "scikit-learn",
          "numpy",
          "scikit-learn",
          "scipy",
          "pandas",
          "scipy",
          "scipy",
          "numpy",
          "scikit-learn"
        ]
      },
      {
        "query_id": "T4-002",
        "tier": 4,
        "repos": [
          "numpy",
          "pandas",
          "scikit-learn"
        ],
        "query": "If numpy deprecated np.bool (alias for Python bool), which pandas and scikit-learn functions would be affected?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "bool",
          "dtype",
          "numpy"
        ],
        "kw_missed": [],
        "mr_score": null,
        "mr_hits": [],
        "mr_misses": [],
        "anti_hits": [],
        "retrieved_functions": [
          "set_config",
          "parametrize_with_checks",
          "__get__",
          "config_context",
          "_single_array_device",
          "check_f_contiguous_array_estimator",
          "__setstate__",
          "func",
          "is_usable_for",
          "array"
        ],
        "retrieved_repos": [
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "pandas"
        ]
      },
      {
        "query_id": "T4-003",
        "tier": 4,
        "repos": [
          "pytorch",
          "transformers"
        ],
        "query": "If PyTorch changed the default behavior of torch.no_grad() to not propagate to nested functions, what would break in HuggingFace transformers?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "no_grad",
          "grad",
          "inference",
          "context"
        ],
        "kw_missed": [],
        "mr_score": null,
        "mr_hits": [],
        "mr_misses": [],
        "anti_hits": [],
        "retrieved_functions": [
          "print_bt",
          "save",
          "nested_tensor",
          "enable_propagation",
          "_contains_nan",
          "guard_size_oblivious",
          "traverse",
          "default_device",
          "forward",
          "call_module"
        ],
        "retrieved_repos": [
          "pytorch",
          "pytorch",
          "pytorch",
          "transformers",
          "scipy",
          "pytorch",
          "pytorch",
          "scikit-learn",
          "transformers",
          "pytorch"
        ]
      },
      {
        "query_id": "T4-004",
        "tier": 4,
        "repos": [
          "numpy",
          "scipy"
        ],
        "query": "If numpy removed the np.matrix class entirely, which scipy functions would need to be updated?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "matrix",
          "ndarray",
          "scipy"
        ],
        "kw_missed": [],
        "mr_score": null,
        "mr_hits": [],
        "mr_misses": [],
        "anti_hits": [],
        "retrieved_functions": [
          "estimate_rank",
          "_validate_array_cls",
          "svd",
          "from_precision",
          "estimate_spectral_norm_diff",
          "_check_scalar",
          "interp_decomp",
          "inv",
          "cholesky",
          "estimate_spectral_norm"
        ],
        "retrieved_repos": [
          "scipy",
          "scipy",
          "scipy",
          "scipy",
          "scipy",
          "scipy",
          "scipy",
          "numpy",
          "numpy",
          "scipy"
        ]
      },
      {
        "query_id": "T4-005",
        "tier": 4,
        "repos": [
          "pandas",
          "numpy"
        ],
        "query": "What would happen to pandas DataFrame operations if numpy changed integer overflow behavior to raise exceptions instead of wrapping?",
        "kw_score": 0.75,
        "kw_passed": true,
        "kw_found": [
          "int",
          "dtype",
          "numpy"
        ],
        "kw_missed": [
          "overflow"
        ],
        "mr_score": null,
        "mr_hits": [],
        "mr_misses": [],
        "anti_hits": [],
        "retrieved_functions": [
          "create_dataframe_from_blocks",
          "round",
          "_liac_arff_parser",
          "aggregate",
          "read_table",
          "maybe_prepare_scalar_for_op",
          "astype_array_safe",
          "_safe_indexing",
          "_validate_array_cls",
          "aggregate"
        ],
        "retrieved_repos": [
          "pandas",
          "pandas",
          "scikit-learn",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "scikit-learn",
          "scipy",
          "pandas"
        ]
      },
      {
        "query_id": "T4-006",
        "tier": 4,
        "repos": [
          "transformers",
          "pytorch",
          "numpy"
        ],
        "query": "If HuggingFace removed the return_dict parameter from model forward() calls, what downstream code would break?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "return_dict",
          "forward",
          "output"
        ],
        "kw_missed": [],
        "mr_score": 0.5,
        "mr_hits": [
          "forward"
        ],
        "mr_misses": [
          "return_dict"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "forward",
          "forward",
          "forward",
          "freeze_embeddings_and_language_adapters",
          "forward",
          "forward",
          "get_fixed_layout_without_freezing",
          "jit_code_filter",
          "forward",
          "forward"
        ],
        "retrieved_repos": [
          "transformers",
          "transformers",
          "transformers",
          "transformers",
          "transformers",
          "transformers",
          "pytorch",
          "pytorch",
          "transformers",
          "transformers"
        ]
      },
      {
        "query_id": "T4-007",
        "tier": 4,
        "repos": [
          "scikit-learn",
          "numpy"
        ],
        "query": "If numpy's legacy np.random functions were removed, which scikit-learn estimators would break?",
        "kw_score": 0.5,
        "kw_passed": true,
        "kw_found": [
          "random",
          "numpy"
        ],
        "kw_missed": [
          "random_state",
          "seed"
        ],
        "mr_score": null,
        "mr_hits": [],
        "mr_misses": [],
        "anti_hits": [],
        "retrieved_functions": [
          "parametrize_with_checks",
          "check_f_contiguous_array_estimator",
          "_assert_all_finite_element_wise",
          "set_config",
          "_single_array_device",
          "check_estimator",
          "config_context",
          "decision_function",
          "_get_expected_failed_checks",
          "check_estimator_tags_renamed"
        ],
        "retrieved_repos": [
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn"
        ]
      }
    ]
  },
  "dense": {
    "summary": {
      "method": "dense",
      "total": 33,
      "verifiable": 33,
      "passed": 33,
      "failed": 0,
      "unverifiable": 0,
      "pass_rate": 1.0,
      "avg_kw_score": 0.9116161616161615,
      "avg_mr_score": 0.40384615384615385
    },
    "per_query": [
      {
        "query_id": "T1-001",
        "tier": 1,
        "repos": [
          "numpy"
        ],
        "query": "How does numpy implement the clip function and what are its boundary conditions?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "clip",
          "min",
          "max",
          "out"
        ],
        "kw_missed": [],
        "mr_score": 1.0,
        "mr_hits": [
          "clip"
        ],
        "mr_misses": [],
        "anti_hits": [],
        "retrieved_functions": [
          "clip",
          "clip",
          "_clip_with_one_bound",
          "_check_clip_x",
          "_sf",
          "_call",
          "clip",
          "validate_clip_with_axis",
          "_clip_with_scalar",
          "trimmed_var"
        ],
        "retrieved_repos": [
          "pandas",
          "numpy",
          "pandas",
          "scipy",
          "scipy",
          "pytorch",
          "scikit-learn",
          "pandas",
          "pandas",
          "scipy"
        ]
      },
      {
        "query_id": "T1-002",
        "tier": 1,
        "repos": [
          "numpy"
        ],
        "query": "What is the purpose of numpy's _wrapreduction function and when is it called?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "reduction",
          "axis",
          "ufunc",
          "dtype"
        ],
        "kw_missed": [],
        "mr_score": 0.0,
        "mr_hits": [],
        "mr_misses": [
          "_wrapreduction"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "wrap_function",
          "__array_wrap__",
          "_wrapfunc",
          "wrap",
          "wrapped",
          "_wrap_min_count_reduction_result",
          "_wrap_function",
          "numpy_dtype",
          "_generate_wrapped_number",
          "prod"
        ],
        "retrieved_repos": [
          "pandas",
          "pandas",
          "numpy",
          "scipy",
          "pytorch",
          "pandas",
          "scipy",
          "pandas",
          "pytorch",
          "numpy"
        ]
      },
      {
        "query_id": "T1-003",
        "tier": 1,
        "repos": [
          "numpy"
        ],
        "query": "How does numpy handle broadcasting when array shapes are incompatible?",
        "kw_score": 0.6666666666666666,
        "kw_passed": true,
        "kw_found": [
          "broadcast",
          "shape"
        ],
        "kw_missed": [
          "dimension"
        ],
        "mr_score": null,
        "mr_hits": [],
        "mr_misses": [],
        "anti_hits": [],
        "retrieved_functions": [
          "broadcast_to",
          "broadcast_shapes",
          "_broadcast_shape",
          "_broadcast_arrays",
          "broadcast_arrays",
          "broadcast_shapes",
          "_broadcast_array_shapes_remove_axis",
          "_broadcast",
          "_broadcast_shapes_remove_axis",
          "_broadcast_to"
        ],
        "retrieved_repos": [
          "numpy",
          "scikit-learn",
          "numpy",
          "scipy",
          "numpy",
          "numpy",
          "scipy",
          "scipy",
          "scipy",
          "numpy"
        ]
      },
      {
        "query_id": "T1-004",
        "tier": 1,
        "repos": [
          "scipy"
        ],
        "query": "How does scipy.optimize.minimize handle convergence criteria internally?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "tol",
          "maxiter",
          "convergence",
          "optimize"
        ],
        "kw_missed": [],
        "mr_score": 0.0,
        "mr_hits": [],
        "mr_misses": [
          "minimize"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "_check_optimize_result",
          "__init__",
          "fallback_lbfgs_solve",
          "_constrained_optimization",
          "_solve_W",
          "_constrained_optimization",
          "__init__",
          "_minimize_powell",
          "__init__",
          "solve"
        ],
        "retrieved_repos": [
          "scikit-learn",
          "scipy",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scipy",
          "scipy",
          "scipy",
          "scikit-learn"
        ]
      },
      {
        "query_id": "T1-005",
        "tier": 1,
        "repos": [
          "scipy"
        ],
        "query": "What is the implementation of scipy's fft and how does it differ from numpy's fft?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "fft",
          "workers",
          "plan"
        ],
        "kw_missed": [],
        "mr_score": 1.0,
        "mr_hits": [
          "fft"
        ],
        "mr_misses": [],
        "anti_hits": [],
        "retrieved_functions": [
          "fft",
          "ifft",
          "_fft_helper",
          "fft_mode",
          "hfft2",
          "ihfft2",
          "irfft2",
          "_fftconv_faster",
          "rfft2",
          "hfft"
        ],
        "retrieved_repos": [
          "numpy",
          "numpy",
          "scipy",
          "scipy",
          "scipy",
          "scipy",
          "scipy",
          "scipy",
          "scipy",
          "numpy"
        ]
      },
      {
        "query_id": "T1-006",
        "tier": 1,
        "repos": [
          "pandas"
        ],
        "query": "How does pandas implement groupby aggregation internally?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "groupby",
          "aggregate",
          "apply"
        ],
        "kw_missed": [],
        "mr_score": 0.5,
        "mr_hits": [
          "aggregate"
        ],
        "mr_misses": [
          "agg"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "groupby",
          "groupby",
          "_wrap_aggregated_output",
          "get_groupby",
          "aggregate",
          "__init__",
          "cumsum",
          "__iter__",
          "aggregate",
          "expanding"
        ],
        "retrieved_repos": [
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas"
        ]
      },
      {
        "query_id": "T1-007",
        "tier": 1,
        "repos": [
          "pandas"
        ],
        "query": "What happens to NaN values in the output of pandas merge() \u2014 are they propagated, dropped, or filled by default?",
        "kw_score": 0.75,
        "kw_passed": true,
        "kw_found": [
          "merge",
          "NaN",
          "how"
        ],
        "kw_missed": [
          "join"
        ],
        "mr_score": 0.0,
        "mr_hits": [],
        "mr_misses": [
          "merge"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "combine",
          "na_value",
          "_replace_nans",
          "fillna",
          "nancumsum",
          "ffill",
          "nanmedian",
          "fillna",
          "bfill",
          "nansum"
        ],
        "retrieved_repos": [
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "numpy",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "numpy"
        ]
      },
      {
        "query_id": "T1-008",
        "tier": 1,
        "repos": [
          "scikit-learn"
        ],
        "query": "How does scikit-learn's StandardScaler compute mean and variance during fit()?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "mean",
          "var",
          "scale",
          "fit"
        ],
        "kw_missed": [],
        "mr_score": 1.0,
        "mr_hits": [
          "fit"
        ],
        "mr_misses": [],
        "anti_hits": [],
        "retrieved_functions": [
          "fit",
          "partial_fit",
          "_is_constant_feature",
          "fit",
          "fit",
          "fit",
          "latent_mean_and_variance",
          "fit",
          "latent_mean_and_variance",
          "fit"
        ],
        "retrieved_repos": [
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scipy"
        ]
      },
      {
        "query_id": "T1-009",
        "tier": 1,
        "repos": [
          "scikit-learn"
        ],
        "query": "What is the implementation of train_test_split in scikit-learn?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "train_test_split",
          "shuffle",
          "stratify"
        ],
        "kw_missed": [],
        "mr_score": 1.0,
        "mr_hits": [
          "train_test_split"
        ],
        "mr_misses": [],
        "anti_hits": [],
        "retrieved_functions": [
          "split",
          "_split",
          "__init__",
          "split",
          "split",
          "split",
          "_split",
          "split",
          "train_test_split",
          "_check_input_parameters"
        ],
        "retrieved_repos": [
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn"
        ]
      },
      {
        "query_id": "T1-010",
        "tier": 1,
        "repos": [
          "pytorch"
        ],
        "query": "How does PyTorch implement the Adam optimizer update step?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "adam",
          "lr",
          "beta",
          "grad"
        ],
        "kw_missed": [],
        "mr_score": 0.0,
        "mr_hits": [],
        "mr_misses": [
          "Adam",
          "step"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "sparse_adam",
          "__init__",
          "adam",
          "set_optimizer",
          "__init__",
          "adamw",
          "adamax",
          "_get_adamw_torch",
          "_get_adamw_anyprecision",
          "_get_stable_adamw"
        ],
        "retrieved_repos": [
          "pytorch",
          "scikit-learn",
          "pytorch",
          "transformers",
          "scikit-learn",
          "pytorch",
          "pytorch",
          "transformers",
          "transformers",
          "transformers"
        ]
      },
      {
        "query_id": "T1-011",
        "tier": 1,
        "repos": [
          "pytorch"
        ],
        "query": "How does torch.autograd track gradients through operations?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "grad",
          "backward",
          "autograd",
          "requires_grad"
        ],
        "kw_missed": [],
        "mr_score": 0.5,
        "mr_hits": [
          "backward"
        ],
        "mr_misses": [
          "grad_fn"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "_extract_parameters_and_gradients",
          "backward",
          "backward",
          "register_autograd",
          "backward",
          "register_autograd",
          "_root_post_backward_final_callback",
          "vjp_fn",
          "vjp_fn",
          "_track_module_params_and_buffers"
        ],
        "retrieved_repos": [
          "pytorch",
          "transformers",
          "transformers",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch"
        ]
      },
      {
        "query_id": "T1-012",
        "tier": 1,
        "repos": [
          "transformers"
        ],
        "query": "How does the HuggingFace tokenizer handle out-of-vocabulary tokens?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "tokenize",
          "unk",
          "vocab",
          "token"
        ],
        "kw_missed": [],
        "mr_score": 0.5,
        "mr_hits": [
          "tokenize"
        ],
        "mr_misses": [
          "encode"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "add_tokens",
          "_add_tokens",
          "_add_tokens",
          "tokenize",
          "tokenize",
          "tokenize",
          "tokenize",
          "tokenize",
          "_tokenize",
          "_wrap_decode_method_backend_tokenizer"
        ],
        "retrieved_repos": [
          "transformers",
          "transformers",
          "transformers",
          "transformers",
          "transformers",
          "transformers",
          "transformers",
          "transformers",
          "transformers",
          "transformers"
        ]
      },
      {
        "query_id": "T1-013",
        "tier": 1,
        "repos": [
          "transformers"
        ],
        "query": "What happens inside the forward pass of BertModel?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "attention",
          "hidden",
          "encoder",
          "forward"
        ],
        "kw_missed": [],
        "mr_score": 1.0,
        "mr_hits": [
          "forward"
        ],
        "mr_misses": [],
        "anti_hits": [],
        "retrieved_functions": [
          "forward",
          "forward",
          "forward",
          "forward",
          "forward",
          "forward",
          "forward",
          "forward",
          "forward",
          "forward"
        ],
        "retrieved_repos": [
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "transformers",
          "transformers",
          "transformers",
          "transformers"
        ]
      },
      {
        "query_id": "T2-001",
        "tier": 2,
        "repos": [
          "numpy",
          "pandas"
        ],
        "query": "How does pandas use numpy arrays internally to store DataFrame column data?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "ndarray",
          "dtype",
          "numpy",
          "block"
        ],
        "kw_missed": [],
        "mr_score": 0.3333333333333333,
        "mr_hits": [
          "array"
        ],
        "mr_misses": [
          "NDFrame",
          "Block"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "array",
          "array",
          "array",
          "_from_arrays",
          "array",
          "column_data_lengths",
          "column_data_offsets",
          "primitive_column_to_ndarray",
          "to_arrays",
          "create_dataframe"
        ],
        "retrieved_repos": [
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "scikit-learn"
        ]
      },
      {
        "query_id": "T2-002",
        "tier": 2,
        "repos": [
          "numpy",
          "scikit-learn"
        ],
        "query": "How does scikit-learn validate that input arrays are numpy-compatible before fitting?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "check_array",
          "dtype",
          "ndarray",
          "validate"
        ],
        "kw_missed": [],
        "mr_score": 0.0,
        "mr_hits": [],
        "mr_misses": [
          "check_array",
          "validate_data"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "fit",
          "_asarray_validated",
          "fit",
          "_validate_vector",
          "assert_allclose",
          "check_array_indexer",
          "validate",
          "check_X_y",
          "fit",
          "matdims"
        ],
        "retrieved_repos": [
          "scikit-learn",
          "scipy",
          "scikit-learn",
          "scipy",
          "scikit-learn",
          "pandas",
          "scipy",
          "scikit-learn",
          "scikit-learn",
          "scipy"
        ]
      },
      {
        "query_id": "T2-003",
        "tier": 2,
        "repos": [
          "numpy",
          "scipy"
        ],
        "query": "When scipy computes a matrix inverse, how does it use numpy's linear algebra routines?",
        "kw_score": 0.75,
        "kw_passed": true,
        "kw_found": [
          "inv",
          "solve",
          "linalg"
        ],
        "kw_missed": [
          "lapack"
        ],
        "mr_score": 0.5,
        "mr_hits": [
          "inv"
        ],
        "mr_misses": [
          "solve"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "inv",
          "I",
          "inv",
          "dot",
          "affine_transform",
          "tensorinv",
          "matdims",
          "invpascal",
          "transpose",
          "_fractional_matrix_power"
        ],
        "retrieved_repos": [
          "numpy",
          "numpy",
          "scipy",
          "pandas",
          "scipy",
          "numpy",
          "scipy",
          "scipy",
          "scipy",
          "scipy"
        ]
      },
      {
        "query_id": "T2-004",
        "tier": 2,
        "repos": [
          "pytorch",
          "numpy"
        ],
        "query": "How does PyTorch's tensor.numpy() method convert a tensor to a numpy array and what are the constraints?",
        "kw_score": 0.75,
        "kw_passed": true,
        "kw_found": [
          "numpy",
          "cpu",
          "detach"
        ],
        "kw_missed": [
          "contiguous"
        ],
        "mr_score": 0.0,
        "mr_hits": [],
        "mr_misses": [
          "numpy"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "_to_tensor",
          "to_numpy_helper",
          "numpy_to_tensor",
          "as_tensor",
          "_to_numpy_array",
          "_to_numpy",
          "_get_is_as_tensor_fns",
          "to_numpy",
          "as_tensor",
          "to_py_obj"
        ],
        "retrieved_repos": [
          "transformers",
          "pytorch",
          "pytorch",
          "transformers",
          "pytorch",
          "pytorch",
          "transformers",
          "transformers",
          "transformers",
          "transformers"
        ]
      },
      {
        "query_id": "T2-005",
        "tier": 2,
        "repos": [
          "transformers",
          "pytorch"
        ],
        "query": "How does HuggingFace's Trainer class use PyTorch DataLoader for batching?",
        "kw_score": 0.75,
        "kw_passed": true,
        "kw_found": [
          "dataloader",
          "batch",
          "sampler"
        ],
        "kw_missed": [
          "collate"
        ],
        "mr_score": 0.3333333333333333,
        "mr_hits": [
          "get_train_dataloader"
        ],
        "mr_misses": [
          "Trainer",
          "DataLoader"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "__init__",
          "__init__",
          "set_dataloader",
          "__init__",
          "get_train_dataloader",
          "to",
          "__next__",
          "getstate",
          "__init__",
          "get_steps_per_epoch"
        ],
        "retrieved_repos": [
          "transformers",
          "pytorch",
          "transformers",
          "transformers",
          "transformers",
          "transformers",
          "pytorch",
          "pytorch",
          "pytorch",
          "transformers"
        ]
      },
      {
        "query_id": "T2-006",
        "tier": 2,
        "repos": [
          "pandas",
          "numpy"
        ],
        "query": "How does pandas rolling() use numpy operations under the hood?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "rolling",
          "window",
          "apply",
          "numpy"
        ],
        "kw_missed": [],
        "mr_score": 0.0,
        "mr_hits": [],
        "mr_misses": [
          "Rolling",
          "apply"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "roll",
          "rollaxis",
          "nunique",
          "roll_apply",
          "mean",
          "count",
          "first",
          "sum",
          "var",
          "std"
        ],
        "retrieved_repos": [
          "numpy",
          "numpy",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas"
        ]
      },
      {
        "query_id": "T2-007",
        "tier": 2,
        "repos": [
          "scikit-learn",
          "numpy"
        ],
        "query": "How does scikit-learn's PCA implementation use numpy's SVD?",
        "kw_score": 0.75,
        "kw_passed": true,
        "kw_found": [
          "svd",
          "components",
          "singular"
        ],
        "kw_missed": [
          "explained_variance"
        ],
        "mr_score": 0.0,
        "mr_hits": [],
        "mr_misses": [
          "PCA",
          "fit",
          "_fit_full"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "svdvals",
          "_solve_svd",
          "_svds_lobpcg_doc",
          "_get_first_singular_vectors_svd",
          "pca_lowrank",
          "_multi_svd_norm",
          "_svd",
          "svdvals",
          "_get_first_singular_vectors_power_method",
          "diagsvd"
        ],
        "retrieved_repos": [
          "numpy",
          "scikit-learn",
          "scipy",
          "scikit-learn",
          "pytorch",
          "numpy",
          "scikit-learn",
          "scipy",
          "scikit-learn",
          "scipy"
        ]
      },
      {
        "query_id": "T2-008",
        "tier": 2,
        "repos": [
          "transformers",
          "numpy"
        ],
        "query": "Where does the transformers library convert between PyTorch tensors and numpy arrays for metric computation?",
        "kw_score": 0.5,
        "kw_passed": true,
        "kw_found": [
          "numpy",
          "cpu"
        ],
        "kw_missed": [
          "predictions",
          "metrics"
        ],
        "mr_score": 0.0,
        "mr_hits": [],
        "mr_misses": [
          "compute_metrics"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "to_numpy_helper",
          "to_tvm_tensor",
          "pre_flatten_transform",
          "_to_tensor",
          "_rebuild_device_tensor_from_numpy",
          "_to_numpy_array",
          "pow_by_natural",
          "_to_numpy",
          "_encode_tensor",
          "to_torch_tensor"
        ],
        "retrieved_repos": [
          "pytorch",
          "pytorch",
          "pytorch",
          "transformers",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch"
        ]
      },
      {
        "query_id": "T3-001",
        "tier": 3,
        "repos": [
          "transformers",
          "pytorch",
          "numpy"
        ],
        "query": "What sampling strategies does HuggingFace model.generate() use and how does it call into PyTorch operations?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "generate",
          "sample",
          "logits",
          "beam"
        ],
        "kw_missed": [],
        "mr_score": 1.0,
        "mr_hits": [
          "generate"
        ],
        "mr_misses": [],
        "anti_hits": [],
        "retrieved_functions": [
          "sample",
          "sample",
          "_compute_rng_offsets",
          "_initialize_parameters",
          "init_population_random",
          "sample_n",
          "generate",
          "_sample",
          "_sample",
          "_sample_visibles"
        ],
        "retrieved_repos": [
          "scikit-learn",
          "pytorch",
          "pytorch",
          "scikit-learn",
          "scipy",
          "pytorch",
          "transformers",
          "transformers",
          "transformers",
          "scikit-learn"
        ]
      },
      {
        "query_id": "T3-002",
        "tier": 3,
        "repos": [
          "pandas",
          "numpy",
          "scipy"
        ],
        "query": "How does pandas.DataFrame.corr() ultimately compute correlation \u2014 trace through to the underlying math?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "corr",
          "pearson",
          "cov",
          "std"
        ],
        "kw_missed": [],
        "mr_score": 0.5,
        "mr_hits": [
          "corr"
        ],
        "mr_misses": [
          "nancorr"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "corr",
          "corrwith",
          "corr",
          "corrwith",
          "corr",
          "corr",
          "corr",
          "corrcoef",
          "_corr",
          "corr"
        ],
        "retrieved_repos": [
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "numpy",
          "scipy",
          "pandas"
        ]
      },
      {
        "query_id": "T3-003",
        "tier": 3,
        "repos": [
          "scikit-learn",
          "scipy",
          "numpy"
        ],
        "query": "How does scikit-learn's SVM implementation use scipy's sparse matrices and what numpy operations are at the core?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "svm",
          "sparse",
          "kernel",
          "support"
        ],
        "kw_missed": [],
        "mr_score": 0.0,
        "mr_hits": [],
        "mr_misses": [
          "SVC",
          "fit"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "_svds_lobpcg_doc",
          "svds",
          "is_valid_sparse_matrix",
          "_sparse_fit",
          "svdvals",
          "_svds_propack_doc",
          "_sparse_encode_precomputed",
          "eye",
          "_svds_arpack_doc",
          "svdvals"
        ],
        "retrieved_repos": [
          "scipy",
          "scipy",
          "scikit-learn",
          "scikit-learn",
          "numpy",
          "scipy",
          "scikit-learn",
          "scipy",
          "scipy",
          "scipy"
        ]
      },
      {
        "query_id": "T3-004",
        "tier": 3,
        "repos": [
          "transformers",
          "pytorch"
        ],
        "query": "How does HuggingFace's attention mechanism implement scaled dot-product attention at the PyTorch level?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "attention",
          "query",
          "key",
          "value",
          "softmax"
        ],
        "kw_missed": [],
        "mr_score": 0.5,
        "mr_hits": [
          "attention"
        ],
        "mr_misses": [
          "forward"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "_unmask_unattended",
          "_scaled_dot_product_attention_quantized",
          "flex_attention",
          "aten_scaled_dot_product_attention_23",
          "sdpa_kernel",
          "_in_projection_packed",
          "can_use_efficient_attention",
          "attention",
          "_attention_scale",
          "_scaled_dot_product_efficient_attention_backward_cp_strategy"
        ],
        "retrieved_repos": [
          "transformers",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch"
        ]
      },
      {
        "query_id": "T3-005",
        "tier": 3,
        "repos": [
          "numpy",
          "scipy",
          "scikit-learn"
        ],
        "query": "Trace how scikit-learn's KMeans uses numpy and scipy for distance computation.",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "kmeans",
          "euclidean",
          "centroid",
          "distance"
        ],
        "kw_missed": [],
        "mr_score": 0.3333333333333333,
        "mr_hits": [
          "euclidean_distances"
        ],
        "mr_misses": [
          "KMeans",
          "_lloyd_iter"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "_kmeans",
          "kneighbors_graph",
          "kmeans2",
          "paired_distances",
          "_compute_core_distances_",
          "_kmeans_plusplus",
          "kmeans",
          "euclidean_distances",
          "_kmeans_single_elkan",
          "radius_neighbors_graph"
        ],
        "retrieved_repos": [
          "scipy",
          "scikit-learn",
          "scipy",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scipy",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn"
        ]
      },
      {
        "query_id": "T4-001",
        "tier": 4,
        "repos": [
          "numpy",
          "transformers",
          "pytorch",
          "pandas",
          "scikit-learn"
        ],
        "query": "What functions in transformers would break if numpy changed the default dtype of np.float_ from float64 to float32?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "float64",
          "float32",
          "dtype",
          "numpy"
        ],
        "kw_missed": [],
        "mr_score": null,
        "mr_hits": [],
        "mr_misses": [],
        "anti_hits": [],
        "retrieved_functions": [
          "check_int_infer_dtype",
          "default_dtypes",
          "default_dtypes",
          "set_default_dtype",
          "_standardize_dtype",
          "float_numpy_dtype",
          "_get_dtype",
          "_get_dtype",
          "convert_dtypes",
          "convert_dtypes"
        ],
        "retrieved_repos": [
          "pandas",
          "numpy",
          "scikit-learn",
          "pytorch",
          "pandas",
          "pandas",
          "scipy",
          "scipy",
          "pandas",
          "pandas"
        ]
      },
      {
        "query_id": "T4-002",
        "tier": 4,
        "repos": [
          "numpy",
          "pandas",
          "scikit-learn"
        ],
        "query": "If numpy deprecated np.bool (alias for Python bool), which pandas and scikit-learn functions would be affected?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "bool",
          "dtype",
          "numpy"
        ],
        "kw_missed": [],
        "mr_score": null,
        "mr_hits": [],
        "mr_misses": [],
        "anti_hits": [],
        "retrieved_functions": [
          "_bool_arith_check",
          "_is_boolean",
          "is_bool",
          "is_scalar_nan",
          "_with_pandas",
          "is_bool_dtype",
          "np_find_common_type",
          "_isnan",
          "__init__",
          "_has_bool_dtype"
        ],
        "retrieved_repos": [
          "pandas",
          "pandas",
          "pandas",
          "scikit-learn",
          "pytorch",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas"
        ]
      },
      {
        "query_id": "T4-003",
        "tier": 4,
        "repos": [
          "pytorch",
          "transformers"
        ],
        "query": "If PyTorch changed the default behavior of torch.no_grad() to not propagate to nested functions, what would break in HuggingFace transformers?",
        "kw_score": 0.75,
        "kw_passed": true,
        "kw_found": [
          "no_grad",
          "grad",
          "context"
        ],
        "kw_missed": [
          "inference"
        ],
        "mr_score": null,
        "mr_hits": [],
        "mr_misses": [],
        "anti_hits": [],
        "retrieved_functions": [
          "_no_grad_wrapper",
          "wrapper",
          "_apply",
          "__repr__",
          "generate_single_level_function",
          "wrapped",
          "_no_grad",
          "forward",
          "wrapper",
          "_set_tensor_requires_grad"
        ],
        "retrieved_repos": [
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch"
        ]
      },
      {
        "query_id": "T4-004",
        "tier": 4,
        "repos": [
          "numpy",
          "scipy"
        ],
        "query": "If numpy removed the np.matrix class entirely, which scipy functions would need to be updated?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "matrix",
          "ndarray",
          "scipy"
        ],
        "kw_missed": [],
        "mr_score": null,
        "mr_hits": [],
        "mr_misses": [],
        "anti_hits": [],
        "retrieved_functions": [
          "__new__",
          "__array__",
          "empty",
          "_asarray_validated",
          "to_numpy",
          "is_numpy",
          "matvec",
          "__array_finalize__",
          "array_namespace",
          "__init_subclass__"
        ],
        "retrieved_repos": [
          "numpy",
          "pytorch",
          "numpy",
          "scipy",
          "transformers",
          "pytorch",
          "scipy",
          "numpy",
          "scipy",
          "numpy"
        ]
      },
      {
        "query_id": "T4-005",
        "tier": 4,
        "repos": [
          "pandas",
          "numpy"
        ],
        "query": "What would happen to pandas DataFrame operations if numpy changed integer overflow behavior to raise exceptions instead of wrapping?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "int",
          "overflow",
          "dtype",
          "numpy"
        ],
        "kw_missed": [],
        "mr_score": null,
        "mr_hits": [],
        "mr_misses": [],
        "anti_hits": [],
        "retrieved_functions": [
          "_wrapfunc",
          "diff",
          "_convert_to_ndarray",
          "_maybe_convert",
          "_cast_to_stata_types",
          "nansum",
          "_bool_arith_check",
          "_get_fill_value",
          "_is_int_type",
          "astype_float_to_int_nansafe"
        ],
        "retrieved_repos": [
          "numpy",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "numpy",
          "pandas",
          "pandas",
          "scipy",
          "pandas"
        ]
      },
      {
        "query_id": "T4-006",
        "tier": 4,
        "repos": [
          "transformers",
          "pytorch",
          "numpy"
        ],
        "query": "If HuggingFace removed the return_dict parameter from model forward() calls, what downstream code would break?",
        "kw_score": 0.6666666666666666,
        "kw_passed": true,
        "kw_found": [
          "forward",
          "output"
        ],
        "kw_missed": [
          "return_dict"
        ],
        "mr_score": 0.5,
        "mr_hits": [
          "forward"
        ],
        "mr_misses": [
          "return_dict"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "_forward",
          "_sticky_export",
          "forward",
          "_forward",
          "_forward",
          "_forward",
          "_forward",
          "_forward",
          "_use_post_forward_mesh",
          "forward"
        ],
        "retrieved_repos": [
          "transformers",
          "pytorch",
          "pytorch",
          "transformers",
          "transformers",
          "transformers",
          "transformers",
          "transformers",
          "pytorch",
          "pytorch"
        ]
      },
      {
        "query_id": "T4-007",
        "tier": 4,
        "repos": [
          "scikit-learn",
          "numpy"
        ],
        "query": "If numpy's legacy np.random functions were removed, which scikit-learn estimators would break?",
        "kw_score": 0.75,
        "kw_passed": true,
        "kw_found": [
          "random",
          "random_state",
          "numpy"
        ],
        "kw_missed": [
          "seed"
        ],
        "mr_score": null,
        "mr_hits": [],
        "mr_misses": [],
        "anti_hits": [],
        "retrieved_functions": [
          "_average",
          "check_estimator",
          "_single_array_device",
          "get_tags",
          "_reductions",
          "__array_ufunc__",
          "check_f_contiguous_array_estimator",
          "check_estimators_pickle",
          "prod",
          "var"
        ],
        "retrieved_repos": [
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "pandas",
          "pandas",
          "scikit-learn",
          "scikit-learn",
          "numpy",
          "numpy"
        ]
      }
    ]
  },
  "hybrid": {
    "summary": {
      "method": "hybrid",
      "total": 33,
      "verifiable": 33,
      "passed": 32,
      "failed": 1,
      "unverifiable": 0,
      "pass_rate": 0.9696969696969697,
      "avg_kw_score": 0.8863636363636364,
      "avg_mr_score": 0.41025641025641024
    },
    "per_query": [
      {
        "query_id": "T1-001",
        "tier": 1,
        "repos": [
          "numpy"
        ],
        "query": "How does numpy implement the clip function and what are its boundary conditions?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "clip",
          "min",
          "max",
          "out"
        ],
        "kw_missed": [],
        "mr_score": 1.0,
        "mr_hits": [
          "clip"
        ],
        "mr_misses": [],
        "anti_hits": [],
        "retrieved_functions": [
          "clip",
          "clip",
          "clip",
          "resize",
          "ravel_multi_index",
          "argrelextrema",
          "_clip_with_one_bound",
          "take",
          "_check_clip_x",
          "argrelmax"
        ],
        "retrieved_repos": [
          "pandas",
          "scikit-learn",
          "numpy",
          "numpy",
          "numpy",
          "scipy",
          "pandas",
          "numpy",
          "scipy",
          "scipy"
        ]
      },
      {
        "query_id": "T1-002",
        "tier": 1,
        "repos": [
          "numpy"
        ],
        "query": "What is the purpose of numpy's _wrapreduction function and when is it called?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "reduction",
          "axis",
          "ufunc",
          "dtype"
        ],
        "kw_missed": [],
        "mr_score": 0.0,
        "mr_hits": [],
        "mr_misses": [
          "_wrapreduction"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "clean_column_name",
          "wrap_function",
          "_xp_copy_to_numpy",
          "__array_wrap__",
          "amin",
          "_wrapfunc",
          "amax",
          "wrap",
          "__RandomState_ctor",
          "wrapped"
        ],
        "retrieved_repos": [
          "pandas",
          "pandas",
          "scipy",
          "pandas",
          "numpy",
          "numpy",
          "numpy",
          "scipy",
          "numpy",
          "pytorch"
        ]
      },
      {
        "query_id": "T1-003",
        "tier": 1,
        "repos": [
          "numpy"
        ],
        "query": "How does numpy handle broadcasting when array shapes are incompatible?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "broadcast",
          "shape",
          "dimension"
        ],
        "kw_missed": [],
        "mr_score": null,
        "mr_hits": [],
        "mr_misses": [],
        "anti_hits": [],
        "retrieved_functions": [
          "_broadcast_shape",
          "broadcast_shapes",
          "broadcast_shapes",
          "_broadcast_shapes",
          "tile",
          "_argcheck_rvs",
          "broadcast_to",
          "broadcast_shapes",
          "_broadcast_arrays",
          "xp_promote"
        ],
        "retrieved_repos": [
          "numpy",
          "numpy",
          "scipy",
          "scipy",
          "numpy",
          "scipy",
          "numpy",
          "scikit-learn",
          "scipy",
          "scipy"
        ]
      },
      {
        "query_id": "T1-004",
        "tier": 1,
        "repos": [
          "scipy"
        ],
        "query": "How does scipy.optimize.minimize handle convergence criteria internally?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "tol",
          "maxiter",
          "convergence",
          "optimize"
        ],
        "kw_missed": [],
        "mr_score": 0.0,
        "mr_hits": [],
        "mr_misses": [
          "minimize"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "_check_optimize_result",
          "fallback_lbfgs_solve",
          "_constrained_optimization",
          "_constrained_optimization",
          "_solve_W",
          "_solve_lbfgs",
          "show_options",
          "__init__",
          "fmin",
          "brent"
        ],
        "retrieved_repos": [
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scipy",
          "scipy",
          "scipy",
          "scipy"
        ]
      },
      {
        "query_id": "T1-005",
        "tier": 1,
        "repos": [
          "scipy"
        ],
        "query": "What is the implementation of scipy's fft and how does it differ from numpy's fft?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "fft",
          "workers",
          "plan"
        ],
        "kw_missed": [],
        "mr_score": 1.0,
        "mr_hits": [
          "fft"
        ],
        "mr_misses": [],
        "anti_hits": [],
        "retrieved_functions": [
          "hfft2",
          "rfft2",
          "_fft_helper",
          "rfftfreq",
          "set_global_backend",
          "fft",
          "register_backend",
          "ifft",
          "_backend_from_arg",
          "skip_backend"
        ],
        "retrieved_repos": [
          "scipy",
          "scipy",
          "scipy",
          "scipy",
          "scipy",
          "numpy",
          "scipy",
          "numpy",
          "scipy",
          "scipy"
        ]
      },
      {
        "query_id": "T1-006",
        "tier": 1,
        "repos": [
          "pandas"
        ],
        "query": "How does pandas implement groupby aggregation internally?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "groupby",
          "aggregate",
          "apply"
        ],
        "kw_missed": [],
        "mr_score": 0.5,
        "mr_hits": [
          "aggregate"
        ],
        "mr_misses": [
          "agg"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "groupby",
          "groupby",
          "aggregate",
          "_groupby_op",
          "cumsum",
          "_groupby_op",
          "_groupby_op",
          "_groupby_and_aggregate",
          "_wrap_aggregated_output",
          "get_groupby"
        ],
        "retrieved_repos": [
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas"
        ]
      },
      {
        "query_id": "T1-007",
        "tier": 1,
        "repos": [
          "pandas"
        ],
        "query": "What happens to NaN values in the output of pandas merge() \u2014 are they propagated, dropped, or filled by default?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "merge",
          "NaN",
          "join",
          "how"
        ],
        "kw_missed": [],
        "mr_score": 1.0,
        "mr_hits": [
          "merge"
        ],
        "mr_misses": [],
        "anti_hits": [],
        "retrieved_functions": [
          "fillna",
          "bfill",
          "merge",
          "ffill",
          "combine",
          "merge_ordered",
          "na_value",
          "stack",
          "_replace_nans",
          "fillna"
        ],
        "retrieved_repos": [
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas"
        ]
      },
      {
        "query_id": "T1-008",
        "tier": 1,
        "repos": [
          "scikit-learn"
        ],
        "query": "How does scikit-learn's StandardScaler compute mean and variance during fit()?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "mean",
          "var",
          "scale",
          "fit"
        ],
        "kw_missed": [],
        "mr_score": 1.0,
        "mr_hits": [
          "fit"
        ],
        "mr_misses": [],
        "anti_hits": [],
        "retrieved_functions": [
          "fit",
          "_is_constant_feature",
          "partial_fit",
          "fit",
          "fit",
          "fit",
          "fit",
          "make_pipeline",
          "check_f_contiguous_array_estimator",
          "predict"
        ],
        "retrieved_repos": [
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn"
        ]
      },
      {
        "query_id": "T1-009",
        "tier": 1,
        "repos": [
          "scikit-learn"
        ],
        "query": "What is the implementation of train_test_split in scikit-learn?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "train_test_split",
          "shuffle",
          "stratify"
        ],
        "kw_missed": [],
        "mr_score": 1.0,
        "mr_hits": [
          "train_test_split"
        ],
        "mr_misses": [],
        "anti_hits": [],
        "retrieved_functions": [
          "train_test_split",
          "is_usable_for",
          "split",
          "evaluate",
          "_split",
          "from_estimator",
          "__init__",
          "from_predictions",
          "split",
          "_clone_parametrized"
        ],
        "retrieved_repos": [
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "pandas",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn"
        ]
      },
      {
        "query_id": "T1-010",
        "tier": 1,
        "repos": [
          "pytorch"
        ],
        "query": "How does PyTorch implement the Adam optimizer update step?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "adam",
          "lr",
          "beta",
          "grad"
        ],
        "kw_missed": [],
        "mr_score": 0.5,
        "mr_hits": [
          "step"
        ],
        "mr_misses": [
          "Adam"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "set_optimizer",
          "_get_adamw_torch",
          "_get_adamw_anyprecision",
          "_get_adamw_apex_fused",
          "sparse_adam",
          "_register_fused_optim",
          "__init__",
          "adam",
          "step",
          "__init__"
        ],
        "retrieved_repos": [
          "transformers",
          "transformers",
          "transformers",
          "transformers",
          "pytorch",
          "pytorch",
          "scikit-learn",
          "pytorch",
          "pytorch",
          "scikit-learn"
        ]
      },
      {
        "query_id": "T1-011",
        "tier": 1,
        "repos": [
          "pytorch"
        ],
        "query": "How does torch.autograd track gradients through operations?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "grad",
          "backward",
          "autograd",
          "requires_grad"
        ],
        "kw_missed": [],
        "mr_score": 0.5,
        "mr_hits": [
          "backward"
        ],
        "mr_misses": [
          "grad_fn"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "register_autograd",
          "register_autograd",
          "backward",
          "backward",
          "_track_module_params_and_buffers",
          "stage_backward",
          "_wrap_tensor_autograd_backward",
          "handle_autograd_grad",
          "_extract_parameters_and_gradients",
          "_aot_stage2a_partition"
        ],
        "retrieved_repos": [
          "pytorch",
          "pytorch",
          "transformers",
          "transformers",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch"
        ]
      },
      {
        "query_id": "T1-012",
        "tier": 1,
        "repos": [
          "transformers"
        ],
        "query": "How does the HuggingFace tokenizer handle out-of-vocabulary tokens?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "tokenize",
          "unk",
          "vocab",
          "token"
        ],
        "kw_missed": [],
        "mr_score": 0.5,
        "mr_hits": [
          "tokenize"
        ],
        "mr_misses": [
          "encode"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "add_tokens",
          "_add_tokens",
          "_add_tokens",
          "get_vocab",
          "get_missing_alphabet_tokens",
          "from_pretrained",
          "from_pretrained",
          "tokenize",
          "tokenize",
          "save_vocabulary"
        ],
        "retrieved_repos": [
          "transformers",
          "transformers",
          "transformers",
          "transformers",
          "transformers",
          "transformers",
          "transformers",
          "transformers",
          "transformers",
          "transformers"
        ]
      },
      {
        "query_id": "T1-013",
        "tier": 1,
        "repos": [
          "transformers"
        ],
        "query": "What happens inside the forward pass of BertModel?",
        "kw_score": 0.25,
        "kw_passed": false,
        "kw_found": [
          "forward"
        ],
        "kw_missed": [
          "attention",
          "hidden",
          "encoder"
        ],
        "mr_score": 1.0,
        "mr_hits": [
          "forward"
        ],
        "mr_misses": [],
        "anti_hits": [],
        "retrieved_functions": [
          "setup_context",
          "forward",
          "clean_column_name",
          "forward",
          "keys",
          "forward",
          "no_sync",
          "forward",
          "get_cb_kwargs",
          "forward"
        ],
        "retrieved_repos": [
          "pytorch",
          "pytorch",
          "pandas",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "transformers",
          "pytorch"
        ]
      },
      {
        "query_id": "T2-001",
        "tier": 2,
        "repos": [
          "numpy",
          "pandas"
        ],
        "query": "How does pandas use numpy arrays internally to store DataFrame column data?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "ndarray",
          "dtype",
          "numpy",
          "block"
        ],
        "kw_missed": [],
        "mr_score": 0.3333333333333333,
        "mr_hits": [
          "array"
        ],
        "mr_misses": [
          "NDFrame",
          "Block"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "_from_arrays",
          "create_dataframe_from_blocks",
          "array",
          "from_spmatrix",
          "array",
          "explode",
          "array",
          "_liac_arff_parser",
          "array",
          "_pandas_arff_parser"
        ],
        "retrieved_repos": [
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "scikit-learn",
          "pandas",
          "scikit-learn"
        ]
      },
      {
        "query_id": "T2-002",
        "tier": 2,
        "repos": [
          "numpy",
          "scikit-learn"
        ],
        "query": "How does scikit-learn validate that input arrays are numpy-compatible before fitting?",
        "kw_score": 0.75,
        "kw_passed": true,
        "kw_found": [
          "dtype",
          "ndarray",
          "validate"
        ],
        "kw_missed": [
          "check_array"
        ],
        "mr_score": 0.0,
        "mr_hits": [],
        "mr_misses": [
          "check_array",
          "validate_data"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "fit",
          "fit",
          "fit",
          "fit",
          "broadcast_shapes",
          "_get_adapter_from_container",
          "_asarray_validated",
          "_single_array_device",
          "_from_arrays",
          "_validate_vector"
        ],
        "retrieved_repos": [
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "numpy",
          "scikit-learn",
          "scipy",
          "scikit-learn",
          "pandas",
          "scipy"
        ]
      },
      {
        "query_id": "T2-003",
        "tier": 2,
        "repos": [
          "numpy",
          "scipy"
        ],
        "query": "When scipy computes a matrix inverse, how does it use numpy's linear algebra routines?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "inv",
          "solve",
          "lapack",
          "linalg"
        ],
        "kw_missed": [],
        "mr_score": 1.0,
        "mr_hits": [
          "inv",
          "solve"
        ],
        "mr_misses": [],
        "anti_hits": [],
        "retrieved_functions": [
          "inv",
          "inv",
          "pinv",
          "inv",
          "solve",
          "I",
          "_logpdf",
          "dot",
          "_mode",
          "affine_transform"
        ],
        "retrieved_repos": [
          "scipy",
          "numpy",
          "numpy",
          "scipy",
          "numpy",
          "numpy",
          "scipy",
          "pandas",
          "scipy",
          "scipy"
        ]
      },
      {
        "query_id": "T2-004",
        "tier": 2,
        "repos": [
          "pytorch",
          "numpy"
        ],
        "query": "How does PyTorch's tensor.numpy() method convert a tensor to a numpy array and what are the constraints?",
        "kw_score": 0.75,
        "kw_passed": true,
        "kw_found": [
          "numpy",
          "cpu",
          "detach"
        ],
        "kw_missed": [
          "contiguous"
        ],
        "mr_score": 0.0,
        "mr_hits": [],
        "mr_misses": [
          "numpy"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "_to_tensor",
          "to_numpy",
          "as_tensor",
          "_get_is_as_tensor_fns",
          "to_py_obj",
          "_from_numpy_array",
          "make_np",
          "to_numpy_helper",
          "process_images",
          "numpy_to_tensor"
        ],
        "retrieved_repos": [
          "transformers",
          "transformers",
          "transformers",
          "transformers",
          "transformers",
          "pytorch",
          "pytorch",
          "pytorch",
          "transformers",
          "pytorch"
        ]
      },
      {
        "query_id": "T2-005",
        "tier": 2,
        "repos": [
          "transformers",
          "pytorch"
        ],
        "query": "How does HuggingFace's Trainer class use PyTorch DataLoader for batching?",
        "kw_score": 0.75,
        "kw_passed": true,
        "kw_found": [
          "dataloader",
          "batch",
          "sampler"
        ],
        "kw_missed": [
          "collate"
        ],
        "mr_score": 0.3333333333333333,
        "mr_hits": [
          "get_train_dataloader"
        ],
        "mr_misses": [
          "Trainer",
          "DataLoader"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "get_train_dataloader",
          "__init__",
          "get_steps_per_epoch",
          "__init__",
          "create_rng",
          "set_dataloader",
          "thread_safe_generator",
          "__init__",
          "num_examples",
          "tpu_spmd_dataloader"
        ],
        "retrieved_repos": [
          "transformers",
          "pytorch",
          "transformers",
          "transformers",
          "transformers",
          "transformers",
          "pytorch",
          "transformers",
          "transformers",
          "transformers"
        ]
      },
      {
        "query_id": "T2-006",
        "tier": 2,
        "repos": [
          "pandas",
          "numpy"
        ],
        "query": "How does pandas rolling() use numpy operations under the hood?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "rolling",
          "window",
          "apply",
          "numpy"
        ],
        "kw_missed": [],
        "mr_score": 0.0,
        "mr_hits": [],
        "mr_misses": [
          "Rolling",
          "apply"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "mean",
          "sum",
          "std",
          "var",
          "expanding",
          "_groupby_op",
          "roll",
          "aggregate",
          "rollaxis",
          "nunique"
        ],
        "retrieved_repos": [
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "numpy",
          "pandas",
          "numpy",
          "pandas"
        ]
      },
      {
        "query_id": "T2-007",
        "tier": 2,
        "repos": [
          "scikit-learn",
          "numpy"
        ],
        "query": "How does scikit-learn's PCA implementation use numpy's SVD?",
        "kw_score": 0.75,
        "kw_passed": true,
        "kw_found": [
          "svd",
          "components",
          "singular"
        ],
        "kw_missed": [
          "explained_variance"
        ],
        "mr_score": 0.0,
        "mr_hits": [],
        "mr_misses": [
          "PCA",
          "fit",
          "_fit_full"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "_single_array_device",
          "svdvals",
          "__setstate__",
          "_solve_svd",
          "_assert_all_finite_element_wise",
          "_svds_lobpcg_doc",
          "_asarray_with_order",
          "_get_first_singular_vectors_svd",
          "is_usable_for",
          "pca_lowrank"
        ],
        "retrieved_repos": [
          "scikit-learn",
          "numpy",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scipy",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "pytorch"
        ]
      },
      {
        "query_id": "T2-008",
        "tier": 2,
        "repos": [
          "transformers",
          "numpy"
        ],
        "query": "Where does the transformers library convert between PyTorch tensors and numpy arrays for metric computation?",
        "kw_score": 0.5,
        "kw_passed": true,
        "kw_found": [
          "numpy",
          "cpu"
        ],
        "kw_missed": [
          "predictions",
          "metrics"
        ],
        "mr_score": 0.0,
        "mr_hits": [],
        "mr_misses": [
          "compute_metrics"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "_to_tensor",
          "as_tensor",
          "to_numpy_helper",
          "_get_is_as_tensor_fns",
          "to_tvm_tensor",
          "to_cpu_and_numpy",
          "pre_flatten_transform",
          "cond",
          "_rebuild_device_tensor_from_numpy",
          "fill"
        ],
        "retrieved_repos": [
          "transformers",
          "transformers",
          "pytorch",
          "transformers",
          "pytorch",
          "transformers",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch"
        ]
      },
      {
        "query_id": "T3-001",
        "tier": 3,
        "repos": [
          "transformers",
          "pytorch",
          "numpy"
        ],
        "query": "What sampling strategies does HuggingFace model.generate() use and how does it call into PyTorch operations?",
        "kw_score": 0.5,
        "kw_passed": true,
        "kw_found": [
          "generate",
          "sample"
        ],
        "kw_missed": [
          "logits",
          "beam"
        ],
        "mr_score": 0.0,
        "mr_hits": [],
        "mr_misses": [
          "generate"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "load_custom_generate",
          "sample",
          "is_torch_array",
          "sample",
          "repro_load_args",
          "_compute_rng_offsets",
          "call_function",
          "_initialize_parameters",
          "repro_common",
          "init_population_random"
        ],
        "retrieved_repos": [
          "transformers",
          "scikit-learn",
          "scikit-learn",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "scikit-learn",
          "pytorch",
          "scipy"
        ]
      },
      {
        "query_id": "T3-002",
        "tier": 3,
        "repos": [
          "pandas",
          "numpy",
          "scipy"
        ],
        "query": "How does pandas.DataFrame.corr() ultimately compute correlation \u2014 trace through to the underlying math?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "corr",
          "pearson",
          "cov",
          "std"
        ],
        "kw_missed": [],
        "mr_score": 0.5,
        "mr_hits": [
          "corr"
        ],
        "mr_misses": [
          "nancorr"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "corr",
          "corr",
          "corr",
          "corrwith",
          "corrwith",
          "autocorr",
          "corr",
          "corr",
          "corr",
          "corr"
        ],
        "retrieved_repos": [
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas",
          "pandas"
        ]
      },
      {
        "query_id": "T3-003",
        "tier": 3,
        "repos": [
          "scikit-learn",
          "scipy",
          "numpy"
        ],
        "query": "How does scikit-learn's SVM implementation use scipy's sparse matrices and what numpy operations are at the core?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "svm",
          "sparse",
          "kernel",
          "support"
        ],
        "kw_missed": [],
        "mr_score": 0.0,
        "mr_hits": [],
        "mr_misses": [
          "SVC",
          "fit"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "is_valid_sparse_matrix",
          "is_usable_for",
          "_svds_lobpcg_doc",
          "set_config",
          "svds",
          "_check_large_sparse",
          "config_context",
          "_sparse_fit",
          "_asarray_with_order",
          "svdvals"
        ],
        "retrieved_repos": [
          "scikit-learn",
          "scikit-learn",
          "scipy",
          "scikit-learn",
          "scipy",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "numpy"
        ]
      },
      {
        "query_id": "T3-004",
        "tier": 3,
        "repos": [
          "transformers",
          "pytorch"
        ],
        "query": "How does HuggingFace's attention mechanism implement scaled dot-product attention at the PyTorch level?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "attention",
          "query",
          "key",
          "value",
          "softmax"
        ],
        "kw_missed": [],
        "mr_score": 0.0,
        "mr_hits": [],
        "mr_misses": [
          "attention",
          "forward"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "_scaled_dot_product_attention_quantized",
          "sdpa_kernel",
          "flex_attention",
          "can_produce",
          "_unmask_unattended",
          "enable_flash_sdp",
          "enable_math_sdp",
          "enable_cudnn_sdp",
          "aten_scaled_dot_product_attention_23",
          "enable_mem_efficient_sdp"
        ],
        "retrieved_repos": [
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "transformers",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch"
        ]
      },
      {
        "query_id": "T3-005",
        "tier": 3,
        "repos": [
          "numpy",
          "scipy",
          "scikit-learn"
        ],
        "query": "Trace how scikit-learn's KMeans uses numpy and scipy for distance computation.",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "kmeans",
          "euclidean",
          "centroid",
          "distance"
        ],
        "kw_missed": [],
        "mr_score": 0.0,
        "mr_hits": [],
        "mr_misses": [
          "KMeans",
          "_lloyd_iter",
          "euclidean_distances"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "_kmeans",
          "kmeans",
          "compute_optics_graph",
          "pairwise_distances_argmin",
          "pairwise_distances_argmin_min",
          "kneighbors_graph",
          "__setstate__",
          "kmeans2",
          "is_usable_for",
          "paired_distances"
        ],
        "retrieved_repos": [
          "scipy",
          "scipy",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scipy",
          "scikit-learn",
          "scikit-learn"
        ]
      },
      {
        "query_id": "T4-001",
        "tier": 4,
        "repos": [
          "numpy",
          "transformers",
          "pytorch",
          "pandas",
          "scikit-learn"
        ],
        "query": "What functions in transformers would break if numpy changed the default dtype of np.float_ from float64 to float32?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "float64",
          "float32",
          "dtype",
          "numpy"
        ],
        "kw_missed": [],
        "mr_score": null,
        "mr_hits": [],
        "mr_misses": [],
        "anti_hits": [],
        "retrieved_functions": [
          "set_default_dtype",
          "check_int_infer_dtype",
          "fit",
          "default_dtypes",
          "obj2sctype",
          "default_dtypes",
          "_transform",
          "_promote",
          "_standardize_dtype",
          "_prep_values"
        ],
        "retrieved_repos": [
          "pytorch",
          "pandas",
          "scikit-learn",
          "numpy",
          "numpy",
          "scikit-learn",
          "scikit-learn",
          "scipy",
          "pandas",
          "pandas"
        ]
      },
      {
        "query_id": "T4-002",
        "tier": 4,
        "repos": [
          "numpy",
          "pandas",
          "scikit-learn"
        ],
        "query": "If numpy deprecated np.bool (alias for Python bool), which pandas and scikit-learn functions would be affected?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "bool",
          "dtype",
          "numpy"
        ],
        "kw_missed": [],
        "mr_score": null,
        "mr_hits": [],
        "mr_misses": [],
        "anti_hits": [],
        "retrieved_functions": [
          "_use_interchange_protocol",
          "set_config",
          "_bool_arith_check",
          "parametrize_with_checks",
          "_is_boolean",
          "__get__",
          "is_bool",
          "config_context",
          "is_scalar_nan",
          "_single_array_device"
        ],
        "retrieved_repos": [
          "scikit-learn",
          "scikit-learn",
          "pandas",
          "scikit-learn",
          "pandas",
          "scikit-learn",
          "pandas",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn"
        ]
      },
      {
        "query_id": "T4-003",
        "tier": 4,
        "repos": [
          "pytorch",
          "transformers"
        ],
        "query": "If PyTorch changed the default behavior of torch.no_grad() to not propagate to nested functions, what would break in HuggingFace transformers?",
        "kw_score": 0.75,
        "kw_passed": true,
        "kw_found": [
          "no_grad",
          "grad",
          "context"
        ],
        "kw_missed": [
          "inference"
        ],
        "mr_score": null,
        "mr_hits": [],
        "mr_misses": [],
        "anti_hits": [],
        "retrieved_functions": [
          "print_bt",
          "_no_grad_wrapper",
          "save",
          "wrapper",
          "nested_tensor",
          "_apply",
          "enable_propagation",
          "__repr__",
          "_contains_nan",
          "generate_single_level_function"
        ],
        "retrieved_repos": [
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "pytorch",
          "transformers",
          "pytorch",
          "scipy",
          "pytorch"
        ]
      },
      {
        "query_id": "T4-004",
        "tier": 4,
        "repos": [
          "numpy",
          "scipy"
        ],
        "query": "If numpy removed the np.matrix class entirely, which scipy functions would need to be updated?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "matrix",
          "ndarray",
          "scipy"
        ],
        "kw_missed": [],
        "mr_score": null,
        "mr_hits": [],
        "mr_misses": [],
        "anti_hits": [],
        "retrieved_functions": [
          "estimate_rank",
          "__new__",
          "_validate_array_cls",
          "__array__",
          "svd",
          "empty",
          "from_precision",
          "_asarray_validated",
          "estimate_spectral_norm_diff",
          "to_numpy"
        ],
        "retrieved_repos": [
          "scipy",
          "numpy",
          "scipy",
          "pytorch",
          "scipy",
          "numpy",
          "scipy",
          "scipy",
          "scipy",
          "transformers"
        ]
      },
      {
        "query_id": "T4-005",
        "tier": 4,
        "repos": [
          "pandas",
          "numpy"
        ],
        "query": "What would happen to pandas DataFrame operations if numpy changed integer overflow behavior to raise exceptions instead of wrapping?",
        "kw_score": 0.75,
        "kw_passed": true,
        "kw_found": [
          "int",
          "dtype",
          "numpy"
        ],
        "kw_missed": [
          "overflow"
        ],
        "mr_score": null,
        "mr_hits": [],
        "mr_misses": [],
        "anti_hits": [],
        "retrieved_functions": [
          "_bool_arith_check",
          "create_dataframe_from_blocks",
          "_wrapfunc",
          "round",
          "diff",
          "_liac_arff_parser",
          "_convert_to_ndarray",
          "aggregate",
          "_maybe_convert",
          "read_table"
        ],
        "retrieved_repos": [
          "pandas",
          "pandas",
          "numpy",
          "pandas",
          "pandas",
          "scikit-learn",
          "pandas",
          "pandas",
          "pandas",
          "pandas"
        ]
      },
      {
        "query_id": "T4-006",
        "tier": 4,
        "repos": [
          "transformers",
          "pytorch",
          "numpy"
        ],
        "query": "If HuggingFace removed the return_dict parameter from model forward() calls, what downstream code would break?",
        "kw_score": 1.0,
        "kw_passed": true,
        "kw_found": [
          "return_dict",
          "forward",
          "output"
        ],
        "kw_missed": [],
        "mr_score": 0.5,
        "mr_hits": [
          "forward"
        ],
        "mr_misses": [
          "return_dict"
        ],
        "anti_hits": [],
        "retrieved_functions": [
          "forward",
          "_forward",
          "forward",
          "_sticky_export",
          "forward",
          "forward",
          "freeze_embeddings_and_language_adapters",
          "_forward",
          "forward",
          "_forward"
        ],
        "retrieved_repos": [
          "transformers",
          "transformers",
          "transformers",
          "pytorch",
          "transformers",
          "pytorch",
          "transformers",
          "transformers",
          "transformers",
          "transformers"
        ]
      },
      {
        "query_id": "T4-007",
        "tier": 4,
        "repos": [
          "scikit-learn",
          "numpy"
        ],
        "query": "If numpy's legacy np.random functions were removed, which scikit-learn estimators would break?",
        "kw_score": 0.5,
        "kw_passed": true,
        "kw_found": [
          "random",
          "numpy"
        ],
        "kw_missed": [
          "random_state",
          "seed"
        ],
        "mr_score": null,
        "mr_hits": [],
        "mr_misses": [],
        "anti_hits": [],
        "retrieved_functions": [
          "check_estimator",
          "_single_array_device",
          "check_f_contiguous_array_estimator",
          "parametrize_with_checks",
          "_average",
          "_assert_all_finite_element_wise",
          "set_config",
          "get_tags",
          "_reductions",
          "__array_ufunc__"
        ],
        "retrieved_repos": [
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "scikit-learn",
          "pandas",
          "pandas"
        ]
      }
    ]
  }
}