Ilia Tambovtsev committed on
Commit
dfee524
·
1 Parent(s): 24e252a

feat: configure logging all params, move preprocessing to be a feature of a storage

Browse files
src/eval/eval_mlflow.py CHANGED
@@ -292,7 +292,7 @@ class MetricPresets:
292
 
293
  LLM = ["llmrelevance"]
294
 
295
- FULL = BASIC + LLM
296
 
297
  @classmethod
298
  def get_preset(cls, name: str) -> List[str]:
@@ -354,6 +354,15 @@ class MlflowConfig(BaseModel):
354
  logger.info(f"Using metrics: {self.metrics}")
355
  return super().model_post_init(__context)
356
 
 
 
 
 
 
 
 
 
 
357
 
358
  class RAGEvaluatorMlflow:
359
  """MLFlow-based evaluator for RAG pipeline"""
@@ -422,7 +431,7 @@ class RAGEvaluatorMlflow:
422
  Dictionary mapping metric names to MetricResult objects
423
  """
424
  # Log evaluation start
425
- self._logger.info(f"Evaluating question: {question}")
426
 
427
  results = {}
428
 
@@ -435,7 +444,7 @@ class RAGEvaluatorMlflow:
435
  # Log metric result
436
  log_msg = f"Metric {metric.name}: {result.score}"
437
  if result.explanation:
438
- log_msg += f" ({result.explanation})"
439
  self._logger.info(log_msg)
440
 
441
  except Exception as e:
@@ -570,12 +579,39 @@ class RAGEvaluatorMlflow:
570
 
571
  for scorer in self.config.scorers:
572
  self._logger.info(f"Evaluating with scorer: {scorer.id}")
573
- with mlflow.start_run(run_name=f"scorer_{scorer.id}"):
574
- mlflow.log_params(scorer.model_dump())
575
- self._logger.debug(f"Logged scorer parameters: {scorer.model_dump()}")
576
 
577
- # Initialize retriever
578
- retriever = self.config.get_retriever_with_scorer(scorer)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
579
 
580
  # Initialize aggregation containers
581
  results_log = []
@@ -594,6 +630,8 @@ class RAGEvaluatorMlflow:
594
  if results_log
595
  else len(questions_df)
596
  )
 
 
597
 
598
  # Process results
599
  results_df = pd.DataFrame(results_log)
 
292
 
293
  LLM = ["llmrelevance"]
294
 
295
+ ALL = BASIC + LLM
296
 
297
  @classmethod
298
  def get_preset(cls, name: str) -> List[str]:
 
354
  logger.info(f"Using metrics: {self.metrics}")
355
  return super().model_post_init(__context)
356
 
357
+ def get_log_params(self) -> Dict[str, Any]:
358
+ """Get parameters for MLflow logging"""
359
+ return {
360
+ "experiment_name": self.experiment_name,
361
+ "n_judge_contexts": self.n_judge_contexts,
362
+ "metrics": ",".join(self.metrics),
363
+ "metric_args": self.metric_args,
364
+ }
365
+
366
 
367
  class RAGEvaluatorMlflow:
368
  """MLFlow-based evaluator for RAG pipeline"""
 
431
  Dictionary mapping metric names to MetricResult objects
432
  """
433
  # Log evaluation start
434
+ self._logger.info(f"Evaluating question: '{question}'")
435
 
436
  results = {}
437
 
 
444
  # Log metric result
445
  log_msg = f"Metric {metric.name}: {result.score}"
446
  if result.explanation:
447
+ log_msg += f" ({result.explanation[:200]})"
448
  self._logger.info(log_msg)
449
 
450
  except Exception as e:
 
579
 
580
  for scorer in self.config.scorers:
581
  self._logger.info(f"Evaluating with scorer: {scorer.id}")
 
 
 
582
 
583
+ # Initialize retriever
584
+ retriever = self.config.get_retriever_with_scorer(scorer)
585
+
586
+ with mlflow.start_run(
587
+ run_name=f"scorer_{scorer.id}__retriever_{retriever.id}"
588
+ ):
589
+ # Log preprocessor
590
+ preprocessor_id = (
591
+ retriever.storage.query_preprocessor.id
592
+ if retriever.storage.query_preprocessor
593
+ else "None"
594
+ )
595
+ mlflow.log_params({"preprocessing": preprocessor_id})
596
+ self._logger.info(f"Using preprocessor: {preprocessor_id}")
597
+
598
+ # Log config parameters
599
+ mlflow.log_params(
600
+ {f"config_{k}": v for k, v in self.config.get_log_params().items()}
601
+ )
602
+ self._logger.debug("Logged config parameters")
603
+
604
+ # Log scorer parameters
605
+ mlflow.log_params(
606
+ {f"scorer_{k}": v for k, v in scorer.model_dump().items()}
607
+ )
608
+ self._logger.debug("Logged scorer parameters")
609
+
610
+ # Initialize retriever and log its parameters
611
+ mlflow.log_params(
612
+ {f"retriever_{k}": v for k, v in retriever.get_log_params().items()}
613
+ )
614
+ self._logger.debug("Logged retriever parameters")
615
 
616
  # Initialize aggregation containers
617
  results_log = []
 
630
  if results_log
631
  else len(questions_df)
632
  )
633
+ if n_errors > 1:
634
+ logger.error(f"{n_errors} while processing {retriever.id}")
635
 
636
  # Process results
637
  results_df = pd.DataFrame(results_log)
src/rag/preprocess.py CHANGED
@@ -6,7 +6,7 @@ import nltk
6
  from nltk.corpus import stopwords
7
 
8
 
9
- class QueryPreprocessor:
10
  """Preprocesses search queries by removing common patterns and standardizing format."""
11
 
12
  @dataclass
@@ -76,6 +76,10 @@ class QueryPreprocessor:
76
  re.compile(p.pattern, re.IGNORECASE) for p in patterns
77
  ]
78
 
 
 
 
 
79
  def remove_stopwords_from_text(self, text: str) -> str:
80
  """Remove stopwords while preserving protected terms."""
81
  tokens = text.split()
 
6
  from nltk.corpus import stopwords
7
 
8
 
9
+ class RegexQueryPreprocessor:
10
  """Preprocesses search queries by removing common patterns and standardizing format."""
11
 
12
  @dataclass
 
76
  re.compile(p.pattern, re.IGNORECASE) for p in patterns
77
  ]
78
 
79
+ @property
80
+ def id(self):
81
+ return self.__class__.__name__
82
+
83
  def remove_stopwords_from_text(self, text: str) -> str:
84
  """Remove stopwords while preserving protected terms."""
85
  tokens = text.split()
src/rag/storage.py CHANGED
@@ -1,12 +1,13 @@
1
  import asyncio
2
  import logging
3
- from collections import OrderedDict
4
  from pathlib import Path
5
  from typing import Any, Dict, List, Optional, Tuple, Type, Union
6
  from uuid import uuid4
7
 
8
  import chromadb
9
  import numpy as np
 
10
  from chromadb.api.types import QueryResult
11
  from chromadb.config import Settings
12
  from datasets.utils import metadata
@@ -27,7 +28,7 @@ from src.chains.prompts import JsonH1AndGDPrompt
27
  from src.config.model_setup import EmbeddingConfig
28
  from src.config.navigator import Navigator
29
  from src.rag import BaseScorer, HyperbolicScorer, ScorerTypes
30
- from src.rag.preprocess import QueryPreprocessor
31
  from src.rag.score import ExponentialScorer, MinScorer
32
 
33
  logger = logging.getLogger(__name__)
@@ -376,6 +377,7 @@ class ChromaSlideStore:
376
  self,
377
  collection_name: str = "pres1",
378
  embedding_model: Embeddings = EmbeddingConfig().load_openai(),
 
379
  ):
380
  """Initialize ChromaDB storage"""
381
  self.navigator = Navigator()
@@ -397,6 +399,9 @@ class ChromaSlideStore:
397
  # self._api_key = os.getenv("OPENAI_API_KEY")
398
  self._embeddings = embedding_model
399
 
 
 
 
400
  # Initialize indexer
401
  self._indexer = SlideIndexer(collection_name=collection_name)
402
 
@@ -461,8 +466,10 @@ class ChromaSlideStore:
461
  Returns:
462
  List of ScoredChunks sorted by similarity
463
  """
 
 
464
  # Get query embedding
465
- query_embedding = await self._embeddings.aembed_query(query)
466
 
467
  # Query ChromaDB
468
  result = self._collection.query(
@@ -798,6 +805,139 @@ class ChromaSlideStore:
798
  await gather(*tasks)
799
  logger.info(f"Completed processing presentation: '{presentation.name}'")
800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
801
 
802
  class PresentationRetriever(BaseModel):
803
  """Retriever for slide search that provides formatted context"""
@@ -806,16 +946,18 @@ class PresentationRetriever(BaseModel):
806
  scorer: BaseScorer = ExponentialScorer()
807
  n_contexts: int = -1
808
  n_pages: int = -1
 
809
  retrieve_page_contexts: bool = True
810
 
811
- query_preprocessor: Optional[QueryPreprocessor] = QueryPreprocessor()
812
-
813
  model_config = ConfigDict(arbitrary_types_allowed=True)
814
 
815
  @property
816
  def id(self) -> str:
817
  return self.__class__.__name__.lower()
818
 
 
 
 
819
  def format_slide(
820
  self, slide: SearchResultPage, metadata: Optional[Dict[str, Any]] = None
821
  ) -> str:
@@ -883,10 +1025,9 @@ class PresentationRetriever(BaseModel):
883
  Returns:
884
  Dictionary with presentation results and formatted context
885
  """
886
- q_storage = self.query_preprocessor(query) if self.query_preprocessor else query
887
 
888
  results = self.storage.search_query_presentations(
889
- query=q_storage,
890
  chunk_types=chunk_types,
891
  n_results=n_results,
892
  scorer=self.scorer,
@@ -931,6 +1072,15 @@ class PresentationRetriever(BaseModel):
931
  def set_scorer(self, scorer: ScorerTypes):
932
  self.scorer = scorer
933
 
 
 
 
 
 
 
 
 
 
934
 
935
  class LLMPresentationRetriever(PresentationRetriever):
936
  """LLM-enhanced retriever that reranks results using structured relevance scoring"""
@@ -1144,6 +1294,19 @@ Output Formatting:
1144
 
1145
  return dict(contexts=reranked)
1146
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1147
  RetrieverTypes = Union[PresentationRetriever, LLMPresentationRetriever]
1148
 
1149
  # def create_slides_database(
 
1
  import asyncio
2
  import logging
3
+ from collections import OrderedDict, defaultdict
4
  from pathlib import Path
5
  from typing import Any, Dict, List, Optional, Tuple, Type, Union
6
  from uuid import uuid4
7
 
8
  import chromadb
9
  import numpy as np
10
+ import pandas as pd
11
  from chromadb.api.types import QueryResult
12
  from chromadb.config import Settings
13
  from datasets.utils import metadata
 
28
  from src.config.model_setup import EmbeddingConfig
29
  from src.config.navigator import Navigator
30
  from src.rag import BaseScorer, HyperbolicScorer, ScorerTypes
31
+ from src.rag.preprocess import RegexQueryPreprocessor
32
  from src.rag.score import ExponentialScorer, MinScorer
33
 
34
  logger = logging.getLogger(__name__)
 
377
  self,
378
  collection_name: str = "pres1",
379
  embedding_model: Embeddings = EmbeddingConfig().load_openai(),
380
+ query_preprocessor: Optional[RegexQueryPreprocessor] = RegexQueryPreprocessor(),
381
  ):
382
  """Initialize ChromaDB storage"""
383
  self.navigator = Navigator()
 
399
  # self._api_key = os.getenv("OPENAI_API_KEY")
400
  self._embeddings = embedding_model
401
 
402
+ # Initialize query preprocessor
403
+ self.query_preprocessor = query_preprocessor
404
+
405
  # Initialize indexer
406
  self._indexer = SlideIndexer(collection_name=collection_name)
407
 
 
466
  Returns:
467
  List of ScoredChunks sorted by similarity
468
  """
469
+ q_storage = self.query_preprocessor(query) if self.query_preprocessor else query
470
+
471
  # Get query embedding
472
+ query_embedding = await self._embeddings.aembed_query(q_storage)
473
 
474
  # Query ChromaDB
475
  result = self._collection.query(
 
805
  await gather(*tasks)
806
  logger.info(f"Completed processing presentation: '{presentation.name}'")
807
 
808
+ def validate_presentations(self) -> Tuple[pd.DataFrame, List[str]]:
809
+ """Validate that all presentation slides were properly stored.
810
+
811
+ Uses metadata from stored chunks to compare number of pages in presentations.
812
+ Result shows how many pages are in ChromaDB vs expected total pages.
813
+
814
+ Returns:
815
+ Tuple containing:
816
+ - DataFrame with presentations statistics:
817
+ Columns:
818
+ - presentation: Presentation name
819
+ - stored_pages: Number of pages found in ChromaDB
820
+ - chunks_per_page: Average chunks per page
821
+ - total_chunks: Total chunks for this presentation
822
+ - chunk_types: Set of unique chunk types
823
+ - min_page: First page number
824
+ - max_page: Last page number
825
+ - List of validation warnings if any inconsistencies found
826
+ """
827
+ # Get all stored chunks
828
+ all_chunks = self._collection.get()
829
+
830
+ # Group chunks by presentation
831
+ pres_pages: Dict[str, Set[int]] = defaultdict(set) # Unique pages
832
+ pres_chunks: Dict[str, int] = defaultdict(int) # Total chunks
833
+ pres_types: Dict[str, Set[str]] = defaultdict(set) # Chunk types
834
+
835
+ # Process each chunk's metadata
836
+ for metadata in all_chunks["metadatas"]:
837
+ if not metadata:
838
+ continue
839
+
840
+ pdf_path = metadata.get("pdf_path", "")
841
+ if not pdf_path:
842
+ continue
843
+
844
+ # Extract presentation name from path
845
+ pres_name = Path(pdf_path).stem
846
+
847
+ # Track pages, chunks and types
848
+ page_num = int(metadata.get("page_num", -1))
849
+ if page_num >= 0:
850
+ pres_pages[pres_name].add(page_num)
851
+
852
+ chunk_type = metadata.get("chunk_type", "unknown")
853
+ pres_types[pres_name].add(chunk_type)
854
+
855
+ pres_chunks[pres_name] += 1
856
+
857
+ # Compile statistics and warnings
858
+ stats_data = []
859
+ warnings = []
860
+
861
+ for pres_name in pres_pages:
862
+ stored_pages = len(pres_pages[pres_name])
863
+ total_chunks = pres_chunks[pres_name]
864
+ chunks_per_page = total_chunks / stored_pages if stored_pages > 0 else 0
865
+ chunk_types = pres_types[pres_name]
866
+ pages = sorted(pres_pages[pres_name])
867
+
868
+ stats_data.append(
869
+ {
870
+ "presentation": pres_name,
871
+ "stored_pages": stored_pages,
872
+ "chunks_per_page": round(chunks_per_page, 2),
873
+ "total_chunks": total_chunks,
874
+ "chunk_types": chunk_types,
875
+ "min_page": min(pages) if pages else None,
876
+ "max_page": max(pages) if pages else None,
877
+ }
878
+ )
879
+
880
+ # Check for potential issues
881
+ if (
882
+ chunks_per_page < 3
883
+ ): # Assuming we should have at least 3 chunks per page
884
+ warnings.append(
885
+ f"Low chunks per page ({chunks_per_page:.1f}) " f"for '{pres_name}'"
886
+ )
887
+
888
+ # Check for page number gaps
889
+ if pages:
890
+ expected_pages = set(range(min(pages), max(pages) + 1))
891
+ missing_pages = expected_pages - pres_pages[pres_name]
892
+ if missing_pages:
893
+ warnings.append(
894
+ f"Missing pages {sorted(missing_pages)} in '{pres_name}'"
895
+ )
896
+
897
+ # Check for missing chunk types
898
+ expected_types = {
899
+ "text_content",
900
+ "visual_content",
901
+ "topic_overview",
902
+ "conclusions_and_insights",
903
+ "layout_and_composition",
904
+ }
905
+ missing_types = expected_types - chunk_types
906
+ if missing_types:
907
+ warnings.append(f"Missing chunk types {missing_types} in '{pres_name}'")
908
+
909
+ # Create DataFrame from stats
910
+ stats_df = pd.DataFrame(stats_data).sort_values("presentation")
911
+
912
+ return stats_df, warnings
913
+
914
+ def validate_storage(self) -> Tuple[pd.DataFrame, List[str]]:
915
+ """Helper function to run validation and display results.
916
+
917
+ Args:
918
+ store: ChromaSlideStore instance to validate
919
+
920
+ Returns:
921
+ Tuple of (statistics DataFrame, list of warnings)
922
+ """
923
+ from IPython.display import display
924
+
925
+ stats_df, warnings = self.validate_presentations()
926
+
927
+ # Display statistics
928
+ print("\nPresentation Statistics:")
929
+ display(stats_df)
930
+
931
+ # Display warnings if any
932
+ if warnings:
933
+ print("\nWarnings:")
934
+ for warning in warnings:
935
+ print(f"- {warning}")
936
+ else:
937
+ print("\nNo validation warnings found.")
938
+
939
+ return stats_df, warnings
940
+
941
 
942
  class PresentationRetriever(BaseModel):
943
  """Retriever for slide search that provides formatted context"""
 
946
  scorer: BaseScorer = ExponentialScorer()
947
  n_contexts: int = -1
948
  n_pages: int = -1
949
+ n_query_results: int = 70
950
  retrieve_page_contexts: bool = True
951
 
 
 
952
  model_config = ConfigDict(arbitrary_types_allowed=True)
953
 
954
  @property
955
  def id(self) -> str:
956
  return self.__class__.__name__.lower()
957
 
958
+ def set_n_query_results(self, n_query_results: int):
959
+ self.n_query_results = n_query_results
960
+
961
  def format_slide(
962
  self, slide: SearchResultPage, metadata: Optional[Dict[str, Any]] = None
963
  ) -> str:
 
1025
  Returns:
1026
  Dictionary with presentation results and formatted context
1027
  """
 
1028
 
1029
  results = self.storage.search_query_presentations(
1030
+ query=query,
1031
  chunk_types=chunk_types,
1032
  n_results=n_results,
1033
  scorer=self.scorer,
 
1072
  def set_scorer(self, scorer: ScorerTypes):
1073
  self.scorer = scorer
1074
 
1075
+ def get_log_params(self) -> Dict[str, Any]:
1076
+ """Get parameters for MLflow logging"""
1077
+ return {
1078
+ "type": self.__class__.__name__,
1079
+ "n_contexts": self.n_contexts,
1080
+ "n_pages": self.n_pages,
1081
+ "retrieve_page_contexts": self.retrieve_page_contexts,
1082
+ }
1083
+
1084
 
1085
  class LLMPresentationRetriever(PresentationRetriever):
1086
  """LLM-enhanced retriever that reranks results using structured relevance scoring"""
 
1294
 
1295
  return dict(contexts=reranked)
1296
 
1297
+ def get_log_params(self) -> Dict[str, Any]:
1298
+ """Get parameters for MLflow logging including LLM specifics"""
1299
+ params = super().get_log_params()
1300
+ params.update(
1301
+ {
1302
+ "llm_model": self.llm.model_name,
1303
+ "llm_temperature": self.llm.temperature,
1304
+ "top_k": self.top_k,
1305
+ }
1306
+ )
1307
+ return params
1308
+
1309
+
1310
  RetrieverTypes = Union[PresentationRetriever, LLMPresentationRetriever]
1311
 
1312
  # def create_slides_database(
src/run_evaluation.py CHANGED
@@ -21,6 +21,7 @@ from src.eval.eval_mlflow import (
21
  )
22
  from src.eval.evaluate import LangsmithConfig, RAGEvaluatorLangsmith
23
  from src.rag import ChromaSlideStore, PresentationRetriever
 
24
  from src.rag.score import (
25
  BaseScorer,
26
  ExponentialScorer,
@@ -109,6 +110,7 @@ class EvaluationCLI:
109
  model_name: Optional[str],
110
  collection: str,
111
  scorers: List[str],
 
112
  temperature: float = 0.2,
113
  ) -> EvalComponents:
114
  """Initialize common evaluation components
@@ -137,8 +139,10 @@ class EvaluationCLI:
137
  # Initialize components
138
  llm = self.config.model_config.get_llm(provider, model_name, temperature)
139
  embeddings = self.config.embedding_config.get_embeddings(provider)
 
 
140
  storage = ChromaSlideStore(
141
- collection_name=collection, embedding_model=embeddings
142
  )
143
 
144
  logger.info(f"Initialized storage collection: {collection}")
@@ -159,12 +163,17 @@ class EvaluationCLI:
159
  def mlflow(
160
  self,
161
  retriever: str = "basic",
 
 
 
 
162
  provider: str = "vsegpt",
163
  model_name: Optional[str] = None,
164
  collection: str = "pres1",
165
  experiment: str = "PresRetrieve_eval",
166
  scorers: List[str] = ["default"],
167
  metrics: List[str] = ["basic"],
 
168
  n_questions: int = -1,
169
  max_concurrent: int = 8,
170
  rate_limit_timeout: float = -1,
@@ -201,7 +210,7 @@ class EvaluationCLI:
201
 
202
  metrics: List of metric specifications
203
  Options:
204
- - Presets: 'basic', 'llm', 'full'
205
  - Individual: 'presentationmatch', 'presentationfound', 'pagematch', 'pagefound', 'presentationcount',
206
  Default: ['basic']
207
 
@@ -251,9 +260,15 @@ class EvaluationCLI:
251
  model_name=model_name,
252
  collection=collection,
253
  scorers=scorers,
 
254
  temperature=temperature,
255
  )
256
 
 
 
 
 
 
257
  # Setup evaluation config
258
  db_path = self.config.navigator.eval_runs / "mlruns.db"
259
  artifacts_path = self.config.navigator.eval_artifacts
@@ -270,6 +285,7 @@ class EvaluationCLI:
270
  else -1.0
271
  )
272
  ),
 
273
  write_to_google=write_to_google,
274
  )
275
 
@@ -348,7 +364,7 @@ class EvaluationCLI:
348
  questions_df = questions_df.sample(n_questions).reset_index()
349
  logger.info(f"Selected {len(questions_df)} random questions")
350
 
351
- evaluator.run_evaluation(questions_df)
352
  logger.info("LangSmith evaluation completed successfully")
353
 
354
  except Exception as e:
 
21
  )
22
  from src.eval.evaluate import LangsmithConfig, RAGEvaluatorLangsmith
23
  from src.rag import ChromaSlideStore, PresentationRetriever
24
+ from src.rag.preprocess import RegexQueryPreprocessor
25
  from src.rag.score import (
26
  BaseScorer,
27
  ExponentialScorer,
 
110
  model_name: Optional[str],
111
  collection: str,
112
  scorers: List[str],
113
+ preprocessing: Optional[str] = None,
114
  temperature: float = 0.2,
115
  ) -> EvalComponents:
116
  """Initialize common evaluation components
 
139
  # Initialize components
140
  llm = self.config.model_config.get_llm(provider, model_name, temperature)
141
  embeddings = self.config.embedding_config.get_embeddings(provider)
142
+ query_preprocessor = {"regex": RegexQueryPreprocessor()}.get(preprocessing) if preprocessing else None
143
+
144
  storage = ChromaSlideStore(
145
+ collection_name=collection, embedding_model=embeddings, query_preprocessor=query_preprocessor
146
  )
147
 
148
  logger.info(f"Initialized storage collection: {collection}")
 
163
  def mlflow(
164
  self,
165
  retriever: str = "basic",
166
+ n_query_results: int = 50,
167
+ n_contexts: int = -1,
168
+ n_pages: int = -1,
169
+ preprocessing: str = "regex",
170
  provider: str = "vsegpt",
171
  model_name: Optional[str] = None,
172
  collection: str = "pres1",
173
  experiment: str = "PresRetrieve_eval",
174
  scorers: List[str] = ["default"],
175
  metrics: List[str] = ["basic"],
176
+ n_judge_contexts: int = 8,
177
  n_questions: int = -1,
178
  max_concurrent: int = 8,
179
  rate_limit_timeout: float = -1,
 
210
 
211
  metrics: List of metric specifications
212
  Options:
213
+ - Presets: 'basic', 'llm', 'all'
214
  - Individual: 'presentationmatch', 'presentationfound', 'pagematch', 'pagefound', 'presentationcount',
215
  Default: ['basic']
216
 
 
260
  model_name=model_name,
261
  collection=collection,
262
  scorers=scorers,
263
+ preprocessing=preprocessing,
264
  temperature=temperature,
265
  )
266
 
267
+ # Set attributes
268
+ components.retriever.n_query_results = n_query_results
269
+ components.retriever.n_contexts = n_contexts
270
+ components.retriever.n_pages = n_pages
271
+
272
  # Setup evaluation config
273
  db_path = self.config.navigator.eval_runs / "mlruns.db"
274
  artifacts_path = self.config.navigator.eval_artifacts
 
285
  else -1.0
286
  )
287
  ),
288
+ n_judge_contexts=n_judge_contexts,
289
  write_to_google=write_to_google,
290
  )
291
 
 
364
  questions_df = questions_df.sample(n_questions).reset_index()
365
  logger.info(f"Selected {len(questions_df)} random questions")
366
 
367
+ evaluator.run_evaluation()
368
  logger.info("LangSmith evaluation completed successfully")
369
 
370
  except Exception as e: