# ragmint-mcp-server / models.py
from typing import List, Optional

from pydantic import BaseModel, Field


# Models
class OptimizeRequest(BaseModel):
    """
    🔧 Explicit optimization request for RAG (Retrieval-Augmented Generation) pipelines.

    Args:
        docs_path (str, optional): 📂 Folder containing your documents for RAG optimization. Default: "data/docs"
        retriever (List[str], optional): 🔍 Retriever type(s) to use. Default: ['faiss']. Example: 'bm25', 'faiss', 'chroma'
        embedding_model (List[str], optional): 🧠 Embedding model(s) to use. Default: ['sentence-transformers/all-MiniLM-L6-v2']
        strategy (List[str], optional): 🎯 RAG strategy to apply. Default: ['fixed']. Options: 'fixed', 'token', 'sentence'
        chunk_sizes (List[int], optional): 📏 List of chunk sizes to evaluate. Default: [200, 400, 600]
        overlaps (List[int], optional): 🔁 List of overlap values to test. Default: [50, 100, 200]
        rerankers (List[str], optional): ⚖️ Rerankers to apply after retrieval. Default: ['mmr']
        search_type (str, optional): 🔍 Search method for parameter exploration. Default: 'grid'. Options: 'grid', 'random', 'bayesian'
        trials (int, optional): 🧪 Number of optimization trials. Default: 5
        metric (str, optional): 📈 Metric to optimize. Default: 'faithfulness'
        validation_choice (str, optional): ✅ Source of validation data. Default: 'generate'. Options: blank (use default), 'generate', local path, HF dataset ID
        llm_model (str, optional): 🤖 LLM used for QA generation if validation_choice='generate'. Default: 'gemini-2.5-flash-lite'
    """

    docs_path: Optional[str] = Field(
        default="data/docs",
        description="📂 Folder containing your documents for RAG optimization. Example: 'data/docs'",
    )
    retriever: Optional[List[str]] = Field(
        default=["faiss"],
        description="🔍 Retriever type(s) to use. Example: 'bm25', 'faiss', 'chroma'",
    )
    embedding_model: Optional[List[str]] = Field(
        default=["sentence-transformers/all-MiniLM-L6-v2"],
        description="🧠 Embedding model name(s) or path(s). Example: 'sentence-transformers/all-MiniLM-L6-v2'",
    )
    strategy: Optional[List[str]] = Field(
        default=["fixed"],
        description="🎯 RAG strategy name(s). Example: 'fixed', 'token', 'sentence'",
    )
    chunk_sizes: Optional[List[int]] = Field(
        default=[200, 400, 600],
        description="📏 List of chunk sizes to evaluate. Example: [200, 400, 600]",
    )
    overlaps: Optional[List[int]] = Field(
        default=[50, 100, 200],
        description="🔁 List of overlap values to test. Example: [50, 100, 200]",
    )
    rerankers: Optional[List[str]] = Field(
        default=["mmr"],
        description="⚖️ Rerankers to apply after retrieval. Default: ['mmr']",
    )
    search_type: Optional[str] = Field(
        default="grid",
        description="🔍 Search method to explore the parameter space. Options: 'grid', 'random', 'bayesian'",
    )
    trials: Optional[int] = Field(
        default=5,
        description="🧪 Number of optimization trials to run.",
    )
    metric: Optional[str] = Field(
        default="faithfulness",
        description="📈 Evaluation metric for optimization. Options: 'faithfulness'",
    )
    validation_choice: Optional[str] = Field(
        default="generate",
        description=(
            "✅ Validation data source. Options:\n"
            " - Leave blank → use default 'validation_qa.json' if available\n"
            " - 'generate' → auto-generate a validation QA file from your docs\n"
            " - Path to a local JSON file (e.g. 'data/validation_qa.json')\n"
            " - Hugging Face dataset ID (e.g. 'squad')"
        ),
    )
    llm_model: Optional[str] = Field(
        default="gemini-2.5-flash-lite",
        description="🤖 LLM used to generate the QA dataset when validation_choice='generate'. Example: 'gemini-pro', 'gpt-4o-mini'",
    )
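
# Usage sketch (assumes Pydantic v2; on v1, .dict() replaces .model_dump()).
# Fields left unset keep their declared defaults, so a request only needs the
# parameters being overridden:
#
#     req = OptimizeRequest(retriever=["bm25"], search_type="random", trials=10)
#     req.chunk_sizes             # -> [200, 400, 600]  (default preserved)
#     payload = req.model_dump()  # JSON-ready dict for the tool call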


class AutotuneRequest(BaseModel):
    """
    ⚡ Automatically tunes RAG pipeline parameters based on document analysis.

    Args:
        docs_path (str, optional): 📂 Folder containing documents for RAG optimization. Default: "data/docs"
        embedding_model (str, optional): 🧠 Embedding model to analyze. Default: 'sentence-transformers/all-MiniLM-L6-v2'
        num_chunk_pairs (int, optional): 🔢 Number of chunk pairs to analyze. Default: 5
        metric (str, optional): 📈 Metric to optimize. Default: 'faithfulness'
        search_type (str, optional): 🔍 Search method for parameter exploration. Default: 'grid'. Options: 'grid', 'random', 'bayesian'
        trials (int, optional): 🧪 Number of optimization trials. Default: 5
        validation_choice (str, optional): ✅ Source of validation data. Default: 'generate'. Options: blank, 'generate', local path, HF dataset ID
        llm_model (str, optional): 🤖 LLM used for QA generation if validation_choice='generate'. Default: 'gemini-2.5-flash-lite'
    """

    docs_path: Optional[str] = Field(
        default="data/docs",
        description="📂 Folder containing your documents for RAG optimization. Example: 'data/docs'",
    )
    embedding_model: Optional[str] = Field(
        default="sentence-transformers/all-MiniLM-L6-v2",
        description="🧠 Embedding model name or path. Example: 'sentence-transformers/all-MiniLM-L6-v2'",
    )
    num_chunk_pairs: Optional[int] = Field(
        default=5,
        description="🔢 Number of chunk pairs to analyze for tuning.",
    )
    metric: Optional[str] = Field(
        default="faithfulness",
        description="📈 Evaluation metric for optimization. Options: 'faithfulness'",
    )
    search_type: Optional[str] = Field(
        default="grid",
        description="🔍 Search method to explore the parameter space. Options: 'grid', 'random', 'bayesian'",
    )
    trials: Optional[int] = Field(
        default=5,
        description="🧪 Number of optimization trials to run.",
    )
    validation_choice: Optional[str] = Field(
        default="generate",
        description=(
            "✅ Validation data source. Options:\n"
            " - Leave blank → use default 'validation_qa.json' if available\n"
            " - 'generate' → auto-generate a validation QA file from your docs\n"
            " - Path to a local JSON file (e.g. 'data/validation_qa.json')\n"
            " - Hugging Face dataset ID (e.g. 'squad')"
        ),
    )
    llm_model: Optional[str] = Field(
        default="gemini-2.5-flash-lite",
        description="🤖 LLM used to generate the QA dataset when validation_choice='generate'. Example: 'gemini-pro', 'gpt-4o-mini'",
    )
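
# Validation sketch (assumes Pydantic v2; on v1, parse_obj() replaces
# model_validate()). An incoming MCP tool-call payload can be validated
# directly; bad values raise pydantic.ValidationError with a per-field report:
#
#     data = {"search_type": "bayesian", "trials": "20"}  # "20" coerces to int
#     req = AutotuneRequest.model_validate(data)
#     req.trials                  # -> 20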


class QARequest(BaseModel):
    """
    🧩 Generate a validation QA dataset from documents for RAG evaluation.

    Args:
        docs_path (str): 📂 Folder containing documents. Default: 'data/docs'
        llm_model (str): 🤖 LLM model used for question generation. Default: 'gemini-2.5-flash-lite'
        batch_size (int): 📦 Number of documents per batch. Default: 5
        min_q (int): ❓ Minimum number of questions per document. Default: 3
        max_q (int): ❓ Maximum number of questions per document. Default: 25
    """

    docs_path: str = Field(
        default="data/docs",
        description="📂 Folder containing your documents to generate QA pairs from. Example: 'data/docs'",
    )
    llm_model: str = Field(
        default="gemini-2.5-flash-lite",
        description="🤖 LLM model used for question generation. Example: 'gemini-2.5-flash-lite', 'gpt-4o-mini'",
    )
    batch_size: int = Field(
        default=5,
        description="📦 Number of documents processed per generation batch.",
    )
    min_q: int = Field(
        default=3,
        description="❓ Minimum number of questions per document.",
    )
    max_q: int = Field(
        default=25,
        description="❓ Maximum number of questions per document.",
    )
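

# Quick self-check (a sketch assuming Pydantic v2; on v1, .json(indent=2)
# replaces .model_dump_json(indent=2)). Running the module directly prints each
# model's defaults as the JSON a client would send when overriding nothing:
if __name__ == "__main__":
    for model in (OptimizeRequest, AutotuneRequest, QARequest):
        print(f"--- {model.__name__} defaults ---")
        print(model().model_dump_json(indent=2))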