André Oliveira
committed on
Commit
·
a529c4d
1
Parent(s):
c40bfd2
changed docstrings and args
Browse files
app.py
CHANGED
|
@@ -25,10 +25,12 @@ def call_api(endpoint: str, payload: dict) -> str:
|
|
| 25 |
|
| 26 |
def clear_cache_tool(docs_path="data/docs"):
|
| 27 |
"""
|
| 28 |
-
🧹 Clear Cache MCP Tool
|
|
|
|
| 29 |
Deletes all files and directories inside docs_path on the server.
|
| 30 |
-
|
| 31 |
-
|
|
|
|
| 32 |
"""
|
| 33 |
try:
|
| 34 |
r = requests.post(
|
|
@@ -45,10 +47,10 @@ def clear_cache_tool(docs_path="data/docs"):
|
|
| 45 |
def upload_docs_tool(files, docs_path="data/docs"):
|
| 46 |
"""
|
| 47 |
Upload documents to the server's docs folder via FastAPI /upload_docs.
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
"""
|
| 53 |
import shutil, tempfile
|
| 54 |
|
|
@@ -122,11 +124,6 @@ def generate_qa_tool_(payload: str) -> str:
|
|
| 122 |
return call_api("/generate_validation_qa", json.loads(payload))
|
| 123 |
|
| 124 |
|
| 125 |
-
# Assign Pydantic docstrings
|
| 126 |
-
optimize_rag_tool_.__doc__ = OptimizeRequest.__doc__
|
| 127 |
-
autotune_tool_.__doc__ = AutotuneRequest.__doc__
|
| 128 |
-
generate_qa_tool_.__doc__ = QARequest.__doc__
|
| 129 |
-
|
| 130 |
|
| 131 |
def model_to_json(model_cls) -> str:
|
| 132 |
return json.dumps({k: v.default for k, v in model_cls.model_fields.items()}, indent=2)
|
|
@@ -149,7 +146,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 149 |
</a>
|
| 150 |
<img src="https://img.shields.io/badge/Python-3.9%2B-blue?logo=python" alt="Python">
|
| 151 |
<a href="https://pypi.org/project/ragmint/">
|
| 152 |
-
<img src="https://img.shields.io/pypi/v/ragmint?color=blue" alt="
|
| 153 |
</a>
|
| 154 |
<img src="https://img.shields.io/badge/License-Apache%202.0-green" alt="License">
|
| 155 |
<img src="https://img.shields.io/badge/MCP-Enabled-green" alt="MCP">
|
|
@@ -255,17 +252,17 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 255 |
label="URLs"
|
| 256 |
)
|
| 257 |
|
| 258 |
-
def upload_urls_tool(text,
|
| 259 |
"""
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
- URLs
|
| 264 |
-
|
| 265 |
"""
|
| 266 |
|
| 267 |
urls = [u.strip() for u in text.split("\n") if u.strip()]
|
| 268 |
-
return upload_docs_tool(urls,
|
| 269 |
|
| 270 |
upload_mcp_path = gr.Textbox(value=DEFAULT_UPLOAD_PATH, label="Docs Path")
|
| 271 |
upload_mcp_btn = gr.Button("Upload", variant="primary")
|
|
@@ -330,11 +327,10 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 330 |
autotune_out = gr.Textbox(label="Response", lines=15)
|
| 331 |
|
| 332 |
|
| 333 |
-
def autotune_tool(
|
| 334 |
-
(
|
| 335 |
docs_path, embedding_model, num_chunk_pairs, metric,
|
| 336 |
search_type, trials, validation_choice, llm_model
|
| 337 |
-
)
|
| 338 |
|
| 339 |
payload = {
|
| 340 |
"docs_path": docs_path,
|
|
@@ -360,7 +356,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 360 |
outputs=autotune_out
|
| 361 |
)
|
| 362 |
|
| 363 |
-
with gr.Accordion("
|
| 364 |
gr.Markdown(AutotuneRequest.__doc__ or "No description available.")
|
| 365 |
|
| 366 |
gr.Markdown("---")
|
|
@@ -393,7 +389,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 393 |
label="RAG Strategy"
|
| 394 |
)
|
| 395 |
|
| 396 |
-
|
| 397 |
chunk_sizes = gr.Textbox(
|
| 398 |
value="200,400,600",
|
| 399 |
label="Chunk Sizes (comma-separated integers)"
|
|
@@ -410,7 +405,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 410 |
label="Rerankers"
|
| 411 |
)
|
| 412 |
|
| 413 |
-
|
| 414 |
search_type = gr.Dropdown(
|
| 415 |
choices=["grid", "random", "bayesian"],
|
| 416 |
value="grid",
|
|
@@ -444,12 +438,11 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 444 |
|
| 445 |
|
| 446 |
# Function to convert inputs into payload and call API
|
| 447 |
-
def optimize_rag_tool(
|
| 448 |
-
(
|
| 449 |
docs_path, retriever, embedding_model, strategy, chunk_sizes,
|
| 450 |
overlaps, rerankers, search_type, trials, metric,
|
| 451 |
validation_choice, llm_model
|
| 452 |
-
)
|
| 453 |
|
| 454 |
payload = {
|
| 455 |
"docs_path": docs_path,
|
|
@@ -482,7 +475,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 482 |
)
|
| 483 |
|
| 484 |
|
| 485 |
-
with gr.Accordion("
|
| 486 |
gr.Markdown(OptimizeRequest.__doc__ or "No description available.")
|
| 487 |
gr.Markdown("---")
|
| 488 |
|
|
@@ -506,8 +499,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 506 |
qa_out = gr.Textbox(lines=15, label="Response")
|
| 507 |
|
| 508 |
|
| 509 |
-
def generate_qa_tool(
|
| 510 |
-
docs_path, llm_model, batch_size, min_q, max_q = args
|
| 511 |
return generate_qa_tool_(json.dumps({
|
| 512 |
"docs_path": docs_path,
|
| 513 |
"llm_model": llm_model,
|
|
@@ -525,7 +517,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 525 |
outputs=qa_out
|
| 526 |
)
|
| 527 |
|
| 528 |
-
with gr.Accordion("
|
| 529 |
gr.Markdown(QARequest.__doc__ or "No description available.")
|
| 530 |
|
| 531 |
gr.Markdown("---")
|
|
|
|
| 25 |
|
| 26 |
def clear_cache_tool(docs_path="data/docs"):
|
| 27 |
"""
|
| 28 |
+
🧹 Clear Cache MCP Tool.
|
| 29 |
+
|
| 30 |
Deletes all files and directories inside docs_path on the server.
|
| 31 |
+
|
| 32 |
+
Args:
|
| 33 |
+
docs_path (str): The local path to the folder to clear. Defaults to 'data/docs'.
|
| 34 |
"""
|
| 35 |
try:
|
| 36 |
r = requests.post(
|
|
|
|
| 47 |
def upload_docs_tool(files, docs_path="data/docs"):
|
| 48 |
"""
|
| 49 |
Upload documents to the server's docs folder via FastAPI /upload_docs.
|
| 50 |
+
|
| 51 |
+
Args:
|
| 52 |
+
files (list): A list of local file paths, remote URLs, or file-like objects.
|
| 53 |
+
docs_path (str): The server folder path to upload documents to. Defaults to 'data/docs'.
|
| 54 |
"""
|
| 55 |
import shutil, tempfile
|
| 56 |
|
|
|
|
| 124 |
return call_api("/generate_validation_qa", json.loads(payload))
|
| 125 |
|
| 126 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
|
| 128 |
def model_to_json(model_cls) -> str:
|
| 129 |
return json.dumps({k: v.default for k, v in model_cls.model_fields.items()}, indent=2)
|
|
|
|
| 146 |
</a>
|
| 147 |
<img src="https://img.shields.io/badge/Python-3.9%2B-blue?logo=python" alt="Python">
|
| 148 |
<a href="https://pypi.org/project/ragmint/">
|
| 149 |
+
<img src="https://img.shields.io/pypi/v/ragmint?color=blue" alt="PyPI">
|
| 150 |
</a>
|
| 151 |
<img src="https://img.shields.io/badge/License-Apache%202.0-green" alt="License">
|
| 152 |
<img src="https://img.shields.io/badge/MCP-Enabled-green" alt="MCP">
|
|
|
|
| 252 |
label="URLs"
|
| 253 |
)
|
| 254 |
|
| 255 |
+
def upload_urls_tool(text, docs_path):
|
| 256 |
"""
|
| 257 |
+
Upload documents from a list of URLs to the server's docs folder.
|
| 258 |
+
|
| 259 |
+
Args:
|
| 260 |
+
text (str): A newline-separated string of document URLs to download.
|
| 261 |
+
docs_path (str): The destination folder path on the server. Defaults to 'data/docs'.
|
| 262 |
"""
|
| 263 |
|
| 264 |
urls = [u.strip() for u in text.split("\n") if u.strip()]
|
| 265 |
+
return upload_docs_tool(urls, docs_path)
|
| 266 |
|
| 267 |
upload_mcp_path = gr.Textbox(value=DEFAULT_UPLOAD_PATH, label="Docs Path")
|
| 268 |
upload_mcp_btn = gr.Button("Upload", variant="primary")
|
|
|
|
| 327 |
autotune_out = gr.Textbox(label="Response", lines=15)
|
| 328 |
|
| 329 |
|
| 330 |
+
def autotune_tool(
|
|
|
|
| 331 |
docs_path, embedding_model, num_chunk_pairs, metric,
|
| 332 |
search_type, trials, validation_choice, llm_model
|
| 333 |
+
):
|
| 334 |
|
| 335 |
payload = {
|
| 336 |
"docs_path": docs_path,
|
|
|
|
| 356 |
outputs=autotune_out
|
| 357 |
)
|
| 358 |
|
| 359 |
+
with gr.Accordion("β More Information", open=False):
|
| 360 |
gr.Markdown(AutotuneRequest.__doc__ or "No description available.")
|
| 361 |
|
| 362 |
gr.Markdown("---")
|
|
|
|
| 389 |
label="RAG Strategy"
|
| 390 |
)
|
| 391 |
|
|
|
|
| 392 |
chunk_sizes = gr.Textbox(
|
| 393 |
value="200,400,600",
|
| 394 |
label="Chunk Sizes (comma-separated integers)"
|
|
|
|
| 405 |
label="Rerankers"
|
| 406 |
)
|
| 407 |
|
|
|
|
| 408 |
search_type = gr.Dropdown(
|
| 409 |
choices=["grid", "random", "bayesian"],
|
| 410 |
value="grid",
|
|
|
|
| 438 |
|
| 439 |
|
| 440 |
# Function to convert inputs into payload and call API
|
| 441 |
+
def optimize_rag_tool(
|
|
|
|
| 442 |
docs_path, retriever, embedding_model, strategy, chunk_sizes,
|
| 443 |
overlaps, rerankers, search_type, trials, metric,
|
| 444 |
validation_choice, llm_model
|
| 445 |
+
):
|
| 446 |
|
| 447 |
payload = {
|
| 448 |
"docs_path": docs_path,
|
|
|
|
| 475 |
)
|
| 476 |
|
| 477 |
|
| 478 |
+
with gr.Accordion("β More Information", open=False):
|
| 479 |
gr.Markdown(OptimizeRequest.__doc__ or "No description available.")
|
| 480 |
gr.Markdown("---")
|
| 481 |
|
|
|
|
| 499 |
qa_out = gr.Textbox(lines=15, label="Response")
|
| 500 |
|
| 501 |
|
| 502 |
+
def generate_qa_tool(docs_path, llm_model, batch_size, min_q, max_q):
|
|
|
|
| 503 |
return generate_qa_tool_(json.dumps({
|
| 504 |
"docs_path": docs_path,
|
| 505 |
"llm_model": llm_model,
|
|
|
|
| 517 |
outputs=qa_out
|
| 518 |
)
|
| 519 |
|
| 520 |
+
with gr.Accordion("β More Information", open=False):
|
| 521 |
gr.Markdown(QARequest.__doc__ or "No description available.")
|
| 522 |
|
| 523 |
gr.Markdown("---")
|
models.py
CHANGED
|
@@ -8,19 +8,19 @@ class OptimizeRequest(BaseModel):
|
|
| 8 |
"""
|
| 9 |
🧠 Explicit optimization request for RAG (Retrieval-Augmented Generation) pipelines.
|
| 10 |
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
"""
|
| 25 |
docs_path: Optional[str] = Field(
|
| 26 |
default="data/docs",
|
|
@@ -83,15 +83,15 @@ class AutotuneRequest(BaseModel):
|
|
| 83 |
"""
|
| 84 |
⚡ Automatically tunes RAG pipeline parameters based on document analysis.
|
| 85 |
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
"""
|
| 96 |
|
| 97 |
docs_path: Optional[str] = Field(
|
|
@@ -138,12 +138,12 @@ class QARequest(BaseModel):
|
|
| 138 |
"""
|
| 139 |
🧩 Generate a validation QA dataset from documents for RAG evaluation.
|
| 140 |
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
"""
|
| 148 |
docs_path: str = Field(
|
| 149 |
description="π Folder containing your documents to generate QA pairs from. Example: 'data/docs'",
|
|
|
|
| 8 |
"""
|
| 9 |
🧠 Explicit optimization request for RAG (Retrieval-Augmented Generation) pipelines.
|
| 10 |
|
| 11 |
+
Args:
|
| 12 |
+
docs_path (str, optional): π Folder containing your documents for RAG optimization. Default: "data/docs"
|
| 13 |
+
retriever (List[str], optional): π Retriever type(s) to use. Default: ['faiss']. Example: 'bm25', 'faiss', 'chroma'
|
| 14 |
+
embedding_model (List[str], optional): π§ Embedding model(s) to use. Default: ['sentence-transformers/all-MiniLM-L6-v2']
|
| 15 |
+
strategy (List[str], optional): π― RAG strategy to apply. Default: ['fixed']. Options: 'fixed', 'token', 'sentence'
|
| 16 |
+
chunk_sizes (List[int], optional): π List of chunk sizes to evaluate. Default: [200, 400, 600]
|
| 17 |
+
overlaps (List[int], optional): π List of overlap values to test. Default: [50, 100, 200]
|
| 18 |
+
rerankers (List[str], optional): βοΈ Rerankers to apply after retrieval. Default: ['mmr']
|
| 19 |
+
search_type (str, optional): π Search method for parameter exploration. Default: 'grid'. Options: 'grid', 'random', 'bayesian'
|
| 20 |
+
trials (int, optional): 🧪 Number of optimization trials. Default: 5
|
| 21 |
+
metric (str, optional): π Metric to optimize. Default: 'faithfulness'
|
| 22 |
+
validation_choice (str, optional): ✅ Source of validation data. Default: 'generate'. Options: blank (use default), 'generate', local path, HF dataset ID
|
| 23 |
+
llm_model (str, optional): π€ LLM used for QA generation if validation_choice='generate'. Default: 'gemini-2.5-flash-lite'
|
| 24 |
"""
|
| 25 |
docs_path: Optional[str] = Field(
|
| 26 |
default="data/docs",
|
|
|
|
| 83 |
"""
|
| 84 |
⚡ Automatically tunes RAG pipeline parameters based on document analysis.
|
| 85 |
|
| 86 |
+
Args:
|
| 87 |
+
docs_path (str, optional): π Folder containing documents for RAG optimization. Default: "data/docs"
|
| 88 |
+
embedding_model (str, optional): π§ Embedding model to analyze. Default: 'sentence-transformers/all-MiniLM-L6-v2'
|
| 89 |
+
num_chunk_pairs (int, optional): π’ Number of chunk pairs to analyze. Default: 5
|
| 90 |
+
metric (str, optional): π Metric to optimize. Default: 'faithfulness'
|
| 91 |
+
search_type (str, optional): π Search method for parameter exploration. Default: 'grid'. Options: 'grid', 'random', 'bayesian'
|
| 92 |
+
trials (int, optional): 🧪 Number of optimization trials. Default: 5
|
| 93 |
+
validation_choice (str, optional): ✅ Source of validation data. Default: 'generate'. Options: blank, 'generate', local path, HF dataset ID
|
| 94 |
+
llm_model (str, optional): π€ LLM used for QA generation if validation_choice='generate'. Default: 'gemini-2.5-flash-lite'
|
| 95 |
"""
|
| 96 |
|
| 97 |
docs_path: Optional[str] = Field(
|
|
|
|
| 138 |
"""
|
| 139 |
🧩 Generate a validation QA dataset from documents for RAG evaluation.
|
| 140 |
|
| 141 |
+
Args:
|
| 142 |
+
docs_path (str): π Folder containing documents. Default: 'data/docs'
|
| 143 |
+
llm_model (str): π€ LLM model used for question generation. Default: 'gemini-2.5-flash-lite'
|
| 144 |
+
batch_size (int): π¦ Number of documents per batch. Default: 5
|
| 145 |
+
min_q (int): β Minimum number of questions per document. Default: 3
|
| 146 |
+
max_q (int): β Maximum number of questions per document. Default: 25
|
| 147 |
"""
|
| 148 |
docs_path: str = Field(
|
| 149 |
description="π Folder containing your documents to generate QA pairs from. Example: 'data/docs'",
|