André Oliveira committed on
Commit
a529c4d
·
1 Parent(s): c40bfd2

changed docstrings and args

Browse files
Files changed (2) hide show
  1. app.py +25 -33
  2. models.py +28 -28
app.py CHANGED
@@ -25,10 +25,12 @@ def call_api(endpoint: str, payload: dict) -> str:
25
 
26
  def clear_cache_tool(docs_path="data/docs"):
27
  """
28
- 🧹 Clear Cache MCP Tool
 
29
  Deletes all files and directories inside docs_path on the server.
30
- Accepts:
31
- - local paths (str), default='data/docs/'
 
32
  """
33
  try:
34
  r = requests.post(
@@ -45,10 +47,10 @@ def clear_cache_tool(docs_path="data/docs"):
45
  def upload_docs_tool(files, docs_path="data/docs"):
46
  """
47
  Upload documents to the server's docs folder via FastAPI /upload_docs.
48
- Accepts:
49
- - local file paths (str)
50
- - URLs (str)
51
- - file-like objects
52
  """
53
  import shutil, tempfile
54
 
@@ -122,11 +124,6 @@ def generate_qa_tool_(payload: str) -> str:
122
  return call_api("/generate_validation_qa", json.loads(payload))
123
 
124
 
125
- # Assign Pydantic docstrings
126
- optimize_rag_tool_.__doc__ = OptimizeRequest.__doc__
127
- autotune_tool_.__doc__ = AutotuneRequest.__doc__
128
- generate_qa_tool_.__doc__ = QARequest.__doc__
129
-
130
 
131
  def model_to_json(model_cls) -> str:
132
  return json.dumps({k: v.default for k, v in model_cls.model_fields.items()}, indent=2)
@@ -149,7 +146,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
149
  </a>
150
  <img src="https://img.shields.io/badge/Python-3.9%2B-blue?logo=python" alt="Python">
151
  <a href="https://pypi.org/project/ragmint/">
152
- <img src="https://img.shields.io/pypi/v/ragmint?color=blue" alt="HF Space">
153
  </a>
154
  <img src="https://img.shields.io/badge/License-Apache%202.0-green" alt="License">
155
  <img src="https://img.shields.io/badge/MCP-Enabled-green" alt="MCP">
@@ -255,17 +252,17 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
255
  label="URLs"
256
  )
257
 
258
- def upload_urls_tool(text, path):
259
  """
260
- Upload documents to the server's docs folder via FastAPI /upload_docs.
261
- Accepts:
262
- - local file paths (str)
263
- - URLs (str)
264
- - file-like objects
265
  """
266
 
267
  urls = [u.strip() for u in text.split("\n") if u.strip()]
268
- return upload_docs_tool(urls, path)
269
 
270
  upload_mcp_path = gr.Textbox(value=DEFAULT_UPLOAD_PATH, label="Docs Path")
271
  upload_mcp_btn = gr.Button("Upload", variant="primary")
@@ -330,11 +327,10 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
330
  autotune_out = gr.Textbox(label="Response", lines=15)
331
 
332
 
333
- def autotune_tool(*args):
334
- (
335
  docs_path, embedding_model, num_chunk_pairs, metric,
336
  search_type, trials, validation_choice, llm_model
337
- ) = args
338
 
339
  payload = {
340
  "docs_path": docs_path,
@@ -360,7 +356,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
360
  outputs=autotune_out
361
  )
362
 
363
- with gr.Accordion("Parameter Information", open=False):
364
  gr.Markdown(AutotuneRequest.__doc__ or "No description available.")
365
 
366
  gr.Markdown("---")
@@ -393,7 +389,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
393
  label="RAG Strategy"
394
  )
395
 
396
-
397
  chunk_sizes = gr.Textbox(
398
  value="200,400,600",
399
  label="Chunk Sizes (comma-separated integers)"
@@ -410,7 +405,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
410
  label="Rerankers"
411
  )
412
 
413
-
414
  search_type = gr.Dropdown(
415
  choices=["grid", "random", "bayesian"],
416
  value="grid",
@@ -444,12 +438,11 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
444
 
445
 
446
  # Function to convert inputs into payload and call API
447
- def optimize_rag_tool(*args):
448
- (
449
  docs_path, retriever, embedding_model, strategy, chunk_sizes,
450
  overlaps, rerankers, search_type, trials, metric,
451
  validation_choice, llm_model
452
- ) = args
453
 
454
  payload = {
455
  "docs_path": docs_path,
@@ -482,7 +475,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
482
  )
483
 
484
 
485
- with gr.Accordion("Parameter Information", open=False):
486
  gr.Markdown(OptimizeRequest.__doc__ or "No description available.")
487
  gr.Markdown("---")
488
 
@@ -506,8 +499,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
506
  qa_out = gr.Textbox(lines=15, label="Response")
507
 
508
 
509
- def generate_qa_tool(*args):
510
- docs_path, llm_model, batch_size, min_q, max_q = args
511
  return generate_qa_tool_(json.dumps({
512
  "docs_path": docs_path,
513
  "llm_model": llm_model,
@@ -525,7 +517,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
525
  outputs=qa_out
526
  )
527
 
528
- with gr.Accordion("Parameter Information", open=False):
529
  gr.Markdown(QARequest.__doc__ or "No description available.")
530
 
531
  gr.Markdown("---")
 
25
 
26
  def clear_cache_tool(docs_path="data/docs"):
27
  """
28
+ 🧹 Clear Cache MCP Tool.
29
+
30
  Deletes all files and directories inside docs_path on the server.
31
+
32
+ Args:
33
+ docs_path (str): The local path to the folder to clear. Defaults to 'data/docs'.
34
  """
35
  try:
36
  r = requests.post(
 
47
  def upload_docs_tool(files, docs_path="data/docs"):
48
  """
49
  Upload documents to the server's docs folder via FastAPI /upload_docs.
50
+
51
+ Args:
52
+ files (list): A list of local file paths, remote URLs, or file-like objects.
53
+ docs_path (str): The server folder path to upload documents to. Defaults to 'data/docs'.
54
  """
55
  import shutil, tempfile
56
 
 
124
  return call_api("/generate_validation_qa", json.loads(payload))
125
 
126
 
 
 
 
 
 
127
 
128
  def model_to_json(model_cls) -> str:
129
  return json.dumps({k: v.default for k, v in model_cls.model_fields.items()}, indent=2)
 
146
  </a>
147
  <img src="https://img.shields.io/badge/Python-3.9%2B-blue?logo=python" alt="Python">
148
  <a href="https://pypi.org/project/ragmint/">
149
+ <img src="https://img.shields.io/pypi/v/ragmint?color=blue" alt="PyPI">
150
  </a>
151
  <img src="https://img.shields.io/badge/License-Apache%202.0-green" alt="License">
152
  <img src="https://img.shields.io/badge/MCP-Enabled-green" alt="MCP">
 
252
  label="URLs"
253
  )
254
 
255
+ def upload_urls_tool(text, docs_path):
256
  """
257
+ Upload documents from a list of URLs to the server's docs folder.
258
+
259
+ Args:
260
+ text (str): A newline-separated string of document URLs to download.
261
+ docs_path (str): The destination folder path on the server. Defaults to 'data/docs'.
262
  """
263
 
264
  urls = [u.strip() for u in text.split("\n") if u.strip()]
265
+ return upload_docs_tool(urls, docs_path)
266
 
267
  upload_mcp_path = gr.Textbox(value=DEFAULT_UPLOAD_PATH, label="Docs Path")
268
  upload_mcp_btn = gr.Button("Upload", variant="primary")
 
327
  autotune_out = gr.Textbox(label="Response", lines=15)
328
 
329
 
330
+ def autotune_tool(
 
331
  docs_path, embedding_model, num_chunk_pairs, metric,
332
  search_type, trials, validation_choice, llm_model
333
+ ):
334
 
335
  payload = {
336
  "docs_path": docs_path,
 
356
  outputs=autotune_out
357
  )
358
 
359
+ with gr.Accordion("➕ More Information", open=False):
360
  gr.Markdown(AutotuneRequest.__doc__ or "No description available.")
361
 
362
  gr.Markdown("---")
 
389
  label="RAG Strategy"
390
  )
391
 
 
392
  chunk_sizes = gr.Textbox(
393
  value="200,400,600",
394
  label="Chunk Sizes (comma-separated integers)"
 
405
  label="Rerankers"
406
  )
407
 
 
408
  search_type = gr.Dropdown(
409
  choices=["grid", "random", "bayesian"],
410
  value="grid",
 
438
 
439
 
440
  # Function to convert inputs into payload and call API
441
+ def optimize_rag_tool(
 
442
  docs_path, retriever, embedding_model, strategy, chunk_sizes,
443
  overlaps, rerankers, search_type, trials, metric,
444
  validation_choice, llm_model
445
+ ):
446
 
447
  payload = {
448
  "docs_path": docs_path,
 
475
  )
476
 
477
 
478
+ with gr.Accordion("➕ More Information", open=False):
479
  gr.Markdown(OptimizeRequest.__doc__ or "No description available.")
480
  gr.Markdown("---")
481
 
 
499
  qa_out = gr.Textbox(lines=15, label="Response")
500
 
501
 
502
+ def generate_qa_tool(docs_path, llm_model, batch_size, min_q, max_q):
 
503
  return generate_qa_tool_(json.dumps({
504
  "docs_path": docs_path,
505
  "llm_model": llm_model,
 
517
  outputs=qa_out
518
  )
519
 
520
+ with gr.Accordion("➕ More Information", open=False):
521
  gr.Markdown(QARequest.__doc__ or "No description available.")
522
 
523
  gr.Markdown("---")
models.py CHANGED
@@ -8,19 +8,19 @@ class OptimizeRequest(BaseModel):
8
  """
9
 🔧 Explicit optimization request for RAG (Retrieval-Augmented Generation) pipelines.
10
 
11
- Parameters:
12
- docs_path (str, optional): 📂 Folder containing your documents for RAG optimization. Default: "data/docs"
13
- retriever (List[str], optional): 🔍 Retriever type(s) to use. Default: ['faiss']. Example: 'bm25', 'faiss', 'chroma'
14
- embedding_model (List[str], optional): 🧠 Embedding model(s) to use. Default: ['sentence-transformers/all-MiniLM-L6-v2']
15
- strategy (List[str], optional): 🎯 RAG strategy to apply. Default: ['fixed']. Options: 'fixed', 'token', 'sentence'
16
- chunk_sizes (List[int], optional): 📏 List of chunk sizes to evaluate. Default: [200, 400, 600]
17
- overlaps (List[int], optional): 🔁 List of overlap values to test. Default: [50, 100, 200]
18
- rerankers (List[str], optional): ⚖️ Rerankers to apply after retrieval. Default: ['mmr']
19
- search_type (str, optional): 🔍 Search method for parameter exploration. Default: 'grid'. Options: 'grid', 'random', 'bayesian'
20
- trials (int, optional): 🧪 Number of optimization trials. Default: 5
21
- metric (str, optional): 📈 Metric to optimize. Default: 'faithfulness'
22
- validation_choice (str, optional): ✅ Source of validation data. Default: 'generate'. Options: blank (use default), 'generate', local path, HF dataset ID
23
- llm_model (str, optional): 🤖 LLM used for QA generation if validation_choice='generate'. Default: 'gemini-2.5-flash-lite'
24
  """
25
  docs_path: Optional[str] = Field(
26
  default="data/docs",
@@ -83,15 +83,15 @@ class AutotuneRequest(BaseModel):
83
  """
84
 ⚡ Automatically tunes RAG pipeline parameters based on document analysis.
85
 
86
- Parameters:
87
- docs_path (str, optional): 📂 Folder containing documents for RAG optimization. Default: "data/docs"
88
- embedding_model (str, optional): 🧠 Embedding model to analyze. Default: 'sentence-transformers/all-MiniLM-L6-v2'
89
- num_chunk_pairs (int, optional): 🔢 Number of chunk pairs to analyze. Default: 5
90
- metric (str, optional): 📈 Metric to optimize. Default: 'faithfulness'
91
- search_type (str, optional): 🔍 Search method for parameter exploration. Default: 'grid'. Options: 'grid', 'random', 'bayesian'
92
- trials (int, optional): 🧪 Number of optimization trials. Default: 5
93
- validation_choice (str, optional): ✅ Source of validation data. Default: 'generate'. Options: blank, 'generate', local path, HF dataset ID
94
- llm_model (str, optional): 🤖 LLM used for QA generation if validation_choice='generate'. Default: 'gemini-2.5-flash-lite'
95
  """
96
 
97
  docs_path: Optional[str] = Field(
@@ -138,12 +138,12 @@ class QARequest(BaseModel):
138
  """
139
  🧩 Generate a validation QA dataset from documents for RAG evaluation.
140
 
141
- Parameters:
142
- docs_path (str): 📂 Folder containing documents. Default: 'data/docs'
143
- llm_model (str): 🤖 LLM model used for question generation. Default: 'gemini-2.5-flash-lite'
144
- batch_size (int): 📦 Number of documents per batch. Default: 5
145
- min_q (int): ❓ Minimum number of questions per document. Default: 3
146
- max_q (int): ❓ Maximum number of questions per document. Default: 25
147
  """
148
  docs_path: str = Field(
149
  description="πŸ“‚ Folder containing your documents to generate QA pairs from. Example: 'data/docs'",
 
8
  """
9
 🔧 Explicit optimization request for RAG (Retrieval-Augmented Generation) pipelines.
10
 
11
+ Args:
12
+ docs_path (str, optional): 📂 Folder containing your documents for RAG optimization. Default: "data/docs"
13
+ retriever (List[str], optional): 🔍 Retriever type(s) to use. Default: ['faiss']. Example: 'bm25', 'faiss', 'chroma'
14
+ embedding_model (List[str], optional): 🧠 Embedding model(s) to use. Default: ['sentence-transformers/all-MiniLM-L6-v2']
15
+ strategy (List[str], optional): 🎯 RAG strategy to apply. Default: ['fixed']. Options: 'fixed', 'token', 'sentence'
16
+ chunk_sizes (List[int], optional): 📏 List of chunk sizes to evaluate. Default: [200, 400, 600]
17
+ overlaps (List[int], optional): 🔁 List of overlap values to test. Default: [50, 100, 200]
18
+ rerankers (List[str], optional): ⚖️ Rerankers to apply after retrieval. Default: ['mmr']
19
+ search_type (str, optional): 🔍 Search method for parameter exploration. Default: 'grid'. Options: 'grid', 'random', 'bayesian'
20
+ trials (int, optional): 🧪 Number of optimization trials. Default: 5
21
+ metric (str, optional): 📈 Metric to optimize. Default: 'faithfulness'
22
+ validation_choice (str, optional): ✅ Source of validation data. Default: 'generate'. Options: blank (use default), 'generate', local path, HF dataset ID
23
+ llm_model (str, optional): 🤖 LLM used for QA generation if validation_choice='generate'. Default: 'gemini-2.5-flash-lite'
24
  """
25
  docs_path: Optional[str] = Field(
26
  default="data/docs",
 
83
  """
84
 ⚡ Automatically tunes RAG pipeline parameters based on document analysis.
85
 
86
+ Args:
87
+ docs_path (str, optional): 📂 Folder containing documents for RAG optimization. Default: "data/docs"
88
+ embedding_model (str, optional): 🧠 Embedding model to analyze. Default: 'sentence-transformers/all-MiniLM-L6-v2'
89
+ num_chunk_pairs (int, optional): 🔢 Number of chunk pairs to analyze. Default: 5
90
+ metric (str, optional): 📈 Metric to optimize. Default: 'faithfulness'
91
+ search_type (str, optional): 🔍 Search method for parameter exploration. Default: 'grid'. Options: 'grid', 'random', 'bayesian'
92
+ trials (int, optional): 🧪 Number of optimization trials. Default: 5
93
+ validation_choice (str, optional): ✅ Source of validation data. Default: 'generate'. Options: blank, 'generate', local path, HF dataset ID
94
+ llm_model (str, optional): 🤖 LLM used for QA generation if validation_choice='generate'. Default: 'gemini-2.5-flash-lite'
95
  """
96
 
97
  docs_path: Optional[str] = Field(
 
138
  """
139
  🧩 Generate a validation QA dataset from documents for RAG evaluation.
140
 
141
+ Args:
142
+ docs_path (str): 📂 Folder containing documents. Default: 'data/docs'
143
+ llm_model (str): 🤖 LLM model used for question generation. Default: 'gemini-2.5-flash-lite'
144
+ batch_size (int): 📦 Number of documents per batch. Default: 5
145
+ min_q (int): ❓ Minimum number of questions per document. Default: 3
146
+ max_q (int): ❓ Maximum number of questions per document. Default: 25
147
  """
148
  docs_path: str = Field(
149
  description="πŸ“‚ Folder containing your documents to generate QA pairs from. Example: 'data/docs'",