Spaces:

MCP-1st-Birthday
/

ragmint-mcp-server

Running

App Files Files Community

André Oliveira committed 24 days ago

Commit

a529c4d

1 Parent(s): c40bfd2

changed docstrings and args

Browse files

Files changed (2) hide show

app.py +25 -33
models.py +28 -28

app.py CHANGED Viewed

@@ -25,10 +25,12 @@ def call_api(endpoint: str, payload: dict) -> str:
 def clear_cache_tool(docs_path="data/docs"):
     """
-    🧹 Clear Cache MCP Tool
     Deletes all files and directories inside docs_path on the server.
-    Accepts:
-    - local paths (str), default='data/docs/'
     """
     try:
         r = requests.post(
@@ -45,10 +47,10 @@ def clear_cache_tool(docs_path="data/docs"):
 def upload_docs_tool(files, docs_path="data/docs"):
     """
     Upload documents to the server's docs folder via FastAPI /upload_docs.
-    Accepts:
-    - local file paths (str)
-    - URLs (str)
-    - file-like objects
     """
     import shutil, tempfile
@@ -122,11 +124,6 @@ def generate_qa_tool_(payload: str) -> str:
     return call_api("/generate_validation_qa", json.loads(payload))
-# Assign Pydantic docstrings
-optimize_rag_tool_.__doc__ = OptimizeRequest.__doc__
-autotune_tool_.__doc__ = AutotuneRequest.__doc__
-generate_qa_tool_.__doc__ = QARequest.__doc__
 def model_to_json(model_cls) -> str:
     return json.dumps({k: v.default for k, v in model_cls.model_fields.items()}, indent=2)
@@ -149,7 +146,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
       </a>
       <img src="https://img.shields.io/badge/Python-3.9%2B-blue?logo=python" alt="Python">
       <a href="https://pypi.org/project/ragmint/">
-        <img src="https://img.shields.io/pypi/v/ragmint?color=blue" alt="HF Space">
       </a>
       <img src="https://img.shields.io/badge/License-Apache%202.0-green" alt="License">
       <img src="https://img.shields.io/badge/MCP-Enabled-green" alt="MCP">
@@ -255,17 +252,17 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
                     label="URLs"
                 )
-                def upload_urls_tool(text, path):
                     """
-                        Upload documents to the server's docs folder via FastAPI /upload_docs.
-                        Accepts:
-                        - local file paths (str)
-                        - URLs (str)
-                        - file-like objects
                     """
                     urls = [u.strip() for u in text.split("\n") if u.strip()]
-                    return upload_docs_tool(urls, path)
                 upload_mcp_path = gr.Textbox(value=DEFAULT_UPLOAD_PATH, label="Docs Path")
                 upload_mcp_btn = gr.Button("Upload", variant="primary")
@@ -330,11 +327,10 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
             autotune_out = gr.Textbox(label="Response", lines=15)
-            def autotune_tool(*args):
-                (
                     docs_path, embedding_model, num_chunk_pairs, metric,
                     search_type, trials, validation_choice, llm_model
-                ) = args
                 payload = {
                     "docs_path": docs_path,
@@ -360,7 +356,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
                 outputs=autotune_out
             )
-            with gr.Accordion("Parameter Information", open=False):
                 gr.Markdown(AutotuneRequest.__doc__ or "No description available.")
             gr.Markdown("---")
@@ -393,7 +389,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
                     label="RAG Strategy"
                 )
                 chunk_sizes = gr.Textbox(
                     value="200,400,600",
                     label="Chunk Sizes (comma-separated integers)"
@@ -410,7 +405,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
                     label="Rerankers"
                 )
                 search_type = gr.Dropdown(
                     choices=["grid", "random", "bayesian"],
                     value="grid",
@@ -444,12 +438,11 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
             # Function to convert inputs into payload and call API
-            def optimize_rag_tool(*args):
-                (
                     docs_path, retriever, embedding_model, strategy, chunk_sizes,
                     overlaps, rerankers, search_type, trials, metric,
                     validation_choice, llm_model
-                ) = args
                 payload = {
                     "docs_path": docs_path,
@@ -482,7 +475,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
             )
-            with gr.Accordion("Parameter Information", open=False):
                 gr.Markdown(OptimizeRequest.__doc__ or "No description available.")
             gr.Markdown("---")
@@ -506,8 +499,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
                 qa_out = gr.Textbox(lines=15, label="Response")
-                def generate_qa_tool(*args):
-                    docs_path, llm_model, batch_size, min_q, max_q = args
                     return generate_qa_tool_(json.dumps({
                         "docs_path": docs_path,
                         "llm_model": llm_model,
@@ -525,7 +517,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
                     outputs=qa_out
                 )
-                with gr.Accordion("Parameter Information", open=False):
                     gr.Markdown(QARequest.__doc__ or "No description available.")
                 gr.Markdown("---")

 def clear_cache_tool(docs_path="data/docs"):
     """
+    🧹 Clear Cache MCP Tool.
     Deletes all files and directories inside docs_path on the server.
+    Args:
+        docs_path (str): The local path to the folder to clear. Defaults to 'data/docs'.
     """
     try:
         r = requests.post(
 def upload_docs_tool(files, docs_path="data/docs"):
     """
     Upload documents to the server's docs folder via FastAPI /upload_docs.
+    Args:
+        files (list): A list of local file paths, remote URLs, or file-like objects.
+        docs_path (str): The server folder path to upload documents to. Defaults to 'data/docs'.
     """
     import shutil, tempfile
     return call_api("/generate_validation_qa", json.loads(payload))
 def model_to_json(model_cls) -> str:
     return json.dumps({k: v.default for k, v in model_cls.model_fields.items()}, indent=2)
       </a>
       <img src="https://img.shields.io/badge/Python-3.9%2B-blue?logo=python" alt="Python">
       <a href="https://pypi.org/project/ragmint/">
+        <img src="https://img.shields.io/pypi/v/ragmint?color=blue" alt="PyPI">
       </a>
       <img src="https://img.shields.io/badge/License-Apache%202.0-green" alt="License">
       <img src="https://img.shields.io/badge/MCP-Enabled-green" alt="MCP">
                     label="URLs"
                 )
+                def upload_urls_tool(text, docs_path):
                     """
+                    Upload documents from a list of URLs to the server's docs folder.
+                    Args:
+                        text (str): A newline-separated string of document URLs to download.
+                        docs_path (str): The destination folder path on the server. Defaults to 'data/docs'.
                     """
                     urls = [u.strip() for u in text.split("\n") if u.strip()]
+                    return upload_docs_tool(urls, docs_path)
                 upload_mcp_path = gr.Textbox(value=DEFAULT_UPLOAD_PATH, label="Docs Path")
                 upload_mcp_btn = gr.Button("Upload", variant="primary")
             autotune_out = gr.Textbox(label="Response", lines=15)
+            def autotune_tool(
                     docs_path, embedding_model, num_chunk_pairs, metric,
                     search_type, trials, validation_choice, llm_model
+                ):
                 payload = {
                     "docs_path": docs_path,
                 outputs=autotune_out
             )
+            with gr.Accordion("➕ More Information", open=False):
                 gr.Markdown(AutotuneRequest.__doc__ or "No description available.")
             gr.Markdown("---")
                     label="RAG Strategy"
                 )
                 chunk_sizes = gr.Textbox(
                     value="200,400,600",
                     label="Chunk Sizes (comma-separated integers)"
                     label="Rerankers"
                 )
                 search_type = gr.Dropdown(
                     choices=["grid", "random", "bayesian"],
                     value="grid",
             # Function to convert inputs into payload and call API
+            def optimize_rag_tool(
                     docs_path, retriever, embedding_model, strategy, chunk_sizes,
                     overlaps, rerankers, search_type, trials, metric,
                     validation_choice, llm_model
+                ):
                 payload = {
                     "docs_path": docs_path,
             )
+            with gr.Accordion("➕ More Information", open=False):
                 gr.Markdown(OptimizeRequest.__doc__ or "No description available.")
             gr.Markdown("---")
                 qa_out = gr.Textbox(lines=15, label="Response")
+                def generate_qa_tool(docs_path, llm_model, batch_size, min_q, max_q):
                     return generate_qa_tool_(json.dumps({
                         "docs_path": docs_path,
                         "llm_model": llm_model,
                     outputs=qa_out
                 )
+                with gr.Accordion("➕ More Information", open=False):
                     gr.Markdown(QARequest.__doc__ or "No description available.")
                 gr.Markdown("---")

models.py CHANGED Viewed

@@ -8,19 +8,19 @@ class OptimizeRequest(BaseModel):
     """
     🔧 Explicit optimization request for RAG (Retrieval-Augmented Generation) pipelines.
-    Parameters:
-    - docs_path (str, optional): 📂 Folder containing your documents for RAG optimization. Default: "data/docs"
-    - retriever (List[str], optional): 🔍 Retriever type(s) to use. Default: ['faiss']. Example: 'bm25', 'faiss', 'chroma'
-    - embedding_model (List[str], optional): 🧠 Embedding model(s) to use. Default: ['sentence-transformers/all-MiniLM-L6-v2']
-    - strategy (List[str], optional): 🎯 RAG strategy to apply. Default: ['fixed']. Options: 'fixed', 'token', 'sentence'
-    - chunk_sizes (List[int], optional): 📏 List of chunk sizes to evaluate. Default: [200, 400, 600]
-    - overlaps (List[int], optional): 🔁 List of overlap values to test. Default: [50, 100, 200]
-    - rerankers (List[str], optional): ⚖️ Rerankers to apply after retrieval. Default: ['mmr']
-    - search_type (str, optional): 🔍 Search method for parameter exploration. Default: 'grid'. Options: 'grid', 'random', 'bayesian'
-    - trials (int, optional): 🧪 Number of optimization trials. Default: 5
-    - metric (str, optional): 📈 Metric to optimize. Default: 'faithfulness'
-    - validation_choice (str, optional): ✅ Source of validation data. Default: 'generate'. Options: blank (use default), 'generate', local path, HF dataset ID
-    - llm_model (str, optional): 🤖 LLM used for QA generation if validation_choice='generate'. Default: 'gemini-2.5-flash-lite'
     """
     docs_path: Optional[str] = Field(
         default="data/docs",
@@ -83,15 +83,15 @@ class AutotuneRequest(BaseModel):
     """
     ⚡ Automatically tunes RAG pipeline parameters based on document analysis.
-    Parameters:
-    - docs_path (str, optional): 📂 Folder containing documents for RAG optimization. Default: "data/docs"
-    - embedding_model (str, optional): 🧠 Embedding model to analyze. Default: 'sentence-transformers/all-MiniLM-L6-v2'
-    - num_chunk_pairs (int, optional): 🔢 Number of chunk pairs to analyze. Default: 5
-    - metric (str, optional): 📈 Metric to optimize. Default: 'faithfulness'
-    - search_type (str, optional): 🔍 Search method for parameter exploration. Default: 'grid'. Options: 'grid', 'random', 'bayesian'
-    - trials (int, optional): 🧪 Number of optimization trials. Default: 5
-    - validation_choice (str, optional): ✅ Source of validation data. Default: 'generate'. Options: blank, 'generate', local path, HF dataset ID
-    - llm_model (str, optional): 🤖 LLM used for QA generation if validation_choice='generate'. Default: 'gemini-2.5-flash-lite'
     """
     docs_path: Optional[str] = Field(
@@ -138,12 +138,12 @@ class QARequest(BaseModel):
     """
     🧩 Generate a validation QA dataset from documents for RAG evaluation.
-    Parameters:
-    - docs_path (str): 📂 Folder containing documents. Default: 'data/docs'
-    - llm_model (str): 🤖 LLM model used for question generation. Default: 'gemini-2.5-flash-lite'
-    - batch_size (int): 📦 Number of documents per batch. Default: 5
-    - min_q (int): ❓ Minimum number of questions per document. Default: 3
-    - max_q (int): ❓ Maximum number of questions per document. Default: 25
     """
     docs_path: str = Field(
         description="📂 Folder containing your documents to generate QA pairs from. Example: 'data/docs'",

     """
     🔧 Explicit optimization request for RAG (Retrieval-Augmented Generation) pipelines.
+    Args:
+        docs_path (str, optional): 📂 Folder containing your documents for RAG optimization. Default: "data/docs"
+        retriever (List[str], optional): 🔍 Retriever type(s) to use. Default: ['faiss']. Example: 'bm25', 'faiss', 'chroma'
+        embedding_model (List[str], optional): 🧠 Embedding model(s) to use. Default: ['sentence-transformers/all-MiniLM-L6-v2']
+        strategy (List[str], optional): 🎯 RAG strategy to apply. Default: ['fixed']. Options: 'fixed', 'token', 'sentence'
+        chunk_sizes (List[int], optional): 📏 List of chunk sizes to evaluate. Default: [200, 400, 600]
+        overlaps (List[int], optional): 🔁 List of overlap values to test. Default: [50, 100, 200]
+        rerankers (List[str], optional): ⚖️ Rerankers to apply after retrieval. Default: ['mmr']
+        search_type (str, optional): 🔍 Search method for parameter exploration. Default: 'grid'. Options: 'grid', 'random', 'bayesian'
+        trials (int, optional): 🧪 Number of optimization trials. Default: 5
+        metric (str, optional): 📈 Metric to optimize. Default: 'faithfulness'
+        validation_choice (str, optional): ✅ Source of validation data. Default: 'generate'. Options: blank (use default), 'generate', local path, HF dataset ID
+        llm_model (str, optional): 🤖 LLM used for QA generation if validation_choice='generate'. Default: 'gemini-2.5-flash-lite'
     """
     docs_path: Optional[str] = Field(
         default="data/docs",
     """
     ⚡ Automatically tunes RAG pipeline parameters based on document analysis.
+    Args:
+        docs_path (str, optional): 📂 Folder containing documents for RAG optimization. Default: "data/docs"
+        embedding_model (str, optional): 🧠 Embedding model to analyze. Default: 'sentence-transformers/all-MiniLM-L6-v2'
+        num_chunk_pairs (int, optional): 🔢 Number of chunk pairs to analyze. Default: 5
+        metric (str, optional): 📈 Metric to optimize. Default: 'faithfulness'
+        search_type (str, optional): 🔍 Search method for parameter exploration. Default: 'grid'. Options: 'grid', 'random', 'bayesian'
+        trials (int, optional): 🧪 Number of optimization trials. Default: 5
+        validation_choice (str, optional): ✅ Source of validation data. Default: 'generate'. Options: blank, 'generate', local path, HF dataset ID
+        llm_model (str, optional): 🤖 LLM used for QA generation if validation_choice='generate'. Default: 'gemini-2.5-flash-lite'
     """
     docs_path: Optional[str] = Field(
     """
     🧩 Generate a validation QA dataset from documents for RAG evaluation.
+    Args:
+        docs_path (str): 📂 Folder containing documents. Default: 'data/docs'
+        llm_model (str): 🤖 LLM model used for question generation. Default: 'gemini-2.5-flash-lite'
+        batch_size (int): 📦 Number of documents per batch. Default: 5
+        min_q (int): ❓ Minimum number of questions per document. Default: 3
+        max_q (int): ❓ Maximum number of questions per document. Default: 25
     """
     docs_path: str = Field(
         description="📂 Folder containing your documents to generate QA pairs from. Example: 'data/docs'",