# André Oliveira
# docs: updated tool info
# 709c564
# raw
# history blame
# 19.8 kB
import gradio as gr
import requests
import json
import os
import threading
from models import OptimizeRequest, AutotuneRequest, QARequest
from api import start_api
# Start FastAPI server in background
# (daemon thread: it will not keep the interpreter alive once the main thread exits).
threading.Thread(target=start_api, daemon=True).start()
# Base URL for internal calls; every tool below sends its HTTP requests here.
# NOTE(review): assumes start_api serves on 127.0.0.1:8000 — confirm in api.py.
BASE_INTERNAL = "http://127.0.0.1:8000"
def call_api(endpoint: str, payload: dict) -> str:
    """POST ``payload`` as JSON to the internal API and return the reply as text.

    Args:
        endpoint: Path appended to ``BASE_INTERNAL``, e.g. ``"/optimize_rag"``.
        payload: JSON-serialisable request body.

    Returns:
        The response body pretty-printed as JSON (2-space indent). If the
        server replies with something that is not JSON, the string
        ``"HTTP <status>: <raw body>"`` is returned instead. Any other failure
        (connection error, timeout, ...) is reported as the exception message,
        so ordinary errors never propagate to the caller.
    """
    try:
        r = requests.post(f"{BASE_INTERNAL}{endpoint}", json=payload, timeout=120)
        try:
            body = r.json()
        except ValueError:
            # Non-JSON reply (for example a plain-text error page): surface the
            # status and raw body rather than an opaque JSON decode message.
            return f"HTTP {r.status_code}: {r.text}"
        return json.dumps(body, indent=2)
    except Exception as e:
        # Deliberate catch-all: callers hand this string straight back as the
        # tool result, so errors are reported as text instead of raised.
        return str(e)
def clear_cache_tool(docs_path="data/docs"):
    """
    🗑️ Clear Cache MCP Tool.

    Requests removal of everything stored under docs_path by POSTing the path
    as form data to the internal /clear_cache endpoint.

    Args:
        docs_path (str): The local path to the folder to clear. Defaults to 'data/docs'.

    Returns:
        dict: The endpoint's JSON reply, or {"error": "<message>"} when the
        request, the status check, or the JSON decoding fails.
    """
    outcome = None
    try:
        response = requests.post(
            f"{BASE_INTERNAL}/clear_cache",
            data={"docs_path": docs_path},
            timeout=60,
        )
        # Turn 4xx/5xx answers into exceptions so they land in the error dict.
        response.raise_for_status()
        outcome = response.json()
    except Exception as failure:
        outcome = {"error": str(failure)}
    return outcome
def upload_docs_tool(files, docs_path="data/docs"):
    """
    Upload documents to the server's docs folder via FastAPI /upload_docs.

    Args:
        files (list): A list of local file paths, remote URLs, or file-like objects.
        docs_path (str): The server folder path to upload documents to. Defaults to 'data/docs'.

    Returns:
        dict: The JSON reply of /upload_docs, or {"error": "No files provided."}
        when ``files`` is None or empty.

    Raises:
        requests.RequestException: If a download or the upload request fails,
            including non-2xx answers (via ``raise_for_status``).
        OSError: If a local path cannot be opened.
    """
    import tempfile
    from urllib.parse import urlparse

    if not files:
        # Nothing to upload (None or an empty list): answer directly instead of
        # crashing on iteration or posting a form without any file part.
        return {"error": "No files provided."}

    os.makedirs(docs_path, exist_ok=True)
    files_payload = []
    temp_files = []
    try:
        for f in files:
            if isinstance(f, str) and f.startswith(("http://", "https://")):
                # Download URL to a temp file (txt aware)
                resp = requests.get(f, timeout=60)
                resp.raise_for_status()
                # Take the extension from the URL *path* only, so a query
                # string or fragment never ends up in the file suffix.
                ext = os.path.splitext(urlparse(f).path)[1] or ".txt"
                tmp = tempfile.NamedTemporaryFile(delete=False, suffix=ext)
                # Register before writing so the file is removed even if the
                # write below raises.
                temp_files.append(tmp.name)
                with tmp:
                    if "text" in resp.headers.get("Content-Type", "").lower():
                        # Re-encode decoded text as UTF-8.
                        tmp.write(resp.text.encode("utf-8"))
                    else:
                        tmp.write(resp.content)
                files_payload.append(("files", open(tmp.name, "rb")))
            elif isinstance(f, str):
                # Local file path
                files_payload.append(("files", open(f, "rb")))
            else:
                # File-like object
                files_payload.append(("files", f))
        resp = requests.post(
            f"{BASE_INTERNAL}/upload_docs",
            files=files_payload,
            data={"docs_path": docs_path},
            timeout=120,  # never hang forever on an unresponsive server
        )
        resp.raise_for_status()
        return resp.json()
    finally:
        # Close all file handles. Caller-supplied file-like objects are closed
        # as well, exactly as before.
        for _, file_obj in files_payload:
            if not file_obj.closed:
                file_obj.close()
        # Clean up temp files; best effort, a leftover temp file is harmless.
        for tmp_file in temp_files:
            try:
                os.unlink(tmp_file)
            except OSError:
                pass
def optimize_rag_tool_(payload: str) -> str:
    """🔧 Explicit optimization request: the caller supplies every pipeline setting.

    Args:
        payload: JSON text of the request; decoded and POSTed to /optimize_rag.

    Returns:
        Whatever ``call_api`` returns for that endpoint.
    """
    request_body = json.loads(payload)
    return call_api("/optimize_rag", request_body)
def autotune_tool_(payload: str) -> str:
    """🔧 Autotune RAG: asks the server to recommend chunk sizes and embedding models.

    Args:
        payload: JSON text of the request; decoded and POSTed to /autotune_rag.

    Returns:
        Whatever ``call_api`` returns for that endpoint.
    """
    request_body = json.loads(payload)
    return call_api("/autotune_rag", request_body)
def generate_qa_tool_(payload: str) -> str:
    """🧩 Requests a validation QA dataset for RAG evaluation.

    Args:
        payload: JSON text of the request; decoded and POSTed to
            /generate_validation_qa.

    Returns:
        Whatever ``call_api`` returns for that endpoint.
    """
    request_body = json.loads(payload)
    return call_api("/generate_validation_qa", request_body)
def model_to_json(model_cls) -> str:
    """Render the ``default`` of every entry in ``model_cls.model_fields`` as JSON.

    Args:
        model_cls: Object exposing a ``model_fields`` mapping whose values
            carry a ``default`` attribute.

    Returns:
        A JSON object (2-space indent) mapping field name to default value.
    """
    defaults = {}
    for field_name, field_info in model_cls.model_fields.items():
        defaults[field_name] = field_info.default
    return json.dumps(defaults, indent=2)
# Default inputs
# Folder pre-filled in the "Docs Path" boxes of the upload and clear-cache tabs.
DEFAULT_UPLOAD_PATH = "data/docs"
# JSON renderings of each request model's field defaults (built by model_to_json).
DEFAULT_OPTIMIZE_JSON = model_to_json(OptimizeRequest)
DEFAULT_AUTOTUNE_JSON = model_to_json(AutotuneRequest)
DEFAULT_QA_JSON = model_to_json(QARequest)
# Build the Gradio UI; the resulting Blocks app is bound to `demo`.
with gr.Blocks(theme=gr.themes.Ocean()) as demo:
gr.Markdown("# 🧠 Ragmint MCP Server")
# Static badge row (raw HTML).
# NOTE(review): the badge links to the MCP-1st-Birthday Space while the
# Markdown below lists the andyolivers Space — confirm which one is canonical.
gr.HTML("""
<div style="display:flex; gap:5px; flex-wrap:wrap; align-items:center;">
<a href="https://huggingface.co/spaces/MCP-1st-Birthday/ragmint-mcp-server">
<img src="https://img.shields.io/badge/HF-Space-blue" alt="HF Space">
</a>
<img src="https://img.shields.io/badge/Python-3.9%2B-blue?logo=python" alt="Python">
<a href="https://pypi.org/project/ragmint/">
<img src="https://img.shields.io/pypi/v/ragmint?color=blue" alt="PyPI">
</a>
<img src="https://img.shields.io/badge/License-Apache%202.0-green" alt="License">
<img src="https://img.shields.io/badge/MCP-Enabled-green" alt="MCP">
<img src="https://img.shields.io/badge/Status-Beta-orange" alt="Status">
<img src="https://img.shields.io/badge/Optuna-Bayesian%20Optimization-6f42c1?logo=optuna&logoColor=white" alt="Optuna">
<img src="https://img.shields.io/badge/Google%20Gemini-LLM-lightblue?logo=google&logoColor=white" alt="Google Gemini 2.5">
</div>
""")
# Landing-page description: tool list, feature summary, MCP endpoint and overview table.
# NOTE(review): the tool names quoted in this text (e.g. `autotune`, `generate_qa`)
# are not checked against the function names wired up below — keep them in sync by hand.
gr.Markdown("""
**AI-Powered Optimization for RAG Pipelines**
This server provides **6 MCP Tools** for RAG pipeline tuning, dataset generation & workspace control — all programmatically accessible through MCP clients like **Claude Desktop, Cursor, VS Code MCP Extension**, and more.
<br>
## 🔧 MCP Tools
- 📄 **Upload Docs**: Upload .txt files to workspace for evaluation using `upload_docs`.
- 🔗 **Upload URLs**: Import remote docs via URLs with `upload_urls`.
- 🔧 **Optimize RAG**: Full hyperparameter search (Grid/Random/Bayesian) with metrics on `optimize_rag`.
- ⚡️ **Autotune RAG**: Automated recommendations for best chunking and embeddings with `autotune`.
- 🧩 **Generate QA Dataset**: Create validation QA pairs with LLMs for benchmarking using `generate_qa`.
- 🗑️ **Clear Cache**: Reset workspace and delete stored docs with `clear_cache`.
<br>
## 🧠 What Ragmint Solves
- Automated RAG hyperparameter optimization.
- Retriever, embedding, reranker selection.
- Synthetic validation QA generation.
- Evaluation metrics (faithfulness, latency, etc.).
- Experiment tracking & reproducible pipeline comparison.
🔬 **Built for RAG engineers, researchers, and LLM developers** who want consistent performance improvement without trial-and-error.
<br>
## ⚙ Powered by
- Optuna (Bayesian Optimization).
- Google Gemini 2.5 Flash Lite/Pro.
- FAISS, Chroma, BM25, scikit-learn retrievers.
- Sentence-Transformers/BGE embeddings.
<br>
## 🌐 MCP Connection
**HuggingFace Space**
https://huggingface.co/spaces/andyolivers/ragmint-mcp-server
**MCP Endpoint (SSE — Recommended)**
https://andyolivers-ragmint-mcp-server.hf.space/gradio_api/mcp/sse
<br>
## 📦 Example MCP Use Cases
- Run Auto-Optimization for RAG pipelines.
- Compare embedding + retriever combinations.
- Automatically generate QA validation datasets.
- Rapid experiment iteration inside Claude/Cursor.
<br>
## 🧩 MCP Tools Overview
| MCP Tool | Core Function |
|----------|---------------|
| upload_docs | Upload .txt documents |
| upload_urls | Import documents from external URLs |
| optimize_rag | Hyperparameter search with metrics |
| autotune | Automated RAG configuration suggestions |
| generate_qa | Synthetic QA generation |
| clear_cache | Clean workspace |
---
""")
# ---- Upload tab: a file uploader and a URL list, each with its own path box ----
with gr.Tab("📂 Upload"):
with gr.Row():
# Upload Documents
with gr.Column(scale=1):
gr.Markdown("## Upload Documents")
gr.Markdown("📄 Upload files (local paths or URLs) to your `data/docs` folder.")
# type="filepath": the handler receives file paths rather than file objects.
upload_files = gr.File(file_count="multiple", type="filepath")
upload_path = gr.Textbox(value=DEFAULT_UPLOAD_PATH, label="Docs Path")
upload_btn = gr.Button("Upload", variant="huggingface")
upload_out = gr.JSON(label="Response")
# Clicking sends (files, path) to upload_docs_tool and shows its result as JSON.
upload_btn.click(upload_docs_tool, inputs=[upload_files, upload_path], outputs=upload_out)
# Upload MCP Documents (no file uploader)
with gr.Column(scale=1):
gr.Markdown("## Upload Documents from URLs")
gr.Markdown("🔗 Upload files (URLs) to your `data/docs` folder on MCP.")
# Free-text area; upload_urls_tool (defined next) splits it into one URL per line.
upload_mcp_input = gr.TextArea(
placeholder="Paste URLs (one per line without commas)",
label="URLs"
)
def upload_urls_tool(text, docs_path="data/docs"):
    """
    Upload documents from a list of URLs to the server's docs folder.

    Args:
        text (str): A newline-separated string of document URLs to download.
        docs_path (str): The destination folder path on the server. Defaults to 'data/docs'.

    Returns:
        dict: The result of ``upload_docs_tool`` for the parsed URLs, or
        {"error": "No URLs provided."} when ``text`` is None, empty, or
        contains only blank lines.
    """
    # `text or ""` tolerates None; splitlines() handles \n, \r\n and \r alike.
    urls = [line.strip() for line in (text or "").splitlines() if line.strip()]
    if not urls:
        # Avoid a pointless upload request that carries no documents.
        return {"error": "No URLs provided."}
    return upload_docs_tool(urls, docs_path)
# Remaining widgets of the URL-upload column and their wiring.
upload_mcp_path = gr.Textbox(value=DEFAULT_UPLOAD_PATH, label="Docs Path")
upload_mcp_btn = gr.Button("Upload", variant="huggingface")
upload_mcp_out = gr.JSON(label="Response")
# Clicking sends (URL text, path) to upload_urls_tool and shows its result as JSON.
upload_mcp_btn.click(
upload_urls_tool,
inputs=[upload_mcp_input, upload_mcp_path],
outputs=upload_mcp_out
)
gr.Markdown("---")
# ---- Autotune tab ----
with gr.Tab("⚡ Autotune"):
# Autotune RAG
with gr.Column():
gr.Markdown("## Autotune RAG")
gr.Markdown(" ⚡ Automatically tunes RAG pipeline parameters based on document analysis.")
# Collapsed-by-default settings; each widget below becomes one argument of autotune_tool.
with gr.Accordion("⚙ Settings", open=False):
docs_path = gr.Textbox(value="data/docs", label="Docs Path")
embedding_model = gr.Textbox(
value="sentence-transformers/all-MiniLM-L6-v2",
label="Embedding Model"
)
num_chunk_pairs = gr.Slider(
minimum=1, maximum=20, step=1, value=5, label="Number of chunk pairs"
)
metric = gr.Dropdown(
choices=["faithfulness"],
value="faithfulness",
label="Metric"
)
search_type = gr.Dropdown(
choices=["grid", "random", "bayesian"],
value="grid",
label="Search Type"
)
trials = gr.Slider(
minimum=1, maximum=100, step=1, value=5, label="Optimization Trials"
)
# NOTE(review): the empty-string choice presumably means "do not generate a
# validation set" — confirm against the API's handling of validation_choice.
validation_choice = gr.Dropdown(
choices=["generate", ""],
value="generate",
label="Validation Choice"
)
llm_model = gr.Textbox(
value="gemini-2.5-flash-lite",
label="LLM Model"
)
autotune_btn = gr.Button("Autotune", variant="huggingface")
# Plain textbox: the tool returns text (pretty-printed JSON or an error message).
autotune_out = gr.Textbox(label="Response", lines=15)
def autotune_tool(
    docs_path, embedding_model, num_chunk_pairs, metric,
    search_type, trials, validation_choice, llm_model
):
    # Described in a comment rather than a docstring because __doc__ is
    # overwritten with AutotuneRequest.__doc__ right after this definition.
    #
    # Packs the widget values into one request dict (keys match the parameter
    # names), serialises it to JSON and forwards it to autotune_tool_.
    # Returns the text produced by autotune_tool_.
    request = dict(
        docs_path=docs_path,
        embedding_model=embedding_model,
        num_chunk_pairs=num_chunk_pairs,
        metric=metric,
        search_type=search_type,
        trials=trials,
        validation_choice=validation_choice,
        llm_model=llm_model,
    )
    serialized = json.dumps(request)
    return autotune_tool_(serialized)
# Reuse the request model's docstring as the handler's docstring.
# NOTE(review): presumably so the exposed tool description matches the model — confirm.
autotune_tool.__doc__ = AutotuneRequest.__doc__
# Clicking passes the settings widgets, in parameter order, to autotune_tool.
autotune_btn.click(
autotune_tool,
inputs=[
docs_path, embedding_model, num_chunk_pairs, metric,
search_type, trials, validation_choice, llm_model
],
outputs=autotune_out
)
with gr.Accordion("➕ More Information", open=False):
gr.Markdown(AutotuneRequest.__doc__ or "No description available.")
gr.Markdown("---")
# ---- Optimize tab ----
# The widget variables below reuse names from the Autotune tab (docs_path,
# embedding_model, metric, ...). The Autotune click handler above was already
# given the earlier component objects, so rebinding the names here does not affect it.
with gr.Tab("🔧 Optimize"):
# Optimize RAG
with gr.Column():
gr.Markdown("## Optimize RAG")
gr.Markdown("🔧 Explicit optimization request for RAG (Retrieval-Augmented Generation) pipelines.")
# Parameters accordion
with gr.Accordion("⚙ Settings", open=False):
docs_path = gr.Textbox(value="data/docs", label="Docs Path")
# NOTE(review): the label "Search Type" duplicates the search_type dropdown's
# label; this field selects retrievers, so "Retriever" looks intended.
# NOTE(review): value is a plain string although this is a multi-select group —
# confirm Gradio accepts it or switch to ["faiss"].
retriever = gr.CheckboxGroup(
choices=["faiss", "chroma", "numpy", "sklearn","bm25"],
value="faiss",
label="Search Type"
)
embedding_model = gr.Textbox(
value="sentence-transformers/all-MiniLM-L6-v2",
label="Embedding Model(s) (comma-separated)"
)
# NOTE(review): same string-vs-list question as for `retriever` above.
strategy = gr.CheckboxGroup(
choices=["fixed","token","sentence"],
value="fixed",
label="RAG Strategy"
)
chunk_sizes = gr.Textbox(
value="200,400,600",
label="Chunk Sizes (comma-separated integers)"
)
overlaps = gr.Textbox(
value="50,100,200",
label="Overlaps (comma-separated integers)"
)
rerankers = gr.Dropdown(
choices=["mmr"],
value="mmr",
label="Rerankers"
)
search_type = gr.Dropdown(
choices=["grid", "random", "bayesian"],
value="grid",
label="Search Type"
)
trials = gr.Slider(
minimum=1, maximum=100, step=1, value=5,
label="Number of Trials"
)
metric = gr.Dropdown(
choices=["faithfulness"],
value="faithfulness",
label="Metric"
)
validation_choice = gr.Dropdown(
choices=["generate", ""],
value="generate",
label="Validation Choice"
)
llm_model = gr.Textbox(
value="gemini-2.5-flash-lite",
label="LLM Model"
)
optimize_btn = gr.Button("Optimize", variant="huggingface")
optimize_out = gr.Textbox(label="Response", lines=15)
# Function to convert inputs into payload and call API
def optimize_rag_tool(
    docs_path, retriever, embedding_model, strategy, chunk_sizes,
    overlaps, rerankers, search_type, trials, metric,
    validation_choice, llm_model
):
    # Described in a comment rather than a docstring because __doc__ is
    # overwritten with OptimizeRequest.__doc__ right after this definition.
    #
    # Builds the /optimize_rag request from the widget values and forwards it,
    # JSON-encoded, to optimize_rag_tool_. The list-valued fields (retriever,
    # embedding_model, strategy, chunk_sizes, overlaps, rerankers) accept a
    # comma-separated string, a list/tuple, or None, so a cleared dropdown or
    # a client that already sends lists no longer crashes on `.split`.
    # Returns the text produced by optimize_rag_tool_.
    # Raises ValueError if a chunk size or overlap is not an integer.

    def _as_list(value):
        # Normalise None / "a, b" / ["a", "b"] into a list of non-empty,
        # stripped strings.
        if value is None:
            return []
        parts = value.split(",") if isinstance(value, str) else value
        cleaned = (str(part).strip() for part in parts)
        return [part for part in cleaned if part]

    payload = {
        "docs_path": docs_path,
        "retriever": _as_list(retriever),
        "embedding_model": _as_list(embedding_model),
        "strategy": _as_list(strategy),
        "chunk_sizes": [int(c) for c in _as_list(chunk_sizes)],
        "overlaps": [int(o) for o in _as_list(overlaps)],
        "rerankers": _as_list(rerankers),
        "search_type": search_type,
        "trials": trials,
        "metric": metric,
        "validation_choice": validation_choice,
        "llm_model": llm_model
    }
    return optimize_rag_tool_(json.dumps(payload))
# Reuse the request model's docstring as the handler's docstring.
# NOTE(review): presumably so the exposed tool description matches the model — confirm.
optimize_rag_tool.__doc__ = OptimizeRequest.__doc__
# Clicking passes the settings widgets, in parameter order, to optimize_rag_tool.
optimize_btn.click(
optimize_rag_tool,
inputs=[
docs_path, retriever, embedding_model, strategy, chunk_sizes,
overlaps, rerankers, search_type, trials, metric,
validation_choice, llm_model
],
outputs=optimize_out
)
with gr.Accordion("➕ More Information", open=False):
gr.Markdown(OptimizeRequest.__doc__ or "No description available.")
gr.Markdown("---")
# ---- Generate QA tab ----
with gr.Tab("🧩 Generate QA"):
# Generate QA
with gr.Column():
gr.Markdown("## Generate QA")
gr.Markdown("🧩 Generate a validation QA dataset from documents for RAG evaluation.")
# NOTE(review): a second Tab with the same title is opened inside the tab
# above — likely a leftover; confirm whether the nested tab is intended.
with gr.Tab("🧩 Generate QA"):
with gr.Accordion("⚙ Settings", open=False):
# docs_path / llm_model rebind names used by the earlier tabs; those tabs'
# click handlers already hold their own component objects.
docs_path = gr.Textbox(value="data/docs", label="Docs Path")
llm_model = gr.Textbox(value="gemini-2.5-flash-lite", label="LLM Model")
# Sliders are created with positional (minimum, maximum) arguments.
batch_size = gr.Slider(1, 50, step=1, value=5, label="Batch Size")
min_q = gr.Slider(1, 20, step=1, value=3, label="Min Questions")
max_q = gr.Slider(1, 50, step=1, value=25, label="Max Questions")
qa_btn = gr.Button("Generate QA", variant="huggingface")
qa_out = gr.Textbox(lines=15, label="Response")
def generate_qa_tool(docs_path, llm_model, batch_size, min_q, max_q):
    # Described in a comment rather than a docstring because __doc__ is
    # overwritten with QARequest.__doc__ right after this definition.
    #
    # Packs the widget values into one request dict (keys match the parameter
    # names), serialises it to JSON and forwards it to generate_qa_tool_.
    # Returns the text produced by generate_qa_tool_.
    request = dict(
        docs_path=docs_path,
        llm_model=llm_model,
        batch_size=batch_size,
        min_q=min_q,
        max_q=max_q,
    )
    serialized = json.dumps(request)
    return generate_qa_tool_(serialized)
# Reuse the request model's docstring as the handler's docstring.
# NOTE(review): presumably so the exposed tool description matches the model — confirm.
generate_qa_tool.__doc__ = QARequest.__doc__
# Clicking passes the settings widgets, in parameter order, to generate_qa_tool.
qa_btn.click(
generate_qa_tool,
inputs=[docs_path, llm_model, batch_size, min_q, max_q],
outputs=qa_out
)
with gr.Accordion("➕ More Information", open=False):
gr.Markdown(QARequest.__doc__ or "No description available.")
gr.Markdown("---")
# ---- Clear Cache tab ----
with gr.Tab("🗑️ Clear Cache"):
# Clear Cache
with gr.Column():
gr.Markdown("## Clear Cache")
gr.Markdown("🗑️ Deletes all files and directories inside docs_path on the server.")
clear_path = gr.Textbox(value=DEFAULT_UPLOAD_PATH, label="Docs Path to Clear")
clear_btn = gr.Button("Clear Cache", variant="huggingface")
clear_out = gr.JSON(label="Response")
# Clicking sends the path to clear_cache_tool and shows its dict result as JSON.
clear_btn.click(clear_cache_tool, inputs=[clear_path], outputs=clear_out)
gr.Markdown("---")
if __name__ == "__main__":
    # Script entry point: serve the UI on all interfaces, port 7860, with the
    # MCP server enabled and errors shown to the user.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "mcp_server": True,
        "show_error": True,
    }
    demo.launch(**launch_options)