|
|
import gradio as gr |
|
|
import requests |
|
|
import json |
|
|
import os |
|
|
import threading |
|
|
from models import OptimizeRequest, AutotuneRequest, QARequest |
|
|
from api import start_api |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Run the API (start_api) on a daemon thread so it lives alongside the
# Gradio UI in this process and does not block interpreter shutdown.
_api_thread = threading.Thread(target=start_api, daemon=True)
_api_thread.start()

# Base URL used by every helper below to reach the API over loopback.
BASE_INTERNAL = "http://127.0.0.1:8000"
|
|
|
|
|
|
|
|
def call_api(endpoint: str, payload: dict) -> str:
    """POST ``payload`` as JSON to ``BASE_INTERNAL + endpoint``.

    Returns the decoded JSON reply re-serialized with a 2-space indent.
    Any exception raised along the way (connection, timeout, JSON decoding,
    serialization) is caught and its message is returned as plain text, so
    this function never raises.
    """
    try:
        response = requests.post(
            f"{BASE_INTERNAL}{endpoint}",
            json=payload,
            timeout=120,
        )
        decoded = response.json()
        return json.dumps(decoded, indent=2)
    except Exception as err:
        # Deliberately broad: the message is surfaced to the caller as text.
        return str(err)
|
|
|
|
|
|
|
|
def clear_cache_tool(docs_path="data/docs"):
    """
    🗑️ Clear Cache MCP Tool.

    Deletes all files and directories inside docs_path on the server.

    Args:
        docs_path (str): The local path to the folder to clear. Defaults to 'data/docs'.
    """
    try:
        response = requests.post(
            f"{BASE_INTERNAL}/clear_cache",
            data={"docs_path": docs_path},  # sent as form fields, not JSON
            timeout=60,
        )
        response.raise_for_status()
        outcome = response.json()
    except Exception as err:
        # Any failure is reported as a JSON-friendly dict instead of raising.
        outcome = {"error": str(err)}
    return outcome
|
|
|
|
|
|
|
|
def upload_docs_tool(files, docs_path="data/docs"):
    """
    Upload documents to the server's docs folder via FastAPI /upload_docs.

    Remote URLs are downloaded to temporary files first; local paths are opened
    directly; any other object is forwarded as-is as a file-like object. A single
    path/URL string is accepted and treated as a one-element list. When nothing is
    supplied an ``{"error": ...}`` dict is returned instead of contacting the API.
    HTTP and file errors are not caught here and propagate to the caller.

    Args:
        files (list): A list of local file paths, remote URLs, or file-like objects.
        docs_path (str): The server folder path to upload documents to. Defaults to 'data/docs'.
    """
    import tempfile
    from urllib.parse import urlparse

    # Nothing selected (None / empty list / empty string): fail softly.
    if not files:
        return {"error": "No files provided."}
    # A bare string would otherwise be iterated character by character.
    if isinstance(files, str):
        files = [files]

    os.makedirs(docs_path, exist_ok=True)

    files_payload = []  # ("files", file_obj) tuples handed to requests
    temp_files = []     # temp paths created for URL downloads, removed in finally

    # NOTE(review): string entries are opened/fetched verbatim; if untrusted
    # clients can reach this function, consider restricting paths and hosts.
    try:
        for f in files:
            if isinstance(f, str) and f.startswith(("http://", "https://")):
                resp = requests.get(f, timeout=60)
                resp.raise_for_status()

                # Take the extension from the URL *path* so a query string or
                # fragment does not end up in the temp-file suffix.
                ext = os.path.splitext(urlparse(f).path)[1] or ".txt"
                tmp = tempfile.NamedTemporaryFile(delete=False, suffix=ext)
                # Register before writing so the file is removed even if the
                # write below fails.
                temp_files.append(tmp.name)
                with tmp:
                    if "text" in resp.headers.get("Content-Type", "").lower():
                        # Text responses are normalized to UTF-8 on disk.
                        tmp.write(resp.text.encode("utf-8"))
                    else:
                        tmp.write(resp.content)

                files_payload.append(("files", open(tmp.name, "rb")))

            elif isinstance(f, str):
                files_payload.append(("files", open(f, "rb")))

            else:
                files_payload.append(("files", f))

        resp = requests.post(
            f"{BASE_INTERNAL}/upload_docs",
            files=files_payload,
            data={"docs_path": docs_path},
            timeout=120,  # same bound as the other API calls in this module
        )
        resp.raise_for_status()
        return resp.json()

    finally:
        # Close every handle we passed along; tolerate objects that lack
        # ``closed``/``close`` instead of masking the real error.
        for _, file_obj in files_payload:
            close = getattr(file_obj, "close", None)
            if callable(close) and not getattr(file_obj, "closed", False):
                close()

        # Best-effort removal of downloaded temp files.
        for tmp_file in temp_files:
            try:
                os.unlink(tmp_file)
            except OSError:
                pass
|
|
|
|
|
|
|
|
def optimize_rag_tool_(payload: str) -> str:
    """🔧 Explicit optimization request: user provides all pipeline configs manually."""
    # ``payload`` is JSON text; decode it and forward to /optimize_rag.
    request_body = json.loads(payload)
    return call_api("/optimize_rag", request_body)
|
|
|
|
|
|
|
|
def autotune_tool_(payload: str) -> str:
    """🔧 Autotune RAG: recommends chunk sizes and embedding models automatically."""
    # ``payload`` is JSON text; decode it and forward to /autotune_rag.
    request_body = json.loads(payload)
    return call_api("/autotune_rag", request_body)
|
|
|
|
|
|
|
|
def generate_qa_tool_(payload: str) -> str:
    """🧩 Generates a validation QA dataset for RAG evaluation."""
    # ``payload`` is JSON text; decode it and forward to /generate_validation_qa.
    request_body = json.loads(payload)
    return call_api("/generate_validation_qa", request_body)
|
|
|
|
|
|
|
|
|
|
|
def model_to_json(model_cls) -> str:
    """Render the default value of every field of ``model_cls`` as indented JSON.

    ``model_cls`` must expose a ``model_fields`` mapping of field name to a
    field object with a ``default`` attribute (the pydantic v2 layout).

    Fields without a plain default are handled instead of crashing
    ``json.dumps`` on a non-serializable sentinel:
      * a field whose ``is_required()`` returns true is emitted as ``null``;
      * a field with a ``default_factory`` gets the factory's result
        (``null`` if the factory cannot be called without arguments).

    Returns:
        JSON text with a 2-space indent, keys in ``model_fields`` order.
    """

    def _default_of(field):
        # Required fields carry no usable default.
        is_required = getattr(field, "is_required", None)
        if callable(is_required) and is_required():
            return None
        factory = getattr(field, "default_factory", None)
        if factory is not None:
            try:
                return factory()
            except TypeError:
                # Factory expects arguments we cannot supply here.
                return None
        return field.default

    defaults = {
        name: _default_of(field)
        for name, field in model_cls.model_fields.items()
    }
    return json.dumps(defaults, indent=2)
|
|
|
|
|
|
|
|
|
|
|
# Default docs folder pre-filled in the path textboxes below.
DEFAULT_UPLOAD_PATH = "data/docs"

# JSON templates built from each request model's field defaults.
DEFAULT_OPTIMIZE_JSON, DEFAULT_AUTOTUNE_JSON, DEFAULT_QA_JSON = (
    model_to_json(request_cls)
    for request_cls in (OptimizeRequest, AutotuneRequest, QARequest)
)
|
|
|
|
|
|
|
|
from claude_theme import Claude |
|
|
with gr.Blocks(theme=Claude()) as demo:
    # Page title.
    gr.Markdown("# 🧠 Ragmint MCP Server")

    # Row of shields.io badges and project links.
    badges_html = """
    <div style="display:flex; gap:5px; flex-wrap:wrap; align-items:center;">
    <a href="https://huggingface.co/spaces/MCP-1st-Birthday/ragmint-mcp-server">
    <img src="https://img.shields.io/badge/HF-Space-blue" alt="HF Space">
    </a>
    <img src="https://img.shields.io/badge/Python-3.9%2B-blue?logo=python" alt="Python">
    <a href="https://pypi.org/project/ragmint/">
    <img src="https://img.shields.io/pypi/v/ragmint?color=blue" alt="PyPI">
    </a>
    <img src="https://img.shields.io/badge/License-Apache%202.0-green" alt="License">
    <img src="https://img.shields.io/badge/MCP-Enabled-green" alt="MCP">
    <img src="https://img.shields.io/badge/Status-Beta-orange" alt="Status">
    <img src="https://img.shields.io/badge/Optuna-Bayesian%20Optimization-6f42c1?logo=optuna&logoColor=white" alt="Optuna">
    <img src="https://img.shields.io/badge/Google%20Gemini-LLM-lightblue?logo=google&logoColor=white" alt="Google Gemini 2.5">
    <a href="https://www.linkedin.com/posts/andyolivers_ragmint-mcp-server-a-hugging-face-space-activity-7399028674261348352-P5wy?utm_source=share&utm_medium=member_desktop&rcm=ACoAABanwk4Bp0A-FVwO9wyzwVp0g_yqZoRDptI">
    <img src="https://img.shields.io/badge/LinkedIn-Post-blue" alt="LinkedIn">
    </a>
    </div>
    """
    gr.HTML(badges_html)

    # Centered project logo, with the CSS that sizes it.
    logo_html = """
    <style>
    .center-wrapper {
    display: flex;
    justify-content: center;
    align-items: center;
    }
    .center-wrapper img {
    height: 100px !important;
    }
    </style>

    <div class="center-wrapper">
    <img src="https://raw.githubusercontent.com/andyolivers/ragmint/main/src/ragmint/assets/img/ragmint_logo.png" alt="Ragmint Banner">
    </div>
    """
    gr.HTML(logo_html)

    # Landing-page overview: tools, purpose, stack and MCP connection details.
    overview_md = """
    **AI-Powered Optimization for RAG Pipelines**

    This server provides **6 MCP Tools** for RAG pipeline tuning, dataset generation & workspace control — all programmatically accessible through MCP clients like **Claude Desktop, Cursor, VS Code MCP Extension**, and more.

    <br>

    ## 🔧 MCP Tools

    - 📄 **Upload Docs**: Upload .txt files to workspace for evaluation using `upload_docs`.
    - 🔗 **Upload URLs**: Import remote docs via URLs with `upload_urls`.
    - 🔧 **Optimize RAG**: Full hyperparameter search (Grid/Random/Bayesian) with metrics on `optimize_rag`.
    - ⚡️ **Autotune RAG**: Automated recommendations for best chunking and embeddings with `autotune`.
    - 🧩 **Generate QA Dataset**: Create validation QA pairs with LLMs for benchmarking using `generate_qa`.
    - 🗑️ **Clear Cache**: Reset workspace and delete stored docs with `clear_cache`.

    <br>

    ## 🧠 What Ragmint Solves

    - Automated RAG hyperparameter optimization.
    - Retriever, embedding, reranker selection.
    - Synthetic validation QA generation.
    - Evaluation metrics (faithfulness, latency, etc.).
    - Experiment tracking & reproducible pipeline comparison.

    🔬 **Built for RAG engineers, researchers, and LLM developers** who want consistent performance improvement without trial-and-error.

    <br>

    ## ⚙ Powered by

    - Optuna (Bayesian Optimization).
    - Google Gemini 2.5 Flash Lite/Pro.
    - FAISS, Chroma, BM25, scikit-learn retrievers.
    - Sentence-Transformers/BGE embeddings.

    <br>

    ## 🌐 MCP Connection

    **HuggingFace Space**
    https://huggingface.co/spaces/andyolivers/ragmint-mcp-server

    **MCP Endpoint (SSE — Recommended)**
    https://andyolivers-ragmint-mcp-server.hf.space/gradio_api/mcp/sse

    <br>

    ## 📦 Example MCP Use Cases

    - Run Auto-Optimization for RAG pipelines.
    - Compare embedding + retriever combinations.
    - Automatically generate QA validation datasets.
    - Rapid experiment iteration inside Claude/Cursor.


    ---

    """
    gr.Markdown(overview_md)
|
|
|
|
|
with gr.Tab("📂 Upload"):
    with gr.Row():

        # Left column: upload files picked in the browser.
        with gr.Column(scale=1):
            gr.Markdown("## Upload Documents")
            gr.Markdown("📄 Upload files (local paths or URLs) to your `data/docs` folder.")
            upload_files = gr.File(file_count="multiple", type="filepath")
            upload_path = gr.Textbox(value=DEFAULT_UPLOAD_PATH, label="Docs Path")
            upload_btn = gr.Button("Upload", variant="primary")
            upload_out = gr.JSON(label="Response")
            upload_btn.click(upload_docs_tool, inputs=[upload_files, upload_path], outputs=upload_out)

        # Right column: upload documents fetched from pasted URLs.
        with gr.Column(scale=1):
            gr.Markdown("## Upload Documents from URLs")
            gr.Markdown("🔗 Upload files (URLs) to your `data/docs` folder on MCP.")

            upload_mcp_input = gr.TextArea(
                placeholder="Paste URLs (one per line without commas)",
                label="URLs"
            )

            # ``docs_path`` now really has the default the docstring promises,
            # so the function can be called with only the URL text.
            def upload_urls_tool(text, docs_path="data/docs"):
                """
                Upload documents from a list of URLs to the server's docs folder.

                Args:
                    text (str): A newline-separated string of document URLs to download.
                    docs_path (str): The destination folder path on the server. Defaults to 'data/docs'.
                """
                # Tolerate a missing value; blank lines and surrounding
                # whitespace (including a trailing "\r") are dropped.
                urls = [u.strip() for u in (text or "").split("\n") if u.strip()]
                return upload_docs_tool(urls, docs_path)

            upload_mcp_path = gr.Textbox(value=DEFAULT_UPLOAD_PATH, label="Docs Path")
            upload_mcp_btn = gr.Button("Upload", variant="primary")
            upload_mcp_out = gr.JSON(label="Response")

            upload_mcp_btn.click(
                upload_urls_tool,
                inputs=[upload_mcp_input, upload_mcp_path],
                outputs=upload_mcp_out
            )

    gr.Markdown("---")
|
|
|
|
|
|
|
|
|
|
|
with gr.Tab("⚡ Autotune"):

    with gr.Column():
        gr.Markdown("## Autotune RAG")
        gr.Markdown(" ⚡ Automatically tunes RAG pipeline parameters based on document analysis.")

        # Collapsed by default; every control maps to one request field.
        with gr.Accordion("⚙ Settings", open=False):
            docs_path = gr.Textbox(value="data/docs", label="Docs Path")
            embedding_model = gr.Textbox(
                value="sentence-transformers/all-MiniLM-L6-v2", label="Embedding Model"
            )
            num_chunk_pairs = gr.Slider(
                minimum=1, maximum=20, step=1, value=5, label="Number of chunk pairs"
            )
            metric = gr.Dropdown(
                choices=["faithfulness"], value="faithfulness", label="Metric"
            )
            search_type = gr.Dropdown(
                choices=["grid", "random", "bayesian"], value="grid", label="Search Type"
            )
            trials = gr.Slider(
                minimum=1, maximum=100, step=1, value=5, label="Optimization Trials"
            )
            validation_choice = gr.Dropdown(
                choices=["generate", ""], value="generate", label="Validation Choice"
            )
            llm_model = gr.Textbox(value="gemini-2.5-flash-lite", label="LLM Model")

        autotune_btn = gr.Button("Autotune", variant="primary")
        autotune_out = gr.Textbox(label="Response", lines=15)

        def autotune_tool(
            docs_path, embedding_model, num_chunk_pairs, metric,
            search_type, trials, validation_choice, llm_model
        ):
            # Bundle the UI values into the request body and hand it to the
            # JSON-string wrapper around /autotune_rag.
            payload = dict(
                docs_path=docs_path,
                embedding_model=embedding_model,
                num_chunk_pairs=num_chunk_pairs,
                metric=metric,
                search_type=search_type,
                trials=trials,
                validation_choice=validation_choice,
                llm_model=llm_model,
            )
            return autotune_tool_(json.dumps(payload))

        # Reuse the request model's docstring as the handler's description.
        autotune_tool.__doc__ = AutotuneRequest.__doc__

        # Order must match the handler's positional parameters.
        autotune_inputs = [
            docs_path, embedding_model, num_chunk_pairs, metric,
            search_type, trials, validation_choice, llm_model,
        ]
        autotune_btn.click(autotune_tool, inputs=autotune_inputs, outputs=autotune_out)

        with gr.Accordion("➕ More Information", open=False):
            gr.Markdown(AutotuneRequest.__doc__ or "No description available.")

        gr.Markdown("---")
|
|
|
|
|
|
|
|
with gr.Tab("🔧 Optimize"):

    with gr.Column():
        gr.Markdown("## Optimize RAG")
        gr.Markdown("🔧 Explicit optimization request for RAG (Retrieval-Augmented Generation) pipelines.")

        with gr.Accordion("⚙ Settings", open=False):
            docs_path = gr.Textbox(value="data/docs", label="Docs Path")

            # Label corrected: this group selects retrievers; "Search Type"
            # is the separate dropdown further down.
            retriever = gr.CheckboxGroup(
                choices=["faiss", "chroma", "numpy", "bm25"],
                value="faiss",
                label="Retriever"
            )

            embedding_model = gr.Textbox(
                value="sentence-transformers/all-MiniLM-L6-v2",
                label="Embedding Model(s) (comma-separated)"
            )

            strategy = gr.CheckboxGroup(
                choices=["fixed", "token", "sentence"],
                value="fixed",
                label="RAG Strategy"
            )

            chunk_sizes = gr.Textbox(
                value="200,400,600",
                label="Chunk Sizes (comma-separated integers)"
            )

            overlaps = gr.Textbox(
                value="50,100,200",
                label="Overlaps (comma-separated integers)"
            )

            rerankers = gr.Dropdown(
                choices=["mmr"],
                value="mmr",
                label="Rerankers"
            )

            search_type = gr.Dropdown(
                choices=["grid", "random", "bayesian"],
                value="grid",
                label="Search Type"
            )

            trials = gr.Slider(
                minimum=1, maximum=100, step=1, value=5,
                label="Number of Trials"
            )

            metric = gr.Dropdown(
                choices=["faithfulness"],
                value="faithfulness",
                label="Metric"
            )

            validation_choice = gr.Dropdown(
                choices=["generate", ""],
                value="generate",
                label="Validation Choice"
            )

            llm_model = gr.Textbox(
                value="gemini-2.5-flash-lite",
                label="LLM Model"
            )

        optimize_btn = gr.Button("Optimize", variant="primary")
        optimize_out = gr.Textbox(label="Response", lines=15)

        def optimize_rag_tool(
            docs_path, retriever, embedding_model, strategy, chunk_sizes,
            overlaps, rerankers, search_type, trials, metric,
            validation_choice, llm_model
        ):
            # The dropdown value may arrive as a comma-separated string, a
            # list, or None (nothing selected); normalize to a list so the
            # comprehension below never calls .split on None.
            if isinstance(rerankers, str):
                rerankers = rerankers.split(",")

            payload = {
                "docs_path": docs_path,
                "retriever": retriever,
                "embedding_model": [e.strip() for e in embedding_model.split(",") if e.strip()],
                "strategy": strategy,
                # int() accepts surrounding whitespace; a non-numeric entry
                # raises ValueError, which propagates to the caller.
                "chunk_sizes": [int(c) for c in chunk_sizes.split(",") if c.strip()],
                "overlaps": [int(o) for o in overlaps.split(",") if o.strip()],
                "rerankers": [r.strip() for r in (rerankers or []) if r.strip()],
                "search_type": search_type,
                "trials": trials,
                "metric": metric,
                "validation_choice": validation_choice,
                "llm_model": llm_model
            }

            return optimize_rag_tool_(json.dumps(payload))

        # Reuse the request model's docstring as the handler's description.
        optimize_rag_tool.__doc__ = OptimizeRequest.__doc__

        # Order must match the handler's positional parameters.
        optimize_btn.click(
            optimize_rag_tool,
            inputs=[
                docs_path, retriever, embedding_model, strategy, chunk_sizes,
                overlaps, rerankers, search_type, trials, metric,
                validation_choice, llm_model
            ],
            outputs=optimize_out
        )

        with gr.Accordion("➕ More Information", open=False):
            gr.Markdown(OptimizeRequest.__doc__ or "No description available.")
        gr.Markdown("---")
|
|
|
|
|
|
|
|
with gr.Tab("🧩 Generate QA"):

    with gr.Column():
        gr.Markdown("## Generate QA")
        gr.Markdown("🧩 Generate a validation QA dataset from documents for RAG evaluation.")

        # The second gr.Tab("🧩 Generate QA") that used to wrap the controls
        # below was a duplicate of this tab and has been removed; the controls
        # now sit directly in the column, like the other tabs.
        with gr.Accordion("⚙ Settings", open=False):
            docs_path = gr.Textbox(value="data/docs", label="Docs Path")
            llm_model = gr.Textbox(value="gemini-2.5-flash-lite", label="LLM Model")
            batch_size = gr.Slider(1, 50, step=1, value=5, label="Batch Size")
            min_q = gr.Slider(1, 20, step=1, value=3, label="Min Questions")
            max_q = gr.Slider(1, 50, step=1, value=25, label="Max Questions")

        qa_btn = gr.Button("Generate QA", variant="primary")
        qa_out = gr.Textbox(lines=15, label="Response")

        def generate_qa_tool(docs_path, llm_model, batch_size, min_q, max_q):
            # Bundle the UI values into the request body and hand it to the
            # JSON-string wrapper around /generate_validation_qa.
            return generate_qa_tool_(json.dumps({
                "docs_path": docs_path,
                "llm_model": llm_model,
                "batch_size": batch_size,
                "min_q": min_q,
                "max_q": max_q
            }))

        # Reuse the request model's docstring as the handler's description.
        generate_qa_tool.__doc__ = QARequest.__doc__

        qa_btn.click(
            generate_qa_tool,
            inputs=[docs_path, llm_model, batch_size, min_q, max_q],
            outputs=qa_out
        )

        with gr.Accordion("➕ More Information", open=False):
            gr.Markdown(QARequest.__doc__ or "No description available.")

        gr.Markdown("---")
|
|
|
|
|
with gr.Tab("🗑️ Clear Cache"):

    with gr.Column():
        # Heading and a one-line description of what the button does.
        gr.Markdown("## Clear Cache")
        gr.Markdown("🗑️ Deletes all files and directories inside docs_path on the server.")

        clear_path = gr.Textbox(value=DEFAULT_UPLOAD_PATH, label="Docs Path to Clear")
        clear_btn = gr.Button("Clear Cache", variant="primary")
        clear_out = gr.JSON(label="Response")

        # The textbox value is passed as clear_cache_tool's docs_path.
        clear_btn.click(
            clear_cache_tool,
            inputs=[clear_path],
            outputs=clear_out,
        )
        gr.Markdown("---")
|
|
|
|
|
if __name__ == "__main__":
    # Listen on all interfaces, port 7860, with Gradio's MCP server enabled
    # and handler errors shown in the UI.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "mcp_server": True,
        "show_error": True,
    }
    demo.launch(**launch_options)
|
|
|