# -------------------------------------------------------------- # combined_s3_sql_vector_app.py # Full Combined: S3 SQL + Vector MCP Tool in One Gradio App # -------------------------------------------------------------- import boto3 import pandas as pd import duckdb import requests from urllib.parse import quote from io import StringIO import gradio as gr # === S3 Credentials === ENDPOINT_URL = "https://s3.us-west-1.idrivee2.com" ACCESS_KEY = "rNuPBAQetemqpEeBospZ" SECRET_KEY = "BU4FccUYxzXVqiWjPSJM1CWEX1cNhBqbU9NeGidE" BUCKET = "accusagas3" s3 = boto3.client( "s3", endpoint_url=ENDPOINT_URL, aws_access_key_id=ACCESS_KEY, aws_secret_access_key=SECRET_KEY, ) # -------------------------------------------------------------- # Vector MCP Tool # -------------------------------------------------------------- def query_vector_agent_calling(user_query: str, collection_name: str) -> str: base_url = "https://srivatsavdamaraju-mvp-2-0-deploy-all-apis.hf.space/qdrant/search" encoded_collection = quote(collection_name, safe="") url = f"{base_url}?collection_name={encoded_collection}&mode=hybrid" headers = { "accept": "application/json", "Content-Type": "application/json", } payload = {"query": user_query, "top_k": 5} try: response = requests.post(url, headers=headers, json=payload, timeout=30) response.raise_for_status() data = response.json() results = data.get("results") or data.get("result") or [] if not results: return "No relevant context found." output = [] for item in results: text = item.get("text") or item.get("payload", {}).get("text") or str(item) score = item.get("score", "?") output.append(f"Score: {score}\n{text}\n---") return "\n".join(output) except requests.exceptions.Timeout: return "Vector API timeout." except requests.exceptions.HTTPError as e: return f"HTTP Error: {e.response.status_code}" except Exception as e: return f"Unexpected Error: {str(e)}" # -------------------------------------------------------------- # SQL Query Tool (S3 → DuckDB) # -------------------------------------------------------------- def run_sql(path: str, sql: str) -> pd.DataFrame: try: obj = s3.get_object(Bucket=BUCKET, Key=path) df = pd.read_csv(StringIO(obj["Body"].read().decode("utf-8"))) except Exception as e: return pd.DataFrame({"error": [str(e)]}) if df.empty: return pd.DataFrame({"error": ["Empty CSV"]}) for col in df.columns: if any(x in col.lower() for x in ["price", "volume", "amount"]): df[col] = pd.to_numeric(df[col].astype(str).str.replace(r"[^\d.-]", "", regex=True), errors="coerce") con = duckdb.connect(":memory:") con.register("data", df) if not sql.strip().lower().startswith(("select", "with")): con.close() return pd.DataFrame({"error": ["Only SELECT allowed"]}) try: result = con.execute(sql).df() except Exception as e: if "VARCHAR" in str(e): import re col = re.search(r"column ([a-zA-Z0-9_]+)", str(e)) if col and (c := col.group(1)) in df.columns: sql = sql.replace(c, f"CAST({c} AS DOUBLE)") result = con.execute(sql).df() else: con.close() return pd.DataFrame({"error": [str(e)]}) else: con.close() return pd.DataFrame({"error": [str(e)]}) finally: con.close() return result.head(10000) # -------------------------------------------------------------- # Combined Gradio App (with MCP enabled) # -------------------------------------------------------------- with gr.Blocks() as app: gr.Markdown("# 🔥 Combined S3 SQL + Vector MCP Tool") gr.Markdown("## 📌 SQL Query on S3 CSV Files") with gr.Row(): path = gr.Textbox(label="S3 Path", placeholder="folder/file.csv") sql = gr.Textbox(label="SQL Query", lines=3, placeholder="SELECT * FROM data LIMIT 10") btn_sql = gr.Button("Run SQL Query") out_sql = gr.Dataframe() gr.Markdown("---\n## 🔍 Vector Search MCP Tool") with gr.Row(): user_query = gr.Textbox(label="Query", placeholder="Explain gold market trends") collection_name = gr.Textbox(label="Collection Name", placeholder="gold&silver-db") btn_vec = gr.Button("Run Vector Search") out_vec = gr.Textbox(label="Vector Output", lines=10) btn_sql.click(run_sql, [path, sql], out_sql) btn_vec.click(query_vector_agent_calling, [user_query, collection_name], out_vec) if __name__ == "__main__": app.launch(server_name="0.0.0.0", server_port=7860, mcp_server=True)