Spaces:
Sleeping
Sleeping
| # -------------------------------------------------------------- | |
| # combined_s3_sql_vector_app.py | |
| # Full Combined: S3 SQL + Vector MCP Tool in One Gradio App | |
| # -------------------------------------------------------------- | |
| import boto3 | |
| import pandas as pd | |
| import duckdb | |
| import requests | |
| from urllib.parse import quote | |
| from io import StringIO | |
| import gradio as gr | |
| # === S3 Credentials === | |
| ENDPOINT_URL = "https://s3.us-west-1.idrivee2.com" | |
| ACCESS_KEY = "rNuPBAQetemqpEeBospZ" | |
| SECRET_KEY = "BU4FccUYxzXVqiWjPSJM1CWEX1cNhBqbU9NeGidE" | |
| BUCKET = "accusagas3" | |
| s3 = boto3.client( | |
| "s3", | |
| endpoint_url=ENDPOINT_URL, | |
| aws_access_key_id=ACCESS_KEY, | |
| aws_secret_access_key=SECRET_KEY, | |
| ) | |
| # -------------------------------------------------------------- | |
| # Vector MCP Tool | |
| # -------------------------------------------------------------- | |
| def query_vector_agent_calling(user_query: str, collection_name: str) -> str: | |
| base_url = "https://srivatsavdamaraju-mvp-2-0-deploy-all-apis.hf.space/qdrant/search" | |
| encoded_collection = quote(collection_name, safe="") | |
| url = f"{base_url}?collection_name={encoded_collection}&mode=hybrid" | |
| headers = { | |
| "accept": "application/json", | |
| "Content-Type": "application/json", | |
| } | |
| payload = {"query": user_query, "top_k": 5} | |
| try: | |
| response = requests.post(url, headers=headers, json=payload, timeout=30) | |
| response.raise_for_status() | |
| data = response.json() | |
| results = data.get("results") or data.get("result") or [] | |
| if not results: | |
| return "No relevant context found." | |
| output = [] | |
| for item in results: | |
| text = item.get("text") or item.get("payload", {}).get("text") or str(item) | |
| score = item.get("score", "?") | |
| output.append(f"Score: {score}\n{text}\n---") | |
| return "\n".join(output) | |
| except requests.exceptions.Timeout: | |
| return "Vector API timeout." | |
| except requests.exceptions.HTTPError as e: | |
| return f"HTTP Error: {e.response.status_code}" | |
| except Exception as e: | |
| return f"Unexpected Error: {str(e)}" | |
| # -------------------------------------------------------------- | |
| # SQL Query Tool (S3 β DuckDB) | |
| # -------------------------------------------------------------- | |
| def run_sql(path: str, sql: str) -> pd.DataFrame: | |
| try: | |
| obj = s3.get_object(Bucket=BUCKET, Key=path) | |
| df = pd.read_csv(StringIO(obj["Body"].read().decode("utf-8"))) | |
| except Exception as e: | |
| return pd.DataFrame({"error": [str(e)]}) | |
| if df.empty: | |
| return pd.DataFrame({"error": ["Empty CSV"]}) | |
| for col in df.columns: | |
| if any(x in col.lower() for x in ["price", "volume", "amount"]): | |
| df[col] = pd.to_numeric(df[col].astype(str).str.replace(r"[^\d.-]", "", regex=True), errors="coerce") | |
| con = duckdb.connect(":memory:") | |
| con.register("data", df) | |
| if not sql.strip().lower().startswith(("select", "with")): | |
| con.close() | |
| return pd.DataFrame({"error": ["Only SELECT allowed"]}) | |
| try: | |
| result = con.execute(sql).df() | |
| except Exception as e: | |
| if "VARCHAR" in str(e): | |
| import re | |
| col = re.search(r"column ([a-zA-Z0-9_]+)", str(e)) | |
| if col and (c := col.group(1)) in df.columns: | |
| sql = sql.replace(c, f"CAST({c} AS DOUBLE)") | |
| result = con.execute(sql).df() | |
| else: | |
| con.close() | |
| return pd.DataFrame({"error": [str(e)]}) | |
| else: | |
| con.close() | |
| return pd.DataFrame({"error": [str(e)]}) | |
| finally: | |
| con.close() | |
| return result.head(10000) | |
| # -------------------------------------------------------------- | |
| # Combined Gradio App (with MCP enabled) | |
| # -------------------------------------------------------------- | |
| with gr.Blocks() as app: | |
| gr.Markdown("# π₯ Combined S3 SQL + Vector MCP Tool") | |
| gr.Markdown("## π SQL Query on S3 CSV Files") | |
| with gr.Row(): | |
| path = gr.Textbox(label="S3 Path", placeholder="folder/file.csv") | |
| sql = gr.Textbox(label="SQL Query", lines=3, placeholder="SELECT * FROM data LIMIT 10") | |
| btn_sql = gr.Button("Run SQL Query") | |
| out_sql = gr.Dataframe() | |
| gr.Markdown("---\n## π Vector Search MCP Tool") | |
| with gr.Row(): | |
| user_query = gr.Textbox(label="Query", placeholder="Explain gold market trends") | |
| collection_name = gr.Textbox(label="Collection Name", placeholder="gold&silver-db") | |
| btn_vec = gr.Button("Run Vector Search") | |
| out_vec = gr.Textbox(label="Vector Output", lines=10) | |
| btn_sql.click(run_sql, [path, sql], out_sql) | |
| btn_vec.click(query_vector_agent_calling, [user_query, collection_name], out_vec) | |
| if __name__ == "__main__": | |
| app.launch(server_name="0.0.0.0", server_port=7860, mcp_server=True) |