# --- Hugging Face Spaces page metadata captured by the scrape (not code) ---
# Spaces status: Sleeping
# File size: 4,821 Bytes
# Commit refs: bc80f2b 50d215d
# --------------------------------------------------------------
# combined_s3_sql_vector_app.py
# Full Combined: S3 SQL + Vector MCP Tool in One Gradio App
# --------------------------------------------------------------
import os
import re
from io import StringIO
from urllib.parse import quote

import boto3
import duckdb
import gradio as gr
import pandas as pd
import requests
# === S3 Credentials ===
# SECURITY: these credentials were hard-coded in source. They are now read
# from the environment, with the original literals kept only as fallbacks so
# existing deployments keep working. Any secret that has been committed must
# be considered compromised — rotate it and drop the fallbacks.
ENDPOINT_URL = os.environ.get("S3_ENDPOINT_URL", "https://s3.us-west-1.idrivee2.com")
ACCESS_KEY = os.environ.get("S3_ACCESS_KEY", "rNuPBAQetemqpEeBospZ")
SECRET_KEY = os.environ.get("S3_SECRET_KEY", "BU4FccUYxzXVqiWjPSJM1CWEX1cNhBqbU9NeGidE")
BUCKET = os.environ.get("S3_BUCKET", "accusagas3")

# Single module-level client (boto3 clients are reusable); used by run_sql().
s3 = boto3.client(
    "s3",
    endpoint_url=ENDPOINT_URL,
    aws_access_key_id=ACCESS_KEY,
    aws_secret_access_key=SECRET_KEY,
)
# --------------------------------------------------------------
# Vector MCP Tool
# --------------------------------------------------------------
def query_vector_agent_calling(user_query: str, collection_name: str) -> str:
    """Run a hybrid vector search against the remote Qdrant search API.

    Parameters
    ----------
    user_query : str
        Natural-language query to search for.
    collection_name : str
        Target Qdrant collection name (URL-encoded before use).

    Returns
    -------
    str
        Newline-joined ``Score: ...`` entries for the top hits, or a
        human-readable message on empty results / timeout / HTTP error.
    """
    base_url = "https://srivatsavdamaraju-mvp-2-0-deploy-all-apis.hf.space/qdrant/search"
    endpoint = f"{base_url}?collection_name={quote(collection_name, safe='')}&mode=hybrid"
    try:
        response = requests.post(
            endpoint,
            headers={"accept": "application/json", "Content-Type": "application/json"},
            json={"query": user_query, "top_k": 5},
            timeout=30,
        )
        response.raise_for_status()
        data = response.json()
        # The API has been observed under both "results" and "result" keys.
        hits = data.get("results") or data.get("result") or []
        if not hits:
            return "No relevant context found."
        blocks = []
        for hit in hits:
            # Text may live at the top level or nested under "payload".
            body = hit.get("text") or hit.get("payload", {}).get("text") or str(hit)
            blocks.append(f"Score: {hit.get('score', '?')}\n{body}\n---")
        return "\n".join(blocks)
    except requests.exceptions.Timeout:
        return "Vector API timeout."
    except requests.exceptions.HTTPError as e:
        return f"HTTP Error: {e.response.status_code}"
    except Exception as e:
        return f"Unexpected Error: {str(e)}"
# --------------------------------------------------------------
# SQL Query Tool (S3 β DuckDB)
# --------------------------------------------------------------
def run_sql(path: str, sql: str) -> pd.DataFrame:
    """Fetch a CSV from S3 and run a read-only SQL query on it via DuckDB.

    Parameters
    ----------
    path : str
        Object key of the CSV inside ``BUCKET``.
    sql : str
        A ``SELECT``/``WITH`` query; the CSV is registered as table ``data``.

    Returns
    -------
    pd.DataFrame
        Query result capped at 10,000 rows, or a single-column ``error``
        frame describing the failure (this tool reports errors via the
        returned frame, never by raising).
    """
    # Reject non-read-only statements up front, before paying for the S3 fetch.
    if not sql.strip().lower().startswith(("select", "with")):
        return pd.DataFrame({"error": ["Only SELECT allowed"]})
    try:
        obj = s3.get_object(Bucket=BUCKET, Key=path)
        df = pd.read_csv(StringIO(obj["Body"].read().decode("utf-8")))
    except Exception as e:
        return pd.DataFrame({"error": [str(e)]})
    if df.empty:
        return pd.DataFrame({"error": ["Empty CSV"]})
    # Coerce currency/volume-looking columns to numeric, stripping symbols
    # (e.g. "$1,234.50" -> 1234.50); unparseable cells become NaN.
    for col in df.columns:
        if any(x in col.lower() for x in ("price", "volume", "amount")):
            df[col] = pd.to_numeric(
                df[col].astype(str).str.replace(r"[^\d.-]", "", regex=True),
                errors="coerce",
            )
    con = duckdb.connect(":memory:")
    try:
        con.register("data", df)
        try:
            result = con.execute(sql).df()
        except Exception as e:
            msg = str(e)
            # DuckDB "VARCHAR" type errors usually mean a text column was used
            # numerically; retry once with an explicit CAST on that column.
            match = re.search(r"column ([a-zA-Z0-9_]+)", msg)
            if "VARCHAR" not in msg or not (match and match.group(1) in df.columns):
                return pd.DataFrame({"error": [msg]})
            col = match.group(1)
            # Word-boundary substitution: the original str.replace() rewrote
            # every substring match, so a column "vol" would corrupt "volume".
            patched = re.sub(rf"\b{re.escape(col)}\b", f"CAST({col} AS DOUBLE)", sql)
            try:
                result = con.execute(patched).df()
            except Exception as e2:
                return pd.DataFrame({"error": [str(e2)]})
    finally:
        # Single close point — the original also called close() inside the
        # except branches, closing the connection twice on error paths.
        con.close()
    return result.head(10000)
# --------------------------------------------------------------
# Combined Gradio App (with MCP enabled)
# --------------------------------------------------------------
# --- UI wiring: one Gradio Blocks app exposing both tools -------------------
# NOTE(review): the Markdown headers contain mojibake ("π₯", "π") that were
# presumably emoji before an encoding round-trip — confirm and re-encode.
# The literal strings are preserved byte-for-byte here.
with gr.Blocks() as app:
    gr.Markdown("# π₯ Combined S3 SQL + Vector MCP Tool")

    # SQL-on-S3 section: path + query inputs drive run_sql().
    gr.Markdown("## π SQL Query on S3 CSV Files")
    with gr.Row():
        path = gr.Textbox(label="S3 Path", placeholder="folder/file.csv")
        sql = gr.Textbox(label="SQL Query", lines=3, placeholder="SELECT * FROM data LIMIT 10")
    btn_sql = gr.Button("Run SQL Query")
    out_sql = gr.Dataframe()

    # Vector-search section: query + collection drive query_vector_agent_calling().
    gr.Markdown("---\n## π Vector Search MCP Tool")
    with gr.Row():
        user_query = gr.Textbox(label="Query", placeholder="Explain gold market trends")
        collection_name = gr.Textbox(label="Collection Name", placeholder="gold&silver-db")
    btn_vec = gr.Button("Run Vector Search")
    out_vec = gr.Textbox(label="Vector Output", lines=10)

    # Click handlers: the inputs list order must match each callback's signature.
    btn_sql.click(run_sql, [path, sql], out_sql)
    btn_vec.click(query_vector_agent_calling, [user_query, collection_name], out_vec)
if __name__ == "__main__":
    # Bind on all interfaces (container/Space deployment); mcp_server=True also
    # exposes the registered tools over the Model Context Protocol.
    # (A stray trailing "|" scrape artifact was removed from this line.)
    app.launch(server_name="0.0.0.0", server_port=7860, mcp_server=True)