vector_tool / app.py
srivatsavdamaraju's picture
Update app.py
50d215d verified
# --------------------------------------------------------------
# combined_s3_sql_vector_app.py
# Full Combined: S3 SQL + Vector MCP Tool in One Gradio App
# --------------------------------------------------------------
import boto3
import pandas as pd
import duckdb
import requests
from urllib.parse import quote
from io import StringIO
import gradio as gr
# === S3 Credentials ===
ENDPOINT_URL = "https://s3.us-west-1.idrivee2.com"
ACCESS_KEY = "rNuPBAQetemqpEeBospZ"
SECRET_KEY = "BU4FccUYxzXVqiWjPSJM1CWEX1cNhBqbU9NeGidE"
BUCKET = "accusagas3"
s3 = boto3.client(
"s3",
endpoint_url=ENDPOINT_URL,
aws_access_key_id=ACCESS_KEY,
aws_secret_access_key=SECRET_KEY,
)
# --------------------------------------------------------------
# Vector MCP Tool
# --------------------------------------------------------------
def query_vector_agent_calling(user_query: str, collection_name: str) -> str:
base_url = "https://srivatsavdamaraju-mvp-2-0-deploy-all-apis.hf.space/qdrant/search"
encoded_collection = quote(collection_name, safe="")
url = f"{base_url}?collection_name={encoded_collection}&mode=hybrid"
headers = {
"accept": "application/json",
"Content-Type": "application/json",
}
payload = {"query": user_query, "top_k": 5}
try:
response = requests.post(url, headers=headers, json=payload, timeout=30)
response.raise_for_status()
data = response.json()
results = data.get("results") or data.get("result") or []
if not results:
return "No relevant context found."
output = []
for item in results:
text = item.get("text") or item.get("payload", {}).get("text") or str(item)
score = item.get("score", "?")
output.append(f"Score: {score}\n{text}\n---")
return "\n".join(output)
except requests.exceptions.Timeout:
return "Vector API timeout."
except requests.exceptions.HTTPError as e:
return f"HTTP Error: {e.response.status_code}"
except Exception as e:
return f"Unexpected Error: {str(e)}"
# --------------------------------------------------------------
# SQL Query Tool (S3 β†’ DuckDB)
# --------------------------------------------------------------
def run_sql(path: str, sql: str) -> pd.DataFrame:
try:
obj = s3.get_object(Bucket=BUCKET, Key=path)
df = pd.read_csv(StringIO(obj["Body"].read().decode("utf-8")))
except Exception as e:
return pd.DataFrame({"error": [str(e)]})
if df.empty:
return pd.DataFrame({"error": ["Empty CSV"]})
for col in df.columns:
if any(x in col.lower() for x in ["price", "volume", "amount"]):
df[col] = pd.to_numeric(df[col].astype(str).str.replace(r"[^\d.-]", "", regex=True), errors="coerce")
con = duckdb.connect(":memory:")
con.register("data", df)
if not sql.strip().lower().startswith(("select", "with")):
con.close()
return pd.DataFrame({"error": ["Only SELECT allowed"]})
try:
result = con.execute(sql).df()
except Exception as e:
if "VARCHAR" in str(e):
import re
col = re.search(r"column ([a-zA-Z0-9_]+)", str(e))
if col and (c := col.group(1)) in df.columns:
sql = sql.replace(c, f"CAST({c} AS DOUBLE)")
result = con.execute(sql).df()
else:
con.close()
return pd.DataFrame({"error": [str(e)]})
else:
con.close()
return pd.DataFrame({"error": [str(e)]})
finally:
con.close()
return result.head(10000)
# --------------------------------------------------------------
# Combined Gradio App (with MCP enabled)
# --------------------------------------------------------------
with gr.Blocks() as app:
gr.Markdown("# πŸ”₯ Combined S3 SQL + Vector MCP Tool")
gr.Markdown("## πŸ“Œ SQL Query on S3 CSV Files")
with gr.Row():
path = gr.Textbox(label="S3 Path", placeholder="folder/file.csv")
sql = gr.Textbox(label="SQL Query", lines=3, placeholder="SELECT * FROM data LIMIT 10")
btn_sql = gr.Button("Run SQL Query")
out_sql = gr.Dataframe()
gr.Markdown("---\n## πŸ” Vector Search MCP Tool")
with gr.Row():
user_query = gr.Textbox(label="Query", placeholder="Explain gold market trends")
collection_name = gr.Textbox(label="Collection Name", placeholder="gold&silver-db")
btn_vec = gr.Button("Run Vector Search")
out_vec = gr.Textbox(label="Vector Output", lines=10)
btn_sql.click(run_sql, [path, sql], out_sql)
btn_vec.click(query_vector_agent_calling, [user_query, collection_name], out_vec)
if __name__ == "__main__":
app.launch(server_name="0.0.0.0", server_port=7860, mcp_server=True)