File size: 4,821 Bytes
bc80f2b
50d215d
 
bc80f2b
50d215d
 
 
bc80f2b
 
50d215d
 
 
 
 
 
 
 
bc80f2b
50d215d
 
 
 
 
 
bc80f2b
50d215d
 
 
bc80f2b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50d215d
bc80f2b
 
50d215d
 
 
 
bc80f2b
 
 
 
50d215d
bc80f2b
50d215d
bc80f2b
 
 
 
50d215d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bc80f2b
50d215d
bc80f2b
50d215d
bc80f2b
50d215d
 
 
 
bc80f2b
50d215d
 
 
 
 
 
bc80f2b
50d215d
 
bc80f2b
 
 
50d215d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# --------------------------------------------------------------
# combined_s3_sql_vector_app.py
# Full Combined: S3 SQL + Vector MCP Tool in One Gradio App
# --------------------------------------------------------------
import os
import re
from io import StringIO
from urllib.parse import quote

import boto3
import duckdb
import gradio as gr
import pandas as pd
import requests

# === S3 Credentials ===
# SECURITY: these credentials were previously hard-coded in source. They are
# now read from the environment, falling back to the original literals so
# existing deployments keep working unchanged. Rotate the keys and delete the
# fallbacks — any secret committed to version control must be treated as
# compromised.
ENDPOINT_URL = os.environ.get("S3_ENDPOINT_URL", "https://s3.us-west-1.idrivee2.com")
ACCESS_KEY = os.environ.get("S3_ACCESS_KEY", "rNuPBAQetemqpEeBospZ")
SECRET_KEY = os.environ.get("S3_SECRET_KEY", "BU4FccUYxzXVqiWjPSJM1CWEX1cNhBqbU9NeGidE")
BUCKET = os.environ.get("S3_BUCKET", "accusagas3")

# Module-level client reused by every request in this app.
s3 = boto3.client(
    "s3",
    endpoint_url=ENDPOINT_URL,
    aws_access_key_id=ACCESS_KEY,
    aws_secret_access_key=SECRET_KEY,
)

# --------------------------------------------------------------
# Vector MCP Tool
# --------------------------------------------------------------
def query_vector_agent_calling(user_query: str, collection_name: str) -> str:
    """Run a hybrid vector search against the remote Qdrant search endpoint.

    Args:
        user_query: Natural-language query text sent as the search payload.
        collection_name: Qdrant collection to search (URL-encoded before use).

    Returns:
        A string of scored text snippets separated by "---" markers, or a
        short human-readable error message. This function never raises.
    """
    base_url = "https://srivatsavdamaraju-mvp-2-0-deploy-all-apis.hf.space/qdrant/search"
    url = f"{base_url}?collection_name={quote(collection_name, safe='')}&mode=hybrid"

    try:
        resp = requests.post(
            url,
            headers={
                "accept": "application/json",
                "Content-Type": "application/json",
            },
            json={"query": user_query, "top_k": 5},
            timeout=30,
        )
        resp.raise_for_status()

        body = resp.json()
        # The API has returned either "results" or "result"; accept both.
        hits = body.get("results") or body.get("result") or []
        if not hits:
            return "No relevant context found."

        # Each hit may carry its text at the top level or nested in "payload";
        # fall back to the raw item representation if neither is present.
        rendered = [
            f"Score: {hit.get('score', '?')}\n"
            f"{hit.get('text') or hit.get('payload', {}).get('text') or str(hit)}\n---"
            for hit in hits
        ]
        return "\n".join(rendered)

    except requests.exceptions.Timeout:
        return "Vector API timeout."
    except requests.exceptions.HTTPError as e:
        return f"HTTP Error: {e.response.status_code}"
    except Exception as e:
        return f"Unexpected Error: {str(e)}"


# --------------------------------------------------------------
# SQL Query Tool (S3 β†’ DuckDB)
# --------------------------------------------------------------
def run_sql(path: str, sql: str) -> pd.DataFrame:
    """Download a CSV from S3 and run a read-only SQL query on it via DuckDB.

    The CSV is registered as an in-memory DuckDB table named ``data``.
    Columns whose names contain "price", "volume" or "amount" are coerced to
    numeric (stripping currency symbols, commas, etc.) before querying.

    Args:
        path: Object key of the CSV inside ``BUCKET``.
        sql: A SELECT/WITH query against the ``data`` table.

    Returns:
        The query result capped at 10,000 rows, or a one-column ``error``
        DataFrame describing the failure. This function never raises.
    """
    try:
        obj = s3.get_object(Bucket=BUCKET, Key=path)
        df = pd.read_csv(StringIO(obj["Body"].read().decode("utf-8")))
    except Exception as e:
        return pd.DataFrame({"error": [str(e)]})

    if df.empty:
        return pd.DataFrame({"error": ["Empty CSV"]})

    # Best-effort numeric coercion for money/volume-like columns: strip every
    # character that is not a digit, dot or minus sign, then convert.
    for col in df.columns:
        if any(key in col.lower() for key in ("price", "volume", "amount")):
            cleaned = df[col].astype(str).str.replace(r"[^\d.-]", "", regex=True)
            df[col] = pd.to_numeric(cleaned, errors="coerce")

    # Read-only guard: only SELECT/WITH statements are accepted.
    if not sql.strip().lower().startswith(("select", "with")):
        return pd.DataFrame({"error": ["Only SELECT allowed"]})

    con = duckdb.connect(":memory:")
    try:
        con.register("data", df)
        try:
            result = con.execute(sql).df()
        except Exception as e:
            # DuckDB reports a VARCHAR type error when the query does numeric
            # work on a column that stayed textual. Retry once with an explicit
            # CAST on the offending column; any other failure is reported as-is.
            match = re.search(r"column ([a-zA-Z0-9_]+)", str(e))
            if "VARCHAR" not in str(e) or match is None:
                return pd.DataFrame({"error": [str(e)]})
            bad_col = match.group(1)
            if bad_col not in df.columns:
                return pd.DataFrame({"error": [str(e)]})
            # Word-boundary substitution so we don't mangle other identifiers
            # that merely contain the column name as a substring (the previous
            # blanket str.replace did exactly that).
            patched = re.sub(
                rf"\b{re.escape(bad_col)}\b", f"CAST({bad_col} AS DOUBLE)", sql
            )
            try:
                result = con.execute(patched).df()
            except Exception as retry_err:
                # The retry itself failed — report it instead of raising
                # (the previous code let this exception escape).
                return pd.DataFrame({"error": [str(retry_err)]})
    finally:
        # Single close point — the previous code closed the connection in the
        # except branches AND again in finally.
        con.close()

    return result.head(10000)


# --------------------------------------------------------------
# Combined Gradio App (with MCP enabled)
# --------------------------------------------------------------
# UI layout: two independent panels (SQL-on-S3 and vector search), each a
# text-input row, a trigger button, and an output component. Both tools are
# also exposed over MCP when the server is launched with mcp_server=True.
with gr.Blocks() as app:
    gr.Markdown("# πŸ”₯ Combined S3 SQL + Vector MCP Tool")

    # --- Panel 1: run a SELECT query against a CSV stored in the S3 bucket ---
    gr.Markdown("## πŸ“Œ SQL Query on S3 CSV Files")
    with gr.Row():
        path = gr.Textbox(label="S3 Path", placeholder="folder/file.csv")
        sql = gr.Textbox(label="SQL Query", lines=3, placeholder="SELECT * FROM data LIMIT 10")
    btn_sql = gr.Button("Run SQL Query")
    out_sql = gr.Dataframe()

    # --- Panel 2: hybrid vector search against the remote Qdrant endpoint ---
    gr.Markdown("---\n## πŸ” Vector Search MCP Tool")
    with gr.Row():
        user_query = gr.Textbox(label="Query", placeholder="Explain gold market trends")
        collection_name = gr.Textbox(label="Collection Name", placeholder="gold&silver-db")
    btn_vec = gr.Button("Run Vector Search")
    out_vec = gr.Textbox(label="Vector Output", lines=10)

    # Wire buttons to their handlers; inputs map positionally to the
    # handlers' parameters, outputs receive the returned value.
    btn_sql.click(run_sql, [path, sql], out_sql)
    btn_vec.click(query_vector_agent_calling, [user_query, collection_name], out_vec)


if __name__ == "__main__":
    # Listen on all interfaces on port 7860; mcp_server=True additionally
    # exposes the registered handlers as MCP tools.
    app.launch(server_name="0.0.0.0", server_port=7860, mcp_server=True)