srivatsavdamaraju committed on
Commit
f28d3b5
·
verified ·
1 Parent(s): 1e986cd

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +138 -0
app.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # --------------------------------------------------------------
2
+ # combined_s3_sql_vector_app.py
3
+ # Full Combined: S3 SQL + Vector MCP Tool in One Gradio App
4
+ # --------------------------------------------------------------
5
import os
from io import StringIO
from urllib.parse import quote

import boto3
import duckdb
import gradio as gr
import pandas as pd
import requests
12
+
13
# === S3 Credentials ===
# Secrets are read from the environment instead of being committed to source.
# NOTE: the key pair that used to be hard-coded here was published with this
# commit and should be rotated at the storage provider.
#
#   S3_ENDPOINT_URL  optional, defaults to the endpoint below
#   S3_ACCESS_KEY    access key id
#   S3_SECRET_KEY    secret access key
#   S3_BUCKET        optional, defaults to the bucket below
ENDPOINT_URL = os.environ.get("S3_ENDPOINT_URL", "https://s3.us-west-1.idrivee2.com")
ACCESS_KEY = os.environ.get("S3_ACCESS_KEY")
SECRET_KEY = os.environ.get("S3_SECRET_KEY")
BUCKET = os.environ.get("S3_BUCKET", "accusagas3")

# When ACCESS_KEY / SECRET_KEY are None, boto3 falls back to its own default
# credential lookup instead of failing at import time.
s3 = boto3.client(
    "s3",
    endpoint_url=ENDPOINT_URL,
    aws_access_key_id=ACCESS_KEY,
    aws_secret_access_key=SECRET_KEY,
)
25
+
26
+ # --------------------------------------------------------------
27
+ # Vector MCP Tool
28
+ # --------------------------------------------------------------
29
def query_vector_agent_calling(user_query: str, collection_name: str) -> str:
    """Run a hybrid search against the Qdrant search endpoint and format the hits.

    Args:
        user_query: Query text, sent as the ``query`` field of the JSON body.
        collection_name: Collection to search; URL-encoded into the query
            string.

    Returns:
        One ``Score: ...`` / text / ``---`` section per hit, joined by
        newlines; ``"No relevant context found."`` when the response holds no
        results; or a short error message when the request fails. Errors are
        reported in the returned string rather than raised.
    """
    base_url = "https://srivatsavdamaraju-mvp-2-0-deploy-all-apis.hf.space/qdrant/search"
    # safe="" also escapes "/" and "&", so a name such as "gold&silver-db"
    # cannot spill into a second query parameter.
    encoded_collection = quote(collection_name, safe="")

    url = f"{base_url}?collection_name={encoded_collection}&mode=hybrid"

    headers = {
        "accept": "application/json",
        "Content-Type": "application/json",
    }

    payload = {"query": user_query, "top_k": 5}

    try:
        response = requests.post(url, headers=headers, json=payload, timeout=30)
        response.raise_for_status()

        data = response.json()
        # The hit list is looked up under either "results" or "result".
        results = data.get("results") or data.get("result") or []

        if not results:
            return "No relevant context found."

        output = []
        for item in results:
            if isinstance(item, dict):
                # "payload" may be present but null; treat that like a missing
                # payload instead of failing the whole search on one hit.
                nested = item.get("payload") or {}
                nested_text = nested.get("text") if isinstance(nested, dict) else None
                text = item.get("text") or nested_text or str(item)
                score = item.get("score", "?")
            else:
                # Non-mapping hit: show it verbatim, same as the str(item)
                # fallback used for mappings without any text.
                text, score = str(item), "?"
            output.append(f"Score: {score}\n{text}\n---")

        return "\n".join(output)

    except requests.exceptions.Timeout:
        return "Vector API timeout."
    except requests.exceptions.HTTPError as e:
        return f"HTTP Error: {e.response.status_code}"
    except Exception as e:
        # Tool boundary: report the failure as text so the UI shows it.
        return f"Unexpected Error: {str(e)}"
66
+
67
+
68
+ # --------------------------------------------------------------
69
+ # SQL Query Tool (S3 → DuckDB)
70
+ # --------------------------------------------------------------
71
def run_sql(path: str, sql: str) -> pd.DataFrame:
    """Run a read-only SQL query against a CSV object stored in the S3 bucket.

    The object at ``path`` is downloaded from ``BUCKET``, parsed with pandas
    and registered in an in-memory DuckDB connection as the table ``data``.
    Columns whose name contains "price", "volume" or "amount" are coerced to
    numbers first (non-numeric characters stripped, unparsable values -> NaN).

    Args:
        path: Object key of the CSV file inside the bucket.
        sql: Query text; must start with ``SELECT`` or ``WITH``.

    Returns:
        At most the first 10000 rows of the query result. On any failure a
        one-row DataFrame with a single ``error`` column is returned instead
        of raising.
    """
    import re

    # Reject anything that is not a query before paying for the S3 download.
    # NOTE(review): this prefix check is not a sandbox. A SELECT can still
    # call DuckDB functions that read local files or carry extra statements
    # after a ";". Consider restricting the connection (e.g. DuckDB's
    # external-access setting) before exposing this to untrusted users.
    statement = (sql or "").strip()
    if not statement.lower().startswith(("select", "with")):
        return pd.DataFrame({"error": ["Only SELECT allowed"]})

    try:
        obj = s3.get_object(Bucket=BUCKET, Key=path)
        df = pd.read_csv(StringIO(obj["Body"].read().decode("utf-8")))
    except Exception as e:
        # Tool boundary: surface download/parse problems as an error row.
        return pd.DataFrame({"error": [str(e)]})

    if df.empty:
        return pd.DataFrame({"error": ["Empty CSV"]})

    numeric_hints = ("price", "volume", "amount")
    for col in df.columns:
        if any(hint in str(col).lower() for hint in numeric_hints):
            # Drop currency symbols, thousands separators, etc. before parsing.
            cleaned = df[col].astype(str).str.replace(r"[^\d.-]", "", regex=True)
            df[col] = pd.to_numeric(cleaned, errors="coerce")

    con = duckdb.connect(":memory:")
    try:
        con.register("data", df)
        try:
            result = con.execute(sql).df()
        except Exception as e:
            message = str(e)
            retry_sql = None
            if "VARCHAR" in message:
                # Type error on a text column: retry once with that column
                # cast to DOUBLE. Whole-word match so that casting "price"
                # does not rewrite e.g. "avg_price".
                found = re.search(r"column ([a-zA-Z0-9_]+)", message)
                if found and (c := found.group(1)) in df.columns:
                    retry_sql = re.sub(
                        rf"\b{re.escape(c)}\b", f"CAST({c} AS DOUBLE)", sql
                    )
            if retry_sql is None:
                return pd.DataFrame({"error": [message]})
            try:
                result = con.execute(retry_sql).df()
            except Exception:
                # Report the error for the query the user actually wrote, not
                # for the rewritten one.
                return pd.DataFrame({"error": [message]})
    finally:
        # Single close point for every exit path.
        con.close()

    return result.head(10000)
111
+
112
+
113
+ # --------------------------------------------------------------
114
+ # Combined Gradio App (with MCP enabled)
115
+ # --------------------------------------------------------------
116
# NOTE(review): the indentation of this section was lost in the source this
# was recovered from; the buttons and outputs are assumed to sit below their
# input row rather than inside it -- confirm against the deployed layout.
with gr.Blocks() as app:
    gr.Markdown("# 🔥 Combined S3 SQL + Vector MCP Tool")

    # SQL section: S3 key + query text in one row, result grid underneath.
    gr.Markdown("## 📌 SQL Query on S3 CSV Files")
    with gr.Row():
        path = gr.Textbox(
            label="S3 Path",
            placeholder="folder/file.csv",
        )
        sql = gr.Textbox(
            label="SQL Query",
            lines=3,
            placeholder="SELECT * FROM data LIMIT 10",
        )
    btn_sql = gr.Button("Run SQL Query")
    out_sql = gr.Dataframe()

    # Vector section: query + collection name in one row, text output below.
    gr.Markdown("---\n## 🔍 Vector Search MCP Tool")
    with gr.Row():
        user_query = gr.Textbox(
            label="Query",
            placeholder="Explain gold market trends",
        )
        collection_name = gr.Textbox(
            label="Collection Name",
            placeholder="gold&silver-db",
        )
    btn_vec = gr.Button("Run Vector Search")
    out_vec = gr.Textbox(label="Vector Output", lines=10)

    # Wire each button to its tool function.
    btn_sql.click(fn=run_sql, inputs=[path, sql], outputs=out_sql)
    btn_vec.click(
        fn=query_vector_agent_calling,
        inputs=[user_query, collection_name],
        outputs=out_vec,
    )
135
+
136
+
137
if __name__ == "__main__":
    # Launch settings: listen on 0.0.0.0, port 7860, with the MCP server
    # option switched on.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "mcp_server": True,
    }
    app.launch(**launch_options)