KoRiF committed on
Commit ·
2c29ed1
1
Parent(s): fff734e
Tune Formats & Configs
Browse files
- app.py +4 -1
- hfspaces_tracking.py +5 -2
- sql/select_spaces.sql +1 -1
- toolset_semantics.py +38 -31
app.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
from typing import List, Dict, Any, Tuple
|
| 3 |
|
| 4 |
-
from toolset_semantics import search_suitable_tools, search_suitable_spaces
|
|
|
|
| 5 |
|
| 6 |
def suitable_MCP_servers(task: str) -> List[Dict[str, Any]]:
|
| 7 |
"""
|
|
@@ -58,4 +59,6 @@ mcps_interface = gr.TabbedInterface(
|
|
| 58 |
|
| 59 |
# Launch the interface and MCP server
|
| 60 |
if __name__ == "__main__":
|
|
|
|
|
|
|
| 61 |
mcps_interface.launch(mcp_server=True)
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from typing import List, Dict, Any, Tuple
|
| 3 |
|
| 4 |
+
from toolset_semantics import search_suitable_tools, search_suitable_spaces, initialize_and_upload_to_vector_db
|
| 5 |
+
from hfspaces_tracking import update_database
|
| 6 |
|
| 7 |
def suitable_MCP_servers(task: str) -> List[Dict[str, Any]]:
|
| 8 |
"""
|
|
|
|
| 59 |
|
| 60 |
# Launch the interface and MCP server
|
| 61 |
if __name__ == "__main__":
|
| 62 |
+
update_database()
|
| 63 |
+
initialize_and_upload_to_vector_db()
|
| 64 |
mcps_interface.launch(mcp_server=True)
|
hfspaces_tracking.py
CHANGED
|
@@ -217,11 +217,14 @@ def save_to_database(spaces):
|
|
| 217 |
finally:
|
| 218 |
conn.close()
|
| 219 |
|
| 220 |
-
|
| 221 |
create_database()
|
| 222 |
print("Starting fetching process...")
|
| 223 |
|
| 224 |
spaces_data = fetch_spaces()
|
| 225 |
print(spaces_data)
|
| 226 |
save_to_database(spaces_data)
|
| 227 |
-
print("Process complete! Data saved to database")
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
finally:
|
| 218 |
conn.close()
|
| 219 |
|
| 220 |
+
def update_database():
|
| 221 |
create_database()
|
| 222 |
print("Starting fetching process...")
|
| 223 |
|
| 224 |
spaces_data = fetch_spaces()
|
| 225 |
print(spaces_data)
|
| 226 |
save_to_database(spaces_data)
|
| 227 |
+
print("Process complete! Data saved to database")
|
| 228 |
+
|
| 229 |
+
if __name__ == "__main__":
|
| 230 |
+
update_database()
|
sql/select_spaces.sql
CHANGED
|
@@ -2,7 +2,7 @@ SELECT
|
|
| 2 |
s.space_id,
|
| 3 |
s.title,
|
| 4 |
s.description,
|
| 5 |
-
s.url,
|
| 6 |
s.tags,
|
| 7 |
e.endpoint_url as schema_url,
|
| 8 |
COUNT(t.tool_name) as tool_count
|
|
|
|
| 2 |
s.space_id,
|
| 3 |
s.title,
|
| 4 |
s.description,
|
| 5 |
+
s.url as space_url,
|
| 6 |
s.tags,
|
| 7 |
e.endpoint_url as schema_url,
|
| 8 |
COUNT(t.tool_name) as tool_count
|
toolset_semantics.py
CHANGED
|
@@ -10,8 +10,8 @@ load_dotenv()
|
|
| 10 |
from sql.sql_utils import load_sql_query
|
| 11 |
DB_PATH = '/data/huggingface_spaces.db' if os.path.exists('/data') else 'huggingface_spaces.db'
|
| 12 |
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
|
| 13 |
-
TOOLS_INDEX_NAME = "mcp-tools"
|
| 14 |
-
SPACES_INDEX_NAME = "mcp-spaces"
|
| 15 |
SQL_SELECT_TOOLS = "sql/select_tools.sql"
|
| 16 |
SQL_SELECT_SPACES = "sql/select_spaces.sql"
|
| 17 |
|
|
@@ -102,10 +102,10 @@ def upsert_spaces_to_pinecone(pc: Pinecone, spaces: List[Dict[str, Any]]):
|
|
| 102 |
profile = prepare_space_profile(space)
|
| 103 |
record = {
|
| 104 |
"_id": space['space_id'],
|
| 105 |
-
"profile": profile,
|
| 106 |
-
"title": space['title'],
|
| 107 |
-
"url": space['url'],
|
| 108 |
-
"tool_count": space['tool_count'],
|
| 109 |
"tags": space['tags'] if space['tags'] else '[]'
|
| 110 |
}
|
| 111 |
records.append(record)
|
|
@@ -116,7 +116,7 @@ def upsert_spaces_to_pinecone(pc: Pinecone, spaces: List[Dict[str, Any]]):
|
|
| 116 |
time.sleep(1)
|
| 117 |
print(f"Uploaded {len(spaces)} spaces")
|
| 118 |
|
| 119 |
-
def search_spaces(pc: Pinecone, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
|
| 120 |
"""Search for relevant MCP spaces based on their description and tools"""
|
| 121 |
index = pc.Index(SPACES_INDEX_NAME)
|
| 122 |
|
|
@@ -132,16 +132,20 @@ def search_spaces(pc: Pinecone, query: str, top_k: int = 5) -> List[Dict[str, An
|
|
| 132 |
|
| 133 |
spaces_list = []
|
| 134 |
for hit in results['result']['hits']:
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
|
|
|
|
|
|
| 144 |
|
|
|
|
|
|
|
| 145 |
return spaces_list
|
| 146 |
|
| 147 |
def load_tools_from_db() -> List[Dict[str, Any]]:
|
|
@@ -176,7 +180,7 @@ def upsert_tools_to_pinecone(pc: Pinecone, tools: List[Dict[str, Any]]):
|
|
| 176 |
time.sleep(1)
|
| 177 |
print(f"Uploaded {len(tools)} tools")
|
| 178 |
|
| 179 |
-
def search_tools(pc: Pinecone, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
|
| 180 |
"""Search for relevant tools based on description"""
|
| 181 |
index = pc.Index(TOOLS_INDEX_NAME)
|
| 182 |
|
|
@@ -190,34 +194,37 @@ def search_tools(pc: Pinecone, query: str, top_k: int = 5) -> List[Dict[str, Any
|
|
| 190 |
}
|
| 191 |
)
|
| 192 |
|
| 193 |
-
# Convert to schema endpoint format
|
| 194 |
tools_list = []
|
| 195 |
for hit in results['result']['hits']:
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
|
|
|
|
|
|
|
|
|
| 204 |
|
|
|
|
|
|
|
| 205 |
return tools_list
|
| 206 |
|
| 207 |
-
|
| 208 |
def search_suitable_tools(query: str)->List[Dict[str, Any]]:
|
| 209 |
"""Search for suitable tools based on query"""
|
| 210 |
pc = Pinecone(api_key=PINECONE_API_KEY)
|
| 211 |
-
tools = search_tools(pc, query)
|
| 212 |
return tools
|
| 213 |
|
| 214 |
def search_suitable_spaces(query: str)->List[Dict[str, Any]]:
|
| 215 |
"""Search for suitable spaces based on query"""
|
| 216 |
pc = Pinecone(api_key=PINECONE_API_KEY)
|
| 217 |
-
spaces = search_spaces(pc, query, top_k=3)
|
| 218 |
return spaces
|
| 219 |
|
| 220 |
-
def initialize_and_upload():
|
| 221 |
"""Initialize Pinecone and upload all tools and spaces"""
|
| 222 |
pc = Pinecone(api_key=PINECONE_API_KEY)
|
| 223 |
create_tools_index(pc)
|
|
@@ -237,7 +244,7 @@ def initialize_and_upload():
|
|
| 237 |
return pc
|
| 238 |
|
| 239 |
if __name__ == "__main__":
|
| 240 |
-
pc = initialize_and_upload()
|
| 241 |
|
| 242 |
# Interactive search loop
|
| 243 |
|
|
|
|
| 10 |
from sql.sql_utils import load_sql_query
|
| 11 |
DB_PATH = '/data/huggingface_spaces.db' if os.path.exists('/data') else 'huggingface_spaces.db'
|
| 12 |
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
|
| 13 |
+
TOOLS_INDEX_NAME = "vix-mcp-tools"
|
| 14 |
+
SPACES_INDEX_NAME = "vix-mcp-spaces"
|
| 15 |
SQL_SELECT_TOOLS = "sql/select_tools.sql"
|
| 16 |
SQL_SELECT_SPACES = "sql/select_spaces.sql"
|
| 17 |
|
|
|
|
| 102 |
profile = prepare_space_profile(space)
|
| 103 |
record = {
|
| 104 |
"_id": space['space_id'],
|
| 105 |
+
"profile": profile if profile else "",
|
| 106 |
+
"title": space['title'] if space['title'] else "",
|
| 107 |
+
"url": space['schema_url'] if space['schema_url'] else "",
|
| 108 |
+
"tool_count": space['tool_count'] if space['tool_count'] else 0,
|
| 109 |
"tags": space['tags'] if space['tags'] else '[]'
|
| 110 |
}
|
| 111 |
records.append(record)
|
|
|
|
| 116 |
time.sleep(1)
|
| 117 |
print(f"Uploaded {len(spaces)} spaces")
|
| 118 |
|
| 119 |
+
def search_spaces(pc: Pinecone, query: str, top_k: int = 5, score_threshold: float = 0.0) -> List[Dict[str, Any]]:
|
| 120 |
"""Search for relevant MCP spaces based on their description and tools"""
|
| 121 |
index = pc.Index(SPACES_INDEX_NAME)
|
| 122 |
|
|
|
|
| 132 |
|
| 133 |
spaces_list = []
|
| 134 |
for hit in results['result']['hits']:
|
| 135 |
+
score = hit.get("_score", 0)
|
| 136 |
+
if score > score_threshold:
|
| 137 |
+
fields = hit.get('fields', {})
|
| 138 |
+
space = {
|
| 139 |
+
"title": fields.get("title"),
|
| 140 |
+
"url": fields.get("url"),
|
| 141 |
+
"tool_count": fields.get("tool_count"),
|
| 142 |
+
"tags": fields.get("tags"),
|
| 143 |
+
"score": score
|
| 144 |
+
}
|
| 145 |
+
spaces_list.append(space)
|
| 146 |
|
| 147 |
+
# Sort by score in descending order
|
| 148 |
+
spaces_list.sort(key=lambda x: x["score"], reverse=True)
|
| 149 |
return spaces_list
|
| 150 |
|
| 151 |
def load_tools_from_db() -> List[Dict[str, Any]]:
|
|
|
|
| 180 |
time.sleep(1)
|
| 181 |
print(f"Uploaded {len(tools)} tools")
|
| 182 |
|
| 183 |
+
def search_tools(pc: Pinecone, query: str, top_k: int = 5, score_threshold: float = 0.0) -> List[Dict[str, Any]]:
|
| 184 |
"""Search for relevant tools based on description"""
|
| 185 |
index = pc.Index(TOOLS_INDEX_NAME)
|
| 186 |
|
|
|
|
| 194 |
}
|
| 195 |
)
|
| 196 |
|
|
|
|
| 197 |
tools_list = []
|
| 198 |
for hit in results['result']['hits']:
|
| 199 |
+
score = hit.get("_score", 0)
|
| 200 |
+
if score > score_threshold:
|
| 201 |
+
fields = hit.get('fields', {})
|
| 202 |
+
tool = {
|
| 203 |
+
"name": fields.get("tool_name"),
|
| 204 |
+
"description": fields.get("description"),
|
| 205 |
+
"inputSchema": json.loads(fields.get("input_schema", "{}")),
|
| 206 |
+
"server_url": fields.get("server_url"),
|
| 207 |
+
"score": score
|
| 208 |
+
}
|
| 209 |
+
tools_list.append(tool)
|
| 210 |
|
| 211 |
+
# Sort by score in descending order
|
| 212 |
+
tools_list.sort(key=lambda x: x["score"], reverse=True)
|
| 213 |
return tools_list
|
| 214 |
|
|
|
|
| 215 |
def search_suitable_tools(query: str)->List[Dict[str, Any]]:
|
| 216 |
"""Search for suitable tools based on query"""
|
| 217 |
pc = Pinecone(api_key=PINECONE_API_KEY)
|
| 218 |
+
tools = search_tools(pc, query, top_k=13, score_threshold=0.25)
|
| 219 |
return tools
|
| 220 |
|
| 221 |
def search_suitable_spaces(query: str)->List[Dict[str, Any]]:
|
| 222 |
"""Search for suitable spaces based on query"""
|
| 223 |
pc = Pinecone(api_key=PINECONE_API_KEY)
|
| 224 |
+
spaces = search_spaces(pc, query, top_k=3, score_threshold=0.1)
|
| 225 |
return spaces
|
| 226 |
|
| 227 |
+
def initialize_and_upload_to_vector_db():
|
| 228 |
"""Initialize Pinecone and upload all tools and spaces"""
|
| 229 |
pc = Pinecone(api_key=PINECONE_API_KEY)
|
| 230 |
create_tools_index(pc)
|
|
|
|
| 244 |
return pc
|
| 245 |
|
| 246 |
if __name__ == "__main__":
|
| 247 |
+
pc = initialize_and_upload_to_vector_db()
|
| 248 |
|
| 249 |
# Interactive search loop
|
| 250 |
|