KoRiF committed on
Commit
2c29ed1
·
1 Parent(s): fff734e

Tune Formats & Configs

Browse files
Files changed (4) hide show
  1. app.py +4 -1
  2. hfspaces_tracking.py +5 -2
  3. sql/select_spaces.sql +1 -1
  4. toolset_semantics.py +38 -31
app.py CHANGED
@@ -1,7 +1,8 @@
1
  import gradio as gr
2
  from typing import List, Dict, Any, Tuple
3
 
4
- from toolset_semantics import search_suitable_tools, search_suitable_spaces
 
5
 
6
  def suitable_MCP_servers(task: str) -> List[Dict[str, Any]]:
7
  """
@@ -58,4 +59,6 @@ mcps_interface = gr.TabbedInterface(
58
 
59
  # Launch the interface and MCP server
60
  if __name__ == "__main__":
 
 
61
  mcps_interface.launch(mcp_server=True)
 
1
  import gradio as gr
2
  from typing import List, Dict, Any, Tuple
3
 
4
+ from toolset_semantics import search_suitable_tools, search_suitable_spaces, initialize_and_upload_to_vector_db
5
+ from hfspaces_tracking import update_database
6
 
7
  def suitable_MCP_servers(task: str) -> List[Dict[str, Any]]:
8
  """
 
59
 
60
  # Launch the interface and MCP server
61
  if __name__ == "__main__":
62
+ update_database()
63
+ initialize_and_upload_to_vector_db()
64
  mcps_interface.launch(mcp_server=True)
hfspaces_tracking.py CHANGED
@@ -217,11 +217,14 @@ def save_to_database(spaces):
217
  finally:
218
  conn.close()
219
 
220
- if __name__ == "__main__":
221
  create_database()
222
  print("Starting fetching process...")
223
 
224
  spaces_data = fetch_spaces()
225
  print(spaces_data)
226
  save_to_database(spaces_data)
227
- print("Process complete! Data saved to database")
 
 
 
 
217
  finally:
218
  conn.close()
219
 
220
+ def update_database():
221
  create_database()
222
  print("Starting fetching process...")
223
 
224
  spaces_data = fetch_spaces()
225
  print(spaces_data)
226
  save_to_database(spaces_data)
227
+ print("Process complete! Data saved to database")
228
+
229
+ if __name__ == "__main__":
230
+ update_database()
sql/select_spaces.sql CHANGED
@@ -2,7 +2,7 @@ SELECT
2
  s.space_id,
3
  s.title,
4
  s.description,
5
- s.url,
6
  s.tags,
7
  e.endpoint_url as schema_url,
8
  COUNT(t.tool_name) as tool_count
 
2
  s.space_id,
3
  s.title,
4
  s.description,
5
+ s.url as space_url,
6
  s.tags,
7
  e.endpoint_url as schema_url,
8
  COUNT(t.tool_name) as tool_count
toolset_semantics.py CHANGED
@@ -10,8 +10,8 @@ load_dotenv()
10
  from sql.sql_utils import load_sql_query
11
  DB_PATH = '/data/huggingface_spaces.db' if os.path.exists('/data') else 'huggingface_spaces.db'
12
  PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
13
- TOOLS_INDEX_NAME = "mcp-tools"
14
- SPACES_INDEX_NAME = "mcp-spaces"
15
  SQL_SELECT_TOOLS = "sql/select_tools.sql"
16
  SQL_SELECT_SPACES = "sql/select_spaces.sql"
17
 
@@ -102,10 +102,10 @@ def upsert_spaces_to_pinecone(pc: Pinecone, spaces: List[Dict[str, Any]]):
102
  profile = prepare_space_profile(space)
103
  record = {
104
  "_id": space['space_id'],
105
- "profile": profile,
106
- "title": space['title'],
107
- "url": space['url'],
108
- "tool_count": space['tool_count'],
109
  "tags": space['tags'] if space['tags'] else '[]'
110
  }
111
  records.append(record)
@@ -116,7 +116,7 @@ def upsert_spaces_to_pinecone(pc: Pinecone, spaces: List[Dict[str, Any]]):
116
  time.sleep(1)
117
  print(f"Uploaded {len(spaces)} spaces")
118
 
119
- def search_spaces(pc: Pinecone, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
120
  """Search for relevant MCP spaces based on their description and tools"""
121
  index = pc.Index(SPACES_INDEX_NAME)
122
 
@@ -132,16 +132,20 @@ def search_spaces(pc: Pinecone, query: str, top_k: int = 5) -> List[Dict[str, An
132
 
133
  spaces_list = []
134
  for hit in results['result']['hits']:
135
- fields = hit.get('fields', {})
136
- space = {
137
- "title": fields.get("title"),
138
- "url": fields.get("url"),
139
- "tool_count": fields.get("tool_count"),
140
- "tags": fields.get("tags"),
141
- "score": hit.get("score", 0)
142
- }
143
- spaces_list.append(space)
 
 
144
 
 
 
145
  return spaces_list
146
 
147
  def load_tools_from_db() -> List[Dict[str, Any]]:
@@ -176,7 +180,7 @@ def upsert_tools_to_pinecone(pc: Pinecone, tools: List[Dict[str, Any]]):
176
  time.sleep(1)
177
  print(f"Uploaded {len(tools)} tools")
178
 
179
- def search_tools(pc: Pinecone, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
180
  """Search for relevant tools based on description"""
181
  index = pc.Index(TOOLS_INDEX_NAME)
182
 
@@ -190,34 +194,37 @@ def search_tools(pc: Pinecone, query: str, top_k: int = 5) -> List[Dict[str, Any
190
  }
191
  )
192
 
193
- # Convert to schema endpoint format
194
  tools_list = []
195
  for hit in results['result']['hits']:
196
- fields = hit.get('fields', {})
197
- tool = {
198
- "name": fields.get("tool_name"),
199
- "description": fields.get("description"),
200
- "inputSchema": json.loads(fields.get("input_schema", "{}")),
201
- "server_url": fields.get("server_url")
202
- }
203
- tools_list.append(tool)
 
 
 
204
 
 
 
205
  return tools_list
206
 
207
-
208
  def search_suitable_tools(query: str)->List[Dict[str, Any]]:
209
  """Search for suitable tools based on query"""
210
  pc = Pinecone(api_key=PINECONE_API_KEY)
211
- tools = search_tools(pc, query)
212
  return tools
213
 
214
  def search_suitable_spaces(query: str)->List[Dict[str, Any]]:
215
  """Search for suitable spaces based on query"""
216
  pc = Pinecone(api_key=PINECONE_API_KEY)
217
- spaces = search_spaces(pc, query, top_k=3)
218
  return spaces
219
 
220
- def initialize_and_upload():
221
  """Initialize Pinecone and upload all tools and spaces"""
222
  pc = Pinecone(api_key=PINECONE_API_KEY)
223
  create_tools_index(pc)
@@ -237,7 +244,7 @@ def initialize_and_upload():
237
  return pc
238
 
239
  if __name__ == "__main__":
240
- pc = initialize_and_upload()
241
 
242
  # Interactive search loop
243
 
 
10
  from sql.sql_utils import load_sql_query
11
  DB_PATH = '/data/huggingface_spaces.db' if os.path.exists('/data') else 'huggingface_spaces.db'
12
  PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
13
+ TOOLS_INDEX_NAME = "vix-mcp-tools"
14
+ SPACES_INDEX_NAME = "vix-mcp-spaces"
15
  SQL_SELECT_TOOLS = "sql/select_tools.sql"
16
  SQL_SELECT_SPACES = "sql/select_spaces.sql"
17
 
 
102
  profile = prepare_space_profile(space)
103
  record = {
104
  "_id": space['space_id'],
105
+ "profile": profile if profile else "",
106
+ "title": space['title'] if space['title'] else "",
107
+ "url": space['schema_url'] if space['schema_url'] else "",
108
+ "tool_count": space['tool_count'] if space['tool_count'] else 0,
109
  "tags": space['tags'] if space['tags'] else '[]'
110
  }
111
  records.append(record)
 
116
  time.sleep(1)
117
  print(f"Uploaded {len(spaces)} spaces")
118
 
119
+ def search_spaces(pc: Pinecone, query: str, top_k: int = 5, score_threshold: float = 0.0) -> List[Dict[str, Any]]:
120
  """Search for relevant MCP spaces based on their description and tools"""
121
  index = pc.Index(SPACES_INDEX_NAME)
122
 
 
132
 
133
  spaces_list = []
134
  for hit in results['result']['hits']:
135
+ score = hit.get("_score", 0)
136
+ if score > score_threshold:
137
+ fields = hit.get('fields', {})
138
+ space = {
139
+ "title": fields.get("title"),
140
+ "url": fields.get("url"),
141
+ "tool_count": fields.get("tool_count"),
142
+ "tags": fields.get("tags"),
143
+ "score": score
144
+ }
145
+ spaces_list.append(space)
146
 
147
+ # Sort by score in descending order
148
+ spaces_list.sort(key=lambda x: x["score"], reverse=True)
149
  return spaces_list
150
 
151
  def load_tools_from_db() -> List[Dict[str, Any]]:
 
180
  time.sleep(1)
181
  print(f"Uploaded {len(tools)} tools")
182
 
183
+ def search_tools(pc: Pinecone, query: str, top_k: int = 5, score_threshold: float = 0.0) -> List[Dict[str, Any]]:
184
  """Search for relevant tools based on description"""
185
  index = pc.Index(TOOLS_INDEX_NAME)
186
 
 
194
  }
195
  )
196
 
 
197
  tools_list = []
198
  for hit in results['result']['hits']:
199
+ score = hit.get("_score", 0)
200
+ if score > score_threshold:
201
+ fields = hit.get('fields', {})
202
+ tool = {
203
+ "name": fields.get("tool_name"),
204
+ "description": fields.get("description"),
205
+ "inputSchema": json.loads(fields.get("input_schema", "{}")),
206
+ "server_url": fields.get("server_url"),
207
+ "score": score
208
+ }
209
+ tools_list.append(tool)
210
 
211
+ # Sort by score in descending order
212
+ tools_list.sort(key=lambda x: x["score"], reverse=True)
213
  return tools_list
214
 
 
215
  def search_suitable_tools(query: str)->List[Dict[str, Any]]:
216
  """Search for suitable tools based on query"""
217
  pc = Pinecone(api_key=PINECONE_API_KEY)
218
+ tools = search_tools(pc, query, top_k=13, score_threshold=0.25)
219
  return tools
220
 
221
  def search_suitable_spaces(query: str)->List[Dict[str, Any]]:
222
  """Search for suitable spaces based on query"""
223
  pc = Pinecone(api_key=PINECONE_API_KEY)
224
+ spaces = search_spaces(pc, query, top_k=3, score_threshold=0.1)
225
  return spaces
226
 
227
+ def initialize_and_upload_to_vector_db():
228
  """Initialize Pinecone and upload all tools and spaces"""
229
  pc = Pinecone(api_key=PINECONE_API_KEY)
230
  create_tools_index(pc)
 
244
  return pc
245
 
246
  if __name__ == "__main__":
247
+ pc = initialize_and_upload_to_vector_db()
248
 
249
  # Interactive search loop
250