Spaces:
Running
Running
| from typing import Dict, List, Tuple | |
| import json | |
| import pandas as pd | |
def agentbase_indexing(db_path: str) -> Tuple[pd.Series, List[str]]:
    """
    Another indexing configuration for AgentBase dense models.

    Builds one text document per CSV row:

    1. concatenate all columns, except agent_id (redundant) and misc
       (not to go over max_seq_length)
    2. skip missing/null values instead of rendering them as "nan"
    3. put the important fields first (agent_name, agent_description,
       agent_category), followed by the remaining columns in file order

    :param db_path: path (or anything ``pd.read_csv`` accepts) of the agents CSV
    :returns: the ``agent_id`` column and the prepared documents, row-aligned
    :raises KeyError: if ``agent_id``, ``misc`` or one of the high-priority
        columns is absent from the CSV
    """
    agents_df = pd.read_csv(db_path)
    agent_ids = agents_df["agent_id"]

    # Work on a copy without the excluded columns; the loaded frame is left intact.
    fields_df = agents_df.drop(columns=["agent_id", "misc"])

    high_priority_cols = ["agent_name", "agent_description", "agent_category"]
    columns = high_priority_cols + [
        col for col in fields_df.columns if col not in high_priority_cols
    ]

    documents = fields_df.apply(
        lambda row: " ".join(
            f"{row[col]}" for col in columns if pd.notna(row[col])
        ),
        axis=1,
    ).tolist()
    return agent_ids, documents
def load_documents(db_path: str, columns=("agent_name", "agent_description")) -> Tuple[pd.Series, List[str]]:
    """
    Loads documents (for sparse and dense models) by concatenating the
    selected column fields of every row with single spaces.

    Missing values are replaced by empty strings, so a row with a null field
    still yields a document (with the separator space kept).

    :param db_path: path (or anything ``pd.read_csv`` accepts) of the agents CSV
    :param columns: names of the columns to concatenate, in output order
    :returns: the ``agent_id`` column and the prepared documents, row-aligned
    :raises KeyError: if ``agent_id`` or one of ``columns`` is absent from the CSV
    """
    agents_df = pd.read_csv(db_path)
    agent_ids = agents_df["agent_id"]  # keep agent IDs (mapping back after retrieval)

    # The default is an immutable tuple; DataFrame selection needs a list,
    # because a tuple would be looked up as a single column key.
    selected = list(columns)

    documents = agents_df[selected].apply(
        lambda row: " ".join(row.fillna("").astype(str)), axis=1
    ).tolist()
    return agent_ids, documents
def tokenise(doc: str) -> List[str]:
    """
    Splits a document into lower-cased, whitespace-separated tokens.

    :returns: list of tokens (empty for an empty or whitespace-only document)
    """
    lowered = doc.lower()
    tokens = lowered.split()
    return tokens
def load_queries(queries_path: str) -> Dict[str, str]:
    """
    Loads the queries stored in a JSON file.

    :param queries_path: path of the JSON file to read
    :returns: the parsed JSON content
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open(queries_path, encoding="utf-8") as json_file:
        return json.load(json_file)