Spaces:
Build error
Build error
Herbert
commited on
Commit
·
4188210
1
Parent(s):
ca7c002
Added hf_spaces instructions
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .coveragerc +0 -25
- .dockerignore +0 -46
- .github/workflows/tests.yaml +0 -35
- .gitignore +0 -215
- .gitmodules +0 -0
- .python-version +0 -1
- CLAUDE.md +0 -165
- Dockerfile +0 -50
- LICENSE +0 -201
- README.md +1 -187
- agentic_nav/__init__.py +0 -0
- agentic_nav/agents/__init__.py +0 -1
- agentic_nav/agents/base.py +0 -327
- agentic_nav/agents/neurips2025_conference.py +0 -48
- agentic_nav/frontend/__init__.py +0 -0
- agentic_nav/frontend/browser_ui.py +0 -525
- agentic_nav/frontend/cli.py +0 -371
- agentic_nav/tools/__init__.py +0 -15
- agentic_nav/tools/knowledge_graph/__init__.py +0 -326
- agentic_nav/tools/knowledge_graph/file_handler.py +0 -29
- agentic_nav/tools/knowledge_graph/graph_generator.py +0 -446
- agentic_nav/tools/knowledge_graph/graph_traversal_strategies/__init__.py +0 -15
- agentic_nav/tools/knowledge_graph/graph_traversal_strategies/breadth_first_random.py +0 -80
- agentic_nav/tools/knowledge_graph/graph_traversal_strategies/depth_first_random.py +0 -78
- agentic_nav/tools/knowledge_graph/graph_traversal_strategies/neo4j_builtin.py +0 -50
- agentic_nav/tools/knowledge_graph/neo4j_db_importer.py +0 -537
- agentic_nav/tools/knowledge_graph/retriever.py +0 -612
- agentic_nav/tools/session_routing/__init__.py +0 -210
- agentic_nav/tools/session_routing/scheduler.py +0 -377
- agentic_nav/tools/session_routing/utils.py +0 -253
- agentic_nav/utils/__init__.py +0 -3
- agentic_nav/utils/cli/__init__.py +0 -3
- agentic_nav/utils/cli/editor.py +0 -29
- agentic_nav/utils/cli/help.py +0 -14
- agentic_nav/utils/cli/history.py +0 -11
- agentic_nav/utils/embedding_generator.py +0 -151
- agentic_nav/utils/file_handlers.py +0 -10
- agentic_nav/utils/logger.py +0 -49
- agentic_nav/utils/tooling.py +0 -44
- app.py +5 -0
- data/.keep +0 -0
- docker-compose.yaml +0 -137
- graphs/.gitkeep +0 -0
- pyproject.toml +0 -59
- pytest.ini +0 -26
- requirements.txt +1 -1
- scripts/docker-entrypoint.sh +0 -14
- scripts/import_neurips2025_kg.sh +0 -13
- scripts/prepare_gradio.sh +0 -18
- tests/__init__.py +0 -1
.coveragerc
DELETED
|
@@ -1,25 +0,0 @@
|
|
| 1 |
-
[run]
|
| 2 |
-
source = llm_agents
|
| 3 |
-
omit =
|
| 4 |
-
*/gradio/*
|
| 5 |
-
*/tests/*
|
| 6 |
-
*/__pycache__/*
|
| 7 |
-
*/.*
|
| 8 |
-
*/venv/*
|
| 9 |
-
*/.venv/*
|
| 10 |
-
|
| 11 |
-
[report]
|
| 12 |
-
exclude_lines =
|
| 13 |
-
pragma: no cover
|
| 14 |
-
def __repr__
|
| 15 |
-
if self.debug:
|
| 16 |
-
if settings.DEBUG
|
| 17 |
-
raise AssertionError
|
| 18 |
-
raise NotImplementedError
|
| 19 |
-
if 0:
|
| 20 |
-
if __name__ == .__main__.:
|
| 21 |
-
class .*\bProtocol\):
|
| 22 |
-
@(abc\.)?abstractmethod
|
| 23 |
-
|
| 24 |
-
[html]
|
| 25 |
-
directory = htmlcov
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.dockerignore
DELETED
|
@@ -1,46 +0,0 @@
|
|
| 1 |
-
# Python
|
| 2 |
-
__pycache__
|
| 3 |
-
*.py[cod]
|
| 4 |
-
*$py.class
|
| 5 |
-
*.so
|
| 6 |
-
.Python
|
| 7 |
-
*.egg-info
|
| 8 |
-
dist
|
| 9 |
-
build
|
| 10 |
-
.eggs
|
| 11 |
-
|
| 12 |
-
# Virtual environments
|
| 13 |
-
.venv
|
| 14 |
-
venv
|
| 15 |
-
ENV
|
| 16 |
-
env
|
| 17 |
-
|
| 18 |
-
# IDE
|
| 19 |
-
.vscode
|
| 20 |
-
.idea
|
| 21 |
-
*.swp
|
| 22 |
-
*.swo
|
| 23 |
-
*~
|
| 24 |
-
|
| 25 |
-
# Version control
|
| 26 |
-
.git
|
| 27 |
-
.gitignore
|
| 28 |
-
|
| 29 |
-
# OS
|
| 30 |
-
.DS_Store
|
| 31 |
-
Thumbs.db
|
| 32 |
-
|
| 33 |
-
# Testing
|
| 34 |
-
.pytest_cache
|
| 35 |
-
.coverage
|
| 36 |
-
htmlcov
|
| 37 |
-
|
| 38 |
-
# Documentation
|
| 39 |
-
docs/_build
|
| 40 |
-
|
| 41 |
-
# Logs
|
| 42 |
-
*.log
|
| 43 |
-
|
| 44 |
-
# Local development files
|
| 45 |
-
.env.local
|
| 46 |
-
*.local
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.github/workflows/tests.yaml
DELETED
|
@@ -1,35 +0,0 @@
|
|
| 1 |
-
name: Tests
|
| 2 |
-
|
| 3 |
-
on:
|
| 4 |
-
push:
|
| 5 |
-
branches: [ main, master, dev ]
|
| 6 |
-
pull_request:
|
| 7 |
-
branches: [ main, master, dev ]
|
| 8 |
-
|
| 9 |
-
jobs:
|
| 10 |
-
test:
|
| 11 |
-
runs-on: ubuntu-latest
|
| 12 |
-
strategy:
|
| 13 |
-
matrix:
|
| 14 |
-
python-version: ['3.14']
|
| 15 |
-
|
| 16 |
-
steps:
|
| 17 |
-
- uses: actions/checkout@v4
|
| 18 |
-
with:
|
| 19 |
-
submodules: recursive
|
| 20 |
-
|
| 21 |
-
- name: Set up Python ${{ matrix.python-version }}
|
| 22 |
-
uses: actions/setup-python@v4
|
| 23 |
-
with:
|
| 24 |
-
python-version: ${{ matrix.python-version }}
|
| 25 |
-
|
| 26 |
-
- name: Install uv
|
| 27 |
-
uses: astral-sh/setup-uv@v3
|
| 28 |
-
|
| 29 |
-
- name: Install dependencies
|
| 30 |
-
run: uv sync
|
| 31 |
-
|
| 32 |
-
- name: Run tests with coverage
|
| 33 |
-
run: |
|
| 34 |
-
uv run pytest --cov=llm_agents --cov-report=term --cov-report=json tests/
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.gitignore
DELETED
|
@@ -1,215 +0,0 @@
|
|
| 1 |
-
# Byte-compiled / optimized / DLL files
|
| 2 |
-
__pycache__/
|
| 3 |
-
*.py[codz]
|
| 4 |
-
*$py.class
|
| 5 |
-
|
| 6 |
-
# C extensions
|
| 7 |
-
*.so
|
| 8 |
-
|
| 9 |
-
# Distribution / packaging
|
| 10 |
-
.Python
|
| 11 |
-
build/
|
| 12 |
-
develop-eggs/
|
| 13 |
-
dist/
|
| 14 |
-
downloads/
|
| 15 |
-
eggs/
|
| 16 |
-
.eggs/
|
| 17 |
-
lib/
|
| 18 |
-
lib64/
|
| 19 |
-
parts/
|
| 20 |
-
sdist/
|
| 21 |
-
var/
|
| 22 |
-
wheels/
|
| 23 |
-
share/python-wheels/
|
| 24 |
-
*.egg-info/
|
| 25 |
-
.installed.cfg
|
| 26 |
-
*.egg
|
| 27 |
-
MANIFEST
|
| 28 |
-
|
| 29 |
-
# PyInstaller
|
| 30 |
-
# Usually these files are written by a python script from a template
|
| 31 |
-
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 32 |
-
*.manifest
|
| 33 |
-
*.spec
|
| 34 |
-
|
| 35 |
-
# Installer logs
|
| 36 |
-
pip-log.txt
|
| 37 |
-
pip-delete-this-directory.txt
|
| 38 |
-
|
| 39 |
-
# Unit test / coverage reports
|
| 40 |
-
htmlcov/
|
| 41 |
-
.tox/
|
| 42 |
-
.nox/
|
| 43 |
-
.coverage
|
| 44 |
-
.coverage.*
|
| 45 |
-
.cache
|
| 46 |
-
nosetests.xml
|
| 47 |
-
coverage.xml
|
| 48 |
-
*.cover
|
| 49 |
-
*.py.cover
|
| 50 |
-
.hypothesis/
|
| 51 |
-
.pytest_cache/
|
| 52 |
-
cover/
|
| 53 |
-
|
| 54 |
-
# Translations
|
| 55 |
-
*.mo
|
| 56 |
-
*.pot
|
| 57 |
-
|
| 58 |
-
# Django stuff:
|
| 59 |
-
*.log
|
| 60 |
-
local_settings.py
|
| 61 |
-
db.sqlite3
|
| 62 |
-
db.sqlite3-journal
|
| 63 |
-
|
| 64 |
-
# Flask stuff:
|
| 65 |
-
instance/
|
| 66 |
-
.webassets-cache
|
| 67 |
-
|
| 68 |
-
# Scrapy stuff:
|
| 69 |
-
.scrapy
|
| 70 |
-
|
| 71 |
-
# Sphinx documentation
|
| 72 |
-
docs/_build/
|
| 73 |
-
|
| 74 |
-
# PyBuilder
|
| 75 |
-
.pybuilder/
|
| 76 |
-
target/
|
| 77 |
-
|
| 78 |
-
# Jupyter Notebook
|
| 79 |
-
.ipynb_checkpoints
|
| 80 |
-
|
| 81 |
-
# IPython
|
| 82 |
-
profile_default/
|
| 83 |
-
ipython_config.py
|
| 84 |
-
|
| 85 |
-
# pyenv
|
| 86 |
-
# For a library or package, you might want to ignore these files since the code is
|
| 87 |
-
# intended to run in multiple environments; otherwise, check them in:
|
| 88 |
-
# .python-version
|
| 89 |
-
|
| 90 |
-
# pipenv
|
| 91 |
-
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
| 92 |
-
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
| 93 |
-
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
| 94 |
-
# install all needed dependencies.
|
| 95 |
-
#Pipfile.lock
|
| 96 |
-
|
| 97 |
-
# UV
|
| 98 |
-
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
| 99 |
-
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 100 |
-
# commonly ignored for libraries.
|
| 101 |
-
#uv.lock
|
| 102 |
-
|
| 103 |
-
# poetry
|
| 104 |
-
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
| 105 |
-
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 106 |
-
# commonly ignored for libraries.
|
| 107 |
-
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
| 108 |
-
#poetry.lock
|
| 109 |
-
#poetry.toml
|
| 110 |
-
|
| 111 |
-
# pdm
|
| 112 |
-
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
| 113 |
-
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
| 114 |
-
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
| 115 |
-
#pdm.lock
|
| 116 |
-
#pdm.toml
|
| 117 |
-
.pdm-python
|
| 118 |
-
.pdm-build/
|
| 119 |
-
|
| 120 |
-
# pixi
|
| 121 |
-
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
| 122 |
-
#pixi.lock
|
| 123 |
-
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
| 124 |
-
# in the .venv directory. It is recommended not to include this directory in version control.
|
| 125 |
-
.pixi
|
| 126 |
-
|
| 127 |
-
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
| 128 |
-
__pypackages__/
|
| 129 |
-
|
| 130 |
-
# Celery stuff
|
| 131 |
-
celerybeat-schedule
|
| 132 |
-
celerybeat.pid
|
| 133 |
-
|
| 134 |
-
# SageMath parsed files
|
| 135 |
-
*.sage.py
|
| 136 |
-
|
| 137 |
-
# Environments
|
| 138 |
-
.env
|
| 139 |
-
.envrc
|
| 140 |
-
.venv
|
| 141 |
-
env/
|
| 142 |
-
venv/
|
| 143 |
-
ENV/
|
| 144 |
-
env.bak/
|
| 145 |
-
venv.bak/
|
| 146 |
-
.idea/
|
| 147 |
-
|
| 148 |
-
# Spyder project settings
|
| 149 |
-
.spyderproject
|
| 150 |
-
.spyproject
|
| 151 |
-
|
| 152 |
-
# Rope project settings
|
| 153 |
-
.ropeproject
|
| 154 |
-
|
| 155 |
-
# mkdocs documentation
|
| 156 |
-
/site
|
| 157 |
-
|
| 158 |
-
# mypy
|
| 159 |
-
.mypy_cache/
|
| 160 |
-
.dmypy.json
|
| 161 |
-
dmypy.json
|
| 162 |
-
|
| 163 |
-
# Pyre type checker
|
| 164 |
-
.pyre/
|
| 165 |
-
|
| 166 |
-
# pytype static type analyzer
|
| 167 |
-
.pytype/
|
| 168 |
-
|
| 169 |
-
# Cython debug symbols
|
| 170 |
-
cython_debug/
|
| 171 |
-
|
| 172 |
-
# PyCharm
|
| 173 |
-
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
| 174 |
-
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
| 175 |
-
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
| 176 |
-
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
| 177 |
-
#.idea/
|
| 178 |
-
|
| 179 |
-
# Abstra
|
| 180 |
-
# Abstra is an AI-powered process automation framework.
|
| 181 |
-
# Ignore directories containing user credentials, local state, and settings.
|
| 182 |
-
# Learn more at https://abstra.io/docs
|
| 183 |
-
.abstra/
|
| 184 |
-
|
| 185 |
-
# Visual Studio Code
|
| 186 |
-
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
|
| 187 |
-
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
|
| 188 |
-
# and can be added to the global gitignore or merged into this file. However, if you prefer,
|
| 189 |
-
# you could uncomment the following to ignore the entire vscode folder
|
| 190 |
-
# .vscode/
|
| 191 |
-
|
| 192 |
-
# Ruff stuff:
|
| 193 |
-
.ruff_cache/
|
| 194 |
-
|
| 195 |
-
# PyPI configuration file
|
| 196 |
-
.pypirc
|
| 197 |
-
|
| 198 |
-
# Cursor
|
| 199 |
-
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
|
| 200 |
-
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
|
| 201 |
-
# refer to https://docs.cursor.com/context/ignore-files
|
| 202 |
-
.cursorignore
|
| 203 |
-
.cursorindexingignore
|
| 204 |
-
|
| 205 |
-
# Marimo
|
| 206 |
-
marimo/_static/
|
| 207 |
-
marimo/_lsp/
|
| 208 |
-
__marimo__/
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
data/*.json
|
| 212 |
-
.vscode/
|
| 213 |
-
rag_index_json/
|
| 214 |
-
*.pkl
|
| 215 |
-
*.json
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.gitmodules
DELETED
|
File without changes
|
.python-version
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
3.10
|
|
|
|
|
|
CLAUDE.md
DELETED
|
@@ -1,165 +0,0 @@
|
|
| 1 |
-
# CLAUDE.md
|
| 2 |
-
|
| 3 |
-
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
| 4 |
-
|
| 5 |
-
## Project Overview
|
| 6 |
-
|
| 7 |
-
This is LLMAgents, a Python package for AI research analysis agents. The system helps researchers browse papers, find similar papers, write summaries, and plan conference schedules using Neo4j knowledge graphs and LLM agents.
|
| 8 |
-
|
| 9 |
-
## Key Commands
|
| 10 |
-
|
| 11 |
-
### Environment Setup
|
| 12 |
-
```bash
|
| 13 |
-
# Install dependencies
|
| 14 |
-
uv sync
|
| 15 |
-
|
| 16 |
-
# Setup environment variables (required before running)
|
| 17 |
-
export $(grep -v '^#' .env | xargs)
|
| 18 |
-
|
| 19 |
-
# Prepare gradio from source (for Python 3.14 compatibility)
|
| 20 |
-
bash scripts/prepare_gradio.sh
|
| 21 |
-
```
|
| 22 |
-
|
| 23 |
-
### Running the Application
|
| 24 |
-
```bash
|
| 25 |
-
# CLI interface
|
| 26 |
-
uv run agentic-nav-cli -t 0.4 --max-tokens 6000 -c 131072 --max-num-papers 10
|
| 27 |
-
|
| 28 |
-
# Web interface
|
| 29 |
-
agentic-nav-web
|
| 30 |
-
```
|
| 31 |
-
|
| 32 |
-
### Database and Knowledge Graph
|
| 33 |
-
```bash
|
| 34 |
-
# Start required services
|
| 35 |
-
docker compose up neo4j_db ollama_embed ollama_agent -d
|
| 36 |
-
|
| 37 |
-
# Build knowledge graph from NeurIPS 2025 data
|
| 38 |
-
uv run llm_agents/tools/knowledge_graph/graph_generator.py \
|
| 39 |
-
--input-json-file data/neurips-2025-orals-posters.json \
|
| 40 |
-
--embedding-model $EMBEDDING_MODEL_NAME \
|
| 41 |
-
--ollama-server-url $EMBEDDING_MODEL_API_BASE \
|
| 42 |
-
--embedding-gen-batch-size 32 \
|
| 43 |
-
--max-parallel-workers 28 \
|
| 44 |
-
--similarity-threshold 0.8 \
|
| 45 |
-
--output-file graphs/knowledge_graph.pkl
|
| 46 |
-
|
| 47 |
-
# Import knowledge graph to Neo4j
|
| 48 |
-
uv run llm_agents/tools/knowledge_graph/neo4j_db_importer.py \
|
| 49 |
-
--graph-path graphs/knowledge_graph.pkl \
|
| 50 |
-
--neo4j-uri bolt://localhost:7687 \
|
| 51 |
-
--batch-size 100 \
|
| 52 |
-
--embedding-dimension 768
|
| 53 |
-
```
|
| 54 |
-
|
| 55 |
-
### Testing
|
| 56 |
-
```bash
|
| 57 |
-
# Run all tests (recommended - avoids gradio conflicts)
|
| 58 |
-
uv run pytest tests/
|
| 59 |
-
|
| 60 |
-
# Run tests with coverage report
|
| 61 |
-
uv run pytest tests/ --cov=llm_agents --cov-report=term-missing
|
| 62 |
-
|
| 63 |
-
# Alternative: Use the custom test runner
|
| 64 |
-
python run_tests.py
|
| 65 |
-
|
| 66 |
-
# Run specific test categories
|
| 67 |
-
uv run pytest tests/ -m unit # Unit tests only
|
| 68 |
-
uv run pytest tests/ -m integration # Integration tests only
|
| 69 |
-
uv run pytest tests/ -m "not slow" # Skip slow tests
|
| 70 |
-
|
| 71 |
-
# Run tests for specific module
|
| 72 |
-
uv run pytest tests/agents/
|
| 73 |
-
uv run pytest tests/tools/
|
| 74 |
-
uv run pytest tests/utils/
|
| 75 |
-
uv run pytest tests/frontend/
|
| 76 |
-
|
| 77 |
-
# Run single test file
|
| 78 |
-
uv run pytest tests/agents/test_base.py
|
| 79 |
-
|
| 80 |
-
# Run with verbose output
|
| 81 |
-
uv run pytest tests/ -v
|
| 82 |
-
|
| 83 |
-
# Generate HTML coverage report
|
| 84 |
-
uv run pytest tests/ --cov=llm_agents --cov-report=html
|
| 85 |
-
# View coverage report at htmlcov/index.html
|
| 86 |
-
|
| 87 |
-
# Note: Always specify tests/ directory to avoid conflicts with gradio workspace
|
| 88 |
-
```
|
| 89 |
-
|
| 90 |
-
### Development
|
| 91 |
-
```bash
|
| 92 |
-
# Run full system with Docker
|
| 93 |
-
docker compose up --build -d
|
| 94 |
-
|
| 95 |
-
# Import pre-generated NeurIPS 2025 knowledge graph
|
| 96 |
-
bash scripts/import_neurips2025_kg.sh
|
| 97 |
-
```
|
| 98 |
-
|
| 99 |
-
## Architecture
|
| 100 |
-
|
| 101 |
-
### Core Components
|
| 102 |
-
|
| 103 |
-
1. **Agent System (`llm_agents/agents/`)**
|
| 104 |
-
- `base.py`: Core LLMAgent class with streaming support and tool execution
|
| 105 |
-
- `neurips2025_conference.py`: Specialized agent for NeurIPS 2025 conference data
|
| 106 |
-
- Uses LiteLLM for model abstraction, supports Ollama models
|
| 107 |
-
|
| 108 |
-
2. **Tools System (`llm_agents/tools/`)**
|
| 109 |
-
- Knowledge graph tools: `search_similar_papers`, `find_neighboring_papers`, `traverse_graph`
|
| 110 |
-
- Graph traversal strategies: breadth-first, depth-first, neo4j builtin
|
| 111 |
-
- Tool registry automatically discovers callable functions
|
| 112 |
-
|
| 113 |
-
3. **Frontend (`llm_agents/frontend/`)**
|
| 114 |
-
- `cli.py`: Rich terminal interface with streaming, command history, auto-completion
|
| 115 |
-
- `browser_ui.py`: Gradio web interface for browser-based interactions
|
| 116 |
-
- Both interfaces support the same agent functionality
|
| 117 |
-
|
| 118 |
-
4. **Knowledge Graph (`llm_agents/tools/knowledge_graph/`)**
|
| 119 |
-
- Neo4j-based paper similarity and relationship storage
|
| 120 |
-
- Embedding-based vector search for paper discovery
|
| 121 |
-
- Support for graph traversal algorithms
|
| 122 |
-
|
| 123 |
-
### Key Data Flow
|
| 124 |
-
|
| 125 |
-
1. User input → Frontend (CLI/Web)
|
| 126 |
-
2. Frontend → Agent (stateless interaction with streaming)
|
| 127 |
-
3. Agent → LLM (via LiteLLM) + Tools (knowledge graph queries)
|
| 128 |
-
4. Tools → Neo4j database for paper retrieval
|
| 129 |
-
5. Results streamed back to user with live markdown rendering
|
| 130 |
-
|
| 131 |
-
## Configuration
|
| 132 |
-
|
| 133 |
-
### Required Environment Variables
|
| 134 |
-
```bash
|
| 135 |
-
NEO4J_USERNAME=neo4j
|
| 136 |
-
NEO4J_PASSWORD=<password>
|
| 137 |
-
EMBEDDING_MODEL_NAME=nomic-embed-text
|
| 138 |
-
EMBEDDING_MODEL_API_BASE=http://localhost:11435
|
| 139 |
-
AGENT_MODEL_NAME=gpt-oss:20b
|
| 140 |
-
AGENT_MODEL_API_BASE=http://localhost:11436
|
| 141 |
-
OLLAMA_API_KEY=<optional>
|
| 142 |
-
POPULATE_DATABASE_NIPS2025=false
|
| 143 |
-
AGENTIC_NAV_LOG_LEVEL=INFO
|
| 144 |
-
```
|
| 145 |
-
|
| 146 |
-
### Model Support
|
| 147 |
-
- Primary: Ollama models (local and remote)
|
| 148 |
-
- Remote Ollama models via https://ollama.com with API key
|
| 149 |
-
- Uses LiteLLM for provider abstraction
|
| 150 |
-
|
| 151 |
-
## Dependencies
|
| 152 |
-
|
| 153 |
-
- **Python**: 3.14+ required
|
| 154 |
-
- **uv**: For dependency management
|
| 155 |
-
- **Neo4j**: Graph database for knowledge storage
|
| 156 |
-
- **Ollama**: LLM inference (supports GPU acceleration with Nvidia Container Toolkit)
|
| 157 |
-
- **Gradio**: Built from source for Python 3.14 compatibility
|
| 158 |
-
|
| 159 |
-
## Development Notes
|
| 160 |
-
|
| 161 |
-
- The system is designed for multi-user sessions via stateless agent interactions
|
| 162 |
-
- Streaming responses are supported in both CLI and web interfaces
|
| 163 |
-
- Tool calls are automatically executed and results fed back to the LLM
|
| 164 |
-
- Chat history can be saved/loaded in JSON format
|
| 165 |
-
- Logging is configured per environment with structured output to `logs/` directory
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Dockerfile
DELETED
|
@@ -1,50 +0,0 @@
|
|
| 1 |
-
FROM python:3.14-slim
|
| 2 |
-
|
| 3 |
-
WORKDIR /app
|
| 4 |
-
|
| 5 |
-
RUN apt-get update && apt-get install -y \
|
| 6 |
-
build-essential \
|
| 7 |
-
git \
|
| 8 |
-
bash \
|
| 9 |
-
wget \
|
| 10 |
-
curl \
|
| 11 |
-
&& rm -rf /var/lib/apt/lists/*
|
| 12 |
-
|
| 13 |
-
# Install Node.js (required for pnpm and building Gradio frontend)
|
| 14 |
-
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
|
| 15 |
-
apt-get install -y nodejs && \
|
| 16 |
-
rm -rf /var/lib/apt/lists/*
|
| 17 |
-
|
| 18 |
-
# Install pnpm globally
|
| 19 |
-
RUN npm install -g pnpm
|
| 20 |
-
|
| 21 |
-
# Install uv first (before copying files)
|
| 22 |
-
RUN pip install --no-cache-dir uv
|
| 23 |
-
|
| 24 |
-
# Copy all necessary files
|
| 25 |
-
COPY pyproject.toml uv.lock* ./
|
| 26 |
-
COPY .python-version* ./
|
| 27 |
-
COPY README.md ./
|
| 28 |
-
COPY LICENSE ./
|
| 29 |
-
COPY llm_agents/ ./llm_agents/
|
| 30 |
-
COPY scripts/ ./scripts/
|
| 31 |
-
COPY graphs/ ./graphs/
|
| 32 |
-
|
| 33 |
-
RUN mkdir ./gradio
|
| 34 |
-
RUN git clone https://github.com/gradio-app/gradio.git gradio/
|
| 35 |
-
|
| 36 |
-
# Run the gradio preparation script (build frontend only, submodule already initialized)
|
| 37 |
-
RUN bash scripts/prepare_gradio.sh
|
| 38 |
-
|
| 39 |
-
# Use uv sync to install dependencies
|
| 40 |
-
RUN uv sync
|
| 41 |
-
|
| 42 |
-
EXPOSE 7860
|
| 43 |
-
|
| 44 |
-
# Set entrypoint
|
| 45 |
-
# Download and initialize the NeurIPS 2025 conference knowledge graph
|
| 46 |
-
RUN chmod +x /app/scripts/docker-entrypoint.sh
|
| 47 |
-
RUN chmod +x /app/scripts/import_neurips2025_kg.sh
|
| 48 |
-
ENTRYPOINT ["scripts/docker-entrypoint.sh"]
|
| 49 |
-
|
| 50 |
-
CMD ["uv", "run", "llm_agents/frontend/browser_ui.py"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
LICENSE
DELETED
|
@@ -1,201 +0,0 @@
|
|
| 1 |
-
Apache License
|
| 2 |
-
Version 2.0, January 2004
|
| 3 |
-
http://www.apache.org/licenses/
|
| 4 |
-
|
| 5 |
-
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
| 6 |
-
|
| 7 |
-
1. Definitions.
|
| 8 |
-
|
| 9 |
-
"License" shall mean the terms and conditions for use, reproduction,
|
| 10 |
-
and distribution as defined by Sections 1 through 9 of this document.
|
| 11 |
-
|
| 12 |
-
"Licensor" shall mean the copyright owner or entity authorized by
|
| 13 |
-
the copyright owner that is granting the License.
|
| 14 |
-
|
| 15 |
-
"Legal Entity" shall mean the union of the acting entity and all
|
| 16 |
-
other entities that control, are controlled by, or are under common
|
| 17 |
-
control with that entity. For the purposes of this definition,
|
| 18 |
-
"control" means (i) the power, direct or indirect, to cause the
|
| 19 |
-
direction or management of such entity, whether by contract or
|
| 20 |
-
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
| 21 |
-
outstanding shares, or (iii) beneficial ownership of such entity.
|
| 22 |
-
|
| 23 |
-
"You" (or "Your") shall mean an individual or Legal Entity
|
| 24 |
-
exercising permissions granted by this License.
|
| 25 |
-
|
| 26 |
-
"Source" form shall mean the preferred form for making modifications,
|
| 27 |
-
including but not limited to software source code, documentation
|
| 28 |
-
source, and configuration files.
|
| 29 |
-
|
| 30 |
-
"Object" form shall mean any form resulting from mechanical
|
| 31 |
-
transformation or translation of a Source form, including but
|
| 32 |
-
not limited to compiled object code, generated documentation,
|
| 33 |
-
and conversions to other media types.
|
| 34 |
-
|
| 35 |
-
"Work" shall mean the work of authorship, whether in Source or
|
| 36 |
-
Object form, made available under the License, as indicated by a
|
| 37 |
-
copyright notice that is included in or attached to the work
|
| 38 |
-
(an example is provided in the Appendix below).
|
| 39 |
-
|
| 40 |
-
"Derivative Works" shall mean any work, whether in Source or Object
|
| 41 |
-
form, that is based on (or derived from) the Work and for which the
|
| 42 |
-
editorial revisions, annotations, elaborations, or other modifications
|
| 43 |
-
represent, as a whole, an original work of authorship. For the purposes
|
| 44 |
-
of this License, Derivative Works shall not include works that remain
|
| 45 |
-
separable from, or merely link (or bind by name) to the interfaces of,
|
| 46 |
-
the Work and Derivative Works thereof.
|
| 47 |
-
|
| 48 |
-
"Contribution" shall mean any work of authorship, including
|
| 49 |
-
the original version of the Work and any modifications or additions
|
| 50 |
-
to that Work or Derivative Works thereof, that is intentionally
|
| 51 |
-
submitted to Licensor for inclusion in the Work by the copyright owner
|
| 52 |
-
or by an individual or Legal Entity authorized to submit on behalf of
|
| 53 |
-
the copyright owner. For the purposes of this definition, "submitted"
|
| 54 |
-
means any form of electronic, verbal, or written communication sent
|
| 55 |
-
to the Licensor or its representatives, including but not limited to
|
| 56 |
-
communication on electronic mailing lists, source code control systems,
|
| 57 |
-
and issue tracking systems that are managed by, or on behalf of, the
|
| 58 |
-
Licensor for the purpose of discussing and improving the Work, but
|
| 59 |
-
excluding communication that is conspicuously marked or otherwise
|
| 60 |
-
designated in writing by the copyright owner as "Not a Contribution."
|
| 61 |
-
|
| 62 |
-
"Contributor" shall mean Licensor and any individual or Legal Entity
|
| 63 |
-
on behalf of whom a Contribution has been received by Licensor and
|
| 64 |
-
subsequently incorporated within the Work.
|
| 65 |
-
|
| 66 |
-
2. Grant of Copyright License. Subject to the terms and conditions of
|
| 67 |
-
this License, each Contributor hereby grants to You a perpetual,
|
| 68 |
-
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
| 69 |
-
copyright license to reproduce, prepare Derivative Works of,
|
| 70 |
-
publicly display, publicly perform, sublicense, and distribute the
|
| 71 |
-
Work and such Derivative Works in Source or Object form.
|
| 72 |
-
|
| 73 |
-
3. Grant of Patent License. Subject to the terms and conditions of
|
| 74 |
-
this License, each Contributor hereby grants to You a perpetual,
|
| 75 |
-
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
| 76 |
-
(except as stated in this section) patent license to make, have made,
|
| 77 |
-
use, offer to sell, sell, import, and otherwise transfer the Work,
|
| 78 |
-
where such license applies only to those patent claims licensable
|
| 79 |
-
by such Contributor that are necessarily infringed by their
|
| 80 |
-
Contribution(s) alone or by combination of their Contribution(s)
|
| 81 |
-
with the Work to which such Contribution(s) was submitted. If You
|
| 82 |
-
institute patent litigation against any entity (including a
|
| 83 |
-
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
| 84 |
-
or a Contribution incorporated within the Work constitutes direct
|
| 85 |
-
or contributory patent infringement, then any patent licenses
|
| 86 |
-
granted to You under this License for that Work shall terminate
|
| 87 |
-
as of the date such litigation is filed.
|
| 88 |
-
|
| 89 |
-
4. Redistribution. You may reproduce and distribute copies of the
|
| 90 |
-
Work or Derivative Works thereof in any medium, with or without
|
| 91 |
-
modifications, and in Source or Object form, provided that You
|
| 92 |
-
meet the following conditions:
|
| 93 |
-
|
| 94 |
-
(a) You must give any other recipients of the Work or
|
| 95 |
-
Derivative Works a copy of this License; and
|
| 96 |
-
|
| 97 |
-
(b) You must cause any modified files to carry prominent notices
|
| 98 |
-
stating that You changed the files; and
|
| 99 |
-
|
| 100 |
-
(c) You must retain, in the Source form of any Derivative Works
|
| 101 |
-
that You distribute, all copyright, patent, trademark, and
|
| 102 |
-
attribution notices from the Source form of the Work,
|
| 103 |
-
excluding those notices that do not pertain to any part of
|
| 104 |
-
the Derivative Works; and
|
| 105 |
-
|
| 106 |
-
(d) If the Work includes a "NOTICE" text file as part of its
|
| 107 |
-
distribution, then any Derivative Works that You distribute must
|
| 108 |
-
include a readable copy of the attribution notices contained
|
| 109 |
-
within such NOTICE file, excluding those notices that do not
|
| 110 |
-
pertain to any part of the Derivative Works, in at least one
|
| 111 |
-
of the following places: within a NOTICE text file distributed
|
| 112 |
-
as part of the Derivative Works; within the Source form or
|
| 113 |
-
documentation, if provided along with the Derivative Works; or,
|
| 114 |
-
within a display generated by the Derivative Works, if and
|
| 115 |
-
wherever such third-party notices normally appear. The contents
|
| 116 |
-
of the NOTICE file are for informational purposes only and
|
| 117 |
-
do not modify the License. You may add Your own attribution
|
| 118 |
-
notices within Derivative Works that You distribute, alongside
|
| 119 |
-
or as an addendum to the NOTICE text from the Work, provided
|
| 120 |
-
that such additional attribution notices cannot be construed
|
| 121 |
-
as modifying the License.
|
| 122 |
-
|
| 123 |
-
You may add Your own copyright statement to Your modifications and
|
| 124 |
-
may provide additional or different license terms and conditions
|
| 125 |
-
for use, reproduction, or distribution of Your modifications, or
|
| 126 |
-
for any such Derivative Works as a whole, provided Your use,
|
| 127 |
-
reproduction, and distribution of the Work otherwise complies with
|
| 128 |
-
the conditions stated in this License.
|
| 129 |
-
|
| 130 |
-
5. Submission of Contributions. Unless You explicitly state otherwise,
|
| 131 |
-
any Contribution intentionally submitted for inclusion in the Work
|
| 132 |
-
by You to the Licensor shall be under the terms and conditions of
|
| 133 |
-
this License, without any additional terms or conditions.
|
| 134 |
-
Notwithstanding the above, nothing herein shall supersede or modify
|
| 135 |
-
the terms of any separate license agreement you may have executed
|
| 136 |
-
with Licensor regarding such Contributions.
|
| 137 |
-
|
| 138 |
-
6. Trademarks. This License does not grant permission to use the trade
|
| 139 |
-
names, trademarks, service marks, or product names of the Licensor,
|
| 140 |
-
except as required for reasonable and customary use in describing the
|
| 141 |
-
origin of the Work and reproducing the content of the NOTICE file.
|
| 142 |
-
|
| 143 |
-
7. Disclaimer of Warranty. Unless required by applicable law or
|
| 144 |
-
agreed to in writing, Licensor provides the Work (and each
|
| 145 |
-
Contributor provides its Contributions) on an "AS IS" BASIS,
|
| 146 |
-
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
| 147 |
-
implied, including, without limitation, any warranties or conditions
|
| 148 |
-
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
| 149 |
-
PARTICULAR PURPOSE. You are solely responsible for determining the
|
| 150 |
-
appropriateness of using or redistributing the Work and assume any
|
| 151 |
-
risks associated with Your exercise of permissions under this License.
|
| 152 |
-
|
| 153 |
-
8. Limitation of Liability. In no event and under no legal theory,
|
| 154 |
-
whether in tort (including negligence), contract, or otherwise,
|
| 155 |
-
unless required by applicable law (such as deliberate and grossly
|
| 156 |
-
negligent acts) or agreed to in writing, shall any Contributor be
|
| 157 |
-
liable to You for damages, including any direct, indirect, special,
|
| 158 |
-
incidental, or consequential damages of any character arising as a
|
| 159 |
-
result of this License or out of the use or inability to use the
|
| 160 |
-
Work (including but not limited to damages for loss of goodwill,
|
| 161 |
-
work stoppage, computer failure or malfunction, or any and all
|
| 162 |
-
other commercial damages or losses), even if such Contributor
|
| 163 |
-
has been advised of the possibility of such damages.
|
| 164 |
-
|
| 165 |
-
9. Accepting Warranty or Additional Liability. While redistributing
|
| 166 |
-
the Work or Derivative Works thereof, You may choose to offer,
|
| 167 |
-
and charge a fee for, acceptance of support, warranty, indemnity,
|
| 168 |
-
or other liability obligations and/or rights consistent with this
|
| 169 |
-
License. However, in accepting such obligations, You may act only
|
| 170 |
-
on Your own behalf and on Your sole responsibility, not on behalf
|
| 171 |
-
of any other Contributor, and only if You agree to indemnify,
|
| 172 |
-
defend, and hold each Contributor harmless for any liability
|
| 173 |
-
incurred by, or claims asserted against, such Contributor by reason
|
| 174 |
-
of your accepting any such warranty or additional liability.
|
| 175 |
-
|
| 176 |
-
END OF TERMS AND CONDITIONS
|
| 177 |
-
|
| 178 |
-
APPENDIX: How to apply the Apache License to your work.
|
| 179 |
-
|
| 180 |
-
To apply the Apache License to your work, attach the following
|
| 181 |
-
boilerplate notice, with the fields enclosed by brackets "[]"
|
| 182 |
-
replaced with your own identifying information. (Don't include
|
| 183 |
-
the brackets!) The text should be enclosed in the appropriate
|
| 184 |
-
comment syntax for the file format. We also recommend that a
|
| 185 |
-
file or class name and description of purpose be included on the
|
| 186 |
-
same "printed page" as the copyright notice for easier
|
| 187 |
-
identification within third-party archives.
|
| 188 |
-
|
| 189 |
-
Copyright [yyyy] [name of copyright owner]
|
| 190 |
-
|
| 191 |
-
Licensed under the Apache License, Version 2.0 (the "License");
|
| 192 |
-
you may not use this file except in compliance with the License.
|
| 193 |
-
You may obtain a copy of the License at
|
| 194 |
-
|
| 195 |
-
http://www.apache.org/licenses/LICENSE-2.0
|
| 196 |
-
|
| 197 |
-
Unless required by applicable law or agreed to in writing, software
|
| 198 |
-
distributed under the License is distributed on an "AS IS" BASIS,
|
| 199 |
-
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 200 |
-
See the License for the specific language governing permissions and
|
| 201 |
-
limitations under the License.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
CHANGED
|
@@ -17,193 +17,7 @@ short_description: Agent for NeurIPS paper discovery and visit schedule builder
|
|
| 17 |
|
| 18 |
# AgenticNAV - Your AI conference companion
|
| 19 |
|
| 20 |
-
[
|
| 21 |
-

|
| 22 |
|
| 23 |
-
This repository contains code for an agent that can help you do related work for your next research project.
|
| 24 |
-
Given the sheer amount of new publications that are being published at major machine learning conferences, this agent
|
| 25 |
-
can help browse papers, find similar papers, and help you write summaries to quickly get an overview of what is currently
|
| 26 |
-
going on.
|
| 27 |
|
| 28 |
-
The agent can also support you in planning your next conference trip by providing a schedule around one or more topics
|
| 29 |
-
that you are interested in.
|
| 30 |
|
| 31 |
-
## Installation & usage of the web-based interface
|
| 32 |
-
The agent is conveniently packaged as a docker image. You can spin up the entire system by using the commands below.
|
| 33 |
-
Make sure to have the [Nvidia Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installation) installed.
|
| 34 |
-
At the moment we only support `ollama` models.
|
| 35 |
-
|
| 36 |
-
Instead of a local agent model, you can also make use of remote ollama models. A full list is available here:
|
| 37 |
-
https://docs.ollama.com/cloud.
|
| 38 |
-
To make use of these large models, set `AGENT_MODEL_NAME=<your model of choice>` and
|
| 39 |
-
`AGENT_MODEL_API_BASE=https://ollama.com`.
|
| 40 |
-
Don't forget to set your `OLLAMA_API_KEY` either directly via the environment or in the browser.
|
| 41 |
-
|
| 42 |
-
**Important note:** The ollama docker containers cannot use GPU acceleration on MacOS. If you want to use your Mac's GPU,
|
| 43 |
-
you need to run ollama without containerization (i.e., you need to manually spin up the ollama server).
|
| 44 |
-
With `NEO4J_DB_NODE_RETURN_LIMIT`, we set a strict return limit of 200 nodes per query to avoid overstraining the database.
|
| 45 |
-
You can set it as needed for your use case.
|
| 46 |
-
|
| 47 |
-
```commandline
|
| 48 |
-
# Database config
|
| 49 |
-
echo "NEO4J_USERNAME=neo4j" >> .env
|
| 50 |
-
echo "NEO4J_PASSWORD=<a password of your choice>" >> .env
|
| 51 |
-
echo "NEO4J_DB_URI=bolt://neo4j_db:7687" >> .env
|
| 52 |
-
echo "NEO4J_DB_NODE_RETURN_LIMIT=200" >> .env
|
| 53 |
-
|
| 54 |
-
echo "EMBEDDING_MODEL_NAME=nomic-embed-text" >> .env
|
| 55 |
-
echo "EMBEDDING_MODEL_API_BASE=http://ollama_agent:11434" >> .env
|
| 56 |
-
|
| 57 |
-
echo "AGENT_MODEL_NAME=gpt-oss:20b" >> .env
|
| 58 |
-
echo "AGENT_MODEL_API_BASE=http://ollama_agent:11434" >> .env
|
| 59 |
-
|
| 60 |
-
# Optional: set your OLLAMA_API_KEY when using remote models
|
| 61 |
-
echo "OLLAMA_API_KEY=<your key here>" >> .env
|
| 62 |
-
|
| 63 |
-
# Set the following to true if you would like to import our pre-generated knowledge graph for the NeurIPS 2025 conference
|
| 64 |
-
# Warning (!): Setting the parameter below to 'true' will clear any existing data inside the docker-based neo4j database
|
| 65 |
-
echo "POPULATE_DATABASE_NIPS2025=false" >> .env
|
| 66 |
-
|
| 67 |
-
git clone https://github.com/core-aix/agentic-nav.git
|
| 68 |
-
cd agentic-nav
|
| 69 |
-
docker compose up --build -d
|
| 70 |
-
```
|
| 71 |
-
|
| 72 |
-
This will launch the agent and its web interface, available via `http://localhost:7860`, along with a neo4j database
|
| 73 |
-
(community edition).
|
| 74 |
-
**It will also populate the database with all accepted papers of the NeurIPS 2025 machine learning conference (if you set `POPULATE_DATABASE_NIPS2025=true`).**
|
| 75 |
-
We include pair-wise similarity scores to enable graph traversals and the search for broadly related papers.
|
| 76 |
-
|
| 77 |
-
After the docker containers are up and running, you can interact with the agent. Have fun!
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
## Development & contributing to the agent
|
| 81 |
-
If you are interested in understanding the system in detail, you may want to run all setup steps manually and avoid a
|
| 82 |
-
containerized runtime. Run the following steps to setup a development environment.
|
| 83 |
-
|
| 84 |
-
We use [uv](https://docs.astral.sh/uv/) for dependency management.
|
| 85 |
-
Our docker containers for serving LLMs use Ollama and GPU acceleration. For that, you need the [Nvidia Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installation)
|
| 86 |
-
Make sure to have both installed before you proceed.
|
| 87 |
-
|
| 88 |
-
### Installation
|
| 89 |
-
After you cloned the repository, you need to setup the database. We use neo4j to manage the knowledge graph data we need
|
| 90 |
-
for the agent to work properly.
|
| 91 |
-
**Note:** We are using gradio built from source as the latest release (as of Nov. 12, 2025) does not yet support python 3.14.
|
| 92 |
-
|
| 93 |
-
First, export all necessary environment variables:
|
| 94 |
-
```commandline
|
| 95 |
-
echo "NEO4J_USERNAME=neo4j" >> .env
|
| 96 |
-
echo "NEO4J_PASSWORD=<a password of your choice>" >> .env
|
| 97 |
-
echo "NEO4J_DB_URI=bolt://localhost:7687" >> .env
|
| 98 |
-
echo "NEO4J_DB_NODE_RETURN_LIMIT=200" >> .env
|
| 99 |
-
|
| 100 |
-
echo "EMBEDDING_MODEL_NAME=ollama/nomic-embed-text" >> .env
|
| 101 |
-
echo "EMBEDDING_MODEL_API_BASE=http://localhost:11435" >> .env
|
| 102 |
-
|
| 103 |
-
echo "AGENT_MODEL_NAME=ollama_chat/gpt-oss:20b" >> .env
|
| 104 |
-
echo "AGENT_MODEL_API_BASE=http://localhost:11436" >> .env
|
| 105 |
-
|
| 106 |
-
# Optional: set your OLLAMA_API_KEY when using remote models
|
| 107 |
-
echo "OLLAMA_API_KEY=<your key here>" >> .env
|
| 108 |
-
|
| 109 |
-
# Set the following to true if you would like to import our pre-generated knowledge graph for the NeurIPS 2025 conference
|
| 110 |
-
# Warning (!): Setting the parameter below to 'true' will clear any existing data inside the docker-based neo4j database
|
| 111 |
-
echo "POPULATE_DATABASE_NIPS2025=false" >> .env
|
| 112 |
-
|
| 113 |
-
# Make sure you also have those values in your commandline environment
|
| 114 |
-
export $(grep -v '^#' .env | xargs)
|
| 115 |
-
```
|
| 116 |
-
|
| 117 |
-
Then get the project files:
|
| 118 |
-
```commandline
|
| 119 |
-
git clone https://github.com/core-aix/agentic-nav.git
|
| 120 |
-
cd agentic-nav
|
| 121 |
-
docker compose up neo4j_db ollama_embed ollama_agent -d
|
| 122 |
-
|
| 123 |
-
# The following command is only needed if you'd like to use the gradio-based GUI
|
| 124 |
-
# This will eventually go away once gradio bumps their release version to support python 3.14
|
| 125 |
-
bash scripts/prepare_gradio.sh
|
| 126 |
-
|
| 127 |
-
uv sync
|
| 128 |
-
```
|
| 129 |
-
|
| 130 |
-
### Building the NeurIPS 2025 knowledge graph locally
|
| 131 |
-
You can also build the knowledge graph yourself and, for example, swap the embedding model we use by default.
|
| 132 |
-
Follow the steps below to do so. Note, that you still need to setup the project as described in the `Installation`
|
| 133 |
-
subsection above. Make sure to set `POPULATE_DATABASE_NIPS2025=false` in your .env file.
|
| 134 |
-
|
| 135 |
-
#### Get the data
|
| 136 |
-
Download https://neurips.cc/static/virtual/data/neurips-2025-orals-posters.json and put the file in the `./data` folder.
|
| 137 |
-
```commandline
|
| 138 |
-
wget -O data/neurips-2025-orals-posters.json https://neurips.cc/static/virtual/data/neurips-2025-orals-posters.json
|
| 139 |
-
```
|
| 140 |
-
|
| 141 |
-
#### Build the knowledge graph
|
| 142 |
-
You can build the knowledge graph per your needs by running the following script:
|
| 143 |
-
```commandline
|
| 144 |
-
uv run llm_agents/tools/knowledge_graph/graph_generator.py \
|
| 145 |
-
--input-json-file data/neurips-2025-orals-posters.json \
|
| 146 |
-
--embedding-model $EMBEDDING_MODEL_NAME \
|
| 147 |
-
--ollama-server-url $EMBEDDING_MODEL_API_BASE \
|
| 148 |
-
--embedding-gen-batch-size 32 \
|
| 149 |
-
--max-parallel-workers 28 \
|
| 150 |
-
--similarity-threshold 0.6 \
|
| 151 |
-
--output-file graphs/knowledge_graph.pkl \
|
| 152 |
-
# --limit-num-papers # Optional
|
| 153 |
-
```
|
| 154 |
-
**Important note:** Generating the full graph for over 6k papers can take more than 1 hour. You can find a set of pre-generated
|
| 155 |
-
knowledge graphs here (the "thresh" in the file name indicates the `similarity-threshold` for which we create a `similar_to` relationship between papers): [LRZ Sync+Share](https://syncandshare.lrz.de/getlink/fiFMhMLLH7FaQ3Jipqqsye/)
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
#### Importing the knowledge graph to a neo4j database
|
| 159 |
-
We provide an importer to move the knowledge graph into a graph database that supports vector-based similarity search.
|
| 160 |
-
```commandline
|
| 161 |
-
uv run llm_agents/tools/knowledge_graph/neo4j_db_importer.py \
|
| 162 |
-
--graph-path graphs/knowledge_graph.pkl \
|
| 163 |
-
--neo4j-uri $NEO4J_DB_URI \
|
| 164 |
-
--batch-size 100 \
|
| 165 |
-
--embedding-dimension 768 # This must match the vector dims generated by the embedding model.
|
| 166 |
-
```
|
| 167 |
-
**Note:** Depending on what your graph looks like this can also take a while (> 20min for 6K papers). Also, beware that
|
| 168 |
-
running this script will first clear any existing entries before the new graph is written to the database.
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
### Agent interactions
|
| 172 |
-
We offer two ways of interacting with agents, via the command line and via the browser.
|
| 173 |
-
The backend uses LiteLLM, which allows you to use a variety of LLM inference endpoints.
|
| 174 |
-
Find details on the various providers [here](https://docs.litellm.ai/docs/providers).
|
| 175 |
-
|
| 176 |
-
#### Commandline interface
|
| 177 |
-
The agent can also be used via a versatile CLI.
|
| 178 |
-
Below are two examples how to run a local and a remote model.
|
| 179 |
-
We are using LiteLLM to abstract away from individual inference API providers.
|
| 180 |
-
Note, that we currently only test with Ollama models.
|
| 181 |
-
```commandline
|
| 182 |
-
uv run agentic-nav-cli \
|
| 183 |
-
-t 0.4 \
|
| 184 |
-
--max-tokens 6000 \
|
| 185 |
-
-c 131072 \
|
| 186 |
-
--max-num-papers 10
|
| 187 |
-
```
|
| 188 |
-
|
| 189 |
-
#### Web-based interface (beginner friendly)
|
| 190 |
-
We use gradio to provide a chat interface with the same functionalities as the commandline-based interface.
|
| 191 |
-
You can launch the web app by running:
|
| 192 |
-
```commandline
|
| 193 |
-
agentic-nav-web
|
| 194 |
-
```
|
| 195 |
-
All the hyperparameters you need to set can be configured in the web interface and will be used in you individual session.
|
| 196 |
-
Once you close the browser window, your session will terminate and all custom configuration will be removed.
|
| 197 |
-
At the moment, the web UI only supports Ollama models.
|
| 198 |
-
|
| 199 |
-
### Debugging agent interactions
|
| 200 |
-
The agent involves a set of asynchronous operations. We provide a built-in logging instance to capture all relevant logs
|
| 201 |
-
for debugging. To set the right debugging level for your application, you can use the environment variable `AGENTIC_NAV_LOG_LEVEL`.
|
| 202 |
-
By default, it is set to `INFO`.
|
| 203 |
-
|
| 204 |
-
#### Running tests
|
| 205 |
-
We try to cover all tools and agent functionalities in thorough unit tests.
|
| 206 |
-
You can run them via:
|
| 207 |
-
```commandline
|
| 208 |
-
uv run pytest tests/
|
| 209 |
-
```
|
|
|
|
| 17 |
|
| 18 |
# AgenticNAV - Your AI conference companion
|
| 19 |
|
| 20 |
+
PLEASE FIND THE IMPLEMENTATION OF AGENTIC NAV ON GITHUB: [https://github.com/core-aix/agentic-nav](https://github.com/core-aix/agentic-nav)
|
|
|
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
|
|
|
|
|
|
| 23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
agentic_nav/__init__.py
DELETED
|
File without changes
|
agentic_nav/agents/__init__.py
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
from agentic_nav.agents.neurips2025_conference import NeurIPS2025Agent, DEFAULT_NEURIPS2025_AGENT_ARGS
|
|
|
|
|
|
agentic_nav/agents/base.py
DELETED
|
@@ -1,327 +0,0 @@
|
|
| 1 |
-
import json
|
| 2 |
-
|
| 3 |
-
import litellm
|
| 4 |
-
import logging
|
| 5 |
-
|
| 6 |
-
from dataclasses import dataclass, field
|
| 7 |
-
from typing import List, Dict
|
| 8 |
-
|
| 9 |
-
from agentic_nav.tools import get_all_tools
|
| 10 |
-
from agentic_nav.utils.tooling import infer_tool
|
| 11 |
-
|
| 12 |
-
try:
|
| 13 |
-
from datetime import datetime, UTC
|
| 14 |
-
except ImportError:
|
| 15 |
-
from datetime import datetime, timezone
|
| 16 |
-
UTC = timezone.utc
|
| 17 |
-
|
| 18 |
-
LOGGER = logging.getLogger(__name__)
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
@dataclass
|
| 22 |
-
class LLMAgent:
|
| 23 |
-
model: str = "ollama_chat/gpt-oss:20b"
|
| 24 |
-
api_base: str = "http://localhost:11434"
|
| 25 |
-
api_key: str = None
|
| 26 |
-
llm_args: dict = field(default_factory=lambda: {"temperature": 0.2, "max_tokens": 6000, "num_ctx": 131072})
|
| 27 |
-
tools: List[callable] = field(default_factory=lambda: get_all_tools())
|
| 28 |
-
global_tool_args: dict = field(default_factory=lambda: {"max_num_papers": 10})
|
| 29 |
-
max_interaction_rounds: int = 10
|
| 30 |
-
messages: List[Dict] = field(default_factory=lambda: [])
|
| 31 |
-
tool_registry: Dict = None
|
| 32 |
-
tool_descriptions: List = None
|
| 33 |
-
default_system_prompt: Dict[str, str] = None
|
| 34 |
-
|
| 35 |
-
def __remove_model_key_from_llm_args(self, stateful: bool = True):
|
| 36 |
-
if stateful:
|
| 37 |
-
self.model = self.llm_args["model"]
|
| 38 |
-
self.api_base = self.llm_args["api_base"]
|
| 39 |
-
|
| 40 |
-
if "model" in self.llm_args.keys():
|
| 41 |
-
del self.llm_args["model"]
|
| 42 |
-
|
| 43 |
-
if "api_base" in self.llm_args.keys():
|
| 44 |
-
del self.llm_args["api_base"]
|
| 45 |
-
|
| 46 |
-
def test_llm_connection(self):
|
| 47 |
-
self.__remove_model_key_from_llm_args(stateful=True)
|
| 48 |
-
try:
|
| 49 |
-
response = litellm.completion(
|
| 50 |
-
model=self.model,
|
| 51 |
-
messages=[{"role": "user", "content": "test", "_ts": str(datetime.now(UTC))}],
|
| 52 |
-
tool_choice="auto",
|
| 53 |
-
api_base=self.api_base,
|
| 54 |
-
api_key=self.api_key,
|
| 55 |
-
stream=True,
|
| 56 |
-
**self.llm_args,
|
| 57 |
-
)
|
| 58 |
-
|
| 59 |
-
LOGGER.info(f"Model is available! Response: {response.choices[0].message.content}")
|
| 60 |
-
except Exception as e:
|
| 61 |
-
LOGGER.error(f"Model not available or connection failed: {str(e)}")
|
| 62 |
-
|
| 63 |
-
def setup_session(self, tool_funcs: List[callable] = None):
|
| 64 |
-
self.tool_registry = {fn.__name__: fn for fn in self.tools} if tool_funcs is None else {fn.__name__: fn for fn in tool_funcs}
|
| 65 |
-
self.tool_descriptions = [infer_tool(fn, tool_args=self.global_tool_args) for fn in self.tool_registry.values()]
|
| 66 |
-
LOGGER.info(f"Agent setup and tools ready to use.")
|
| 67 |
-
LOGGER.debug(f"Available tools: {self.tools}")
|
| 68 |
-
|
| 69 |
-
def remove_session(self):
|
| 70 |
-
"""De-registers tools and resets messages to the initial state."""
|
| 71 |
-
self.tool_registry = None
|
| 72 |
-
self.tool_descriptions = None
|
| 73 |
-
self.messages = [self.default_system_prompt if not None else {"role": "system", "content": "You are a helpful assistant."}]
|
| 74 |
-
|
| 75 |
-
def interact(self, message: Dict):
|
| 76 |
-
assert self.tool_registry is not None, "Make sure to call 'setup_session()' before the first interaction."
|
| 77 |
-
assert self.tool_descriptions is not None, "Make sure to call 'setup_session()' before the first interaction."
|
| 78 |
-
|
| 79 |
-
assert type(message) == dict, "Make sure to pass a dictionary as next message for the agent."
|
| 80 |
-
assert "role" in message.keys(), "The message must contain a 'role' key."
|
| 81 |
-
assert "content" in message.keys(), "The message must contain a 'content' key."
|
| 82 |
-
|
| 83 |
-
self.__remove_model_key_from_llm_args(stateful=True)
|
| 84 |
-
if "_ts" not in message.keys():
|
| 85 |
-
message["_ts"] = str(datetime.now(UTC))
|
| 86 |
-
|
| 87 |
-
self.messages.append(message)
|
| 88 |
-
for _ in range(self.max_interaction_rounds):
|
| 89 |
-
collected, calls = self._send_to_llm(
|
| 90 |
-
messages=self.messages,
|
| 91 |
-
model=self.model,
|
| 92 |
-
api_base=self.api_base,
|
| 93 |
-
api_key=self.api_key
|
| 94 |
-
)
|
| 95 |
-
# append the assembled assistant message so tool execution sees the assistant's follow-up
|
| 96 |
-
self.messages.append({"role": "assistant", "content": collected, "_ts": str(datetime.now(UTC))})
|
| 97 |
-
LOGGER.debug(f"Agent response: {collected}")
|
| 98 |
-
|
| 99 |
-
if not calls:
|
| 100 |
-
return self.messages
|
| 101 |
-
else:
|
| 102 |
-
self.messages[-1]["tool_calls"] = calls
|
| 103 |
-
LOGGER.debug(f"Agent requested tool calls: {calls}")
|
| 104 |
-
|
| 105 |
-
# execute tools and append results
|
| 106 |
-
for call in calls:
|
| 107 |
-
self.messages.append(
|
| 108 |
-
self.call_tool(
|
| 109 |
-
tool_call=call
|
| 110 |
-
)
|
| 111 |
-
)
|
| 112 |
-
|
| 113 |
-
LOGGER.debug(f"Interaction complete. Total messages: {len(self.messages)}")
|
| 114 |
-
return self.messages
|
| 115 |
-
|
| 116 |
-
def interact_stateless(
    self,
    messages: List[Dict],
    model: str,
    api_base: str,
    api_key: str,
    llm_args: Dict = None
):
    """
    Stream a multi-round interaction without touching the agent's own history.

    This method is designed to support multi-user sessions and requires state
    management outside the agent class: the caller owns ``messages`` and this
    generator mutates and yields it as the reply streams in.

    Args:
        messages: Caller-owned conversation; mutated in place (timestamps
            added, assistant/tool messages appended).
        model / api_base / api_key / llm_args: Per-call overrides; any that
            are None fall back to the values stored on the agent.

    Yields:
        Snapshots (shallow copies) of ``messages`` after each content chunk,
        after tool-call registration, and after each tool result; the final
        yield is the live list itself.
    """
    assert self.tool_registry is not None, "Make sure to call 'setup_session()' before the first interaction."
    assert self.tool_descriptions is not None, "Make sure to call 'setup_session()' before the first interaction."
    self.__remove_model_key_from_llm_args(stateful=False)

    # Sanity check for all messages: stamp any that lack a timestamp.
    for message in messages:
        if "_ts" not in message.keys():
            message["_ts"] = str(datetime.now(UTC))

    for round_num in range(self.max_interaction_rounds):
        # Stream the LLM response for this round.
        collected = ""
        calls = []

        # Create an initially-empty assistant message that streaming fills in.
        assistant_msg_idx = len(messages)
        messages.append({"role": "assistant", "content": "", "_ts": str(datetime.now(UTC))})

        stream_iter = litellm.completion(
            model=model if model is not None else self.model,
            messages=messages[:assistant_msg_idx],  # Don't include the empty assistant message
            tools=self.tool_descriptions,
            tool_choice="auto",
            api_base=api_base if api_base is not None else self.api_base,
            api_key=api_key if api_key is not None else self.api_key,
            stream=True,
            **llm_args if llm_args is not None else self.llm_args,
        )

        for chunk in stream_iter:
            # NOTE(review): chunks are assumed to be mapping-like (.get) —
            # confirm this holds for every provider litellm routes to.
            choices = chunk.get("choices", []) or []
            if not choices:
                continue
            choice = choices[0]

            # Extract content from chunk; providers differ in where partial
            # text appears (delta.content, delta.message, message, text).
            content = None
            delta = choice.get("delta")

            if delta and "content" in delta:
                content = delta["content"]
            elif delta and "message" in delta and isinstance(delta["message"], dict):
                content = delta["message"].get("content")

            if delta and "tool_calls" in delta:
                calls.extend(delta["tool_calls"] or [])

            if content is None:
                msg = choice.get("message")
                if isinstance(msg, dict):
                    content = msg.get("content")

            if content is None:
                content = choice.get("text")

            if content:
                # Non-string payloads are serialized so they can be concatenated.
                if not isinstance(content, str):
                    try:
                        content = json.dumps(content, ensure_ascii=False)
                    except Exception:
                        content = str(content)

                collected += content
                # Update the assistant message with accumulated content
                messages[assistant_msg_idx]["content"] = collected

                # Yield the updated messages for streaming display
                yield messages.copy()

        # After streaming is complete, update with final content
        messages[assistant_msg_idx]["content"] = collected
        LOGGER.debug(f"Agent response: {collected}")

        if not calls:
            # Model finished without requesting tools: final yield, then stop.
            yield messages
            return
        else:
            messages[assistant_msg_idx]["tool_calls"] = calls
            LOGGER.debug(f"Agent requested tool calls: {calls}")
            yield messages.copy()

            # Execute tools and append results, yielding after each so the UI
            # can show intermediate progress.
            for call in calls:
                messages.append(self.call_tool(tool_call=call))
                yield messages.copy()

    # Round budget exhausted while tools were still being requested.
    yield messages
|
| 214 |
-
|
| 215 |
-
def _send_to_llm(
    self,
    messages: List[Dict],
    model: str,
    api_base: str,
    api_key: str,
    llm_args: Dict = None
):
    """Stream one completion from the LLM, collecting text and tool-call requests.

    Args:
        messages: Conversation to send (not mutated here).
        model / api_base / api_key / llm_args: Per-call overrides; any that
            are None fall back to the values stored on the agent.

    Returns:
        Tuple ``(collected, calls)``: the concatenated assistant text and the
        list of raw tool-call deltas gathered from the stream.
    """
    stream_iter = litellm.completion(
        model=model if model is not None else self.model,
        messages=messages,
        tools=self.tool_descriptions,
        tool_choice="auto",
        api_base=api_base if api_base is not None else self.api_base,
        api_key=api_key if api_key is not None else self.api_key,
        stream=True,
        **llm_args if llm_args is not None else self.llm_args,
    )

    collected = ""
    calls = []

    for chunk in stream_iter:
        choices = chunk.get("choices", []) or []
        if not choices:
            continue
        choice = choices[0]

        # try several places where partial content may appear
        content = None
        delta = choice.get("delta")

        # BUGFIX: guard against `delta` being None before membership tests —
        # `interact_stateless` already guards this; some chunks omit 'delta'.
        if delta and "content" in delta:
            content = delta["content"]
        elif delta and "message" in delta and isinstance(delta["message"], dict):
            content = delta["message"].get("content")

        if delta and "tool_calls" in delta:
            calls.extend(delta["tool_calls"] or [])

        if content is None:
            msg = choice.get("message")
            if isinstance(msg, dict):
                content = msg.get("content")

        if content is None:
            content = choice.get("text")

        if content:
            # Non-string payloads are serialized so they can be concatenated.
            if not isinstance(content, str):
                try:
                    content = json.dumps(content, ensure_ascii=False)
                except Exception as e:
                    LOGGER.error(f"JSON encoding error encountered. {e}. Treating agent response as regular text.")
                    content = str(content)

            collected += content

    return collected, calls
|
| 274 |
-
|
| 275 |
-
def call_tool(self, tool_call: Dict):
    """Execute one LLM-requested tool call and wrap its result as a 'tool' message.

    Args:
        tool_call: Tool-call dict from the LLM, with ``'function'``
            (``{'name', 'arguments'}``) and an ``'id'``.

    Returns:
        A chat message dict: role ``'tool'``, the originating call id, the
        tool name, the JSON-encoded output, and a timestamp.
    """
    name = tool_call["function"]["name"]
    args = tool_call["function"].get("arguments", "{}")
    LOGGER.debug(f"Preparing tool call: {name}")
    LOGGER.debug(f"Expected tool arguments: {args}")
    try:
        parsed = json.loads(args) if isinstance(args, str) else (args or {})
        LOGGER.debug(f"Parsed tool call arguments: {parsed}")
    except json.JSONDecodeError:
        # Best effort: call the tool with no arguments rather than crashing the loop.
        parsed = {}
        LOGGER.warning("Tool call arguments: COULD NOT BE PARSED")
    out = self.tool_registry[name](**parsed)
    # BUGFIX: previously logged `parsed` (the arguments) instead of the tool output.
    LOGGER.debug(f"Tool call output: {out}")

    return {
        "role": "tool",
        "tool_call_id": tool_call.get("id"),
        "name": name,
        "content": json.dumps(out, ensure_ascii=False),
        "_ts": str(datetime.now(UTC))
    }
|
| 296 |
-
|
| 297 |
-
def set_history(self, messages):
    """Replace the agent's message history wholesale (no validation performed)."""
    self.messages = messages
    LOGGER.info(f"Set new message history.")
|
| 300 |
-
|
| 301 |
-
def get_history(self):
    """Return the agent's message history (the live list, not a copy)."""
    return self.messages
|
| 303 |
-
|
| 304 |
-
@staticmethod
def set_system_prompt(new_system_prompt: str, messages: List[Dict]):
    """Return a new message list whose sole system message is `new_system_prompt` at index 0.

    Any pre-existing system messages are dropped; the input list is not mutated.
    """
    non_system = [entry for entry in messages if entry.get("role") != "system"]
    system_entry = {
        "role": "system",
        "content": new_system_prompt,
        "_ts": str(datetime.now(UTC))
    }
    LOGGER.info(f"New system prompt set and configured.")
    LOGGER.debug(f"New system prompt: {new_system_prompt}")
    return [system_entry, *non_system]
|
| 315 |
-
|
| 316 |
-
def get_system_prompt(self):
    """Return the first message with role 'system', or None when the history has none."""
    return next(
        (entry for entry in self.messages if entry.get("role") == "system"),
        None,
    )
|
| 322 |
-
|
| 323 |
-
def get_most_recent_assistant_message(self):
    """Return the latest assistant message, scanning from the end; None when absent."""
    return next(
        (entry for entry in reversed(self.messages) if entry.get("role") == "assistant"),
        None,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
agentic_nav/agents/neurips2025_conference.py
DELETED
|
@@ -1,48 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
|
| 3 |
-
from dataclasses import dataclass
|
| 4 |
-
from agentic_nav.agents.base import LLMAgent
|
| 5 |
-
from agentic_nav.tools import search_similar_papers, find_neighboring_papers, traverse_graph, build_visit_schedule # <- the tools we expose
|
| 6 |
-
from zoneinfo import ZoneInfo
|
| 7 |
-
|
| 8 |
-
try:
|
| 9 |
-
from datetime import datetime, UTC
|
| 10 |
-
except ImportError:
|
| 11 |
-
from datetime import datetime, timezone
|
| 12 |
-
UTC = timezone.utc
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
# Default runtime configuration for the NeurIPS 2025 agent; every value can be
# overridden through environment variables, read once at import time.
DEFAULT_NEURIPS2025_AGENT_ARGS = {
    "model": os.environ.get("AGENT_MODEL_NAME", "gpt-oss:120b-cloud"),
    "api_base": os.environ.get("AGENT_MODEL_API_BASE", "https://ollama.com"),
    "api_key": os.environ.get("OLLAMA_API_KEY"),  # NOTE(review): None when unset — confirm downstream handles that
    "llm_args": {"temperature": 0.2, "max_tokens": 6000, "num_ctx": 131072},
    "global_tool_args": {"max_num_papers": 10}
}


# System prompt shared by all NeurIPS2025Agent instances.
# NOTE(review): the timestamp below is interpolated once at *import* time, not
# per request — confirm this is intended for long-running deployments.
system = {
    "role": "system",
    "content": (
        "You are an assistant who can help browsing NeurIPS 2025 papers. "
        "You are provided with a search tool that can search all accepted papers of NeurIPS 2025. "
        "However, note that the search tool only takes paper titles and abstracts as input keywords; "
        "it cannot take anything else as the input keywords. "
        "However, the output of the search includes various metadata fields such as authors, affiliations, "
        "and session times. \n"
        "When building a schedule, do not specify the name of the day.\n"
        "If you find duplicates, just omit them. Only keep the first appearance.\n"
        f"Generally, if you do not find a result, tell the user you don't know.\n"
        f"Here is the current timestamp: {datetime.now(ZoneInfo('America/Los_Angeles'))}. The conference is happening in San Diego, California."
    )
}
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
class NeurIPS2025Agent(LLMAgent):
    """NeurIPS-2025-specific agent: seeds the conference system prompt and
    registers the paper search / graph / scheduling tools.

    NOTE: the former ``@dataclass`` decorator was removed — with no annotated
    fields and a hand-written ``__init__`` it only injected a zero-field
    ``__eq__``/``__repr__``, which made all agent instances compare equal.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Fresh shallow copy of the shared system message so per-instance
        # edits don't leak into the module-level `system` dict.
        self.messages = [{**system}]
        self.tools = [search_similar_papers, find_neighboring_papers, traverse_graph, build_visit_schedule]
        self.default_system_prompt = system
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
agentic_nav/frontend/__init__.py
DELETED
|
File without changes
|
agentic_nav/frontend/browser_ui.py
DELETED
|
@@ -1,525 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Gradio web UI that interacts with an agent implementation.
|
| 3 |
-
|
| 4 |
-
Features matching terminal UI:
|
| 5 |
-
- Multi-turn chat with Markdown rendering
|
| 6 |
-
- System prompt editing
|
| 7 |
-
- View conversation history
|
| 8 |
-
- Save chat history to file
|
| 9 |
-
- All model configuration options
|
| 10 |
-
- Clear chat functionality
|
| 11 |
-
- **Per-user conversation state management with stateless agent**
|
| 12 |
-
"""
|
| 13 |
-
from venv import logger
|
| 14 |
-
|
| 15 |
-
import gradio as gr
|
| 16 |
-
import os
|
| 17 |
-
import datetime
|
| 18 |
-
import logging
|
| 19 |
-
import json
|
| 20 |
-
|
| 21 |
-
from pathlib import Path
|
| 22 |
-
from typing import List, Tuple, Optional, Dict
|
| 23 |
-
|
| 24 |
-
from agentic_nav.agents import NeurIPS2025Agent, DEFAULT_NEURIPS2025_AGENT_ARGS
|
| 25 |
-
from agentic_nav.utils.logger import setup_logging
|
| 26 |
-
from agentic_nav.utils.file_handlers import save_chat_history
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
# Module-level logger for the Gradio UI.
LOGGER = logging.getLogger(__name__)

# Embedding backend configuration (env-overridable, read once at import time).
EMBEDDING_MODEL_NAME = os.environ.get("EMBEDDING_MODEL_NAME", "nomic-embed-text")
EMBEDDING_MODEL_API_BASE = os.environ.get("EMBEDDING_MODEL_API_BASE", "http://localhost:11435")

# Chat-model backend defaults used when constructing the agent.
AGENT_MODEL_NAME = os.environ.get("AGENT_MODEL_NAME", "gpt-oss:20b")
AGENT_MODEL_API_BASE = os.environ.get("AGENT_MODEL_API_BASE", "http://localhost:11436")
# Falls back to the default agent args' key when the env var is unset.
OLLAMA_API_KEY = os.environ.get("OLLAMA_API_KEY", DEFAULT_NEURIPS2025_AGENT_ARGS["api_key"])
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
def initialize_agent():
    """Construct and session-initialize the NeurIPS2025Agent used by the UI."""
    settings = dict(
        model=f"ollama_chat/{AGENT_MODEL_NAME}",
        api_base=AGENT_MODEL_API_BASE,
        api_key=OLLAMA_API_KEY,
        llm_args=DEFAULT_NEURIPS2025_AGENT_ARGS["llm_args"],
        global_tool_args=DEFAULT_NEURIPS2025_AGENT_ARGS["global_tool_args"],
    )
    agent = NeurIPS2025Agent(**settings)
    agent.setup_session()
    return agent
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
def configure_agent(
    api_base: str,
    api_key: str,
    model: str,
    temperature: float,
    max_tokens: int,
    num_ctx: int,
    max_num_papers: int,
    current_config: Dict
):
    """Persist user-chosen model/inference settings into the shared config dict.

    Mutates `current_config` in place and returns it alongside a status string
    for the UI; the api_key is redacted from the logged copy.
    """
    LOGGER.info(f"Agent runtime started via Gradio UI for session")
    current_config.update({
        "model": model,
        "api_base": api_base,
        "api_key": api_key,
        "llm_args": {
            "temperature": temperature,
            "max_tokens": max_tokens,
            "num_ctx": num_ctx
        },
        "global_tool_args": {"max_num_papers": max_num_papers}
    })

    # Log everything except the secret.
    current_config_to_print = {k: v for k, v in current_config.items() if k != "api_key"}
    LOGGER.info(f"User-defined configuration saved. Config: {current_config_to_print}")

    return current_config, "✓ Agent initialized successfully!"
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
def chat_fn(
    new_message: str,
    history: List[Dict],
    config: Optional[Dict],
    messages: Optional[List[Dict]],
    agent: NeurIPS2025Agent,
) -> Tuple[List[Dict], Optional[List[Dict]]]:
    """
    Handle chat interaction using stateless agent.

    Streams the agent's reply: yields (history, messages) pairs so the Gradio
    chatbot updates as tokens arrive.

    Args:
        new_message: User's input message
        history: Chat history as list of message dictionaries with role/content
        config: Configuration dict with model, api_base, api_key, llm_args
        messages: Current conversation messages list
        agent: Agent instance

    Returns:
        Tuple of (updated_history, messages)
    """
    # Blank input: nothing to do, just echo current state.
    if not new_message.strip():
        yield history, messages
        return

    LOGGER.debug(f"USER PROMPT: {new_message}")

    # Safety check: ensure messages is a list
    if messages is None or not isinstance(messages, list):
        LOGGER.warning("Messages state was not properly initialized, resetting...")
        messages = [agent.get_system_prompt()]

    # Create a copy of history and messages to avoid mutation issues
    history = history.copy() if history else []
    messages = messages.copy()

    # Add user message to history immediately with empty assistant response
    # (the empty assistant entry is filled in as the stream progresses).
    user_msg_dict = {"role": "user", "content": new_message}
    assistant_msg_dict = {"role": "assistant", "content": ""}
    history.extend([user_msg_dict, assistant_msg_dict])

    try:
        # Create user message with timestamp
        user_message = {
            "role": "user",
            "content": new_message,
            "_ts": str(datetime.datetime.now(datetime.timezone.utc))
        }

        # Add user message to conversation
        messages.append(user_message)

        # Stream the response
        accumulated_response = ""
        for partial_messages in agent.interact_stateless(
            messages=messages,
            model=config["model"],
            api_base=config["api_base"],
            api_key=config["api_key"],
            llm_args=config["llm_args"]
        ):
            # Get the latest assistant message content
            for msg in reversed(partial_messages):
                if msg.get("role") == "assistant":
                    accumulated_response = msg["content"]
                    break

            # Update the last assistant message in history with accumulated response
            history[-1]["content"] = accumulated_response
            yield history, partial_messages

        # Final update with complete messages
        # NOTE(review): if interact_stateless yields nothing, `partial_messages`
        # is unbound here — the NameError is caught by the except below and
        # surfaced as a UI error; confirm that is acceptable.
        messages = partial_messages
        LOGGER.info("Agent response generated successfully")

    except Exception as e:
        LOGGER.error(f"Agent encountered an error: {e}", exc_info=True)
        error_msg = f"❌ Error: {str(e)}"
        history[-1]["content"] = error_msg
        yield history, messages
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
def update_system_prompt(
    new_prompt: str,
    messages: Optional[List[Dict]],
    agent: NeurIPS2025Agent
) -> Tuple[str, Optional[List[Dict]]]:
    """Update the system prompt in the message history.

    Args:
        new_prompt: New system prompt; must be non-empty after stripping.
        messages: Current message history (may be None on first use).
        agent: Agent instance (used only for its static set_system_prompt).

    Returns:
        Tuple of (status_message, updated_messages).
    """
    if not new_prompt.strip():
        return "System prompt cannot be empty.", messages

    try:
        # Initialize messages if None
        if messages is None:
            messages = []

        # Use the static method to update system prompt
        messages = agent.set_system_prompt(new_system_prompt=new_prompt, messages=messages)

        LOGGER.info("System prompt updated")
        LOGGER.info(f"New system prompt: {messages[0]}")
        return "✓ System prompt updated successfully!", messages
    except Exception as e:
        LOGGER.error(f"Error updating system prompt: {e}")
        return f"Error: {str(e)}", messages
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
def view_history(messages: Optional[List[Dict]]) -> str:
    """Render the conversation history as pretty-printed JSON for display.

    Returns a warning string when there is no history yet, or an error string
    when serialization fails.
    """
    if messages is None:
        return "⚠️ No conversation history yet."

    try:
        rendered = json.dumps(messages, indent=2, ensure_ascii=False)
    except Exception as e:
        LOGGER.error(f"Error viewing history: {e}")
        return f"❌ Error: {str(e)}"
    return rendered
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
def save_history(filename: str, messages: Optional[List[Dict]]) -> str:
    """Save chat history to a JSON file under chat_histories/.

    Args:
        filename: Optional filename; auto-generated (timestamp + short session
            id) when blank. Normalized to live under chat_histories/ and to
            end in ".json".
        messages: Current message history.

    Returns:
        Status message describing success or failure.
    """
    if messages is None or len(messages) == 0:
        return "⚠️ No conversation history to save."

    try:
        # Create directory if it doesn't exist
        Path("chat_histories/").mkdir(exist_ok=True, parents=True)

        # Generate filename if not provided
        if not filename.strip():
            time_now = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
            # Add session identifier to prevent conflicts
            import uuid
            session_id = str(uuid.uuid4())[:8]
            filename = f"chat_histories/{time_now}_session_{session_id}_chat_history.json"
        else:
            filename = filename.strip()
            # BUGFIX: keep the user-supplied name when prefixing the directory
            # (a broken format string previously discarded it).
            if not filename.startswith("chat_histories/"):
                filename = f"chat_histories/{filename}"
            if not filename.endswith(".json"):
                filename += ".json"

        # Save the history
        save_chat_history(messages, filename)

        LOGGER.info(f"Chat history saved to {filename}")
        return f"✓ Chat history saved to: {filename}"

    except Exception as e:
        LOGGER.error(f"Error saving history: {e}")
        return f"❌ Error: {str(e)}"
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
def clear_chat(
    config: Optional[Dict],
    messages: Optional[List[Dict]],
    agent: NeurIPS2025Agent
) -> Tuple[str, List, Optional[List[Dict]]]:
    """Reset the UI chat.

    Args:
        config: Current configuration (unused, kept for the UI wiring).
        messages: Current message history (discarded).
        agent: Agent instance providing the system prompt to re-seed with.

    Returns:
        Tuple of (status_message, empty chatbot history, reset message list
        seeded with the agent's system prompt when one exists).
    """
    prompt = agent.get_system_prompt()
    reset_messages = [prompt] if isinstance(prompt, dict) else []

    LOGGER.info("Chat cleared and reset")
    return "✓ Chat cleared!", [], reset_messages
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
def submit_message(message, history, config, messages, agent):
    """Gradio submit handler: delegates to chat_fn and re-yields its streaming updates."""
    yield from chat_fn(message, history, config, messages, agent)
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
def main():
|
| 293 |
-
|
| 294 |
-
# Setup the agent instance
|
| 295 |
-
agent = initialize_agent()
|
| 296 |
-
|
| 297 |
-
with gr.Blocks(
|
| 298 |
-
title="AgenticNAV",
|
| 299 |
-
theme=gr.themes.Default(
|
| 300 |
-
spacing_size=gr.themes.sizes.spacing_sm,
|
| 301 |
-
radius_size=gr.themes.sizes.radius_none
|
| 302 |
-
)) as webapp:
|
| 303 |
-
|
| 304 |
-
gr.Markdown(
|
| 305 |
-
"# 🤖 AgenticNAV - Explore NeurIPS 2025 papers and build your personalized schedule, effortlessly!\n "
|
| 306 |
-
"This agent can help you explore the more than 5000 papers at this year's NeurIPS conference. "
|
| 307 |
-
"You can start chatting right away but see below for more specific instructions on how to use the agent "
|
| 308 |
-
"with your favorite model and inference config. You can also set a custom system prompt.\n\n "
|
| 309 |
-
"**Note:** This is an experimental deployment and LLMs can make mistakes. This can mean that the agent may "
|
| 310 |
-
"not discover your paper even though it is presented at the conference."
|
| 311 |
-
)
|
| 312 |
-
|
| 313 |
-
# Session state for agent instance, config, and messages
|
| 314 |
-
config_state = gr.State(value=DEFAULT_NEURIPS2025_AGENT_ARGS)
|
| 315 |
-
messages_state = gr.State(value=[agent.get_system_prompt()])
|
| 316 |
-
|
| 317 |
-
with gr.Row():
|
| 318 |
-
with gr.Column():
|
| 319 |
-
# Main chat interface
|
| 320 |
-
chatbot = gr.Chatbot(
|
| 321 |
-
label="Conversation Trail",
|
| 322 |
-
height=750,
|
| 323 |
-
type="messages",
|
| 324 |
-
show_copy_button=True,
|
| 325 |
-
)
|
| 326 |
-
|
| 327 |
-
with gr.Row():
|
| 328 |
-
msg_input = gr.Textbox(
|
| 329 |
-
label="Your message",
|
| 330 |
-
placeholder="Type your message here...",
|
| 331 |
-
lines=3,
|
| 332 |
-
scale=4
|
| 333 |
-
)
|
| 334 |
-
submit_btn = gr.Button("Send", variant="primary", scale=1)
|
| 335 |
-
|
| 336 |
-
with gr.Row():
|
| 337 |
-
clear_btn = gr.Button("🗑️ Clear Chat", size="sm")
|
| 338 |
-
save_btn = gr.Button("💾 Save History", size="sm")
|
| 339 |
-
|
| 340 |
-
with gr.Row():
|
| 341 |
-
# Help text at bottom
|
| 342 |
-
gr.Markdown("""
|
| 343 |
-
### 📖 Usage Guide
|
| 344 |
-
|
| 345 |
-
1. **Initialize**: Configure settings and click "Initialize Agent"
|
| 346 |
-
2. **Chat**: Type messages and press Enter or click Send
|
| 347 |
-
3. **System Prompt**: Customize the agent's behavior via System Prompt panel
|
| 348 |
-
4. **History**: View or save your conversation using the History & Save panel
|
| 349 |
-
5. **Clear**: Start a fresh conversation with the Clear Chat button
|
| 350 |
-
|
| 351 |
-
### Note on Ollama API Keys
|
| 352 |
-
In case you are experiencing an error calling the agent model (usually indicated by a message
|
| 353 |
-
containing the word "unauthorized"), you may go to https://ollama.com and generate your own key.
|
| 354 |
-
You can provide it in the configuration below. It will not be stored on our system and gets deleted
|
| 355 |
-
when you end session (i.e., close your browser window).
|
| 356 |
-
|
| 357 |
-
**Note**: Each browser session maintains its own independent conversation state.
|
| 358 |
-
Uses stateless agent interaction for better concurrency support.
|
| 359 |
-
"""
|
| 360 |
-
)
|
| 361 |
-
|
| 362 |
-
with gr.Row():
|
| 363 |
-
with gr.Column():
|
| 364 |
-
# Settings panel
|
| 365 |
-
gr.Markdown("### ⚙️ Agent Settings")
|
| 366 |
-
|
| 367 |
-
with gr.Accordion("Configuration", open=True):
|
| 368 |
-
api_base_input = gr.Textbox(
|
| 369 |
-
label="API Base URL",
|
| 370 |
-
value=AGENT_MODEL_API_BASE,
|
| 371 |
-
placeholder="http://localhost:11434"
|
| 372 |
-
)
|
| 373 |
-
|
| 374 |
-
api_key_input = gr.Textbox(
|
| 375 |
-
label="API Key (only needed for remote models)",
|
| 376 |
-
value="",
|
| 377 |
-
type="password",
|
| 378 |
-
placeholder="Leave empty if not needed"
|
| 379 |
-
)
|
| 380 |
-
|
| 381 |
-
model_input = gr.Textbox(
|
| 382 |
-
label="Model",
|
| 383 |
-
value=f"ollama_chat/{AGENT_MODEL_NAME}" if "ollama_chat" not in AGENT_MODEL_NAME else AGENT_MODEL_NAME,
|
| 384 |
-
placeholder="ollama_chat/gpt-oss:20b"
|
| 385 |
-
)
|
| 386 |
-
|
| 387 |
-
temperature_input = gr.Slider(
|
| 388 |
-
label="Temperature",
|
| 389 |
-
minimum=0.0,
|
| 390 |
-
maximum=1.0,
|
| 391 |
-
value=0.2,
|
| 392 |
-
step=0.1
|
| 393 |
-
)
|
| 394 |
-
|
| 395 |
-
max_tokens_input = gr.Slider(
|
| 396 |
-
label="Max Tokens",
|
| 397 |
-
minimum=100,
|
| 398 |
-
maximum=8192,
|
| 399 |
-
value=6000,
|
| 400 |
-
step=10
|
| 401 |
-
)
|
| 402 |
-
|
| 403 |
-
num_ctx_input = gr.Number(
|
| 404 |
-
label="Context Window",
|
| 405 |
-
value=131072,
|
| 406 |
-
precision=0
|
| 407 |
-
)
|
| 408 |
-
|
| 409 |
-
max_papers_input = gr.Slider(
|
| 410 |
-
label="Max Papers to Retrieve",
|
| 411 |
-
minimum=0,
|
| 412 |
-
maximum=100,
|
| 413 |
-
value=50,
|
| 414 |
-
step=1
|
| 415 |
-
)
|
| 416 |
-
|
| 417 |
-
init_btn = gr.Button("Update Config", variant="primary")
|
| 418 |
-
init_status = gr.Textbox(label="Status", interactive=False)
|
| 419 |
-
|
| 420 |
-
with gr.Accordion("System Prompt", open=False):
|
| 421 |
-
system_prompt_input = gr.Textbox(
|
| 422 |
-
label="System Prompt",
|
| 423 |
-
value=agent.get_system_prompt()["content"] if type(agent.get_system_prompt()) is dict else None,
|
| 424 |
-
placeholder="Enter custom system prompt here...",
|
| 425 |
-
lines=12
|
| 426 |
-
)
|
| 427 |
-
update_system_btn = gr.Button("Update System Prompt")
|
| 428 |
-
system_status = gr.Textbox(label="Status", interactive=False)
|
| 429 |
-
|
| 430 |
-
with gr.Accordion("History & Save", open=False):
|
| 431 |
-
view_history_btn = gr.Button("📜 View Full History")
|
| 432 |
-
history_output = gr.Code(
|
| 433 |
-
label="Conversation History (JSON)",
|
| 434 |
-
language="json",
|
| 435 |
-
lines=10
|
| 436 |
-
)
|
| 437 |
-
|
| 438 |
-
save_filename_input = gr.Textbox(
|
| 439 |
-
label="Filename (optional)",
|
| 440 |
-
placeholder="Leave empty for auto-generated name",
|
| 441 |
-
value=""
|
| 442 |
-
)
|
| 443 |
-
save_status = gr.Textbox(label="Save Status", interactive=False)
|
| 444 |
-
|
| 445 |
-
# Event handlers
|
| 446 |
-
init_btn.click(
|
| 447 |
-
fn=configure_agent,
|
| 448 |
-
inputs=[
|
| 449 |
-
api_base_input,
|
| 450 |
-
api_key_input,
|
| 451 |
-
model_input,
|
| 452 |
-
temperature_input,
|
| 453 |
-
max_tokens_input,
|
| 454 |
-
num_ctx_input,
|
| 455 |
-
max_papers_input,
|
| 456 |
-
config_state
|
| 457 |
-
],
|
| 458 |
-
outputs=[config_state, init_status]
|
| 459 |
-
)
|
| 460 |
-
|
| 461 |
-
# Chat submission
|
| 462 |
-
submit_btn.click(
|
| 463 |
-
fn=lambda msg_input, chatbot, config_state, messages_state: (yield from submit_message(msg_input, chatbot, config_state, messages_state, agent)),
|
| 464 |
-
inputs=[msg_input, chatbot, config_state, messages_state],
|
| 465 |
-
outputs=[chatbot, messages_state]
|
| 466 |
-
).then(
|
| 467 |
-
fn=lambda: "",
|
| 468 |
-
inputs=None,
|
| 469 |
-
outputs=msg_input
|
| 470 |
-
)
|
| 471 |
-
|
| 472 |
-
msg_input.submit(
|
| 473 |
-
fn=lambda msg_input, chatbot, config_state, messages_state: (yield from submit_message(msg_input, chatbot, config_state, messages_state, agent)),
|
| 474 |
-
inputs=[msg_input, chatbot, config_state, messages_state],
|
| 475 |
-
outputs=[chatbot, messages_state]
|
| 476 |
-
).then(
|
| 477 |
-
fn=lambda: "",
|
| 478 |
-
inputs=None,
|
| 479 |
-
outputs=msg_input
|
| 480 |
-
)
|
| 481 |
-
|
| 482 |
-
# System prompt update
|
| 483 |
-
update_system_btn.click(
|
| 484 |
-
fn=lambda system_prompt_input, messages_state: update_system_prompt(system_prompt_input, messages_state, agent),
|
| 485 |
-
inputs=[system_prompt_input, messages_state],
|
| 486 |
-
outputs=[system_status, messages_state]
|
| 487 |
-
)
|
| 488 |
-
|
| 489 |
-
# History viewing
|
| 490 |
-
view_history_btn.click(
|
| 491 |
-
fn=view_history,
|
| 492 |
-
inputs=messages_state,
|
| 493 |
-
outputs=history_output
|
| 494 |
-
)
|
| 495 |
-
|
| 496 |
-
# Save history
|
| 497 |
-
save_btn.click(
|
| 498 |
-
fn=save_history,
|
| 499 |
-
inputs=[save_filename_input, messages_state],
|
| 500 |
-
outputs=save_status
|
| 501 |
-
)
|
| 502 |
-
|
| 503 |
-
# Clear chat
|
| 504 |
-
clear_btn.click(
|
| 505 |
-
fn=lambda config_state, messages_state: clear_chat(config_state, messages_state, agent),
|
| 506 |
-
inputs=[config_state, messages_state],
|
| 507 |
-
outputs=[save_status, chatbot, messages_state]
|
| 508 |
-
)
|
| 509 |
-
|
| 510 |
-
webapp.launch(
|
| 511 |
-
server_name="0.0.0.0", # Allow external connections
|
| 512 |
-
server_port=7860, # Default Gradio port
|
| 513 |
-
share=False, # Set to True to create a public link
|
| 514 |
-
show_error=True,
|
| 515 |
-
debug=True
|
| 516 |
-
)
|
| 517 |
-
|
| 518 |
-
if __name__ == "__main__":
|
| 519 |
-
# Setup logging (only needs to be done once globally)
|
| 520 |
-
setup_logging(
|
| 521 |
-
log_dir="logs",
|
| 522 |
-
level=os.environ.get("AGENTIC_NAV_LOG_LEVEL", "INFO")
|
| 523 |
-
)
|
| 524 |
-
|
| 525 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
agentic_nav/frontend/cli.py
DELETED
|
@@ -1,371 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Enhanced terminal chat UI with async streaming and full terminal functionality.
|
| 3 |
-
|
| 4 |
-
Features:
|
| 5 |
-
- Async streaming output as LLM generates tokens
|
| 6 |
-
- Rich prompt with command history and auto-completion
|
| 7 |
-
- Live markdown rendering during streaming
|
| 8 |
-
- Multi-line input via Ctrl+O or /edit command
|
| 9 |
-
- Commands: /help, /exit, /system, /edit, /history, /save <path>, /clear
|
| 10 |
-
- Keyboard shortcuts: Ctrl+C to cancel, Ctrl+D to exit
|
| 11 |
-
"""
|
| 12 |
-
import asyncio
|
| 13 |
-
import click
|
| 14 |
-
import os
|
| 15 |
-
import logging
|
| 16 |
-
import litellm
|
| 17 |
-
from pathlib import Path
|
| 18 |
-
from typing import Optional
|
| 19 |
-
|
| 20 |
-
from rich.console import Console
|
| 21 |
-
from rich.markdown import Markdown
|
| 22 |
-
from rich.live import Live
|
| 23 |
-
from rich.panel import Panel
|
| 24 |
-
from rich.text import Text
|
| 25 |
-
from prompt_toolkit import PromptSession
|
| 26 |
-
from prompt_toolkit.history import FileHistory
|
| 27 |
-
from prompt_toolkit.auto_suggest import AutoSuggestFromHistory
|
| 28 |
-
from prompt_toolkit.completion import WordCompleter
|
| 29 |
-
from prompt_toolkit.key_binding import KeyBindings
|
| 30 |
-
from prompt_toolkit.formatted_text import HTML
|
| 31 |
-
|
| 32 |
-
from agentic_nav.agents import NeurIPS2025Agent
|
| 33 |
-
from agentic_nav.utils.logger import setup_logging
|
| 34 |
-
from agentic_nav.utils.file_handlers import save_chat_history
|
| 35 |
-
from agentic_nav.utils.cli import open_editor, show_history, print_help
|
| 36 |
-
|
| 37 |
-
try:
|
| 38 |
-
from datetime import datetime, UTC
|
| 39 |
-
except ImportError:
|
| 40 |
-
from datetime import datetime, timezone
|
| 41 |
-
UTC = timezone.utc
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
LOGGER = logging.getLogger(__name__)
|
| 45 |
-
|
| 46 |
-
EMBEDDING_MODEL_NAME = os.environ.get("EMBEDDING_MODEL_NAME", "nomic-embed-text")
|
| 47 |
-
EMBEDDING_MODEL_API_BASE = os.environ.get("EMBEDDING_MODEL_API_BASE", "http://localhost:11435")
|
| 48 |
-
|
| 49 |
-
AGENT_MODEL_NAME = os.environ.get("AGENT_MODEL_NAME", "gpt-oss:20b")
|
| 50 |
-
AGENT_MODEL_API_BASE = os.environ.get("AGENT_MODEL_API_BASE", "http://localhost:11436")
|
| 51 |
-
OLLAMA_API_KEY = os.environ.get("OLLAMA_API_KEY")
|
| 52 |
-
|
| 53 |
-
litellm._logging._disable_debugging()
|
| 54 |
-
console = Console(soft_wrap=True)
|
| 55 |
-
|
| 56 |
-
# Command completer for auto-complete
|
| 57 |
-
command_completer = WordCompleter(
|
| 58 |
-
['/help', '/exit', '/system', '/edit', '/history', '/save', '/clear'],
|
| 59 |
-
ignore_case=True,
|
| 60 |
-
sentence=True
|
| 61 |
-
)
|
| 62 |
-
|
| 63 |
-
bindings = KeyBindings()
|
| 64 |
-
|
| 65 |
-
@bindings.add('c-o')
|
| 66 |
-
def _(event):
|
| 67 |
-
"""Multi-line input with Ctrl+O"""
|
| 68 |
-
event.current_buffer.insert_text('\n')
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
def create_prompt_session():
|
| 72 |
-
"""Create a prompt_toolkit session with history and auto-suggest"""
|
| 73 |
-
history_file = Path.home() / ".llm_agents_history"
|
| 74 |
-
|
| 75 |
-
return PromptSession(
|
| 76 |
-
history=FileHistory(str(history_file)),
|
| 77 |
-
auto_suggest=AutoSuggestFromHistory(),
|
| 78 |
-
completer=command_completer,
|
| 79 |
-
complete_while_typing=True,
|
| 80 |
-
key_bindings=bindings,
|
| 81 |
-
enable_open_in_editor=True,
|
| 82 |
-
multiline=False,
|
| 83 |
-
)
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
def render_markdown(text: str, title: Optional[str] = None):
|
| 87 |
-
"""Render markdown with optional panel title"""
|
| 88 |
-
if title:
|
| 89 |
-
console.print(Panel(Markdown(text), title=title, border_style="blue"))
|
| 90 |
-
else:
|
| 91 |
-
console.print(Markdown(text))
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
def stream_agent_response_sync(agent, message: dict):
|
| 95 |
-
"""
|
| 96 |
-
Stream agent response with live markdown rendering.
|
| 97 |
-
|
| 98 |
-
This function:
|
| 99 |
-
1. Copies the agent's current history and appends the new message
|
| 100 |
-
2. Streams the response using interact_stateless generator
|
| 101 |
-
3. Updates the live display with markdown content and tool execution status
|
| 102 |
-
4. Updates the agent's history with the final message list
|
| 103 |
-
|
| 104 |
-
Note: KeyboardInterrupt is caught to allow graceful cancellation,
|
| 105 |
-
then re-raised so the caller can handle cleanup.
|
| 106 |
-
|
| 107 |
-
Args:
|
| 108 |
-
agent: The agent instance with interact_stateless support
|
| 109 |
-
message: User message dict with 'role', 'content', and optional '_ts'
|
| 110 |
-
"""
|
| 111 |
-
# Get current history and add the new message
|
| 112 |
-
messages = agent.get_history().copy()
|
| 113 |
-
messages.append(message)
|
| 114 |
-
|
| 115 |
-
accumulated_text = ""
|
| 116 |
-
tool_calls_made = []
|
| 117 |
-
final_messages = None
|
| 118 |
-
|
| 119 |
-
with Live(console=console, refresh_per_second=10) as live:
|
| 120 |
-
try:
|
| 121 |
-
# Use interact_stateless for streaming (it's a generator)
|
| 122 |
-
for updated_messages in agent.interact_stateless(
|
| 123 |
-
messages=messages,
|
| 124 |
-
model=agent.model,
|
| 125 |
-
api_base=agent.api_base,
|
| 126 |
-
api_key=agent.api_key,
|
| 127 |
-
llm_args=agent.llm_args
|
| 128 |
-
):
|
| 129 |
-
final_messages = updated_messages
|
| 130 |
-
|
| 131 |
-
# Extract the last assistant message
|
| 132 |
-
for msg in reversed(updated_messages):
|
| 133 |
-
if msg.get("role") == "assistant":
|
| 134 |
-
content = msg.get("content", "")
|
| 135 |
-
if content != accumulated_text:
|
| 136 |
-
accumulated_text = content
|
| 137 |
-
|
| 138 |
-
# Show streaming content
|
| 139 |
-
if accumulated_text:
|
| 140 |
-
live.update(Markdown(accumulated_text))
|
| 141 |
-
|
| 142 |
-
# Check for tool calls
|
| 143 |
-
if "tool_calls" in msg and msg["tool_calls"] != tool_calls_made:
|
| 144 |
-
tool_calls_made = msg["tool_calls"]
|
| 145 |
-
# Show tool execution
|
| 146 |
-
tool_names = [tc["function"]["name"] for tc in tool_calls_made]
|
| 147 |
-
tool_info = Text(f"\n🔧 Executing tools: {', '.join(tool_names)}", style="yellow")
|
| 148 |
-
live.update(tool_info)
|
| 149 |
-
break
|
| 150 |
-
|
| 151 |
-
# Update agent's history with final messages
|
| 152 |
-
if final_messages:
|
| 153 |
-
agent.set_history(final_messages)
|
| 154 |
-
|
| 155 |
-
except KeyboardInterrupt:
|
| 156 |
-
live.stop()
|
| 157 |
-
console.print("\n[yellow]⚠ Response cancelled by user[/yellow]")
|
| 158 |
-
raise
|
| 159 |
-
except Exception as e:
|
| 160 |
-
live.stop()
|
| 161 |
-
console.print(f"\n[red]❌ Error: {e}[/red]")
|
| 162 |
-
LOGGER.error(f"Streaming error: {e}", exc_info=True)
|
| 163 |
-
raise
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
async def async_interact(agent, message: dict):
|
| 167 |
-
"""
|
| 168 |
-
Async wrapper for agent interaction with streaming.
|
| 169 |
-
|
| 170 |
-
Note: KeyboardInterrupt from stream_agent_response_sync is caught here
|
| 171 |
-
to prevent it from propagating up. The actual interrupt handling and
|
| 172 |
-
user feedback happens in stream_agent_response_sync.
|
| 173 |
-
"""
|
| 174 |
-
try:
|
| 175 |
-
# Run the synchronous streaming function in a thread pool
|
| 176 |
-
await asyncio.to_thread(stream_agent_response_sync, agent, message)
|
| 177 |
-
except KeyboardInterrupt:
|
| 178 |
-
# Already handled in stream_agent_response_sync with user feedback
|
| 179 |
-
LOGGER.info("Agent interaction cancelled by user")
|
| 180 |
-
except Exception as e:
|
| 181 |
-
LOGGER.error(f"Agent interaction failed: {e}")
|
| 182 |
-
console.print(f"[red]Error: {e}[/red]")
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
def print_welcome():
|
| 186 |
-
"""Print welcome message"""
|
| 187 |
-
welcome = Text()
|
| 188 |
-
welcome.append("╔═══════════════════════════════════════╗\n", style="bold blue")
|
| 189 |
-
welcome.append("║ ", style="bold blue")
|
| 190 |
-
welcome.append("LLM Agent Chat Interface", style="bold white")
|
| 191 |
-
welcome.append(" ║\n", style="bold blue")
|
| 192 |
-
welcome.append("╚═══════════════════════════════════════╝\n", style="bold blue")
|
| 193 |
-
welcome.append("\nCommands:\n", style="bold yellow")
|
| 194 |
-
welcome.append(" /help - Show help\n", style="cyan")
|
| 195 |
-
welcome.append(" /edit - Multi-line input\n", style="cyan")
|
| 196 |
-
welcome.append(" /history - Show conversation history\n", style="cyan")
|
| 197 |
-
welcome.append(" /system - Set system prompt\n", style="cyan")
|
| 198 |
-
welcome.append(" /save - Save conversation\n", style="cyan")
|
| 199 |
-
welcome.append(" /clear - Clear screen\n", style="cyan")
|
| 200 |
-
welcome.append(" /exit - Exit (or Ctrl+D)\n", style="cyan")
|
| 201 |
-
welcome.append("\nShortcuts:\n", style="bold yellow")
|
| 202 |
-
welcome.append(" Ctrl+O - New line in input\n", style="cyan")
|
| 203 |
-
welcome.append(" Ctrl+C - Cancel current response\n", style="cyan")
|
| 204 |
-
welcome.append(" Ctrl+D - Exit\n", style="cyan")
|
| 205 |
-
welcome.append(" ↑/↓ - Navigate history\n", style="cyan")
|
| 206 |
-
welcome.append(" Tab - Auto-complete commands\n", style="cyan")
|
| 207 |
-
|
| 208 |
-
console.print(welcome)
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
@click.command()
|
| 212 |
-
@click.option("-t", "--temperature", default=0.2, type=float,
|
| 213 |
-
help="Specify the model temperature.")
|
| 214 |
-
@click.option("--max-tokens", default=6000, type=int,
|
| 215 |
-
help="Specify the max. number of model output tokens.")
|
| 216 |
-
@click.option("-c", "--num-ctx", default=131072, type=int,
|
| 217 |
-
help="Specify the model context window.")
|
| 218 |
-
@click.option("-l", "--max-num-papers", default=50, type=int,
|
| 219 |
-
help="Specify the maximum number of papers to retrieve.")
|
| 220 |
-
def main(temperature, max_tokens, num_ctx, max_num_papers):
|
| 221 |
-
"""Enhanced LLM Agent CLI with async streaming and rich terminal features"""
|
| 222 |
-
|
| 223 |
-
# Setup logging
|
| 224 |
-
setup_logging(
|
| 225 |
-
log_dir="logs",
|
| 226 |
-
level=os.environ.get("AGENTIC_NAV_LOG_LEVEL", "INFO")
|
| 227 |
-
)
|
| 228 |
-
|
| 229 |
-
print_welcome()
|
| 230 |
-
LOGGER.info("Agent runtime started")
|
| 231 |
-
|
| 232 |
-
# Config for the LLM messages
|
| 233 |
-
llm_config = {
|
| 234 |
-
"model": f"ollama_chat/{AGENT_MODEL_NAME}",
|
| 235 |
-
"api_base": AGENT_MODEL_API_BASE,
|
| 236 |
-
"temperature": temperature,
|
| 237 |
-
"max_tokens": max_tokens,
|
| 238 |
-
"num_ctx": num_ctx
|
| 239 |
-
}
|
| 240 |
-
LOGGER.info(f"LLM configuration: {llm_config}")
|
| 241 |
-
|
| 242 |
-
# Parameters to limit the tool calling scope
|
| 243 |
-
tool_args = {
|
| 244 |
-
"num_records": max_num_papers
|
| 245 |
-
}
|
| 246 |
-
LOGGER.info(f"Global tool arguments: {tool_args}")
|
| 247 |
-
|
| 248 |
-
# Initialize agent (model is passed via llm_config/llm_args)
|
| 249 |
-
agent = NeurIPS2025Agent(
|
| 250 |
-
api_base=AGENT_MODEL_API_BASE,
|
| 251 |
-
api_key=OLLAMA_API_KEY,
|
| 252 |
-
llm_args=llm_config,
|
| 253 |
-
global_tool_args=tool_args,
|
| 254 |
-
)
|
| 255 |
-
|
| 256 |
-
agent.setup_session()
|
| 257 |
-
console.print("[green]✓ Agent initialized successfully[/green]\n")
|
| 258 |
-
|
| 259 |
-
# Create prompt session
|
| 260 |
-
session = create_prompt_session()
|
| 261 |
-
|
| 262 |
-
# Main interaction loop
|
| 263 |
-
while True:
|
| 264 |
-
try:
|
| 265 |
-
# Get user input with rich prompt
|
| 266 |
-
line = session.prompt(
|
| 267 |
-
HTML('<ansiyellow><b>You></b></ansiyellow> '),
|
| 268 |
-
multiline=False,
|
| 269 |
-
).strip()
|
| 270 |
-
|
| 271 |
-
LOGGER.debug(f"USER PROMPT: {line}")
|
| 272 |
-
|
| 273 |
-
except (EOFError, KeyboardInterrupt):
|
| 274 |
-
console.print("\n[yellow]Goodbye! 👋[/yellow]")
|
| 275 |
-
LOGGER.info("User exited")
|
| 276 |
-
break
|
| 277 |
-
|
| 278 |
-
if not line:
|
| 279 |
-
continue
|
| 280 |
-
|
| 281 |
-
# Handle commands
|
| 282 |
-
if line.startswith("/"):
|
| 283 |
-
parts = line.split(maxsplit=1)
|
| 284 |
-
cmd = parts[0].lower()
|
| 285 |
-
arg = parts[1] if len(parts) > 1 else ""
|
| 286 |
-
|
| 287 |
-
if cmd == "/help":
|
| 288 |
-
print_help()
|
| 289 |
-
continue
|
| 290 |
-
|
| 291 |
-
elif cmd == "/exit":
|
| 292 |
-
console.print("[yellow]Goodbye! 👋[/yellow]")
|
| 293 |
-
LOGGER.info("User exited via /exit command")
|
| 294 |
-
break
|
| 295 |
-
|
| 296 |
-
elif cmd == "/clear":
|
| 297 |
-
console.clear()
|
| 298 |
-
print_welcome()
|
| 299 |
-
continue
|
| 300 |
-
|
| 301 |
-
elif cmd == "/edit":
|
| 302 |
-
content = open_editor()
|
| 303 |
-
if content:
|
| 304 |
-
next_message = {
|
| 305 |
-
"role": "user",
|
| 306 |
-
"content": content,
|
| 307 |
-
"_ts": str(datetime.now(UTC))
|
| 308 |
-
}
|
| 309 |
-
else:
|
| 310 |
-
console.print("[yellow]⚠ No content provided[/yellow]")
|
| 311 |
-
continue
|
| 312 |
-
|
| 313 |
-
elif cmd == "/system":
|
| 314 |
-
content = open_editor()
|
| 315 |
-
if content:
|
| 316 |
-
messages = agent.set_system_prompt(
|
| 317 |
-
messages=agent.get_history(),
|
| 318 |
-
new_system_prompt=content
|
| 319 |
-
)
|
| 320 |
-
agent.set_history(messages=messages)
|
| 321 |
-
console.print("[green]✓ System prompt updated[/green]")
|
| 322 |
-
continue
|
| 323 |
-
else:
|
| 324 |
-
console.print("[yellow]⚠ No content provided[/yellow]")
|
| 325 |
-
continue
|
| 326 |
-
|
| 327 |
-
elif cmd == "/history":
|
| 328 |
-
show_history(agent.get_history())
|
| 329 |
-
continue
|
| 330 |
-
|
| 331 |
-
elif cmd == "/save":
|
| 332 |
-
Path("chat_histories/").mkdir(exist_ok=True, parents=True)
|
| 333 |
-
time_now = datetime.now().strftime("%Y-%m-%d_%H-%M")
|
| 334 |
-
path = arg.strip() or f"chat_histories/{time_now}_chat_history.json"
|
| 335 |
-
|
| 336 |
-
try:
|
| 337 |
-
save_chat_history(agent.get_history(), path)
|
| 338 |
-
console.print(f"[green]✓ Chat saved to {path}[/green]")
|
| 339 |
-
except Exception as e:
|
| 340 |
-
console.print(f"[red]❌ Failed to save: {e}[/red]")
|
| 341 |
-
LOGGER.error(f"Save failed: {e}")
|
| 342 |
-
continue
|
| 343 |
-
|
| 344 |
-
else:
|
| 345 |
-
console.print(f"[red]❌ Unknown command: {cmd}[/red]")
|
| 346 |
-
console.print("[yellow]Type /help for available commands[/yellow]")
|
| 347 |
-
continue
|
| 348 |
-
else:
|
| 349 |
-
# Regular single-line user message
|
| 350 |
-
next_message = {
|
| 351 |
-
"role": "user",
|
| 352 |
-
"content": line,
|
| 353 |
-
"_ts": str(datetime.now(UTC))
|
| 354 |
-
}
|
| 355 |
-
|
| 356 |
-
try:
|
| 357 |
-
console.print()
|
| 358 |
-
asyncio.run(async_interact(agent, next_message))
|
| 359 |
-
console.print()
|
| 360 |
-
|
| 361 |
-
except KeyboardInterrupt:
|
| 362 |
-
console.print("\n[yellow]⚠ Interrupted[/yellow]")
|
| 363 |
-
continue
|
| 364 |
-
except Exception as e:
|
| 365 |
-
console.print(f"\n[red]❌ Error: {e}[/red]")
|
| 366 |
-
LOGGER.error(f"Interaction error: {e}", exc_info=True)
|
| 367 |
-
continue
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
if __name__ == "__main__":
|
| 371 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
agentic_nav/tools/__init__.py
DELETED
|
@@ -1,15 +0,0 @@
|
|
| 1 |
-
from agentic_nav.tools.knowledge_graph import search_similar_papers, find_neighboring_papers, traverse_graph
|
| 2 |
-
from agentic_nav.tools.session_routing import build_visit_schedule
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
__all__ = [
|
| 6 |
-
'search_similar_papers',
|
| 7 |
-
'find_neighboring_papers',
|
| 8 |
-
'traverse_graph',
|
| 9 |
-
'build_visit_schedule',
|
| 10 |
-
]
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
def get_all_tools():
|
| 14 |
-
"""Get all tools as a dictionary."""
|
| 15 |
-
return [globals()[name] for name in __all__]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
agentic_nav/tools/knowledge_graph/__init__.py
DELETED
|
@@ -1,326 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
This file defines the tools that can be made available to an agent.
|
| 3 |
-
The idea is to put the actual functions into wrappers that provide LLM-friendly and token efficient outputs.
|
| 4 |
-
"""
|
| 5 |
-
import os
|
| 6 |
-
import random
|
| 7 |
-
|
| 8 |
-
from toon_format import encode as toon_encode
|
| 9 |
-
from typing import List, Optional, Union
|
| 10 |
-
|
| 11 |
-
from agentic_nav.tools.knowledge_graph.retriever import Neo4jGraphWorker, LOGGER
|
| 12 |
-
|
| 13 |
-
NEO4J_DB_URI = os.environ.get("NEO4J_DB_URI", "bolt://neo4j_db:7687")
|
| 14 |
-
NEO4J_USERNAME = os.environ.get("NEO4J_USERNAME", "neo4j")
|
| 15 |
-
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD")
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
def search_similar_papers(
    user_query: str,
    num_papers_to_return: int = 50,
    min_similarity: Optional[float] = None,
    day: Optional[str] = None,
    timeslots: Optional[List[str]] = None
) -> str:
    """
    Search for research papers semantically similar to a user's natural language query.

    Performs vector similarity search against the Neo4j knowledge graph to find
    papers matching the semantic meaning of the query. This is the entry point
    for paper discovery workflows and is typically followed by
    find_neighboring_papers() or traverse_graph() for deeper exploration.

    Args:
        user_query: Natural language query describing the research topic.
            The query is embedded and compared against paper embeddings.
        num_papers_to_return: Maximum number of papers to return, ranked by
            similarity score. Defaults to 50.
        min_similarity: Minimum similarity threshold in [0.0, 1.0], or None
            for no filtering. Higher values are stricter.
        day: Conference day as an ISO date string (e.g. "2024-12-10"), or None
            for no day filtering.
        timeslots: Time ranges formatted "HH:MM:SS-HH:MM:SS" (e.g.
            ["09:00:00-12:00:00"]); papers whose session start falls in any
            range are included. None disables time filtering.

    Returns:
        str: Token-efficient toon-encoded representation of the matching
        papers, ordered by descending similarity.

    Raises:
        ValueError: If min_similarity is outside [0.0, 1.0].
        Exception: Connection/embedding errors from the Neo4j worker.

    Notes:
        - Connection settings come from NEO4J_DB_URI / NEO4J_USERNAME /
          NEO4J_PASSWORD environment variables.
        - Day/timeslot filtering happens in the database BEFORE the vector
          search for better performance.
        - TODO: wrap Neo4jGraphWorker in a session to pool connections instead
          of creating one per call.

    Example:
        >>> search_similar_papers(
        ...     user_query="federated learning for privacy-preserving ML",
        ...     num_papers_to_return=15,
        ...     min_similarity=0.75,
        ...     day="2024-12-10",
        ...     timeslots=["09:00:00-12:00:00"],
        ... )
    """
    # Type coercion for parameters that may come as strings from LLM tool calls.
    if num_papers_to_return is not None and not isinstance(num_papers_to_return, int):
        num_papers_to_return = int(num_papers_to_return)
    if min_similarity is not None and not isinstance(min_similarity, float):
        min_similarity = float(min_similarity)

    # FIX: the docstring promised a ValueError for out-of-range thresholds,
    # but nothing enforced it; enforce the documented [0.0, 1.0] contract.
    if min_similarity is not None and not 0.0 <= min_similarity <= 1.0:
        raise ValueError(f"min_similarity must be within [0.0, 1.0], got {min_similarity}")

    # A single timeslot may arrive as a bare string from an LLM tool call.
    if timeslots is not None and isinstance(timeslots, str):
        timeslots = [timeslots]

    worker = Neo4jGraphWorker(
        uri=NEO4J_DB_URI,
        username=NEO4J_USERNAME,
        password=NEO4J_PASSWORD
    )

    # Fetch papers with optional day and time filtering.
    papers = worker.similarity_search(
        user_query=user_query,
        top_k=num_papers_to_return,
        min_similarity=min_similarity,
        day=day,
        timeslots=timeslots
    )

    # Format outputs to be more token efficient.
    return toon_encode(papers)
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
def find_neighboring_papers(
    paper_id: str,
    relationship_types: Union[List[str], str, None] = None,
    num_neighbors_to_return: int = 10,
    min_similarity: float = 0.75
) -> str:
    """
    Retrieve immediate neighboring entities of a paper from the Neo4j knowledge graph.

    Performs a one-hop neighborhood search for entities directly connected to
    the target paper. Intended for use after an initial similarity search when
    exploring specific relationships (similar papers, authors, topics).

    Args:
        paper_id: Unique identifier (neo4j UUID) of the target paper node.
        relationship_types: Relationship type(s) to query; a single string is
            accepted and wrapped in a list. None means the default
            ["SIMILAR_TO"]. Valid options:
            ["SIMILAR_TO", "IS_AUTHOR_OF", "BELONGS_TO_TOPIC"].
        num_neighbors_to_return: Maximum number of neighbors to return.
            Defaults to 10. Results are shuffled before truncation so repeated
            calls surface diverse neighbors.
        min_similarity: Minimum similarity threshold for returned neighbors.

    Returns:
        str: Token-efficient toon-encoded representation of the neighbors.

    Raises:
        ValueError: If any relationship type is not one of the valid options.
        Exception: Connection errors if the Neo4j database is not accessible.

    Notes:
        - Connection settings come from NEO4J_DB_URI / NEO4J_USERNAME /
          NEO4J_PASSWORD environment variables.
        - Only one-hop searches (direct neighbors) are performed.

    Example:
        >>> find_neighboring_papers(
        ...     paper_id="<UUID>",
        ...     relationship_types=["SIMILAR_TO"],
        ...     num_neighbors_to_return=5
        ... )
    """
    # Type coercion for parameters that may come as strings from LLM tool calls.
    if num_neighbors_to_return is not None and not isinstance(num_neighbors_to_return, int):
        num_neighbors_to_return = int(num_neighbors_to_return)

    # FIX: replaced the mutable default argument (["SIMILAR_TO"]) with a None
    # sentinel; behavior for callers is unchanged.
    if relationship_types is None:
        relationship_types = ["SIMILAR_TO"]
    if isinstance(relationship_types, str):
        relationship_types = [relationship_types]

    # FIX: validate against the documented options (the docstring promised a
    # ValueError, but nothing enforced it).
    valid_relationships = {"SIMILAR_TO", "IS_AUTHOR_OF", "BELONGS_TO_TOPIC"}
    invalid = set(relationship_types) - valid_relationships
    if invalid:
        raise ValueError(
            f"Invalid relationship_types {sorted(invalid)}; "
            f"valid options: {sorted(valid_relationships)}"
        )

    worker = Neo4jGraphWorker(
        uri=NEO4J_DB_URI,
        username=NEO4J_USERNAME,
        password=NEO4J_PASSWORD
    )

    neighbors = worker.neighborhood_search(
        paper_id=paper_id,
        relationship_types=relationship_types,
        min_similarity=min_similarity,
    )

    # FIX: the original filter compared a str against a list
    # (`rel_type != relationship_types`), which was always true and therefore
    # kept every neighbor group regardless of the requested relationship types.
    relevant_neighbors = [
        neighbor
        for rel_type, neighbor in neighbors.items()
        if rel_type in relationship_types
    ]

    # Shuffle so repeated calls yield more diverse responses.
    random.shuffle(relevant_neighbors)

    if num_neighbors_to_return is not None:
        relevant_neighbors = relevant_neighbors[:num_neighbors_to_return]

    # Format outputs to be more token efficient.
    return toon_encode(relevant_neighbors)
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
def traverse_graph(
    start_paper_id: str,
    n_hops: int = 2,
    relationship_type: Optional[str] = "BELONGS_TO_TOPIC",
    max_results: Optional[int] = 30,
    strategy: str = "breadth_first_random",
    max_branches: Optional[int] = 2,
    random_seed: Optional[int] = 42
) -> str:
    """
    Traverse the Neo4j knowledge graph to discover related research papers.

    Performs exploratory traversal starting from a seed paper to find
    potentially interesting related papers. Intended for use after an initial
    similarity search, allowing discovery through different connection paths
    (topics, authors, similarity).

    Args:
        start_paper_id: Unique identifier (neo4j UUID) of the starting paper node.
        n_hops: Number of relationship hops to traverse. Defaults to 2; higher
            values explore further but may return less relevant results.
        relationship_type: Relationship type to follow. Defaults to
            "BELONGS_TO_TOPIC". Valid options:
            ["SIMILAR_TO", "AUTHORED_BY", "BELONGS_TO_TOPIC"].
        max_results: Maximum number of papers to return. Defaults to 30.
        strategy: Traversal strategy. Defaults to "breadth_first_random".
            Valid options: ["breadth_first", "depth_first",
            "breadth_first_random", "depth_first_random"].
        max_branches: Maximum branches to follow at each node. Defaults to 2.
        random_seed: Seed for the randomized strategies (reproducibility).
            Defaults to 42.

    Returns:
        str: Toon-encoded representation of the discovered papers.

    Raises:
        ValueError: If relationship_type or strategy is not a valid option.
        Exception: Connection errors if the Neo4j database is not accessible.

    Example:
        >>> traverse_graph(
        ...     start_paper_id="paper_12345",
        ...     n_hops=3,
        ...     relationship_type="SIMILAR_TO",
        ...     max_results=50,
        ...     strategy="breadth_first_random"
        ... )
    """
    # Type coercion for parameters that may come as strings from LLM tool calls.
    if n_hops is not None and not isinstance(n_hops, int):
        n_hops = int(n_hops)
    if max_results is not None and not isinstance(max_results, int):
        max_results = int(max_results)
    if max_branches is not None and not isinstance(max_branches, int):
        max_branches = int(max_branches)
    if random_seed is not None and not isinstance(random_seed, int):
        random_seed = int(random_seed)

    # FIX: the docstring promised a ValueError for invalid relationship_type /
    # strategy, but nothing validated them; enforce the documented options.
    valid_relationships = {"SIMILAR_TO", "AUTHORED_BY", "BELONGS_TO_TOPIC"}
    if relationship_type is not None and relationship_type not in valid_relationships:
        raise ValueError(
            f"Invalid relationship_type {relationship_type!r}; "
            f"valid options: {sorted(valid_relationships)}"
        )
    valid_strategies = {"breadth_first", "depth_first", "breadth_first_random", "depth_first_random"}
    if strategy not in valid_strategies:
        raise ValueError(
            f"Invalid strategy {strategy!r}; valid options: {sorted(valid_strategies)}"
        )

    worker = Neo4jGraphWorker(
        uri=NEO4J_DB_URI,
        username=NEO4J_USERNAME,
        password=NEO4J_PASSWORD
    )

    papers = worker.graph_traversal(
        start_paper_id=start_paper_id,
        n_hops=n_hops,
        relationship_type=relationship_type,
        max_results=max_results,
        strategy=strategy,
        max_branches=max_branches,
        random_seed=random_seed
    )

    # Format outputs to be more token efficient.
    return toon_encode(papers)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
agentic_nav/tools/knowledge_graph/file_handler.py
DELETED
|
@@ -1,29 +0,0 @@
|
|
| 1 |
-
import pickle
|
| 2 |
-
import networkx as nx
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
def save_graph(graph: "nx.Graph", output_path: str) -> None:
    """
    Serialize a graph to a file using pickle.

    Args:
        graph: The networkx graph to serialize.
        output_path: Path to write the pickle file to.
    """
    # FIX: removed the redundant f.close() — the `with` block already closes
    # the file when the body exits.
    with open(output_path, 'wb') as f:
        pickle.dump(graph, f)
    print(f"Graph saved to {output_path}")
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
def load_graph(input_path: str) -> "nx.Graph":
    """
    Load a graph from a pickle file.

    Args:
        input_path: Path to the saved graph pickle.

    Returns:
        The deserialized networkx graph.
    """
    # FIX: removed the redundant f.close() — `with` already closes the file.
    # SECURITY NOTE: pickle.load executes arbitrary code during
    # deserialization; only load graph files from trusted sources.
    with open(input_path, 'rb') as f:
        graph = pickle.load(f)
    print(f"Graph loaded from {input_path}")
    return graph
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
agentic_nav/tools/knowledge_graph/graph_generator.py
DELETED
|
@@ -1,446 +0,0 @@
|
|
| 1 |
-
import json
|
| 2 |
-
import logging
|
| 3 |
-
import os
|
| 4 |
-
|
| 5 |
-
import click
|
| 6 |
-
import networkx as nx
|
| 7 |
-
import numpy as np
|
| 8 |
-
import litellm
|
| 9 |
-
from typing import List, Dict, Any, Union
|
| 10 |
-
from litellm import embedding
|
| 11 |
-
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 12 |
-
from tqdm import tqdm
|
| 13 |
-
|
| 14 |
-
from pathlib import Path
|
| 15 |
-
|
| 16 |
-
from agentic_nav.utils.embedding_generator import batch_embed_documents
|
| 17 |
-
from agentic_nav.utils.logging import setup_logging
|
| 18 |
-
from agentic_nav.tools.knowledge_graph.file_handler import save_graph
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
# Setup logging
|
| 22 |
-
setup_logging(
|
| 23 |
-
log_dir="logs",
|
| 24 |
-
level=os.environ.get("AGENTIC_NAV_LOG_LEVEL", "INFO")
|
| 25 |
-
)
|
| 26 |
-
LOGGER = logging.getLogger(__name__)
|
| 27 |
-
litellm._logging._disable_debugging()
|
| 28 |
-
PROJECT_ROOT = Path(__file__).parent.parent.parent.parent
|
| 29 |
-
EMBEDDING_MODEL_NAME = os.environ.get("EMBEDDING_MODEL_NAME", "ollama/nomic-embed-text")
|
| 30 |
-
EMBEDDING_MODEL_API_BASE = os.environ.get("EMBEDDING_MODEL_API_BASE", "http://localhost:11435")
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
class PaperKnowledgeGraph:
|
| 34 |
-
"""
|
| 35 |
-
A knowledge graph builder for academic papers focusing on:
|
| 36 |
-
- Paper names (nodes)
|
| 37 |
-
- Topics (nodes)
|
| 38 |
-
- Abstract embeddings (stored as node attributes)
|
| 39 |
-
Uses litellm with ollama for local embedding generation with parallel processing.
|
| 40 |
-
"""
|
| 41 |
-
def __init__(
|
| 42 |
-
self,
|
| 43 |
-
embedding_model: str = EMBEDDING_MODEL_NAME,
|
| 44 |
-
ollama_base_url: str = EMBEDDING_MODEL_API_BASE,
|
| 45 |
-
embedding_gen_batch_size: int = 32,
|
| 46 |
-
max_parallel_workers: int = 8,
|
| 47 |
-
limit_num_papers: Union[int, None] = None
|
| 48 |
-
):
|
| 49 |
-
"""
|
| 50 |
-
Initialize the knowledge graph builder.
|
| 51 |
-
|
| 52 |
-
Args:
|
| 53 |
-
embedding_model: Name of the ollama embedding model (e.g., 'nomic-embed-text')
|
| 54 |
-
ollama_base_url: Base URL for the ollama server
|
| 55 |
-
embedding_gen_batch_size: Batch size for generating text embeddings
|
| 56 |
-
max_parallel_workers: Number of parallel workers for embedding generation
|
| 57 |
-
"""
|
| 58 |
-
self.graph = nx.Graph()
|
| 59 |
-
self.embedding_model = embedding_model
|
| 60 |
-
self.ollama_base_url = ollama_base_url
|
| 61 |
-
self.batch_size = embedding_gen_batch_size
|
| 62 |
-
self.max_workers = max_parallel_workers
|
| 63 |
-
self.papers_data = []
|
| 64 |
-
self.limit_num_papers = limit_num_papers
|
| 65 |
-
|
| 66 |
-
# Test connection
|
| 67 |
-
LOGGER.info(f"Initializing with model: {embedding_model}")
|
| 68 |
-
LOGGER.info(f"Ollama server: {ollama_base_url}")
|
| 69 |
-
self._test_embedding_connection()
|
| 70 |
-
|
| 71 |
-
def _test_embedding_connection(self):
|
| 72 |
-
"""Test connection to ollama server."""
|
| 73 |
-
try:
|
| 74 |
-
response = embedding(
|
| 75 |
-
model=self.embedding_model,
|
| 76 |
-
input=["test connection"],
|
| 77 |
-
api_base=self.ollama_base_url
|
| 78 |
-
)
|
| 79 |
-
LOGGER.info(f"Successfully connected to ollama server")
|
| 80 |
-
LOGGER.info(f"Embedding dimension: {len(response.data[0]['embedding'])}")
|
| 81 |
-
except Exception as e:
|
| 82 |
-
LOGGER.error(f"❌ Error connecting to ollama server: {e}")
|
| 83 |
-
LOGGER.error(f"Please ensure ollama is running and the model '{self.embedding_model}' is available")
|
| 84 |
-
LOGGER.error(f"Run: ollama pull nomic-embed-text")
|
| 85 |
-
raise
|
| 86 |
-
|
| 87 |
-
def load_papers_from_json(self, json_file_path: str, paper_dict_key: str = "results"):
|
| 88 |
-
"""
|
| 89 |
-
Load papers from a JSON file or JSONL file.
|
| 90 |
-
|
| 91 |
-
Args:
|
| 92 |
-
json_file_path: Path to the JSON/JSONL file
|
| 93 |
-
"""
|
| 94 |
-
self.papers_data = []
|
| 95 |
-
|
| 96 |
-
with open(json_file_path, 'r') as f:
|
| 97 |
-
# Try to parse as regular JSON first
|
| 98 |
-
try:
|
| 99 |
-
content = f.read()
|
| 100 |
-
# Try parsing as a single JSON object
|
| 101 |
-
try:
|
| 102 |
-
data = json.loads(content)
|
| 103 |
-
if isinstance(data[paper_dict_key], list):
|
| 104 |
-
self.papers_data = data[paper_dict_key]
|
| 105 |
-
else:
|
| 106 |
-
raise TypeError("File importer expects a list of papers.")
|
| 107 |
-
except json.JSONDecodeError:
|
| 108 |
-
# Try parsing as JSONL (one JSON object per line)
|
| 109 |
-
f.seek(0)
|
| 110 |
-
for line in f:
|
| 111 |
-
line = line.strip()
|
| 112 |
-
if line:
|
| 113 |
-
self.papers_data.append(json.loads(line))
|
| 114 |
-
except Exception as e:
|
| 115 |
-
raise ValueError(f"Error parsing JSON file: {e}")
|
| 116 |
-
|
| 117 |
-
if self.limit_num_papers is not None and self.limit_num_papers > 0:
|
| 118 |
-
LOGGER.warning(f"WARNING: Number of papers limited to {self.limit_num_papers} items. Set to 'None' for all papers")
|
| 119 |
-
self.papers_data = self.papers_data[:self.limit_num_papers]
|
| 120 |
-
|
| 121 |
-
LOGGER.info(f"Loaded {len(self.papers_data)} papers from {json_file_path}")
|
| 122 |
-
|
| 123 |
-
def build_graph(self):
|
| 124 |
-
"""
|
| 125 |
-
Build the knowledge graph from loaded papers.
|
| 126 |
-
Creates nodes for papers and topics, and edges between them.
|
| 127 |
-
Computes embeddings for abstracts in parallel.
|
| 128 |
-
"""
|
| 129 |
-
topic_nodes = set()
|
| 130 |
-
author_nodes = set()
|
| 131 |
-
|
| 132 |
-
LOGGER.info(f"\nPreparing to process {len(self.papers_data)} papers...")
|
| 133 |
-
|
| 134 |
-
# Extract all abstracts and paper info
|
| 135 |
-
paper_info = []
|
| 136 |
-
abstracts = []
|
| 137 |
-
|
| 138 |
-
for paper in self.papers_data:
|
| 139 |
-
paper_id = paper.get('uid', paper.get('id'))
|
| 140 |
-
paper_name = paper.get('name', 'Unnamed Paper')
|
| 141 |
-
abstract = paper.get('abstract', '')
|
| 142 |
-
topic = paper.get('topic', 'Unknown')
|
| 143 |
-
authors = paper.get('authors', [])
|
| 144 |
-
keywords = paper.get("keywords", [])
|
| 145 |
-
decision = paper.get("decision", "")
|
| 146 |
-
session = paper.get("session", "")
|
| 147 |
-
session_start_time = paper.get("starttime", "")
|
| 148 |
-
session_end_time = paper.get("endtime", "")
|
| 149 |
-
presentation_type = paper.get("eventtype", "")
|
| 150 |
-
room_name = paper.get("room_name", "")
|
| 151 |
-
project_url = paper.get("url", "")
|
| 152 |
-
poster_position = paper.get("poster_position", "")
|
| 153 |
-
paper_url = paper.get("paper_url", "")
|
| 154 |
-
sourceid = paper.get("sourceid", "")
|
| 155 |
-
virtualsite_url = paper.get("virtualsite_url", "")
|
| 156 |
-
|
| 157 |
-
paper_info.append({
|
| 158 |
-
"id": paper_id,
|
| 159 |
-
"name": paper_name,
|
| 160 |
-
"abstract": abstract,
|
| 161 |
-
"topic": topic,
|
| 162 |
-
"authors": authors,
|
| 163 |
-
"keywords": keywords,
|
| 164 |
-
"decisions": decision,
|
| 165 |
-
"session": session,
|
| 166 |
-
"session_start_time": session_start_time,
|
| 167 |
-
"session_end_time": session_end_time,
|
| 168 |
-
"presentation_type": presentation_type,
|
| 169 |
-
"room_name": room_name,
|
| 170 |
-
"project_url": project_url,
|
| 171 |
-
"poster_position": poster_position,
|
| 172 |
-
"paper_url": paper_url,
|
| 173 |
-
"sourceid": sourceid,
|
| 174 |
-
"virtualsite_url": virtualsite_url
|
| 175 |
-
|
| 176 |
-
})
|
| 177 |
-
abstracts.append(abstract)
|
| 178 |
-
|
| 179 |
-
# Generate all embeddings in parallel
|
| 180 |
-
LOGGER.info(f"\nGenerating embeddings with batch size {self.batch_size}...")
|
| 181 |
-
embeddings = batch_embed_documents(
|
| 182 |
-
abstracts,
|
| 183 |
-
batch_size=self.batch_size,
|
| 184 |
-
embedding_model=self.embedding_model,
|
| 185 |
-
api_base=self.ollama_base_url
|
| 186 |
-
)
|
| 187 |
-
|
| 188 |
-
# Convert to list so that embeddings can be mapped to samples properly
|
| 189 |
-
embeddings = embeddings.tolist()
|
| 190 |
-
|
| 191 |
-
# Add nodes to graph
|
| 192 |
-
LOGGER.info("\nBuilding graph structure...")
|
| 193 |
-
with tqdm(total=len(paper_info), desc="Adding nodes") as pbar:
|
| 194 |
-
for info, embedding in zip(paper_info, embeddings):
|
| 195 |
-
|
| 196 |
-
# Extract author information (store as list of dicts)
|
| 197 |
-
author_list = []
|
| 198 |
-
if info['authors']:
|
| 199 |
-
for author in info['authors']:
|
| 200 |
-
author_info = {
|
| 201 |
-
'id': author.get('id'),
|
| 202 |
-
'fullname': author.get('fullname', ''),
|
| 203 |
-
'institution': author.get('institution', ''),
|
| 204 |
-
'url': author.get('url', '')
|
| 205 |
-
}
|
| 206 |
-
|
| 207 |
-
author_uid = f"{author_info['id']} - {author_info['fullname']}"
|
| 208 |
-
if author_uid not in author_nodes:
|
| 209 |
-
self.graph.add_node(
|
| 210 |
-
author_uid,
|
| 211 |
-
**author_info
|
| 212 |
-
)
|
| 213 |
-
author_nodes.add(author_uid)
|
| 214 |
-
|
| 215 |
-
author_list.append(author_info)
|
| 216 |
-
|
| 217 |
-
# Add paper node with attributes
|
| 218 |
-
paper_attrs = info.copy()
|
| 219 |
-
del paper_attrs["authors"]
|
| 220 |
-
|
| 221 |
-
self.graph.add_node(
|
| 222 |
-
info["id"],
|
| 223 |
-
**paper_attrs,
|
| 224 |
-
embedding=embedding,
|
| 225 |
-
authors=author_list,
|
| 226 |
-
node_type="paper"
|
| 227 |
-
)
|
| 228 |
-
|
| 229 |
-
for author in author_list:
|
| 230 |
-
self.graph.add_edge(f"{author['id']} - {author['fullname']}", info["id"], relationship="is_author_of")
|
| 231 |
-
|
| 232 |
-
# Add topic node if it doesn't exist
|
| 233 |
-
if info['topic'] and info['topic'] not in topic_nodes:
|
| 234 |
-
self.graph.add_node(
|
| 235 |
-
info['topic'],
|
| 236 |
-
node_type='topic',
|
| 237 |
-
name=info['topic']
|
| 238 |
-
)
|
| 239 |
-
topic_nodes.add(info['topic'])
|
| 240 |
-
|
| 241 |
-
# Add edge between paper and topic
|
| 242 |
-
if info['topic']:
|
| 243 |
-
self.graph.add_edge(info['id'], info['topic'], relationship='belongs_to_topic')
|
| 244 |
-
|
| 245 |
-
pbar.update(1)
|
| 246 |
-
|
| 247 |
-
LOGGER.info(f"Built graph with {self.graph.number_of_nodes()} nodes and {self.graph.number_of_edges()} edges")
|
| 248 |
-
LOGGER.info(f" Papers: {len([n for n, d in self.graph.nodes(data=True) if d.get('node_type') == 'paper'])}")
|
| 249 |
-
LOGGER.info(f" Topics: {len([n for n, d in self.graph.nodes(data=True) if d.get('node_type') == 'topic'])}")
|
| 250 |
-
|
| 251 |
-
def connect_similar_papers(self, similarity_threshold: float = 0.7):
|
| 252 |
-
"""
|
| 253 |
-
Connect papers based on abstract embedding similarity using parallel processing.
|
| 254 |
-
Args:
|
| 255 |
-
similarity_threshold: Minimum cosine similarity to create an edge (0-1)
|
| 256 |
-
"""
|
| 257 |
-
paper_nodes = [(n, d) for n, d in self.graph.nodes(data=True) if d.get('node_type') == 'paper']
|
| 258 |
-
LOGGER.info(f"\nComputing similarities for {len(paper_nodes)} papers...")
|
| 259 |
-
|
| 260 |
-
# Create pairs to compare (fast!)
|
| 261 |
-
pairs = [(i, j) for i in range(len(paper_nodes)) for j in range(i + 1, len(paper_nodes))]
|
| 262 |
-
LOGGER.info(f"Created {len(pairs)} pairs to compare")
|
| 263 |
-
|
| 264 |
-
connections_added = 0
|
| 265 |
-
|
| 266 |
-
def compute_similarity(pair_idx):
|
| 267 |
-
"""Compute similarity for a pair of papers."""
|
| 268 |
-
i, j = pair_idx
|
| 269 |
-
node1, data1 = paper_nodes[i]
|
| 270 |
-
node2, data2 = paper_nodes[j]
|
| 271 |
-
emb1 = data1['embedding']
|
| 272 |
-
emb2 = data2['embedding']
|
| 273 |
-
similarity = np.dot(emb1, emb2) / (np.linalg.norm(emb1) * np.linalg.norm(emb2))
|
| 274 |
-
if similarity >= similarity_threshold:
|
| 275 |
-
return (node1, node2, float(similarity))
|
| 276 |
-
return None
|
| 277 |
-
|
| 278 |
-
# Compute similarities in parallel
|
| 279 |
-
edges_to_add = []
|
| 280 |
-
with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
|
| 281 |
-
futures = {executor.submit(compute_similarity, pair): pair for pair in pairs}
|
| 282 |
-
with tqdm(total=len(pairs), desc="Computing similarities", unit="pair") as pbar:
|
| 283 |
-
for future in as_completed(futures):
|
| 284 |
-
result = future.result()
|
| 285 |
-
if result is not None:
|
| 286 |
-
edges_to_add.append(result)
|
| 287 |
-
pbar.update(1)
|
| 288 |
-
|
| 289 |
-
# Add edges to graph
|
| 290 |
-
for node1, node2, similarity in edges_to_add:
|
| 291 |
-
self.graph.add_edge(
|
| 292 |
-
node1,
|
| 293 |
-
node2,
|
| 294 |
-
relationship='similar_to',
|
| 295 |
-
similarity=similarity
|
| 296 |
-
)
|
| 297 |
-
connections_added += 1
|
| 298 |
-
|
| 299 |
-
LOGGER.info(f"Added {connections_added} similarity edges with threshold {similarity_threshold}")
|
| 300 |
-
|
| 301 |
-
def get_papers_by_topic(self, topic: str) -> List[Dict[str, Any]]:
|
| 302 |
-
"""
|
| 303 |
-
Get all papers belonging to a specific topic.
|
| 304 |
-
|
| 305 |
-
Args:
|
| 306 |
-
topic: Topic name
|
| 307 |
-
|
| 308 |
-
Returns:
|
| 309 |
-
List of paper information dictionaries
|
| 310 |
-
"""
|
| 311 |
-
if topic not in self.graph:
|
| 312 |
-
return []
|
| 313 |
-
|
| 314 |
-
papers = []
|
| 315 |
-
for neighbor in self.graph.neighbors(topic):
|
| 316 |
-
node_data = self.graph.nodes[neighbor]
|
| 317 |
-
if node_data.get('node_type') == 'paper':
|
| 318 |
-
papers.append({
|
| 319 |
-
'id': neighbor,
|
| 320 |
-
'name': node_data.get('name'),
|
| 321 |
-
'abstract': node_data.get('abstract'),
|
| 322 |
-
'embedding': node_data.get('embedding')
|
| 323 |
-
})
|
| 324 |
-
return papers
|
| 325 |
-
|
| 326 |
-
def find_similar_papers(self, paper_id: str, top_k: int = 5) -> List[tuple]:
|
| 327 |
-
"""
|
| 328 |
-
Find the most similar papers to a given paper.
|
| 329 |
-
|
| 330 |
-
Args:
|
| 331 |
-
paper_id: ID of the paper
|
| 332 |
-
top_k: Number of similar papers to return
|
| 333 |
-
|
| 334 |
-
Returns:
|
| 335 |
-
List of (paper_id, similarity_score) tuples
|
| 336 |
-
"""
|
| 337 |
-
if paper_id not in self.graph:
|
| 338 |
-
return []
|
| 339 |
-
|
| 340 |
-
paper_data = self.graph.nodes[paper_id]
|
| 341 |
-
if paper_data.get('node_type') != 'paper':
|
| 342 |
-
return []
|
| 343 |
-
|
| 344 |
-
target_embedding = paper_data['embedding']
|
| 345 |
-
similarities = []
|
| 346 |
-
|
| 347 |
-
for node, data in self.graph.nodes(data=True):
|
| 348 |
-
if data.get('node_type') == 'paper' and node != paper_id:
|
| 349 |
-
similarity = np.dot(target_embedding, data['embedding']) / \
|
| 350 |
-
(np.linalg.norm(target_embedding) * np.linalg.norm(data['embedding']))
|
| 351 |
-
similarities.append((node, float(similarity), data.get('name')))
|
| 352 |
-
|
| 353 |
-
# Sort by similarity and return top_k
|
| 354 |
-
similarities.sort(key=lambda x: x[1], reverse=True)
|
| 355 |
-
return similarities[:top_k]
|
| 356 |
-
|
| 357 |
-
def get_graph_statistics(self) -> Dict[str, Any]:
|
| 358 |
-
"""
|
| 359 |
-
Get statistics about the knowledge graph.
|
| 360 |
-
|
| 361 |
-
Returns:
|
| 362 |
-
Dictionary with graph statistics
|
| 363 |
-
"""
|
| 364 |
-
paper_nodes = [n for n, d in self.graph.nodes(data=True)
|
| 365 |
-
if d.get('node_type') == 'paper']
|
| 366 |
-
topic_nodes = [n for n, d in self.graph.nodes(data=True)
|
| 367 |
-
if d.get('node_type') == 'topic']
|
| 368 |
-
|
| 369 |
-
stats = {
|
| 370 |
-
'total_nodes': self.graph.number_of_nodes(),
|
| 371 |
-
'total_edges': self.graph.number_of_edges(),
|
| 372 |
-
'paper_nodes': len(paper_nodes),
|
| 373 |
-
'topic_nodes': len(topic_nodes),
|
| 374 |
-
'average_degree': sum(dict(self.graph.degree()).values()) / self.graph.number_of_nodes(),
|
| 375 |
-
'density': nx.density(self.graph),
|
| 376 |
-
'is_connected': nx.is_connected(self.graph),
|
| 377 |
-
}
|
| 378 |
-
|
| 379 |
-
if nx.is_connected(self.graph):
|
| 380 |
-
stats['diameter'] = nx.diameter(self.graph)
|
| 381 |
-
stats['average_shortest_path'] = nx.average_shortest_path_length(self.graph)
|
| 382 |
-
|
| 383 |
-
return stats
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
@click.command()
|
| 387 |
-
@click.option("-m", "--embedding-model", default="nomic-embed-text")
|
| 388 |
-
@click.option("-l", "--ollama-server-url", default="http://localhost:11434")
|
| 389 |
-
@click.option("-b", "--embedding-gen-batch-size", default=32)
|
| 390 |
-
@click.option("-w", "--max-parallel-workers", default=16)
|
| 391 |
-
@click.option("-p", "--limit-num-papers", default=None, type=int)
|
| 392 |
-
@click.option("-f", "--input-json-file", default=f"{PROJECT_ROOT}/data/neurips-2025-orals-posters.json")
|
| 393 |
-
@click.option("-o", "--output-file", default=f"{PROJECT_ROOT}/graphs/knowledge_graph.pkl")
|
| 394 |
-
@click.option("-s", "--similarity-threshold", default=0.8)
|
| 395 |
-
def main(
|
| 396 |
-
embedding_model: str,
|
| 397 |
-
ollama_server_url: str,
|
| 398 |
-
embedding_gen_batch_size: int,
|
| 399 |
-
max_parallel_workers: int,
|
| 400 |
-
limit_num_papers: int,
|
| 401 |
-
input_json_file: str,
|
| 402 |
-
output_file: str,
|
| 403 |
-
similarity_threshold: float
|
| 404 |
-
):
|
| 405 |
-
|
| 406 |
-
kg = PaperKnowledgeGraph(
|
| 407 |
-
embedding_model=f"ollama/{embedding_model}",
|
| 408 |
-
ollama_base_url=ollama_server_url,
|
| 409 |
-
embedding_gen_batch_size=embedding_gen_batch_size,
|
| 410 |
-
max_parallel_workers=max_parallel_workers,
|
| 411 |
-
limit_num_papers=limit_num_papers
|
| 412 |
-
)
|
| 413 |
-
|
| 414 |
-
# Load papers from JSON file
|
| 415 |
-
kg.load_papers_from_json(input_json_file)
|
| 416 |
-
|
| 417 |
-
# Build the graph (parallel embedding generation)
|
| 418 |
-
kg.build_graph()
|
| 419 |
-
|
| 420 |
-
# Optionally connect similar papers based on embeddings (parallel)
|
| 421 |
-
kg.connect_similar_papers(similarity_threshold=similarity_threshold)
|
| 422 |
-
|
| 423 |
-
# Save the graph to disk
|
| 424 |
-
save_graph(
|
| 425 |
-
graph=kg.graph,
|
| 426 |
-
output_path=output_file
|
| 427 |
-
)
|
| 428 |
-
|
| 429 |
-
# Print statistics
|
| 430 |
-
stats = kg.get_graph_statistics()
|
| 431 |
-
LOGGER.info("\nGraph Statistics:")
|
| 432 |
-
for key, value in stats.items():
|
| 433 |
-
LOGGER.info(f" {key}: {value}")
|
| 434 |
-
|
| 435 |
-
# Test run: Find similar papers
|
| 436 |
-
if kg.papers_data:
|
| 437 |
-
first_paper_id = kg.papers_data[0].get('uid', kg.papers_data[0].get('id'))
|
| 438 |
-
LOGGER.debug(f"\nPapers similar to '{kg.graph.nodes[first_paper_id]['name']}':")
|
| 439 |
-
similar = kg.find_similar_papers(first_paper_id, top_k=3)
|
| 440 |
-
for pid, sim, name in similar:
|
| 441 |
-
LOGGER.debug(f" - {name} (similarity: {sim:.3f})")
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
# Run
|
| 445 |
-
if __name__ == "__main__":
|
| 446 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
agentic_nav/tools/knowledge_graph/graph_traversal_strategies/__init__.py
DELETED
|
@@ -1,15 +0,0 @@
|
|
| 1 |
-
from enum import Enum
|
| 2 |
-
|
| 3 |
-
from agentic_nav.tools.knowledge_graph.graph_traversal_strategies.breadth_first_random import _graph_traversal_bfs_random
|
| 4 |
-
from agentic_nav.tools.knowledge_graph.graph_traversal_strategies.depth_first_random import _graph_traversal_dfs_random
|
| 5 |
-
from agentic_nav.tools.knowledge_graph.graph_traversal_strategies.neo4j_builtin import _graph_traversal_cypher
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
class TraversalStrategy(Enum):
|
| 9 |
-
"""Traversal strategy options"""
|
| 10 |
-
BFS = "breadth_first"
|
| 11 |
-
DFS = "depth_first"
|
| 12 |
-
BFS_RANDOM = "breadth_first_random"
|
| 13 |
-
DFS_RANDOM = "depth_first_random"
|
| 14 |
-
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
agentic_nav/tools/knowledge_graph/graph_traversal_strategies/breadth_first_random.py
DELETED
|
@@ -1,80 +0,0 @@
|
|
| 1 |
-
import neo4j
|
| 2 |
-
from typing import List, Dict, Any, Optional, Set
|
| 3 |
-
from collections import deque
|
| 4 |
-
import random
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
def _graph_traversal_bfs_random(
|
| 8 |
-
db_driver: neo4j.Driver,
|
| 9 |
-
start_paper_id: str,
|
| 10 |
-
n_hops: int,
|
| 11 |
-
relationship_type: Optional[str],
|
| 12 |
-
max_results: Optional[int],
|
| 13 |
-
max_branches: int
|
| 14 |
-
) -> List[Dict[str, Any]]:
|
| 15 |
-
"""
|
| 16 |
-
BFS traversal with random neighbor sampling.
|
| 17 |
-
Explores level by level, randomly sampling neighbors at each level.
|
| 18 |
-
"""
|
| 19 |
-
with db_driver.session() as session:
|
| 20 |
-
visited: Set[str] = {start_paper_id}
|
| 21 |
-
queue = deque([(start_paper_id, 0)]) # (paper_id, distance)
|
| 22 |
-
papers = []
|
| 23 |
-
|
| 24 |
-
# Build relationship type filter
|
| 25 |
-
if relationship_type:
|
| 26 |
-
rel_filter = f":{':'.join([relationship_type])}"
|
| 27 |
-
else:
|
| 28 |
-
rel_filter = ""
|
| 29 |
-
|
| 30 |
-
while queue:
|
| 31 |
-
if max_results and type(max_results) is int and len(papers) >= max_results:
|
| 32 |
-
break
|
| 33 |
-
|
| 34 |
-
current_id, distance = queue.popleft()
|
| 35 |
-
|
| 36 |
-
# Stop if we've reached max depth
|
| 37 |
-
if distance >= n_hops:
|
| 38 |
-
continue
|
| 39 |
-
|
| 40 |
-
# Query to get all neighbors
|
| 41 |
-
query = f"""
|
| 42 |
-
MATCH (p:Paper {{id: $paper_id}})-[r{rel_filter}]->(neighbor:Paper)
|
| 43 |
-
RETURN neighbor.id as id,
|
| 44 |
-
neighbor.name as name,
|
| 45 |
-
neighbor.abstract as abstract,
|
| 46 |
-
neighbor.topic as topic
|
| 47 |
-
"""
|
| 48 |
-
|
| 49 |
-
result = session.run(query, paper_id=current_id)
|
| 50 |
-
neighbors = list(result)
|
| 51 |
-
|
| 52 |
-
# Randomly sample neighbors
|
| 53 |
-
if neighbors:
|
| 54 |
-
sampled_neighbors = random.sample(
|
| 55 |
-
neighbors,
|
| 56 |
-
min(max_branches, len(neighbors))
|
| 57 |
-
)
|
| 58 |
-
|
| 59 |
-
for record in sampled_neighbors:
|
| 60 |
-
neighbor_id = record['id']
|
| 61 |
-
|
| 62 |
-
if neighbor_id not in visited:
|
| 63 |
-
visited.add(neighbor_id)
|
| 64 |
-
|
| 65 |
-
paper = {
|
| 66 |
-
'id': neighbor_id,
|
| 67 |
-
'name': record['name'],
|
| 68 |
-
'abstract': record['abstract'],
|
| 69 |
-
'topic': record['topic'],
|
| 70 |
-
'distance': distance + 1
|
| 71 |
-
}
|
| 72 |
-
papers.append(paper)
|
| 73 |
-
|
| 74 |
-
# Add to queue for next level
|
| 75 |
-
queue.append((neighbor_id, distance + 1))
|
| 76 |
-
|
| 77 |
-
if max_results and type(max_results) is int and len(papers) >= max_results:
|
| 78 |
-
break
|
| 79 |
-
|
| 80 |
-
return papers
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
agentic_nav/tools/knowledge_graph/graph_traversal_strategies/depth_first_random.py
DELETED
|
@@ -1,78 +0,0 @@
|
|
| 1 |
-
from typing import List, Dict, Any, Optional, Set
|
| 2 |
-
import random
|
| 3 |
-
|
| 4 |
-
import neo4j
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
def _graph_traversal_dfs_random(
|
| 8 |
-
db_driver: neo4j.Driver,
|
| 9 |
-
start_paper_id: str,
|
| 10 |
-
n_hops: int,
|
| 11 |
-
relationship_type: Optional[str],
|
| 12 |
-
max_results: Optional[int],
|
| 13 |
-
max_branches: int
|
| 14 |
-
) -> List[Dict[str, Any]]:
|
| 15 |
-
"""
|
| 16 |
-
DFS traversal with random neighbor sampling.
|
| 17 |
-
Explores deeply along random branches before backtracking.
|
| 18 |
-
"""
|
| 19 |
-
with db_driver.session() as session:
|
| 20 |
-
visited: Set[str] = {start_paper_id}
|
| 21 |
-
papers = []
|
| 22 |
-
|
| 23 |
-
# Build relationship type filter
|
| 24 |
-
if relationship_type:
|
| 25 |
-
rel_filter = f":{':'.join([relationship_type])}"
|
| 26 |
-
else:
|
| 27 |
-
rel_filter = ""
|
| 28 |
-
|
| 29 |
-
def dfs_traverse(paper_id: str, distance: int):
|
| 30 |
-
"""Recursive DFS helper"""
|
| 31 |
-
if max_results and len(papers) >= max_results:
|
| 32 |
-
return
|
| 33 |
-
|
| 34 |
-
if distance >= n_hops:
|
| 35 |
-
return
|
| 36 |
-
|
| 37 |
-
# Query to get all neighbors
|
| 38 |
-
query = f"""
|
| 39 |
-
MATCH (p:Paper {{id: $paper_id}})-[r{rel_filter}]->(neighbor:Paper)
|
| 40 |
-
RETURN neighbor.id as id,
|
| 41 |
-
neighbor.name as name,
|
| 42 |
-
neighbor.abstract as abstract,
|
| 43 |
-
neighbor.topic as topic
|
| 44 |
-
"""
|
| 45 |
-
|
| 46 |
-
result = session.run(query, paper_id=paper_id)
|
| 47 |
-
neighbors = list(result)
|
| 48 |
-
|
| 49 |
-
# Randomly sample neighbors
|
| 50 |
-
if neighbors:
|
| 51 |
-
sampled_neighbors = random.sample(
|
| 52 |
-
neighbors,
|
| 53 |
-
min(max_branches, len(neighbors))
|
| 54 |
-
)
|
| 55 |
-
|
| 56 |
-
for record in sampled_neighbors:
|
| 57 |
-
neighbor_id = record['id']
|
| 58 |
-
|
| 59 |
-
if neighbor_id not in visited:
|
| 60 |
-
if max_results and len(papers) >= max_results:
|
| 61 |
-
return
|
| 62 |
-
|
| 63 |
-
visited.add(neighbor_id)
|
| 64 |
-
|
| 65 |
-
paper = {
|
| 66 |
-
'id': neighbor_id,
|
| 67 |
-
'name': record['name'],
|
| 68 |
-
'abstract': record['abstract'],
|
| 69 |
-
'topic': record['topic'],
|
| 70 |
-
'distance': distance + 1
|
| 71 |
-
}
|
| 72 |
-
papers.append(paper)
|
| 73 |
-
|
| 74 |
-
# Recursively explore this branch
|
| 75 |
-
dfs_traverse(neighbor_id, distance + 1)
|
| 76 |
-
|
| 77 |
-
dfs_traverse(start_paper_id, 0)
|
| 78 |
-
return papers
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
agentic_nav/tools/knowledge_graph/graph_traversal_strategies/neo4j_builtin.py
DELETED
|
@@ -1,50 +0,0 @@
|
|
| 1 |
-
from typing import List, Dict, Any, Optional
|
| 2 |
-
|
| 3 |
-
import neo4j
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
_DB_GRAPH_TRAVERSAL_QUERY = lambda rel_filter, n_hops: f"""
|
| 7 |
-
MATCH path = (start:Paper)-[{rel_filter}*1..{n_hops}]-(related:Paper)
|
| 8 |
-
WHERE start.id IN $start_paper_ids
|
| 9 |
-
AND related.id <> start.id
|
| 10 |
-
WITH related, min(length(path)) as min_distance
|
| 11 |
-
RETURN DISTINCT related.id as id,
|
| 12 |
-
related.name as name,
|
| 13 |
-
related.abstract as abstract,
|
| 14 |
-
related.topic as topic,
|
| 15 |
-
min_distance as distance
|
| 16 |
-
ORDER BY min_distance, related.name
|
| 17 |
-
"""
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
def _graph_traversal_cypher(
|
| 21 |
-
db_driver: neo4j.Driver,
|
| 22 |
-
start_paper_id: str,
|
| 23 |
-
n_hops: int,
|
| 24 |
-
relationship_type: Optional[str],
|
| 25 |
-
max_results: Optional[int]
|
| 26 |
-
) -> List[Dict[str, Any]]:
|
| 27 |
-
"""Original Cypher-based traversal (BFS/DFS handled by Neo4j)"""
|
| 28 |
-
with db_driver.session() as session:
|
| 29 |
-
if relationship_type:
|
| 30 |
-
rel_filter = f":{':'.join([relationship_type])}"
|
| 31 |
-
else:
|
| 32 |
-
rel_filter = ""
|
| 33 |
-
|
| 34 |
-
query = _DB_GRAPH_TRAVERSAL_QUERY(rel_filter=rel_filter, n_hops=n_hops)
|
| 35 |
-
if max_results:
|
| 36 |
-
query += f" LIMIT {max_results}"
|
| 37 |
-
|
| 38 |
-
result = session.run(query, start_paper_ids=[start_paper_id])
|
| 39 |
-
papers = []
|
| 40 |
-
for record in result:
|
| 41 |
-
paper = {
|
| 42 |
-
'id': record['id'],
|
| 43 |
-
'name': record['name'],
|
| 44 |
-
'abstract': record['abstract'],
|
| 45 |
-
'topic': record['topic'],
|
| 46 |
-
'distance': record['distance']
|
| 47 |
-
}
|
| 48 |
-
papers.append(paper)
|
| 49 |
-
|
| 50 |
-
return papers
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
agentic_nav/tools/knowledge_graph/neo4j_db_importer.py
DELETED
|
@@ -1,537 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Neo4j exporter for PaperKnowledgeGraph
|
| 3 |
-
Exports NetworkX graph to Neo4j database with proper handling of embeddings and relationships
|
| 4 |
-
"""
|
| 5 |
-
import logging
|
| 6 |
-
import os
|
| 7 |
-
|
| 8 |
-
import click
|
| 9 |
-
import networkx as nx
|
| 10 |
-
from neo4j import GraphDatabase
|
| 11 |
-
from typing import Dict, Any
|
| 12 |
-
import numpy as np
|
| 13 |
-
from tqdm import tqdm
|
| 14 |
-
from pathlib import Path
|
| 15 |
-
|
| 16 |
-
from agentic_nav.tools.knowledge_graph.file_handler import load_graph
|
| 17 |
-
from agentic_nav.utils.logger import setup_logging
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
# Setup logging
|
| 21 |
-
setup_logging(
|
| 22 |
-
log_dir="logs",
|
| 23 |
-
level=os.environ.get("AGENTIC_NAV_LOG_LEVEL", "INFO")
|
| 24 |
-
)
|
| 25 |
-
LOGGER = logging.getLogger(__name__)
|
| 26 |
-
PROJECT_ROOT = Path(__file__).parent.parent.parent.parent
|
| 27 |
-
NEO4J_USERNAME = os.environ.get("NEO4J_USERNAME", "neo4j")
|
| 28 |
-
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD")
|
| 29 |
-
NEO4J_DB_URI = os.environ.get("NEO4J_DB_URI", "bolt://neo4j_db:7687")
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
class Neo4jImporter:
|
| 33 |
-
"""Import PaperKnowledgeGraph to Neo4j database."""
|
| 34 |
-
|
| 35 |
-
def __init__(
|
| 36 |
-
self,
|
| 37 |
-
uri: str = NEO4J_DB_URI,
|
| 38 |
-
username: str = NEO4J_USERNAME,
|
| 39 |
-
password: str = NEO4J_PASSWORD
|
| 40 |
-
):
|
| 41 |
-
"""Initialize Neo4j connection."""
|
| 42 |
-
self.driver = GraphDatabase.driver(uri, auth=(username, password))
|
| 43 |
-
self.driver.verify_connectivity()
|
| 44 |
-
LOGGER.info(f"Connected to Neo4j at {uri}")
|
| 45 |
-
|
| 46 |
-
def close(self):
|
| 47 |
-
"""Close the Neo4j driver connection."""
|
| 48 |
-
self.driver.close()
|
| 49 |
-
|
| 50 |
-
def clear_database(self, batch_size=500):
|
| 51 |
-
with self.driver.session() as session:
|
| 52 |
-
deleted_total = 0
|
| 53 |
-
while True:
|
| 54 |
-
result = session.run("""
|
| 55 |
-
MATCH (n)
|
| 56 |
-
WITH n LIMIT $batch_size
|
| 57 |
-
DETACH DELETE n
|
| 58 |
-
RETURN count(n) as deleted
|
| 59 |
-
""",
|
| 60 |
-
batch_size=batch_size
|
| 61 |
-
)
|
| 62 |
-
|
| 63 |
-
deleted = result.single()["deleted"]
|
| 64 |
-
deleted_total += deleted
|
| 65 |
-
LOGGER.info(f"Deleted {deleted} nodes (total: {deleted_total})")
|
| 66 |
-
|
| 67 |
-
if deleted == 0:
|
| 68 |
-
break
|
| 69 |
-
|
| 70 |
-
def create_indexes(self, embedding_dimension: int = 768):
|
| 71 |
-
"""Create indexes for better query performance, including vector index."""
|
| 72 |
-
with self.driver.session() as session:
|
| 73 |
-
# Create index on paper IDs
|
| 74 |
-
session.run("CREATE INDEX paper_id IF NOT EXISTS FOR (p:Paper) ON (p.id)")
|
| 75 |
-
|
| 76 |
-
# Create index on topic names
|
| 77 |
-
session.run("CREATE INDEX topic_name IF NOT EXISTS FOR (t:Topic) ON (t.name)")
|
| 78 |
-
|
| 79 |
-
# Create index on author IDs
|
| 80 |
-
session.run("CREATE INDEX author_id IF NOT EXISTS FOR (a:Author) ON (a.author_id)")
|
| 81 |
-
|
| 82 |
-
# Create index on author names (useful for searching)
|
| 83 |
-
session.run("CREATE INDEX author_name IF NOT EXISTS FOR (a:Author) ON (a.fullname)")
|
| 84 |
-
|
| 85 |
-
# Create vector index for embeddings (Neo4j 5.11+)
|
| 86 |
-
try:
|
| 87 |
-
session.run("""
|
| 88 |
-
CREATE VECTOR INDEX paper_embeddings IF NOT EXISTS
|
| 89 |
-
FOR (p:Paper)
|
| 90 |
-
ON p.embedding
|
| 91 |
-
OPTIONS {
|
| 92 |
-
indexConfig: {
|
| 93 |
-
`vector.dimensions`: $dimension,
|
| 94 |
-
`vector.similarity_function`: 'cosine'
|
| 95 |
-
}
|
| 96 |
-
}
|
| 97 |
-
""", dimension=embedding_dimension)
|
| 98 |
-
LOGGER.info(f"Created vector index for {embedding_dimension}-dimensional embeddings")
|
| 99 |
-
except Exception as e:
|
| 100 |
-
LOGGER.warning(f"Warning: Could not create vector index: {e}")
|
| 101 |
-
LOGGER.warning("Vector indexes require Neo4j 5.11+ or Enterprise Edition")
|
| 102 |
-
|
| 103 |
-
LOGGER.info("Created standard indexes")
|
| 104 |
-
|
| 105 |
-
def _export_paper_nodes(self, kg: nx.Graph, batch_size: int):
|
| 106 |
-
"""Export paper nodes to Neo4j with all attributes."""
|
| 107 |
-
paper_nodes = [(n, d) for n, d in kg.nodes(data=True)
|
| 108 |
-
if d.get('node_type') == 'paper']
|
| 109 |
-
|
| 110 |
-
LOGGER.info(f"\nExporting {len(paper_nodes)} paper nodes...")
|
| 111 |
-
|
| 112 |
-
with self.driver.session() as session:
|
| 113 |
-
for i in tqdm(range(0, len(paper_nodes), batch_size), desc="Paper nodes"):
|
| 114 |
-
batch = paper_nodes[i:i + batch_size]
|
| 115 |
-
papers_data = []
|
| 116 |
-
|
| 117 |
-
for node_id, data in batch:
|
| 118 |
-
# Convert embedding to list if it's numpy array
|
| 119 |
-
embedding = data.get('embedding', [])
|
| 120 |
-
if isinstance(embedding, np.ndarray):
|
| 121 |
-
embedding = embedding.tolist()
|
| 122 |
-
|
| 123 |
-
paper_dict = {
|
| 124 |
-
"id": node_id,
|
| 125 |
-
"name": data.get('name', ''),
|
| 126 |
-
"abstract": data.get('abstract', ''),
|
| 127 |
-
"topic": data.get('topic', ''),
|
| 128 |
-
"keywords": data.get('keywords', []),
|
| 129 |
-
"decision": data.get('decision', ''),
|
| 130 |
-
"session": data.get('session', ''),
|
| 131 |
-
"session_start_time": data.get('session_start_time', ''),
|
| 132 |
-
"session_end_time": data.get('session_end_time', ''),
|
| 133 |
-
"presentation_type": data.get('presentation_type', ''),
|
| 134 |
-
"room_name": data.get('room_name', ''),
|
| 135 |
-
"project_url": data.get('project_url', ''),
|
| 136 |
-
"poster_position": data.get('poster_position', ''),
|
| 137 |
-
"paper_url": data.get("paper_url", ""),
|
| 138 |
-
"sourceid": data.get("sourceid", ""),
|
| 139 |
-
"virtualsite_url": data.get("virtualsite_url", ""),
|
| 140 |
-
'embedding': embedding
|
| 141 |
-
}
|
| 142 |
-
papers_data.append(paper_dict)
|
| 143 |
-
|
| 144 |
-
# Batch create paper nodes
|
| 145 |
-
session.run("""
|
| 146 |
-
UNWIND $papers AS paper
|
| 147 |
-
CREATE (p:Paper {
|
| 148 |
-
id: paper.id,
|
| 149 |
-
name: paper.name,
|
| 150 |
-
abstract: paper.abstract,
|
| 151 |
-
topic: paper.topic,
|
| 152 |
-
keywords: paper.keywords,
|
| 153 |
-
decision: paper.decision,
|
| 154 |
-
session: paper.session,
|
| 155 |
-
session_start_time: paper.session_start_time,
|
| 156 |
-
session_end_time: paper.session_end_time,
|
| 157 |
-
presentation_type: paper.presentation_type,
|
| 158 |
-
room_name: paper.room_name,
|
| 159 |
-
project_url: paper.project_url,
|
| 160 |
-
poster_position: paper.poster_position,
|
| 161 |
-
paper_url: paper.paper_url,
|
| 162 |
-
sourceid: paper.sourceid,
|
| 163 |
-
virtualsite_url: paper.virtualsite_url,
|
| 164 |
-
embedding: paper.embedding
|
| 165 |
-
})
|
| 166 |
-
""", papers=papers_data)
|
| 167 |
-
|
| 168 |
-
LOGGER.info(f"Exported {len(paper_nodes)} paper nodes")
|
| 169 |
-
|
| 170 |
-
def _export_topic_hierarchy(self, kg: nx.Graph):
|
| 171 |
-
"""
|
| 172 |
-
Export topic nodes with hierarchical structure to Neo4j.
|
| 173 |
-
Splits topics like "Deep Learning->Theory" into separate nodes with parent-child relationships.
|
| 174 |
-
"""
|
| 175 |
-
# Collect all unique topic paths from paper nodes
|
| 176 |
-
topic_paths = set()
|
| 177 |
-
for node_id, data in kg.nodes(data=True):
|
| 178 |
-
if data.get('node_type') == 'paper':
|
| 179 |
-
topic = data.get('topic', '')
|
| 180 |
-
if topic:
|
| 181 |
-
topic_paths.add(topic)
|
| 182 |
-
|
| 183 |
-
LOGGER.info(f"Processing {len(topic_paths)} unique topic paths...")
|
| 184 |
-
|
| 185 |
-
# Parse topic paths and create hierarchy
|
| 186 |
-
all_topics = set()
|
| 187 |
-
topic_relationships = []
|
| 188 |
-
|
| 189 |
-
for path in topic_paths:
|
| 190 |
-
parts = [p.strip() for p in path.split('->')]
|
| 191 |
-
|
| 192 |
-
# Add all topic parts
|
| 193 |
-
for part in parts:
|
| 194 |
-
all_topics.add(part)
|
| 195 |
-
|
| 196 |
-
# Create parent-child relationships
|
| 197 |
-
for i in range(len(parts) - 1):
|
| 198 |
-
topic_relationships.append({
|
| 199 |
-
'parent': parts[i],
|
| 200 |
-
'child': parts[i + 1]
|
| 201 |
-
})
|
| 202 |
-
|
| 203 |
-
LOGGER.info(
|
| 204 |
-
f"Creating {len(all_topics)} topic nodes with {len(set(tuple(r.items()) for r in topic_relationships))} "
|
| 205 |
-
f"hierarchical relationships..."
|
| 206 |
-
)
|
| 207 |
-
|
| 208 |
-
with self.driver.session() as session:
|
| 209 |
-
# Create all topic nodes (using MERGE to avoid duplicates)
|
| 210 |
-
topics_data = [{'name': topic} for topic in all_topics]
|
| 211 |
-
session.run("""
|
| 212 |
-
UNWIND $topics AS topic
|
| 213 |
-
MERGE (t:Topic {name: topic.name})
|
| 214 |
-
""", topics=topics_data)
|
| 215 |
-
|
| 216 |
-
# Create hierarchical relationships between topics (deduplicate first)
|
| 217 |
-
if topic_relationships:
|
| 218 |
-
# Remove duplicates
|
| 219 |
-
unique_rels = list({(r['parent'], r['child']): r for r in topic_relationships}.values())
|
| 220 |
-
session.run("""
|
| 221 |
-
UNWIND $rels AS rel
|
| 222 |
-
MATCH (parent:Topic {name: rel.parent})
|
| 223 |
-
MATCH (child:Topic {name: rel.child})
|
| 224 |
-
MERGE (child)-[:SUBTOPIC_OF]->(parent)
|
| 225 |
-
""", rels=unique_rels)
|
| 226 |
-
|
| 227 |
-
LOGGER.info(f"Exported {len(all_topics)} topic nodes with hierarchy")
|
| 228 |
-
|
| 229 |
-
def _connect_papers_to_topics(self, kg: nx.Graph, batch_size: int):
|
| 230 |
-
"""
|
| 231 |
-
Connect papers to their leaf topic nodes.
|
| 232 |
-
For "Deep Learning->Theory", connects paper to "Theory" node.
|
| 233 |
-
"""
|
| 234 |
-
paper_topic_connections = []
|
| 235 |
-
|
| 236 |
-
for node_id, data in kg.nodes(data=True):
|
| 237 |
-
if data.get('node_type') == 'paper':
|
| 238 |
-
topic = data.get('topic', '')
|
| 239 |
-
if topic:
|
| 240 |
-
# Get the leaf topic (last part after splitting)
|
| 241 |
-
parts = [p.strip() for p in topic.split('->')]
|
| 242 |
-
leaf_topic = parts[-1]
|
| 243 |
-
|
| 244 |
-
paper_topic_connections.append({
|
| 245 |
-
'paper_id': node_id,
|
| 246 |
-
'topic_name': leaf_topic,
|
| 247 |
-
'full_path': topic # Store full path as property
|
| 248 |
-
})
|
| 249 |
-
|
| 250 |
-
LOGGER.info(f"Connecting {len(paper_topic_connections)} papers to topics...")
|
| 251 |
-
|
| 252 |
-
with self.driver.session() as session:
|
| 253 |
-
for i in tqdm(range(0, len(paper_topic_connections), batch_size),
|
| 254 |
-
desc="Paper-Topic connections"):
|
| 255 |
-
batch = paper_topic_connections[i:i + batch_size]
|
| 256 |
-
|
| 257 |
-
session.run("""
|
| 258 |
-
UNWIND $connections AS conn
|
| 259 |
-
MATCH (p:Paper {id: conn.paper_id})
|
| 260 |
-
MATCH (t:Topic {name: conn.topic_name})
|
| 261 |
-
MERGE (p)-[r:BELONGS_TO_TOPIC]->(t)
|
| 262 |
-
SET r.full_path = conn.full_path
|
| 263 |
-
""", connections=batch)
|
| 264 |
-
|
| 265 |
-
LOGGER.info(f"Connected papers to leaf topics")
|
| 266 |
-
|
| 267 |
-
def _export_similarity_relationships(self, kg: nx.Graph, batch_size: int):
|
| 268 |
-
"""Export similarity relationships between papers to Neo4j."""
|
| 269 |
-
# Filter only similarity edges
|
| 270 |
-
similarity_edges = [
|
| 271 |
-
(source, target, data)
|
| 272 |
-
for source, target, data in kg.edges(data=True)
|
| 273 |
-
if data.get('relationship') == 'similar_to'
|
| 274 |
-
]
|
| 275 |
-
|
| 276 |
-
LOGGER.info(f"Exporting {len(similarity_edges)} similarity relationships...")
|
| 277 |
-
|
| 278 |
-
with self.driver.session() as session:
|
| 279 |
-
for i in tqdm(range(0, len(similarity_edges), batch_size),
|
| 280 |
-
desc="Similarity relationships"):
|
| 281 |
-
batch = similarity_edges[i:i + batch_size]
|
| 282 |
-
|
| 283 |
-
edges_data = [{
|
| 284 |
-
'source': source,
|
| 285 |
-
'target': target,
|
| 286 |
-
'similarity': data.get('similarity', 0.0)
|
| 287 |
-
} for source, target, data in batch]
|
| 288 |
-
|
| 289 |
-
session.run("""
|
| 290 |
-
UNWIND $edges AS edge
|
| 291 |
-
MATCH (p1:Paper {id: edge.source})
|
| 292 |
-
MATCH (p2:Paper {id: edge.target})
|
| 293 |
-
MERGE (p1)-[:SIMILAR_TO {similarity: edge.similarity}]->(p2)
|
| 294 |
-
""", edges=edges_data)
|
| 295 |
-
|
| 296 |
-
LOGGER.info(f"Exported {len(similarity_edges)} similarity relationships")
|
| 297 |
-
|
| 298 |
-
def _export_authors_and_relationships(self, kg: nx.Graph, batch_size: int):
|
| 299 |
-
"""
|
| 300 |
-
Export author nodes from NetworkX graph (where they already exist as separate nodes)
|
| 301 |
-
and create IS_AUTHOR_OF relationships between authors and papers.
|
| 302 |
-
|
| 303 |
-
Author nodes in NetworkX have composite IDs like "12345 - John Doe"
|
| 304 |
-
"""
|
| 305 |
-
# Collect author nodes from the graph
|
| 306 |
-
author_nodes = [
|
| 307 |
-
(node_id, data)
|
| 308 |
-
for node_id, data in kg.nodes(data=True)
|
| 309 |
-
if data.get('node_type') != 'paper' and data.get('node_type') != 'topic'
|
| 310 |
-
]
|
| 311 |
-
|
| 312 |
-
LOGGER.info(f"Found {len(author_nodes)} author nodes in graph...")
|
| 313 |
-
|
| 314 |
-
# Extract author data
|
| 315 |
-
all_authors = []
|
| 316 |
-
for node_id, data in author_nodes:
|
| 317 |
-
# Parse composite ID "12345 - John Doe"
|
| 318 |
-
parts = node_id.split(' - ', 1)
|
| 319 |
-
author_id = parts[0].strip() if len(parts) > 0 else ""
|
| 320 |
-
|
| 321 |
-
author_dict = {
|
| 322 |
-
'composite_id': node_id, # Store the full composite ID
|
| 323 |
-
'author_id': author_id,
|
| 324 |
-
'fullname': data.get('fullname', ''),
|
| 325 |
-
'institution': data.get('institution', ''),
|
| 326 |
-
'url': data.get('url', '')
|
| 327 |
-
}
|
| 328 |
-
all_authors.append(author_dict)
|
| 329 |
-
|
| 330 |
-
LOGGER.info(f"Exporting {len(all_authors)} unique authors...")
|
| 331 |
-
|
| 332 |
-
with self.driver.session() as session:
|
| 333 |
-
# Create author nodes in batches
|
| 334 |
-
for i in tqdm(range(0, len(all_authors), batch_size), desc="Author nodes"):
|
| 335 |
-
batch = all_authors[i:i + batch_size]
|
| 336 |
-
|
| 337 |
-
session.run("""
|
| 338 |
-
UNWIND $authors AS author
|
| 339 |
-
MERGE (a:Author {composite_id: author.composite_id})
|
| 340 |
-
ON CREATE SET
|
| 341 |
-
a.author_id = author.author_id,
|
| 342 |
-
a.fullname = author.fullname,
|
| 343 |
-
a.institution = author.institution,
|
| 344 |
-
a.url = author.url
|
| 345 |
-
ON MATCH SET
|
| 346 |
-
a.author_id = author.author_id,
|
| 347 |
-
a.fullname = author.fullname,
|
| 348 |
-
a.institution = author.institution,
|
| 349 |
-
a.url = author.url
|
| 350 |
-
""", authors=batch)
|
| 351 |
-
|
| 352 |
-
LOGGER.info(f"Exported {len(all_authors)} author nodes")
|
| 353 |
-
|
| 354 |
-
# Method 1: Try to collect author-paper relationships from graph edges
|
| 355 |
-
author_paper_edges = [
|
| 356 |
-
(source, target, data)
|
| 357 |
-
for source, target, data in kg.edges(data=True)
|
| 358 |
-
if data.get('relationship') == 'is_author_of'
|
| 359 |
-
]
|
| 360 |
-
|
| 361 |
-
LOGGER.info(f"Found {len(author_paper_edges)} IS_AUTHOR_OF edges in graph")
|
| 362 |
-
|
| 363 |
-
# Method 2: If no edges found, extract from paper node 'authors' attribute
|
| 364 |
-
if len(author_paper_edges) == 0:
|
| 365 |
-
LOGGER.warning("No IS_AUTHOR_OF edges found in graph. Extracting from paper 'authors' attribute...")
|
| 366 |
-
|
| 367 |
-
paper_author_relationships = []
|
| 368 |
-
for node_id, data in kg.nodes(data=True):
|
| 369 |
-
if data.get('node_type') == 'paper':
|
| 370 |
-
authors = data.get('authors', [])
|
| 371 |
-
|
| 372 |
-
if authors and isinstance(authors, list) and len(authors) > 0:
|
| 373 |
-
# Check if authors are stored as dicts
|
| 374 |
-
if isinstance(authors[0], dict):
|
| 375 |
-
for author in authors:
|
| 376 |
-
author_id = str(author.get('id', ''))
|
| 377 |
-
fullname = author.get('fullname', '')
|
| 378 |
-
if author_id and fullname:
|
| 379 |
-
composite_id = f"{author_id} - {fullname}"
|
| 380 |
-
paper_author_relationships.append({
|
| 381 |
-
'author_id': composite_id,
|
| 382 |
-
'paper_id': node_id
|
| 383 |
-
})
|
| 384 |
-
|
| 385 |
-
LOGGER.info(f"Extracted {len(paper_author_relationships)} relationships from paper attributes")
|
| 386 |
-
|
| 387 |
-
# Create relationships from extracted data
|
| 388 |
-
with self.driver.session() as session:
|
| 389 |
-
for i in tqdm(range(0, len(paper_author_relationships), batch_size),
|
| 390 |
-
desc="Author-Paper relationships"):
|
| 391 |
-
batch = paper_author_relationships[i:i + batch_size]
|
| 392 |
-
|
| 393 |
-
session.run("""
|
| 394 |
-
UNWIND $edges AS edge
|
| 395 |
-
MATCH (a:Author {composite_id: edge.author_id})
|
| 396 |
-
MATCH (p:Paper {id: edge.paper_id})
|
| 397 |
-
MERGE (a)-[:IS_AUTHOR_OF]->(p)
|
| 398 |
-
""", edges=batch)
|
| 399 |
-
|
| 400 |
-
LOGGER.info(f"Created {len(paper_author_relationships)} author-paper relationships")
|
| 401 |
-
else:
|
| 402 |
-
# Create relationships from graph edges
|
| 403 |
-
with self.driver.session() as session:
|
| 404 |
-
for i in tqdm(range(0, len(author_paper_edges), batch_size),
|
| 405 |
-
desc="Author-Paper relationships"):
|
| 406 |
-
batch = author_paper_edges[i:i + batch_size]
|
| 407 |
-
|
| 408 |
-
edges_data = [{
|
| 409 |
-
'author_id': source, # composite ID like "12345 - John Doe"
|
| 410 |
-
'paper_id': target
|
| 411 |
-
} for source, target, data in batch]
|
| 412 |
-
|
| 413 |
-
session.run("""
|
| 414 |
-
UNWIND $edges AS edge
|
| 415 |
-
MATCH (a:Author {composite_id: edge.author_id})
|
| 416 |
-
MATCH (p:Paper {id: edge.paper_id})
|
| 417 |
-
MERGE (a)-[:IS_AUTHOR_OF]->(p)
|
| 418 |
-
""", edges=edges_data)
|
| 419 |
-
|
| 420 |
-
LOGGER.info(f"Created {len(author_paper_edges)} author-paper relationships")
|
| 421 |
-
|
| 422 |
-
def import_graph(self, kg_path: str, batch_size: int = 100, embedding_dimension: int = 768):
|
| 423 |
-
"""Import the entire knowledge graph to Neo4j."""
|
| 424 |
-
LOGGER.info(f"Loading graph from path {kg_path}")
|
| 425 |
-
kg = load_graph(kg_path)
|
| 426 |
-
|
| 427 |
-
LOGGER.info("Starting Neo4j export...")
|
| 428 |
-
|
| 429 |
-
# Clear and prepare database
|
| 430 |
-
self.clear_database()
|
| 431 |
-
self.create_indexes(embedding_dimension)
|
| 432 |
-
|
| 433 |
-
# Export paper nodes
|
| 434 |
-
self._export_paper_nodes(kg, batch_size)
|
| 435 |
-
|
| 436 |
-
# Export authors and author-paper relationships
|
| 437 |
-
self._export_authors_and_relationships(kg, batch_size)
|
| 438 |
-
|
| 439 |
-
# Export topic hierarchy
|
| 440 |
-
self._export_topic_hierarchy(kg)
|
| 441 |
-
|
| 442 |
-
# Connect papers to topics
|
| 443 |
-
self._connect_papers_to_topics(kg, batch_size)
|
| 444 |
-
|
| 445 |
-
# Export similarity relationships
|
| 446 |
-
self._export_similarity_relationships(kg, batch_size)
|
| 447 |
-
|
| 448 |
-
LOGGER.info("Export completed successfully!")
|
| 449 |
-
|
| 450 |
-
def verify_export(self) -> Dict[str, Any]:
|
| 451 |
-
"""Verify the export by checking node and relationship counts."""
|
| 452 |
-
with self.driver.session() as session:
|
| 453 |
-
# Count papers
|
| 454 |
-
result = session.run("MATCH (p:Paper) RETURN count(p) as count")
|
| 455 |
-
paper_count = result.single()['count']
|
| 456 |
-
|
| 457 |
-
# Count topics
|
| 458 |
-
result = session.run("MATCH (t:Topic) RETURN count(t) as count")
|
| 459 |
-
topic_count = result.single()['count']
|
| 460 |
-
|
| 461 |
-
# Count authors
|
| 462 |
-
result = session.run("MATCH (a:Author) RETURN count(a) as count")
|
| 463 |
-
author_count = result.single()['count']
|
| 464 |
-
|
| 465 |
-
# Count relationships
|
| 466 |
-
result = session.run("MATCH ()-[r]->() RETURN count(r) as count")
|
| 467 |
-
rel_count = result.single()['count']
|
| 468 |
-
|
| 469 |
-
# Count similarity relationships
|
| 470 |
-
result = session.run("MATCH ()-[r:SIMILAR_TO]->() RETURN count(r) as count")
|
| 471 |
-
similarity_count = result.single()['count']
|
| 472 |
-
|
| 473 |
-
# Count topic hierarchy relationships
|
| 474 |
-
result = session.run("MATCH ()-[r:SUBTOPIC_OF]->() RETURN count(r) as count")
|
| 475 |
-
subtopic_count = result.single()['count']
|
| 476 |
-
|
| 477 |
-
# Count author relationships (updated relationship name)
|
| 478 |
-
result = session.run("MATCH ()-[r:IS_AUTHOR_OF]->() RETURN count(r) as count")
|
| 479 |
-
is_author_of_count = result.single()['count']
|
| 480 |
-
|
| 481 |
-
stats = {
|
| 482 |
-
'papers': paper_count,
|
| 483 |
-
'topics': topic_count,
|
| 484 |
-
'authors': author_count,
|
| 485 |
-
'total_relationships': rel_count,
|
| 486 |
-
'similarity_relationships': similarity_count,
|
| 487 |
-
'subtopic_relationships': subtopic_count,
|
| 488 |
-
'is_author_of_relationships': is_author_of_count
|
| 489 |
-
}
|
| 490 |
-
|
| 491 |
-
LOGGER.info("Neo4j Database Statistics:")
|
| 492 |
-
for key, value in stats.items():
|
| 493 |
-
LOGGER.info(f" {key}: {value}")
|
| 494 |
-
|
| 495 |
-
return stats
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
@click.command()
|
| 499 |
-
@click.option("-g", "--graph-path", help="Path to the knowledge graph file (pickle).", default=f"{PROJECT_ROOT}/graphs/knowledge_graph.pkl")
|
| 500 |
-
@click.option("-l", "--neo4j-uri", help="Database URI", default="bolt://localhost:7687")
|
| 501 |
-
@click.option("-u", "--neo4j-username", help="Database user", default=NEO4J_USERNAME)
|
| 502 |
-
@click.option("-p", "--neo4j-password", help="Database password", default=NEO4J_PASSWORD)
|
| 503 |
-
@click.option("-b", "--batch-size", help="Batch size for node insertion", default=100)
|
| 504 |
-
@click.option("-e", "--embedding-dimension", help="Vector embedding dimensions", default=768)
|
| 505 |
-
def main(
|
| 506 |
-
graph_path: str,
|
| 507 |
-
neo4j_uri: str,
|
| 508 |
-
neo4j_username: str,
|
| 509 |
-
neo4j_password: str,
|
| 510 |
-
batch_size: int = 100,
|
| 511 |
-
embedding_dimension: int = 768
|
| 512 |
-
):
|
| 513 |
-
"""
|
| 514 |
-
Convenience function to export a knowledge graph to Neo4j.
|
| 515 |
-
|
| 516 |
-
Args:
|
| 517 |
-
graph_path: PaperKnowledgeGraph instance
|
| 518 |
-
neo4j_uri: Neo4j connection URI
|
| 519 |
-
neo4j_username: Neo4j username
|
| 520 |
-
neo4j_password: Neo4j password
|
| 521 |
-
batch_size: Batch size for processing
|
| 522 |
-
embedding_dimension: Dimension of embedding vectors (default: 768)
|
| 523 |
-
"""
|
| 524 |
-
importer = Neo4jImporter(neo4j_uri, neo4j_username, neo4j_password)
|
| 525 |
-
try:
|
| 526 |
-
importer.import_graph(
|
| 527 |
-
graph_path,
|
| 528 |
-
batch_size,
|
| 529 |
-
embedding_dimension
|
| 530 |
-
)
|
| 531 |
-
importer.verify_export()
|
| 532 |
-
finally:
|
| 533 |
-
importer.close()
|
| 534 |
-
|
| 535 |
-
|
| 536 |
-
if __name__ == "__main__":
|
| 537 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
agentic_nav/tools/knowledge_graph/retriever.py
DELETED
|
@@ -1,612 +0,0 @@
|
|
| 1 |
-
import logging
|
| 2 |
-
import numpy as np
|
| 3 |
-
import random
|
| 4 |
-
import os
|
| 5 |
-
|
| 6 |
-
from neo4j import GraphDatabase
|
| 7 |
-
from pathlib import Path
|
| 8 |
-
|
| 9 |
-
from typing import List, Dict, Any, Optional
|
| 10 |
-
|
| 11 |
-
from agentic_nav.tools.knowledge_graph.graph_traversal_strategies import (
|
| 12 |
-
TraversalStrategy,
|
| 13 |
-
_graph_traversal_dfs_random,
|
| 14 |
-
_graph_traversal_cypher,
|
| 15 |
-
_graph_traversal_bfs_random
|
| 16 |
-
)
|
| 17 |
-
|
| 18 |
-
from agentic_nav.utils.embedding_generator import batch_embed_documents
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
PROJECT_ROOT = Path(__file__).parent.parent.parent.parent
|
| 22 |
-
LOGGER = logging.getLogger(__name__)
|
| 23 |
-
EMBEDDING_MODEL_NAME = os.environ.get("EMBEDDING_MODEL_NAME", "nomic-embed-text")
|
| 24 |
-
EMBEDDING_MODEL_API_BASE = os.environ.get("EMBEDDING_MODEL_API_BASE", "http://localhost:11435")
|
| 25 |
-
NEO4J_DB_URI = os.environ.get("NEO4J_DB_URI", "bolt://neo4j_db:7687")
|
| 26 |
-
NEO4J_DB_NODE_RETURN_LIMIT = int(os.environ.get("NEO4J_DB_NODE_RETURN_LIMIT", 200))
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
class Neo4jGraphWorker:
|
| 30 |
-
"""Search and traversal operations for Neo4j paper knowledge graph."""
|
| 31 |
-
|
| 32 |
-
_DB_SIMILARITY_SEARCH_QUERY = """
|
| 33 |
-
MATCH (node:Paper)
|
| 34 |
-
WHERE ($day IS NULL OR node.session_start_time IS NOT NULL)
|
| 35 |
-
WITH node
|
| 36 |
-
WHERE ($day IS NULL OR date(datetime(node.session_start_time)).dayOfWeek = $day)
|
| 37 |
-
AND ($time_ranges IS NULL OR
|
| 38 |
-
any(range IN $time_ranges WHERE
|
| 39 |
-
time(datetime(node.session_start_time)) >= time(range.start)
|
| 40 |
-
AND time(datetime(node.session_start_time)) <= time(range.end)))
|
| 41 |
-
WITH collect(node) as filtered_nodes
|
| 42 |
-
CALL db.index.vector.queryNodes('paper_embeddings', $top_k, $query_embedding)
|
| 43 |
-
YIELD node, score
|
| 44 |
-
WHERE node IN filtered_nodes OR ($day IS NULL AND $time_ranges IS NULL)
|
| 45 |
-
RETURN node.id as id,
|
| 46 |
-
node.name as name,
|
| 47 |
-
node.abstract as abstract,
|
| 48 |
-
node.topic as topic,
|
| 49 |
-
node.paper_url as paper_url,
|
| 50 |
-
node.session as session,
|
| 51 |
-
node.session_start_time as session_start_time,
|
| 52 |
-
node.session_end_time as session_end_time,
|
| 53 |
-
node.presentation_type as presentation_type,
|
| 54 |
-
node.room_name as room_name,
|
| 55 |
-
node.project_url as project_url,
|
| 56 |
-
node.poster_position as poster_position,
|
| 57 |
-
node.sourceid as sourceid,
|
| 58 |
-
node.virtualsite_url as virtualsite_url,
|
| 59 |
-
node.decision as decision,
|
| 60 |
-
[(a:Author)-[:IS_AUTHOR_OF]->(node) | a] as authors,
|
| 61 |
-
score
|
| 62 |
-
ORDER BY score DESC
|
| 63 |
-
LIMIT $limit
|
| 64 |
-
"""
|
| 65 |
-
|
| 66 |
-
_DB_NEIGHBORHOOD_SEARCH_QUERY = """
|
| 67 |
-
MATCH (p:Paper)-[r]-(neighbor)
|
| 68 |
-
WHERE p.id IN $paper_ids
|
| 69 |
-
AND type(r) IN $allowed_rel_types
|
| 70 |
-
AND 'Paper' IN labels(neighbor)
|
| 71 |
-
AND (type(r) <> 'SIMILAR_TO' OR r.similarity >= $min_similarity)
|
| 72 |
-
RETURN neighbor.id as id,
|
| 73 |
-
neighbor.name as name,
|
| 74 |
-
neighbor.abstract as abstract,
|
| 75 |
-
neighbor.topic as topic,
|
| 76 |
-
neighbor.paper_url as paper_url,
|
| 77 |
-
neighbor.session as session,
|
| 78 |
-
neighbor.session_start_time as session_start_time,
|
| 79 |
-
neighbor.session_end_time as session_end_time,
|
| 80 |
-
neighbor.presentation_type as presentation_type,
|
| 81 |
-
neighbor.room_name as room_name,
|
| 82 |
-
neighbor.project_url as project_url,
|
| 83 |
-
neighbor.poster_position as poster_position,
|
| 84 |
-
neighbor.sourceid as sourceid,
|
| 85 |
-
neighbor.virtualsite_url as virtualsite_url,
|
| 86 |
-
neighbor.decision as decision,
|
| 87 |
-
[(a:Author)-[:IS_AUTHOR_OF]->(neighbor) | a] as authors,
|
| 88 |
-
p.id as source_paper_id,
|
| 89 |
-
type(r) as relationship_type,
|
| 90 |
-
CASE WHEN type(r) = 'SIMILAR_TO' THEN r.similarity ELSE null END as similarity
|
| 91 |
-
ORDER BY similarity DESC
|
| 92 |
-
LIMIT $limit
|
| 93 |
-
"""
|
| 94 |
-
|
| 95 |
-
# Find the DB query for graph traversal in the graph_traversal sub-folder.
|
| 96 |
-
_DB_PAPERS_BY_AUTHOR = """
|
| 97 |
-
MATCH (a:Author)-[:IS_AUTHOR_OF]->(p:Paper)
|
| 98 |
-
WHERE a.fullname = $author_name
|
| 99 |
-
WITH p, collect(DISTINCT a) as all_authors
|
| 100 |
-
RETURN p.id as id,
|
| 101 |
-
p.name as name,
|
| 102 |
-
p.abstract as abstract,
|
| 103 |
-
p.topic as topic,
|
| 104 |
-
p.paper_url as paper_url,
|
| 105 |
-
p.decision as decision,
|
| 106 |
-
p.session as session,
|
| 107 |
-
p.session_start_time as session_start_time,
|
| 108 |
-
p.session_end_time as session_end_time,
|
| 109 |
-
p.presentation_type as presentation_type,
|
| 110 |
-
p.room_name as room_name,
|
| 111 |
-
p.project_url as project_url,
|
| 112 |
-
p.poster_position as poster_position,
|
| 113 |
-
p.sourceid as sourceid,
|
| 114 |
-
p.virtualsite_url as virtualsite_url,
|
| 115 |
-
all_authors as authors
|
| 116 |
-
ORDER BY p.name
|
| 117 |
-
LIMIT $limit
|
| 118 |
-
"""
|
| 119 |
-
|
| 120 |
-
_DB_PAPERS_BY_AUTHOR_FUZZY = """
|
| 121 |
-
MATCH (a:Author)-[:IS_AUTHOR_OF]->(p:Paper)
|
| 122 |
-
WHERE toLower(a.fullname) CONTAINS toLower($author_name)
|
| 123 |
-
WITH p, collect(DISTINCT a) as all_authors
|
| 124 |
-
RETURN p.id as id,
|
| 125 |
-
p.name as name,
|
| 126 |
-
p.abstract as abstract,
|
| 127 |
-
p.topic as topic,
|
| 128 |
-
p.paper_url as paper_url,
|
| 129 |
-
p.decision as decision,
|
| 130 |
-
p.session as session,
|
| 131 |
-
p.session_start_time as session_start_time,
|
| 132 |
-
p.session_end_time as session_end_time,
|
| 133 |
-
p.presentation_type as presentation_type,
|
| 134 |
-
p.room_name as room_name,
|
| 135 |
-
p.project_url as project_url,
|
| 136 |
-
p.poster_position as poster_position,
|
| 137 |
-
p.sourceid as sourceid,
|
| 138 |
-
p.virtualsite_url as virtualsite_url,
|
| 139 |
-
all_authors as authors
|
| 140 |
-
ORDER BY p.name
|
| 141 |
-
LIMIT $limit
|
| 142 |
-
"""
|
| 143 |
-
|
| 144 |
-
_DB_PAPERS_BY_TOPIC = """
|
| 145 |
-
MATCH (p:Paper)-[:BELONGS_TO_TOPIC]->(t:Topic {name: $topic_name})
|
| 146 |
-
RETURN p.id as id,
|
| 147 |
-
p.name as name,
|
| 148 |
-
p.abstract as abstract,
|
| 149 |
-
p.topic as topic,
|
| 150 |
-
p.paper_url as paper_url,
|
| 151 |
-
p.decision as decision,
|
| 152 |
-
p.session as session,
|
| 153 |
-
p.session_start_time as session_start_time,
|
| 154 |
-
p.session_end_time as session_end_time,
|
| 155 |
-
p.presentation_type as presentation_type,
|
| 156 |
-
p.room_name as room_name,
|
| 157 |
-
p.project_url as project_url,
|
| 158 |
-
p.poster_position as poster_position,
|
| 159 |
-
p.sourceid as sourceid,
|
| 160 |
-
p.virtualsite_url as virtualsite_url,
|
| 161 |
-
[(a:Author)-[:IS_AUTHOR_OF]->(p) | a] as authors
|
| 162 |
-
ORDER BY p.name
|
| 163 |
-
LIMIT $limit
|
| 164 |
-
"""
|
| 165 |
-
|
| 166 |
-
_DB_PAPERS_BY_TOPIC_AND_SUBTOPIC = """
|
| 167 |
-
MATCH (t:Topic {name: $topic_name})
|
| 168 |
-
OPTIONAL MATCH (subtopic:Topic)-[:SUBTOPIC_OF*]->(t)
|
| 169 |
-
WITH t, collect(DISTINCT subtopic) + t as all_topics
|
| 170 |
-
UNWIND all_topics as topic
|
| 171 |
-
MATCH (p:Paper)-[:BELONGS_TO_TOPIC]->(topic)
|
| 172 |
-
WITH DISTINCT p
|
| 173 |
-
RETURN p.id as id,
|
| 174 |
-
p.name as name,
|
| 175 |
-
p.abstract as abstract,
|
| 176 |
-
p.topic as topic,
|
| 177 |
-
p.paper_url as paper_url,
|
| 178 |
-
p.decision as decision,
|
| 179 |
-
p.session as session,
|
| 180 |
-
p.session_start_time as session_start_time,
|
| 181 |
-
p.session_end_time as session_end_time,
|
| 182 |
-
p.presentation_type as presentation_type,
|
| 183 |
-
p.room_name as room_name,
|
| 184 |
-
p.project_url as project_url,
|
| 185 |
-
p.poster_position as poster_position,
|
| 186 |
-
p.sourceid as sourceid,
|
| 187 |
-
p.virtualsite_url as virtualsite_url,
|
| 188 |
-
[(a:Author)-[:IS_AUTHOR_OF]->(p) | a] as authors
|
| 189 |
-
ORDER BY p.name
|
| 190 |
-
LIMIT $limit
|
| 191 |
-
"""
|
| 192 |
-
|
| 193 |
-
def __init__(
|
| 194 |
-
self,
|
| 195 |
-
uri: str = NEO4J_DB_URI,
|
| 196 |
-
username: str = "neo4j",
|
| 197 |
-
password: str = "password"
|
| 198 |
-
):
|
| 199 |
-
"""Initialize Neo4j connection."""
|
| 200 |
-
self.driver = GraphDatabase.driver(uri, auth=(username, password))
|
| 201 |
-
self.driver.verify_connectivity()
|
| 202 |
-
LOGGER.info(f"Connected to Neo4j at {uri}")
|
| 203 |
-
|
| 204 |
-
def close(self):
|
| 205 |
-
"""Close the Neo4j driver connection."""
|
| 206 |
-
self.driver.close()
|
| 207 |
-
|
| 208 |
-
@staticmethod
|
| 209 |
-
def embed_user_query(
|
| 210 |
-
text: str,
|
| 211 |
-
embedding_model: str = f"ollama/{EMBEDDING_MODEL_NAME}",
|
| 212 |
-
api_base: str = EMBEDDING_MODEL_API_BASE
|
| 213 |
-
):
|
| 214 |
-
emb = batch_embed_documents(
|
| 215 |
-
texts=[text],
|
| 216 |
-
batch_size=1,
|
| 217 |
-
api_base=api_base,
|
| 218 |
-
embedding_model=embedding_model
|
| 219 |
-
).tolist()[0]
|
| 220 |
-
|
| 221 |
-
return emb
|
| 222 |
-
|
| 223 |
-
def similarity_search(
|
| 224 |
-
self,
|
| 225 |
-
user_query: str,
|
| 226 |
-
day: Optional[str] = None,
|
| 227 |
-
timeslots: Optional[List[str]] = None,
|
| 228 |
-
top_k: int = 5,
|
| 229 |
-
min_similarity: Optional[float] = None
|
| 230 |
-
) -> List[Dict[str, Any]]:
|
| 231 |
-
"""
|
| 232 |
-
Perform vector similarity search on paper embeddings.
|
| 233 |
-
|
| 234 |
-
Args:
|
| 235 |
-
user_query: User query (str)
|
| 236 |
-
day: Conference day as date string (e.g., "2024-12-10") or None
|
| 237 |
-
timeslots: List of time ranges as strings (e.g., ["09:00:00-12:00:00"]) or None
|
| 238 |
-
top_k: Number of top results to return
|
| 239 |
-
min_similarity: Optional minimum similarity threshold (0-1)
|
| 240 |
-
|
| 241 |
-
Returns:
|
| 242 |
-
List of dictionaries containing paper information and similarity scores
|
| 243 |
-
"""
|
| 244 |
-
|
| 245 |
-
# Generate text embedding
|
| 246 |
-
query_embedding = self.embed_user_query(
|
| 247 |
-
text=user_query
|
| 248 |
-
)
|
| 249 |
-
|
| 250 |
-
# Convert numpy array to list if needed
|
| 251 |
-
if isinstance(query_embedding, np.ndarray):
|
| 252 |
-
query_embedding = query_embedding.tolist()
|
| 253 |
-
|
| 254 |
-
# Parse day and timeslots for the query
|
| 255 |
-
day_filter = None
|
| 256 |
-
time_ranges = []
|
| 257 |
-
|
| 258 |
-
if day:
|
| 259 |
-
# Convert date string to day of week (1=Monday, 7=Sunday)
|
| 260 |
-
from datetime import datetime
|
| 261 |
-
date_obj = datetime.strptime(day, "%Y-%m-%d")
|
| 262 |
-
day_filter = date_obj.isoweekday()
|
| 263 |
-
|
| 264 |
-
if timeslots:
|
| 265 |
-
# Parse timeslot ranges (e.g., "09:00:00-12:00:00")
|
| 266 |
-
for slot in timeslots:
|
| 267 |
-
if '-' in slot:
|
| 268 |
-
start, end = slot.split('-')
|
| 269 |
-
time_ranges.append({'start': start.strip(), 'end': end.strip()})
|
| 270 |
-
else:
|
| 271 |
-
# If no range, assume it's a single time point with some buffer
|
| 272 |
-
time_ranges.append({'start': slot.strip(), 'end': slot.strip()})
|
| 273 |
-
|
| 274 |
-
with self.driver.session() as session:
|
| 275 |
-
result = session.run(
|
| 276 |
-
self._DB_SIMILARITY_SEARCH_QUERY,
|
| 277 |
-
query_embedding=query_embedding,
|
| 278 |
-
top_k=top_k,
|
| 279 |
-
limit=NEO4J_DB_NODE_RETURN_LIMIT,
|
| 280 |
-
day=day_filter,
|
| 281 |
-
time_ranges=time_ranges if time_ranges else None
|
| 282 |
-
)
|
| 283 |
-
papers = []
|
| 284 |
-
for record in result:
|
| 285 |
-
paper = {
|
| 286 |
-
'id': record['id'],
|
| 287 |
-
'name': record['name'],
|
| 288 |
-
'abstract': record['abstract'],
|
| 289 |
-
'topic': record['topic'],
|
| 290 |
-
'similarity_score': record['score'],
|
| 291 |
-
'paper_url': record['paper_url'],
|
| 292 |
-
'decision': record['decision'],
|
| 293 |
-
'session': record['session'],
|
| 294 |
-
'session_start_time': record['session_start_time'],
|
| 295 |
-
'session_end_time': record['session_end_time'],
|
| 296 |
-
'presentation_type': record['presentation_type'],
|
| 297 |
-
'room_name': record['room_name'],
|
| 298 |
-
'github_url': record['project_url'],
|
| 299 |
-
'poster_position': record['poster_position'],
|
| 300 |
-
'sourceid': record['sourceid'],
|
| 301 |
-
'virtualsite_url': record['virtualsite_url'],
|
| 302 |
-
'authors': [a['fullname'] for a in record['authors']]
|
| 303 |
-
}
|
| 304 |
-
|
| 305 |
-
# Apply minimum similarity filter if specified
|
| 306 |
-
if min_similarity is None or paper['similarity_score'] >= min_similarity:
|
| 307 |
-
papers.append(paper)
|
| 308 |
-
|
| 309 |
-
return papers
|
| 310 |
-
|
| 311 |
-
def neighborhood_search(
|
| 312 |
-
self,
|
| 313 |
-
paper_id: str,
|
| 314 |
-
relationship_types: List[str] = ["SIMILAR_TO"],
|
| 315 |
-
min_similarity: float = 0.7
|
| 316 |
-
) -> Dict[str, Any]:
|
| 317 |
-
"""
|
| 318 |
-
Find immediate neighbors of given paper nodes.
|
| 319 |
-
|
| 320 |
-
Args:
|
| 321 |
-
paper_id: Paper ID to find neighbors for
|
| 322 |
-
relationship_types: Optional list of relationship types to filter
|
| 323 |
-
(e.g., ['SIMILAR_TO', 'IS_AUTHOR_OF', 'BELONGS_TO_TOPIC', 'SUBTOPIC_OF'])
|
| 324 |
-
min_similarity (float): A minimum similarity score in the range of 0 - 1. Often a good value is 0.75 or 0.8.
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
Returns:
|
| 328 |
-
Dictionary with neighbors grouped by relationship type
|
| 329 |
-
"""
|
| 330 |
-
allowed_rel_types = ['SIMILAR_TO', 'IS_AUTHOR_OF', 'BELONGS_TO_TOPIC', 'SUBTOPIC_OF']
|
| 331 |
-
for rel_type in relationship_types:
|
| 332 |
-
if rel_type not in allowed_rel_types:
|
| 333 |
-
raise ValueError(f"Unsupported relationship type: {rel_type}. Supported relationship types: {allowed_rel_types}")
|
| 334 |
-
|
| 335 |
-
with self.driver.session() as session:
|
| 336 |
-
result = session.run(
|
| 337 |
-
self._DB_NEIGHBORHOOD_SEARCH_QUERY,
|
| 338 |
-
paper_ids=[paper_id],
|
| 339 |
-
allowed_rel_types=relationship_types,
|
| 340 |
-
min_similarity=min_similarity,
|
| 341 |
-
limit=NEO4J_DB_NODE_RETURN_LIMIT
|
| 342 |
-
)
|
| 343 |
-
|
| 344 |
-
# Organize results by relationship type
|
| 345 |
-
neighbors = {}
|
| 346 |
-
|
| 347 |
-
for record in result:
|
| 348 |
-
rel_type = record["relationship_type"]
|
| 349 |
-
if rel_type not in neighbors.keys():
|
| 350 |
-
neighbors[rel_type] = []
|
| 351 |
-
else:
|
| 352 |
-
neighbors[rel_type].append(record)
|
| 353 |
-
|
| 354 |
-
return neighbors
|
| 355 |
-
|
| 356 |
-
def graph_traversal(
|
| 357 |
-
self,
|
| 358 |
-
start_paper_id: str,
|
| 359 |
-
n_hops: int = 2,
|
| 360 |
-
relationship_type: Optional[str] = None,
|
| 361 |
-
max_results: Optional[int] = None,
|
| 362 |
-
strategy: str = "breadth_first_random",
|
| 363 |
-
max_branches: Optional[int] = None,
|
| 364 |
-
random_seed: Optional[int] = None
|
| 365 |
-
) -> List[Dict[str, Any]]:
|
| 366 |
-
"""
|
| 367 |
-
Traverse the graph for n hops from starting paper nodes.
|
| 368 |
-
|
| 369 |
-
Args:
|
| 370 |
-
start_paper_id: Paper ID to start traversal from
|
| 371 |
-
n_hops: Number of hops to traverse (1-5 recommended)
|
| 372 |
-
relationship_type: Optional list of relationship types to traverse
|
| 373 |
-
max_results: Optional maximum number of results to return
|
| 374 |
-
strategy: Traversal strategy (breadth_first, depth_first, breadth_first_random, depth_first_random)
|
| 375 |
-
max_branches: Maximum number of random neighbors to explore per node (only for random strategies)
|
| 376 |
-
random_seed: Optional seed for reproducible random sampling
|
| 377 |
-
|
| 378 |
-
Returns:
|
| 379 |
-
List of papers found through traversal with distance information
|
| 380 |
-
"""
|
| 381 |
-
if random_seed is not None:
|
| 382 |
-
random.seed(random_seed)
|
| 383 |
-
|
| 384 |
-
# Use original Cypher-based approach for non-random strategies
|
| 385 |
-
if strategy in ["breadth_first", "depth_first"]:
|
| 386 |
-
LOGGER.debug(f"Doing a graph traversal with neo4j's built-in strategy")
|
| 387 |
-
return _graph_traversal_cypher(
|
| 388 |
-
self.driver,
|
| 389 |
-
start_paper_id,
|
| 390 |
-
n_hops,
|
| 391 |
-
relationship_type,
|
| 392 |
-
max_results
|
| 393 |
-
)
|
| 394 |
-
|
| 395 |
-
# Use Python-based traversal for random strategies
|
| 396 |
-
elif strategy == "breadth_first_random":
|
| 397 |
-
LOGGER.debug(f"Doing a graph traversal with a random sampling breadth first strategy")
|
| 398 |
-
return _graph_traversal_bfs_random(
|
| 399 |
-
self.driver,
|
| 400 |
-
start_paper_id,
|
| 401 |
-
n_hops,
|
| 402 |
-
relationship_type,
|
| 403 |
-
max_results,
|
| 404 |
-
max_branches or 3
|
| 405 |
-
)
|
| 406 |
-
|
| 407 |
-
elif strategy == "depth_first_random":
|
| 408 |
-
LOGGER.debug(f"Doing a graph traversal with a random sampling depth first strategy")
|
| 409 |
-
return _graph_traversal_dfs_random(
|
| 410 |
-
self.driver,
|
| 411 |
-
start_paper_id,
|
| 412 |
-
n_hops,
|
| 413 |
-
relationship_type,
|
| 414 |
-
max_results,
|
| 415 |
-
max_branches or 3
|
| 416 |
-
)
|
| 417 |
-
|
| 418 |
-
else:
|
| 419 |
-
raise ValueError(f"Unsupported traversal strategy: {strategy}. "
|
| 420 |
-
f"Supported strategies: breadth_first, depth_first, breadth_first_random, depth_first_random")
|
| 421 |
-
|
| 422 |
-
def search_papers_by_author(
|
| 423 |
-
self,
|
| 424 |
-
author_name: str,
|
| 425 |
-
fuzzy: bool = True
|
| 426 |
-
) -> List[Dict[str, Any]]:
|
| 427 |
-
"""
|
| 428 |
-
Find all papers by a specific author.
|
| 429 |
-
|
| 430 |
-
Args:
|
| 431 |
-
author_name: Author name or partial name
|
| 432 |
-
fuzzy: Whether to use fuzzy matching (CONTAINS vs exact match)
|
| 433 |
-
|
| 434 |
-
Returns:
|
| 435 |
-
List of papers by the author
|
| 436 |
-
"""
|
| 437 |
-
with self.driver.session() as session:
|
| 438 |
-
if fuzzy:
|
| 439 |
-
query = self._DB_PAPERS_BY_AUTHOR_FUZZY
|
| 440 |
-
else:
|
| 441 |
-
query = self._DB_PAPERS_BY_AUTHOR
|
| 442 |
-
|
| 443 |
-
result = session.run(query, author_name=author_name)
|
| 444 |
-
|
| 445 |
-
papers = []
|
| 446 |
-
for record in result:
|
| 447 |
-
paper = {
|
| 448 |
-
'id': record['id'],
|
| 449 |
-
'name': record['name'],
|
| 450 |
-
'abstract': record['abstract'],
|
| 451 |
-
'topic': record['topic'],
|
| 452 |
-
'author_name': record['author_name'],
|
| 453 |
-
'paper_url': record['paper_url'],
|
| 454 |
-
'decision': record['decision'],
|
| 455 |
-
'session': record['session'],
|
| 456 |
-
'session_start_time': record['session_start_time'],
|
| 457 |
-
'session_end_time': record['session_end_time'],
|
| 458 |
-
'presentation_type': record['presentation_type'],
|
| 459 |
-
'room_name': record['room_name'],
|
| 460 |
-
'github_url': record['project_url'],
|
| 461 |
-
'poster_position': record['poster_position'],
|
| 462 |
-
'sourceid': record['sourceid'],
|
| 463 |
-
'virtualsite_url': record['virtualsite_url'],
|
| 464 |
-
}
|
| 465 |
-
papers.append(paper)
|
| 466 |
-
|
| 467 |
-
return papers
|
| 468 |
-
|
| 469 |
-
def search_papers_by_topic(
|
| 470 |
-
self,
|
| 471 |
-
topic_name: str,
|
| 472 |
-
include_subtopics: bool = True
|
| 473 |
-
) -> List[Dict[str, Any]]:
|
| 474 |
-
"""
|
| 475 |
-
Find all papers in a specific topic.
|
| 476 |
-
|
| 477 |
-
Args:
|
| 478 |
-
topic_name: Topic name
|
| 479 |
-
include_subtopics: Whether to include papers from subtopics
|
| 480 |
-
|
| 481 |
-
Returns:
|
| 482 |
-
List of papers in the topic
|
| 483 |
-
"""
|
| 484 |
-
with self.driver.session() as session:
|
| 485 |
-
if include_subtopics:
|
| 486 |
-
# Find topic and all its subtopics
|
| 487 |
-
query = self._DB_PAPERS_BY_TOPIC_AND_SUBTOPIC
|
| 488 |
-
else:
|
| 489 |
-
query = self._DB_PAPERS_BY_TOPIC
|
| 490 |
-
|
| 491 |
-
result = session.run(query, topic_name=topic_name, limit=NEO4J_DB_NODE_RETURN_LIMIT)
|
| 492 |
-
|
| 493 |
-
papers = []
|
| 494 |
-
for record in result:
|
| 495 |
-
paper = {
|
| 496 |
-
'id': record['id'],
|
| 497 |
-
'name': record['name'],
|
| 498 |
-
'abstract': record['abstract'],
|
| 499 |
-
'topic': record['topic'],
|
| 500 |
-
'paper_url': record['paper_url'],
|
| 501 |
-
'decision': record['decision'],
|
| 502 |
-
'session': record['session'],
|
| 503 |
-
'session_start_time': record['session_start_time'],
|
| 504 |
-
'session_end_time': record['session_end_time'],
|
| 505 |
-
'presentation_type': record['presentation_type'],
|
| 506 |
-
'room_name': record['room_name'],
|
| 507 |
-
'github_url': record['project_url'],
|
| 508 |
-
'poster_position': record['poster_position'],
|
| 509 |
-
'sourceid': record['sourceid'],
|
| 510 |
-
'virtualsite_url': record['virtualsite_url'],
|
| 511 |
-
}
|
| 512 |
-
papers.append(paper)
|
| 513 |
-
|
| 514 |
-
return papers
|
| 515 |
-
|
| 516 |
-
def get_collaboration_network(
|
| 517 |
-
self,
|
| 518 |
-
author_name: str,
|
| 519 |
-
n_hops: int = 2
|
| 520 |
-
) -> Dict[str, Any]:
|
| 521 |
-
"""
|
| 522 |
-
Find collaboration network: authors who co-authored papers.
|
| 523 |
-
|
| 524 |
-
Args:
|
| 525 |
-
author_name: Starting author name
|
| 526 |
-
n_hops: Degrees of separation to explore
|
| 527 |
-
|
| 528 |
-
Returns:
|
| 529 |
-
Dictionary with collaborators and shared papers
|
| 530 |
-
"""
|
| 531 |
-
with self.driver.session() as session:
|
| 532 |
-
query = f"""
|
| 533 |
-
MATCH (a1:Author)
|
| 534 |
-
WHERE toLower(a1.fullname) CONTAINS toLower($author_name)
|
| 535 |
-
MATCH path = (a1)<-[:AUTHORED_BY]-(p:Paper)-[:AUTHORED_BY]->(a2:Author)
|
| 536 |
-
WHERE a1 <> a2
|
| 537 |
-
WITH a1, a2, collect(DISTINCT p) as shared_papers, length(path) as distance
|
| 538 |
-
RETURN a1.fullname as source_author,
|
| 539 |
-
a2.fullname as collaborator,
|
| 540 |
-
a2.institution as institution,
|
| 541 |
-
[p IN shared_papers | {{id: p.id, name: p.name}}] as papers,
|
| 542 |
-
size(shared_papers) as paper_count
|
| 543 |
-
ORDER BY paper_count DESC
|
| 544 |
-
"""
|
| 545 |
-
|
| 546 |
-
result = session.run(query, author_name=author_name)
|
| 547 |
-
|
| 548 |
-
collaborations = []
|
| 549 |
-
for record in result:
|
| 550 |
-
collab = {
|
| 551 |
-
'source_author': record['source_author'],
|
| 552 |
-
'collaborator': record['collaborator'],
|
| 553 |
-
'institution': record['institution'],
|
| 554 |
-
'shared_papers': record['papers'],
|
| 555 |
-
'paper_count': record['paper_count']
|
| 556 |
-
}
|
| 557 |
-
collaborations.append(collab)
|
| 558 |
-
|
| 559 |
-
return {
|
| 560 |
-
'author': author_name,
|
| 561 |
-
'collaborators': collaborations,
|
| 562 |
-
'total_collaborators': len(collaborations)
|
| 563 |
-
}
|
| 564 |
-
|
| 565 |
-
|
| 566 |
-
# Test
|
| 567 |
-
if __name__ == "__main__":
|
| 568 |
-
# Initialize searcher
|
| 569 |
-
searcher = Neo4jGraphWorker(
|
| 570 |
-
uri="bolt://localhost:7687",
|
| 571 |
-
username="neo4j",
|
| 572 |
-
password="llm_agents"
|
| 573 |
-
)
|
| 574 |
-
|
| 575 |
-
try:
|
| 576 |
-
# Example 1: Similarity search
|
| 577 |
-
print("\n" + "=" * 60)
|
| 578 |
-
print("Example 1: Similarity Search")
|
| 579 |
-
print("=" * 60)
|
| 580 |
-
user_query = "Reinforcement learning"
|
| 581 |
-
similar_papers = searcher.similarity_search(user_query, top_k=30)
|
| 582 |
-
for i, paper in enumerate(similar_papers, 1):
|
| 583 |
-
print(f"\n{i}. {paper['name']}")
|
| 584 |
-
print(f" Topic: {paper['topic']}")
|
| 585 |
-
print(f" Similarity: {paper['similarity_score']:.4f}")
|
| 586 |
-
|
| 587 |
-
# Example 2: Neighborhood search
|
| 588 |
-
if similar_papers:
|
| 589 |
-
print("\n" + "=" * 60)
|
| 590 |
-
print("Example 2: Neighborhood Search")
|
| 591 |
-
print("=" * 60)
|
| 592 |
-
paper_id = similar_papers[0]['id']
|
| 593 |
-
neighbors = searcher.neighborhood_search(paper_id, min_similarity=0.75)
|
| 594 |
-
print(f"\nNeighbors of: {similar_papers[0]['name']}")
|
| 595 |
-
for rel_type, neighbors in neighbors.items():
|
| 596 |
-
print(f" \n{rel_type.upper()} RELATIONSHIPS:")
|
| 597 |
-
for neighbor in neighbors:
|
| 598 |
-
print(f" - {neighbor['name']} (similarity: {neighbor['similarity']:.4f})")
|
| 599 |
-
|
| 600 |
-
# Example 3: Graph traversal
|
| 601 |
-
print("\n" + "=" * 60)
|
| 602 |
-
print("Example 3: Graph Traversal (2 hops)")
|
| 603 |
-
print("=" * 60)
|
| 604 |
-
if similar_papers:
|
| 605 |
-
paper_ids = similar_papers[0]['id']
|
| 606 |
-
related = searcher.graph_traversal(paper_ids, n_hops=2)
|
| 607 |
-
print(f"\nFound {len(related)} related papers through traversal")
|
| 608 |
-
for paper in related[:5]: # Show first 5
|
| 609 |
-
print(f" - {paper['name']} (distance: {paper['distance']})")
|
| 610 |
-
|
| 611 |
-
finally:
|
| 612 |
-
searcher.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
agentic_nav/tools/session_routing/__init__.py
DELETED
|
@@ -1,210 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Session routing tool for building personalized conference visiting schedules.
|
| 3 |
-
|
| 4 |
-
This tool helps NeurIPS 2025 conference attendees create optimized schedules
|
| 5 |
-
for visiting poster sessions based on their research interests, preferred dates,
|
| 6 |
-
and time slots.
|
| 7 |
-
"""
|
| 8 |
-
|
| 9 |
-
import os
|
| 10 |
-
from typing import Union, List, Optional
|
| 11 |
-
from neo4j import GraphDatabase
|
| 12 |
-
|
| 13 |
-
from agentic_nav.tools.knowledge_graph import search_similar_papers
|
| 14 |
-
from agentic_nav.tools.session_routing.scheduler import ScheduleBuilder
|
| 15 |
-
from agentic_nav.tools.session_routing.utils import parse_date_input, parse_time_preference
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
# Environment variables for Neo4j connection
|
| 19 |
-
NEO4J_DB_URI = os.getenv("NEO4J_DB_URI", "bolt://localhost:7687")
|
| 20 |
-
NEO4J_USERNAME = os.getenv("NEO4J_USERNAME", "neo4j")
|
| 21 |
-
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "")
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
def build_visit_schedule(
|
| 25 |
-
topics: Union[str, List[str]],
|
| 26 |
-
dates: Union[str, List[str]] = None,
|
| 27 |
-
time_preferences: str = None,
|
| 28 |
-
max_papers: int = 20,
|
| 29 |
-
min_similarity: float = 0.6
|
| 30 |
-
) -> str:
|
| 31 |
-
# TODO: Add filter for ["poster", "oral"]. Make sure to match orals with poster counterpart.
|
| 32 |
-
"""
|
| 33 |
-
Build a personalized visiting schedule for NeurIPS 2025 conference poster sessions.
|
| 34 |
-
|
| 35 |
-
This tool helps you create an optimized schedule by:
|
| 36 |
-
1. Finding papers relevant to your research interests (topics)
|
| 37 |
-
2. Filtering by your preferred dates and time slots
|
| 38 |
-
3. Scoring papers by relevance to your topics
|
| 39 |
-
4. Clustering papers by room location to minimize walking
|
| 40 |
-
5. Organizing chronologically for easy navigation
|
| 41 |
-
|
| 42 |
-
The schedule includes paper titles, locations, poster positions, and relevance scores.
|
| 43 |
-
|
| 44 |
-
Args:
|
| 45 |
-
topics: Research topic(s) of interest. Can be a single topic string or a list of topics.
|
| 46 |
-
Examples: "transformer architectures", ["reinforcement learning", "multi-agent systems"]
|
| 47 |
-
dates: Conference date(s) to include. Can be:
|
| 48 |
-
- ISO format: "2025-12-02" or ["2025-12-02", "2025-12-03"]
|
| 49 |
-
- Day names: "Tuesday", "Wednesday"
|
| 50 |
-
- None (default): include all conference days (Dec 2-7, 2025)
|
| 51 |
-
time_preferences: Preferred time slot(s). Can be:
|
| 52 |
-
- Preset: "morning" (8am-12pm), "afternoon" (12pm-5pm), "evening" (5pm-9pm)
|
| 53 |
-
- Range: "9:00-12:00" or "14-17"
|
| 54 |
-
- None (default): include all time slots
|
| 55 |
-
max_papers: Maximum number of papers to include in schedule (default: 20)
|
| 56 |
-
min_similarity: Minimum similarity score for paper relevance (0.0-1.0, default: 0.6)
|
| 57 |
-
|
| 58 |
-
Returns:
|
| 59 |
-
Formatted markdown schedule organized by date, time slot, and room location.
|
| 60 |
-
All times are displayed in conference local time (PST/UTC-8).
|
| 61 |
-
|
| 62 |
-
Restrictions:
|
| 63 |
-
- Requires Neo4j database connection (NEO4J_DB_URI, NEO4J_USERNAME, NEO4J_PASSWORD)
|
| 64 |
-
- Requires Paper nodes with session timing and location fields
|
| 65 |
-
- Conference dates: December 2-7, 2025 in San Diego/Mexico City (UTC-8)
|
| 66 |
-
|
| 67 |
-
Notes:
|
| 68 |
-
- Papers are scored by similarity to your topics using embedding search
|
| 69 |
-
- Schedule optimizes for both relevance and room clustering
|
| 70 |
-
- Time zones are automatically converted from UTC to PST
|
| 71 |
-
- Poster positions help you quickly locate papers in exhibition halls
|
| 72 |
-
|
| 73 |
-
Raises:
|
| 74 |
-
ValueError: If topics is empty or dates cannot be parsed
|
| 75 |
-
Exception: If Neo4j connection fails
|
| 76 |
-
|
| 77 |
-
Example:
|
| 78 |
-
>>> build_visit_schedule(
|
| 79 |
-
... topics=["machine learning", "computer vision"],
|
| 80 |
-
... dates="2025-12-02",
|
| 81 |
-
... time_preferences="morning",
|
| 82 |
-
... max_papers=15
|
| 83 |
-
... )
|
| 84 |
-
# Your NeurIPS 2025 Conference Schedule
|
| 85 |
-
|
| 86 |
-
## Tuesday, December 2, 2025
|
| 87 |
-
|
| 88 |
-
### 9:00 AM - 11:00 AM PST
|
| 89 |
-
|
| 90 |
-
**Hall A**
|
| 91 |
-
- **Poster #123** | Attention Mechanisms in Vision Transformers
|
| 92 |
-
- Authors: John Doe, Jane Doe, et al.
|
| 93 |
-
- Topic: Computer Vision
|
| 94 |
-
- Relevance: 0.92
|
| 95 |
-
...
|
| 96 |
-
"""
|
| 97 |
-
# Type coercion for parameters that may come as strings from LLM tool calls
|
| 98 |
-
if isinstance(topics, str):
|
| 99 |
-
# If topics is a single string, treat as one topic
|
| 100 |
-
topics = [topics]
|
| 101 |
-
elif topics is None:
|
| 102 |
-
raise ValueError("Topics parameter is required. Please provide at least one research topic.")
|
| 103 |
-
|
| 104 |
-
if max_papers is not None and not isinstance(max_papers, int):
|
| 105 |
-
max_papers = int(max_papers)
|
| 106 |
-
|
| 107 |
-
if min_similarity is not None and not isinstance(min_similarity, float):
|
| 108 |
-
min_similarity = float(min_similarity)
|
| 109 |
-
|
| 110 |
-
# Parse dates
|
| 111 |
-
parsed_dates = None
|
| 112 |
-
if dates:
|
| 113 |
-
if isinstance(dates, str):
|
| 114 |
-
dates = [dates]
|
| 115 |
-
|
| 116 |
-
parsed_dates = []
|
| 117 |
-
for date_str in dates:
|
| 118 |
-
parsed = parse_date_input(date_str)
|
| 119 |
-
if parsed:
|
| 120 |
-
parsed_dates.append(parsed)
|
| 121 |
-
|
| 122 |
-
if not parsed_dates:
|
| 123 |
-
parsed_dates = None # Fall back to all dates if parsing fails
|
| 124 |
-
|
| 125 |
-
# Parse time preferences (convert to UTC for database query)
|
| 126 |
-
time_range = None
|
| 127 |
-
if time_preferences:
|
| 128 |
-
local_time_range = parse_time_preference(time_preferences)
|
| 129 |
-
if local_time_range:
|
| 130 |
-
# Convert PST to UTC (add 8 hours)
|
| 131 |
-
start_utc = (local_time_range[0] + 8) % 24
|
| 132 |
-
end_utc = (local_time_range[1] + 8) % 24
|
| 133 |
-
time_range = (start_utc, end_utc)
|
| 134 |
-
|
| 135 |
-
# Step 1: Search for papers matching each topic using existing tool
|
| 136 |
-
all_paper_ids = set()
|
| 137 |
-
relevance_scores = {}
|
| 138 |
-
|
| 139 |
-
for topic in topics:
|
| 140 |
-
try:
|
| 141 |
-
|
| 142 |
-
from llm_agents.tools.knowledge_graph.retriever import Neo4jGraphWorker
|
| 143 |
-
|
| 144 |
-
worker = Neo4jGraphWorker(
|
| 145 |
-
uri=NEO4J_DB_URI,
|
| 146 |
-
username=NEO4J_USERNAME,
|
| 147 |
-
password=NEO4J_PASSWORD
|
| 148 |
-
)
|
| 149 |
-
|
| 150 |
-
papers = worker.similarity_search(
|
| 151 |
-
user_query=topic,
|
| 152 |
-
top_k=max_papers * 2,
|
| 153 |
-
min_similarity=min_similarity
|
| 154 |
-
)
|
| 155 |
-
|
| 156 |
-
worker.close()
|
| 157 |
-
|
| 158 |
-
# Extract paper IDs and scores
|
| 159 |
-
for paper in papers:
|
| 160 |
-
paper_id = paper.get('id')
|
| 161 |
-
score = paper.get('score', 0.0)
|
| 162 |
-
|
| 163 |
-
if paper_id:
|
| 164 |
-
all_paper_ids.add(paper_id)
|
| 165 |
-
# Keep highest score if paper matches multiple topics
|
| 166 |
-
if paper_id not in relevance_scores or score > relevance_scores[paper_id]:
|
| 167 |
-
relevance_scores[paper_id] = score
|
| 168 |
-
|
| 169 |
-
except Exception as e:
|
| 170 |
-
# If search fails for one topic, continue with others
|
| 171 |
-
continue
|
| 172 |
-
|
| 173 |
-
if not all_paper_ids:
|
| 174 |
-
return "No papers found matching your topics. Try broadening your search criteria or adjusting the minimum similarity threshold."
|
| 175 |
-
|
| 176 |
-
# Step 2: Initialize schedule builder
|
| 177 |
-
driver = GraphDatabase.driver(NEO4J_DB_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))
|
| 178 |
-
builder = ScheduleBuilder(driver)
|
| 179 |
-
|
| 180 |
-
try:
|
| 181 |
-
# Step 3: Filter papers by date and time
|
| 182 |
-
filtered_papers = builder.filter_by_datetime(
|
| 183 |
-
paper_ids=list(all_paper_ids),
|
| 184 |
-
dates=parsed_dates,
|
| 185 |
-
time_range=time_range
|
| 186 |
-
)
|
| 187 |
-
|
| 188 |
-
if not filtered_papers:
|
| 189 |
-
return "No papers found matching your date and time preferences. Try expanding your time range or selecting different dates."
|
| 190 |
-
|
| 191 |
-
# Step 4: Score papers by relevance
|
| 192 |
-
scored_papers = builder.score_papers(filtered_papers, relevance_scores)
|
| 193 |
-
|
| 194 |
-
# Step 5: Optimize schedule (chronological + room clustering)
|
| 195 |
-
schedule = builder.optimize_schedule(scored_papers, max_papers=max_papers)
|
| 196 |
-
|
| 197 |
-
# Step 6: Format as markdown
|
| 198 |
-
markdown_output = builder.format_as_markdown(schedule, include_abstracts=False)
|
| 199 |
-
|
| 200 |
-
return markdown_output
|
| 201 |
-
|
| 202 |
-
finally:
|
| 203 |
-
builder.close()
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
__all__ = ['build_visit_schedule']
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
if __name__ == "__main__":
|
| 210 |
-
print(build_visit_schedule(topics=["federated learning"], max_papers=200, dates=["Wednesday"]))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
agentic_nav/tools/session_routing/scheduler.py
DELETED
|
@@ -1,377 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Schedule builder for NeurIPS 2025 conference paper sessions.
|
| 3 |
-
|
| 4 |
-
This module provides the ScheduleBuilder class that handles filtering,
|
| 5 |
-
scoring, and organizing papers into optimized visiting schedules.
|
| 6 |
-
"""
|
| 7 |
-
|
| 8 |
-
from datetime import datetime
|
| 9 |
-
from typing import List, Dict, Any, Optional, Tuple
|
| 10 |
-
from collections import defaultdict
|
| 11 |
-
import neo4j
|
| 12 |
-
|
| 13 |
-
from agentic_nav.tools.session_routing.utils import (
|
| 14 |
-
convert_utc_to_local,
|
| 15 |
-
format_time_slot,
|
| 16 |
-
format_date_header,
|
| 17 |
-
cluster_papers_by_room,
|
| 18 |
-
parse_time_preference
|
| 19 |
-
)
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
class ScheduleBuilder:
|
| 23 |
-
"""
|
| 24 |
-
Build optimized conference visiting schedules.
|
| 25 |
-
|
| 26 |
-
This class handles filtering papers by date/time, scoring by relevance,
|
| 27 |
-
clustering by room location, and formatting the final schedule.
|
| 28 |
-
"""
|
| 29 |
-
|
| 30 |
-
def __init__(self, neo4j_driver: neo4j.Driver):
|
| 31 |
-
"""
|
| 32 |
-
Initialize the schedule builder.
|
| 33 |
-
|
| 34 |
-
Args:
|
| 35 |
-
neo4j_driver: Neo4j database driver for querying papers
|
| 36 |
-
"""
|
| 37 |
-
self.driver = neo4j_driver
|
| 38 |
-
|
| 39 |
-
def filter_by_datetime(
|
| 40 |
-
self,
|
| 41 |
-
paper_ids: List[str],
|
| 42 |
-
dates: Optional[List[datetime]] = None,
|
| 43 |
-
time_range: Optional[Tuple[int, int]] = None
|
| 44 |
-
) -> List[Dict[str, Any]]:
|
| 45 |
-
"""
|
| 46 |
-
Filter papers by date and time preferences.
|
| 47 |
-
|
| 48 |
-
Args:
|
| 49 |
-
paper_ids: List of paper IDs to filter
|
| 50 |
-
dates: List of conference dates to include (None = all dates)
|
| 51 |
-
time_range: Tuple of (start_hour, end_hour) in UTC (None = all times)
|
| 52 |
-
|
| 53 |
-
Returns:
|
| 54 |
-
List of paper dictionaries with full details including session times
|
| 55 |
-
|
| 56 |
-
Example:
|
| 57 |
-
>>> builder.filter_by_datetime(['paper1', 'paper2'], dates=[datetime(2025,12,2)])
|
| 58 |
-
"""
|
| 59 |
-
if not paper_ids:
|
| 60 |
-
return []
|
| 61 |
-
|
| 62 |
-
# Deduplicate paper_ids to ensure we only query each paper once
|
| 63 |
-
unique_paper_ids = list(set(paper_ids))
|
| 64 |
-
|
| 65 |
-
# Build Cypher query to get full paper details including authors via relationship
|
| 66 |
-
# Relationship is IS_AUTHOR_OF (uppercase) and author property is 'fullname'
|
| 67 |
-
query = """
|
| 68 |
-
MATCH (p:Paper)
|
| 69 |
-
WHERE p.id IN $paper_ids
|
| 70 |
-
OPTIONAL MATCH (a:Author)-[:IS_AUTHOR_OF]-(p)
|
| 71 |
-
WITH p, collect(a.fullname) as authors
|
| 72 |
-
RETURN DISTINCT p.id as id,
|
| 73 |
-
p.name as name,
|
| 74 |
-
p.abstract as abstract,
|
| 75 |
-
p.topic as topic,
|
| 76 |
-
p.session as session,
|
| 77 |
-
p.session_start_time as session_start_time,
|
| 78 |
-
p.session_end_time as session_end_time,
|
| 79 |
-
p.room_name as room_name,
|
| 80 |
-
p.poster_position as poster_position,
|
| 81 |
-
p.presentation_type as presentation_type,
|
| 82 |
-
p.url as url,
|
| 83 |
-
authors
|
| 84 |
-
"""
|
| 85 |
-
|
| 86 |
-
with self.driver.session() as session:
|
| 87 |
-
result = session.run(query, paper_ids=unique_paper_ids)
|
| 88 |
-
papers = [dict(record) for record in result]
|
| 89 |
-
|
| 90 |
-
# Deduplicate papers by ID (just in case)
|
| 91 |
-
seen_ids = set()
|
| 92 |
-
unique_papers = []
|
| 93 |
-
for paper in papers:
|
| 94 |
-
paper_id = paper.get('id')
|
| 95 |
-
if paper_id and paper_id not in seen_ids:
|
| 96 |
-
seen_ids.add(paper_id)
|
| 97 |
-
unique_papers.append(paper)
|
| 98 |
-
|
| 99 |
-
papers = unique_papers
|
| 100 |
-
|
| 101 |
-
# Filter by date if specified
|
| 102 |
-
if dates:
|
| 103 |
-
date_strs = [d.strftime("%Y-%m-%d") for d in dates]
|
| 104 |
-
papers = [
|
| 105 |
-
p for p in papers
|
| 106 |
-
if p.get('session_start_time') and
|
| 107 |
-
any(date_str in p['session_start_time'] for date_str in date_strs)
|
| 108 |
-
]
|
| 109 |
-
|
| 110 |
-
# Filter by time range if specified (convert UTC time range)
|
| 111 |
-
if time_range:
|
| 112 |
-
start_hour, end_hour = time_range
|
| 113 |
-
filtered_papers = []
|
| 114 |
-
|
| 115 |
-
for paper in papers:
|
| 116 |
-
try:
|
| 117 |
-
start_time_str = paper.get('session_start_time', '')
|
| 118 |
-
if not start_time_str:
|
| 119 |
-
continue
|
| 120 |
-
|
| 121 |
-
# Parse UTC time
|
| 122 |
-
if 'T' in start_time_str:
|
| 123 |
-
dt = datetime.fromisoformat(start_time_str.replace('Z', ''))
|
| 124 |
-
else:
|
| 125 |
-
continue
|
| 126 |
-
|
| 127 |
-
# Check if paper session falls within time range (UTC)
|
| 128 |
-
if start_hour <= dt.hour < end_hour:
|
| 129 |
-
filtered_papers.append(paper)
|
| 130 |
-
|
| 131 |
-
except (ValueError, AttributeError):
|
| 132 |
-
# If we can't parse time, include the paper to be safe
|
| 133 |
-
filtered_papers.append(paper)
|
| 134 |
-
|
| 135 |
-
papers = filtered_papers
|
| 136 |
-
|
| 137 |
-
return papers
|
| 138 |
-
|
| 139 |
-
def score_papers(
|
| 140 |
-
self,
|
| 141 |
-
papers: List[Dict[str, Any]],
|
| 142 |
-
relevance_scores: Dict[str, float]
|
| 143 |
-
) -> List[Dict[str, Any]]:
|
| 144 |
-
"""
|
| 145 |
-
Add relevance scores to papers.
|
| 146 |
-
|
| 147 |
-
Args:
|
| 148 |
-
papers: List of paper dictionaries
|
| 149 |
-
relevance_scores: Dict mapping paper_id to relevance score
|
| 150 |
-
|
| 151 |
-
Returns:
|
| 152 |
-
Papers with added 'relevance_score' field, sorted by score descending
|
| 153 |
-
|
| 154 |
-
Example:
|
| 155 |
-
>>> builder.score_papers(papers, {'paper1': 0.95, 'paper2': 0.87})
|
| 156 |
-
"""
|
| 157 |
-
scored_papers = []
|
| 158 |
-
|
| 159 |
-
for paper in papers:
|
| 160 |
-
paper_id = paper.get('id')
|
| 161 |
-
score = relevance_scores.get(paper_id, 0.0)
|
| 162 |
-
|
| 163 |
-
paper_with_score = paper.copy()
|
| 164 |
-
paper_with_score['relevance_score'] = score
|
| 165 |
-
scored_papers.append(paper_with_score)
|
| 166 |
-
|
| 167 |
-
# Sort by relevance score (highest first)
|
| 168 |
-
scored_papers.sort(key=lambda p: p['relevance_score'], reverse=True)
|
| 169 |
-
|
| 170 |
-
return scored_papers
|
| 171 |
-
|
| 172 |
-
def optimize_schedule(
|
| 173 |
-
self,
|
| 174 |
-
papers: List[Dict[str, Any]],
|
| 175 |
-
max_papers: int = 20
|
| 176 |
-
) -> Dict[str, Dict[str, List[Dict[str, Any]]]]:
|
| 177 |
-
"""
|
| 178 |
-
Optimize schedule by grouping papers chronologically and by room.
|
| 179 |
-
|
| 180 |
-
Args:
|
| 181 |
-
papers: List of scored paper dictionaries
|
| 182 |
-
max_papers: Maximum number of papers to include
|
| 183 |
-
|
| 184 |
-
Returns:
|
| 185 |
-
Nested dict: {date: {time_slot: {room: [papers]}}}
|
| 186 |
-
|
| 187 |
-
Example:
|
| 188 |
-
>>> schedule = builder.optimize_schedule(papers, max_papers=15)
|
| 189 |
-
"""
|
| 190 |
-
# Deduplicate papers by ID first
|
| 191 |
-
seen_ids = set()
|
| 192 |
-
unique_papers = []
|
| 193 |
-
for paper in papers:
|
| 194 |
-
paper_id = paper.get('id')
|
| 195 |
-
if paper_id and paper_id not in seen_ids:
|
| 196 |
-
seen_ids.add(paper_id)
|
| 197 |
-
unique_papers.append(paper)
|
| 198 |
-
|
| 199 |
-
# Limit to top papers by relevance
|
| 200 |
-
top_papers = unique_papers[:max_papers]
|
| 201 |
-
|
| 202 |
-
# Group by date and time
|
| 203 |
-
schedule = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
|
| 204 |
-
|
| 205 |
-
for paper in top_papers:
|
| 206 |
-
try:
|
| 207 |
-
start_time = paper.get('session_start_time', '')
|
| 208 |
-
if not start_time:
|
| 209 |
-
continue
|
| 210 |
-
|
| 211 |
-
# Extract date
|
| 212 |
-
date_str = start_time.split('T')[0]
|
| 213 |
-
|
| 214 |
-
# Create time slot key
|
| 215 |
-
end_time = paper.get('session_end_time', '')
|
| 216 |
-
time_slot = format_time_slot(start_time, end_time) if end_time else start_time
|
| 217 |
-
|
| 218 |
-
# Get room (handle None values, fallback to session for Mexico City papers)
|
| 219 |
-
room = paper.get('room_name')
|
| 220 |
-
if not room:
|
| 221 |
-
# Use session as fallback (e.g., for Mexico City papers)
|
| 222 |
-
room = paper.get('session') or 'N/A'
|
| 223 |
-
|
| 224 |
-
# Add to schedule
|
| 225 |
-
schedule[date_str][time_slot][room].append(paper)
|
| 226 |
-
|
| 227 |
-
except (ValueError, AttributeError, IndexError):
|
| 228 |
-
# Skip papers with invalid time data
|
| 229 |
-
continue
|
| 230 |
-
|
| 231 |
-
return schedule
|
| 232 |
-
|
| 233 |
-
def format_as_markdown(
|
| 234 |
-
self,
|
| 235 |
-
schedule: Dict[str, Dict[str, List[Dict[str, Any]]]],
|
| 236 |
-
include_abstracts: bool = False
|
| 237 |
-
) -> str:
|
| 238 |
-
"""
|
| 239 |
-
Format schedule as structured markdown.
|
| 240 |
-
|
| 241 |
-
Args:
|
| 242 |
-
schedule: Nested schedule dictionary
|
| 243 |
-
include_abstracts: Whether to include paper abstracts (default: False)
|
| 244 |
-
|
| 245 |
-
Returns:
|
| 246 |
-
Formatted markdown string with format:
|
| 247 |
-
"Date (MM dd, yyyy) - Time Slot - Session Name - Location"
|
| 248 |
-
|
| 249 |
-
Example:
|
| 250 |
-
>>> markdown = builder.format_as_markdown(schedule)
|
| 251 |
-
"""
|
| 252 |
-
if not schedule:
|
| 253 |
-
return "No papers found matching your criteria."
|
| 254 |
-
|
| 255 |
-
output = ["# Your NeurIPS 2025 Conference Schedule\n"]
|
| 256 |
-
|
| 257 |
-
# Flatten schedule into list of blocks for better formatting
|
| 258 |
-
schedule_blocks = []
|
| 259 |
-
|
| 260 |
-
for date_str in sorted(schedule.keys()):
|
| 261 |
-
time_slots = schedule[date_str]
|
| 262 |
-
|
| 263 |
-
for time_slot in sorted(time_slots.keys()):
|
| 264 |
-
rooms = time_slots[time_slot]
|
| 265 |
-
|
| 266 |
-
for room_or_session in sorted(rooms.keys()):
|
| 267 |
-
papers_in_block = rooms[room_or_session]
|
| 268 |
-
|
| 269 |
-
# Sort papers by poster position ID (numerically)
|
| 270 |
-
def poster_sort_key(paper):
|
| 271 |
-
poster_pos = paper.get('poster_position')
|
| 272 |
-
if not poster_pos:
|
| 273 |
-
return float('inf') # Put papers without position at end
|
| 274 |
-
|
| 275 |
-
# Remove '#' prefix if present
|
| 276 |
-
if isinstance(poster_pos, str) and poster_pos.startswith('#'):
|
| 277 |
-
poster_pos = poster_pos[1:]
|
| 278 |
-
|
| 279 |
-
# Convert to integer for numerical sorting
|
| 280 |
-
try:
|
| 281 |
-
return int(poster_pos)
|
| 282 |
-
except (ValueError, TypeError):
|
| 283 |
-
return float('inf') # Put invalid positions at end
|
| 284 |
-
|
| 285 |
-
papers_in_block.sort(key=poster_sort_key)
|
| 286 |
-
|
| 287 |
-
schedule_blocks.append({
|
| 288 |
-
'date': date_str,
|
| 289 |
-
'time_slot': time_slot,
|
| 290 |
-
'room_or_session': room_or_session,
|
| 291 |
-
'papers': papers_in_block
|
| 292 |
-
})
|
| 293 |
-
|
| 294 |
-
# Format each schedule block
|
| 295 |
-
total_papers = 0
|
| 296 |
-
for block in schedule_blocks:
|
| 297 |
-
date_str = block['date']
|
| 298 |
-
time_slot = block['time_slot']
|
| 299 |
-
room_or_session = block['room_or_session']
|
| 300 |
-
papers = block['papers']
|
| 301 |
-
|
| 302 |
-
total_papers += len(papers)
|
| 303 |
-
|
| 304 |
-
# Get session and location from first paper (all papers in block share these)
|
| 305 |
-
if papers:
|
| 306 |
-
first_paper = papers[0]
|
| 307 |
-
session_name = first_paper.get('session', 'N/A')
|
| 308 |
-
actual_room = first_paper.get('room_name')
|
| 309 |
-
|
| 310 |
-
# Determine location: use room if available, otherwise indicate session-based location
|
| 311 |
-
if actual_room:
|
| 312 |
-
location = actual_room
|
| 313 |
-
else:
|
| 314 |
-
location = "Mexico City" # Papers without room are from Mexico City
|
| 315 |
-
|
| 316 |
-
else:
|
| 317 |
-
session_name = room_or_session
|
| 318 |
-
location = room_or_session
|
| 319 |
-
|
| 320 |
-
# Format date as "Month DD, YYYY"
|
| 321 |
-
try:
|
| 322 |
-
from datetime import datetime
|
| 323 |
-
dt = datetime.fromisoformat(date_str)
|
| 324 |
-
formatted_date = dt.strftime("%B %d, %Y")
|
| 325 |
-
except:
|
| 326 |
-
formatted_date = date_str
|
| 327 |
-
|
| 328 |
-
# Create a comprehensive header
|
| 329 |
-
header = f"## {formatted_date} - {time_slot} - {session_name} - {location}\n"
|
| 330 |
-
output.append(f"\n{header}")
|
| 331 |
-
|
| 332 |
-
# List papers in this block
|
| 333 |
-
for paper in papers:
|
| 334 |
-
title = paper.get('name', 'Untitled')
|
| 335 |
-
poster_pos = paper.get('poster_position', 'N/A')
|
| 336 |
-
# TODO: This needs to be the distance between the user input query and the paper embedding, i.e.,
|
| 337 |
-
# compare encoded user_input with "embedding" in database.
|
| 338 |
-
relevance = paper.get('relevance_score', 0)
|
| 339 |
-
topic = paper.get('topic', 'General')
|
| 340 |
-
pres_type = paper.get('presentation_type', 'Poster')
|
| 341 |
-
authors = paper.get('authors', 'N/A')
|
| 342 |
-
|
| 343 |
-
# Format authors for display
|
| 344 |
-
if isinstance(authors, list):
|
| 345 |
-
authors_str = ', '.join(authors) if authors else 'N/A'
|
| 346 |
-
elif authors and authors != 'N/A':
|
| 347 |
-
authors_str = str(authors)
|
| 348 |
-
else:
|
| 349 |
-
authors_str = 'N/A'
|
| 350 |
-
|
| 351 |
-
# Format paper entry
|
| 352 |
-
output.append(f"- **{pres_type} {poster_pos.replace('#', '') if poster_pos is not None else ''}** | {title}")
|
| 353 |
-
output.append(f" - Authors: {authors_str}")
|
| 354 |
-
output.append(f" - Topic: {topic}")
|
| 355 |
-
|
| 356 |
-
# Add paper URL if available
|
| 357 |
-
paper_url = paper.get('url')
|
| 358 |
-
if paper_url:
|
| 359 |
-
output.append(f" - URL: {paper_url}")
|
| 360 |
-
|
| 361 |
-
output.append(f" - Relevance: {relevance:.2f}")
|
| 362 |
-
|
| 363 |
-
if include_abstracts and paper.get('abstract'):
|
| 364 |
-
abstract = paper['abstract'][:200] + "..." if len(paper['abstract']) > 200 else paper['abstract']
|
| 365 |
-
output.append(f" - Abstract: {abstract}")
|
| 366 |
-
|
| 367 |
-
output.append("") # Blank line between papers
|
| 368 |
-
|
| 369 |
-
# Add summary footer
|
| 370 |
-
output.append(f"\n---\n**Total Papers in Schedule: {total_papers}**")
|
| 371 |
-
|
| 372 |
-
return "\n".join(output)
|
| 373 |
-
|
| 374 |
-
def close(self):
    """Close the Neo4j driver connection.

    Safe to call when no driver was ever created: ``driver.close()``
    is only invoked if ``self.driver`` is truthy.
    """
    # NOTE(review): self.driver is not reset to None afterwards, so a
    # second call will invoke close() again -- presumably the driver
    # tolerates double-close; confirm against the neo4j driver docs.
    if self.driver:
        self.driver.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
agentic_nav/tools/session_routing/utils.py
DELETED
|
@@ -1,253 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Utility functions for session routing and schedule building.
|
| 3 |
-
|
| 4 |
-
This module provides helper functions for time zone conversion,
|
| 5 |
-
date parsing, and formatting schedule outputs.
|
| 6 |
-
"""
|
| 7 |
-
|
| 8 |
-
from datetime import datetime, timedelta
|
| 9 |
-
from typing import Optional, Tuple
|
| 10 |
-
import re
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
def convert_utc_to_local(utc_time_str: str, timezone_offset: int = -8) -> str:
    """
    Convert a UTC time string to local conference time.

    Args:
        utc_time_str: ISO format UTC time string (e.g. "2025-12-02T17:00:00Z").
            Strings carrying an explicit UTC offset (e.g. "...+00:00" or
            "...-06:00") are normalized to true UTC before conversion
            (the previous implementation silently discarded the offset).
        timezone_offset: Hours offset from UTC (default: -8 for PST).

    Returns:
        Local time string in the format "9:00 AM PST".

    Raises:
        ValueError: If the time string cannot be parsed.

    Example:
        >>> convert_utc_to_local("2025-12-02T17:00:00Z")
        '9:00 AM PST'
    """
    try:
        s = utc_time_str.strip()
        # datetime.fromisoformat() rejects a trailing 'Z' on Python < 3.11;
        # rewrite it as an explicit +00:00 offset so all versions parse it.
        if s.endswith('Z'):
            s = s[:-1] + '+00:00'

        if 'T' in s:
            utc_dt = datetime.fromisoformat(s)
        else:
            # Fall back for "YYYY-MM-DD HH:MM:SS" without a T separator.
            utc_dt = datetime.strptime(s, "%Y-%m-%d %H:%M:%S")

        # Normalize any timezone-aware input to naive UTC before shifting.
        if utc_dt.tzinfo is not None:
            utc_dt = (utc_dt - utc_dt.utcoffset()).replace(tzinfo=None)

        # Shift into the conference's local time.
        local_dt = utc_dt + timedelta(hours=timezone_offset)

        # Render as a 12-hour clock.  NOTE(review): the suffix is always
        # "PST" regardless of timezone_offset -- kept for backward
        # compatibility with callers that parse this exact format.
        hour_12 = local_dt.hour % 12 or 12
        am_pm = "AM" if local_dt.hour < 12 else "PM"
        return f"{hour_12}:{local_dt.minute:02d} {am_pm} PST"
    except (ValueError, AttributeError) as e:
        raise ValueError(f"Could not parse time string '{utc_time_str}': {e}")
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
def parse_date_input(date_str: str) -> Optional[datetime]:
    """
    Parse flexible date input formats.

    Supports:
        - ISO format: "2025-12-02"
        - Common formats: "12/02/2025", "December 2, 2025"
        - Day names: "Monday" ... "Sunday" (mapped onto the conference week)
        - Relative: "today", "tomorrow"

    Args:
        date_str: Date string in one of the supported formats.

    Returns:
        Datetime object (midnight), or None if parsing fails.

    Example:
        >>> parse_date_input("2025-12-02")
        datetime.datetime(2025, 12, 2, 0, 0)
    """
    if not date_str:
        return None

    date_str = date_str.strip().lower()

    # Relative dates -- documented in the original docstring but
    # previously unimplemented.
    if date_str == 'today':
        return datetime.combine(datetime.now().date(), datetime.min.time())
    if date_str == 'tomorrow':
        return datetime.combine(datetime.now().date() + timedelta(days=1),
                                datetime.min.time())

    # Try ISO format first.
    try:
        return datetime.fromisoformat(date_str)
    except ValueError:
        pass

    # Common explicit formats (strptime matches month names
    # case-insensitively, so lowercasing above is safe for %B/%b).
    for fmt in ("%Y-%m-%d", "%m/%d/%Y", "%d/%m/%Y", "%B %d, %Y", "%b %d, %Y"):
        try:
            return datetime.strptime(date_str, fmt)
        except ValueError:
            continue

    # Day names, anchored to the NeurIPS 2025 week (Dec 2-7, 2025).
    conference_start = datetime(2025, 12, 2)  # Tuesday
    day_mapping = {
        'monday': conference_start - timedelta(days=1),
        'tuesday': conference_start,
        'wednesday': conference_start + timedelta(days=1),
        'thursday': conference_start + timedelta(days=2),
        'friday': conference_start + timedelta(days=3),
        'saturday': conference_start + timedelta(days=4),
        'sunday': conference_start + timedelta(days=5),
    }
    return day_mapping.get(date_str)
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
def parse_time_preference(time_pref: str) -> Optional[Tuple[int, int]]:
    """
    Parse a time-preference string into an hour range.

    Args:
        time_pref: Preference such as "morning", "afternoon", or "9:00-12:00".

    Returns:
        Tuple of (start_hour, end_hour) in 24-hour format, or None if
        the string is empty or unrecognized.

    Example:
        >>> parse_time_preference("morning")
        (8, 12)
        >>> parse_time_preference("9:00-15:00")
        (9, 15)
    """
    if not time_pref:
        return None

    normalized = time_pref.strip().lower()

    # Named presets map directly onto fixed hour windows.
    named_windows = {
        'morning': (8, 12),
        'afternoon': (12, 17),
        'evening': (17, 21),
        'early': (8, 10),
        'late': (19, 21),
    }
    window = named_windows.get(normalized)
    if window is not None:
        return window

    # Explicit ranges: "9:00-12:00", "09:00-12:00", "9-12", "9 - 12" ...
    m = re.match(r'(\d{1,2})(?::(\d{2}))?[\s\-]+(\d{1,2})(?::(\d{2}))?', normalized)
    if m is None:
        return None
    return (int(m.group(1)), int(m.group(3)))
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
def format_time_slot(start_time: str, end_time: str) -> str:
    """
    Format a start/end UTC time pair for display.

    Args:
        start_time: Start time in UTC format.
        end_time: End time in UTC format.

    Returns:
        Formatted range such as "9:00 AM - 11:00 AM PST"; falls back to
        the raw input strings when either time cannot be parsed.

    Example:
        >>> format_time_slot("2025-12-02T17:00:00Z", "2025-12-02T19:00:00Z")
        '9:00 AM - 11:00 AM PST'
    """
    try:
        start_str = convert_utc_to_local(start_time)
        end_str = convert_utc_to_local(end_time)

        # Drop the redundant timezone suffix on the start when both match.
        if start_str.endswith(' PST') and end_str.endswith(' PST'):
            start_str = start_str[:-4]

        return f"{start_str} - {end_str}"
    except ValueError:
        # Unparseable input: show the raw values rather than failing.
        return f"{start_time} - {end_time}"
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
def format_date_header(date_str: str) -> str:
    """
    Format date for section headers.

    Args:
        date_str: Date string (ISO format, optionally with a 'T' time
            component) or an already-parsed datetime object.

    Returns:
        Formatted date like "Tuesday, December 02, 2025" (note: %d
        zero-pads the day of month), or str(date_str) unchanged when
        parsing fails.

    Example:
        >>> format_date_header("2025-12-02")
        'Tuesday, December 02, 2025'
    """
    try:
        if isinstance(date_str, str):
            # Drop any time component before parsing just the date part.
            dt = datetime.fromisoformat(date_str.split('T')[0])
        else:
            dt = date_str

        return dt.strftime("%A, %B %d, %Y")
    except (ValueError, AttributeError):
        # Unparseable input is rendered verbatim rather than raising.
        return str(date_str)
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
def cluster_papers_by_room(papers: list, time_slot_key: str = 'session') -> dict:
    """
    Group papers by room within their time slots.

    Args:
        papers: Paper dictionaries carrying 'room_name' and session info.
        time_slot_key: Dict key used as the time-slot grouping key
            (default: 'session').

    Returns:
        Nested dict: {time_slot: {room_name: [papers]}}

    Example:
        >>> papers = [
        ...     {'session': 'Morning', 'room_name': 'Hall A', 'name': 'Paper 1'},
        ...     {'session': 'Morning', 'room_name': 'Hall A', 'name': 'Paper 2'},
        ... ]
        >>> cluster_papers_by_room(papers)
        {'Morning': {'Hall A': [...]}}
    """
    grouped = {}
    for entry in papers:
        slot = entry.get(time_slot_key, 'Unknown Session')
        room = entry.get('room_name', 'Unknown Room')
        # setdefault creates both nesting levels on first sight of a key.
        grouped.setdefault(slot, {}).setdefault(room, []).append(entry)
    return grouped
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
agentic_nav/utils/__init__.py
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
from agentic_nav.utils.tooling import infer_tool, _json_type
|
| 2 |
-
from agentic_nav.utils.logger import setup_logging
|
| 3 |
-
from agentic_nav.utils.embedding_generator import batch_embed_documents
|
|
|
|
|
|
|
|
|
|
|
|
agentic_nav/utils/cli/__init__.py
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
from agentic_nav.utils.cli.editor import open_editor
|
| 2 |
-
from agentic_nav.utils.cli.help import print_help
|
| 3 |
-
from agentic_nav.utils.cli.history import show_history
|
|
|
|
|
|
|
|
|
|
|
|
agentic_nav/utils/cli/editor.py
DELETED
|
@@ -1,29 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import tempfile
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
def open_editor(initial_text=""):
    """Open the user's $EDITOR on a temp file and return the edited text.

    The temp file is pre-filled with *initial_text* and deleted after
    reading.  Falls back to notepad (Windows) or nano (elsewhere) when
    $EDITOR is unset.  Returns the stripped file contents.
    """
    editor = os.environ.get("EDITOR")
    if not editor:
        # Minimal sensible defaults
        if os.name == "nt":
            editor = "notepad"
        else:
            editor = "nano"
    # delete=False so the editor can reopen the path after the handle is
    # closed (an open NamedTemporaryFile is locked on Windows).
    with tempfile.NamedTemporaryFile(suffix=".md", delete=False, mode="w+", encoding="utf-8") as tf:
        path = tf.name
        tf.write(initial_text)
        tf.flush()
    try:
        # Open editor and wait
        # NOTE(review): the path is only wrapped in double quotes; an
        # $EDITOR value with shell metacharacters runs through the shell
        # as-is -- acceptable for a trusted local env var, but worth
        # confirming subprocess.run([...]) isn't preferred here.
        rc = os.system(f'{editor} "{path}"')
        if rc != 0:
            print(f"(editor exit code {rc})")
        with open(path, "r", encoding="utf-8") as f:
            content = f.read()
    finally:
        # Best-effort cleanup of the temp file.
        try:
            os.unlink(path)
        except Exception:
            pass
    return content.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
agentic_nav/utils/cli/help.py
DELETED
|
@@ -1,14 +0,0 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
def print_help():
    """Print the chat REPL's slash-command reference to stdout."""
    # The literal below is user-facing output; keep its layout intact.
    help_text = """
Commands:
  /help          Show this help
  /exit          Exit the chat
  /system        Set or replace system prompt (multi-line via $EDITOR)
  /edit          Compose multi-line user message via $EDITOR
  /history       Show conversation history (JSON)
  /save <path>   Save conversation history to a file (JSON)
Typing anything else will send it as a user message.
"""
    print(help_text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
agentic_nav/utils/cli/history.py
DELETED
|
@@ -1,11 +0,0 @@
|
|
| 1 |
-
|
| 2 |
-
def show_history(messages):
    """Pretty-print a chat transcript: one underlined header per message,
    followed by its content and a blank separator line."""
    for idx, msg in enumerate(messages):
        header = f"[{idx}] {msg.get('role', '')} {msg.get('_ts', '')}"
        print(header)
        print("-" * len(header))
        print(msg.get("content", ""))
        print()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
agentic_nav/utils/embedding_generator.py
DELETED
|
@@ -1,151 +0,0 @@
|
|
| 1 |
-
import logging
|
| 2 |
-
|
| 3 |
-
import litellm
|
| 4 |
-
import numpy as np
|
| 5 |
-
import spaces
|
| 6 |
-
|
| 7 |
-
from litellm import embedding
|
| 8 |
-
from sentence_transformers import SentenceTransformer
|
| 9 |
-
from tqdm import tqdm
|
| 10 |
-
|
| 11 |
-
from typing import List
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
LOGGER = logging.getLogger(__name__)
|
| 15 |
-
local_embedding_model = None
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
class EmbeddingResponse:
    """Minimal stand-in for a LiteLLM embedding response.

    Exposes ``.data`` as a list of items, each with ``.embedding`` (a
    plain Python list) and ``.index`` attributes, matching the shape
    callers expect from litellm.embedding().
    """

    class _Item:
        # One embedding row plus its position in the input batch.
        def __init__(self, embedding, index):
            self.embedding = embedding
            self.index = index

    def __init__(self, embeddings):
        self.data = [
            self._Item(emb.tolist(), idx)
            for idx, emb in enumerate(embeddings)
        ]
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
def _get_local_model(embedding_model_name: str = "nomic-ai/nomic-embed-text-v1.5"):
    """Return the process-wide SentenceTransformer, loading it on first use.

    The instance is cached in the module-level ``local_embedding_model``
    global so repeated calls never reload the weights.

    NOTE(review): the cache ignores *embedding_model_name* after the
    first call -- a later call with a different model name still returns
    the originally loaded model.
    """
    global local_embedding_model
    if local_embedding_model is None:
        LOGGER.info(f"Loading embedding model: {embedding_model_name}")
        local_embedding_model = SentenceTransformer(
            embedding_model_name, trust_remote_code=True
        )
    return local_embedding_model
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
@spaces.GPU
def embed_hf_spaces(input, embedding_model_name: str = "nomic-ai/nomic-embed-text-v1.5", api_base=None, **kwargs):
    """
    Drop-in replacement for litellm.embedding(), computed locally.

    Decorated with @spaces.GPU so HuggingFace Spaces schedules the call
    on a GPU worker.

    Args:
        input: Single string or list of strings to embed
        embedding_model_name: HuggingFace model name to use
        api_base: Ignored for local embedding
        **kwargs: Additional args like num_ctx (ignored for local)

    Returns:
        Object with same structure as LiteLLM response
    """
    # Get model (loads only on first call)
    model_instance = _get_local_model(embedding_model_name)

    # Accept either a single string or an already-batched list.
    texts = [input] if isinstance(input, str) else input
    embeddings = model_instance.encode(
        texts,
        convert_to_tensor=True,
        show_progress_bar=False,
        # Unit-length vectors: dot product == cosine similarity downstream.
        normalize_embeddings=True
    )

    # Move tensors off the accelerator before wrapping in the response.
    embeddings_np = embeddings.cpu().numpy()

    return EmbeddingResponse(embeddings_np)
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
def embedding_fn(model, input, api_base, **kwargs):
    """Dispatch an embedding request to the backend selected by *api_base*.

    "hf_spaces_local" routes to the in-process SentenceTransformer;
    anything containing "localhost" or "ollama.com" goes through
    litellm.embedding(); every other value raises NotImplementedError.
    """
    if api_base == "hf_spaces_local":
        return embed_hf_spaces(input=input, embedding_model_name=model, api_base=api_base, **kwargs)
    if "localhost" in api_base or "ollama.com" in api_base:
        return embedding(input=input, model=model, api_base=api_base, **kwargs)
    raise NotImplementedError(
        f"Unknown api_base for provider {api_base}. Available options: hf_spaces_local, ollama local (http://localhost:11435), ollama cloud (https://ollama.com)"
    )
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
def batch_embed_documents(
    texts: List[str],
    batch_size: int = 1,
    embedding_model: str = "nomic-ai/nomic-embed-text-v1.5",
    api_base: str = "hf_spaces_local",
    show_progress: bool = False,
) -> np.ndarray:
    """Embed *texts* in batches and return L2-normalized float32 vectors.

    None entries are replaced with empty strings before embedding.  When
    a whole batch fails, it is retried one sample at a time; samples
    rejected with litellm.BadRequestError are logged and skipped, so the
    returned array can have fewer rows than len(texts).

    Returns:
        np.ndarray of shape (n_embedded, dim); an empty (0, 0) array for
        empty input.  Rows are unit-length (suitable for cosine via
        inner product, e.g. faiss IndexFlatIP).
    """
    if not texts:
        return np.array([], dtype="float32").reshape(0, 0)

    if None in texts:
        LOGGER.warning(f"WARNING: Detected documents with 'None' values. Replacing 'None' with an empty string...")
        texts = ['' if doc is None else doc for doc in texts]

    vecs: List[List[float]] = []
    for i in tqdm(range(0, len(texts), batch_size), disable=not show_progress):
        chunk = texts[i:i + batch_size]
        try:
            resp = embedding_fn(
                model=embedding_model,
                input=chunk,
                api_base=api_base,
                **{"num_ctx": 2048}
            )
        except Exception as e:
            # Whole-batch failure: retry the chunk one sample at a time so
            # a single bad document does not sink its neighbors.
            LOGGER.error(f"Error during embedding batch {i}-{i + batch_size}: {e}. Falling back to single sample processing")
            individual_responses = []
            ctr = i
            for sample in chunk:
                try:
                    individual_responses.append(
                        embedding_fn(
                            model=embedding_model,
                            input=sample,
                            api_base=api_base,
                            **{"num_ctx": 2048}
                        )
                    )
                except litellm.BadRequestError:
                    # Skip permanently-rejected samples; operator retries later.
                    LOGGER.error(f"Encountered error processing paper #{ctr}. Please inspect and retry afterwards.")
                ctr += 1

            LOGGER.debug(f"Single sample response from embedding model: {individual_responses}")

            # Extract embeddings from individual responses
            # NOTE(review): d["embedding"] subscripting assumes dict-like
            # response items; the local EmbeddingResponse wraps items as
            # attribute objects (d.embedding), which would raise TypeError
            # here -- confirm the hf_spaces_local path is exercised.
            for individual_resp in individual_responses:
                vecs.extend([d["embedding"] for d in individual_resp.data])
        else:
            # Normal batch processing
            vecs.extend([d["embedding"] for d in resp.data])

    arr = np.array(vecs, dtype="float32")
    # cosine similarity: normalize to unit length and use IndexFlatIP
    norms = np.linalg.norm(arr, axis=1, keepdims=True) + 1e-12
    return arr / norms
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
if __name__ == "__main__":
    # Manual smoke test: embed five toy strings against a local Ollama
    # server (port 11435, as started by docker-compose) and print the
    # resulting matrix shape.
    res = batch_embed_documents(
        texts=[
            "test1",
            "test2",
            "test3",
            "test4",
            "test5"
        ],
        batch_size=2,
        embedding_model="ollama/nomic-embed-text",
        api_base="http://localhost:11435"
    )
    print(f"Result shape: {res.shape}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
agentic_nav/utils/file_handlers.py
DELETED
|
@@ -1,10 +0,0 @@
|
|
| 1 |
-
import json
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
def save_chat_history(messages, path):
    """Write a chat transcript to *path* as pretty-printed UTF-8 JSON.

    Failures are reported on stdout rather than raised, so a bad path
    never crashes the chat loop.
    """
    try:
        with open(path, "w", encoding="utf-8") as fp:
            json.dump(messages, fp, indent=2, ensure_ascii=False)
        print(f"Saved to {path}")
    except Exception as exc:
        print("Save failed:", exc)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
agentic_nav/utils/logger.py
DELETED
|
@@ -1,49 +0,0 @@
|
|
| 1 |
-
import logging
|
| 2 |
-
import logging.handlers
|
| 3 |
-
|
| 4 |
-
from datetime import datetime
|
| 5 |
-
from pathlib import Path
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
def setup_logging(log_dir: str = "logs", level: str = "INFO", console_level: str = "WARNING"):
    """
    Configure logging for the entire application.

    Attaches one console handler and one rotating file handler to the
    root logger.  Log files are named by start time and capped at 10 MB
    with five rotated backups.

    Args:
        log_dir: Directory for log files; created (including any missing
            parent directories) if it does not exist.
        level: Root logger level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
        console_level: Console handler level - defaults to WARNING to avoid
            interfering with CLI display. Set to INFO for verbose output.

    Note:
        Calling this more than once adds duplicate handlers; invoke it a
        single time at application start-up.
    """
    # parents=True so nested paths like "out/run1/logs" work on first run
    # (the old mkdir(exist_ok=True) failed when the parent was missing).
    Path(log_dir).mkdir(parents=True, exist_ok=True)

    # Root logger configuration
    root_logger = logging.getLogger()
    root_logger.setLevel(getattr(logging, level.upper()))

    # Console handler - set to WARNING by default to not interfere with CLI display
    console_handler = logging.StreamHandler()
    console_handler.setLevel(getattr(logging, console_level.upper()))
    console_format = logging.Formatter(
        "%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S"
    )
    console_handler.setFormatter(console_format)

    # File handler - for production; always captures DEBUG for post-mortems.
    time_now = datetime.now().strftime("%Y-%m-%d_%H-%M")

    file_handler = logging.handlers.RotatingFileHandler(
        f"{log_dir}/{time_now}_llm_agents.log",
        maxBytes=10 * 1024 * 1024,  # 10MB
        backupCount=5
    )
    file_handler.setLevel(logging.DEBUG)
    file_format = logging.Formatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(funcName)s:%(lineno)d - %(message)s"
    )
    file_handler.setFormatter(file_format)

    root_logger.addHandler(console_handler)
    root_logger.addHandler(file_handler)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
agentic_nav/utils/tooling.py
DELETED
|
@@ -1,44 +0,0 @@
|
|
| 1 |
-
from __future__ import annotations
|
| 2 |
-
|
| 3 |
-
import inspect
|
| 4 |
-
|
| 5 |
-
from typing import Any, Dict, List, Callable, get_args, get_origin, Literal
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
def _json_type(t: Any) -> Dict[str, Any]:
|
| 9 |
-
origin, args = get_origin(t), get_args(t)
|
| 10 |
-
if origin is Literal:
|
| 11 |
-
return {"type": "string", "enum": list(args)}
|
| 12 |
-
if origin in (list, List):
|
| 13 |
-
return {"type": "array", "items": {"type": "string"}}
|
| 14 |
-
if t in (str,): return {"type": "string"}
|
| 15 |
-
if t in (int,): return {"type": "integer"}
|
| 16 |
-
if t in (float,): return {"type": "number"}
|
| 17 |
-
if t in (bool,): return {"type": "boolean"}
|
| 18 |
-
return {"type": "string"}
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
def infer_tool(func: Callable[..., Any], tool_args: Dict[Any, Any]) -> Dict[str, Any]:
|
| 22 |
-
sig = inspect.signature(func)
|
| 23 |
-
hints = getattr(func, "__annotations__", {})
|
| 24 |
-
props, required = {}, []
|
| 25 |
-
for name, p in sig.parameters.items():
|
| 26 |
-
if name in ("self", "cls"): continue
|
| 27 |
-
schema = _json_type(hints.get(name, str))
|
| 28 |
-
if p.default is inspect._empty: required.append(name)
|
| 29 |
-
props[name] = schema
|
| 30 |
-
|
| 31 |
-
parameter_values = {}
|
| 32 |
-
for arg_name, arg_val in tool_args.items():
|
| 33 |
-
if arg_name in props.keys():
|
| 34 |
-
parameter_values[arg_name] = arg_val
|
| 35 |
-
|
| 36 |
-
return {
|
| 37 |
-
"type": "function",
|
| 38 |
-
"function": {
|
| 39 |
-
"name": func.__name__,
|
| 40 |
-
"description": (inspect.getdoc(func) or f"Call {func.__name__}"),
|
| 41 |
-
"parameters": {"type": "object", "properties": props, "required": required},
|
| 42 |
-
},
|
| 43 |
-
"parameter_properties_values": parameter_values
|
| 44 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# HuggingFace Spaces entry point: delegates to the browser UI's main().
from agentic_nav.frontend.browser_ui import main


if __name__ == "__main__":
    main()
|
data/.keep
DELETED
|
File without changes
|
docker-compose.yaml
DELETED
|
@@ -1,137 +0,0 @@
|
|
| 1 |
-
services:
|
| 2 |
-
neo4j_db:
|
| 3 |
-
image: neo4j:5.26.0
|
| 4 |
-
container_name: neo4j_db
|
| 5 |
-
expose:
|
| 6 |
-
- "7474"
|
| 7 |
-
- "7687"
|
| 8 |
-
ports:
|
| 9 |
-
- "7474:7474" # HTTP
|
| 10 |
-
- "7687:7687" # Bolt
|
| 11 |
-
environment:
|
| 12 |
-
# Authentication
|
| 13 |
-
      # Default to the same credentials the healthcheck and webinterface fall back to.
      - NEO4J_AUTH=${NEO4J_USERNAME:-neo4j}/${NEO4J_PASSWORD:-llm_agents}
|
| 14 |
-
|
| 15 |
-
# Memory settings
|
| 16 |
-
- NEO4J_server_memory_heap_initial__size=512m
|
| 17 |
-
- NEO4J_server_memory_heap_max__size=2G
|
| 18 |
-
- NEO4J_server_memory_pagecache_size=2G
|
| 19 |
-
- NEO4J_db_memory_transaction_total_max=3G
|
| 20 |
-
- NEO4J_dbms_memory_transaction_total_max=3G
|
| 21 |
-
|
| 22 |
-
# APOC plugin (optional but recommended)
|
| 23 |
-
- NEO4J_PLUGINS=["apoc"]
|
| 24 |
-
|
| 25 |
-
# Accept license (required for Enterprise features, remove if using Community)
|
| 26 |
-
# - NEO4J_ACCEPT_LICENSE_AGREEMENT=yes
|
| 27 |
-
volumes:
|
| 28 |
-
- neo4j_data:/data
|
| 29 |
-
- neo4j_logs:/logs
|
| 30 |
-
- neo4j_import:/var/lib/neo4j/import
|
| 31 |
-
- neo4j_plugins:/plugins
|
| 32 |
-
restart: unless-stopped
|
| 33 |
-
healthcheck:
|
| 34 |
-
test: [ "CMD-SHELL", "cypher-shell -u ${NEO4J_USERNAME:-neo4j} -p ${NEO4J_PASSWORD:-llm_agents} 'RETURN 1'" ]
|
| 35 |
-
interval: 10s
|
| 36 |
-
timeout: 5s
|
| 37 |
-
retries: 10
|
| 38 |
-
start_period: 30s
|
| 39 |
-
networks:
|
| 40 |
-
- llm_agents_net
|
| 41 |
-
|
| 42 |
-
ollama_embed:
|
| 43 |
-
image: ollama/ollama:latest
|
| 44 |
-
container_name: ollama_embed
|
| 45 |
-
ports:
|
| 46 |
-
- "11435:11434"
|
| 47 |
-
volumes:
|
| 48 |
-
- ~/.ollama:/root/.ollama
|
| 49 |
-
environment:
|
| 50 |
-
- OLLAMA_HOST=0.0.0.0
|
| 51 |
-
- NVIDIA_VISIBLE_DEVICES=all
|
| 52 |
-
- NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
| 53 |
-
- EMBEDDING_MODEL_NAME=${EMBEDDING_MODEL_NAME}
|
| 54 |
-
restart: always
|
| 55 |
-
entrypoint: [ "/bin/bash", "-c", "\
|
| 56 |
-
ollama serve & \
|
| 57 |
-
sleep 5 && \
|
| 58 |
-
ollama pull $EMBEDDING_MODEL_NAME && \
|
| 59 |
-
wait" ]
|
| 60 |
-
networks:
|
| 61 |
-
- llm_agents_net
|
| 62 |
-
deploy:
|
| 63 |
-
resources:
|
| 64 |
-
reservations:
|
| 65 |
-
devices:
|
| 66 |
-
- driver: nvidia
|
| 67 |
-
count: all
|
| 68 |
-
capabilities: [ gpu ]
|
| 69 |
-
|
| 70 |
-
ollama_agent:
|
| 71 |
-
image: ollama/ollama:latest
|
| 72 |
-
container_name: ollama_agent
|
| 73 |
-
ports:
|
| 74 |
-
- "11436:11434"
|
| 75 |
-
volumes:
|
| 76 |
-
- ~/.ollama:/root/.ollama
|
| 77 |
-
environment:
|
| 78 |
-
- OLLAMA_HOST=0.0.0.0
|
| 79 |
-
- NVIDIA_VISIBLE_DEVICES=all
|
| 80 |
-
- NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
| 81 |
-
- AGENT_MODEL_NAME=${AGENT_MODEL_NAME}
|
| 82 |
-
restart: always
|
| 83 |
-
entrypoint: [ "/bin/bash", "-c", "\
|
| 84 |
-
ollama serve & \
|
| 85 |
-
sleep 5 && \
|
| 86 |
-
ollama pull $AGENT_MODEL_NAME && \
|
| 87 |
-
wait" ]
|
| 88 |
-
networks:
|
| 89 |
-
- llm_agents_net
|
| 90 |
-
deploy:
|
| 91 |
-
resources:
|
| 92 |
-
reservations:
|
| 93 |
-
devices:
|
| 94 |
-
- driver: nvidia
|
| 95 |
-
count: all
|
| 96 |
-
capabilities: [ gpu ]
|
| 97 |
-
|
| 98 |
-
webinterface:
|
| 99 |
-
build:
|
| 100 |
-
context: .
|
| 101 |
-
dockerfile: Dockerfile
|
| 102 |
-
container_name: llm-agents-web
|
| 103 |
-
ports:
|
| 104 |
-
- "7860:7860"
|
| 105 |
-
environment:
|
| 106 |
-
- PYTHONUNBUFFERED=1
|
| 107 |
-
- OLLAMA_API_KEY=${OLLAMA_API_KEY}
|
| 108 |
-
- NEO4J_USERNAME=${NEO4J_USERNAME:-neo4j}
|
| 109 |
-
- NEO4J_PASSWORD=${NEO4J_PASSWORD:-llm_agents}
|
| 110 |
-
- NEO4J_DB_URI=${NEO4J_DB_URI}
|
| 111 |
-
- POPULATE_DATABASE_NIPS2025=false
|
| 112 |
-
- EMBEDDING_MODEL_NAME=${EMBEDDING_MODEL_NAME}
|
| 113 |
-
- EMBEDDING_MODEL_API_BASE=http://ollama_embed:11434
|
| 114 |
-
- AGENT_MODEL_NAME=${AGENT_MODEL_NAME}
|
| 115 |
-
- AGENT_MODEL_API_BASE=http://ollama_agent:11434
|
| 116 |
-
- NEO4J_DB_NODE_RETURN_LIMIT=${NEO4J_DB_NODE_RETURN_LIMIT}
|
| 117 |
-
restart: unless-stopped
|
| 118 |
-
networks:
|
| 119 |
-
- llm_agents_net
|
| 120 |
-
depends_on:
|
| 121 |
-
neo4j_db:
|
| 122 |
-
condition: service_healthy
|
| 123 |
-
ollama_embed:
|
| 124 |
-
condition: service_started
|
| 125 |
-
ollama_agent:
|
| 126 |
-
condition: service_started
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
networks:
|
| 130 |
-
llm_agents_net:
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
volumes:
|
| 134 |
-
neo4j_data:
|
| 135 |
-
neo4j_logs:
|
| 136 |
-
neo4j_import:
|
| 137 |
-
neo4j_plugins:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
graphs/.gitkeep
DELETED
|
File without changes
|
pyproject.toml
DELETED
|
@@ -1,59 +0,0 @@
|
|
| 1 |
-
[project]
|
| 2 |
-
name = "agentic-nav"
|
| 3 |
-
version = "0.1.0"
|
| 4 |
-
description = "Conference navigation agent leveraging graph databases and semantic search to provide paper recommendations, research network exploration, and automated schedule generation for NeurIPS 2025 attendees."
|
| 5 |
-
readme = "README.md"
|
| 6 |
-
authors = [
|
| 7 |
-
{name = "Shiqiang Wang", email = "s.wang9@exeter.ac.uk"},
|
| 8 |
-
{name = "Herbert Woisetschläger", email = "herbert.woisetschlaeger@tum.de"}
|
| 9 |
-
]
|
| 10 |
-
|
| 11 |
-
requires-python = ">=3.10"
|
| 12 |
-
dependencies = [
|
| 13 |
-
"aiofiles",
|
| 14 |
-
"einops",
|
| 15 |
-
"flask",
|
| 16 |
-
"gradio[mcp,oauth]",
|
| 17 |
-
"hatchling",
|
| 18 |
-
"httpx",
|
| 19 |
-
"kaleido",
|
| 20 |
-
"litellm",
|
| 21 |
-
"neo4j",
|
| 22 |
-
"prompt-toolkit",
|
| 23 |
-
"pydantic",
|
| 24 |
-
"pydantic-settings",
|
| 25 |
-
"pyvis>=0.3.2",
|
| 26 |
-
"rich>=13.0.0",
|
| 27 |
-
"sentence-transformers",
|
| 28 |
-
"toon-format",
|
| 29 |
-
"torch==2.8.0",
|
| 30 |
-
"typer",
|
| 31 |
-
]
|
| 32 |
-
|
| 33 |
-
[tool.uv.workspace]
|
| 34 |
-
members = [
|
| 35 |
-
"litellm",
|
| 36 |
-
]
|
| 37 |
-
|
| 38 |
-
[tool.uv.sources]
|
| 39 |
-
litellm = { git = "https://github.com/shiqiangw/litellm.git" }
|
| 40 |
-
toon-format = { git = "https://github.com/toon-format/toon-python.git" }
|
| 41 |
-
|
| 42 |
-
[build-system]
|
| 43 |
-
requires = ["hatchling"]
|
| 44 |
-
build-backend = "hatchling.build"
|
| 45 |
-
|
| 46 |
-
[dependency-groups]
|
| 47 |
-
dev = [
|
| 48 |
-
"pytest>=9.0.1",
|
| 49 |
-
"pytest-asyncio>=1.3.0",
|
| 50 |
-
"pytest-cov>=7.0.0",
|
| 51 |
-
"pytest-mock>=3.15.1",
|
| 52 |
-
]
|
| 53 |
-
|
| 54 |
-
# Hatchling reads build config from the [tool.hatch.*] table, and the package
# directory is agentic_nav (llm_agents is the project's former name).
[tool.hatch.build.targets.wheel]
packages = ["agentic_nav"]
|
| 56 |
-
|
| 57 |
-
[project.scripts]
|
| 58 |
-
agentic-nav-cli = "agentic_nav.frontend.cli:main"
|
| 59 |
-
agentic-nav-web = "agentic_nav.frontend.browser_ui:main"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pytest.ini
DELETED
|
@@ -1,26 +0,0 @@
|
|
| 1 |
-
[pytest]
|
| 2 |
-
minversion = 6.0
|
| 3 |
-
testpaths = tests
|
| 4 |
-
python_files = test_*.py
|
| 5 |
-
python_classes = Test*
|
| 6 |
-
python_functions = test_*
|
| 7 |
-
addopts =
|
| 8 |
-
--strict-markers
|
| 9 |
-
--strict-config
|
| 10 |
-
--verbose
|
| 11 |
-
markers =
|
| 12 |
-
unit: Unit tests
|
| 13 |
-
integration: Integration tests (currently skipped, require full setup)
|
| 14 |
-
slow: Slow tests that require external services
|
| 15 |
-
neo4j: Tests requiring Neo4j database
|
| 16 |
-
ollama: Tests requiring Ollama service
|
| 17 |
-
no_auto_env: Tests that should not use automatic environment variable loading
|
| 18 |
-
asyncio_mode = auto
|
| 19 |
-
asyncio_default_fixture_loop_scope = function
|
| 20 |
-
|
| 21 |
-
[coverage:run]
|
| 22 |
-
source = .
|
| 23 |
-
omit =
|
| 24 |
-
*/tests/*
|
| 25 |
-
*/test_*
|
| 26 |
-
setup.py
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
# This file was autogenerated by uv via the following command:
|
| 2 |
# uv pip compile pyproject.toml
|
| 3 |
-
|
| 4 |
aiofiles==24.1.0
|
| 5 |
# via
|
| 6 |
# llm-agents (pyproject.toml)
|
|
|
|
| 1 |
# This file was autogenerated by uv via the following command:
|
| 2 |
# uv pip compile pyproject.toml
|
| 3 |
+
agentic_nav @ git+https://${GH_USER}:${GH_TOKEN}@github.com/core-aix/agentic-nav.git@dev
|
| 4 |
aiofiles==24.1.0
|
| 5 |
# via
|
| 6 |
# llm-agents (pyproject.toml)
|
scripts/docker-entrypoint.sh
DELETED
|
@@ -1,14 +0,0 @@
|
|
| 1 |
-
#!/bin/bash
# Container entrypoint: optionally seeds the Neo4j database with the
# NeurIPS 2025 knowledge graph, then hands control to the container CMD.
set -e

echo "Neo4j is up - executing command"

# Opt-in database population, controlled by an environment variable.
if [ "${POPULATE_DATABASE_NIPS2025}" = "true" ]; then
    echo "Importing NeurIPS 2025 papers..."
    bash scripts/import_neurips2025_kg.sh
else
    echo "Skipping NeurIPS 2025 paper import (POPULATE_DATABASE_NIPS2025 is not set to 'true')"
fi

echo "Starting main application..."
# exec replaces the shell with the CMD so signals (SIGTERM etc.) reach it directly.
exec "$@"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
scripts/import_neurips2025_kg.sh
DELETED
|
@@ -1,13 +0,0 @@
|
|
| 1 |
-
#!/bin/bash
# Download the pre-built NeurIPS 2025 knowledge graph and import it into the
# Neo4j instance reachable at bolt://neo4j_db:7687.
set -e

# Ensure the download target directory exists (wget -O does not create it).
mkdir -p graphs

# Download the pre-built knowledge graph
wget -O graphs/neurips2025_knowledge_graph.pkl https://syncandshare.lrz.de/dl/fiJPiUkKp1SZAqRX2m76S6/knowledge_graph_thresh_0.6_v3.pkl

# Import the knowledge graph to the database.
# NOTE: the package lives under agentic_nav/ (the old llm_agents/ path is stale).
uv run agentic_nav/tools/knowledge_graph/neo4j_db_importer.py \
    --graph-path graphs/neurips2025_knowledge_graph.pkl \
    --neo4j-uri bolt://neo4j_db:7687 \
    --neo4j-username "$NEO4J_USERNAME" \
    --neo4j-password "$NEO4J_PASSWORD" \
    --batch-size 100 \
    --embedding-dimension 768
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
scripts/prepare_gradio.sh
DELETED
|
@@ -1,18 +0,0 @@
|
|
| 1 |
-
#!/bin/bash
# Fetch (if needed) and build the pinned Gradio frontend from the git submodule.
# Submodule initialization is skipped when the checkout already exists
# (e.g. inside a Docker build context where the folder was copied in).

set -e

# Only initialize submodules if not in Docker (gradio folder not present)
if [ ! -d "gradio/.git" ]; then
    echo "Initializing and updating git submodules..."
    git submodule update --init --recursive
    cd gradio
    echo "Pinned gradio version to GIT revision 648169d85fbeeffc184115c4c92b12957f2a162f (Nov. 12, 2025)"
    git checkout 648169d85fbeeffc184115c4c92b12957f2a162f
    cd ..
fi

echo "Building Gradio frontend..."
cd gradio
bash scripts/build_frontend.sh
cd ..
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tests/__init__.py
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
# Test package
|
|
|
|
|
|