Herbert committed on
Commit
4188210
·
1 Parent(s): ca7c002

Added hf_spaces instructions

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .coveragerc +0 -25
  2. .dockerignore +0 -46
  3. .github/workflows/tests.yaml +0 -35
  4. .gitignore +0 -215
  5. .gitmodules +0 -0
  6. .python-version +0 -1
  7. CLAUDE.md +0 -165
  8. Dockerfile +0 -50
  9. LICENSE +0 -201
  10. README.md +1 -187
  11. agentic_nav/__init__.py +0 -0
  12. agentic_nav/agents/__init__.py +0 -1
  13. agentic_nav/agents/base.py +0 -327
  14. agentic_nav/agents/neurips2025_conference.py +0 -48
  15. agentic_nav/frontend/__init__.py +0 -0
  16. agentic_nav/frontend/browser_ui.py +0 -525
  17. agentic_nav/frontend/cli.py +0 -371
  18. agentic_nav/tools/__init__.py +0 -15
  19. agentic_nav/tools/knowledge_graph/__init__.py +0 -326
  20. agentic_nav/tools/knowledge_graph/file_handler.py +0 -29
  21. agentic_nav/tools/knowledge_graph/graph_generator.py +0 -446
  22. agentic_nav/tools/knowledge_graph/graph_traversal_strategies/__init__.py +0 -15
  23. agentic_nav/tools/knowledge_graph/graph_traversal_strategies/breadth_first_random.py +0 -80
  24. agentic_nav/tools/knowledge_graph/graph_traversal_strategies/depth_first_random.py +0 -78
  25. agentic_nav/tools/knowledge_graph/graph_traversal_strategies/neo4j_builtin.py +0 -50
  26. agentic_nav/tools/knowledge_graph/neo4j_db_importer.py +0 -537
  27. agentic_nav/tools/knowledge_graph/retriever.py +0 -612
  28. agentic_nav/tools/session_routing/__init__.py +0 -210
  29. agentic_nav/tools/session_routing/scheduler.py +0 -377
  30. agentic_nav/tools/session_routing/utils.py +0 -253
  31. agentic_nav/utils/__init__.py +0 -3
  32. agentic_nav/utils/cli/__init__.py +0 -3
  33. agentic_nav/utils/cli/editor.py +0 -29
  34. agentic_nav/utils/cli/help.py +0 -14
  35. agentic_nav/utils/cli/history.py +0 -11
  36. agentic_nav/utils/embedding_generator.py +0 -151
  37. agentic_nav/utils/file_handlers.py +0 -10
  38. agentic_nav/utils/logger.py +0 -49
  39. agentic_nav/utils/tooling.py +0 -44
  40. app.py +5 -0
  41. data/.keep +0 -0
  42. docker-compose.yaml +0 -137
  43. graphs/.gitkeep +0 -0
  44. pyproject.toml +0 -59
  45. pytest.ini +0 -26
  46. requirements.txt +1 -1
  47. scripts/docker-entrypoint.sh +0 -14
  48. scripts/import_neurips2025_kg.sh +0 -13
  49. scripts/prepare_gradio.sh +0 -18
  50. tests/__init__.py +0 -1
.coveragerc DELETED
@@ -1,25 +0,0 @@
1
- [run]
2
- source = llm_agents
3
- omit =
4
- */gradio/*
5
- */tests/*
6
- */__pycache__/*
7
- */.*
8
- */venv/*
9
- */.venv/*
10
-
11
- [report]
12
- exclude_lines =
13
- pragma: no cover
14
- def __repr__
15
- if self.debug:
16
- if settings.DEBUG
17
- raise AssertionError
18
- raise NotImplementedError
19
- if 0:
20
- if __name__ == .__main__.:
21
- class .*\bProtocol\):
22
- @(abc\.)?abstractmethod
23
-
24
- [html]
25
- directory = htmlcov
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.dockerignore DELETED
@@ -1,46 +0,0 @@
1
- # Python
2
- __pycache__
3
- *.py[cod]
4
- *$py.class
5
- *.so
6
- .Python
7
- *.egg-info
8
- dist
9
- build
10
- .eggs
11
-
12
- # Virtual environments
13
- .venv
14
- venv
15
- ENV
16
- env
17
-
18
- # IDE
19
- .vscode
20
- .idea
21
- *.swp
22
- *.swo
23
- *~
24
-
25
- # Version control
26
- .git
27
- .gitignore
28
-
29
- # OS
30
- .DS_Store
31
- Thumbs.db
32
-
33
- # Testing
34
- .pytest_cache
35
- .coverage
36
- htmlcov
37
-
38
- # Documentation
39
- docs/_build
40
-
41
- # Logs
42
- *.log
43
-
44
- # Local development files
45
- .env.local
46
- *.local
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.github/workflows/tests.yaml DELETED
@@ -1,35 +0,0 @@
1
- name: Tests
2
-
3
- on:
4
- push:
5
- branches: [ main, master, dev ]
6
- pull_request:
7
- branches: [ main, master, dev ]
8
-
9
- jobs:
10
- test:
11
- runs-on: ubuntu-latest
12
- strategy:
13
- matrix:
14
- python-version: ['3.14']
15
-
16
- steps:
17
- - uses: actions/checkout@v4
18
- with:
19
- submodules: recursive
20
-
21
- - name: Set up Python ${{ matrix.python-version }}
22
- uses: actions/setup-python@v4
23
- with:
24
- python-version: ${{ matrix.python-version }}
25
-
26
- - name: Install uv
27
- uses: astral-sh/setup-uv@v3
28
-
29
- - name: Install dependencies
30
- run: uv sync
31
-
32
- - name: Run tests with coverage
33
- run: |
34
- uv run pytest --cov=llm_agents --cov-report=term --cov-report=json tests/
35
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.gitignore DELETED
@@ -1,215 +0,0 @@
1
- # Byte-compiled / optimized / DLL files
2
- __pycache__/
3
- *.py[codz]
4
- *$py.class
5
-
6
- # C extensions
7
- *.so
8
-
9
- # Distribution / packaging
10
- .Python
11
- build/
12
- develop-eggs/
13
- dist/
14
- downloads/
15
- eggs/
16
- .eggs/
17
- lib/
18
- lib64/
19
- parts/
20
- sdist/
21
- var/
22
- wheels/
23
- share/python-wheels/
24
- *.egg-info/
25
- .installed.cfg
26
- *.egg
27
- MANIFEST
28
-
29
- # PyInstaller
30
- # Usually these files are written by a python script from a template
31
- # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
- *.manifest
33
- *.spec
34
-
35
- # Installer logs
36
- pip-log.txt
37
- pip-delete-this-directory.txt
38
-
39
- # Unit test / coverage reports
40
- htmlcov/
41
- .tox/
42
- .nox/
43
- .coverage
44
- .coverage.*
45
- .cache
46
- nosetests.xml
47
- coverage.xml
48
- *.cover
49
- *.py.cover
50
- .hypothesis/
51
- .pytest_cache/
52
- cover/
53
-
54
- # Translations
55
- *.mo
56
- *.pot
57
-
58
- # Django stuff:
59
- *.log
60
- local_settings.py
61
- db.sqlite3
62
- db.sqlite3-journal
63
-
64
- # Flask stuff:
65
- instance/
66
- .webassets-cache
67
-
68
- # Scrapy stuff:
69
- .scrapy
70
-
71
- # Sphinx documentation
72
- docs/_build/
73
-
74
- # PyBuilder
75
- .pybuilder/
76
- target/
77
-
78
- # Jupyter Notebook
79
- .ipynb_checkpoints
80
-
81
- # IPython
82
- profile_default/
83
- ipython_config.py
84
-
85
- # pyenv
86
- # For a library or package, you might want to ignore these files since the code is
87
- # intended to run in multiple environments; otherwise, check them in:
88
- # .python-version
89
-
90
- # pipenv
91
- # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
- # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
- # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
- # install all needed dependencies.
95
- #Pipfile.lock
96
-
97
- # UV
98
- # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
- # This is especially recommended for binary packages to ensure reproducibility, and is more
100
- # commonly ignored for libraries.
101
- #uv.lock
102
-
103
- # poetry
104
- # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
- # This is especially recommended for binary packages to ensure reproducibility, and is more
106
- # commonly ignored for libraries.
107
- # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
- #poetry.lock
109
- #poetry.toml
110
-
111
- # pdm
112
- # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
- # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
- # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
- #pdm.lock
116
- #pdm.toml
117
- .pdm-python
118
- .pdm-build/
119
-
120
- # pixi
121
- # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
- #pixi.lock
123
- # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
- # in the .venv directory. It is recommended not to include this directory in version control.
125
- .pixi
126
-
127
- # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
- __pypackages__/
129
-
130
- # Celery stuff
131
- celerybeat-schedule
132
- celerybeat.pid
133
-
134
- # SageMath parsed files
135
- *.sage.py
136
-
137
- # Environments
138
- .env
139
- .envrc
140
- .venv
141
- env/
142
- venv/
143
- ENV/
144
- env.bak/
145
- venv.bak/
146
- .idea/
147
-
148
- # Spyder project settings
149
- .spyderproject
150
- .spyproject
151
-
152
- # Rope project settings
153
- .ropeproject
154
-
155
- # mkdocs documentation
156
- /site
157
-
158
- # mypy
159
- .mypy_cache/
160
- .dmypy.json
161
- dmypy.json
162
-
163
- # Pyre type checker
164
- .pyre/
165
-
166
- # pytype static type analyzer
167
- .pytype/
168
-
169
- # Cython debug symbols
170
- cython_debug/
171
-
172
- # PyCharm
173
- # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
174
- # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
175
- # and can be added to the global gitignore or merged into this file. For a more nuclear
176
- # option (not recommended) you can uncomment the following to ignore the entire idea folder.
177
- #.idea/
178
-
179
- # Abstra
180
- # Abstra is an AI-powered process automation framework.
181
- # Ignore directories containing user credentials, local state, and settings.
182
- # Learn more at https://abstra.io/docs
183
- .abstra/
184
-
185
- # Visual Studio Code
186
- # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
187
- # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
188
- # and can be added to the global gitignore or merged into this file. However, if you prefer,
189
- # you could uncomment the following to ignore the entire vscode folder
190
- # .vscode/
191
-
192
- # Ruff stuff:
193
- .ruff_cache/
194
-
195
- # PyPI configuration file
196
- .pypirc
197
-
198
- # Cursor
199
- # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
200
- # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
201
- # refer to https://docs.cursor.com/context/ignore-files
202
- .cursorignore
203
- .cursorindexingignore
204
-
205
- # Marimo
206
- marimo/_static/
207
- marimo/_lsp/
208
- __marimo__/
209
-
210
-
211
- data/*.json
212
- .vscode/
213
- rag_index_json/
214
- *.pkl
215
- *.json
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.gitmodules DELETED
File without changes
.python-version DELETED
@@ -1 +0,0 @@
1
- 3.10
 
 
CLAUDE.md DELETED
@@ -1,165 +0,0 @@
1
- # CLAUDE.md
2
-
3
- This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
-
5
- ## Project Overview
6
-
7
- This is LLMAgents, a Python package for AI research analysis agents. The system helps researchers browse papers, find similar papers, write summaries, and plan conference schedules using Neo4j knowledge graphs and LLM agents.
8
-
9
- ## Key Commands
10
-
11
- ### Environment Setup
12
- ```bash
13
- # Install dependencies
14
- uv sync
15
-
16
- # Setup environment variables (required before running)
17
- export $(grep -v '^#' .env | xargs)
18
-
19
- # Prepare gradio from source (for Python 3.14 compatibility)
20
- bash scripts/prepare_gradio.sh
21
- ```
22
-
23
- ### Running the Application
24
- ```bash
25
- # CLI interface
26
- uv run agentic-nav-cli -t 0.4 --max-tokens 6000 -c 131072 --max-num-papers 10
27
-
28
- # Web interface
29
- agentic-nav-web
30
- ```
31
-
32
- ### Database and Knowledge Graph
33
- ```bash
34
- # Start required services
35
- docker compose up neo4j_db ollama_embed ollama_agent -d
36
-
37
- # Build knowledge graph from NeurIPS 2025 data
38
- uv run llm_agents/tools/knowledge_graph/graph_generator.py \
39
- --input-json-file data/neurips-2025-orals-posters.json \
40
- --embedding-model $EMBEDDING_MODEL_NAME \
41
- --ollama-server-url $EMBEDDING_MODEL_API_BASE \
42
- --embedding-gen-batch-size 32 \
43
- --max-parallel-workers 28 \
44
- --similarity-threshold 0.8 \
45
- --output-file graphs/knowledge_graph.pkl
46
-
47
- # Import knowledge graph to Neo4j
48
- uv run llm_agents/tools/knowledge_graph/neo4j_db_importer.py \
49
- --graph-path graphs/knowledge_graph.pkl \
50
- --neo4j-uri bolt://localhost:7687 \
51
- --batch-size 100 \
52
- --embedding-dimension 768
53
- ```
54
-
55
- ### Testing
56
- ```bash
57
- # Run all tests (recommended - avoids gradio conflicts)
58
- uv run pytest tests/
59
-
60
- # Run tests with coverage report
61
- uv run pytest tests/ --cov=llm_agents --cov-report=term-missing
62
-
63
- # Alternative: Use the custom test runner
64
- python run_tests.py
65
-
66
- # Run specific test categories
67
- uv run pytest tests/ -m unit # Unit tests only
68
- uv run pytest tests/ -m integration # Integration tests only
69
- uv run pytest tests/ -m "not slow" # Skip slow tests
70
-
71
- # Run tests for specific module
72
- uv run pytest tests/agents/
73
- uv run pytest tests/tools/
74
- uv run pytest tests/utils/
75
- uv run pytest tests/frontend/
76
-
77
- # Run single test file
78
- uv run pytest tests/agents/test_base.py
79
-
80
- # Run with verbose output
81
- uv run pytest tests/ -v
82
-
83
- # Generate HTML coverage report
84
- uv run pytest tests/ --cov=llm_agents --cov-report=html
85
- # View coverage report at htmlcov/index.html
86
-
87
- # Note: Always specify tests/ directory to avoid conflicts with gradio workspace
88
- ```
89
-
90
- ### Development
91
- ```bash
92
- # Run full system with Docker
93
- docker compose up --build -d
94
-
95
- # Import pre-generated NeurIPS 2025 knowledge graph
96
- bash scripts/import_neurips2025_kg.sh
97
- ```
98
-
99
- ## Architecture
100
-
101
- ### Core Components
102
-
103
- 1. **Agent System (`llm_agents/agents/`)**
104
- - `base.py`: Core LLMAgent class with streaming support and tool execution
105
- - `neurips2025_conference.py`: Specialized agent for NeurIPS 2025 conference data
106
- - Uses LiteLLM for model abstraction, supports Ollama models
107
-
108
- 2. **Tools System (`llm_agents/tools/`)**
109
- - Knowledge graph tools: `search_similar_papers`, `find_neighboring_papers`, `traverse_graph`
110
- - Graph traversal strategies: breadth-first, depth-first, neo4j builtin
111
- - Tool registry automatically discovers callable functions
112
-
113
- 3. **Frontend (`llm_agents/frontend/`)**
114
- - `cli.py`: Rich terminal interface with streaming, command history, auto-completion
115
- - `browser_ui.py`: Gradio web interface for browser-based interactions
116
- - Both interfaces support the same agent functionality
117
-
118
- 4. **Knowledge Graph (`llm_agents/tools/knowledge_graph/`)**
119
- - Neo4j-based paper similarity and relationship storage
120
- - Embedding-based vector search for paper discovery
121
- - Support for graph traversal algorithms
122
-
123
- ### Key Data Flow
124
-
125
- 1. User input → Frontend (CLI/Web)
126
- 2. Frontend → Agent (stateless interaction with streaming)
127
- 3. Agent → LLM (via LiteLLM) + Tools (knowledge graph queries)
128
- 4. Tools → Neo4j database for paper retrieval
129
- 5. Results streamed back to user with live markdown rendering
130
-
131
- ## Configuration
132
-
133
- ### Required Environment Variables
134
- ```bash
135
- NEO4J_USERNAME=neo4j
136
- NEO4J_PASSWORD=<password>
137
- EMBEDDING_MODEL_NAME=nomic-embed-text
138
- EMBEDDING_MODEL_API_BASE=http://localhost:11435
139
- AGENT_MODEL_NAME=gpt-oss:20b
140
- AGENT_MODEL_API_BASE=http://localhost:11436
141
- OLLAMA_API_KEY=<optional>
142
- POPULATE_DATABASE_NIPS2025=false
143
- AGENTIC_NAV_LOG_LEVEL=INFO
144
- ```
145
-
146
- ### Model Support
147
- - Primary: Ollama models (local and remote)
148
- - Remote Ollama models via https://ollama.com with API key
149
- - Uses LiteLLM for provider abstraction
150
-
151
- ## Dependencies
152
-
153
- - **Python**: 3.14+ required
154
- - **uv**: For dependency management
155
- - **Neo4j**: Graph database for knowledge storage
156
- - **Ollama**: LLM inference (supports GPU acceleration with Nvidia Container Toolkit)
157
- - **Gradio**: Built from source for Python 3.14 compatibility
158
-
159
- ## Development Notes
160
-
161
- - The system is designed for multi-user sessions via stateless agent interactions
162
- - Streaming responses are supported in both CLI and web interfaces
163
- - Tool calls are automatically executed and results fed back to the LLM
164
- - Chat history can be saved/loaded in JSON format
165
- - Logging is configured per environment with structured output to `logs/` directory
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Dockerfile DELETED
@@ -1,50 +0,0 @@
1
- FROM python:3.14-slim
2
-
3
- WORKDIR /app
4
-
5
- RUN apt-get update && apt-get install -y \
6
- build-essential \
7
- git \
8
- bash \
9
- wget \
10
- curl \
11
- && rm -rf /var/lib/apt/lists/*
12
-
13
- # Install Node.js (required for pnpm and building Gradio frontend)
14
- RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
15
- apt-get install -y nodejs && \
16
- rm -rf /var/lib/apt/lists/*
17
-
18
- # Install pnpm globally
19
- RUN npm install -g pnpm
20
-
21
- # Install uv first (before copying files)
22
- RUN pip install --no-cache-dir uv
23
-
24
- # Copy all necessary files
25
- COPY pyproject.toml uv.lock* ./
26
- COPY .python-version* ./
27
- COPY README.md ./
28
- COPY LICENSE ./
29
- COPY llm_agents/ ./llm_agents/
30
- COPY scripts/ ./scripts/
31
- COPY graphs/ ./graphs/
32
-
33
- RUN mkdir ./gradio
34
- RUN git clone https://github.com/gradio-app/gradio.git gradio/
35
-
36
- # Run the gradio preparation script (build frontend only, submodule already initialized)
37
- RUN bash scripts/prepare_gradio.sh
38
-
39
- # Use uv sync to install dependencies
40
- RUN uv sync
41
-
42
- EXPOSE 7860
43
-
44
- # Set entrypoint
45
- # Download and initialize the NeurIPS 2025 conference knowledge graph
46
- RUN chmod +x /app/scripts/docker-entrypoint.sh
47
- RUN chmod +x /app/scripts/import_neurips2025_kg.sh
48
- ENTRYPOINT ["scripts/docker-entrypoint.sh"]
49
-
50
- CMD ["uv", "run", "llm_agents/frontend/browser_ui.py"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
LICENSE DELETED
@@ -1,201 +0,0 @@
1
- Apache License
2
- Version 2.0, January 2004
3
- http://www.apache.org/licenses/
4
-
5
- TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
-
7
- 1. Definitions.
8
-
9
- "License" shall mean the terms and conditions for use, reproduction,
10
- and distribution as defined by Sections 1 through 9 of this document.
11
-
12
- "Licensor" shall mean the copyright owner or entity authorized by
13
- the copyright owner that is granting the License.
14
-
15
- "Legal Entity" shall mean the union of the acting entity and all
16
- other entities that control, are controlled by, or are under common
17
- control with that entity. For the purposes of this definition,
18
- "control" means (i) the power, direct or indirect, to cause the
19
- direction or management of such entity, whether by contract or
20
- otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
- outstanding shares, or (iii) beneficial ownership of such entity.
22
-
23
- "You" (or "Your") shall mean an individual or Legal Entity
24
- exercising permissions granted by this License.
25
-
26
- "Source" form shall mean the preferred form for making modifications,
27
- including but not limited to software source code, documentation
28
- source, and configuration files.
29
-
30
- "Object" form shall mean any form resulting from mechanical
31
- transformation or translation of a Source form, including but
32
- not limited to compiled object code, generated documentation,
33
- and conversions to other media types.
34
-
35
- "Work" shall mean the work of authorship, whether in Source or
36
- Object form, made available under the License, as indicated by a
37
- copyright notice that is included in or attached to the work
38
- (an example is provided in the Appendix below).
39
-
40
- "Derivative Works" shall mean any work, whether in Source or Object
41
- form, that is based on (or derived from) the Work and for which the
42
- editorial revisions, annotations, elaborations, or other modifications
43
- represent, as a whole, an original work of authorship. For the purposes
44
- of this License, Derivative Works shall not include works that remain
45
- separable from, or merely link (or bind by name) to the interfaces of,
46
- the Work and Derivative Works thereof.
47
-
48
- "Contribution" shall mean any work of authorship, including
49
- the original version of the Work and any modifications or additions
50
- to that Work or Derivative Works thereof, that is intentionally
51
- submitted to Licensor for inclusion in the Work by the copyright owner
52
- or by an individual or Legal Entity authorized to submit on behalf of
53
- the copyright owner. For the purposes of this definition, "submitted"
54
- means any form of electronic, verbal, or written communication sent
55
- to the Licensor or its representatives, including but not limited to
56
- communication on electronic mailing lists, source code control systems,
57
- and issue tracking systems that are managed by, or on behalf of, the
58
- Licensor for the purpose of discussing and improving the Work, but
59
- excluding communication that is conspicuously marked or otherwise
60
- designated in writing by the copyright owner as "Not a Contribution."
61
-
62
- "Contributor" shall mean Licensor and any individual or Legal Entity
63
- on behalf of whom a Contribution has been received by Licensor and
64
- subsequently incorporated within the Work.
65
-
66
- 2. Grant of Copyright License. Subject to the terms and conditions of
67
- this License, each Contributor hereby grants to You a perpetual,
68
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
- copyright license to reproduce, prepare Derivative Works of,
70
- publicly display, publicly perform, sublicense, and distribute the
71
- Work and such Derivative Works in Source or Object form.
72
-
73
- 3. Grant of Patent License. Subject to the terms and conditions of
74
- this License, each Contributor hereby grants to You a perpetual,
75
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
- (except as stated in this section) patent license to make, have made,
77
- use, offer to sell, sell, import, and otherwise transfer the Work,
78
- where such license applies only to those patent claims licensable
79
- by such Contributor that are necessarily infringed by their
80
- Contribution(s) alone or by combination of their Contribution(s)
81
- with the Work to which such Contribution(s) was submitted. If You
82
- institute patent litigation against any entity (including a
83
- cross-claim or counterclaim in a lawsuit) alleging that the Work
84
- or a Contribution incorporated within the Work constitutes direct
85
- or contributory patent infringement, then any patent licenses
86
- granted to You under this License for that Work shall terminate
87
- as of the date such litigation is filed.
88
-
89
- 4. Redistribution. You may reproduce and distribute copies of the
90
- Work or Derivative Works thereof in any medium, with or without
91
- modifications, and in Source or Object form, provided that You
92
- meet the following conditions:
93
-
94
- (a) You must give any other recipients of the Work or
95
- Derivative Works a copy of this License; and
96
-
97
- (b) You must cause any modified files to carry prominent notices
98
- stating that You changed the files; and
99
-
100
- (c) You must retain, in the Source form of any Derivative Works
101
- that You distribute, all copyright, patent, trademark, and
102
- attribution notices from the Source form of the Work,
103
- excluding those notices that do not pertain to any part of
104
- the Derivative Works; and
105
-
106
- (d) If the Work includes a "NOTICE" text file as part of its
107
- distribution, then any Derivative Works that You distribute must
108
- include a readable copy of the attribution notices contained
109
- within such NOTICE file, excluding those notices that do not
110
- pertain to any part of the Derivative Works, in at least one
111
- of the following places: within a NOTICE text file distributed
112
- as part of the Derivative Works; within the Source form or
113
- documentation, if provided along with the Derivative Works; or,
114
- within a display generated by the Derivative Works, if and
115
- wherever such third-party notices normally appear. The contents
116
- of the NOTICE file are for informational purposes only and
117
- do not modify the License. You may add Your own attribution
118
- notices within Derivative Works that You distribute, alongside
119
- or as an addendum to the NOTICE text from the Work, provided
120
- that such additional attribution notices cannot be construed
121
- as modifying the License.
122
-
123
- You may add Your own copyright statement to Your modifications and
124
- may provide additional or different license terms and conditions
125
- for use, reproduction, or distribution of Your modifications, or
126
- for any such Derivative Works as a whole, provided Your use,
127
- reproduction, and distribution of the Work otherwise complies with
128
- the conditions stated in this License.
129
-
130
- 5. Submission of Contributions. Unless You explicitly state otherwise,
131
- any Contribution intentionally submitted for inclusion in the Work
132
- by You to the Licensor shall be under the terms and conditions of
133
- this License, without any additional terms or conditions.
134
- Notwithstanding the above, nothing herein shall supersede or modify
135
- the terms of any separate license agreement you may have executed
136
- with Licensor regarding such Contributions.
137
-
138
- 6. Trademarks. This License does not grant permission to use the trade
139
- names, trademarks, service marks, or product names of the Licensor,
140
- except as required for reasonable and customary use in describing the
141
- origin of the Work and reproducing the content of the NOTICE file.
142
-
143
- 7. Disclaimer of Warranty. Unless required by applicable law or
144
- agreed to in writing, Licensor provides the Work (and each
145
- Contributor provides its Contributions) on an "AS IS" BASIS,
146
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
- implied, including, without limitation, any warranties or conditions
148
- of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
- PARTICULAR PURPOSE. You are solely responsible for determining the
150
- appropriateness of using or redistributing the Work and assume any
151
- risks associated with Your exercise of permissions under this License.
152
-
153
- 8. Limitation of Liability. In no event and under no legal theory,
154
- whether in tort (including negligence), contract, or otherwise,
155
- unless required by applicable law (such as deliberate and grossly
156
- negligent acts) or agreed to in writing, shall any Contributor be
157
- liable to You for damages, including any direct, indirect, special,
158
- incidental, or consequential damages of any character arising as a
159
- result of this License or out of the use or inability to use the
160
- Work (including but not limited to damages for loss of goodwill,
161
- work stoppage, computer failure or malfunction, or any and all
162
- other commercial damages or losses), even if such Contributor
163
- has been advised of the possibility of such damages.
164
-
165
- 9. Accepting Warranty or Additional Liability. While redistributing
166
- the Work or Derivative Works thereof, You may choose to offer,
167
- and charge a fee for, acceptance of support, warranty, indemnity,
168
- or other liability obligations and/or rights consistent with this
169
- License. However, in accepting such obligations, You may act only
170
- on Your own behalf and on Your sole responsibility, not on behalf
171
- of any other Contributor, and only if You agree to indemnify,
172
- defend, and hold each Contributor harmless for any liability
173
- incurred by, or claims asserted against, such Contributor by reason
174
- of your accepting any such warranty or additional liability.
175
-
176
- END OF TERMS AND CONDITIONS
177
-
178
- APPENDIX: How to apply the Apache License to your work.
179
-
180
- To apply the Apache License to your work, attach the following
181
- boilerplate notice, with the fields enclosed by brackets "[]"
182
- replaced with your own identifying information. (Don't include
183
- the brackets!) The text should be enclosed in the appropriate
184
- comment syntax for the file format. We also recommend that a
185
- file or class name and description of purpose be included on the
186
- same "printed page" as the copyright notice for easier
187
- identification within third-party archives.
188
-
189
- Copyright [yyyy] [name of copyright owner]
190
-
191
- Licensed under the Apache License, Version 2.0 (the "License");
192
- you may not use this file except in compliance with the License.
193
- You may obtain a copy of the License at
194
-
195
- http://www.apache.org/licenses/LICENSE-2.0
196
-
197
- Unless required by applicable law or agreed to in writing, software
198
- distributed under the License is distributed on an "AS IS" BASIS,
199
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
- See the License for the specific language governing permissions and
201
- limitations under the License.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md CHANGED
@@ -17,193 +17,7 @@ short_description: Agent for NeurIPS paper discovery and visit schedule builder
17
 
18
  # AgenticNAV - Your AI conference companion
19
 
20
- [![uv](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/uv/main/assets/badge/v0.json)](https://github.com/astral-sh/uv)
21
- ![Coverage](https://github.com/core-aix/agentic-nav/workflows/Tests/badge.svg)
22
 
23
- This repository contains code for an agent that can help you do related work for your next research project.
24
- Given the sheer amount of new publications that are being published at major machine learning conferences, this agent
25
- can help browse papers, find similar papers, and help you write summaries to quickly get an overview of what is currently
26
- going on.
27
 
28
- The agent can also support you in planning your next conference trip by providing a schedule around one or more topics
29
- that you are interested in.
30
 
31
- ## Installation & usage of the web-based interface
32
- The agent is conveniently packaged as a docker image. You can spin up the entire system by using the commands below.
33
- Make sure to have the [Nvidia Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installation) installed.
34
- At the moment we only support `ollama` models.
35
-
36
- Instead of a local agent model, you can also make use of remote ollama models. A full list is available here:
37
- https://docs.ollama.com/cloud.
38
- To make use of these large models, set `AGENT_MODEL_NAME=<your model of choice>` and
39
- `AGENT_MODEL_API_BASE=https://ollama.com`.
40
- Don't forget to set your `OLLAMA_API_KEY` either directly via the environment or in the browser.
41
-
42
- **Important note:** The ollama docker containers cannot use GPU acceleration on MacOS. If you want to use your Mac's GPU,
43
- you need to run ollama without containerization (i.e., you need to manually spin up the ollama server).
44
- With `NEO4J_DB_NODE_RETURN_LIMIT`, we set a strict return limit of 200 nodes per query to avoid overstraining the database.
45
- You can set it as needed for your use case.
46
-
47
- ```commandline
48
- # Database config
49
- echo "NEO4J_USERNAME=neo4j" >> .env
50
- echo "NEO4J_PASSWORD=<a password of your choice>" >> .env
51
- echo "NEO4J_DB_URI=bolt://neo4j_db:7687" >> .env
52
- echo "NEO4J_DB_NODE_RETURN_LIMIT=200" >> .env
53
-
54
- echo "EMBEDDING_MODEL_NAME=nomic-embed-text" >> .env
55
- echo "EMBEDDING_MODEL_API_BASE=http://ollama_agent:11434" >> .env
56
-
57
- echo "AGENT_MODEL_NAME=gpt-oss:20b" >> .env
58
- echo "AGENT_MODEL_API_BASE=http://ollama_agent:11434" >> .env
59
-
60
- # Optional: set your OLLAMA_API_KEY when using remote models
61
- echo "OLLAMA_API_KEY=<your key here>" >> .env
62
-
63
- # Set the following to true if you would like to import our pre-generated knowledge graph for the NeurIPS 2025 conference
64
- # Warning (!): Setting the parameter below to 'true' will clear any existing data inside the docker-based neo4j database
65
- echo "POPULATE_DATABASE_NIPS2025=false" >> .env
66
-
67
- git clone https://github.com/core-aix/agentic-nav.git
68
- cd agentic-nav
69
- docker compose up --build -d
70
- ```
71
-
72
- This will launch the agent and its web interface, available via `http://localhost:7860`, along with a neo4j database
73
- (community edition).
74
- **It will also populate the database with all accepted papers of the NeurIPS 2025 machine learning conference (if you set `POPULATE_DATABASE_NIPS2025=true`).**
75
- We include pair-wise similarity scores to enable graph traversals and the search for broadly related papers.
76
-
77
- After the docker containers are up and running, you can interact with the agent. Have fun!
78
-
79
-
80
- ## Development & contributing to the agent
81
- If you are interested in understanding the system in detail, you may want to run all setup steps manually and avoid a
82
- containerized runtime. Run the following steps to setup a development environment.
83
-
84
- We use [uv](https://docs.astral.sh/uv/) for dependency management.
85
- Our docker containers for serving LLMs use Ollama and GPU acceleration. For that, you need the [Nvidia Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installation)
86
- Make sure to have both installed before you proceed.
87
-
88
- ### Installation
89
- After you cloned the repository, you need to setup the database. We use neo4j to manage the knowledge graph data we need
90
- for the agent to work properly.
91
- **Note:** We are using gradio built from source as the latest release (as of Nov. 12, 2025) does not yet support python 3.14.
92
-
93
- First, export all necessary environment variables:
94
- ```commandline
95
- echo "NEO4J_USERNAME=neo4j" >> .env
96
- echo "NEO4J_PASSWORD=<a password of your choice>" >> .env
97
- echo "NEO4J_DB_URI=bolt://localhost:7687" >> .env
98
- echo "NEO4J_DB_NODE_RETURN_LIMIT=200" >> .env
99
-
100
- echo "EMBEDDING_MODEL_NAME=ollama/nomic-embed-text" >> .env
101
- echo "EMBEDDING_MODEL_API_BASE=http://localhost:11435" >> .env
102
-
103
- echo "AGENT_MODEL_NAME=ollama_chat/gpt-oss:20b" >> .env
104
- echo "AGENT_MODEL_API_BASE=http://localhost:11436" >> .env
105
-
106
- # Optional: set your OLLAMA_API_KEY when using remote models
107
- echo "OLLAMA_API_KEY=<your key here>" >> .env
108
-
109
- # Set the following to true if you would like to import our pre-generated knowledge graph for the NeurIPS 2025 conference
110
- # Warning (!): Setting the parameter below to 'true' will clear any existing data inside the docker-based neo4j database
111
- echo "POPULATE_DATABASE_NIPS2025=false" >> .env
112
-
113
- # Make sure you also have those values in your commandline environment
114
- export $(grep -v '^#' .env | xargs)
115
- ```
116
-
117
- Then get the project files:
118
- ```commandline
119
- git clone https://github.com/core-aix/agentic-nav.git
120
- cd agentic-nav
121
- docker compose up neo4j_db ollama_embed ollama_agent -d
122
-
123
- # The following command is only needed if you'd like to use the gradio-based GUI
124
- # This will eventually go away once gradio bumps their release version to support python 3.14
125
- bash scripts/prepare_gradio.sh
126
-
127
- uv sync
128
- ```
129
-
130
- ### Building the NeurIPS 2025 knowledge graph locally
131
- You can also build the knowledge graph yourself and, for example, swap the embedding model we use by default.
132
- Follow the steps below to do so. Note, that you still need to setup the project as described in the `Installation`
133
- subsection above. Make sure to set `POPULATE_DATABASE_NIPS2025=false` in your .env file.
134
-
135
- #### Get the data
136
- Download https://neurips.cc/static/virtual/data/neurips-2025-orals-posters.json and put the file in the `./data` folder.
137
- ```commandline
138
- wget -O data/neurips-2025-orals-posters.json https://neurips.cc/static/virtual/data/neurips-2025-orals-posters.json
139
- ```
140
-
141
- #### Build the knowledge graph
142
- You can build the knowledge graph per your needs by running the following script:
143
- ```commandline
144
- uv run llm_agents/tools/knowledge_graph/graph_generator.py \
145
- --input-json-file data/neurips-2025-orals-posters.json \
146
- --embedding-model $EMBEDDING_MODEL_NAME \
147
- --ollama-server-url $EMBEDDING_MODEL_API_BASE \
148
- --embedding-gen-batch-size 32 \
149
- --max-parallel-workers 28 \
150
- --similarity-threshold 0.6 \
151
- --output-file graphs/knowledge_graph.pkl \
152
- # --limit-num-papers # Optional
153
- ```
154
- **Important note:** Generating the full graph for over 6k papers can take more than 1 hour. You can find a set of pre-generated
155
- knowledge graphs here (the "thresh" in the file name indicates the `similarity-threshold` for which we create a `similar_to` relationship between papers): [LRZ Sync+Share](https://syncandshare.lrz.de/getlink/fiFMhMLLH7FaQ3Jipqqsye/)
156
-
157
-
158
- #### Importing the knowledge graph to a neo4j database
159
- We provide an importer to move the knowledge graph into a graph database that supports vector-based similarity search.
160
- ```commandline
161
- uv run llm_agents/tools/knowledge_graph/neo4j_db_importer.py \
162
- --graph-path graphs/knowledge_graph.pkl \
163
- --neo4j-uri $NEO4J_DB_URI \
164
- --batch-size 100 \
165
- --embedding-dimension 768 # This must match the vector dims generated by the embedding model.
166
- ```
167
- **Note:** Depending on what your graph looks like this can also take a while (> 20min for 6K papers). Also, beware that
168
- running this script will first clear any existing entries before the new graph is written to the database.
169
-
170
-
171
- ### Agent interactions
172
- We offer two ways of interacting with agents, via the command line and via the browser.
173
- The backend uses LiteLLM, which allows you to use a variety of LLM inference endpoints.
174
- Find details on the various providers [here](https://docs.litellm.ai/docs/providers).
175
-
176
- #### Commandline interface
177
- The agent can also be used via a versatile CLI.
178
- Below are two examples how to run a local and a remote model.
179
- We are using LiteLLM to abstract away from individual inference API providers.
180
- Note, that we currently only test with Ollama models.
181
- ```commandline
182
- uv run agentic-nav-cli \
183
- -t 0.4 \
184
- --max-tokens 6000 \
185
- -c 131072 \
186
- --max-num-papers 10
187
- ```
188
-
189
- #### Web-based interface (beginner friendly)
190
- We use gradio to provide a chat interface with the same functionalities as the commandline-based interface.
191
- You can launch the web app by running:
192
- ```commandline
193
- agentic-nav-web
194
- ```
195
- All the hyperparameters you need to set can be configured in the web interface and will be used in you individual session.
196
- Once you close the browser window, your session will terminate and all custom configuration will be removed.
197
- At the moment, the web UI only supports Ollama models.
198
-
199
- ### Debugging agent interactions
200
- The agent involves a set of asynchronous operations. We provide a built-in logging instance to capture all relevant logs
201
- for debugging. To set the right debugging level for your application, you can use the environment variable `AGENTIC_NAV_LOG_LEVEL`.
202
- By default, it is set to `INFO`.
203
-
204
- #### Running tests
205
- We try to cover all tools and agent functionalities in thorough unit tests.
206
- You can run them via:
207
- ```commandline
208
- uv run pytest tests/
209
- ```
 
17
 
18
  # AgenticNAV - Your AI conference companion
19
 
20
+ PLEASE FIND THE IMPLEMENTATION OF AGENTIC NAV ON GITHUB: [https://github.com/core-aix/agentic-nav](https://github.com/core-aix/agentic-nav)
 
21
 
 
 
 
 
22
 
 
 
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/__init__.py DELETED
File without changes
agentic_nav/agents/__init__.py DELETED
@@ -1 +0,0 @@
1
- from agentic_nav.agents.neurips2025_conference import NeurIPS2025Agent, DEFAULT_NEURIPS2025_AGENT_ARGS
 
 
agentic_nav/agents/base.py DELETED
@@ -1,327 +0,0 @@
1
- import json
2
-
3
- import litellm
4
- import logging
5
-
6
- from dataclasses import dataclass, field
7
- from typing import List, Dict
8
-
9
- from agentic_nav.tools import get_all_tools
10
- from agentic_nav.utils.tooling import infer_tool
11
-
12
- try:
13
- from datetime import datetime, UTC
14
- except ImportError:
15
- from datetime import datetime, timezone
16
- UTC = timezone.utc
17
-
18
- LOGGER = logging.getLogger(__name__)
19
-
20
-
21
- @dataclass
22
- class LLMAgent:
23
- model: str = "ollama_chat/gpt-oss:20b"
24
- api_base: str = "http://localhost:11434"
25
- api_key: str = None
26
- llm_args: dict = field(default_factory=lambda: {"temperature": 0.2, "max_tokens": 6000, "num_ctx": 131072})
27
- tools: List[callable] = field(default_factory=lambda: get_all_tools())
28
- global_tool_args: dict = field(default_factory=lambda: {"max_num_papers": 10})
29
- max_interaction_rounds: int = 10
30
- messages: List[Dict] = field(default_factory=lambda: [])
31
- tool_registry: Dict = None
32
- tool_descriptions: List = None
33
- default_system_prompt: Dict[str, str] = None
34
-
35
- def __remove_model_key_from_llm_args(self, stateful: bool = True):
36
- if stateful:
37
- self.model = self.llm_args["model"]
38
- self.api_base = self.llm_args["api_base"]
39
-
40
- if "model" in self.llm_args.keys():
41
- del self.llm_args["model"]
42
-
43
- if "api_base" in self.llm_args.keys():
44
- del self.llm_args["api_base"]
45
-
46
- def test_llm_connection(self):
47
- self.__remove_model_key_from_llm_args(stateful=True)
48
- try:
49
- response = litellm.completion(
50
- model=self.model,
51
- messages=[{"role": "user", "content": "test", "_ts": str(datetime.now(UTC))}],
52
- tool_choice="auto",
53
- api_base=self.api_base,
54
- api_key=self.api_key,
55
- stream=True,
56
- **self.llm_args,
57
- )
58
-
59
- LOGGER.info(f"Model is available! Response: {response.choices[0].message.content}")
60
- except Exception as e:
61
- LOGGER.error(f"Model not available or connection failed: {str(e)}")
62
-
63
- def setup_session(self, tool_funcs: List[callable] = None):
64
- self.tool_registry = {fn.__name__: fn for fn in self.tools} if tool_funcs is None else {fn.__name__: fn for fn in tool_funcs}
65
- self.tool_descriptions = [infer_tool(fn, tool_args=self.global_tool_args) for fn in self.tool_registry.values()]
66
- LOGGER.info(f"Agent setup and tools ready to use.")
67
- LOGGER.debug(f"Available tools: {self.tools}")
68
-
69
- def remove_session(self):
70
- """De-registers tools and resets messages to the initial state."""
71
- self.tool_registry = None
72
- self.tool_descriptions = None
73
- self.messages = [self.default_system_prompt if not None else {"role": "system", "content": "You are a helpful assistant."}]
74
-
75
- def interact(self, message: Dict):
76
- assert self.tool_registry is not None, "Make sure to call 'setup_session()' before the first interaction."
77
- assert self.tool_descriptions is not None, "Make sure to call 'setup_session()' before the first interaction."
78
-
79
- assert type(message) == dict, "Make sure to pass a dictionary as next message for the agent."
80
- assert "role" in message.keys(), "The message must contain a 'role' key."
81
- assert "content" in message.keys(), "The message must contain a 'content' key."
82
-
83
- self.__remove_model_key_from_llm_args(stateful=True)
84
- if "_ts" not in message.keys():
85
- message["_ts"] = str(datetime.now(UTC))
86
-
87
- self.messages.append(message)
88
- for _ in range(self.max_interaction_rounds):
89
- collected, calls = self._send_to_llm(
90
- messages=self.messages,
91
- model=self.model,
92
- api_base=self.api_base,
93
- api_key=self.api_key
94
- )
95
- # append the assembled assistant message so tool execution sees the assistant's follow-up
96
- self.messages.append({"role": "assistant", "content": collected, "_ts": str(datetime.now(UTC))})
97
- LOGGER.debug(f"Agent response: {collected}")
98
-
99
- if not calls:
100
- return self.messages
101
- else:
102
- self.messages[-1]["tool_calls"] = calls
103
- LOGGER.debug(f"Agent requested tool calls: {calls}")
104
-
105
- # execute tools and append results
106
- for call in calls:
107
- self.messages.append(
108
- self.call_tool(
109
- tool_call=call
110
- )
111
- )
112
-
113
- LOGGER.debug(f"Interaction complete. Total messages: {len(self.messages)}")
114
- return self.messages
115
-
116
- def interact_stateless(
117
- self,
118
- messages: List[Dict],
119
- model: str,
120
- api_base: str,
121
- api_key: str,
122
- llm_args: Dict = None
123
- ):
124
- """
125
- This method is designed to support multi-user sessions and requires state management outside the agent class.
126
- """
127
- assert self.tool_registry is not None, "Make sure to call 'setup_session()' before the first interaction."
128
- assert self.tool_descriptions is not None, "Make sure to call 'setup_session()' before the first interaction."
129
- self.__remove_model_key_from_llm_args(stateful=False)
130
-
131
- # Sanity check for all messages
132
- for message in messages:
133
- if "_ts" not in message.keys():
134
- message["_ts"] = str(datetime.now(UTC))
135
-
136
- for round_num in range(self.max_interaction_rounds):
137
- # Stream the LLM response
138
- collected = ""
139
- calls = []
140
-
141
- # Create initial assistant message
142
- assistant_msg_idx = len(messages)
143
- messages.append({"role": "assistant", "content": "", "_ts": str(datetime.now(UTC))})
144
-
145
- stream_iter = litellm.completion(
146
- model=model if model is not None else self.model,
147
- messages=messages[:assistant_msg_idx], # Don't include the empty assistant message
148
- tools=self.tool_descriptions,
149
- tool_choice="auto",
150
- api_base=api_base if api_base is not None else self.api_base,
151
- api_key=api_key if api_key is not None else self.api_key,
152
- stream=True,
153
- **llm_args if llm_args is not None else self.llm_args,
154
- )
155
-
156
- for chunk in stream_iter:
157
- choices = chunk.get("choices", []) or []
158
- if not choices:
159
- continue
160
- choice = choices[0]
161
-
162
- # Extract content from chunk
163
- content = None
164
- delta = choice.get("delta")
165
-
166
- if delta and "content" in delta:
167
- content = delta["content"]
168
- elif delta and "message" in delta and isinstance(delta["message"], dict):
169
- content = delta["message"].get("content")
170
-
171
- if delta and "tool_calls" in delta:
172
- calls.extend(delta["tool_calls"] or [])
173
-
174
- if content is None:
175
- msg = choice.get("message")
176
- if isinstance(msg, dict):
177
- content = msg.get("content")
178
-
179
- if content is None:
180
- content = choice.get("text")
181
-
182
- if content:
183
- if not isinstance(content, str):
184
- try:
185
- content = json.dumps(content, ensure_ascii=False)
186
- except Exception:
187
- content = str(content)
188
-
189
- collected += content
190
- # Update the assistant message with accumulated content
191
- messages[assistant_msg_idx]["content"] = collected
192
-
193
- # Yield the updated messages for streaming display
194
- yield messages.copy()
195
-
196
- # After streaming is complete, update with final content
197
- messages[assistant_msg_idx]["content"] = collected
198
- LOGGER.debug(f"Agent response: {collected}")
199
-
200
- if not calls:
201
- yield messages
202
- return
203
- else:
204
- messages[assistant_msg_idx]["tool_calls"] = calls
205
- LOGGER.debug(f"Agent requested tool calls: {calls}")
206
- yield messages.copy()
207
-
208
- # Execute tools and append results
209
- for call in calls:
210
- messages.append(self.call_tool(tool_call=call))
211
- yield messages.copy()
212
-
213
- yield messages
214
-
215
- def _send_to_llm(
216
- self,
217
- messages: List[Dict],
218
- model: str,
219
- api_base: str,
220
- api_key: str,
221
- llm_args: Dict = None
222
- ):
223
- stream_iter = litellm.completion(
224
- model=model if model is not None else self.model,
225
- messages=messages,
226
- tools=self.tool_descriptions,
227
- tool_choice="auto",
228
- api_base=api_base if api_base is not None else self.api_base,
229
- api_key=api_key if api_key is not None else self.api_key,
230
- stream=True,
231
- **llm_args if llm_args is not None else self.llm_args,
232
- )
233
-
234
- collected = ""
235
- calls = []
236
-
237
- for chunk in stream_iter:
238
- choices = chunk.get("choices", []) or []
239
- if not choices:
240
- continue
241
- choice = choices[0]
242
-
243
- # try several places where partial content may appear
244
- content = None
245
- delta = choice.get("delta")
246
-
247
- if "content" in delta:
248
- content = delta["content"]
249
- elif "message" in delta and isinstance(delta["message"], dict):
250
- content = delta["message"].get("content")
251
-
252
- if "tool_calls" in delta:
253
- calls.extend(delta["tool_calls"] or [])
254
-
255
- if content is None:
256
- msg = choice.get("message")
257
- if isinstance(msg, dict):
258
- content = msg.get("content")
259
-
260
- if content is None:
261
- content = choice.get("text")
262
-
263
- if content:
264
- if not isinstance(content, str):
265
- try:
266
- content = json.dumps(content, ensure_ascii=False)
267
- except Exception as e:
268
- LOGGER.error(f"JSON encoding error encountered. {e}. Treating agent response as regular text.")
269
- content = str(content)
270
-
271
- collected += content
272
-
273
- return collected, calls
274
-
275
- def call_tool(self, tool_call: Dict):
276
- name = tool_call["function"]["name"]
277
- args = tool_call["function"].get("arguments", "{}")
278
- LOGGER.debug(f"Preparing tool call: {name}")
279
- LOGGER.debug(f"Expected tool arguments: {args}")
280
- try:
281
- parsed = json.loads(args) if isinstance(args, str) else (args or {})
282
- LOGGER.debug(f"Parsed tool call arguments: {parsed}")
283
- except json.JSONDecodeError:
284
- parsed = {}
285
- LOGGER.warning(f"Tool call arguments: COULD NOT BE PARSED")
286
- out = self.tool_registry[name](**parsed)
287
- LOGGER.debug(f"Tool call output: {parsed}")
288
-
289
- return {
290
- "role": "tool",
291
- "tool_call_id": tool_call.get("id"),
292
- "name": name,
293
- "content": json.dumps(out, ensure_ascii=False),
294
- "_ts": str(datetime.now(UTC))
295
- }
296
-
297
- def set_history(self, messages):
298
- self.messages = messages
299
- LOGGER.info(f"Set new message history.")
300
-
301
- def get_history(self):
302
- return self.messages
303
-
304
- @staticmethod
305
- def set_system_prompt(new_system_prompt: str, messages: List[Dict]):
306
- messages = [m for m in messages if m.get("role") != "system"]
307
- messages.insert(0, {
308
- "role": "system",
309
- "content": new_system_prompt,
310
- "_ts": str(datetime.now(UTC))
311
- })
312
- LOGGER.info(f"New system prompt set and configured.")
313
- LOGGER.debug(f"New system prompt: {new_system_prompt}")
314
- return messages
315
-
316
- def get_system_prompt(self):
317
- for message in self.messages:
318
- if "role" in message.keys() and message["role"] == "system":
319
- return message
320
-
321
- return None
322
-
323
- def get_most_recent_assistant_message(self):
324
- for message in reversed(self.messages):
325
- if message.get("role") == "assistant":
326
- return message
327
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/agents/neurips2025_conference.py DELETED
@@ -1,48 +0,0 @@
1
- import os
2
-
3
- from dataclasses import dataclass
4
- from agentic_nav.agents.base import LLMAgent
5
- from agentic_nav.tools import search_similar_papers, find_neighboring_papers, traverse_graph, build_visit_schedule # <- the tools we expose
6
- from zoneinfo import ZoneInfo
7
-
8
- try:
9
- from datetime import datetime, UTC
10
- except ImportError:
11
- from datetime import datetime, timezone
12
- UTC = timezone.utc
13
-
14
-
15
- DEFAULT_NEURIPS2025_AGENT_ARGS = {
16
- "model": os.environ.get("AGENT_MODEL_NAME", "gpt-oss:120b-cloud"),
17
- "api_base": os.environ.get("AGENT_MODEL_API_BASE", "https://ollama.com"),
18
- "api_key": os.environ.get("OLLAMA_API_KEY"),
19
- "llm_args": {"temperature": 0.2, "max_tokens": 6000, "num_ctx": 131072},
20
- "global_tool_args": {"max_num_papers": 10}
21
- }
22
-
23
-
24
- system = {
25
- "role": "system",
26
- "content": (
27
- "You are an assistant who can help browsing NeurIPS 2025 papers. "
28
- "You are provided with a search tool that can search all accepted papers of NeurIPS 2025. "
29
- "However, note that the search tool only takes paper titles and abstracts as input keywords; "
30
- "it cannot take anything else as the input keywords. "
31
- "However, the output of the search includes various metadata fields such as authors, affiliations, "
32
- "and session times. \n"
33
- "When building a schedule, do not specify the name of the day.\n"
34
- "If you find duplicates, just omit them. Only keep the first appearance.\n"
35
- f"Generally, if you do not find a result, tell the user you don't know.\n"
36
- f"Here is the current timestamp: {datetime.now(ZoneInfo('America/Los_Angeles'))}. The conference is happening in San Diego, California."
37
- )
38
- }
39
-
40
-
41
- @dataclass
42
- class NeurIPS2025Agent(LLMAgent):
43
-
44
- def __init__(self, *args, **kwargs):
45
- super().__init__(*args, **kwargs)
46
- self.messages = [{**system}]
47
- self.tools = [search_similar_papers, find_neighboring_papers, traverse_graph, build_visit_schedule]
48
- self.default_system_prompt = system
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/frontend/__init__.py DELETED
File without changes
agentic_nav/frontend/browser_ui.py DELETED
@@ -1,525 +0,0 @@
1
- """
2
- Gradio web UI that interacts with an agent implementation.
3
-
4
- Features matching terminal UI:
5
- - Multi-turn chat with Markdown rendering
6
- - System prompt editing
7
- - View conversation history
8
- - Save chat history to file
9
- - All model configuration options
10
- - Clear chat functionality
11
- - **Per-user conversation state management with stateless agent**
12
- """
13
- from venv import logger
14
-
15
- import gradio as gr
16
- import os
17
- import datetime
18
- import logging
19
- import json
20
-
21
- from pathlib import Path
22
- from typing import List, Tuple, Optional, Dict
23
-
24
- from agentic_nav.agents import NeurIPS2025Agent, DEFAULT_NEURIPS2025_AGENT_ARGS
25
- from agentic_nav.utils.logger import setup_logging
26
- from agentic_nav.utils.file_handlers import save_chat_history
27
-
28
-
29
# Module-level logger; configured by setup_logging() in __main__.
LOGGER = logging.getLogger(__name__)

# Embedding model endpoint (used indirectly by the retrieval tools).
EMBEDDING_MODEL_NAME = os.environ.get("EMBEDDING_MODEL_NAME", "nomic-embed-text")
EMBEDDING_MODEL_API_BASE = os.environ.get("EMBEDDING_MODEL_API_BASE", "http://localhost:11435")

# Chat-model endpoint for the agent itself.
AGENT_MODEL_NAME = os.environ.get("AGENT_MODEL_NAME", "gpt-oss:20b")
AGENT_MODEL_API_BASE = os.environ.get("AGENT_MODEL_API_BASE", "http://localhost:11436")
# Falls back to the package default when the env var is unset.
OLLAMA_API_KEY = os.environ.get("OLLAMA_API_KEY", DEFAULT_NEURIPS2025_AGENT_ARGS["api_key"])
37
-
38
-
39
def initialize_agent():
    """Create, set up, and return the shared NeurIPS2025Agent instance.

    Model/endpoint settings come from the module-level environment-derived
    constants; llm/tool arguments from DEFAULT_NEURIPS2025_AGENT_ARGS.
    """
    defaults = DEFAULT_NEURIPS2025_AGENT_ARGS
    constructor_kwargs = {
        "model": f"ollama_chat/{AGENT_MODEL_NAME}",
        "api_base": AGENT_MODEL_API_BASE,
        "api_key": OLLAMA_API_KEY,
        "llm_args": defaults["llm_args"],
        "global_tool_args": defaults["global_tool_args"],
    }
    agent = NeurIPS2025Agent(**constructor_kwargs)
    agent.setup_session()
    return agent
50
-
51
-
52
def configure_agent(
    api_base: str,
    api_key: str,
    model: str,
    temperature: float,
    max_tokens: int,
    num_ctx: int,
    max_papers_input: int = None,
    current_config: Dict = None
):
    """Merge the UI-supplied settings into the per-session config dict.

    Mutates ``current_config`` in place and returns it together with a
    human-readable status string for the UI.
    """
    LOGGER.info(f"Agent runtime started via Gradio UI for session")
    updates = {
        "model": model,
        "api_base": api_base,
        "api_key": api_key,
        "llm_args": {
            "temperature": temperature,
            "max_tokens": max_tokens,
            "num_ctx": num_ctx
        },
        "global_tool_args": {"max_num_papers": max_papers_input}
    }
    current_config.update(updates)

    # Never log the API key.
    redacted = {k: v for k, v in current_config.items() if k != "api_key"}
    LOGGER.info(f"User-defined configuration saved. Config: {redacted}")

    return current_config, "✓ Agent initialized successfully!"
82
-
83
-
84
def chat_fn(
    new_message: str,
    history: List[Dict],
    config: Optional[Dict],
    messages: Optional[List[Dict]],
    agent: NeurIPS2025Agent,
) -> Tuple[List[Dict], Optional[List[Dict]]]:
    """
    Handle one chat turn using the stateless agent (generator).

    Yields ``(updated_history, messages)`` pairs while the response streams
    so the Gradio chatbot can render partial output.

    Args:
        new_message: User's input message.
        history: Chat history as list of {"role", "content"} dicts.
        config: Configuration dict with model, api_base, api_key, llm_args.
        messages: Current per-session conversation messages list.
        agent: Agent instance (used statelessly for concurrency safety).
    """
    # Ignore empty submissions.
    if not new_message.strip():
        yield history, messages
        return

    LOGGER.debug(f"USER PROMPT: {new_message}")

    # Safety check: ensure messages is a list seeded with the system prompt.
    if messages is None or not isinstance(messages, list):
        LOGGER.warning("Messages state was not properly initialized, resetting...")
        messages = [agent.get_system_prompt()]

    # Work on copies so Gradio state objects are not mutated in place.
    history = history.copy() if history else []
    messages = messages.copy()

    # Show the user message immediately, with an empty assistant placeholder.
    history.extend([
        {"role": "user", "content": new_message},
        {"role": "assistant", "content": ""},
    ])

    try:
        # Timestamped user message for the conversation log.
        messages.append({
            "role": "user",
            "content": new_message,
            "_ts": str(datetime.datetime.now(datetime.timezone.utc))
        })

        # BUGFIX: initialize before the loop — previously, if
        # interact_stateless yielded nothing, `partial_messages` was
        # referenced unbound after the loop and raised NameError.
        partial_messages = messages
        accumulated_response = ""

        # Stream the response; each yielded list is the full message state.
        for partial_messages in agent.interact_stateless(
            messages=messages,
            model=config["model"],
            api_base=config["api_base"],
            api_key=config["api_key"],
            llm_args=config["llm_args"]
        ):
            # Mirror the latest assistant content into the visible history.
            for msg in reversed(partial_messages):
                if msg.get("role") == "assistant":
                    accumulated_response = msg["content"]
                    break

            history[-1]["content"] = accumulated_response
            yield history, partial_messages

        # Persist the final message list for the session state.
        messages = partial_messages
        LOGGER.info("Agent response generated successfully")

    except Exception as e:
        LOGGER.error(f"Agent encountered an error: {e}", exc_info=True)
        history[-1]["content"] = f"❌ Error: {str(e)}"
        yield history, messages
163
-
164
-
165
def update_system_prompt(
    new_prompt: str,
    messages: Optional[List[Dict]],
    agent: NeurIPS2025Agent
) -> Tuple[str, Optional[List[Dict]]]:
    """Replace the system prompt in the conversation state.

    Args:
        new_prompt: New system prompt text.
        messages: Current message history (may be None on first use).
        agent: Agent instance providing ``set_system_prompt``.

    Returns:
        Tuple of (status_message, updated_messages).
    """
    # Reject whitespace-only prompts up front.
    if not new_prompt.strip():
        return "System prompt cannot be empty.", messages

    try:
        current = [] if messages is None else messages
        current = agent.set_system_prompt(new_system_prompt=new_prompt, messages=current)
        LOGGER.info("System prompt updated")
        LOGGER.info(f"New system prompt: {current[0]}")
        return "✓ System prompt updated successfully!", current
    except Exception as e:
        LOGGER.error(f"Error updating system prompt: {e}")
        return f"Error: {str(e)}", current
197
-
198
-
199
def view_history(messages: Optional[List[Dict]]) -> str:
    """Render the full conversation history as pretty-printed JSON.

    Args:
        messages: Current message history.

    Returns:
        JSON formatted history string, or a warning/error message.
    """
    if messages is not None:
        try:
            rendered = json.dumps(messages, indent=2, ensure_ascii=False)
        except Exception as err:
            LOGGER.error(f"Error viewing history: {err}")
            rendered = f"❌ Error: {str(err)}"
        return rendered
    return "⚠️ No conversation history yet."
217
-
218
-
219
def save_history(filename: str, messages: Optional[List[Dict]]) -> str:
    """Save chat history to a JSON file under ``chat_histories/``.

    Args:
        filename: Optional filename; auto-generated (timestamp + session id)
            when blank.
        messages: Current message history.

    Returns:
        Status message for the UI.
    """
    if messages is None or len(messages) == 0:
        return "⚠️ No conversation history to save."

    try:
        # Create directory if it doesn't exist
        Path("chat_histories/").mkdir(exist_ok=True, parents=True)

        # Generate filename if not provided
        if not filename.strip():
            time_now = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
            # Add session identifier to prevent conflicts between users
            import uuid
            session_id = str(uuid.uuid4())[:8]
            filename = f"chat_histories/{time_now}_session_{session_id}_chat_history.json"
        else:
            filename = filename.strip()
            # Ensure it's in the chat_histories directory.
            # BUGFIX: previously the user-supplied name was discarded and a
            # literal placeholder path was written instead; keep the name.
            if not filename.startswith("chat_histories/"):
                filename = f"chat_histories/{filename}"
            if not filename.endswith(".json"):
                filename += ".json"

        # Save the history
        save_chat_history(messages, filename)

        LOGGER.info(f"Chat history saved to {filename}")
        return f"✓ Chat history saved to: {filename}"

    except Exception as e:
        LOGGER.error(f"Error saving history: {e}")
        return f"❌ Error: {str(e)}"
260
-
261
-
262
def clear_chat(
    config: Optional[Dict],
    messages: Optional[List[Dict]],
    agent: NeurIPS2025Agent
) -> Tuple[str, List, Optional[List[Dict]]]:
    """Reset the visible chat and the per-session message state.

    Args:
        config: Current configuration (unused; kept for callback signature).
        messages: Current message history (replaced wholesale).
        agent: Agent instance supplying the system prompt.

    Returns:
        Tuple of (status_message, empty_history, reset_messages).
    """
    prompt = agent.get_system_prompt()
    # Keep the system prompt as the sole seed message when it is a dict.
    reset_messages = [prompt] if isinstance(prompt, dict) else []
    LOGGER.info("Chat cleared and reset")
    return "✓ Chat cleared!", [], reset_messages
285
-
286
-
287
def submit_message(message, history, config, messages, agent):
    """Thin generator wrapper around chat_fn so both UI callbacks (button
    click and textbox submit) share one entry point."""
    for update in chat_fn(message, history, config, messages, agent):
        yield update
290
-
291
-
292
def main():
    """Build the Gradio Blocks UI, wire up all event handlers, and launch
    the web server on 0.0.0.0:7860."""

    # Setup the (single, shared) agent instance; per-session state lives in
    # gr.State objects, the agent itself is used statelessly.
    agent = initialize_agent()

    with gr.Blocks(
        title="AgenticNAV",
        theme=gr.themes.Default(
            spacing_size=gr.themes.sizes.spacing_sm,
            radius_size=gr.themes.sizes.radius_none
        )) as webapp:

        gr.Markdown(
            "# 🤖 AgenticNAV - Explore NeurIPS 2025 papers and build your personalized schedule, effortlessly!\n "
            "This agent can help you explore the more than 5000 papers at this year's NeurIPS conference. "
            "You can start chatting right away but see below for more specific instructions on how to use the agent "
            "with your favorite model and inference config. You can also set a custom system prompt.\n\n "
            "**Note:** This is an experimental deployment and LLMs can make mistakes. This can mean that the agent may "
            "not discover your paper even though it is presented at the conference."
        )

        # Session state for agent config and messages (one copy per browser
        # session).
        config_state = gr.State(value=DEFAULT_NEURIPS2025_AGENT_ARGS)
        messages_state = gr.State(value=[agent.get_system_prompt()])

        with gr.Row():
            with gr.Column():
                # Main chat interface
                chatbot = gr.Chatbot(
                    label="Conversation Trail",
                    height=750,
                    type="messages",
                    show_copy_button=True,
                )

                with gr.Row():
                    msg_input = gr.Textbox(
                        label="Your message",
                        placeholder="Type your message here...",
                        lines=3,
                        scale=4
                    )
                    submit_btn = gr.Button("Send", variant="primary", scale=1)

                with gr.Row():
                    clear_btn = gr.Button("🗑️ Clear Chat", size="sm")
                    save_btn = gr.Button("💾 Save History", size="sm")

        with gr.Row():
            # Help text at bottom
            gr.Markdown("""
            ### 📖 Usage Guide

            1. **Initialize**: Configure settings and click "Initialize Agent"
            2. **Chat**: Type messages and press Enter or click Send
            3. **System Prompt**: Customize the agent's behavior via System Prompt panel
            4. **History**: View or save your conversation using the History & Save panel
            5. **Clear**: Start a fresh conversation with the Clear Chat button

            ### Note on Ollama API Keys
            In case you are experiencing an error calling the agent model (usually indicated by a message
            containing the word "unauthorized"), you may go to https://ollama.com and generate your own key.
            You can provide it in the configuration below. It will not be stored on our system and gets deleted
            when you end session (i.e., close your browser window).

            **Note**: Each browser session maintains its own independent conversation state.
            Uses stateless agent interaction for better concurrency support.
            """
            )

        with gr.Row():
            with gr.Column():
                # Settings panel
                gr.Markdown("### ⚙️ Agent Settings")

                with gr.Accordion("Configuration", open=True):
                    api_base_input = gr.Textbox(
                        label="API Base URL",
                        value=AGENT_MODEL_API_BASE,
                        placeholder="http://localhost:11434"
                    )

                    api_key_input = gr.Textbox(
                        label="API Key (only needed for remote models)",
                        value="",
                        type="password",
                        placeholder="Leave empty if not needed"
                    )

                    model_input = gr.Textbox(
                        label="Model",
                        value=f"ollama_chat/{AGENT_MODEL_NAME}" if "ollama_chat" not in AGENT_MODEL_NAME else AGENT_MODEL_NAME,
                        placeholder="ollama_chat/gpt-oss:20b"
                    )

                    temperature_input = gr.Slider(
                        label="Temperature",
                        minimum=0.0,
                        maximum=1.0,
                        value=0.2,
                        step=0.1
                    )

                    max_tokens_input = gr.Slider(
                        label="Max Tokens",
                        minimum=100,
                        maximum=8192,
                        value=6000,
                        step=10
                    )

                    num_ctx_input = gr.Number(
                        label="Context Window",
                        value=131072,
                        precision=0
                    )

                    # NOTE(review): UI default is 50 but
                    # DEFAULT_NEURIPS2025_AGENT_ARGS uses 10 — confirm which
                    # default is intended.
                    max_papers_input = gr.Slider(
                        label="Max Papers to Retrieve",
                        minimum=0,
                        maximum=100,
                        value=50,
                        step=1
                    )

                    init_btn = gr.Button("Update Config", variant="primary")
                    init_status = gr.Textbox(label="Status", interactive=False)

                with gr.Accordion("System Prompt", open=False):
                    system_prompt_input = gr.Textbox(
                        label="System Prompt",
                        value=agent.get_system_prompt()["content"] if type(agent.get_system_prompt()) is dict else None,
                        placeholder="Enter custom system prompt here...",
                        lines=12
                    )
                    update_system_btn = gr.Button("Update System Prompt")
                    system_status = gr.Textbox(label="Status", interactive=False)

                with gr.Accordion("History & Save", open=False):
                    view_history_btn = gr.Button("📜 View Full History")
                    history_output = gr.Code(
                        label="Conversation History (JSON)",
                        language="json",
                        lines=10
                    )

                    save_filename_input = gr.Textbox(
                        label="Filename (optional)",
                        placeholder="Leave empty for auto-generated name",
                        value=""
                    )
                    save_status = gr.Textbox(label="Save Status", interactive=False)

        # Event handlers
        init_btn.click(
            fn=configure_agent,
            inputs=[
                api_base_input,
                api_key_input,
                model_input,
                temperature_input,
                max_tokens_input,
                num_ctx_input,
                max_papers_input,
                config_state
            ],
            outputs=[config_state, init_status]
        )

        # Chat submission.
        # NOTE: `yield from` inside a lambda turns it into a generator
        # function, which Gradio treats as a streaming callback.
        submit_btn.click(
            fn=lambda msg_input, chatbot, config_state, messages_state: (yield from submit_message(msg_input, chatbot, config_state, messages_state, agent)),
            inputs=[msg_input, chatbot, config_state, messages_state],
            outputs=[chatbot, messages_state]
        ).then(
            fn=lambda: "",
            inputs=None,
            outputs=msg_input
        )

        msg_input.submit(
            fn=lambda msg_input, chatbot, config_state, messages_state: (yield from submit_message(msg_input, chatbot, config_state, messages_state, agent)),
            inputs=[msg_input, chatbot, config_state, messages_state],
            outputs=[chatbot, messages_state]
        ).then(
            fn=lambda: "",
            inputs=None,
            outputs=msg_input
        )

        # System prompt update
        update_system_btn.click(
            fn=lambda system_prompt_input, messages_state: update_system_prompt(system_prompt_input, messages_state, agent),
            inputs=[system_prompt_input, messages_state],
            outputs=[system_status, messages_state]
        )

        # History viewing
        view_history_btn.click(
            fn=view_history,
            inputs=messages_state,
            outputs=history_output
        )

        # Save history
        save_btn.click(
            fn=save_history,
            inputs=[save_filename_input, messages_state],
            outputs=save_status
        )

        # Clear chat (reuses save_status as the status textbox)
        clear_btn.click(
            fn=lambda config_state, messages_state: clear_chat(config_state, messages_state, agent),
            inputs=[config_state, messages_state],
            outputs=[save_status, chatbot, messages_state]
        )

    webapp.launch(
        server_name="0.0.0.0",  # Allow external connections
        server_port=7860,  # Default Gradio port
        share=False,  # Set to True to create a public link
        show_error=True,
        debug=True
    )
517
-
518
- if __name__ == "__main__":
519
- # Setup logging (only needs to be done once globally)
520
- setup_logging(
521
- log_dir="logs",
522
- level=os.environ.get("AGENTIC_NAV_LOG_LEVEL", "INFO")
523
- )
524
-
525
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/frontend/cli.py DELETED
@@ -1,371 +0,0 @@
1
- """
2
- Enhanced terminal chat UI with async streaming and full terminal functionality.
3
-
4
- Features:
5
- - Async streaming output as LLM generates tokens
6
- - Rich prompt with command history and auto-completion
7
- - Live markdown rendering during streaming
8
- - Multi-line input via Ctrl+O or /edit command
9
- - Commands: /help, /exit, /system, /edit, /history, /save <path>, /clear
10
- - Keyboard shortcuts: Ctrl+C to cancel, Ctrl+D to exit
11
- """
12
- import asyncio
13
- import click
14
- import os
15
- import logging
16
- import litellm
17
- from pathlib import Path
18
- from typing import Optional
19
-
20
- from rich.console import Console
21
- from rich.markdown import Markdown
22
- from rich.live import Live
23
- from rich.panel import Panel
24
- from rich.text import Text
25
- from prompt_toolkit import PromptSession
26
- from prompt_toolkit.history import FileHistory
27
- from prompt_toolkit.auto_suggest import AutoSuggestFromHistory
28
- from prompt_toolkit.completion import WordCompleter
29
- from prompt_toolkit.key_binding import KeyBindings
30
- from prompt_toolkit.formatted_text import HTML
31
-
32
- from agentic_nav.agents import NeurIPS2025Agent
33
- from agentic_nav.utils.logger import setup_logging
34
- from agentic_nav.utils.file_handlers import save_chat_history
35
- from agentic_nav.utils.cli import open_editor, show_history, print_help
36
-
37
- try:
38
- from datetime import datetime, UTC
39
- except ImportError:
40
- from datetime import datetime, timezone
41
- UTC = timezone.utc
42
-
43
-
44
# Module-level logger; configured by setup_logging() in main().
LOGGER = logging.getLogger(__name__)

# Embedding model endpoint (used indirectly by the retrieval tools).
EMBEDDING_MODEL_NAME = os.environ.get("EMBEDDING_MODEL_NAME", "nomic-embed-text")
EMBEDDING_MODEL_API_BASE = os.environ.get("EMBEDDING_MODEL_API_BASE", "http://localhost:11435")

# Chat-model endpoint for the agent itself.
AGENT_MODEL_NAME = os.environ.get("AGENT_MODEL_NAME", "gpt-oss:20b")
AGENT_MODEL_API_BASE = os.environ.get("AGENT_MODEL_API_BASE", "http://localhost:11436")
OLLAMA_API_KEY = os.environ.get("OLLAMA_API_KEY")

# Silence litellm's verbose debug logging (private API — may break on
# litellm upgrades).
litellm._logging._disable_debugging()
console = Console(soft_wrap=True)
55
-
56
- # Command completer for auto-complete
57
# Command completer for auto-complete (Tab completion of slash commands).
command_completer = WordCompleter(
    ['/help', '/exit', '/system', '/edit', '/history', '/save', '/clear'],
    ignore_case=True,
    sentence=True
)

# Custom key bindings registered on the prompt session below.
bindings = KeyBindings()
64
-
65
@bindings.add('c-o')
def _(event):
    """Multi-line input with Ctrl+O"""
    # Insert a literal newline into the buffer instead of submitting,
    # letting the user compose multi-line prompts.
    event.current_buffer.insert_text('\n')
69
-
70
-
71
def create_prompt_session():
    """Build the interactive prompt session: persistent history file,
    history-based auto-suggestions, slash-command completion, and the
    custom Ctrl+O key binding."""
    history_path = Path.home() / ".llm_agents_history"
    session_kwargs = dict(
        history=FileHistory(str(history_path)),
        auto_suggest=AutoSuggestFromHistory(),
        completer=command_completer,
        complete_while_typing=True,
        key_bindings=bindings,
        enable_open_in_editor=True,
        multiline=False,
    )
    return PromptSession(**session_kwargs)
84
-
85
-
86
def render_markdown(text: str, title: Optional[str] = None):
    """Print *text* as rich Markdown, optionally framed in a titled panel."""
    body = Markdown(text)
    if title is None:
        console.print(body)
    else:
        console.print(Panel(body, title=title, border_style="blue"))
92
-
93
-
94
def stream_agent_response_sync(agent, message: dict):
    """
    Stream agent response with live markdown rendering.

    This function:
    1. Copies the agent's current history and appends the new message
    2. Streams the response using the interact_stateless generator
    3. Updates the live display with markdown content and tool execution status
    4. Updates the agent's history with the final message list

    Note: KeyboardInterrupt is caught to allow graceful cancellation,
    then re-raised so the caller can handle cleanup.

    Args:
        agent: The agent instance with interact_stateless support
        message: User message dict with 'role', 'content', and optional '_ts'
    """
    # Get current history and add the new message (work on a copy so the
    # agent's stored history is only replaced on success).
    messages = agent.get_history().copy()
    messages.append(message)

    accumulated_text = ""
    tool_calls_made = []
    final_messages = None

    with Live(console=console, refresh_per_second=10) as live:
        try:
            # Use interact_stateless for streaming (it's a generator)
            for updated_messages in agent.interact_stateless(
                messages=messages,
                model=agent.model,
                api_base=agent.api_base,
                api_key=agent.api_key,
                llm_args=agent.llm_args
            ):
                final_messages = updated_messages

                # Extract the last assistant message; only the most recent
                # one is inspected per update (break after the first hit).
                for msg in reversed(updated_messages):
                    if msg.get("role") == "assistant":
                        content = msg.get("content", "")
                        if content != accumulated_text:
                            accumulated_text = content

                        # Show streaming content
                        if accumulated_text:
                            live.update(Markdown(accumulated_text))

                        # Check for tool calls (only re-render when the set
                        # of calls changed since the last update)
                        if "tool_calls" in msg and msg["tool_calls"] != tool_calls_made:
                            tool_calls_made = msg["tool_calls"]
                            # Show tool execution
                            tool_names = [tc["function"]["name"] for tc in tool_calls_made]
                            tool_info = Text(f"\n🔧 Executing tools: {', '.join(tool_names)}", style="yellow")
                            live.update(tool_info)
                        break

            # Update agent's history with final messages
            if final_messages:
                agent.set_history(final_messages)

        except KeyboardInterrupt:
            live.stop()
            console.print("\n[yellow]⚠ Response cancelled by user[/yellow]")
            raise
        except Exception as e:
            live.stop()
            console.print(f"\n[red]❌ Error: {e}[/red]")
            LOGGER.error(f"Streaming error: {e}", exc_info=True)
            raise
164
-
165
-
166
async def async_interact(agent, message: dict):
    """
    Async wrapper for agent interaction with streaming.

    Runs the blocking streamer in a worker thread so the event loop stays
    responsive. KeyboardInterrupt from stream_agent_response_sync is caught
    here to prevent it from propagating up; the user-facing feedback for
    cancellation already happens inside stream_agent_response_sync.
    """
    try:
        await asyncio.to_thread(stream_agent_response_sync, agent, message)
    except KeyboardInterrupt:
        # Feedback already printed by stream_agent_response_sync.
        LOGGER.info("Agent interaction cancelled by user")
    except Exception as exc:
        LOGGER.error(f"Agent interaction failed: {exc}")
        console.print(f"[red]Error: {exc}[/red]")
183
-
184
-
185
def print_welcome():
    """Print welcome message"""
    # Assemble a styled rich Text banner; shown at startup and after /clear.
    welcome = Text()
    welcome.append("╔═══════════════════════════════════════╗\n", style="bold blue")
    welcome.append("║ ", style="bold blue")
    welcome.append("LLM Agent Chat Interface", style="bold white")
    welcome.append(" ║\n", style="bold blue")
    welcome.append("╚═══════════════════════════════════════╝\n", style="bold blue")
    welcome.append("\nCommands:\n", style="bold yellow")
    welcome.append(" /help - Show help\n", style="cyan")
    welcome.append(" /edit - Multi-line input\n", style="cyan")
    welcome.append(" /history - Show conversation history\n", style="cyan")
    welcome.append(" /system - Set system prompt\n", style="cyan")
    welcome.append(" /save - Save conversation\n", style="cyan")
    welcome.append(" /clear - Clear screen\n", style="cyan")
    welcome.append(" /exit - Exit (or Ctrl+D)\n", style="cyan")
    welcome.append("\nShortcuts:\n", style="bold yellow")
    welcome.append(" Ctrl+O - New line in input\n", style="cyan")
    welcome.append(" Ctrl+C - Cancel current response\n", style="cyan")
    welcome.append(" Ctrl+D - Exit\n", style="cyan")
    welcome.append(" ↑/↓ - Navigate history\n", style="cyan")
    welcome.append(" Tab - Auto-complete commands\n", style="cyan")

    console.print(welcome)
209
-
210
-
211
@click.command()
@click.option("-t", "--temperature", default=0.2, type=float,
              help="Specify the model temperature.")
@click.option("--max-tokens", default=6000, type=int,
              help="Specify the max. number of model output tokens.")
@click.option("-c", "--num-ctx", default=131072, type=int,
              help="Specify the model context window.")
@click.option("-l", "--max-num-papers", default=50, type=int,
              help="Specify the maximum number of papers to retrieve.")
def main(temperature, max_tokens, num_ctx, max_num_papers):
    """Enhanced LLM Agent CLI with async streaming and rich terminal features"""

    # Setup logging
    setup_logging(
        log_dir="logs",
        level=os.environ.get("AGENTIC_NAV_LOG_LEVEL", "INFO")
    )

    print_welcome()
    LOGGER.info("Agent runtime started")

    # Config for the LLM messages
    llm_config = {
        "model": f"ollama_chat/{AGENT_MODEL_NAME}",
        "api_base": AGENT_MODEL_API_BASE,
        "temperature": temperature,
        "max_tokens": max_tokens,
        "num_ctx": num_ctx
    }
    LOGGER.info(f"LLM configuration: {llm_config}")

    # Parameters to limit the tool calling scope.
    # NOTE(review): key is "num_records" here while the web UI passes
    # "max_num_papers" — confirm which name the tools actually expect.
    tool_args = {
        "num_records": max_num_papers
    }
    LOGGER.info(f"Global tool arguments: {tool_args}")

    # Initialize agent (model is passed via llm_config/llm_args)
    agent = NeurIPS2025Agent(
        api_base=AGENT_MODEL_API_BASE,
        api_key=OLLAMA_API_KEY,
        llm_args=llm_config,
        global_tool_args=tool_args,
    )

    agent.setup_session()
    console.print("[green]✓ Agent initialized successfully[/green]\n")

    # Create prompt session
    session = create_prompt_session()

    # Main interaction loop
    while True:
        try:
            # Get user input with rich prompt
            line = session.prompt(
                HTML('<ansiyellow><b>You></b></ansiyellow> '),
                multiline=False,
            ).strip()

            LOGGER.debug(f"USER PROMPT: {line}")

        except (EOFError, KeyboardInterrupt):
            console.print("\n[yellow]Goodbye! 👋[/yellow]")
            LOGGER.info("User exited")
            break

        if not line:
            continue

        # Handle commands
        if line.startswith("/"):
            parts = line.split(maxsplit=1)
            cmd = parts[0].lower()
            arg = parts[1] if len(parts) > 1 else ""

            if cmd == "/help":
                print_help()
                continue

            elif cmd == "/exit":
                console.print("[yellow]Goodbye! 👋[/yellow]")
                LOGGER.info("User exited via /exit command")
                break

            elif cmd == "/clear":
                console.clear()
                print_welcome()
                continue

            elif cmd == "/edit":
                content = open_editor()
                if content:
                    # Intentionally no `continue`: the composed message
                    # falls through to the send block below.
                    next_message = {
                        "role": "user",
                        "content": content,
                        "_ts": str(datetime.now(UTC))
                    }
                else:
                    console.print("[yellow]⚠ No content provided[/yellow]")
                    continue

            elif cmd == "/system":
                content = open_editor()
                if content:
                    messages = agent.set_system_prompt(
                        messages=agent.get_history(),
                        new_system_prompt=content
                    )
                    agent.set_history(messages=messages)
                    console.print("[green]✓ System prompt updated[/green]")
                    continue
                else:
                    console.print("[yellow]⚠ No content provided[/yellow]")
                    continue

            elif cmd == "/history":
                show_history(agent.get_history())
                continue

            elif cmd == "/save":
                Path("chat_histories/").mkdir(exist_ok=True, parents=True)
                time_now = datetime.now().strftime("%Y-%m-%d_%H-%M")
                path = arg.strip() or f"chat_histories/{time_now}_chat_history.json"

                try:
                    save_chat_history(agent.get_history(), path)
                    console.print(f"[green]✓ Chat saved to {path}[/green]")
                except Exception as e:
                    console.print(f"[red]❌ Failed to save: {e}[/red]")
                    LOGGER.error(f"Save failed: {e}")
                continue

            else:
                console.print(f"[red]❌ Unknown command: {cmd}[/red]")
                console.print("[yellow]Type /help for available commands[/yellow]")
                continue
        else:
            # Regular single-line user message
            next_message = {
                "role": "user",
                "content": line,
                "_ts": str(datetime.now(UTC))
            }

        # Send the message (from /edit or a plain line) and stream the reply.
        try:
            console.print()
            asyncio.run(async_interact(agent, next_message))
            console.print()

        except KeyboardInterrupt:
            console.print("\n[yellow]⚠ Interrupted[/yellow]")
            continue
        except Exception as e:
            console.print(f"\n[red]❌ Error: {e}[/red]")
            LOGGER.error(f"Interaction error: {e}", exc_info=True)
            continue
368
-
369
-
370
- if __name__ == "__main__":
371
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/tools/__init__.py DELETED
@@ -1,15 +0,0 @@
1
- from agentic_nav.tools.knowledge_graph import search_similar_papers, find_neighboring_papers, traverse_graph
2
- from agentic_nav.tools.session_routing import build_visit_schedule
3
-
4
-
5
# Public tool API re-exported for agent registration; get_all_tools() below
# resolves these names in order.
__all__ = [
    'search_similar_papers',
    'find_neighboring_papers',
    'traverse_graph',
    'build_visit_schedule',
]
11
-
12
-
13
def get_all_tools():
    """Return all registered tool callables as a list, in __all__ order."""
    return [globals()[name] for name in __all__]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/tools/knowledge_graph/__init__.py DELETED
@@ -1,326 +0,0 @@
1
- """
2
- This file defines the tools that can be made available to an agent.
3
- The idea is to put the actual functions into wrappers that provide LLM-friendly and token efficient outputs.
4
- """
5
- import os
6
- import random
7
-
8
- from toon_format import encode as toon_encode
9
- from typing import List, Optional, Union
10
-
11
- from agentic_nav.tools.knowledge_graph.retriever import Neo4jGraphWorker, LOGGER
12
-
13
- NEO4J_DB_URI = os.environ.get("NEO4J_DB_URI", "bolt://neo4j_db:7687")
14
- NEO4J_USERNAME = os.environ.get("NEO4J_USERNAME", "neo4j")
15
- NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD")
16
-
17
-
18
def search_similar_papers(
    user_query: str,
    num_papers_to_return: int = 50,
    min_similarity: Optional[float] = None,
    day: Optional[str] = None,
    timeslots: Optional[List[str]] = None
) -> str:
    """
    Search for research papers semantically similar to a natural language query.

    Performs vector similarity search against the Neo4j knowledge graph and is
    the typical entry point of a paper-discovery workflow; results can be
    explored further with find_neighboring_papers() or traverse_graph().

    Args:
        user_query (str): Natural language description of the research interest.
            It is embedded and compared against paper embeddings in the database.
        num_papers_to_return (int, optional): Maximum number of papers to return,
            ranked by similarity score. Defaults to 50.
        min_similarity (float, optional): Minimum similarity threshold in
            [0.0, 1.0]. Defaults to None (no filtering).
        day (str, optional): Conference day in ISO format (e.g. "2024-12-10").
            When provided, only papers scheduled on this day are searched.
        timeslots (List[str], optional): Time ranges formatted as
            "HH:MM:SS-HH:MM:SS" (e.g. ["09:00:00-12:00:00"]). Papers whose
            session start time falls in any range are included.

    Returns:
        str: Token-efficient toon-encoded representation of the matching papers,
            typically ordered by descending similarity.

    Restrictions:
        - Requires a running Neo4j instance reachable at NEO4J_DB_URI
          (default "bolt://neo4j_db:7687") with credentials taken from the
          NEO4J_USERNAME / NEO4J_PASSWORD environment variables.
        - The database must have pre-computed paper embeddings and a vector
          index for similarity queries.
        - A new database connection is created per call; TODO: wrap
          Neo4jGraphWorker in a session / connection pool for concurrent use.

    Raises:
        Connection errors if the Neo4j database is not accessible.
        ValueError for an out-of-range min_similarity, a malformed ISO day,
            or malformed timeslot strings.
        Embedding errors if the query cannot be embedded.

    Example:
        >>> papers = search_similar_papers(
        ...     user_query="federated learning for privacy-preserving ML",
        ...     num_papers_to_return=15
        ... )
        >>> morning_papers = search_similar_papers(
        ...     user_query="computer vision applications",
        ...     day="2024-12-10",
        ...     timeslots=["09:00:00-12:00:00"]
        ... )
    """
    # Type coercion for parameters that may come as strings from LLM tool calls
    if num_papers_to_return is not None and not isinstance(num_papers_to_return, int):
        num_papers_to_return = int(num_papers_to_return)
    if min_similarity is not None and not isinstance(min_similarity, float):
        min_similarity = float(min_similarity)

    # Handle timeslots - ensure it's a list or None
    if timeslots is not None and isinstance(timeslots, str):
        # If a single string is provided, wrap it in a list
        timeslots = [timeslots]

    worker = Neo4jGraphWorker(
        uri=NEO4J_DB_URI,
        username=NEO4J_USERNAME,
        password=NEO4J_PASSWORD
    )

    # Fetch papers with optional day and time filtering; the database filters
    # by session time BEFORE the vector search for better performance.
    papers = worker.similarity_search(
        user_query=user_query,
        top_k=num_papers_to_return,
        min_similarity=min_similarity,
        day=day,
        timeslots=timeslots
    )

    # Format outputs to be more token efficient
    formatted_papers = toon_encode(papers)

    return formatted_papers
137
-
138
-
139
def find_neighboring_papers(
    paper_id: str,
    relationship_types: Union[List[str], str, None] = None,
    num_neighbors_to_return: int = 10,
    min_similarity: float = 0.75
) -> str:
    """
    Retrieve immediate (one-hop) neighbors of a paper from the Neo4j knowledge graph.

    Intended to be used after an initial similarity search, to explore specific
    relationships (similar papers, authors, topics) of a paper of interest.

    Args:
        paper_id (str): Unique identifier (Neo4j UUID) of the target paper node.
        relationship_types (List[str] | str, optional): Relationship types to
            query. Defaults to ["SIMILAR_TO"].
            Valid options: ["SIMILAR_TO", "IS_AUTHOR_OF", "BELONGS_TO_TOPIC"]
        num_neighbors_to_return (int, optional): Maximum number of neighbors to
            return. Defaults to 10. Results are shuffled before truncation so
            repeated calls surface diverse neighbors.
        min_similarity (float, optional): Minimum similarity threshold for
            returned neighbors. Defaults to 0.75.

    Returns:
        str: Token-efficient toon-encoded representation of the neighbors.

    Restrictions:
        - Requires a running Neo4j instance reachable at NEO4J_DB_URI with
          credentials from NEO4J_USERNAME / NEO4J_PASSWORD.
        - The paper_id must exist in the graph; only one-hop searches are done.

    Raises:
        Connection errors if the Neo4j database is not accessible.
        ValueError if invalid relationship_types are provided.
    """
    # Avoid a mutable default argument: resolve the default inside the body.
    if relationship_types is None:
        relationship_types = ["SIMILAR_TO"]
    if isinstance(relationship_types, str):
        relationship_types = [relationship_types]

    # Type coercion for parameters that may come as strings from LLM tool calls
    # (min_similarity coercion added for consistency with search_similar_papers).
    if num_neighbors_to_return is not None and not isinstance(num_neighbors_to_return, int):
        num_neighbors_to_return = int(num_neighbors_to_return)
    if min_similarity is not None and not isinstance(min_similarity, float):
        min_similarity = float(min_similarity)

    worker = Neo4jGraphWorker(
        uri=NEO4J_DB_URI,
        username=NEO4J_USERNAME,
        password=NEO4J_PASSWORD
    )

    neighbors = worker.neighborhood_search(
        paper_id=paper_id,
        relationship_types=relationship_types,
        min_similarity=min_similarity,
    )

    # BUG FIX: the original compared the per-entry key against the whole list
    # (`rel_type != relationship_types`), which is always True for a str vs list,
    # so no filtering ever happened. Keep only the requested relationship types.
    # NOTE(review): each value appears to be the neighbor payload per relationship
    # type — confirm against Neo4jGraphWorker.neighborhood_search's return shape.
    relevant_neighbors = []
    for rel_type, neighbor in neighbors.items():
        if rel_type in relationship_types:
            relevant_neighbors.append(neighbor)

    # Shuffle before truncating so repeated calls give diverse responses.
    random.shuffle(relevant_neighbors)

    if num_neighbors_to_return is not None and isinstance(num_neighbors_to_return, int):
        relevant_neighbors = relevant_neighbors[:num_neighbors_to_return]

    # Format outputs to be more token efficient
    formatted_neighbors = toon_encode(relevant_neighbors)

    return formatted_neighbors
240
-
241
-
242
def traverse_graph(
    start_paper_id: str,
    n_hops: int = 2,
    relationship_type: Optional[str] = "BELONGS_TO_TOPIC",
    max_results: Optional[int] = 30,
    strategy: str = "breadth_first_random",
    max_branches: Optional[int] = 2,
    random_seed: Optional[int] = 42
) -> str:
    """
    Explore the Neo4j knowledge graph from a seed paper to discover related papers.

    Performs multi-hop traversal along a chosen relationship type, using one of
    several traversal strategies. Intended as a follow-up to an initial
    similarity search.

    Args:
        start_paper_id (str): Neo4j UUID of the paper node to start from.
        n_hops (int, optional): How many relationship hops to traverse. Defaults to 2.
        relationship_type (str, optional): Relationship to follow.
            Defaults to "BELONGS_TO_TOPIC".
            Valid options: ["SIMILAR_TO", "AUTHORED_BY", "BELONGS_TO_TOPIC"]
        max_results (int, optional): Maximum number of papers returned. Defaults to 30.
        strategy (str, optional): One of ["breadth_first", "depth_first",
            "breadth_first_random", "depth_first_random"]. Defaults to
            "breadth_first_random".
        max_branches (int, optional): Branching factor per node. Defaults to 2.
        random_seed (int, optional): Seed for the randomized strategies,
            ensuring reproducible traversals. Defaults to 42.

    Returns:
        str: Token-efficient toon-encoded representation of the discovered papers.

    Raises:
        Connection errors if the Neo4j database is not accessible.
        ValueError for an unknown relationship_type or strategy.
    """
    def _coerce_int(value):
        # LLM tool calls sometimes deliver integers as strings; normalize them.
        if value is not None and not isinstance(value, int):
            return int(value)
        return value

    n_hops = _coerce_int(n_hops)
    max_results = _coerce_int(max_results)
    max_branches = _coerce_int(max_branches)
    random_seed = _coerce_int(random_seed)

    graph_worker = Neo4jGraphWorker(
        uri=NEO4J_DB_URI,
        username=NEO4J_USERNAME,
        password=NEO4J_PASSWORD
    )

    discovered_papers = graph_worker.graph_traversal(
        start_paper_id=start_paper_id,
        n_hops=n_hops,
        relationship_type=relationship_type,
        max_results=max_results,
        strategy=strategy,
        max_branches=max_branches,
        random_seed=random_seed
    )

    # Encode compactly for LLM consumption.
    return toon_encode(discovered_papers)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/tools/knowledge_graph/file_handler.py DELETED
@@ -1,29 +0,0 @@
1
- import pickle
2
- import networkx as nx
3
-
4
-
5
def save_graph(graph: "nx.Graph", output_path: str):
    """
    Save a graph to disk using pickle.

    Args:
        graph: The graph to serialize (any picklable object works).
        output_path: Destination file path.
    """
    # The `with` block closes the file automatically; the original explicit
    # f.close() inside the block was redundant and has been removed.
    with open(output_path, 'wb') as f:
        pickle.dump(graph, f)
    print(f"Graph saved to {output_path}")
16
-
17
-
18
def load_graph(input_path: str) -> "nx.Graph":
    """
    Load a previously pickled graph from disk.

    Args:
        input_path: Path to a pickle file produced by save_graph().

    Returns:
        The deserialized graph object.

    Security note:
        pickle.load can execute arbitrary code on malicious input — only load
        files from trusted sources.
    """
    # The `with` block closes the file automatically; the original explicit
    # f.close() inside the block was redundant and has been removed.
    with open(input_path, 'rb') as f:
        graph = pickle.load(f)
    print(f"Graph loaded from {input_path}")
    return graph
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/tools/knowledge_graph/graph_generator.py DELETED
@@ -1,446 +0,0 @@
1
- import json
2
- import logging
3
- import os
4
-
5
- import click
6
- import networkx as nx
7
- import numpy as np
8
- import litellm
9
- from typing import List, Dict, Any, Union
10
- from litellm import embedding
11
- from concurrent.futures import ThreadPoolExecutor, as_completed
12
- from tqdm import tqdm
13
-
14
- from pathlib import Path
15
-
16
- from agentic_nav.utils.embedding_generator import batch_embed_documents
17
- from agentic_nav.utils.logging import setup_logging
18
- from agentic_nav.tools.knowledge_graph.file_handler import save_graph
19
-
20
-
21
- # Setup logging
22
- setup_logging(
23
- log_dir="logs",
24
- level=os.environ.get("AGENTIC_NAV_LOG_LEVEL", "INFO")
25
- )
26
- LOGGER = logging.getLogger(__name__)
27
- litellm._logging._disable_debugging()
28
- PROJECT_ROOT = Path(__file__).parent.parent.parent.parent
29
- EMBEDDING_MODEL_NAME = os.environ.get("EMBEDDING_MODEL_NAME", "ollama/nomic-embed-text")
30
- EMBEDDING_MODEL_API_BASE = os.environ.get("EMBEDDING_MODEL_API_BASE", "http://localhost:11435")
31
-
32
-
33
class PaperKnowledgeGraph:
    """
    A knowledge graph builder for academic papers focusing on:
    - Paper names (nodes)
    - Topics (nodes)
    - Abstract embeddings (stored as node attributes)
    Uses litellm with ollama for local embedding generation with parallel processing.
    """
    def __init__(
        self,
        embedding_model: str = EMBEDDING_MODEL_NAME,
        ollama_base_url: str = EMBEDDING_MODEL_API_BASE,
        embedding_gen_batch_size: int = 32,
        max_parallel_workers: int = 8,
        limit_num_papers: Union[int, None] = None
    ):
        """
        Initialize the knowledge graph builder.

        Args:
            embedding_model: Name of the ollama embedding model (e.g., 'nomic-embed-text')
            ollama_base_url: Base URL for the ollama server
            embedding_gen_batch_size: Batch size for generating text embeddings
            max_parallel_workers: Number of parallel workers for embedding generation
            limit_num_papers: Optional cap on how many papers are loaded (None = all)
        """
        self.graph = nx.Graph()  # undirected graph holding paper/topic/author nodes
        self.embedding_model = embedding_model
        self.ollama_base_url = ollama_base_url
        self.batch_size = embedding_gen_batch_size
        self.max_workers = max_parallel_workers
        self.papers_data = []  # raw paper dicts as loaded from JSON
        self.limit_num_papers = limit_num_papers

        # Test connection (fails fast if the embedding backend is unreachable)
        LOGGER.info(f"Initializing with model: {embedding_model}")
        LOGGER.info(f"Ollama server: {ollama_base_url}")
        self._test_embedding_connection()

    def _test_embedding_connection(self):
        """Test connection to ollama server."""
        try:
            # A tiny probe request; also reveals the embedding dimension.
            response = embedding(
                model=self.embedding_model,
                input=["test connection"],
                api_base=self.ollama_base_url
            )
            LOGGER.info(f"Successfully connected to ollama server")
            LOGGER.info(f"Embedding dimension: {len(response.data[0]['embedding'])}")
        except Exception as e:
            LOGGER.error(f"❌ Error connecting to ollama server: {e}")
            LOGGER.error(f"Please ensure ollama is running and the model '{self.embedding_model}' is available")
            LOGGER.error(f"Run: ollama pull nomic-embed-text")
            raise

    def load_papers_from_json(self, json_file_path: str, paper_dict_key: str = "results"):
        """
        Load papers from a JSON file or JSONL file.

        Tries to parse the whole file as JSON first (expecting a list under
        ``paper_dict_key``); on a JSON decode error falls back to JSONL
        (one JSON object per line).

        Args:
            json_file_path: Path to the JSON/JSONL file
            paper_dict_key: Key under which the paper list lives in a JSON object
        """
        self.papers_data = []

        with open(json_file_path, 'r') as f:
            # Try to parse as regular JSON first
            try:
                content = f.read()
                # Try parsing as a single JSON object
                try:
                    data = json.loads(content)
                    if isinstance(data[paper_dict_key], list):
                        self.papers_data = data[paper_dict_key]
                    else:
                        raise TypeError("File importer expects a list of papers.")
                except json.JSONDecodeError:
                    # Try parsing as JSONL (one JSON object per line)
                    f.seek(0)
                    for line in f:
                        line = line.strip()
                        if line:
                            self.papers_data.append(json.loads(line))
            except Exception as e:
                # NOTE(review): a missing paper_dict_key (KeyError) also lands
                # here and is reported as a parse error — confirm intent.
                raise ValueError(f"Error parsing JSON file: {e}")

        if self.limit_num_papers is not None and self.limit_num_papers > 0:
            LOGGER.warning(f"WARNING: Number of papers limited to {self.limit_num_papers} items. Set to 'None' for all papers")
            self.papers_data = self.papers_data[:self.limit_num_papers]

        LOGGER.info(f"Loaded {len(self.papers_data)} papers from {json_file_path}")

    def build_graph(self):
        """
        Build the knowledge graph from loaded papers.
        Creates nodes for papers and topics, and edges between them.
        Computes embeddings for abstracts in parallel.
        """
        topic_nodes = set()   # topic names already added as nodes
        author_nodes = set()  # author uids already added as nodes

        LOGGER.info(f"\nPreparing to process {len(self.papers_data)} papers...")

        # Extract all abstracts and paper info
        paper_info = []
        abstracts = []

        for paper in self.papers_data:
            paper_id = paper.get('uid', paper.get('id'))
            paper_name = paper.get('name', 'Unnamed Paper')
            abstract = paper.get('abstract', '')
            topic = paper.get('topic', 'Unknown')
            authors = paper.get('authors', [])
            keywords = paper.get("keywords", [])
            decision = paper.get("decision", "")
            session = paper.get("session", "")
            session_start_time = paper.get("starttime", "")
            session_end_time = paper.get("endtime", "")
            presentation_type = paper.get("eventtype", "")
            room_name = paper.get("room_name", "")
            project_url = paper.get("url", "")
            poster_position = paper.get("poster_position", "")
            paper_url = paper.get("paper_url", "")
            sourceid = paper.get("sourceid", "")
            virtualsite_url = paper.get("virtualsite_url", "")

            paper_info.append({
                "id": paper_id,
                "name": paper_name,
                "abstract": abstract,
                "topic": topic,
                "authors": authors,
                "keywords": keywords,
                "decisions": decision,
                "session": session,
                "session_start_time": session_start_time,
                "session_end_time": session_end_time,
                "presentation_type": presentation_type,
                "room_name": room_name,
                "project_url": project_url,
                "poster_position": poster_position,
                "paper_url": paper_url,
                "sourceid": sourceid,
                "virtualsite_url": virtualsite_url

            })
            abstracts.append(abstract)

        # Generate all embeddings in parallel
        LOGGER.info(f"\nGenerating embeddings with batch size {self.batch_size}...")
        embeddings = batch_embed_documents(
            abstracts,
            batch_size=self.batch_size,
            embedding_model=self.embedding_model,
            api_base=self.ollama_base_url
        )

        # Convert to list so that embeddings can be mapped to samples properly
        embeddings = embeddings.tolist()

        # Add nodes to graph
        LOGGER.info("\nBuilding graph structure...")
        with tqdm(total=len(paper_info), desc="Adding nodes") as pbar:
            # NOTE(review): the loop variable `embedding` shadows the litellm
            # `embedding` function imported at module level — harmless inside
            # this loop, but worth renaming.
            for info, embedding in zip(paper_info, embeddings):

                # Extract author information (store as list of dicts)
                author_list = []
                if info['authors']:
                    for author in info['authors']:
                        author_info = {
                            'id': author.get('id'),
                            'fullname': author.get('fullname', ''),
                            'institution': author.get('institution', ''),
                            'url': author.get('url', '')
                        }

                        # Composite uid disambiguates authors with equal ids.
                        author_uid = f"{author_info['id']} - {author_info['fullname']}"
                        if author_uid not in author_nodes:
                            self.graph.add_node(
                                author_uid,
                                **author_info
                            )
                            author_nodes.add(author_uid)

                        author_list.append(author_info)

                # Add paper node with attributes
                paper_attrs = info.copy()
                del paper_attrs["authors"]

                self.graph.add_node(
                    info["id"],
                    **paper_attrs,
                    embedding=embedding,
                    authors=author_list,
                    node_type="paper"
                )

                for author in author_list:
                    self.graph.add_edge(f"{author['id']} - {author['fullname']}", info["id"], relationship="is_author_of")

                # Add topic node if it doesn't exist
                if info['topic'] and info['topic'] not in topic_nodes:
                    self.graph.add_node(
                        info['topic'],
                        node_type='topic',
                        name=info['topic']
                    )
                    topic_nodes.add(info['topic'])

                # Add edge between paper and topic
                if info['topic']:
                    self.graph.add_edge(info['id'], info['topic'], relationship='belongs_to_topic')

                pbar.update(1)

        LOGGER.info(f"Built graph with {self.graph.number_of_nodes()} nodes and {self.graph.number_of_edges()} edges")
        LOGGER.info(f" Papers: {len([n for n, d in self.graph.nodes(data=True) if d.get('node_type') == 'paper'])}")
        LOGGER.info(f" Topics: {len([n for n, d in self.graph.nodes(data=True) if d.get('node_type') == 'topic'])}")

    def connect_similar_papers(self, similarity_threshold: float = 0.7):
        """
        Connect papers based on abstract embedding similarity using parallel processing.

        Note: this compares all O(n^2) paper pairs; cost grows quadratically
        with the number of papers.

        Args:
            similarity_threshold: Minimum cosine similarity to create an edge (0-1)
        """
        paper_nodes = [(n, d) for n, d in self.graph.nodes(data=True) if d.get('node_type') == 'paper']
        LOGGER.info(f"\nComputing similarities for {len(paper_nodes)} papers...")

        # Create pairs to compare (fast!)
        pairs = [(i, j) for i in range(len(paper_nodes)) for j in range(i + 1, len(paper_nodes))]
        LOGGER.info(f"Created {len(pairs)} pairs to compare")

        connections_added = 0

        def compute_similarity(pair_idx):
            """Compute cosine similarity for one paper pair; None if below threshold."""
            i, j = pair_idx
            node1, data1 = paper_nodes[i]
            node2, data2 = paper_nodes[j]
            emb1 = data1['embedding']
            emb2 = data2['embedding']
            similarity = np.dot(emb1, emb2) / (np.linalg.norm(emb1) * np.linalg.norm(emb2))
            if similarity >= similarity_threshold:
                return (node1, node2, float(similarity))
            return None

        # Compute similarities in parallel
        edges_to_add = []
        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            futures = {executor.submit(compute_similarity, pair): pair for pair in pairs}
            with tqdm(total=len(pairs), desc="Computing similarities", unit="pair") as pbar:
                for future in as_completed(futures):
                    result = future.result()
                    if result is not None:
                        edges_to_add.append(result)
                    pbar.update(1)

        # Add edges to graph (done serially after the parallel phase)
        for node1, node2, similarity in edges_to_add:
            self.graph.add_edge(
                node1,
                node2,
                relationship='similar_to',
                similarity=similarity
            )
            connections_added += 1

        LOGGER.info(f"Added {connections_added} similarity edges with threshold {similarity_threshold}")

    def get_papers_by_topic(self, topic: str) -> List[Dict[str, Any]]:
        """
        Get all papers belonging to a specific topic.

        Args:
            topic: Topic name

        Returns:
            List of paper information dictionaries (empty if topic is unknown)
        """
        if topic not in self.graph:
            return []

        papers = []
        for neighbor in self.graph.neighbors(topic):
            node_data = self.graph.nodes[neighbor]
            if node_data.get('node_type') == 'paper':
                papers.append({
                    'id': neighbor,
                    'name': node_data.get('name'),
                    'abstract': node_data.get('abstract'),
                    'embedding': node_data.get('embedding')
                })
        return papers

    def find_similar_papers(self, paper_id: str, top_k: int = 5) -> List[tuple]:
        """
        Find the most similar papers to a given paper.

        Args:
            paper_id: ID of the paper
            top_k: Number of similar papers to return

        Returns:
            List of (paper_id, similarity_score, paper_name) tuples; empty if
            the id is unknown or not a paper node.
        """
        if paper_id not in self.graph:
            return []

        paper_data = self.graph.nodes[paper_id]
        if paper_data.get('node_type') != 'paper':
            return []

        target_embedding = paper_data['embedding']
        similarities = []

        # Linear scan over all paper nodes: O(n) cosine similarities.
        for node, data in self.graph.nodes(data=True):
            if data.get('node_type') == 'paper' and node != paper_id:
                similarity = np.dot(target_embedding, data['embedding']) / \
                    (np.linalg.norm(target_embedding) * np.linalg.norm(data['embedding']))
                similarities.append((node, float(similarity), data.get('name')))

        # Sort by similarity and return top_k
        similarities.sort(key=lambda x: x[1], reverse=True)
        return similarities[:top_k]

    def get_graph_statistics(self) -> Dict[str, Any]:
        """
        Get statistics about the knowledge graph.

        Returns:
            Dictionary with graph statistics (diameter and average shortest
            path are included only for connected graphs).
        """
        paper_nodes = [n for n, d in self.graph.nodes(data=True)
                       if d.get('node_type') == 'paper']
        topic_nodes = [n for n, d in self.graph.nodes(data=True)
                       if d.get('node_type') == 'topic']

        stats = {
            'total_nodes': self.graph.number_of_nodes(),
            'total_edges': self.graph.number_of_edges(),
            'paper_nodes': len(paper_nodes),
            'topic_nodes': len(topic_nodes),
            'average_degree': sum(dict(self.graph.degree()).values()) / self.graph.number_of_nodes(),
            'density': nx.density(self.graph),
            'is_connected': nx.is_connected(self.graph),
        }

        # NOTE(review): nx.is_connected is evaluated twice; reusing
        # stats['is_connected'] would avoid the repeated traversal.
        if nx.is_connected(self.graph):
            stats['diameter'] = nx.diameter(self.graph)
            stats['average_shortest_path'] = nx.average_shortest_path_length(self.graph)

        return stats
384
-
385
-
386
# CLI entry point: builds the paper knowledge graph end-to-end —
# load papers from JSON, embed abstracts, link similar papers, save to disk.
# (Comments only here: click would surface a function docstring as help text.)
@click.command()
@click.option("-m", "--embedding-model", default="nomic-embed-text")
@click.option("-l", "--ollama-server-url", default="http://localhost:11434")
@click.option("-b", "--embedding-gen-batch-size", default=32)
@click.option("-w", "--max-parallel-workers", default=16)
@click.option("-p", "--limit-num-papers", default=None, type=int)
@click.option("-f", "--input-json-file", default=f"{PROJECT_ROOT}/data/neurips-2025-orals-posters.json")
@click.option("-o", "--output-file", default=f"{PROJECT_ROOT}/graphs/knowledge_graph.pkl")
@click.option("-s", "--similarity-threshold", default=0.8)
def main(
    embedding_model: str,
    ollama_server_url: str,
    embedding_gen_batch_size: int,
    max_parallel_workers: int,
    limit_num_papers: int,
    input_json_file: str,
    output_file: str,
    similarity_threshold: float
):

    # The "ollama/" prefix routes the model through litellm's ollama provider.
    kg = PaperKnowledgeGraph(
        embedding_model=f"ollama/{embedding_model}",
        ollama_base_url=ollama_server_url,
        embedding_gen_batch_size=embedding_gen_batch_size,
        max_parallel_workers=max_parallel_workers,
        limit_num_papers=limit_num_papers
    )

    # Load papers from JSON file
    kg.load_papers_from_json(input_json_file)

    # Build the graph (parallel embedding generation)
    kg.build_graph()

    # Optionally connect similar papers based on embeddings (parallel)
    kg.connect_similar_papers(similarity_threshold=similarity_threshold)

    # Save the graph to disk
    save_graph(
        graph=kg.graph,
        output_path=output_file
    )

    # Print statistics
    stats = kg.get_graph_statistics()
    LOGGER.info("\nGraph Statistics:")
    for key, value in stats.items():
        LOGGER.info(f" {key}: {value}")

    # Test run: Find similar papers
    if kg.papers_data:
        first_paper_id = kg.papers_data[0].get('uid', kg.papers_data[0].get('id'))
        LOGGER.debug(f"\nPapers similar to '{kg.graph.nodes[first_paper_id]['name']}':")
        similar = kg.find_similar_papers(first_paper_id, top_k=3)
        for pid, sim, name in similar:
            LOGGER.debug(f" - {name} (similarity: {sim:.3f})")


# Run
if __name__ == "__main__":
    main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/tools/knowledge_graph/graph_traversal_strategies/__init__.py DELETED
@@ -1,15 +0,0 @@
1
- from enum import Enum
2
-
3
- from agentic_nav.tools.knowledge_graph.graph_traversal_strategies.breadth_first_random import _graph_traversal_bfs_random
4
- from agentic_nav.tools.knowledge_graph.graph_traversal_strategies.depth_first_random import _graph_traversal_dfs_random
5
- from agentic_nav.tools.knowledge_graph.graph_traversal_strategies.neo4j_builtin import _graph_traversal_cypher
6
-
7
-
8
- class TraversalStrategy(Enum):
9
- """Traversal strategy options"""
10
- BFS = "breadth_first"
11
- DFS = "depth_first"
12
- BFS_RANDOM = "breadth_first_random"
13
- DFS_RANDOM = "depth_first_random"
14
-
15
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/tools/knowledge_graph/graph_traversal_strategies/breadth_first_random.py DELETED
@@ -1,80 +0,0 @@
1
- import neo4j
2
- from typing import List, Dict, Any, Optional, Set
3
- from collections import deque
4
- import random
5
-
6
-
7
- def _graph_traversal_bfs_random(
8
- db_driver: neo4j.Driver,
9
- start_paper_id: str,
10
- n_hops: int,
11
- relationship_type: Optional[str],
12
- max_results: Optional[int],
13
- max_branches: int
14
- ) -> List[Dict[str, Any]]:
15
- """
16
- BFS traversal with random neighbor sampling.
17
- Explores level by level, randomly sampling neighbors at each level.
18
- """
19
- with db_driver.session() as session:
20
- visited: Set[str] = {start_paper_id}
21
- queue = deque([(start_paper_id, 0)]) # (paper_id, distance)
22
- papers = []
23
-
24
- # Build relationship type filter
25
- if relationship_type:
26
- rel_filter = f":{':'.join([relationship_type])}"
27
- else:
28
- rel_filter = ""
29
-
30
- while queue:
31
- if max_results and type(max_results) is int and len(papers) >= max_results:
32
- break
33
-
34
- current_id, distance = queue.popleft()
35
-
36
- # Stop if we've reached max depth
37
- if distance >= n_hops:
38
- continue
39
-
40
- # Query to get all neighbors
41
- query = f"""
42
- MATCH (p:Paper {{id: $paper_id}})-[r{rel_filter}]->(neighbor:Paper)
43
- RETURN neighbor.id as id,
44
- neighbor.name as name,
45
- neighbor.abstract as abstract,
46
- neighbor.topic as topic
47
- """
48
-
49
- result = session.run(query, paper_id=current_id)
50
- neighbors = list(result)
51
-
52
- # Randomly sample neighbors
53
- if neighbors:
54
- sampled_neighbors = random.sample(
55
- neighbors,
56
- min(max_branches, len(neighbors))
57
- )
58
-
59
- for record in sampled_neighbors:
60
- neighbor_id = record['id']
61
-
62
- if neighbor_id not in visited:
63
- visited.add(neighbor_id)
64
-
65
- paper = {
66
- 'id': neighbor_id,
67
- 'name': record['name'],
68
- 'abstract': record['abstract'],
69
- 'topic': record['topic'],
70
- 'distance': distance + 1
71
- }
72
- papers.append(paper)
73
-
74
- # Add to queue for next level
75
- queue.append((neighbor_id, distance + 1))
76
-
77
- if max_results and type(max_results) is int and len(papers) >= max_results:
78
- break
79
-
80
- return papers
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/tools/knowledge_graph/graph_traversal_strategies/depth_first_random.py DELETED
@@ -1,78 +0,0 @@
1
- from typing import List, Dict, Any, Optional, Set
2
- import random
3
-
4
- import neo4j
5
-
6
-
7
- def _graph_traversal_dfs_random(
8
- db_driver: neo4j.Driver,
9
- start_paper_id: str,
10
- n_hops: int,
11
- relationship_type: Optional[str],
12
- max_results: Optional[int],
13
- max_branches: int
14
- ) -> List[Dict[str, Any]]:
15
- """
16
- DFS traversal with random neighbor sampling.
17
- Explores deeply along random branches before backtracking.
18
- """
19
- with db_driver.session() as session:
20
- visited: Set[str] = {start_paper_id}
21
- papers = []
22
-
23
- # Build relationship type filter
24
- if relationship_type:
25
- rel_filter = f":{':'.join([relationship_type])}"
26
- else:
27
- rel_filter = ""
28
-
29
- def dfs_traverse(paper_id: str, distance: int):
30
- """Recursive DFS helper"""
31
- if max_results and len(papers) >= max_results:
32
- return
33
-
34
- if distance >= n_hops:
35
- return
36
-
37
- # Query to get all neighbors
38
- query = f"""
39
- MATCH (p:Paper {{id: $paper_id}})-[r{rel_filter}]->(neighbor:Paper)
40
- RETURN neighbor.id as id,
41
- neighbor.name as name,
42
- neighbor.abstract as abstract,
43
- neighbor.topic as topic
44
- """
45
-
46
- result = session.run(query, paper_id=paper_id)
47
- neighbors = list(result)
48
-
49
- # Randomly sample neighbors
50
- if neighbors:
51
- sampled_neighbors = random.sample(
52
- neighbors,
53
- min(max_branches, len(neighbors))
54
- )
55
-
56
- for record in sampled_neighbors:
57
- neighbor_id = record['id']
58
-
59
- if neighbor_id not in visited:
60
- if max_results and len(papers) >= max_results:
61
- return
62
-
63
- visited.add(neighbor_id)
64
-
65
- paper = {
66
- 'id': neighbor_id,
67
- 'name': record['name'],
68
- 'abstract': record['abstract'],
69
- 'topic': record['topic'],
70
- 'distance': distance + 1
71
- }
72
- papers.append(paper)
73
-
74
- # Recursively explore this branch
75
- dfs_traverse(neighbor_id, distance + 1)
76
-
77
- dfs_traverse(start_paper_id, 0)
78
- return papers
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/tools/knowledge_graph/graph_traversal_strategies/neo4j_builtin.py DELETED
@@ -1,50 +0,0 @@
1
- from typing import List, Dict, Any, Optional
2
-
3
- import neo4j
4
-
5
-
6
- _DB_GRAPH_TRAVERSAL_QUERY = lambda rel_filter, n_hops: f"""
7
- MATCH path = (start:Paper)-[{rel_filter}*1..{n_hops}]-(related:Paper)
8
- WHERE start.id IN $start_paper_ids
9
- AND related.id <> start.id
10
- WITH related, min(length(path)) as min_distance
11
- RETURN DISTINCT related.id as id,
12
- related.name as name,
13
- related.abstract as abstract,
14
- related.topic as topic,
15
- min_distance as distance
16
- ORDER BY min_distance, related.name
17
- """
18
-
19
-
20
- def _graph_traversal_cypher(
21
- db_driver: neo4j.Driver,
22
- start_paper_id: str,
23
- n_hops: int,
24
- relationship_type: Optional[str],
25
- max_results: Optional[int]
26
- ) -> List[Dict[str, Any]]:
27
- """Original Cypher-based traversal (BFS/DFS handled by Neo4j)"""
28
- with db_driver.session() as session:
29
- if relationship_type:
30
- rel_filter = f":{':'.join([relationship_type])}"
31
- else:
32
- rel_filter = ""
33
-
34
- query = _DB_GRAPH_TRAVERSAL_QUERY(rel_filter=rel_filter, n_hops=n_hops)
35
- if max_results:
36
- query += f" LIMIT {max_results}"
37
-
38
- result = session.run(query, start_paper_ids=[start_paper_id])
39
- papers = []
40
- for record in result:
41
- paper = {
42
- 'id': record['id'],
43
- 'name': record['name'],
44
- 'abstract': record['abstract'],
45
- 'topic': record['topic'],
46
- 'distance': record['distance']
47
- }
48
- papers.append(paper)
49
-
50
- return papers
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/tools/knowledge_graph/neo4j_db_importer.py DELETED
@@ -1,537 +0,0 @@
1
- """
2
- Neo4j exporter for PaperKnowledgeGraph
3
- Exports NetworkX graph to Neo4j database with proper handling of embeddings and relationships
4
- """
5
- import logging
6
- import os
7
-
8
- import click
9
- import networkx as nx
10
- from neo4j import GraphDatabase
11
- from typing import Dict, Any
12
- import numpy as np
13
- from tqdm import tqdm
14
- from pathlib import Path
15
-
16
- from agentic_nav.tools.knowledge_graph.file_handler import load_graph
17
- from agentic_nav.utils.logger import setup_logging
18
-
19
-
20
- # Setup logging
21
- setup_logging(
22
- log_dir="logs",
23
- level=os.environ.get("AGENTIC_NAV_LOG_LEVEL", "INFO")
24
- )
25
- LOGGER = logging.getLogger(__name__)
26
- PROJECT_ROOT = Path(__file__).parent.parent.parent.parent
27
- NEO4J_USERNAME = os.environ.get("NEO4J_USERNAME", "neo4j")
28
- NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD")
29
- NEO4J_DB_URI = os.environ.get("NEO4J_DB_URI", "bolt://neo4j_db:7687")
30
-
31
-
32
- class Neo4jImporter:
33
- """Import PaperKnowledgeGraph to Neo4j database."""
34
-
35
- def __init__(
36
- self,
37
- uri: str = NEO4J_DB_URI,
38
- username: str = NEO4J_USERNAME,
39
- password: str = NEO4J_PASSWORD
40
- ):
41
- """Initialize Neo4j connection."""
42
- self.driver = GraphDatabase.driver(uri, auth=(username, password))
43
- self.driver.verify_connectivity()
44
- LOGGER.info(f"Connected to Neo4j at {uri}")
45
-
46
- def close(self):
47
- """Close the Neo4j driver connection."""
48
- self.driver.close()
49
-
50
- def clear_database(self, batch_size=500):
51
- with self.driver.session() as session:
52
- deleted_total = 0
53
- while True:
54
- result = session.run("""
55
- MATCH (n)
56
- WITH n LIMIT $batch_size
57
- DETACH DELETE n
58
- RETURN count(n) as deleted
59
- """,
60
- batch_size=batch_size
61
- )
62
-
63
- deleted = result.single()["deleted"]
64
- deleted_total += deleted
65
- LOGGER.info(f"Deleted {deleted} nodes (total: {deleted_total})")
66
-
67
- if deleted == 0:
68
- break
69
-
70
- def create_indexes(self, embedding_dimension: int = 768):
71
- """Create indexes for better query performance, including vector index."""
72
- with self.driver.session() as session:
73
- # Create index on paper IDs
74
- session.run("CREATE INDEX paper_id IF NOT EXISTS FOR (p:Paper) ON (p.id)")
75
-
76
- # Create index on topic names
77
- session.run("CREATE INDEX topic_name IF NOT EXISTS FOR (t:Topic) ON (t.name)")
78
-
79
- # Create index on author IDs
80
- session.run("CREATE INDEX author_id IF NOT EXISTS FOR (a:Author) ON (a.author_id)")
81
-
82
- # Create index on author names (useful for searching)
83
- session.run("CREATE INDEX author_name IF NOT EXISTS FOR (a:Author) ON (a.fullname)")
84
-
85
- # Create vector index for embeddings (Neo4j 5.11+)
86
- try:
87
- session.run("""
88
- CREATE VECTOR INDEX paper_embeddings IF NOT EXISTS
89
- FOR (p:Paper)
90
- ON p.embedding
91
- OPTIONS {
92
- indexConfig: {
93
- `vector.dimensions`: $dimension,
94
- `vector.similarity_function`: 'cosine'
95
- }
96
- }
97
- """, dimension=embedding_dimension)
98
- LOGGER.info(f"Created vector index for {embedding_dimension}-dimensional embeddings")
99
- except Exception as e:
100
- LOGGER.warning(f"Warning: Could not create vector index: {e}")
101
- LOGGER.warning("Vector indexes require Neo4j 5.11+ or Enterprise Edition")
102
-
103
- LOGGER.info("Created standard indexes")
104
-
105
- def _export_paper_nodes(self, kg: nx.Graph, batch_size: int):
106
- """Export paper nodes to Neo4j with all attributes."""
107
- paper_nodes = [(n, d) for n, d in kg.nodes(data=True)
108
- if d.get('node_type') == 'paper']
109
-
110
- LOGGER.info(f"\nExporting {len(paper_nodes)} paper nodes...")
111
-
112
- with self.driver.session() as session:
113
- for i in tqdm(range(0, len(paper_nodes), batch_size), desc="Paper nodes"):
114
- batch = paper_nodes[i:i + batch_size]
115
- papers_data = []
116
-
117
- for node_id, data in batch:
118
- # Convert embedding to list if it's numpy array
119
- embedding = data.get('embedding', [])
120
- if isinstance(embedding, np.ndarray):
121
- embedding = embedding.tolist()
122
-
123
- paper_dict = {
124
- "id": node_id,
125
- "name": data.get('name', ''),
126
- "abstract": data.get('abstract', ''),
127
- "topic": data.get('topic', ''),
128
- "keywords": data.get('keywords', []),
129
- "decision": data.get('decision', ''),
130
- "session": data.get('session', ''),
131
- "session_start_time": data.get('session_start_time', ''),
132
- "session_end_time": data.get('session_end_time', ''),
133
- "presentation_type": data.get('presentation_type', ''),
134
- "room_name": data.get('room_name', ''),
135
- "project_url": data.get('project_url', ''),
136
- "poster_position": data.get('poster_position', ''),
137
- "paper_url": data.get("paper_url", ""),
138
- "sourceid": data.get("sourceid", ""),
139
- "virtualsite_url": data.get("virtualsite_url", ""),
140
- 'embedding': embedding
141
- }
142
- papers_data.append(paper_dict)
143
-
144
- # Batch create paper nodes
145
- session.run("""
146
- UNWIND $papers AS paper
147
- CREATE (p:Paper {
148
- id: paper.id,
149
- name: paper.name,
150
- abstract: paper.abstract,
151
- topic: paper.topic,
152
- keywords: paper.keywords,
153
- decision: paper.decision,
154
- session: paper.session,
155
- session_start_time: paper.session_start_time,
156
- session_end_time: paper.session_end_time,
157
- presentation_type: paper.presentation_type,
158
- room_name: paper.room_name,
159
- project_url: paper.project_url,
160
- poster_position: paper.poster_position,
161
- paper_url: paper.paper_url,
162
- sourceid: paper.sourceid,
163
- virtualsite_url: paper.virtualsite_url,
164
- embedding: paper.embedding
165
- })
166
- """, papers=papers_data)
167
-
168
- LOGGER.info(f"Exported {len(paper_nodes)} paper nodes")
169
-
170
- def _export_topic_hierarchy(self, kg: nx.Graph):
171
- """
172
- Export topic nodes with hierarchical structure to Neo4j.
173
- Splits topics like "Deep Learning->Theory" into separate nodes with parent-child relationships.
174
- """
175
- # Collect all unique topic paths from paper nodes
176
- topic_paths = set()
177
- for node_id, data in kg.nodes(data=True):
178
- if data.get('node_type') == 'paper':
179
- topic = data.get('topic', '')
180
- if topic:
181
- topic_paths.add(topic)
182
-
183
- LOGGER.info(f"Processing {len(topic_paths)} unique topic paths...")
184
-
185
- # Parse topic paths and create hierarchy
186
- all_topics = set()
187
- topic_relationships = []
188
-
189
- for path in topic_paths:
190
- parts = [p.strip() for p in path.split('->')]
191
-
192
- # Add all topic parts
193
- for part in parts:
194
- all_topics.add(part)
195
-
196
- # Create parent-child relationships
197
- for i in range(len(parts) - 1):
198
- topic_relationships.append({
199
- 'parent': parts[i],
200
- 'child': parts[i + 1]
201
- })
202
-
203
- LOGGER.info(
204
- f"Creating {len(all_topics)} topic nodes with {len(set(tuple(r.items()) for r in topic_relationships))} "
205
- f"hierarchical relationships..."
206
- )
207
-
208
- with self.driver.session() as session:
209
- # Create all topic nodes (using MERGE to avoid duplicates)
210
- topics_data = [{'name': topic} for topic in all_topics]
211
- session.run("""
212
- UNWIND $topics AS topic
213
- MERGE (t:Topic {name: topic.name})
214
- """, topics=topics_data)
215
-
216
- # Create hierarchical relationships between topics (deduplicate first)
217
- if topic_relationships:
218
- # Remove duplicates
219
- unique_rels = list({(r['parent'], r['child']): r for r in topic_relationships}.values())
220
- session.run("""
221
- UNWIND $rels AS rel
222
- MATCH (parent:Topic {name: rel.parent})
223
- MATCH (child:Topic {name: rel.child})
224
- MERGE (child)-[:SUBTOPIC_OF]->(parent)
225
- """, rels=unique_rels)
226
-
227
- LOGGER.info(f"Exported {len(all_topics)} topic nodes with hierarchy")
228
-
229
- def _connect_papers_to_topics(self, kg: nx.Graph, batch_size: int):
230
- """
231
- Connect papers to their leaf topic nodes.
232
- For "Deep Learning->Theory", connects paper to "Theory" node.
233
- """
234
- paper_topic_connections = []
235
-
236
- for node_id, data in kg.nodes(data=True):
237
- if data.get('node_type') == 'paper':
238
- topic = data.get('topic', '')
239
- if topic:
240
- # Get the leaf topic (last part after splitting)
241
- parts = [p.strip() for p in topic.split('->')]
242
- leaf_topic = parts[-1]
243
-
244
- paper_topic_connections.append({
245
- 'paper_id': node_id,
246
- 'topic_name': leaf_topic,
247
- 'full_path': topic # Store full path as property
248
- })
249
-
250
- LOGGER.info(f"Connecting {len(paper_topic_connections)} papers to topics...")
251
-
252
- with self.driver.session() as session:
253
- for i in tqdm(range(0, len(paper_topic_connections), batch_size),
254
- desc="Paper-Topic connections"):
255
- batch = paper_topic_connections[i:i + batch_size]
256
-
257
- session.run("""
258
- UNWIND $connections AS conn
259
- MATCH (p:Paper {id: conn.paper_id})
260
- MATCH (t:Topic {name: conn.topic_name})
261
- MERGE (p)-[r:BELONGS_TO_TOPIC]->(t)
262
- SET r.full_path = conn.full_path
263
- """, connections=batch)
264
-
265
- LOGGER.info(f"Connected papers to leaf topics")
266
-
267
- def _export_similarity_relationships(self, kg: nx.Graph, batch_size: int):
268
- """Export similarity relationships between papers to Neo4j."""
269
- # Filter only similarity edges
270
- similarity_edges = [
271
- (source, target, data)
272
- for source, target, data in kg.edges(data=True)
273
- if data.get('relationship') == 'similar_to'
274
- ]
275
-
276
- LOGGER.info(f"Exporting {len(similarity_edges)} similarity relationships...")
277
-
278
- with self.driver.session() as session:
279
- for i in tqdm(range(0, len(similarity_edges), batch_size),
280
- desc="Similarity relationships"):
281
- batch = similarity_edges[i:i + batch_size]
282
-
283
- edges_data = [{
284
- 'source': source,
285
- 'target': target,
286
- 'similarity': data.get('similarity', 0.0)
287
- } for source, target, data in batch]
288
-
289
- session.run("""
290
- UNWIND $edges AS edge
291
- MATCH (p1:Paper {id: edge.source})
292
- MATCH (p2:Paper {id: edge.target})
293
- MERGE (p1)-[:SIMILAR_TO {similarity: edge.similarity}]->(p2)
294
- """, edges=edges_data)
295
-
296
- LOGGER.info(f"Exported {len(similarity_edges)} similarity relationships")
297
-
298
- def _export_authors_and_relationships(self, kg: nx.Graph, batch_size: int):
299
- """
300
- Export author nodes from NetworkX graph (where they already exist as separate nodes)
301
- and create IS_AUTHOR_OF relationships between authors and papers.
302
-
303
- Author nodes in NetworkX have composite IDs like "12345 - John Doe"
304
- """
305
- # Collect author nodes from the graph
306
- author_nodes = [
307
- (node_id, data)
308
- for node_id, data in kg.nodes(data=True)
309
- if data.get('node_type') != 'paper' and data.get('node_type') != 'topic'
310
- ]
311
-
312
- LOGGER.info(f"Found {len(author_nodes)} author nodes in graph...")
313
-
314
- # Extract author data
315
- all_authors = []
316
- for node_id, data in author_nodes:
317
- # Parse composite ID "12345 - John Doe"
318
- parts = node_id.split(' - ', 1)
319
- author_id = parts[0].strip() if len(parts) > 0 else ""
320
-
321
- author_dict = {
322
- 'composite_id': node_id, # Store the full composite ID
323
- 'author_id': author_id,
324
- 'fullname': data.get('fullname', ''),
325
- 'institution': data.get('institution', ''),
326
- 'url': data.get('url', '')
327
- }
328
- all_authors.append(author_dict)
329
-
330
- LOGGER.info(f"Exporting {len(all_authors)} unique authors...")
331
-
332
- with self.driver.session() as session:
333
- # Create author nodes in batches
334
- for i in tqdm(range(0, len(all_authors), batch_size), desc="Author nodes"):
335
- batch = all_authors[i:i + batch_size]
336
-
337
- session.run("""
338
- UNWIND $authors AS author
339
- MERGE (a:Author {composite_id: author.composite_id})
340
- ON CREATE SET
341
- a.author_id = author.author_id,
342
- a.fullname = author.fullname,
343
- a.institution = author.institution,
344
- a.url = author.url
345
- ON MATCH SET
346
- a.author_id = author.author_id,
347
- a.fullname = author.fullname,
348
- a.institution = author.institution,
349
- a.url = author.url
350
- """, authors=batch)
351
-
352
- LOGGER.info(f"Exported {len(all_authors)} author nodes")
353
-
354
- # Method 1: Try to collect author-paper relationships from graph edges
355
- author_paper_edges = [
356
- (source, target, data)
357
- for source, target, data in kg.edges(data=True)
358
- if data.get('relationship') == 'is_author_of'
359
- ]
360
-
361
- LOGGER.info(f"Found {len(author_paper_edges)} IS_AUTHOR_OF edges in graph")
362
-
363
- # Method 2: If no edges found, extract from paper node 'authors' attribute
364
- if len(author_paper_edges) == 0:
365
- LOGGER.warning("No IS_AUTHOR_OF edges found in graph. Extracting from paper 'authors' attribute...")
366
-
367
- paper_author_relationships = []
368
- for node_id, data in kg.nodes(data=True):
369
- if data.get('node_type') == 'paper':
370
- authors = data.get('authors', [])
371
-
372
- if authors and isinstance(authors, list) and len(authors) > 0:
373
- # Check if authors are stored as dicts
374
- if isinstance(authors[0], dict):
375
- for author in authors:
376
- author_id = str(author.get('id', ''))
377
- fullname = author.get('fullname', '')
378
- if author_id and fullname:
379
- composite_id = f"{author_id} - {fullname}"
380
- paper_author_relationships.append({
381
- 'author_id': composite_id,
382
- 'paper_id': node_id
383
- })
384
-
385
- LOGGER.info(f"Extracted {len(paper_author_relationships)} relationships from paper attributes")
386
-
387
- # Create relationships from extracted data
388
- with self.driver.session() as session:
389
- for i in tqdm(range(0, len(paper_author_relationships), batch_size),
390
- desc="Author-Paper relationships"):
391
- batch = paper_author_relationships[i:i + batch_size]
392
-
393
- session.run("""
394
- UNWIND $edges AS edge
395
- MATCH (a:Author {composite_id: edge.author_id})
396
- MATCH (p:Paper {id: edge.paper_id})
397
- MERGE (a)-[:IS_AUTHOR_OF]->(p)
398
- """, edges=batch)
399
-
400
- LOGGER.info(f"Created {len(paper_author_relationships)} author-paper relationships")
401
- else:
402
- # Create relationships from graph edges
403
- with self.driver.session() as session:
404
- for i in tqdm(range(0, len(author_paper_edges), batch_size),
405
- desc="Author-Paper relationships"):
406
- batch = author_paper_edges[i:i + batch_size]
407
-
408
- edges_data = [{
409
- 'author_id': source, # composite ID like "12345 - John Doe"
410
- 'paper_id': target
411
- } for source, target, data in batch]
412
-
413
- session.run("""
414
- UNWIND $edges AS edge
415
- MATCH (a:Author {composite_id: edge.author_id})
416
- MATCH (p:Paper {id: edge.paper_id})
417
- MERGE (a)-[:IS_AUTHOR_OF]->(p)
418
- """, edges=edges_data)
419
-
420
- LOGGER.info(f"Created {len(author_paper_edges)} author-paper relationships")
421
-
422
- def import_graph(self, kg_path: str, batch_size: int = 100, embedding_dimension: int = 768):
423
- """Import the entire knowledge graph to Neo4j."""
424
- LOGGER.info(f"Loading graph from path {kg_path}")
425
- kg = load_graph(kg_path)
426
-
427
- LOGGER.info("Starting Neo4j export...")
428
-
429
- # Clear and prepare database
430
- self.clear_database()
431
- self.create_indexes(embedding_dimension)
432
-
433
- # Export paper nodes
434
- self._export_paper_nodes(kg, batch_size)
435
-
436
- # Export authors and author-paper relationships
437
- self._export_authors_and_relationships(kg, batch_size)
438
-
439
- # Export topic hierarchy
440
- self._export_topic_hierarchy(kg)
441
-
442
- # Connect papers to topics
443
- self._connect_papers_to_topics(kg, batch_size)
444
-
445
- # Export similarity relationships
446
- self._export_similarity_relationships(kg, batch_size)
447
-
448
- LOGGER.info("Export completed successfully!")
449
-
450
- def verify_export(self) -> Dict[str, Any]:
451
- """Verify the export by checking node and relationship counts."""
452
- with self.driver.session() as session:
453
- # Count papers
454
- result = session.run("MATCH (p:Paper) RETURN count(p) as count")
455
- paper_count = result.single()['count']
456
-
457
- # Count topics
458
- result = session.run("MATCH (t:Topic) RETURN count(t) as count")
459
- topic_count = result.single()['count']
460
-
461
- # Count authors
462
- result = session.run("MATCH (a:Author) RETURN count(a) as count")
463
- author_count = result.single()['count']
464
-
465
- # Count relationships
466
- result = session.run("MATCH ()-[r]->() RETURN count(r) as count")
467
- rel_count = result.single()['count']
468
-
469
- # Count similarity relationships
470
- result = session.run("MATCH ()-[r:SIMILAR_TO]->() RETURN count(r) as count")
471
- similarity_count = result.single()['count']
472
-
473
- # Count topic hierarchy relationships
474
- result = session.run("MATCH ()-[r:SUBTOPIC_OF]->() RETURN count(r) as count")
475
- subtopic_count = result.single()['count']
476
-
477
- # Count author relationships (updated relationship name)
478
- result = session.run("MATCH ()-[r:IS_AUTHOR_OF]->() RETURN count(r) as count")
479
- is_author_of_count = result.single()['count']
480
-
481
- stats = {
482
- 'papers': paper_count,
483
- 'topics': topic_count,
484
- 'authors': author_count,
485
- 'total_relationships': rel_count,
486
- 'similarity_relationships': similarity_count,
487
- 'subtopic_relationships': subtopic_count,
488
- 'is_author_of_relationships': is_author_of_count
489
- }
490
-
491
- LOGGER.info("Neo4j Database Statistics:")
492
- for key, value in stats.items():
493
- LOGGER.info(f" {key}: {value}")
494
-
495
- return stats
496
-
497
-
498
- @click.command()
499
- @click.option("-g", "--graph-path", help="Path to the knowledge graph file (pickle).", default=f"{PROJECT_ROOT}/graphs/knowledge_graph.pkl")
500
- @click.option("-l", "--neo4j-uri", help="Database URI", default="bolt://localhost:7687")
501
- @click.option("-u", "--neo4j-username", help="Database user", default=NEO4J_USERNAME)
502
- @click.option("-p", "--neo4j-password", help="Database password", default=NEO4J_PASSWORD)
503
- @click.option("-b", "--batch-size", help="Batch size for node insertion", default=100)
504
- @click.option("-e", "--embedding-dimension", help="Vector embedding dimensions", default=768)
505
- def main(
506
- graph_path: str,
507
- neo4j_uri: str,
508
- neo4j_username: str,
509
- neo4j_password: str,
510
- batch_size: int = 100,
511
- embedding_dimension: int = 768
512
- ):
513
- """
514
- Convenience function to export a knowledge graph to Neo4j.
515
-
516
- Args:
517
- graph_path: PaperKnowledgeGraph instance
518
- neo4j_uri: Neo4j connection URI
519
- neo4j_username: Neo4j username
520
- neo4j_password: Neo4j password
521
- batch_size: Batch size for processing
522
- embedding_dimension: Dimension of embedding vectors (default: 768)
523
- """
524
- importer = Neo4jImporter(neo4j_uri, neo4j_username, neo4j_password)
525
- try:
526
- importer.import_graph(
527
- graph_path,
528
- batch_size,
529
- embedding_dimension
530
- )
531
- importer.verify_export()
532
- finally:
533
- importer.close()
534
-
535
-
536
- if __name__ == "__main__":
537
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/tools/knowledge_graph/retriever.py DELETED
@@ -1,612 +0,0 @@
1
- import logging
2
- import numpy as np
3
- import random
4
- import os
5
-
6
- from neo4j import GraphDatabase
7
- from pathlib import Path
8
-
9
- from typing import List, Dict, Any, Optional
10
-
11
- from agentic_nav.tools.knowledge_graph.graph_traversal_strategies import (
12
- TraversalStrategy,
13
- _graph_traversal_dfs_random,
14
- _graph_traversal_cypher,
15
- _graph_traversal_bfs_random
16
- )
17
-
18
- from agentic_nav.utils.embedding_generator import batch_embed_documents
19
-
20
-
21
- PROJECT_ROOT = Path(__file__).parent.parent.parent.parent
22
- LOGGER = logging.getLogger(__name__)
23
- EMBEDDING_MODEL_NAME = os.environ.get("EMBEDDING_MODEL_NAME", "nomic-embed-text")
24
- EMBEDDING_MODEL_API_BASE = os.environ.get("EMBEDDING_MODEL_API_BASE", "http://localhost:11435")
25
- NEO4J_DB_URI = os.environ.get("NEO4J_DB_URI", "bolt://neo4j_db:7687")
26
- NEO4J_DB_NODE_RETURN_LIMIT = int(os.environ.get("NEO4J_DB_NODE_RETURN_LIMIT", 200))
27
-
28
-
29
- class Neo4jGraphWorker:
30
- """Search and traversal operations for Neo4j paper knowledge graph."""
31
-
32
- _DB_SIMILARITY_SEARCH_QUERY = """
33
- MATCH (node:Paper)
34
- WHERE ($day IS NULL OR node.session_start_time IS NOT NULL)
35
- WITH node
36
- WHERE ($day IS NULL OR date(datetime(node.session_start_time)).dayOfWeek = $day)
37
- AND ($time_ranges IS NULL OR
38
- any(range IN $time_ranges WHERE
39
- time(datetime(node.session_start_time)) >= time(range.start)
40
- AND time(datetime(node.session_start_time)) <= time(range.end)))
41
- WITH collect(node) as filtered_nodes
42
- CALL db.index.vector.queryNodes('paper_embeddings', $top_k, $query_embedding)
43
- YIELD node, score
44
- WHERE node IN filtered_nodes OR ($day IS NULL AND $time_ranges IS NULL)
45
- RETURN node.id as id,
46
- node.name as name,
47
- node.abstract as abstract,
48
- node.topic as topic,
49
- node.paper_url as paper_url,
50
- node.session as session,
51
- node.session_start_time as session_start_time,
52
- node.session_end_time as session_end_time,
53
- node.presentation_type as presentation_type,
54
- node.room_name as room_name,
55
- node.project_url as project_url,
56
- node.poster_position as poster_position,
57
- node.sourceid as sourceid,
58
- node.virtualsite_url as virtualsite_url,
59
- node.decision as decision,
60
- [(a:Author)-[:IS_AUTHOR_OF]->(node) | a] as authors,
61
- score
62
- ORDER BY score DESC
63
- LIMIT $limit
64
- """
65
-
66
- _DB_NEIGHBORHOOD_SEARCH_QUERY = """
67
- MATCH (p:Paper)-[r]-(neighbor)
68
- WHERE p.id IN $paper_ids
69
- AND type(r) IN $allowed_rel_types
70
- AND 'Paper' IN labels(neighbor)
71
- AND (type(r) <> 'SIMILAR_TO' OR r.similarity >= $min_similarity)
72
- RETURN neighbor.id as id,
73
- neighbor.name as name,
74
- neighbor.abstract as abstract,
75
- neighbor.topic as topic,
76
- neighbor.paper_url as paper_url,
77
- neighbor.session as session,
78
- neighbor.session_start_time as session_start_time,
79
- neighbor.session_end_time as session_end_time,
80
- neighbor.presentation_type as presentation_type,
81
- neighbor.room_name as room_name,
82
- neighbor.project_url as project_url,
83
- neighbor.poster_position as poster_position,
84
- neighbor.sourceid as sourceid,
85
- neighbor.virtualsite_url as virtualsite_url,
86
- neighbor.decision as decision,
87
- [(a:Author)-[:IS_AUTHOR_OF]->(neighbor) | a] as authors,
88
- p.id as source_paper_id,
89
- type(r) as relationship_type,
90
- CASE WHEN type(r) = 'SIMILAR_TO' THEN r.similarity ELSE null END as similarity
91
- ORDER BY similarity DESC
92
- LIMIT $limit
93
- """
94
-
95
- # Find the DB query for graph traversal in the graph_traversal sub-folder.
96
- _DB_PAPERS_BY_AUTHOR = """
97
- MATCH (a:Author)-[:IS_AUTHOR_OF]->(p:Paper)
98
- WHERE a.fullname = $author_name
99
- WITH p, collect(DISTINCT a) as all_authors
100
- RETURN p.id as id,
101
- p.name as name,
102
- p.abstract as abstract,
103
- p.topic as topic,
104
- p.paper_url as paper_url,
105
- p.decision as decision,
106
- p.session as session,
107
- p.session_start_time as session_start_time,
108
- p.session_end_time as session_end_time,
109
- p.presentation_type as presentation_type,
110
- p.room_name as room_name,
111
- p.project_url as project_url,
112
- p.poster_position as poster_position,
113
- p.sourceid as sourceid,
114
- p.virtualsite_url as virtualsite_url,
115
- all_authors as authors
116
- ORDER BY p.name
117
- LIMIT $limit
118
- """
119
-
120
- _DB_PAPERS_BY_AUTHOR_FUZZY = """
121
- MATCH (a:Author)-[:IS_AUTHOR_OF]->(p:Paper)
122
- WHERE toLower(a.fullname) CONTAINS toLower($author_name)
123
- WITH p, collect(DISTINCT a) as all_authors
124
- RETURN p.id as id,
125
- p.name as name,
126
- p.abstract as abstract,
127
- p.topic as topic,
128
- p.paper_url as paper_url,
129
- p.decision as decision,
130
- p.session as session,
131
- p.session_start_time as session_start_time,
132
- p.session_end_time as session_end_time,
133
- p.presentation_type as presentation_type,
134
- p.room_name as room_name,
135
- p.project_url as project_url,
136
- p.poster_position as poster_position,
137
- p.sourceid as sourceid,
138
- p.virtualsite_url as virtualsite_url,
139
- all_authors as authors
140
- ORDER BY p.name
141
- LIMIT $limit
142
- """
143
-
144
- _DB_PAPERS_BY_TOPIC = """
145
- MATCH (p:Paper)-[:BELONGS_TO_TOPIC]->(t:Topic {name: $topic_name})
146
- RETURN p.id as id,
147
- p.name as name,
148
- p.abstract as abstract,
149
- p.topic as topic,
150
- p.paper_url as paper_url,
151
- p.decision as decision,
152
- p.session as session,
153
- p.session_start_time as session_start_time,
154
- p.session_end_time as session_end_time,
155
- p.presentation_type as presentation_type,
156
- p.room_name as room_name,
157
- p.project_url as project_url,
158
- p.poster_position as poster_position,
159
- p.sourceid as sourceid,
160
- p.virtualsite_url as virtualsite_url,
161
- [(a:Author)-[:IS_AUTHOR_OF]->(p) | a] as authors
162
- ORDER BY p.name
163
- LIMIT $limit
164
- """
165
-
166
- _DB_PAPERS_BY_TOPIC_AND_SUBTOPIC = """
167
- MATCH (t:Topic {name: $topic_name})
168
- OPTIONAL MATCH (subtopic:Topic)-[:SUBTOPIC_OF*]->(t)
169
- WITH t, collect(DISTINCT subtopic) + t as all_topics
170
- UNWIND all_topics as topic
171
- MATCH (p:Paper)-[:BELONGS_TO_TOPIC]->(topic)
172
- WITH DISTINCT p
173
- RETURN p.id as id,
174
- p.name as name,
175
- p.abstract as abstract,
176
- p.topic as topic,
177
- p.paper_url as paper_url,
178
- p.decision as decision,
179
- p.session as session,
180
- p.session_start_time as session_start_time,
181
- p.session_end_time as session_end_time,
182
- p.presentation_type as presentation_type,
183
- p.room_name as room_name,
184
- p.project_url as project_url,
185
- p.poster_position as poster_position,
186
- p.sourceid as sourceid,
187
- p.virtualsite_url as virtualsite_url,
188
- [(a:Author)-[:IS_AUTHOR_OF]->(p) | a] as authors
189
- ORDER BY p.name
190
- LIMIT $limit
191
- """
192
-
193
- def __init__(
194
- self,
195
- uri: str = NEO4J_DB_URI,
196
- username: str = "neo4j",
197
- password: str = "password"
198
- ):
199
- """Initialize Neo4j connection."""
200
- self.driver = GraphDatabase.driver(uri, auth=(username, password))
201
- self.driver.verify_connectivity()
202
- LOGGER.info(f"Connected to Neo4j at {uri}")
203
-
204
- def close(self):
205
- """Close the Neo4j driver connection."""
206
- self.driver.close()
207
-
208
- @staticmethod
209
- def embed_user_query(
210
- text: str,
211
- embedding_model: str = f"ollama/{EMBEDDING_MODEL_NAME}",
212
- api_base: str = EMBEDDING_MODEL_API_BASE
213
- ):
214
- emb = batch_embed_documents(
215
- texts=[text],
216
- batch_size=1,
217
- api_base=api_base,
218
- embedding_model=embedding_model
219
- ).tolist()[0]
220
-
221
- return emb
222
-
223
- def similarity_search(
224
- self,
225
- user_query: str,
226
- day: Optional[str] = None,
227
- timeslots: Optional[List[str]] = None,
228
- top_k: int = 5,
229
- min_similarity: Optional[float] = None
230
- ) -> List[Dict[str, Any]]:
231
- """
232
- Perform vector similarity search on paper embeddings.
233
-
234
- Args:
235
- user_query: User query (str)
236
- day: Conference day as date string (e.g., "2024-12-10") or None
237
- timeslots: List of time ranges as strings (e.g., ["09:00:00-12:00:00"]) or None
238
- top_k: Number of top results to return
239
- min_similarity: Optional minimum similarity threshold (0-1)
240
-
241
- Returns:
242
- List of dictionaries containing paper information and similarity scores
243
- """
244
-
245
- # Generate text embedding
246
- query_embedding = self.embed_user_query(
247
- text=user_query
248
- )
249
-
250
- # Convert numpy array to list if needed
251
- if isinstance(query_embedding, np.ndarray):
252
- query_embedding = query_embedding.tolist()
253
-
254
- # Parse day and timeslots for the query
255
- day_filter = None
256
- time_ranges = []
257
-
258
- if day:
259
- # Convert date string to day of week (1=Monday, 7=Sunday)
260
- from datetime import datetime
261
- date_obj = datetime.strptime(day, "%Y-%m-%d")
262
- day_filter = date_obj.isoweekday()
263
-
264
- if timeslots:
265
- # Parse timeslot ranges (e.g., "09:00:00-12:00:00")
266
- for slot in timeslots:
267
- if '-' in slot:
268
- start, end = slot.split('-')
269
- time_ranges.append({'start': start.strip(), 'end': end.strip()})
270
- else:
271
- # If no range, assume it's a single time point with some buffer
272
- time_ranges.append({'start': slot.strip(), 'end': slot.strip()})
273
-
274
- with self.driver.session() as session:
275
- result = session.run(
276
- self._DB_SIMILARITY_SEARCH_QUERY,
277
- query_embedding=query_embedding,
278
- top_k=top_k,
279
- limit=NEO4J_DB_NODE_RETURN_LIMIT,
280
- day=day_filter,
281
- time_ranges=time_ranges if time_ranges else None
282
- )
283
- papers = []
284
- for record in result:
285
- paper = {
286
- 'id': record['id'],
287
- 'name': record['name'],
288
- 'abstract': record['abstract'],
289
- 'topic': record['topic'],
290
- 'similarity_score': record['score'],
291
- 'paper_url': record['paper_url'],
292
- 'decision': record['decision'],
293
- 'session': record['session'],
294
- 'session_start_time': record['session_start_time'],
295
- 'session_end_time': record['session_end_time'],
296
- 'presentation_type': record['presentation_type'],
297
- 'room_name': record['room_name'],
298
- 'github_url': record['project_url'],
299
- 'poster_position': record['poster_position'],
300
- 'sourceid': record['sourceid'],
301
- 'virtualsite_url': record['virtualsite_url'],
302
- 'authors': [a['fullname'] for a in record['authors']]
303
- }
304
-
305
- # Apply minimum similarity filter if specified
306
- if min_similarity is None or paper['similarity_score'] >= min_similarity:
307
- papers.append(paper)
308
-
309
- return papers
310
-
311
- def neighborhood_search(
312
- self,
313
- paper_id: str,
314
- relationship_types: List[str] = ["SIMILAR_TO"],
315
- min_similarity: float = 0.7
316
- ) -> Dict[str, Any]:
317
- """
318
- Find immediate neighbors of given paper nodes.
319
-
320
- Args:
321
- paper_id: Paper ID to find neighbors for
322
- relationship_types: Optional list of relationship types to filter
323
- (e.g., ['SIMILAR_TO', 'IS_AUTHOR_OF', 'BELONGS_TO_TOPIC', 'SUBTOPIC_OF'])
324
- min_similarity (float): A minimum similarity score in the range of 0 - 1. Often a good value is 0.75 or 0.8.
325
-
326
-
327
- Returns:
328
- Dictionary with neighbors grouped by relationship type
329
- """
330
- allowed_rel_types = ['SIMILAR_TO', 'IS_AUTHOR_OF', 'BELONGS_TO_TOPIC', 'SUBTOPIC_OF']
331
- for rel_type in relationship_types:
332
- if rel_type not in allowed_rel_types:
333
- raise ValueError(f"Unsupported relationship type: {rel_type}. Supported relationship types: {allowed_rel_types}")
334
-
335
- with self.driver.session() as session:
336
- result = session.run(
337
- self._DB_NEIGHBORHOOD_SEARCH_QUERY,
338
- paper_ids=[paper_id],
339
- allowed_rel_types=relationship_types,
340
- min_similarity=min_similarity,
341
- limit=NEO4J_DB_NODE_RETURN_LIMIT
342
- )
343
-
344
- # Organize results by relationship type
345
- neighbors = {}
346
-
347
- for record in result:
348
- rel_type = record["relationship_type"]
349
- if rel_type not in neighbors.keys():
350
- neighbors[rel_type] = []
351
- else:
352
- neighbors[rel_type].append(record)
353
-
354
- return neighbors
355
-
356
- def graph_traversal(
357
- self,
358
- start_paper_id: str,
359
- n_hops: int = 2,
360
- relationship_type: Optional[str] = None,
361
- max_results: Optional[int] = None,
362
- strategy: str = "breadth_first_random",
363
- max_branches: Optional[int] = None,
364
- random_seed: Optional[int] = None
365
- ) -> List[Dict[str, Any]]:
366
- """
367
- Traverse the graph for n hops from starting paper nodes.
368
-
369
- Args:
370
- start_paper_id: Paper ID to start traversal from
371
- n_hops: Number of hops to traverse (1-5 recommended)
372
- relationship_type: Optional list of relationship types to traverse
373
- max_results: Optional maximum number of results to return
374
- strategy: Traversal strategy (breadth_first, depth_first, breadth_first_random, depth_first_random)
375
- max_branches: Maximum number of random neighbors to explore per node (only for random strategies)
376
- random_seed: Optional seed for reproducible random sampling
377
-
378
- Returns:
379
- List of papers found through traversal with distance information
380
- """
381
- if random_seed is not None:
382
- random.seed(random_seed)
383
-
384
- # Use original Cypher-based approach for non-random strategies
385
- if strategy in ["breadth_first", "depth_first"]:
386
- LOGGER.debug(f"Doing a graph traversal with neo4j's built-in strategy")
387
- return _graph_traversal_cypher(
388
- self.driver,
389
- start_paper_id,
390
- n_hops,
391
- relationship_type,
392
- max_results
393
- )
394
-
395
- # Use Python-based traversal for random strategies
396
- elif strategy == "breadth_first_random":
397
- LOGGER.debug(f"Doing a graph traversal with a random sampling breadth first strategy")
398
- return _graph_traversal_bfs_random(
399
- self.driver,
400
- start_paper_id,
401
- n_hops,
402
- relationship_type,
403
- max_results,
404
- max_branches or 3
405
- )
406
-
407
- elif strategy == "depth_first_random":
408
- LOGGER.debug(f"Doing a graph traversal with a random sampling depth first strategy")
409
- return _graph_traversal_dfs_random(
410
- self.driver,
411
- start_paper_id,
412
- n_hops,
413
- relationship_type,
414
- max_results,
415
- max_branches or 3
416
- )
417
-
418
- else:
419
- raise ValueError(f"Unsupported traversal strategy: {strategy}. "
420
- f"Supported strategies: breadth_first, depth_first, breadth_first_random, depth_first_random")
421
-
422
- def search_papers_by_author(
423
- self,
424
- author_name: str,
425
- fuzzy: bool = True
426
- ) -> List[Dict[str, Any]]:
427
- """
428
- Find all papers by a specific author.
429
-
430
- Args:
431
- author_name: Author name or partial name
432
- fuzzy: Whether to use fuzzy matching (CONTAINS vs exact match)
433
-
434
- Returns:
435
- List of papers by the author
436
- """
437
- with self.driver.session() as session:
438
- if fuzzy:
439
- query = self._DB_PAPERS_BY_AUTHOR_FUZZY
440
- else:
441
- query = self._DB_PAPERS_BY_AUTHOR
442
-
443
- result = session.run(query, author_name=author_name)
444
-
445
- papers = []
446
- for record in result:
447
- paper = {
448
- 'id': record['id'],
449
- 'name': record['name'],
450
- 'abstract': record['abstract'],
451
- 'topic': record['topic'],
452
- 'author_name': record['author_name'],
453
- 'paper_url': record['paper_url'],
454
- 'decision': record['decision'],
455
- 'session': record['session'],
456
- 'session_start_time': record['session_start_time'],
457
- 'session_end_time': record['session_end_time'],
458
- 'presentation_type': record['presentation_type'],
459
- 'room_name': record['room_name'],
460
- 'github_url': record['project_url'],
461
- 'poster_position': record['poster_position'],
462
- 'sourceid': record['sourceid'],
463
- 'virtualsite_url': record['virtualsite_url'],
464
- }
465
- papers.append(paper)
466
-
467
- return papers
468
-
469
- def search_papers_by_topic(
470
- self,
471
- topic_name: str,
472
- include_subtopics: bool = True
473
- ) -> List[Dict[str, Any]]:
474
- """
475
- Find all papers in a specific topic.
476
-
477
- Args:
478
- topic_name: Topic name
479
- include_subtopics: Whether to include papers from subtopics
480
-
481
- Returns:
482
- List of papers in the topic
483
- """
484
- with self.driver.session() as session:
485
- if include_subtopics:
486
- # Find topic and all its subtopics
487
- query = self._DB_PAPERS_BY_TOPIC_AND_SUBTOPIC
488
- else:
489
- query = self._DB_PAPERS_BY_TOPIC
490
-
491
- result = session.run(query, topic_name=topic_name, limit=NEO4J_DB_NODE_RETURN_LIMIT)
492
-
493
- papers = []
494
- for record in result:
495
- paper = {
496
- 'id': record['id'],
497
- 'name': record['name'],
498
- 'abstract': record['abstract'],
499
- 'topic': record['topic'],
500
- 'paper_url': record['paper_url'],
501
- 'decision': record['decision'],
502
- 'session': record['session'],
503
- 'session_start_time': record['session_start_time'],
504
- 'session_end_time': record['session_end_time'],
505
- 'presentation_type': record['presentation_type'],
506
- 'room_name': record['room_name'],
507
- 'github_url': record['project_url'],
508
- 'poster_position': record['poster_position'],
509
- 'sourceid': record['sourceid'],
510
- 'virtualsite_url': record['virtualsite_url'],
511
- }
512
- papers.append(paper)
513
-
514
- return papers
515
-
516
- def get_collaboration_network(
517
- self,
518
- author_name: str,
519
- n_hops: int = 2
520
- ) -> Dict[str, Any]:
521
- """
522
- Find collaboration network: authors who co-authored papers.
523
-
524
- Args:
525
- author_name: Starting author name
526
- n_hops: Degrees of separation to explore
527
-
528
- Returns:
529
- Dictionary with collaborators and shared papers
530
- """
531
- with self.driver.session() as session:
532
- query = f"""
533
- MATCH (a1:Author)
534
- WHERE toLower(a1.fullname) CONTAINS toLower($author_name)
535
- MATCH path = (a1)<-[:AUTHORED_BY]-(p:Paper)-[:AUTHORED_BY]->(a2:Author)
536
- WHERE a1 <> a2
537
- WITH a1, a2, collect(DISTINCT p) as shared_papers, length(path) as distance
538
- RETURN a1.fullname as source_author,
539
- a2.fullname as collaborator,
540
- a2.institution as institution,
541
- [p IN shared_papers | {{id: p.id, name: p.name}}] as papers,
542
- size(shared_papers) as paper_count
543
- ORDER BY paper_count DESC
544
- """
545
-
546
- result = session.run(query, author_name=author_name)
547
-
548
- collaborations = []
549
- for record in result:
550
- collab = {
551
- 'source_author': record['source_author'],
552
- 'collaborator': record['collaborator'],
553
- 'institution': record['institution'],
554
- 'shared_papers': record['papers'],
555
- 'paper_count': record['paper_count']
556
- }
557
- collaborations.append(collab)
558
-
559
- return {
560
- 'author': author_name,
561
- 'collaborators': collaborations,
562
- 'total_collaborators': len(collaborations)
563
- }
564
-
565
-
566
- # Test
567
- if __name__ == "__main__":
568
- # Initialize searcher
569
- searcher = Neo4jGraphWorker(
570
- uri="bolt://localhost:7687",
571
- username="neo4j",
572
- password="llm_agents"
573
- )
574
-
575
- try:
576
- # Example 1: Similarity search
577
- print("\n" + "=" * 60)
578
- print("Example 1: Similarity Search")
579
- print("=" * 60)
580
- user_query = "Reinforcement learning"
581
- similar_papers = searcher.similarity_search(user_query, top_k=30)
582
- for i, paper in enumerate(similar_papers, 1):
583
- print(f"\n{i}. {paper['name']}")
584
- print(f" Topic: {paper['topic']}")
585
- print(f" Similarity: {paper['similarity_score']:.4f}")
586
-
587
- # Example 2: Neighborhood search
588
- if similar_papers:
589
- print("\n" + "=" * 60)
590
- print("Example 2: Neighborhood Search")
591
- print("=" * 60)
592
- paper_id = similar_papers[0]['id']
593
- neighbors = searcher.neighborhood_search(paper_id, min_similarity=0.75)
594
- print(f"\nNeighbors of: {similar_papers[0]['name']}")
595
- for rel_type, neighbors in neighbors.items():
596
- print(f" \n{rel_type.upper()} RELATIONSHIPS:")
597
- for neighbor in neighbors:
598
- print(f" - {neighbor['name']} (similarity: {neighbor['similarity']:.4f})")
599
-
600
- # Example 3: Graph traversal
601
- print("\n" + "=" * 60)
602
- print("Example 3: Graph Traversal (2 hops)")
603
- print("=" * 60)
604
- if similar_papers:
605
- paper_ids = similar_papers[0]['id']
606
- related = searcher.graph_traversal(paper_ids, n_hops=2)
607
- print(f"\nFound {len(related)} related papers through traversal")
608
- for paper in related[:5]: # Show first 5
609
- print(f" - {paper['name']} (distance: {paper['distance']})")
610
-
611
- finally:
612
- searcher.close()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/tools/session_routing/__init__.py DELETED
@@ -1,210 +0,0 @@
1
- """
2
- Session routing tool for building personalized conference visiting schedules.
3
-
4
- This tool helps NeurIPS 2025 conference attendees create optimized schedules
5
- for visiting poster sessions based on their research interests, preferred dates,
6
- and time slots.
7
- """
8
-
9
- import os
10
- from typing import Union, List, Optional
11
- from neo4j import GraphDatabase
12
-
13
- from agentic_nav.tools.knowledge_graph import search_similar_papers
14
- from agentic_nav.tools.session_routing.scheduler import ScheduleBuilder
15
- from agentic_nav.tools.session_routing.utils import parse_date_input, parse_time_preference
16
-
17
-
18
- # Environment variables for Neo4j connection
19
- NEO4J_DB_URI = os.getenv("NEO4J_DB_URI", "bolt://localhost:7687")
20
- NEO4J_USERNAME = os.getenv("NEO4J_USERNAME", "neo4j")
21
- NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "")
22
-
23
-
24
- def build_visit_schedule(
25
- topics: Union[str, List[str]],
26
- dates: Union[str, List[str]] = None,
27
- time_preferences: str = None,
28
- max_papers: int = 20,
29
- min_similarity: float = 0.6
30
- ) -> str:
31
- # TODO: Add filter for ["poster", "oral"]. Make sure to match orals with poster counterpart.
32
- """
33
- Build a personalized visiting schedule for NeurIPS 2025 conference poster sessions.
34
-
35
- This tool helps you create an optimized schedule by:
36
- 1. Finding papers relevant to your research interests (topics)
37
- 2. Filtering by your preferred dates and time slots
38
- 3. Scoring papers by relevance to your topics
39
- 4. Clustering papers by room location to minimize walking
40
- 5. Organizing chronologically for easy navigation
41
-
42
- The schedule includes paper titles, locations, poster positions, and relevance scores.
43
-
44
- Args:
45
- topics: Research topic(s) of interest. Can be a single topic string or a list of topics.
46
- Examples: "transformer architectures", ["reinforcement learning", "multi-agent systems"]
47
- dates: Conference date(s) to include. Can be:
48
- - ISO format: "2025-12-02" or ["2025-12-02", "2025-12-03"]
49
- - Day names: "Tuesday", "Wednesday"
50
- - None (default): include all conference days (Dec 2-7, 2025)
51
- time_preferences: Preferred time slot(s). Can be:
52
- - Preset: "morning" (8am-12pm), "afternoon" (12pm-5pm), "evening" (5pm-9pm)
53
- - Range: "9:00-12:00" or "14-17"
54
- - None (default): include all time slots
55
- max_papers: Maximum number of papers to include in schedule (default: 20)
56
- min_similarity: Minimum similarity score for paper relevance (0.0-1.0, default: 0.6)
57
-
58
- Returns:
59
- Formatted markdown schedule organized by date, time slot, and room location.
60
- All times are displayed in conference local time (PST/UTC-8).
61
-
62
- Restrictions:
63
- - Requires Neo4j database connection (NEO4J_DB_URI, NEO4J_USERNAME, NEO4J_PASSWORD)
64
- - Requires Paper nodes with session timing and location fields
65
- - Conference dates: December 2-7, 2025 in San Diego/Mexico City (UTC-8)
66
-
67
- Notes:
68
- - Papers are scored by similarity to your topics using embedding search
69
- - Schedule optimizes for both relevance and room clustering
70
- - Time zones are automatically converted from UTC to PST
71
- - Poster positions help you quickly locate papers in exhibition halls
72
-
73
- Raises:
74
- ValueError: If topics is empty or dates cannot be parsed
75
- Exception: If Neo4j connection fails
76
-
77
- Example:
78
- >>> build_visit_schedule(
79
- ... topics=["machine learning", "computer vision"],
80
- ... dates="2025-12-02",
81
- ... time_preferences="morning",
82
- ... max_papers=15
83
- ... )
84
- # Your NeurIPS 2025 Conference Schedule
85
-
86
- ## Tuesday, December 2, 2025
87
-
88
- ### 9:00 AM - 11:00 AM PST
89
-
90
- **Hall A**
91
- - **Poster #123** | Attention Mechanisms in Vision Transformers
92
- - Authors: John Doe, Jane Doe, et al.
93
- - Topic: Computer Vision
94
- - Relevance: 0.92
95
- ...
96
- """
97
- # Type coercion for parameters that may come as strings from LLM tool calls
98
- if isinstance(topics, str):
99
- # If topics is a single string, treat as one topic
100
- topics = [topics]
101
- elif topics is None:
102
- raise ValueError("Topics parameter is required. Please provide at least one research topic.")
103
-
104
- if max_papers is not None and not isinstance(max_papers, int):
105
- max_papers = int(max_papers)
106
-
107
- if min_similarity is not None and not isinstance(min_similarity, float):
108
- min_similarity = float(min_similarity)
109
-
110
- # Parse dates
111
- parsed_dates = None
112
- if dates:
113
- if isinstance(dates, str):
114
- dates = [dates]
115
-
116
- parsed_dates = []
117
- for date_str in dates:
118
- parsed = parse_date_input(date_str)
119
- if parsed:
120
- parsed_dates.append(parsed)
121
-
122
- if not parsed_dates:
123
- parsed_dates = None # Fall back to all dates if parsing fails
124
-
125
- # Parse time preferences (convert to UTC for database query)
126
- time_range = None
127
- if time_preferences:
128
- local_time_range = parse_time_preference(time_preferences)
129
- if local_time_range:
130
- # Convert PST to UTC (add 8 hours)
131
- start_utc = (local_time_range[0] + 8) % 24
132
- end_utc = (local_time_range[1] + 8) % 24
133
- time_range = (start_utc, end_utc)
134
-
135
- # Step 1: Search for papers matching each topic using existing tool
136
- all_paper_ids = set()
137
- relevance_scores = {}
138
-
139
- for topic in topics:
140
- try:
141
-
142
- from llm_agents.tools.knowledge_graph.retriever import Neo4jGraphWorker
143
-
144
- worker = Neo4jGraphWorker(
145
- uri=NEO4J_DB_URI,
146
- username=NEO4J_USERNAME,
147
- password=NEO4J_PASSWORD
148
- )
149
-
150
- papers = worker.similarity_search(
151
- user_query=topic,
152
- top_k=max_papers * 2,
153
- min_similarity=min_similarity
154
- )
155
-
156
- worker.close()
157
-
158
- # Extract paper IDs and scores
159
- for paper in papers:
160
- paper_id = paper.get('id')
161
- score = paper.get('score', 0.0)
162
-
163
- if paper_id:
164
- all_paper_ids.add(paper_id)
165
- # Keep highest score if paper matches multiple topics
166
- if paper_id not in relevance_scores or score > relevance_scores[paper_id]:
167
- relevance_scores[paper_id] = score
168
-
169
- except Exception as e:
170
- # If search fails for one topic, continue with others
171
- continue
172
-
173
- if not all_paper_ids:
174
- return "No papers found matching your topics. Try broadening your search criteria or adjusting the minimum similarity threshold."
175
-
176
- # Step 2: Initialize schedule builder
177
- driver = GraphDatabase.driver(NEO4J_DB_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))
178
- builder = ScheduleBuilder(driver)
179
-
180
- try:
181
- # Step 3: Filter papers by date and time
182
- filtered_papers = builder.filter_by_datetime(
183
- paper_ids=list(all_paper_ids),
184
- dates=parsed_dates,
185
- time_range=time_range
186
- )
187
-
188
- if not filtered_papers:
189
- return "No papers found matching your date and time preferences. Try expanding your time range or selecting different dates."
190
-
191
- # Step 4: Score papers by relevance
192
- scored_papers = builder.score_papers(filtered_papers, relevance_scores)
193
-
194
- # Step 5: Optimize schedule (chronological + room clustering)
195
- schedule = builder.optimize_schedule(scored_papers, max_papers=max_papers)
196
-
197
- # Step 6: Format as markdown
198
- markdown_output = builder.format_as_markdown(schedule, include_abstracts=False)
199
-
200
- return markdown_output
201
-
202
- finally:
203
- builder.close()
204
-
205
-
206
- __all__ = ['build_visit_schedule']
207
-
208
-
209
- if __name__ == "__main__":
210
- print(build_visit_schedule(topics=["federated learning"], max_papers=200, dates=["Wednesday"]))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/tools/session_routing/scheduler.py DELETED
@@ -1,377 +0,0 @@
1
- """
2
- Schedule builder for NeurIPS 2025 conference paper sessions.
3
-
4
- This module provides the ScheduleBuilder class that handles filtering,
5
- scoring, and organizing papers into optimized visiting schedules.
6
- """
7
-
8
- from datetime import datetime
9
- from typing import List, Dict, Any, Optional, Tuple
10
- from collections import defaultdict
11
- import neo4j
12
-
13
- from agentic_nav.tools.session_routing.utils import (
14
- convert_utc_to_local,
15
- format_time_slot,
16
- format_date_header,
17
- cluster_papers_by_room,
18
- parse_time_preference
19
- )
20
-
21
-
22
- class ScheduleBuilder:
23
- """
24
- Build optimized conference visiting schedules.
25
-
26
- This class handles filtering papers by date/time, scoring by relevance,
27
- clustering by room location, and formatting the final schedule.
28
- """
29
-
30
- def __init__(self, neo4j_driver: neo4j.Driver):
31
- """
32
- Initialize the schedule builder.
33
-
34
- Args:
35
- neo4j_driver: Neo4j database driver for querying papers
36
- """
37
- self.driver = neo4j_driver
38
-
39
- def filter_by_datetime(
40
- self,
41
- paper_ids: List[str],
42
- dates: Optional[List[datetime]] = None,
43
- time_range: Optional[Tuple[int, int]] = None
44
- ) -> List[Dict[str, Any]]:
45
- """
46
- Filter papers by date and time preferences.
47
-
48
- Args:
49
- paper_ids: List of paper IDs to filter
50
- dates: List of conference dates to include (None = all dates)
51
- time_range: Tuple of (start_hour, end_hour) in UTC (None = all times)
52
-
53
- Returns:
54
- List of paper dictionaries with full details including session times
55
-
56
- Example:
57
- >>> builder.filter_by_datetime(['paper1', 'paper2'], dates=[datetime(2025,12,2)])
58
- """
59
- if not paper_ids:
60
- return []
61
-
62
- # Deduplicate paper_ids to ensure we only query each paper once
63
- unique_paper_ids = list(set(paper_ids))
64
-
65
- # Build Cypher query to get full paper details including authors via relationship
66
- # Relationship is IS_AUTHOR_OF (uppercase) and author property is 'fullname'
67
- query = """
68
- MATCH (p:Paper)
69
- WHERE p.id IN $paper_ids
70
- OPTIONAL MATCH (a:Author)-[:IS_AUTHOR_OF]-(p)
71
- WITH p, collect(a.fullname) as authors
72
- RETURN DISTINCT p.id as id,
73
- p.name as name,
74
- p.abstract as abstract,
75
- p.topic as topic,
76
- p.session as session,
77
- p.session_start_time as session_start_time,
78
- p.session_end_time as session_end_time,
79
- p.room_name as room_name,
80
- p.poster_position as poster_position,
81
- p.presentation_type as presentation_type,
82
- p.url as url,
83
- authors
84
- """
85
-
86
- with self.driver.session() as session:
87
- result = session.run(query, paper_ids=unique_paper_ids)
88
- papers = [dict(record) for record in result]
89
-
90
- # Deduplicate papers by ID (just in case)
91
- seen_ids = set()
92
- unique_papers = []
93
- for paper in papers:
94
- paper_id = paper.get('id')
95
- if paper_id and paper_id not in seen_ids:
96
- seen_ids.add(paper_id)
97
- unique_papers.append(paper)
98
-
99
- papers = unique_papers
100
-
101
- # Filter by date if specified
102
- if dates:
103
- date_strs = [d.strftime("%Y-%m-%d") for d in dates]
104
- papers = [
105
- p for p in papers
106
- if p.get('session_start_time') and
107
- any(date_str in p['session_start_time'] for date_str in date_strs)
108
- ]
109
-
110
- # Filter by time range if specified (convert UTC time range)
111
- if time_range:
112
- start_hour, end_hour = time_range
113
- filtered_papers = []
114
-
115
- for paper in papers:
116
- try:
117
- start_time_str = paper.get('session_start_time', '')
118
- if not start_time_str:
119
- continue
120
-
121
- # Parse UTC time
122
- if 'T' in start_time_str:
123
- dt = datetime.fromisoformat(start_time_str.replace('Z', ''))
124
- else:
125
- continue
126
-
127
- # Check if paper session falls within time range (UTC)
128
- if start_hour <= dt.hour < end_hour:
129
- filtered_papers.append(paper)
130
-
131
- except (ValueError, AttributeError):
132
- # If we can't parse time, include the paper to be safe
133
- filtered_papers.append(paper)
134
-
135
- papers = filtered_papers
136
-
137
- return papers
138
-
139
- def score_papers(
140
- self,
141
- papers: List[Dict[str, Any]],
142
- relevance_scores: Dict[str, float]
143
- ) -> List[Dict[str, Any]]:
144
- """
145
- Add relevance scores to papers.
146
-
147
- Args:
148
- papers: List of paper dictionaries
149
- relevance_scores: Dict mapping paper_id to relevance score
150
-
151
- Returns:
152
- Papers with added 'relevance_score' field, sorted by score descending
153
-
154
- Example:
155
- >>> builder.score_papers(papers, {'paper1': 0.95, 'paper2': 0.87})
156
- """
157
- scored_papers = []
158
-
159
- for paper in papers:
160
- paper_id = paper.get('id')
161
- score = relevance_scores.get(paper_id, 0.0)
162
-
163
- paper_with_score = paper.copy()
164
- paper_with_score['relevance_score'] = score
165
- scored_papers.append(paper_with_score)
166
-
167
- # Sort by relevance score (highest first)
168
- scored_papers.sort(key=lambda p: p['relevance_score'], reverse=True)
169
-
170
- return scored_papers
171
-
172
- def optimize_schedule(
173
- self,
174
- papers: List[Dict[str, Any]],
175
- max_papers: int = 20
176
- ) -> Dict[str, Dict[str, List[Dict[str, Any]]]]:
177
- """
178
- Optimize schedule by grouping papers chronologically and by room.
179
-
180
- Args:
181
- papers: List of scored paper dictionaries
182
- max_papers: Maximum number of papers to include
183
-
184
- Returns:
185
- Nested dict: {date: {time_slot: {room: [papers]}}}
186
-
187
- Example:
188
- >>> schedule = builder.optimize_schedule(papers, max_papers=15)
189
- """
190
- # Deduplicate papers by ID first
191
- seen_ids = set()
192
- unique_papers = []
193
- for paper in papers:
194
- paper_id = paper.get('id')
195
- if paper_id and paper_id not in seen_ids:
196
- seen_ids.add(paper_id)
197
- unique_papers.append(paper)
198
-
199
- # Limit to top papers by relevance
200
- top_papers = unique_papers[:max_papers]
201
-
202
- # Group by date and time
203
- schedule = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
204
-
205
- for paper in top_papers:
206
- try:
207
- start_time = paper.get('session_start_time', '')
208
- if not start_time:
209
- continue
210
-
211
- # Extract date
212
- date_str = start_time.split('T')[0]
213
-
214
- # Create time slot key
215
- end_time = paper.get('session_end_time', '')
216
- time_slot = format_time_slot(start_time, end_time) if end_time else start_time
217
-
218
- # Get room (handle None values, fallback to session for Mexico City papers)
219
- room = paper.get('room_name')
220
- if not room:
221
- # Use session as fallback (e.g., for Mexico City papers)
222
- room = paper.get('session') or 'N/A'
223
-
224
- # Add to schedule
225
- schedule[date_str][time_slot][room].append(paper)
226
-
227
- except (ValueError, AttributeError, IndexError):
228
- # Skip papers with invalid time data
229
- continue
230
-
231
- return schedule
232
-
233
- def format_as_markdown(
234
- self,
235
- schedule: Dict[str, Dict[str, List[Dict[str, Any]]]],
236
- include_abstracts: bool = False
237
- ) -> str:
238
- """
239
- Format schedule as structured markdown.
240
-
241
- Args:
242
- schedule: Nested schedule dictionary
243
- include_abstracts: Whether to include paper abstracts (default: False)
244
-
245
- Returns:
246
- Formatted markdown string with format:
247
- "Date (MM dd, yyyy) - Time Slot - Session Name - Location"
248
-
249
- Example:
250
- >>> markdown = builder.format_as_markdown(schedule)
251
- """
252
- if not schedule:
253
- return "No papers found matching your criteria."
254
-
255
- output = ["# Your NeurIPS 2025 Conference Schedule\n"]
256
-
257
- # Flatten schedule into list of blocks for better formatting
258
- schedule_blocks = []
259
-
260
- for date_str in sorted(schedule.keys()):
261
- time_slots = schedule[date_str]
262
-
263
- for time_slot in sorted(time_slots.keys()):
264
- rooms = time_slots[time_slot]
265
-
266
- for room_or_session in sorted(rooms.keys()):
267
- papers_in_block = rooms[room_or_session]
268
-
269
- # Sort papers by poster position ID (numerically)
270
- def poster_sort_key(paper):
271
- poster_pos = paper.get('poster_position')
272
- if not poster_pos:
273
- return float('inf') # Put papers without position at end
274
-
275
- # Remove '#' prefix if present
276
- if isinstance(poster_pos, str) and poster_pos.startswith('#'):
277
- poster_pos = poster_pos[1:]
278
-
279
- # Convert to integer for numerical sorting
280
- try:
281
- return int(poster_pos)
282
- except (ValueError, TypeError):
283
- return float('inf') # Put invalid positions at end
284
-
285
- papers_in_block.sort(key=poster_sort_key)
286
-
287
- schedule_blocks.append({
288
- 'date': date_str,
289
- 'time_slot': time_slot,
290
- 'room_or_session': room_or_session,
291
- 'papers': papers_in_block
292
- })
293
-
294
- # Format each schedule block
295
- total_papers = 0
296
- for block in schedule_blocks:
297
- date_str = block['date']
298
- time_slot = block['time_slot']
299
- room_or_session = block['room_or_session']
300
- papers = block['papers']
301
-
302
- total_papers += len(papers)
303
-
304
- # Get session and location from first paper (all papers in block share these)
305
- if papers:
306
- first_paper = papers[0]
307
- session_name = first_paper.get('session', 'N/A')
308
- actual_room = first_paper.get('room_name')
309
-
310
- # Determine location: use room if available, otherwise indicate session-based location
311
- if actual_room:
312
- location = actual_room
313
- else:
314
- location = "Mexico City" # Papers without room are from Mexico City
315
-
316
- else:
317
- session_name = room_or_session
318
- location = room_or_session
319
-
320
- # Format date as "Month DD, YYYY"
321
- try:
322
- from datetime import datetime
323
- dt = datetime.fromisoformat(date_str)
324
- formatted_date = dt.strftime("%B %d, %Y")
325
- except:
326
- formatted_date = date_str
327
-
328
- # Create a comprehensive header
329
- header = f"## {formatted_date} - {time_slot} - {session_name} - {location}\n"
330
- output.append(f"\n{header}")
331
-
332
- # List papers in this block
333
- for paper in papers:
334
- title = paper.get('name', 'Untitled')
335
- poster_pos = paper.get('poster_position', 'N/A')
336
- # TODO: This needs to be the distance between the user input query and the paper embedding, i.e.,
337
- # compare encoded user_input with "embedding" in database.
338
- relevance = paper.get('relevance_score', 0)
339
- topic = paper.get('topic', 'General')
340
- pres_type = paper.get('presentation_type', 'Poster')
341
- authors = paper.get('authors', 'N/A')
342
-
343
- # Format authors for display
344
- if isinstance(authors, list):
345
- authors_str = ', '.join(authors) if authors else 'N/A'
346
- elif authors and authors != 'N/A':
347
- authors_str = str(authors)
348
- else:
349
- authors_str = 'N/A'
350
-
351
- # Format paper entry
352
- output.append(f"- **{pres_type} {poster_pos.replace('#', '') if poster_pos is not None else ''}** | {title}")
353
- output.append(f" - Authors: {authors_str}")
354
- output.append(f" - Topic: {topic}")
355
-
356
- # Add paper URL if available
357
- paper_url = paper.get('url')
358
- if paper_url:
359
- output.append(f" - URL: {paper_url}")
360
-
361
- output.append(f" - Relevance: {relevance:.2f}")
362
-
363
- if include_abstracts and paper.get('abstract'):
364
- abstract = paper['abstract'][:200] + "..." if len(paper['abstract']) > 200 else paper['abstract']
365
- output.append(f" - Abstract: {abstract}")
366
-
367
- output.append("") # Blank line between papers
368
-
369
- # Add summary footer
370
- output.append(f"\n---\n**Total Papers in Schedule: {total_papers}**")
371
-
372
- return "\n".join(output)
373
-
374
- def close(self):
375
- """Close the Neo4j driver connection."""
376
- if self.driver:
377
- self.driver.close()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/tools/session_routing/utils.py DELETED
@@ -1,253 +0,0 @@
1
- """
2
- Utility functions for session routing and schedule building.
3
-
4
- This module provides helper functions for time zone conversion,
5
- date parsing, and formatting schedule outputs.
6
- """
7
-
8
- from datetime import datetime, timedelta
9
- from typing import Optional, Tuple
10
- import re
11
-
12
-
13
- def convert_utc_to_local(utc_time_str: str, timezone_offset: int = -8) -> str:
14
- """
15
- Convert UTC time string to local conference time.
16
-
17
- Args:
18
- utc_time_str: ISO format UTC time string (e.g., "2025-12-02T17:00:00Z")
19
- timezone_offset: Hours offset from UTC (default: -8 for PST/Mexico City)
20
-
21
- Returns:
22
- Local time string in format "9:00 AM PST"
23
-
24
- Raises:
25
- ValueError: If time string cannot be parsed
26
-
27
- Example:
28
- >>> convert_utc_to_local("2025-12-02T17:00:00Z")
29
- "9:00 AM PST"
30
- """
31
- try:
32
- # Handle various UTC time formats
33
- utc_time_str = utc_time_str.strip()
34
- if utc_time_str.endswith('Z'):
35
- utc_time_str = utc_time_str[:-1]
36
- elif '+' in utc_time_str or utc_time_str.count('-') > 2:
37
- # Has timezone info, extract just the datetime part
38
- utc_time_str = utc_time_str.split('+')[0].split('T')[0] + 'T' + utc_time_str.split('T')[1].split('+')[0].split('-')[0]
39
-
40
- # Parse the UTC time
41
- if 'T' in utc_time_str:
42
- utc_dt = datetime.fromisoformat(utc_time_str)
43
- else:
44
- # Try parsing without T separator
45
- utc_dt = datetime.strptime(utc_time_str, "%Y-%m-%d %H:%M:%S")
46
-
47
- # Apply timezone offset
48
- local_dt = utc_dt + timedelta(hours=timezone_offset)
49
-
50
- # Format as human-readable time
51
- hour = local_dt.hour
52
- minute = local_dt.minute
53
- am_pm = "AM" if hour < 12 else "PM"
54
- hour_12 = hour if hour <= 12 else hour - 12
55
- hour_12 = 12 if hour_12 == 0 else hour_12
56
-
57
- if minute == 0:
58
- time_str = f"{hour_12}:00 {am_pm} PST"
59
- else:
60
- time_str = f"{hour_12}:{minute:02d} {am_pm} PST"
61
-
62
- return time_str
63
- except (ValueError, AttributeError) as e:
64
- raise ValueError(f"Could not parse time string '{utc_time_str}': {e}")
65
-
66
-
67
- def parse_date_input(date_str: str) -> Optional[datetime]:
68
- """
69
- Parse flexible date input formats.
70
-
71
- Supports:
72
- - ISO format: "2025-12-02"
73
- - Day names: "Monday", "Tuesday", etc.
74
- - Relative: "today", "tomorrow"
75
-
76
- Args:
77
- date_str: Date string in various formats
78
-
79
- Returns:
80
- Datetime object or None if parsing fails
81
-
82
- Example:
83
- >>> parse_date_input("2025-12-02")
84
- datetime.datetime(2025, 12, 2, 0, 0)
85
- """
86
- if not date_str:
87
- return None
88
-
89
- date_str = date_str.strip().lower()
90
-
91
- # Try ISO format first
92
- try:
93
- return datetime.fromisoformat(date_str)
94
- except ValueError:
95
- pass
96
-
97
- # Try common date formats
98
- for fmt in ["%Y-%m-%d", "%m/%d/%Y", "%d/%m/%Y", "%B %d, %Y", "%b %d, %Y"]:
99
- try:
100
- return datetime.strptime(date_str, fmt)
101
- except ValueError:
102
- continue
103
-
104
- # Handle day names (for NeurIPS 2025: Dec 2-7, 2025)
105
- conference_start = datetime(2025, 12, 2) # Tuesday
106
- day_mapping = {
107
- 'monday': conference_start - timedelta(days=1),
108
- 'tuesday': conference_start,
109
- 'wednesday': conference_start + timedelta(days=1),
110
- 'thursday': conference_start + timedelta(days=2),
111
- 'friday': conference_start + timedelta(days=3),
112
- 'saturday': conference_start + timedelta(days=4),
113
- 'sunday': conference_start + timedelta(days=5),
114
- }
115
-
116
- if date_str in day_mapping:
117
- return day_mapping[date_str]
118
-
119
- return None
120
-
121
-
122
- def parse_time_preference(time_pref: str) -> Optional[Tuple[int, int]]:
123
- """
124
- Parse time preference string into hour range.
125
-
126
- Args:
127
- time_pref: Time preference like "morning", "afternoon", "9:00-12:00"
128
-
129
- Returns:
130
- Tuple of (start_hour, end_hour) in 24-hour format, or None
131
-
132
- Example:
133
- >>> parse_time_preference("morning")
134
- (8, 12)
135
- >>> parse_time_preference("9:00-15:00")
136
- (9, 15)
137
- """
138
- if not time_pref:
139
- return None
140
-
141
- time_pref = time_pref.strip().lower()
142
-
143
- # Predefined time slots
144
- presets = {
145
- 'morning': (8, 12),
146
- 'afternoon': (12, 17),
147
- 'evening': (17, 21),
148
- 'early': (8, 10),
149
- 'late': (19, 21),
150
- }
151
-
152
- if time_pref in presets:
153
- return presets[time_pref]
154
-
155
- # Parse time range format: "9:00-12:00" or "09:00-12:00" or "9-12"
156
- range_pattern = r'(\d{1,2})(?::(\d{2}))?[\s\-]+(\d{1,2})(?::(\d{2}))?'
157
- match = re.match(range_pattern, time_pref)
158
-
159
- if match:
160
- start_hour = int(match.group(1))
161
- end_hour = int(match.group(3))
162
- return (start_hour, end_hour)
163
-
164
- return None
165
-
166
-
167
- def format_time_slot(start_time: str, end_time: str) -> str:
168
- """
169
- Format time slot for display.
170
-
171
- Args:
172
- start_time: Start time in UTC format
173
- end_time: End time in UTC format
174
-
175
- Returns:
176
- Formatted time range string
177
-
178
- Example:
179
- >>> format_time_slot("2025-12-02T17:00:00Z", "2025-12-02T19:00:00Z")
180
- "9:00 AM - 11:00 AM PST"
181
- """
182
- try:
183
- start_local = convert_utc_to_local(start_time)
184
- end_local = convert_utc_to_local(end_time)
185
-
186
- # Remove PST from start time if both are same timezone
187
- if start_local.endswith(' PST') and end_local.endswith(' PST'):
188
- start_local = start_local[:-4]
189
-
190
- return f"{start_local} - {end_local}"
191
- except ValueError:
192
- return f"{start_time} - {end_time}"
193
-
194
-
195
- def format_date_header(date_str: str) -> str:
196
- """
197
- Format date for section headers.
198
-
199
- Args:
200
- date_str: Date string (ISO format or datetime)
201
-
202
- Returns:
203
- Formatted date like "Tuesday, December 2, 2025"
204
-
205
- Example:
206
- >>> format_date_header("2025-12-02")
207
- "Tuesday, December 2, 2025"
208
- """
209
- try:
210
- if isinstance(date_str, str):
211
- dt = datetime.fromisoformat(date_str.split('T')[0])
212
- else:
213
- dt = date_str
214
-
215
- return dt.strftime("%A, %B %d, %Y")
216
- except (ValueError, AttributeError):
217
- return str(date_str)
218
-
219
-
220
- def cluster_papers_by_room(papers: list, time_slot_key: str = 'session') -> dict:
221
- """
222
- Group papers by room within their time slots.
223
-
224
- Args:
225
- papers: List of paper dictionaries with room_name and session info
226
- time_slot_key: Key to group by time slots (default: 'session')
227
-
228
- Returns:
229
- Nested dict: {time_slot: {room_name: [papers]}}
230
-
231
- Example:
232
- >>> papers = [
233
- ... {'session': 'Morning', 'room_name': 'Hall A', 'name': 'Paper 1'},
234
- ... {'session': 'Morning', 'room_name': 'Hall A', 'name': 'Paper 2'},
235
- ... ]
236
- >>> cluster_papers_by_room(papers)
237
- {'Morning': {'Hall A': [...]}}
238
- """
239
- clustered = {}
240
-
241
- for paper in papers:
242
- time_slot = paper.get(time_slot_key, 'Unknown Session')
243
- room = paper.get('room_name', 'Unknown Room')
244
-
245
- if time_slot not in clustered:
246
- clustered[time_slot] = {}
247
-
248
- if room not in clustered[time_slot]:
249
- clustered[time_slot][room] = []
250
-
251
- clustered[time_slot][room].append(paper)
252
-
253
- return clustered
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/utils/__init__.py DELETED
@@ -1,3 +0,0 @@
1
- from agentic_nav.utils.tooling import infer_tool, _json_type
2
- from agentic_nav.utils.logger import setup_logging
3
- from agentic_nav.utils.embedding_generator import batch_embed_documents
 
 
 
 
agentic_nav/utils/cli/__init__.py DELETED
@@ -1,3 +0,0 @@
1
- from agentic_nav.utils.cli.editor import open_editor
2
- from agentic_nav.utils.cli.help import print_help
3
- from agentic_nav.utils.cli.history import show_history
 
 
 
 
agentic_nav/utils/cli/editor.py DELETED
@@ -1,29 +0,0 @@
1
- import os
2
- import tempfile
3
-
4
-
5
- def open_editor(initial_text=""):
6
- editor = os.environ.get("EDITOR")
7
- if not editor:
8
- # Minimal sensible defaults
9
- if os.name == "nt":
10
- editor = "notepad"
11
- else:
12
- editor = "nano"
13
- with tempfile.NamedTemporaryFile(suffix=".md", delete=False, mode="w+", encoding="utf-8") as tf:
14
- path = tf.name
15
- tf.write(initial_text)
16
- tf.flush()
17
- try:
18
- # Open editor and wait
19
- rc = os.system(f'{editor} "{path}"')
20
- if rc != 0:
21
- print(f"(editor exit code {rc})")
22
- with open(path, "r", encoding="utf-8") as f:
23
- content = f.read()
24
- finally:
25
- try:
26
- os.unlink(path)
27
- except Exception:
28
- pass
29
- return content.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/utils/cli/help.py DELETED
@@ -1,14 +0,0 @@
1
-
2
-
3
- def print_help():
4
- help_text = """
5
- Commands:
6
- /help Show this help
7
- /exit Exit the chat
8
- /system Set or replace system prompt (multi-line via $EDITOR)
9
- /edit Compose multi-line user message via $EDITOR
10
- /history Show conversation history (JSON)
11
- /save <path> Save conversation history to a file (JSON)
12
- Typing anything else will send it as a user message.
13
- """
14
- print(help_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/utils/cli/history.py DELETED
@@ -1,11 +0,0 @@
1
-
2
- def show_history(messages):
3
- for i, m in enumerate(messages):
4
- ts = m.get("_ts", "")
5
- role = m.get("role", "")
6
- content = m.get("content", "")
7
- header = f"[{i}] {role} {ts}"
8
- print(header)
9
- print("-" * len(header))
10
- print(content)
11
- print()
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/utils/embedding_generator.py DELETED
@@ -1,151 +0,0 @@
1
- import logging
2
-
3
- import litellm
4
- import numpy as np
5
- import spaces
6
-
7
- from litellm import embedding
8
- from sentence_transformers import SentenceTransformer
9
- from tqdm import tqdm
10
-
11
- from typing import List
12
-
13
-
14
- LOGGER = logging.getLogger(__name__)
15
- local_embedding_model = None
16
-
17
-
18
- class EmbeddingResponse:
19
- def __init__(self, embeddings):
20
- self.data = [
21
- type('obj', (), {
22
- 'embedding': emb.tolist(),
23
- 'index': idx
24
- })()
25
- for idx, emb in enumerate(embeddings)
26
- ]
27
-
28
-
29
- def _get_local_model(embedding_model_name: str = "nomic-ai/nomic-embed-text-v1.5"):
30
- """Lazy load the embedding model only once"""
31
- global local_embedding_model
32
- if local_embedding_model is None:
33
- LOGGER.info(f"Loading embedding model: {embedding_model_name}")
34
- local_embedding_model = SentenceTransformer(
35
- embedding_model_name,
36
- trust_remote_code=True
37
- )
38
- return local_embedding_model
39
-
40
-
41
- @spaces.GPU
42
- def embed_hf_spaces(input, embedding_model_name: str = "nomic-ai/nomic-embed-text-v1.5", api_base=None, **kwargs):
43
- """
44
- Drop-in replacement for litellm.embedding()
45
-
46
- Args:
47
- input: Single string or list of strings to embed
48
- embedding_model_name: HuggingFace model name to use
49
- api_base: Ignored for local embedding
50
- **kwargs: Additional args like num_ctx (ignored for local)
51
-
52
- Returns:
53
- Object with same structure as LiteLLM response
54
- """
55
- # Get model (loads only on first call)
56
- model_instance = _get_local_model(embedding_model_name)
57
-
58
- texts = [input] if isinstance(input, str) else input
59
- embeddings = model_instance.encode(
60
- texts,
61
- convert_to_tensor=True,
62
- show_progress_bar=False,
63
- normalize_embeddings=True
64
- )
65
-
66
- embeddings_np = embeddings.cpu().numpy()
67
-
68
- return EmbeddingResponse(embeddings_np)
69
-
70
-
71
- def embedding_fn(model, input, api_base, **kwargs):
72
- if api_base == "hf_spaces_local":
73
- return embed_hf_spaces(input=input, embedding_model_name=model, api_base=api_base, **kwargs)
74
- elif "localhost" in api_base or "ollama.com" in api_base:
75
- return embedding(input=input, model=model, api_base=api_base, **kwargs)
76
- else:
77
- raise NotImplementedError(f"Unknown api_base for provider {api_base}. Available options: hf_spaces_local, ollama local (http://localhost:11435), ollama cloud (https://ollama.com)")
78
-
79
-
80
- def batch_embed_documents(
81
- texts: List[str],
82
- batch_size: int = 1,
83
- embedding_model: str = "nomic-ai/nomic-embed-text-v1.5",
84
- api_base: str = "hf_spaces_local",
85
- show_progress: bool = False,
86
- ) -> np.ndarray:
87
-
88
- if not texts:
89
- return np.array([], dtype="float32").reshape(0, 0)
90
-
91
- if None in texts:
92
- LOGGER.warning(f"WARNING: Detected documents with 'None' values. Replacing 'None' with an empty string...")
93
- texts = ['' if doc is None else doc for doc in texts]
94
-
95
- vecs: List[List[float]] = []
96
- for i in tqdm(range(0, len(texts), batch_size), disable=not show_progress):
97
- chunk = texts[i:i + batch_size]
98
- try:
99
- resp = embedding_fn(
100
- model=embedding_model,
101
- input=chunk,
102
- api_base=api_base,
103
- **{"num_ctx": 2048}
104
- )
105
- except Exception as e:
106
- LOGGER.error(f"Error during embedding batch {i}-{i + batch_size}: {e}. Falling back to single sample processing")
107
- individual_responses = []
108
- ctr = i
109
- for sample in chunk:
110
- try:
111
- individual_responses.append(
112
- embedding_fn(
113
- model=embedding_model,
114
- input=sample,
115
- api_base=api_base,
116
- **{"num_ctx": 2048}
117
- )
118
- )
119
- except litellm.BadRequestError:
120
- LOGGER.error(f"Encountered error processing paper #{ctr}. Please inspect and retry afterwards.")
121
- ctr += 1
122
-
123
- LOGGER.debug(f"Single sample response from embedding model: {individual_responses}")
124
-
125
- # Extract embeddings from individual responses
126
- for individual_resp in individual_responses:
127
- vecs.extend([d["embedding"] for d in individual_resp.data])
128
- else:
129
- # Normal batch processing
130
- vecs.extend([d["embedding"] for d in resp.data])
131
-
132
- arr = np.array(vecs, dtype="float32")
133
- # cosine similarity: normalize to unit length and use IndexFlatIP
134
- norms = np.linalg.norm(arr, axis=1, keepdims=True) + 1e-12
135
- return arr / norms
136
-
137
-
138
- if __name__ == "__main__":
139
- res = batch_embed_documents(
140
- texts=[
141
- "test1",
142
- "test2",
143
- "test3",
144
- "test4",
145
- "test5"
146
- ],
147
- batch_size=2,
148
- embedding_model="ollama/nomic-embed-text",
149
- api_base="http://localhost:11435"
150
- )
151
- print(f"Result shape: {res.shape}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/utils/file_handlers.py DELETED
@@ -1,10 +0,0 @@
1
- import json
2
-
3
-
4
- def save_chat_history(messages, path):
5
- try:
6
- with open(path, "w", encoding="utf-8") as f:
7
- json.dump(messages, f, indent=2, ensure_ascii=False)
8
- print(f"Saved to {path}")
9
- except Exception as e:
10
- print("Save failed:", e)
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/utils/logger.py DELETED
@@ -1,49 +0,0 @@
1
- import logging
2
- import logging.handlers
3
-
4
- from datetime import datetime
5
- from pathlib import Path
6
-
7
-
8
-
9
- def setup_logging(log_dir: str = "logs", level: str = "INFO", console_level: str = "WARNING"):
10
- """
11
- Configure logging for the entire application.
12
-
13
- Args:
14
- log_dir: Directory for log files
15
- level: Root logger level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
16
- console_level: Console handler level - defaults to WARNING to avoid
17
- interfering with CLI display. Set to INFO for verbose output.
18
- """
19
- Path(log_dir).mkdir(exist_ok=True)
20
-
21
- # Root logger configuration
22
- root_logger = logging.getLogger()
23
- root_logger.setLevel(getattr(logging, level.upper()))
24
-
25
- # Console handler - set to WARNING by default to not interfere with CLI display
26
- console_handler = logging.StreamHandler()
27
- console_handler.setLevel(getattr(logging, console_level.upper()))
28
- console_format = logging.Formatter(
29
- "%(asctime)s - %(levelname)s - %(name)s - %(message)s",
30
- datefmt="%Y-%m-%d %H:%M:%S"
31
- )
32
- console_handler.setFormatter(console_format)
33
-
34
- # File handler - for production
35
- time_now = datetime.now().strftime("%Y-%m-%d_%H-%M")
36
-
37
- file_handler = logging.handlers.RotatingFileHandler(
38
- f"{log_dir}/{time_now}_llm_agents.log",
39
- maxBytes=10 * 1024 * 1024, # 10MB
40
- backupCount=5
41
- )
42
- file_handler.setLevel(logging.DEBUG)
43
- file_format = logging.Formatter(
44
- "%(asctime)s - %(name)s - %(levelname)s - %(funcName)s:%(lineno)d - %(message)s"
45
- )
46
- file_handler.setFormatter(file_format)
47
-
48
- root_logger.addHandler(console_handler)
49
- root_logger.addHandler(file_handler)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/utils/tooling.py DELETED
@@ -1,44 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import inspect
4
-
5
- from typing import Any, Dict, List, Callable, get_args, get_origin, Literal
6
-
7
-
8
- def _json_type(t: Any) -> Dict[str, Any]:
9
- origin, args = get_origin(t), get_args(t)
10
- if origin is Literal:
11
- return {"type": "string", "enum": list(args)}
12
- if origin in (list, List):
13
- return {"type": "array", "items": {"type": "string"}}
14
- if t in (str,): return {"type": "string"}
15
- if t in (int,): return {"type": "integer"}
16
- if t in (float,): return {"type": "number"}
17
- if t in (bool,): return {"type": "boolean"}
18
- return {"type": "string"}
19
-
20
-
21
- def infer_tool(func: Callable[..., Any], tool_args: Dict[Any, Any]) -> Dict[str, Any]:
22
- sig = inspect.signature(func)
23
- hints = getattr(func, "__annotations__", {})
24
- props, required = {}, []
25
- for name, p in sig.parameters.items():
26
- if name in ("self", "cls"): continue
27
- schema = _json_type(hints.get(name, str))
28
- if p.default is inspect._empty: required.append(name)
29
- props[name] = schema
30
-
31
- parameter_values = {}
32
- for arg_name, arg_val in tool_args.items():
33
- if arg_name in props.keys():
34
- parameter_values[arg_name] = arg_val
35
-
36
- return {
37
- "type": "function",
38
- "function": {
39
- "name": func.__name__,
40
- "description": (inspect.getdoc(func) or f"Call {func.__name__}"),
41
- "parameters": {"type": "object", "properties": props, "required": required},
42
- },
43
- "parameter_properties_values": parameter_values
44
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ from agentic_nav.frontend.browser_ui import main
2
+
3
+
4
+ if __name__ == "__main__":
5
+ main()
data/.keep DELETED
File without changes
docker-compose.yaml DELETED
@@ -1,137 +0,0 @@
1
- services:
2
- neo4j_db:
3
- image: neo4j:5.26.0
4
- container_name: neo4j_db
5
- expose:
6
- - "7474"
7
- - "7687"
8
- ports:
9
- - "7474:7474" # HTTP
10
- - "7687:7687" # Bolt
11
- environment:
12
- # Authentication
13
- - NEO4J_AUTH=${NEO4J_USERNAME}/${NEO4J_PASSWORD}
14
-
15
- # Memory settings
16
- - NEO4J_server_memory_heap_initial__size=512m
17
- - NEO4J_server_memory_heap_max__size=2G
18
- - NEO4J_server_memory_pagecache_size=2G
19
- - NEO4J_db_memory_transaction_total_max=3G
20
- - NEO4J_dbms_memory_transaction_total_max=3G
21
-
22
- # APOC plugin (optional but recommended)
23
- - NEO4J_PLUGINS=["apoc"]
24
-
25
- # Accept license (required for Enterprise features, remove if using Community)
26
- # - NEO4J_ACCEPT_LICENSE_AGREEMENT=yes
27
- volumes:
28
- - neo4j_data:/data
29
- - neo4j_logs:/logs
30
- - neo4j_import:/var/lib/neo4j/import
31
- - neo4j_plugins:/plugins
32
- restart: unless-stopped
33
- healthcheck:
34
- test: [ "CMD-SHELL", "cypher-shell -u ${NEO4J_USERNAME:-neo4j} -p ${NEO4J_PASSWORD:-llm_agents} 'RETURN 1'" ]
35
- interval: 10s
36
- timeout: 5s
37
- retries: 10
38
- start_period: 30s
39
- networks:
40
- - llm_agents_net
41
-
42
- ollama_embed:
43
- image: ollama/ollama:latest
44
- container_name: ollama_embed
45
- ports:
46
- - "11435:11434"
47
- volumes:
48
- - ~/.ollama:/root/.ollama
49
- environment:
50
- - OLLAMA_HOST=0.0.0.0
51
- - NVIDIA_VISIBLE_DEVICES=all
52
- - NVIDIA_DRIVER_CAPABILITIES=compute,utility
53
- - EMBEDDING_MODEL_NAME=${EMBEDDING_MODEL_NAME}
54
- restart: always
55
- entrypoint: [ "/bin/bash", "-c", "\
56
- ollama serve & \
57
- sleep 5 && \
58
- ollama pull $EMBEDDING_MODEL_NAME && \
59
- wait" ]
60
- networks:
61
- - llm_agents_net
62
- deploy:
63
- resources:
64
- reservations:
65
- devices:
66
- - driver: nvidia
67
- count: all
68
- capabilities: [ gpu ]
69
-
70
- ollama_agent:
71
- image: ollama/ollama:latest
72
- container_name: ollama_agent
73
- ports:
74
- - "11436:11434"
75
- volumes:
76
- - ~/.ollama:/root/.ollama
77
- environment:
78
- - OLLAMA_HOST=0.0.0.0
79
- - NVIDIA_VISIBLE_DEVICES=all
80
- - NVIDIA_DRIVER_CAPABILITIES=compute,utility
81
- - AGENT_MODEL_NAME=${AGENT_MODEL_NAME}
82
- restart: always
83
- entrypoint: [ "/bin/bash", "-c", "\
84
- ollama serve & \
85
- sleep 5 && \
86
- ollama pull $AGENT_MODEL_NAME && \
87
- wait" ]
88
- networks:
89
- - llm_agents_net
90
- deploy:
91
- resources:
92
- reservations:
93
- devices:
94
- - driver: nvidia
95
- count: all
96
- capabilities: [ gpu ]
97
-
98
- webinterface:
99
- build:
100
- context: .
101
- dockerfile: Dockerfile
102
- container_name: llm-agents-web
103
- ports:
104
- - "7860:7860"
105
- environment:
106
- - PYTHONUNBUFFERED=1
107
- - OLLAMA_API_KEY=${OLLAMA_API_KEY}
108
- - NEO4J_USERNAME=${NEO4J_USERNAME:-neo4j}
109
- - NEO4J_PASSWORD=${NEO4J_PASSWORD:-llm_agents}
110
- - NEO4J_DB_URI=${NEO4J_DB_URI}
111
- - POPULATE_DATABASE_NIPS2025=false
112
- - EMBEDDING_MODEL_NAME=${EMBEDDING_MODEL_NAME}
113
- - EMBEDDING_MODEL_API_BASE=http://ollama_embed:11434
114
- - AGENT_MODEL_NAME=${AGENT_MODEL_NAME}
115
- - AGENT_MODEL_API_BASE=http://ollama_agent:11434
116
- - NEO4J_DB_NODE_RETURN_LIMIT=${NEO4J_DB_NODE_RETURN_LIMIT}
117
- restart: unless-stopped
118
- networks:
119
- - llm_agents_net
120
- depends_on:
121
- neo4j_db:
122
- condition: service_healthy
123
- ollama_embed:
124
- condition: service_started
125
- ollama_agent:
126
- condition: service_started
127
-
128
-
129
- networks:
130
- llm_agents_net:
131
-
132
-
133
- volumes:
134
- neo4j_data:
135
- neo4j_logs:
136
- neo4j_import:
137
- neo4j_plugins:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
graphs/.gitkeep DELETED
File without changes
pyproject.toml DELETED
@@ -1,59 +0,0 @@
1
- [project]
2
- name = "agentic-nav"
3
- version = "0.1.0"
4
- description = "Conference navigation agent leveraging graph databases and semantic search to provide paper recommendations, research network exploration, and automated schedule generation for NeurIPS 2025 attendees."
5
- readme = "README.md"
6
- authors = [
7
- {name = "Shiqiang Wang", email = "s.wang9@exeter.ac.uk"},
8
- {name = "Herbert Woisetschläger", email = "herbert.woisetschlaeger@tum.de"}
9
- ]
10
-
11
- requires-python = ">=3.10"
12
- dependencies = [
13
- "aiofiles",
14
- "einops",
15
- "flask",
16
- "gradio[mcp,oauth]",
17
- "hatchling",
18
- "httpx",
19
- "kaleido",
20
- "litellm",
21
- "neo4j",
22
- "prompt-toolkit",
23
- "pydantic",
24
- "pydantic-settings",
25
- "pyvis>=0.3.2",
26
- "rich>=13.0.0",
27
- "sentence-transformers",
28
- "toon-format",
29
- "torch==2.8.0",
30
- "typer",
31
- ]
32
-
33
- [tool.uv.workspace]
34
- members = [
35
- "litellm",
36
- ]
37
-
38
- [tool.uv.sources]
39
- litellm = { git = "https://github.com/shiqiangw/litellm.git" }
40
- toon-format = { git = "https://github.com/toon-format/toon-python.git" }
41
-
42
- [build-system]
43
- requires = ["hatchling"]
44
- build-backend = "hatchling.build"
45
-
46
- [dependency-groups]
47
- dev = [
48
- "pytest>=9.0.1",
49
- "pytest-asyncio>=1.3.0",
50
- "pytest-cov>=7.0.0",
51
- "pytest-mock>=3.15.1",
52
- ]
53
-
54
- [tool.hatchling.build.targets.wheel]
55
- packages = ["llm_agents"]
56
-
57
- [project.scripts]
58
- agentic-nav-cli = "agentic_nav.frontend.cli:main"
59
- agentic-nav-web = "agentic_nav.frontend.browser_ui:main"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pytest.ini DELETED
@@ -1,26 +0,0 @@
1
- [pytest]
2
- minversion = 6.0
3
- testpaths = tests
4
- python_files = test_*.py
5
- python_classes = Test*
6
- python_functions = test_*
7
- addopts =
8
- --strict-markers
9
- --strict-config
10
- --verbose
11
- markers =
12
- unit: Unit tests
13
- integration: Integration tests (currently skipped, require full setup)
14
- slow: Slow tests that require external services
15
- neo4j: Tests requiring Neo4j database
16
- ollama: Tests requiring Ollama service
17
- no_auto_env: Tests that should not use automatic environment variable loading
18
- asyncio_mode = auto
19
- asyncio_default_fixture_loop_scope = function
20
-
21
- [coverage:run]
22
- source = .
23
- omit =
24
- */tests/*
25
- */test_*
26
- setup.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,6 +1,6 @@
1
  # This file was autogenerated by uv via the following command:
2
  # uv pip compile pyproject.toml
3
- agentiv_nav @ git+https://${GH_USER}:${GH_TOKEN}@github.com/core-aix/agentic-nav.git@dev
4
  aiofiles==24.1.0
5
  # via
6
  # llm-agents (pyproject.toml)
 
1
  # This file was autogenerated by uv via the following command:
2
  # uv pip compile pyproject.toml
3
+ agentic_nav @ git+https://${GH_USER}:${GH_TOKEN}@github.com/core-aix/agentic-nav.git@dev
4
  aiofiles==24.1.0
5
  # via
6
  # llm-agents (pyproject.toml)
scripts/docker-entrypoint.sh DELETED
@@ -1,14 +0,0 @@
1
- #!/bin/bash
2
- set -e
3
-
4
- echo "Neo4j is up - executing command"
5
-
6
- if [ "${POPULATE_DATABASE_NIPS2025}" = "true" ]; then
7
- echo "Importing NeurIPS 2025 papers..."
8
- bash scripts/import_neurips2025_kg.sh
9
- else
10
- echo "Skipping NeurIPS 2025 paper import (POPULATE_DATABASE_NIPS2025 is not set to 'true')"
11
- fi
12
-
13
- echo "Starting main application..."
14
- exec "$@"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/import_neurips2025_kg.sh DELETED
@@ -1,13 +0,0 @@
1
- #!/bin/bash
2
-
3
- # Download the pre-built knowledge graph
4
- wget -O graphs/neurips2025_knowledge_graph.pkl https://syncandshare.lrz.de/dl/fiJPiUkKp1SZAqRX2m76S6/knowledge_graph_thresh_0.6_v3.pkl
5
-
6
- # Import the knowledge graph to the database
7
- uv run llm_agents/tools/knowledge_graph/neo4j_db_importer.py \
8
- --graph-path graphs/neurips2025_knowledge_graph.pkl \
9
- --neo4j-uri bolt://neo4j_db:7687 \
10
- --neo4j-username $NEO4J_USERNAME \
11
- --neo4j-password $NEO4J_PASSWORD \
12
- --batch-size 100 \
13
- --embedding-dimension 768
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/prepare_gradio.sh DELETED
@@ -1,18 +0,0 @@
1
- #!/bin/bash
2
-
3
- set -e
4
-
5
- # Only initialize submodules if not in Docker (gradio folder not present)
6
- if [ ! -d "gradio/.git" ]; then
7
- echo "Initializing and updating git submodules..."
8
- git submodule update --init --recursive
9
- cd gradio
10
- echo "Pinned gradio version to GIT revision 648169d85fbeeffc184115c4c92b12957f2a162f (Nov. 12, 2025)"
11
- git checkout 648169d85fbeeffc184115c4c92b12957f2a162f
12
- cd ..
13
- fi
14
-
15
- echo "Building Gradio frontend..."
16
- cd gradio
17
- bash scripts/build_frontend.sh
18
- cd ..
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tests/__init__.py DELETED
@@ -1 +0,0 @@
1
- # Test package