Herbert committed on
Commit
4188210
·
1 Parent(s): ca7c002

Added hf_spaces instructions

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .coveragerc +0 -25
  2. .dockerignore +0 -46
  3. .github/workflows/tests.yaml +0 -35
  4. .gitignore +0 -215
  5. .gitmodules +0 -0
  6. .python-version +0 -1
  7. CLAUDE.md +0 -165
  8. Dockerfile +0 -50
  9. LICENSE +0 -201
  10. README.md +1 -187
  11. agentic_nav/__init__.py +0 -0
  12. agentic_nav/agents/__init__.py +0 -1
  13. agentic_nav/agents/base.py +0 -327
  14. agentic_nav/agents/neurips2025_conference.py +0 -48
  15. agentic_nav/frontend/__init__.py +0 -0
  16. agentic_nav/frontend/browser_ui.py +0 -525
  17. agentic_nav/frontend/cli.py +0 -371
  18. agentic_nav/tools/__init__.py +0 -15
  19. agentic_nav/tools/knowledge_graph/__init__.py +0 -326
  20. agentic_nav/tools/knowledge_graph/file_handler.py +0 -29
  21. agentic_nav/tools/knowledge_graph/graph_generator.py +0 -446
  22. agentic_nav/tools/knowledge_graph/graph_traversal_strategies/__init__.py +0 -15
  23. agentic_nav/tools/knowledge_graph/graph_traversal_strategies/breadth_first_random.py +0 -80
  24. agentic_nav/tools/knowledge_graph/graph_traversal_strategies/depth_first_random.py +0 -78
  25. agentic_nav/tools/knowledge_graph/graph_traversal_strategies/neo4j_builtin.py +0 -50
  26. agentic_nav/tools/knowledge_graph/neo4j_db_importer.py +0 -537
  27. agentic_nav/tools/knowledge_graph/retriever.py +0 -612
  28. agentic_nav/tools/session_routing/__init__.py +0 -210
  29. agentic_nav/tools/session_routing/scheduler.py +0 -377
  30. agentic_nav/tools/session_routing/utils.py +0 -253
  31. agentic_nav/utils/__init__.py +0 -3
  32. agentic_nav/utils/cli/__init__.py +0 -3
  33. agentic_nav/utils/cli/editor.py +0 -29
  34. agentic_nav/utils/cli/help.py +0 -14
  35. agentic_nav/utils/cli/history.py +0 -11
  36. agentic_nav/utils/embedding_generator.py +0 -151
  37. agentic_nav/utils/file_handlers.py +0 -10
  38. agentic_nav/utils/logger.py +0 -49
  39. agentic_nav/utils/tooling.py +0 -44
  40. app.py +5 -0
  41. data/.keep +0 -0
  42. docker-compose.yaml +0 -137
  43. graphs/.gitkeep +0 -0
  44. pyproject.toml +0 -59
  45. pytest.ini +0 -26
  46. requirements.txt +1 -1
  47. scripts/docker-entrypoint.sh +0 -14
  48. scripts/import_neurips2025_kg.sh +0 -13
  49. scripts/prepare_gradio.sh +0 -18
  50. tests/__init__.py +0 -1
.coveragerc DELETED
@@ -1,25 +0,0 @@
1
- [run]
2
- source = llm_agents
3
- omit =
4
- */gradio/*
5
- */tests/*
6
- */__pycache__/*
7
- */.*
8
- */venv/*
9
- */.venv/*
10
-
11
- [report]
12
- exclude_lines =
13
- pragma: no cover
14
- def __repr__
15
- if self.debug:
16
- if settings.DEBUG
17
- raise AssertionError
18
- raise NotImplementedError
19
- if 0:
20
- if __name__ == .__main__.:
21
- class .*\bProtocol\):
22
- @(abc\.)?abstractmethod
23
-
24
- [html]
25
- directory = htmlcov
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.dockerignore DELETED
@@ -1,46 +0,0 @@
1
- # Python
2
- __pycache__
3
- *.py[cod]
4
- *$py.class
5
- *.so
6
- .Python
7
- *.egg-info
8
- dist
9
- build
10
- .eggs
11
-
12
- # Virtual environments
13
- .venv
14
- venv
15
- ENV
16
- env
17
-
18
- # IDE
19
- .vscode
20
- .idea
21
- *.swp
22
- *.swo
23
- *~
24
-
25
- # Version control
26
- .git
27
- .gitignore
28
-
29
- # OS
30
- .DS_Store
31
- Thumbs.db
32
-
33
- # Testing
34
- .pytest_cache
35
- .coverage
36
- htmlcov
37
-
38
- # Documentation
39
- docs/_build
40
-
41
- # Logs
42
- *.log
43
-
44
- # Local development files
45
- .env.local
46
- *.local
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.github/workflows/tests.yaml DELETED
@@ -1,35 +0,0 @@
1
- name: Tests
2
-
3
- on:
4
- push:
5
- branches: [ main, master, dev ]
6
- pull_request:
7
- branches: [ main, master, dev ]
8
-
9
- jobs:
10
- test:
11
- runs-on: ubuntu-latest
12
- strategy:
13
- matrix:
14
- python-version: ['3.14']
15
-
16
- steps:
17
- - uses: actions/checkout@v4
18
- with:
19
- submodules: recursive
20
-
21
- - name: Set up Python ${{ matrix.python-version }}
22
- uses: actions/setup-python@v4
23
- with:
24
- python-version: ${{ matrix.python-version }}
25
-
26
- - name: Install uv
27
- uses: astral-sh/setup-uv@v3
28
-
29
- - name: Install dependencies
30
- run: uv sync
31
-
32
- - name: Run tests with coverage
33
- run: |
34
- uv run pytest --cov=llm_agents --cov-report=term --cov-report=json tests/
35
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.gitignore DELETED
@@ -1,215 +0,0 @@
1
- # Byte-compiled / optimized / DLL files
2
- __pycache__/
3
- *.py[codz]
4
- *$py.class
5
-
6
- # C extensions
7
- *.so
8
-
9
- # Distribution / packaging
10
- .Python
11
- build/
12
- develop-eggs/
13
- dist/
14
- downloads/
15
- eggs/
16
- .eggs/
17
- lib/
18
- lib64/
19
- parts/
20
- sdist/
21
- var/
22
- wheels/
23
- share/python-wheels/
24
- *.egg-info/
25
- .installed.cfg
26
- *.egg
27
- MANIFEST
28
-
29
- # PyInstaller
30
- # Usually these files are written by a python script from a template
31
- # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
- *.manifest
33
- *.spec
34
-
35
- # Installer logs
36
- pip-log.txt
37
- pip-delete-this-directory.txt
38
-
39
- # Unit test / coverage reports
40
- htmlcov/
41
- .tox/
42
- .nox/
43
- .coverage
44
- .coverage.*
45
- .cache
46
- nosetests.xml
47
- coverage.xml
48
- *.cover
49
- *.py.cover
50
- .hypothesis/
51
- .pytest_cache/
52
- cover/
53
-
54
- # Translations
55
- *.mo
56
- *.pot
57
-
58
- # Django stuff:
59
- *.log
60
- local_settings.py
61
- db.sqlite3
62
- db.sqlite3-journal
63
-
64
- # Flask stuff:
65
- instance/
66
- .webassets-cache
67
-
68
- # Scrapy stuff:
69
- .scrapy
70
-
71
- # Sphinx documentation
72
- docs/_build/
73
-
74
- # PyBuilder
75
- .pybuilder/
76
- target/
77
-
78
- # Jupyter Notebook
79
- .ipynb_checkpoints
80
-
81
- # IPython
82
- profile_default/
83
- ipython_config.py
84
-
85
- # pyenv
86
- # For a library or package, you might want to ignore these files since the code is
87
- # intended to run in multiple environments; otherwise, check them in:
88
- # .python-version
89
-
90
- # pipenv
91
- # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
- # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
- # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
- # install all needed dependencies.
95
- #Pipfile.lock
96
-
97
- # UV
98
- # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
- # This is especially recommended for binary packages to ensure reproducibility, and is more
100
- # commonly ignored for libraries.
101
- #uv.lock
102
-
103
- # poetry
104
- # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
- # This is especially recommended for binary packages to ensure reproducibility, and is more
106
- # commonly ignored for libraries.
107
- # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
- #poetry.lock
109
- #poetry.toml
110
-
111
- # pdm
112
- # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
- # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
- # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
- #pdm.lock
116
- #pdm.toml
117
- .pdm-python
118
- .pdm-build/
119
-
120
- # pixi
121
- # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
- #pixi.lock
123
- # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
- # in the .venv directory. It is recommended not to include this directory in version control.
125
- .pixi
126
-
127
- # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
- __pypackages__/
129
-
130
- # Celery stuff
131
- celerybeat-schedule
132
- celerybeat.pid
133
-
134
- # SageMath parsed files
135
- *.sage.py
136
-
137
- # Environments
138
- .env
139
- .envrc
140
- .venv
141
- env/
142
- venv/
143
- ENV/
144
- env.bak/
145
- venv.bak/
146
- .idea/
147
-
148
- # Spyder project settings
149
- .spyderproject
150
- .spyproject
151
-
152
- # Rope project settings
153
- .ropeproject
154
-
155
- # mkdocs documentation
156
- /site
157
-
158
- # mypy
159
- .mypy_cache/
160
- .dmypy.json
161
- dmypy.json
162
-
163
- # Pyre type checker
164
- .pyre/
165
-
166
- # pytype static type analyzer
167
- .pytype/
168
-
169
- # Cython debug symbols
170
- cython_debug/
171
-
172
- # PyCharm
173
- # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
174
- # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
175
- # and can be added to the global gitignore or merged into this file. For a more nuclear
176
- # option (not recommended) you can uncomment the following to ignore the entire idea folder.
177
- #.idea/
178
-
179
- # Abstra
180
- # Abstra is an AI-powered process automation framework.
181
- # Ignore directories containing user credentials, local state, and settings.
182
- # Learn more at https://abstra.io/docs
183
- .abstra/
184
-
185
- # Visual Studio Code
186
- # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
187
- # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
188
- # and can be added to the global gitignore or merged into this file. However, if you prefer,
189
- # you could uncomment the following to ignore the entire vscode folder
190
- # .vscode/
191
-
192
- # Ruff stuff:
193
- .ruff_cache/
194
-
195
- # PyPI configuration file
196
- .pypirc
197
-
198
- # Cursor
199
- # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
200
- # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
201
- # refer to https://docs.cursor.com/context/ignore-files
202
- .cursorignore
203
- .cursorindexingignore
204
-
205
- # Marimo
206
- marimo/_static/
207
- marimo/_lsp/
208
- __marimo__/
209
-
210
-
211
- data/*.json
212
- .vscode/
213
- rag_index_json/
214
- *.pkl
215
- *.json
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.gitmodules DELETED
File without changes
.python-version DELETED
@@ -1 +0,0 @@
1
- 3.10
 
 
CLAUDE.md DELETED
@@ -1,165 +0,0 @@
1
- # CLAUDE.md
2
-
3
- This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
-
5
- ## Project Overview
6
-
7
- This is LLMAgents, a Python package for AI research analysis agents. The system helps researchers browse papers, find similar papers, write summaries, and plan conference schedules using Neo4j knowledge graphs and LLM agents.
8
-
9
- ## Key Commands
10
-
11
- ### Environment Setup
12
- ```bash
13
- # Install dependencies
14
- uv sync
15
-
16
- # Setup environment variables (required before running)
17
- export $(grep -v '^#' .env | xargs)
18
-
19
- # Prepare gradio from source (for Python 3.14 compatibility)
20
- bash scripts/prepare_gradio.sh
21
- ```
22
-
23
- ### Running the Application
24
- ```bash
25
- # CLI interface
26
- uv run agentic-nav-cli -t 0.4 --max-tokens 6000 -c 131072 --max-num-papers 10
27
-
28
- # Web interface
29
- agentic-nav-web
30
- ```
31
-
32
- ### Database and Knowledge Graph
33
- ```bash
34
- # Start required services
35
- docker compose up neo4j_db ollama_embed ollama_agent -d
36
-
37
- # Build knowledge graph from NeurIPS 2025 data
38
- uv run llm_agents/tools/knowledge_graph/graph_generator.py \
39
- --input-json-file data/neurips-2025-orals-posters.json \
40
- --embedding-model $EMBEDDING_MODEL_NAME \
41
- --ollama-server-url $EMBEDDING_MODEL_API_BASE \
42
- --embedding-gen-batch-size 32 \
43
- --max-parallel-workers 28 \
44
- --similarity-threshold 0.8 \
45
- --output-file graphs/knowledge_graph.pkl
46
-
47
- # Import knowledge graph to Neo4j
48
- uv run llm_agents/tools/knowledge_graph/neo4j_db_importer.py \
49
- --graph-path graphs/knowledge_graph.pkl \
50
- --neo4j-uri bolt://localhost:7687 \
51
- --batch-size 100 \
52
- --embedding-dimension 768
53
- ```
54
-
55
- ### Testing
56
- ```bash
57
- # Run all tests (recommended - avoids gradio conflicts)
58
- uv run pytest tests/
59
-
60
- # Run tests with coverage report
61
- uv run pytest tests/ --cov=llm_agents --cov-report=term-missing
62
-
63
- # Alternative: Use the custom test runner
64
- python run_tests.py
65
-
66
- # Run specific test categories
67
- uv run pytest tests/ -m unit # Unit tests only
68
- uv run pytest tests/ -m integration # Integration tests only
69
- uv run pytest tests/ -m "not slow" # Skip slow tests
70
-
71
- # Run tests for specific module
72
- uv run pytest tests/agents/
73
- uv run pytest tests/tools/
74
- uv run pytest tests/utils/
75
- uv run pytest tests/frontend/
76
-
77
- # Run single test file
78
- uv run pytest tests/agents/test_base.py
79
-
80
- # Run with verbose output
81
- uv run pytest tests/ -v
82
-
83
- # Generate HTML coverage report
84
- uv run pytest tests/ --cov=llm_agents --cov-report=html
85
- # View coverage report at htmlcov/index.html
86
-
87
- # Note: Always specify tests/ directory to avoid conflicts with gradio workspace
88
- ```
89
-
90
- ### Development
91
- ```bash
92
- # Run full system with Docker
93
- docker compose up --build -d
94
-
95
- # Import pre-generated NeurIPS 2025 knowledge graph
96
- bash scripts/import_neurips2025_kg.sh
97
- ```
98
-
99
- ## Architecture
100
-
101
- ### Core Components
102
-
103
- 1. **Agent System (`llm_agents/agents/`)**
104
- - `base.py`: Core LLMAgent class with streaming support and tool execution
105
- - `neurips2025_conference.py`: Specialized agent for NeurIPS 2025 conference data
106
- - Uses LiteLLM for model abstraction, supports Ollama models
107
-
108
- 2. **Tools System (`llm_agents/tools/`)**
109
- - Knowledge graph tools: `search_similar_papers`, `find_neighboring_papers`, `traverse_graph`
110
- - Graph traversal strategies: breadth-first, depth-first, neo4j builtin
111
- - Tool registry automatically discovers callable functions
112
-
113
- 3. **Frontend (`llm_agents/frontend/`)**
114
- - `cli.py`: Rich terminal interface with streaming, command history, auto-completion
115
- - `browser_ui.py`: Gradio web interface for browser-based interactions
116
- - Both interfaces support the same agent functionality
117
-
118
- 4. **Knowledge Graph (`llm_agents/tools/knowledge_graph/`)**
119
- - Neo4j-based paper similarity and relationship storage
120
- - Embedding-based vector search for paper discovery
121
- - Support for graph traversal algorithms
122
-
123
- ### Key Data Flow
124
-
125
- 1. User input → Frontend (CLI/Web)
126
- 2. Frontend → Agent (stateless interaction with streaming)
127
- 3. Agent → LLM (via LiteLLM) + Tools (knowledge graph queries)
128
- 4. Tools → Neo4j database for paper retrieval
129
- 5. Results streamed back to user with live markdown rendering
130
-
131
- ## Configuration
132
-
133
- ### Required Environment Variables
134
- ```bash
135
- NEO4J_USERNAME=neo4j
136
- NEO4J_PASSWORD=<password>
137
- EMBEDDING_MODEL_NAME=nomic-embed-text
138
- EMBEDDING_MODEL_API_BASE=http://localhost:11435
139
- AGENT_MODEL_NAME=gpt-oss:20b
140
- AGENT_MODEL_API_BASE=http://localhost:11436
141
- OLLAMA_API_KEY=<optional>
142
- POPULATE_DATABASE_NIPS2025=false
143
- AGENTIC_NAV_LOG_LEVEL=INFO
144
- ```
145
-
146
- ### Model Support
147
- - Primary: Ollama models (local and remote)
148
- - Remote Ollama models via https://ollama.com with API key
149
- - Uses LiteLLM for provider abstraction
150
-
151
- ## Dependencies
152
-
153
- - **Python**: 3.14+ required
154
- - **uv**: For dependency management
155
- - **Neo4j**: Graph database for knowledge storage
156
- - **Ollama**: LLM inference (supports GPU acceleration with Nvidia Container Toolkit)
157
- - **Gradio**: Built from source for Python 3.14 compatibility
158
-
159
- ## Development Notes
160
-
161
- - The system is designed for multi-user sessions via stateless agent interactions
162
- - Streaming responses are supported in both CLI and web interfaces
163
- - Tool calls are automatically executed and results fed back to the LLM
164
- - Chat history can be saved/loaded in JSON format
165
- - Logging is configured per environment with structured output to `logs/` directory
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Dockerfile DELETED
@@ -1,50 +0,0 @@
1
- FROM python:3.14-slim
2
-
3
- WORKDIR /app
4
-
5
- RUN apt-get update && apt-get install -y \
6
- build-essential \
7
- git \
8
- bash \
9
- wget \
10
- curl \
11
- && rm -rf /var/lib/apt/lists/*
12
-
13
- # Install Node.js (required for pnpm and building Gradio frontend)
14
- RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
15
- apt-get install -y nodejs && \
16
- rm -rf /var/lib/apt/lists/*
17
-
18
- # Install pnpm globally
19
- RUN npm install -g pnpm
20
-
21
- # Install uv first (before copying files)
22
- RUN pip install --no-cache-dir uv
23
-
24
- # Copy all necessary files
25
- COPY pyproject.toml uv.lock* ./
26
- COPY .python-version* ./
27
- COPY README.md ./
28
- COPY LICENSE ./
29
- COPY llm_agents/ ./llm_agents/
30
- COPY scripts/ ./scripts/
31
- COPY graphs/ ./graphs/
32
-
33
- RUN mkdir ./gradio
34
- RUN git clone https://github.com/gradio-app/gradio.git gradio/
35
-
36
- # Run the gradio preparation script (build frontend only, submodule already initialized)
37
- RUN bash scripts/prepare_gradio.sh
38
-
39
- # Use uv sync to install dependencies
40
- RUN uv sync
41
-
42
- EXPOSE 7860
43
-
44
- # Set entrypoint
45
- # Download and initialize the NeurIPS 2025 conference knowledge graph
46
- RUN chmod +x /app/scripts/docker-entrypoint.sh
47
- RUN chmod +x /app/scripts/import_neurips2025_kg.sh
48
- ENTRYPOINT ["scripts/docker-entrypoint.sh"]
49
-
50
- CMD ["uv", "run", "llm_agents/frontend/browser_ui.py"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
LICENSE DELETED
@@ -1,201 +0,0 @@
1
- Apache License
2
- Version 2.0, January 2004
3
- http://www.apache.org/licenses/
4
-
5
- TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
-
7
- 1. Definitions.
8
-
9
- "License" shall mean the terms and conditions for use, reproduction,
10
- and distribution as defined by Sections 1 through 9 of this document.
11
-
12
- "Licensor" shall mean the copyright owner or entity authorized by
13
- the copyright owner that is granting the License.
14
-
15
- "Legal Entity" shall mean the union of the acting entity and all
16
- other entities that control, are controlled by, or are under common
17
- control with that entity. For the purposes of this definition,
18
- "control" means (i) the power, direct or indirect, to cause the
19
- direction or management of such entity, whether by contract or
20
- otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
- outstanding shares, or (iii) beneficial ownership of such entity.
22
-
23
- "You" (or "Your") shall mean an individual or Legal Entity
24
- exercising permissions granted by this License.
25
-
26
- "Source" form shall mean the preferred form for making modifications,
27
- including but not limited to software source code, documentation
28
- source, and configuration files.
29
-
30
- "Object" form shall mean any form resulting from mechanical
31
- transformation or translation of a Source form, including but
32
- not limited to compiled object code, generated documentation,
33
- and conversions to other media types.
34
-
35
- "Work" shall mean the work of authorship, whether in Source or
36
- Object form, made available under the License, as indicated by a
37
- copyright notice that is included in or attached to the work
38
- (an example is provided in the Appendix below).
39
-
40
- "Derivative Works" shall mean any work, whether in Source or Object
41
- form, that is based on (or derived from) the Work and for which the
42
- editorial revisions, annotations, elaborations, or other modifications
43
- represent, as a whole, an original work of authorship. For the purposes
44
- of this License, Derivative Works shall not include works that remain
45
- separable from, or merely link (or bind by name) to the interfaces of,
46
- the Work and Derivative Works thereof.
47
-
48
- "Contribution" shall mean any work of authorship, including
49
- the original version of the Work and any modifications or additions
50
- to that Work or Derivative Works thereof, that is intentionally
51
- submitted to Licensor for inclusion in the Work by the copyright owner
52
- or by an individual or Legal Entity authorized to submit on behalf of
53
- the copyright owner. For the purposes of this definition, "submitted"
54
- means any form of electronic, verbal, or written communication sent
55
- to the Licensor or its representatives, including but not limited to
56
- communication on electronic mailing lists, source code control systems,
57
- and issue tracking systems that are managed by, or on behalf of, the
58
- Licensor for the purpose of discussing and improving the Work, but
59
- excluding communication that is conspicuously marked or otherwise
60
- designated in writing by the copyright owner as "Not a Contribution."
61
-
62
- "Contributor" shall mean Licensor and any individual or Legal Entity
63
- on behalf of whom a Contribution has been received by Licensor and
64
- subsequently incorporated within the Work.
65
-
66
- 2. Grant of Copyright License. Subject to the terms and conditions of
67
- this License, each Contributor hereby grants to You a perpetual,
68
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
- copyright license to reproduce, prepare Derivative Works of,
70
- publicly display, publicly perform, sublicense, and distribute the
71
- Work and such Derivative Works in Source or Object form.
72
-
73
- 3. Grant of Patent License. Subject to the terms and conditions of
74
- this License, each Contributor hereby grants to You a perpetual,
75
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
- (except as stated in this section) patent license to make, have made,
77
- use, offer to sell, sell, import, and otherwise transfer the Work,
78
- where such license applies only to those patent claims licensable
79
- by such Contributor that are necessarily infringed by their
80
- Contribution(s) alone or by combination of their Contribution(s)
81
- with the Work to which such Contribution(s) was submitted. If You
82
- institute patent litigation against any entity (including a
83
- cross-claim or counterclaim in a lawsuit) alleging that the Work
84
- or a Contribution incorporated within the Work constitutes direct
85
- or contributory patent infringement, then any patent licenses
86
- granted to You under this License for that Work shall terminate
87
- as of the date such litigation is filed.
88
-
89
- 4. Redistribution. You may reproduce and distribute copies of the
90
- Work or Derivative Works thereof in any medium, with or without
91
- modifications, and in Source or Object form, provided that You
92
- meet the following conditions:
93
-
94
- (a) You must give any other recipients of the Work or
95
- Derivative Works a copy of this License; and
96
-
97
- (b) You must cause any modified files to carry prominent notices
98
- stating that You changed the files; and
99
-
100
- (c) You must retain, in the Source form of any Derivative Works
101
- that You distribute, all copyright, patent, trademark, and
102
- attribution notices from the Source form of the Work,
103
- excluding those notices that do not pertain to any part of
104
- the Derivative Works; and
105
-
106
- (d) If the Work includes a "NOTICE" text file as part of its
107
- distribution, then any Derivative Works that You distribute must
108
- include a readable copy of the attribution notices contained
109
- within such NOTICE file, excluding those notices that do not
110
- pertain to any part of the Derivative Works, in at least one
111
- of the following places: within a NOTICE text file distributed
112
- as part of the Derivative Works; within the Source form or
113
- documentation, if provided along with the Derivative Works; or,
114
- within a display generated by the Derivative Works, if and
115
- wherever such third-party notices normally appear. The contents
116
- of the NOTICE file are for informational purposes only and
117
- do not modify the License. You may add Your own attribution
118
- notices within Derivative Works that You distribute, alongside
119
- or as an addendum to the NOTICE text from the Work, provided
120
- that such additional attribution notices cannot be construed
121
- as modifying the License.
122
-
123
- You may add Your own copyright statement to Your modifications and
124
- may provide additional or different license terms and conditions
125
- for use, reproduction, or distribution of Your modifications, or
126
- for any such Derivative Works as a whole, provided Your use,
127
- reproduction, and distribution of the Work otherwise complies with
128
- the conditions stated in this License.
129
-
130
- 5. Submission of Contributions. Unless You explicitly state otherwise,
131
- any Contribution intentionally submitted for inclusion in the Work
132
- by You to the Licensor shall be under the terms and conditions of
133
- this License, without any additional terms or conditions.
134
- Notwithstanding the above, nothing herein shall supersede or modify
135
- the terms of any separate license agreement you may have executed
136
- with Licensor regarding such Contributions.
137
-
138
- 6. Trademarks. This License does not grant permission to use the trade
139
- names, trademarks, service marks, or product names of the Licensor,
140
- except as required for reasonable and customary use in describing the
141
- origin of the Work and reproducing the content of the NOTICE file.
142
-
143
- 7. Disclaimer of Warranty. Unless required by applicable law or
144
- agreed to in writing, Licensor provides the Work (and each
145
- Contributor provides its Contributions) on an "AS IS" BASIS,
146
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
- implied, including, without limitation, any warranties or conditions
148
- of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
- PARTICULAR PURPOSE. You are solely responsible for determining the
150
- appropriateness of using or redistributing the Work and assume any
151
- risks associated with Your exercise of permissions under this License.
152
-
153
- 8. Limitation of Liability. In no event and under no legal theory,
154
- whether in tort (including negligence), contract, or otherwise,
155
- unless required by applicable law (such as deliberate and grossly
156
- negligent acts) or agreed to in writing, shall any Contributor be
157
- liable to You for damages, including any direct, indirect, special,
158
- incidental, or consequential damages of any character arising as a
159
- result of this License or out of the use or inability to use the
160
- Work (including but not limited to damages for loss of goodwill,
161
- work stoppage, computer failure or malfunction, or any and all
162
- other commercial damages or losses), even if such Contributor
163
- has been advised of the possibility of such damages.
164
-
165
- 9. Accepting Warranty or Additional Liability. While redistributing
166
- the Work or Derivative Works thereof, You may choose to offer,
167
- and charge a fee for, acceptance of support, warranty, indemnity,
168
- or other liability obligations and/or rights consistent with this
169
- License. However, in accepting such obligations, You may act only
170
- on Your own behalf and on Your sole responsibility, not on behalf
171
- of any other Contributor, and only if You agree to indemnify,
172
- defend, and hold each Contributor harmless for any liability
173
- incurred by, or claims asserted against, such Contributor by reason
174
- of your accepting any such warranty or additional liability.
175
-
176
- END OF TERMS AND CONDITIONS
177
-
178
- APPENDIX: How to apply the Apache License to your work.
179
-
180
- To apply the Apache License to your work, attach the following
181
- boilerplate notice, with the fields enclosed by brackets "[]"
182
- replaced with your own identifying information. (Don't include
183
- the brackets!) The text should be enclosed in the appropriate
184
- comment syntax for the file format. We also recommend that a
185
- file or class name and description of purpose be included on the
186
- same "printed page" as the copyright notice for easier
187
- identification within third-party archives.
188
-
189
- Copyright [yyyy] [name of copyright owner]
190
-
191
- Licensed under the Apache License, Version 2.0 (the "License");
192
- you may not use this file except in compliance with the License.
193
- You may obtain a copy of the License at
194
-
195
- http://www.apache.org/licenses/LICENSE-2.0
196
-
197
- Unless required by applicable law or agreed to in writing, software
198
- distributed under the License is distributed on an "AS IS" BASIS,
199
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
- See the License for the specific language governing permissions and
201
- limitations under the License.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md CHANGED
@@ -17,193 +17,7 @@ short_description: Agent for NeurIPS paper discovery and visit schedule builder
17
 
18
  # AgenticNAV - Your AI conference companion
19
 
20
- [![uv](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/uv/main/assets/badge/v0.json)](https://github.com/astral-sh/uv)
21
- ![Coverage](https://github.com/core-aix/agentic-nav/workflows/Tests/badge.svg)
22
 
23
- This repository contains code for an agent that can help you do related work for your next research project.
24
- Given the sheer amount of new publications that are being published at major machine learning conferences, this agent
25
- can help browse papers, find similar papers, and help you write summaries to quickly get an overview of what is currently
26
- going on.
27
 
28
- The agent can also support you in planning your next conference trip by providing a schedule around one or more topics
29
- that you are interested in.
30
 
31
- ## Installation & usage of the web-based interface
32
- The agent is conveniently packaged as a docker image. You can spin up the entire system by using the commands below.
33
- Make sure to have the [Nvidia Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installation) installed.
34
- At the moment we only support `ollama` models.
35
-
36
- Instead of a local agent model, you can also make use of remote ollama models. A full list is available here:
37
- https://docs.ollama.com/cloud.
38
- To make use of these large models, set `AGENT_MODEL_NAME=<your model of choice>` and
39
- `AGENT_MODEL_API_BASE=https://ollama.com`.
40
- Don't forget to set your `OLLAMA_API_KEY` either directly via the environment or in the browser.
41
-
42
- **Important note:** The ollama docker containers cannot use GPU acceleration on MacOS. If you want to use your Mac's GPU,
43
- you need to run ollama without containerization (i.e., you need to manually spin up the ollama server).
44
- With `NEO4J_DB_NODE_RETURN_LIMIT`, we set a strict return limit of 200 nodes per query to avoid overstraining the database.
45
- You can set it as needed for your use case.
46
-
47
- ```commandline
48
- # Database config
49
- echo "NEO4J_USERNAME=neo4j" >> .env
50
- echo "NEO4J_PASSWORD=<a password of your choice>" >> .env
51
- echo "NEO4J_DB_URI=bolt://neo4j_db:7687" >> .env
52
- echo "NEO4J_DB_NODE_RETURN_LIMIT=200" >> .env
53
-
54
- echo "EMBEDDING_MODEL_NAME=nomic-embed-text" >> .env
55
- echo "EMBEDDING_MODEL_API_BASE=http://ollama_agent:11434" >> .env
56
-
57
- echo "AGENT_MODEL_NAME=gpt-oss:20b" >> .env
58
- echo "AGENT_MODEL_API_BASE=http://ollama_agent:11434" >> .env
59
-
60
- # Optional: set your OLLAMA_API_KEY when using remote models
61
- echo "OLLAMA_API_KEY=<your key here>" >> .env
62
-
63
- # Set the following to true if you would like to import our pre-generated knowledge graph for the NeurIPS 2025 conference
64
- # Warning (!): Setting the parameter below to 'true' will clear any existing data inside the docker-based neo4j database
65
- echo "POPULATE_DATABASE_NIPS2025=false" >> .env
66
-
67
- git clone https://github.com/core-aix/agentic-nav.git
68
- cd agentic-nav
69
- docker compose up --build -d
70
- ```
71
-
72
- This will launch the agent and its web interface, available via `http://localhost:7860`, along with a neo4j database
73
- (community edition).
74
- **It will also populate the database with all accepted papers of the NeurIPS 2025 machine learning conference (if you set `POPULATE_DATABASE_NIPS2025=true`).**
75
- We include pair-wise similarity scores to enable graph traversals and the search for broadly related papers.
76
-
77
- After the docker containers are up and running, you can interact with the agent. Have fun!
78
-
79
-
80
- ## Development & contributing to the agent
81
- If you are interested in understanding the system in detail, you may want to run all setup steps manually and avoid a
82
- containerized runtime. Run the following steps to setup a development environment.
83
-
84
- We use [uv](https://docs.astral.sh/uv/) for dependency management.
85
- Our docker containers for serving LLMs use Ollama and GPU acceleration. For that, you need the [Nvidia Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installation)
86
- Make sure to have both installed before you proceed.
87
-
88
- ### Installation
89
- After you cloned the repository, you need to setup the database. We use neo4j to manage the knowledge graph data we need
90
- for the agent to work properly.
91
- **Note:** We are using gradio built from source as the latest release (as of Nov. 12, 2025) does not yet support python 3.14.
92
-
93
- First, export all necessary environment variables:
94
- ```commandline
95
- echo "NEO4J_USERNAME=neo4j" >> .env
96
- echo "NEO4J_PASSWORD=<a password of your choice>" >> .env
97
- echo "NEO4J_DB_URI=bolt://localhost:7687" >> .env
98
- echo "NEO4J_DB_NODE_RETURN_LIMIT=200" >> .env
99
-
100
- echo "EMBEDDING_MODEL_NAME=ollama/nomic-embed-text" >> .env
101
- echo "EMBEDDING_MODEL_API_BASE=http://localhost:11435" >> .env
102
-
103
- echo "AGENT_MODEL_NAME=ollama_chat/gpt-oss:20b" >> .env
104
- echo "AGENT_MODEL_API_BASE=http://localhost:11436" >> .env
105
-
106
- # Optional: set your OLLAMA_API_KEY when using remote models
107
- echo "OLLAMA_API_KEY=<your key here>" >> .env
108
-
109
- # Set the following to true if you would like to import our pre-generated knowledge graph for the NeurIPS 2025 conference
110
- # Warning (!): Setting the parameter below to 'true' will clear any existing data inside the docker-based neo4j database
111
- echo "POPULATE_DATABASE_NIPS2025=false" >> .env
112
-
113
- # Make sure you also have those values in your commandline environment
114
- export $(grep -v '^#' .env | xargs)
115
- ```
116
-
117
- Then get the project files:
118
- ```commandline
119
- git clone https://github.com/core-aix/agentic-nav.git
120
- cd agentic-nav
121
- docker compose up neo4j_db ollama_embed ollama_agent -d
122
-
123
- # The following command is only needed if you'd like to use the gradio-based GUI
124
- # This will eventually go away once gradio bumps their release version to support python 3.14
125
- bash scripts/prepare_gradio.sh
126
-
127
- uv sync
128
- ```
129
-
130
- ### Building the NeurIPS 2025 knowledge graph locally
131
- You can also build the knowledge graph yourself and, for example, swap the embedding model we use by default.
132
- Follow the steps below to do so. Note, that you still need to setup the project as described in the `Installation`
133
- subsection above. Make sure to set `POPULATE_DATABASE_NIPS2025=false` in your .env file.
134
-
135
- #### Get the data
136
- Download https://neurips.cc/static/virtual/data/neurips-2025-orals-posters.json and put the file in the `./data` folder.
137
- ```commandline
138
- wget -O data/neurips-2025-orals-posters.json https://neurips.cc/static/virtual/data/neurips-2025-orals-posters.json
139
- ```
140
-
141
- #### Build the knowledge graph
142
- You can build the knowledge graph per your needs by running the following script:
143
- ```commandline
144
- uv run llm_agents/tools/knowledge_graph/graph_generator.py \
145
- --input-json-file data/neurips-2025-orals-posters.json \
146
- --embedding-model $EMBEDDING_MODEL_NAME \
147
- --ollama-server-url $EMBEDDING_MODEL_API_BASE \
148
- --embedding-gen-batch-size 32 \
149
- --max-parallel-workers 28 \
150
- --similarity-threshold 0.6 \
151
- --output-file graphs/knowledge_graph.pkl \
152
- # --limit-num-papers # Optional
153
- ```
154
- **Important note:** Generating the full graph for over 6k papers can take more than 1 hour. You can find a set of pre-generated
155
- knowledge graphs here (the "thresh" in the file name indicates the `similarity-threshold` for which we create a `similar_to` relationship between papers): [LRZ Sync+Share](https://syncandshare.lrz.de/getlink/fiFMhMLLH7FaQ3Jipqqsye/)
156
-
157
-
158
- #### Importing the knowledge graph to a neo4j database
159
- We provide an importer to move the knowledge graph into a graph database that supports vector-based similarity search.
160
- ```commandline
161
- uv run llm_agents/tools/knowledge_graph/neo4j_db_importer.py \
162
- --graph-path graphs/knowledge_graph.pkl \
163
- --neo4j-uri $NEO4J_DB_URI \
164
- --batch-size 100 \
165
- --embedding-dimension 768 # This must match the vector dims generated by the embedding model.
166
- ```
167
- **Note:** Depending on what your graph looks like this can also take a while (> 20min for 6K papers). Also, beware that
168
- running this script will first clear any existing entries before the new graph is written to the database.
169
-
170
-
171
- ### Agent interactions
172
- We offer two ways of interacting with agents, via the command line and via the browser.
173
- The backend uses LiteLLM, which allows you to use a variety of LLM inference endpoints.
174
- Find details on the various providers [here](https://docs.litellm.ai/docs/providers).
175
-
176
- #### Commandline interface
177
- The agent can also be used via a versatile CLI.
178
- Below are two examples how to run a local and a remote model.
179
- We are using LiteLLM to abstract away from individual inference API providers.
180
- Note, that we currently only test with Ollama models.
181
- ```commandline
182
- uv run agentic-nav-cli \
183
- -t 0.4 \
184
- --max-tokens 6000 \
185
- -c 131072 \
186
- --max-num-papers 10
187
- ```
188
-
189
- #### Web-based interface (beginner friendly)
190
- We use gradio to provide a chat interface with the same functionalities as the commandline-based interface.
191
- You can launch the web app by running:
192
- ```commandline
193
- agentic-nav-web
194
- ```
195
- All the hyperparameters you need to set can be configured in the web interface and will be used in you individual session.
196
- Once you close the browser window, your session will terminate and all custom configuration will be removed.
197
- At the moment, the web UI only supports Ollama models.
198
-
199
- ### Debugging agent interactions
200
- The agent involves a set of asynchronous operations. We provide a built-in logging instance to capture all relevant logs
201
- for debugging. To set the right debugging level for your application, you can use the environment variable `AGENTIC_NAV_LOG_LEVEL`.
202
- By default, it is set to `INFO`.
203
-
204
- #### Running tests
205
- We try to cover all tools and agent functionalities in thorough unit tests.
206
- You can run them via:
207
- ```commandline
208
- uv run pytest tests/
209
- ```
 
17
 
18
  # AgenticNAV - Your AI conference companion
19
 
20
+ PLEASE FIND THE IMPLEMENTATION OF AGENTIC NAV ON GITHUB: [https://github.com/core-aix/agentic-nav](https://github.com/core-aix/agentic-nav)
 
21
 
 
 
 
 
22
 
 
 
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/__init__.py DELETED
File without changes
agentic_nav/agents/__init__.py DELETED
@@ -1 +0,0 @@
1
- from agentic_nav.agents.neurips2025_conference import NeurIPS2025Agent, DEFAULT_NEURIPS2025_AGENT_ARGS
 
 
agentic_nav/agents/base.py DELETED
@@ -1,327 +0,0 @@
1
- import json
2
-
3
- import litellm
4
- import logging
5
-
6
- from dataclasses import dataclass, field
7
- from typing import List, Dict
8
-
9
- from agentic_nav.tools import get_all_tools
10
- from agentic_nav.utils.tooling import infer_tool
11
-
12
- try:
13
- from datetime import datetime, UTC
14
- except ImportError:
15
- from datetime import datetime, timezone
16
- UTC = timezone.utc
17
-
18
- LOGGER = logging.getLogger(__name__)
19
-
20
-
21
- @dataclass
22
- class LLMAgent:
23
- model: str = "ollama_chat/gpt-oss:20b"
24
- api_base: str = "http://localhost:11434"
25
- api_key: str = None
26
- llm_args: dict = field(default_factory=lambda: {"temperature": 0.2, "max_tokens": 6000, "num_ctx": 131072})
27
- tools: List[callable] = field(default_factory=lambda: get_all_tools())
28
- global_tool_args: dict = field(default_factory=lambda: {"max_num_papers": 10})
29
- max_interaction_rounds: int = 10
30
- messages: List[Dict] = field(default_factory=lambda: [])
31
- tool_registry: Dict = None
32
- tool_descriptions: List = None
33
- default_system_prompt: Dict[str, str] = None
34
-
35
- def __remove_model_key_from_llm_args(self, stateful: bool = True):
36
- if stateful:
37
- self.model = self.llm_args["model"]
38
- self.api_base = self.llm_args["api_base"]
39
-
40
- if "model" in self.llm_args.keys():
41
- del self.llm_args["model"]
42
-
43
- if "api_base" in self.llm_args.keys():
44
- del self.llm_args["api_base"]
45
-
46
- def test_llm_connection(self):
47
- self.__remove_model_key_from_llm_args(stateful=True)
48
- try:
49
- response = litellm.completion(
50
- model=self.model,
51
- messages=[{"role": "user", "content": "test", "_ts": str(datetime.now(UTC))}],
52
- tool_choice="auto",
53
- api_base=self.api_base,
54
- api_key=self.api_key,
55
- stream=True,
56
- **self.llm_args,
57
- )
58
-
59
- LOGGER.info(f"Model is available! Response: {response.choices[0].message.content}")
60
- except Exception as e:
61
- LOGGER.error(f"Model not available or connection failed: {str(e)}")
62
-
63
- def setup_session(self, tool_funcs: List[callable] = None):
64
- self.tool_registry = {fn.__name__: fn for fn in self.tools} if tool_funcs is None else {fn.__name__: fn for fn in tool_funcs}
65
- self.tool_descriptions = [infer_tool(fn, tool_args=self.global_tool_args) for fn in self.tool_registry.values()]
66
- LOGGER.info(f"Agent setup and tools ready to use.")
67
- LOGGER.debug(f"Available tools: {self.tools}")
68
-
69
- def remove_session(self):
70
- """De-registers tools and resets messages to the initial state."""
71
- self.tool_registry = None
72
- self.tool_descriptions = None
73
- self.messages = [self.default_system_prompt if not None else {"role": "system", "content": "You are a helpful assistant."}]
74
-
75
- def interact(self, message: Dict):
76
- assert self.tool_registry is not None, "Make sure to call 'setup_session()' before the first interaction."
77
- assert self.tool_descriptions is not None, "Make sure to call 'setup_session()' before the first interaction."
78
-
79
- assert type(message) == dict, "Make sure to pass a dictionary as next message for the agent."
80
- assert "role" in message.keys(), "The message must contain a 'role' key."
81
- assert "content" in message.keys(), "The message must contain a 'content' key."
82
-
83
- self.__remove_model_key_from_llm_args(stateful=True)
84
- if "_ts" not in message.keys():
85
- message["_ts"] = str(datetime.now(UTC))
86
-
87
- self.messages.append(message)
88
- for _ in range(self.max_interaction_rounds):
89
- collected, calls = self._send_to_llm(
90
- messages=self.messages,
91
- model=self.model,
92
- api_base=self.api_base,
93
- api_key=self.api_key
94
- )
95
- # append the assembled assistant message so tool execution sees the assistant's follow-up
96
- self.messages.append({"role": "assistant", "content": collected, "_ts": str(datetime.now(UTC))})
97
- LOGGER.debug(f"Agent response: {collected}")
98
-
99
- if not calls:
100
- return self.messages
101
- else:
102
- self.messages[-1]["tool_calls"] = calls
103
- LOGGER.debug(f"Agent requested tool calls: {calls}")
104
-
105
- # execute tools and append results
106
- for call in calls:
107
- self.messages.append(
108
- self.call_tool(
109
- tool_call=call
110
- )
111
- )
112
-
113
- LOGGER.debug(f"Interaction complete. Total messages: {len(self.messages)}")
114
- return self.messages
115
-
116
- def interact_stateless(
117
- self,
118
- messages: List[Dict],
119
- model: str,
120
- api_base: str,
121
- api_key: str,
122
- llm_args: Dict = None
123
- ):
124
- """
125
- This method is designed to support multi-user sessions and requires state management outside the agent class.
126
- """
127
- assert self.tool_registry is not None, "Make sure to call 'setup_session()' before the first interaction."
128
- assert self.tool_descriptions is not None, "Make sure to call 'setup_session()' before the first interaction."
129
- self.__remove_model_key_from_llm_args(stateful=False)
130
-
131
- # Sanity check for all messages
132
- for message in messages:
133
- if "_ts" not in message.keys():
134
- message["_ts"] = str(datetime.now(UTC))
135
-
136
- for round_num in range(self.max_interaction_rounds):
137
- # Stream the LLM response
138
- collected = ""
139
- calls = []
140
-
141
- # Create initial assistant message
142
- assistant_msg_idx = len(messages)
143
- messages.append({"role": "assistant", "content": "", "_ts": str(datetime.now(UTC))})
144
-
145
- stream_iter = litellm.completion(
146
- model=model if model is not None else self.model,
147
- messages=messages[:assistant_msg_idx], # Don't include the empty assistant message
148
- tools=self.tool_descriptions,
149
- tool_choice="auto",
150
- api_base=api_base if api_base is not None else self.api_base,
151
- api_key=api_key if api_key is not None else self.api_key,
152
- stream=True,
153
- **llm_args if llm_args is not None else self.llm_args,
154
- )
155
-
156
- for chunk in stream_iter:
157
- choices = chunk.get("choices", []) or []
158
- if not choices:
159
- continue
160
- choice = choices[0]
161
-
162
- # Extract content from chunk
163
- content = None
164
- delta = choice.get("delta")
165
-
166
- if delta and "content" in delta:
167
- content = delta["content"]
168
- elif delta and "message" in delta and isinstance(delta["message"], dict):
169
- content = delta["message"].get("content")
170
-
171
- if delta and "tool_calls" in delta:
172
- calls.extend(delta["tool_calls"] or [])
173
-
174
- if content is None:
175
- msg = choice.get("message")
176
- if isinstance(msg, dict):
177
- content = msg.get("content")
178
-
179
- if content is None:
180
- content = choice.get("text")
181
-
182
- if content:
183
- if not isinstance(content, str):
184
- try:
185
- content = json.dumps(content, ensure_ascii=False)
186
- except Exception:
187
- content = str(content)
188
-
189
- collected += content
190
- # Update the assistant message with accumulated content
191
- messages[assistant_msg_idx]["content"] = collected
192
-
193
- # Yield the updated messages for streaming display
194
- yield messages.copy()
195
-
196
- # After streaming is complete, update with final content
197
- messages[assistant_msg_idx]["content"] = collected
198
- LOGGER.debug(f"Agent response: {collected}")
199
-
200
- if not calls:
201
- yield messages
202
- return
203
- else:
204
- messages[assistant_msg_idx]["tool_calls"] = calls
205
- LOGGER.debug(f"Agent requested tool calls: {calls}")
206
- yield messages.copy()
207
-
208
- # Execute tools and append results
209
- for call in calls:
210
- messages.append(self.call_tool(tool_call=call))
211
- yield messages.copy()
212
-
213
- yield messages
214
-
215
- def _send_to_llm(
216
- self,
217
- messages: List[Dict],
218
- model: str,
219
- api_base: str,
220
- api_key: str,
221
- llm_args: Dict = None
222
- ):
223
- stream_iter = litellm.completion(
224
- model=model if model is not None else self.model,
225
- messages=messages,
226
- tools=self.tool_descriptions,
227
- tool_choice="auto",
228
- api_base=api_base if api_base is not None else self.api_base,
229
- api_key=api_key if api_key is not None else self.api_key,
230
- stream=True,
231
- **llm_args if llm_args is not None else self.llm_args,
232
- )
233
-
234
- collected = ""
235
- calls = []
236
-
237
- for chunk in stream_iter:
238
- choices = chunk.get("choices", []) or []
239
- if not choices:
240
- continue
241
- choice = choices[0]
242
-
243
- # try several places where partial content may appear
244
- content = None
245
- delta = choice.get("delta")
246
-
247
- if "content" in delta:
248
- content = delta["content"]
249
- elif "message" in delta and isinstance(delta["message"], dict):
250
- content = delta["message"].get("content")
251
-
252
- if "tool_calls" in delta:
253
- calls.extend(delta["tool_calls"] or [])
254
-
255
- if content is None:
256
- msg = choice.get("message")
257
- if isinstance(msg, dict):
258
- content = msg.get("content")
259
-
260
- if content is None:
261
- content = choice.get("text")
262
-
263
- if content:
264
- if not isinstance(content, str):
265
- try:
266
- content = json.dumps(content, ensure_ascii=False)
267
- except Exception as e:
268
- LOGGER.error(f"JSON encoding error encountered. {e}. Treating agent response as regular text.")
269
- content = str(content)
270
-
271
- collected += content
272
-
273
- return collected, calls
274
-
275
- def call_tool(self, tool_call: Dict):
276
- name = tool_call["function"]["name"]
277
- args = tool_call["function"].get("arguments", "{}")
278
- LOGGER.debug(f"Preparing tool call: {name}")
279
- LOGGER.debug(f"Expected tool arguments: {args}")
280
- try:
281
- parsed = json.loads(args) if isinstance(args, str) else (args or {})
282
- LOGGER.debug(f"Parsed tool call arguments: {parsed}")
283
- except json.JSONDecodeError:
284
- parsed = {}
285
- LOGGER.warning(f"Tool call arguments: COULD NOT BE PARSED")
286
- out = self.tool_registry[name](**parsed)
287
- LOGGER.debug(f"Tool call output: {parsed}")
288
-
289
- return {
290
- "role": "tool",
291
- "tool_call_id": tool_call.get("id"),
292
- "name": name,
293
- "content": json.dumps(out, ensure_ascii=False),
294
- "_ts": str(datetime.now(UTC))
295
- }
296
-
297
- def set_history(self, messages):
298
- self.messages = messages
299
- LOGGER.info(f"Set new message history.")
300
-
301
- def get_history(self):
302
- return self.messages
303
-
304
- @staticmethod
305
- def set_system_prompt(new_system_prompt: str, messages: List[Dict]):
306
- messages = [m for m in messages if m.get("role") != "system"]
307
- messages.insert(0, {
308
- "role": "system",
309
- "content": new_system_prompt,
310
- "_ts": str(datetime.now(UTC))
311
- })
312
- LOGGER.info(f"New system prompt set and configured.")
313
- LOGGER.debug(f"New system prompt: {new_system_prompt}")
314
- return messages
315
-
316
- def get_system_prompt(self):
317
- for message in self.messages:
318
- if "role" in message.keys() and message["role"] == "system":
319
- return message
320
-
321
- return None
322
-
323
- def get_most_recent_assistant_message(self):
324
- for message in reversed(self.messages):
325
- if message.get("role") == "assistant":
326
- return message
327
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/agents/neurips2025_conference.py DELETED
@@ -1,48 +0,0 @@
1
- import os
2
-
3
- from dataclasses import dataclass
4
- from agentic_nav.agents.base import LLMAgent
5
- from agentic_nav.tools import search_similar_papers, find_neighboring_papers, traverse_graph, build_visit_schedule # <- the tools we expose
6
- from zoneinfo import ZoneInfo
7
-
8
- try:
9
- from datetime import datetime, UTC
10
- except ImportError:
11
- from datetime import datetime, timezone
12
- UTC = timezone.utc
13
-
14
-
15
- DEFAULT_NEURIPS2025_AGENT_ARGS = {
16
- "model": os.environ.get("AGENT_MODEL_NAME", "gpt-oss:120b-cloud"),
17
- "api_base": os.environ.get("AGENT_MODEL_API_BASE", "https://ollama.com"),
18
- "api_key": os.environ.get("OLLAMA_API_KEY"),
19
- "llm_args": {"temperature": 0.2, "max_tokens": 6000, "num_ctx": 131072},
20
- "global_tool_args": {"max_num_papers": 10}
21
- }
22
-
23
-
24
- system = {
25
- "role": "system",
26
- "content": (
27
- "You are an assistant who can help browsing NeurIPS 2025 papers. "
28
- "You are provided with a search tool that can search all accepted papers of NeurIPS 2025. "
29
- "However, note that the search tool only takes paper titles and abstracts as input keywords; "
30
- "it cannot take anything else as the input keywords. "
31
- "However, the output of the search includes various metadata fields such as authors, affiliations, "
32
- "and session times. \n"
33
- "When building a schedule, do not specify the name of the day.\n"
34
- "If you find duplicates, just omit them. Only keep the first appearance.\n"
35
- f"Generally, if you do not find a result, tell the user you don't know.\n"
36
- f"Here is the current timestamp: {datetime.now(ZoneInfo('America/Los_Angeles'))}. The conference is happening in San Diego, California."
37
- )
38
- }
39
-
40
-
41
- @dataclass
42
- class NeurIPS2025Agent(LLMAgent):
43
-
44
- def __init__(self, *args, **kwargs):
45
- super().__init__(*args, **kwargs)
46
- self.messages = [{**system}]
47
- self.tools = [search_similar_papers, find_neighboring_papers, traverse_graph, build_visit_schedule]
48
- self.default_system_prompt = system
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/frontend/__init__.py DELETED
File without changes
agentic_nav/frontend/browser_ui.py DELETED
@@ -1,525 +0,0 @@
1
- """
2
- Gradio web UI that interacts with an agent implementation.
3
-
4
- Features matching terminal UI:
5
- - Multi-turn chat with Markdown rendering
6
- - System prompt editing
7
- - View conversation history
8
- - Save chat history to file
9
- - All model configuration options
10
- - Clear chat functionality
11
- - **Per-user conversation state management with stateless agent**
12
- """
13
- from venv import logger
14
-
15
- import gradio as gr
16
- import os
17
- import datetime
18
- import logging
19
- import json
20
-
21
- from pathlib import Path
22
- from typing import List, Tuple, Optional, Dict
23
-
24
- from agentic_nav.agents import NeurIPS2025Agent, DEFAULT_NEURIPS2025_AGENT_ARGS
25
- from agentic_nav.utils.logger import setup_logging
26
- from agentic_nav.utils.file_handlers import save_chat_history
27
-
28
-
29
# Module-level logger; configured by setup_logging() in __main__.
LOGGER = logging.getLogger(__name__)

# Embedding model endpoint (used indirectly by the retrieval tools).
EMBEDDING_MODEL_NAME = os.environ.get("EMBEDDING_MODEL_NAME", "nomic-embed-text")
EMBEDDING_MODEL_API_BASE = os.environ.get("EMBEDDING_MODEL_API_BASE", "http://localhost:11435")

# Chat-model endpoint for the agent itself.
AGENT_MODEL_NAME = os.environ.get("AGENT_MODEL_NAME", "gpt-oss:20b")
AGENT_MODEL_API_BASE = os.environ.get("AGENT_MODEL_API_BASE", "http://localhost:11436")
# Falls back to the package default when the env var is unset.
OLLAMA_API_KEY = os.environ.get("OLLAMA_API_KEY", DEFAULT_NEURIPS2025_AGENT_ARGS["api_key"])
37
-
38
-
39
def initialize_agent():
    """Create, set up, and return the shared NeurIPS2025Agent instance.

    Model/endpoint settings come from the module-level environment-derived
    constants; llm/tool arguments from DEFAULT_NEURIPS2025_AGENT_ARGS.
    """
    defaults = DEFAULT_NEURIPS2025_AGENT_ARGS
    constructor_kwargs = {
        "model": f"ollama_chat/{AGENT_MODEL_NAME}",
        "api_base": AGENT_MODEL_API_BASE,
        "api_key": OLLAMA_API_KEY,
        "llm_args": defaults["llm_args"],
        "global_tool_args": defaults["global_tool_args"],
    }
    agent = NeurIPS2025Agent(**constructor_kwargs)
    agent.setup_session()
    return agent
50
-
51
-
52
def configure_agent(
    api_base: str,
    api_key: str,
    model: str,
    temperature: float,
    max_tokens: int,
    num_ctx: int,
    max_papers_input: int = None,
    current_config: Dict = None
):
    """Merge the UI-supplied settings into the per-session config dict.

    Mutates ``current_config`` in place and returns it together with a
    human-readable status string for the UI.
    """
    LOGGER.info(f"Agent runtime started via Gradio UI for session")
    updates = {
        "model": model,
        "api_base": api_base,
        "api_key": api_key,
        "llm_args": {
            "temperature": temperature,
            "max_tokens": max_tokens,
            "num_ctx": num_ctx
        },
        "global_tool_args": {"max_num_papers": max_papers_input}
    }
    current_config.update(updates)

    # Never log the API key.
    redacted = {k: v for k, v in current_config.items() if k != "api_key"}
    LOGGER.info(f"User-defined configuration saved. Config: {redacted}")

    return current_config, "✓ Agent initialized successfully!"
82
-
83
-
84
def chat_fn(
    new_message: str,
    history: List[Dict],
    config: Optional[Dict],
    messages: Optional[List[Dict]],
    agent: NeurIPS2025Agent,
) -> Tuple[List[Dict], Optional[List[Dict]]]:
    """
    Handle one chat turn using the stateless agent (generator).

    Yields ``(updated_history, messages)`` pairs while the response streams
    so the Gradio chatbot can render partial output.

    Args:
        new_message: User's input message.
        history: Chat history as list of {"role", "content"} dicts.
        config: Configuration dict with model, api_base, api_key, llm_args.
        messages: Current per-session conversation messages list.
        agent: Agent instance (used statelessly for concurrency safety).
    """
    # Ignore empty submissions.
    if not new_message.strip():
        yield history, messages
        return

    LOGGER.debug(f"USER PROMPT: {new_message}")

    # Safety check: ensure messages is a list seeded with the system prompt.
    if messages is None or not isinstance(messages, list):
        LOGGER.warning("Messages state was not properly initialized, resetting...")
        messages = [agent.get_system_prompt()]

    # Work on copies so Gradio state objects are not mutated in place.
    history = history.copy() if history else []
    messages = messages.copy()

    # Show the user message immediately, with an empty assistant placeholder.
    history.extend([
        {"role": "user", "content": new_message},
        {"role": "assistant", "content": ""},
    ])

    try:
        # Timestamped user message for the conversation log.
        messages.append({
            "role": "user",
            "content": new_message,
            "_ts": str(datetime.datetime.now(datetime.timezone.utc))
        })

        # BUGFIX: initialize before the loop — previously, if
        # interact_stateless yielded nothing, `partial_messages` was
        # referenced unbound after the loop and raised NameError.
        partial_messages = messages
        accumulated_response = ""

        # Stream the response; each yielded list is the full message state.
        for partial_messages in agent.interact_stateless(
            messages=messages,
            model=config["model"],
            api_base=config["api_base"],
            api_key=config["api_key"],
            llm_args=config["llm_args"]
        ):
            # Mirror the latest assistant content into the visible history.
            for msg in reversed(partial_messages):
                if msg.get("role") == "assistant":
                    accumulated_response = msg["content"]
                    break

            history[-1]["content"] = accumulated_response
            yield history, partial_messages

        # Persist the final message list for the session state.
        messages = partial_messages
        LOGGER.info("Agent response generated successfully")

    except Exception as e:
        LOGGER.error(f"Agent encountered an error: {e}", exc_info=True)
        history[-1]["content"] = f"❌ Error: {str(e)}"
        yield history, messages
163
-
164
-
165
def update_system_prompt(
    new_prompt: str,
    messages: Optional[List[Dict]],
    agent: NeurIPS2025Agent
) -> Tuple[str, Optional[List[Dict]]]:
    """Replace the system prompt in the conversation state.

    Args:
        new_prompt: New system prompt text.
        messages: Current message history (may be None on first use).
        agent: Agent instance providing ``set_system_prompt``.

    Returns:
        Tuple of (status_message, updated_messages).
    """
    # Reject whitespace-only prompts up front.
    if not new_prompt.strip():
        return "System prompt cannot be empty.", messages

    try:
        current = [] if messages is None else messages
        current = agent.set_system_prompt(new_system_prompt=new_prompt, messages=current)
        LOGGER.info("System prompt updated")
        LOGGER.info(f"New system prompt: {current[0]}")
        return "✓ System prompt updated successfully!", current
    except Exception as e:
        LOGGER.error(f"Error updating system prompt: {e}")
        return f"Error: {str(e)}", current
197
-
198
-
199
def view_history(messages: Optional[List[Dict]]) -> str:
    """Render the full conversation history as pretty-printed JSON.

    Args:
        messages: Current message history.

    Returns:
        JSON formatted history string, or a warning/error message.
    """
    if messages is not None:
        try:
            rendered = json.dumps(messages, indent=2, ensure_ascii=False)
        except Exception as err:
            LOGGER.error(f"Error viewing history: {err}")
            rendered = f"❌ Error: {str(err)}"
        return rendered
    return "⚠️ No conversation history yet."
217
-
218
-
219
def save_history(filename: str, messages: Optional[List[Dict]]) -> str:
    """Save chat history to a JSON file under ``chat_histories/``.

    Args:
        filename: Optional filename; auto-generated (timestamp + session id)
            when blank.
        messages: Current message history.

    Returns:
        Status message for the UI.
    """
    if messages is None or len(messages) == 0:
        return "⚠️ No conversation history to save."

    try:
        # Create directory if it doesn't exist
        Path("chat_histories/").mkdir(exist_ok=True, parents=True)

        # Generate filename if not provided
        if not filename.strip():
            time_now = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
            # Add session identifier to prevent conflicts between users
            import uuid
            session_id = str(uuid.uuid4())[:8]
            filename = f"chat_histories/{time_now}_session_{session_id}_chat_history.json"
        else:
            filename = filename.strip()
            # Ensure it's in the chat_histories directory.
            # BUGFIX: previously the user-supplied name was discarded and a
            # literal placeholder path was written instead; keep the name.
            if not filename.startswith("chat_histories/"):
                filename = f"chat_histories/{filename}"
            if not filename.endswith(".json"):
                filename += ".json"

        # Save the history
        save_chat_history(messages, filename)

        LOGGER.info(f"Chat history saved to {filename}")
        return f"✓ Chat history saved to: {filename}"

    except Exception as e:
        LOGGER.error(f"Error saving history: {e}")
        return f"❌ Error: {str(e)}"
260
-
261
-
262
def clear_chat(
    config: Optional[Dict],
    messages: Optional[List[Dict]],
    agent: NeurIPS2025Agent
) -> Tuple[str, List, Optional[List[Dict]]]:
    """Reset the visible chat and the per-session message state.

    Args:
        config: Current configuration (unused; kept for callback signature).
        messages: Current message history (replaced wholesale).
        agent: Agent instance supplying the system prompt.

    Returns:
        Tuple of (status_message, empty_history, reset_messages).
    """
    prompt = agent.get_system_prompt()
    # Keep the system prompt as the sole seed message when it is a dict.
    reset_messages = [prompt] if isinstance(prompt, dict) else []
    LOGGER.info("Chat cleared and reset")
    return "✓ Chat cleared!", [], reset_messages
285
-
286
-
287
def submit_message(message, history, config, messages, agent):
    """Thin generator wrapper around chat_fn so both UI callbacks (button
    click and textbox submit) share one entry point."""
    for update in chat_fn(message, history, config, messages, agent):
        yield update
290
-
291
-
292
def main():
    """Build the Gradio Blocks UI, wire up all event handlers, and launch
    the web server on 0.0.0.0:7860."""

    # Setup the (single, shared) agent instance; per-session state lives in
    # gr.State objects, the agent itself is used statelessly.
    agent = initialize_agent()

    with gr.Blocks(
        title="AgenticNAV",
        theme=gr.themes.Default(
            spacing_size=gr.themes.sizes.spacing_sm,
            radius_size=gr.themes.sizes.radius_none
        )) as webapp:

        gr.Markdown(
            "# 🤖 AgenticNAV - Explore NeurIPS 2025 papers and build your personalized schedule, effortlessly!\n "
            "This agent can help you explore the more than 5000 papers at this year's NeurIPS conference. "
            "You can start chatting right away but see below for more specific instructions on how to use the agent "
            "with your favorite model and inference config. You can also set a custom system prompt.\n\n "
            "**Note:** This is an experimental deployment and LLMs can make mistakes. This can mean that the agent may "
            "not discover your paper even though it is presented at the conference."
        )

        # Session state for agent config and messages (one copy per browser
        # session).
        config_state = gr.State(value=DEFAULT_NEURIPS2025_AGENT_ARGS)
        messages_state = gr.State(value=[agent.get_system_prompt()])

        with gr.Row():
            with gr.Column():
                # Main chat interface
                chatbot = gr.Chatbot(
                    label="Conversation Trail",
                    height=750,
                    type="messages",
                    show_copy_button=True,
                )

                with gr.Row():
                    msg_input = gr.Textbox(
                        label="Your message",
                        placeholder="Type your message here...",
                        lines=3,
                        scale=4
                    )
                    submit_btn = gr.Button("Send", variant="primary", scale=1)

                with gr.Row():
                    clear_btn = gr.Button("🗑️ Clear Chat", size="sm")
                    save_btn = gr.Button("💾 Save History", size="sm")

        with gr.Row():
            # Help text at bottom
            gr.Markdown("""
            ### 📖 Usage Guide

            1. **Initialize**: Configure settings and click "Initialize Agent"
            2. **Chat**: Type messages and press Enter or click Send
            3. **System Prompt**: Customize the agent's behavior via System Prompt panel
            4. **History**: View or save your conversation using the History & Save panel
            5. **Clear**: Start a fresh conversation with the Clear Chat button

            ### Note on Ollama API Keys
            In case you are experiencing an error calling the agent model (usually indicated by a message
            containing the word "unauthorized"), you may go to https://ollama.com and generate your own key.
            You can provide it in the configuration below. It will not be stored on our system and gets deleted
            when you end session (i.e., close your browser window).

            **Note**: Each browser session maintains its own independent conversation state.
            Uses stateless agent interaction for better concurrency support.
            """
            )

        with gr.Row():
            with gr.Column():
                # Settings panel
                gr.Markdown("### ⚙️ Agent Settings")

                with gr.Accordion("Configuration", open=True):
                    api_base_input = gr.Textbox(
                        label="API Base URL",
                        value=AGENT_MODEL_API_BASE,
                        placeholder="http://localhost:11434"
                    )

                    api_key_input = gr.Textbox(
                        label="API Key (only needed for remote models)",
                        value="",
                        type="password",
                        placeholder="Leave empty if not needed"
                    )

                    model_input = gr.Textbox(
                        label="Model",
                        value=f"ollama_chat/{AGENT_MODEL_NAME}" if "ollama_chat" not in AGENT_MODEL_NAME else AGENT_MODEL_NAME,
                        placeholder="ollama_chat/gpt-oss:20b"
                    )

                    temperature_input = gr.Slider(
                        label="Temperature",
                        minimum=0.0,
                        maximum=1.0,
                        value=0.2,
                        step=0.1
                    )

                    max_tokens_input = gr.Slider(
                        label="Max Tokens",
                        minimum=100,
                        maximum=8192,
                        value=6000,
                        step=10
                    )

                    num_ctx_input = gr.Number(
                        label="Context Window",
                        value=131072,
                        precision=0
                    )

                    # NOTE(review): UI default is 50 but
                    # DEFAULT_NEURIPS2025_AGENT_ARGS uses 10 — confirm which
                    # default is intended.
                    max_papers_input = gr.Slider(
                        label="Max Papers to Retrieve",
                        minimum=0,
                        maximum=100,
                        value=50,
                        step=1
                    )

                    init_btn = gr.Button("Update Config", variant="primary")
                    init_status = gr.Textbox(label="Status", interactive=False)

                with gr.Accordion("System Prompt", open=False):
                    system_prompt_input = gr.Textbox(
                        label="System Prompt",
                        value=agent.get_system_prompt()["content"] if type(agent.get_system_prompt()) is dict else None,
                        placeholder="Enter custom system prompt here...",
                        lines=12
                    )
                    update_system_btn = gr.Button("Update System Prompt")
                    system_status = gr.Textbox(label="Status", interactive=False)

                with gr.Accordion("History & Save", open=False):
                    view_history_btn = gr.Button("📜 View Full History")
                    history_output = gr.Code(
                        label="Conversation History (JSON)",
                        language="json",
                        lines=10
                    )

                    save_filename_input = gr.Textbox(
                        label="Filename (optional)",
                        placeholder="Leave empty for auto-generated name",
                        value=""
                    )
                    save_status = gr.Textbox(label="Save Status", interactive=False)

        # Event handlers
        init_btn.click(
            fn=configure_agent,
            inputs=[
                api_base_input,
                api_key_input,
                model_input,
                temperature_input,
                max_tokens_input,
                num_ctx_input,
                max_papers_input,
                config_state
            ],
            outputs=[config_state, init_status]
        )

        # Chat submission.
        # NOTE: `yield from` inside a lambda turns it into a generator
        # function, which Gradio treats as a streaming callback.
        submit_btn.click(
            fn=lambda msg_input, chatbot, config_state, messages_state: (yield from submit_message(msg_input, chatbot, config_state, messages_state, agent)),
            inputs=[msg_input, chatbot, config_state, messages_state],
            outputs=[chatbot, messages_state]
        ).then(
            fn=lambda: "",
            inputs=None,
            outputs=msg_input
        )

        msg_input.submit(
            fn=lambda msg_input, chatbot, config_state, messages_state: (yield from submit_message(msg_input, chatbot, config_state, messages_state, agent)),
            inputs=[msg_input, chatbot, config_state, messages_state],
            outputs=[chatbot, messages_state]
        ).then(
            fn=lambda: "",
            inputs=None,
            outputs=msg_input
        )

        # System prompt update
        update_system_btn.click(
            fn=lambda system_prompt_input, messages_state: update_system_prompt(system_prompt_input, messages_state, agent),
            inputs=[system_prompt_input, messages_state],
            outputs=[system_status, messages_state]
        )

        # History viewing
        view_history_btn.click(
            fn=view_history,
            inputs=messages_state,
            outputs=history_output
        )

        # Save history
        save_btn.click(
            fn=save_history,
            inputs=[save_filename_input, messages_state],
            outputs=save_status
        )

        # Clear chat (reuses save_status as the status textbox)
        clear_btn.click(
            fn=lambda config_state, messages_state: clear_chat(config_state, messages_state, agent),
            inputs=[config_state, messages_state],
            outputs=[save_status, chatbot, messages_state]
        )

    webapp.launch(
        server_name="0.0.0.0",  # Allow external connections
        server_port=7860,  # Default Gradio port
        share=False,  # Set to True to create a public link
        show_error=True,
        debug=True
    )
517
-
518
- if __name__ == "__main__":
519
- # Setup logging (only needs to be done once globally)
520
- setup_logging(
521
- log_dir="logs",
522
- level=os.environ.get("AGENTIC_NAV_LOG_LEVEL", "INFO")
523
- )
524
-
525
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/frontend/cli.py DELETED
@@ -1,371 +0,0 @@
1
- """
2
- Enhanced terminal chat UI with async streaming and full terminal functionality.
3
-
4
- Features:
5
- - Async streaming output as LLM generates tokens
6
- - Rich prompt with command history and auto-completion
7
- - Live markdown rendering during streaming
8
- - Multi-line input via Ctrl+O or /edit command
9
- - Commands: /help, /exit, /system, /edit, /history, /save <path>, /clear
10
- - Keyboard shortcuts: Ctrl+C to cancel, Ctrl+D to exit
11
- """
12
- import asyncio
13
- import click
14
- import os
15
- import logging
16
- import litellm
17
- from pathlib import Path
18
- from typing import Optional
19
-
20
- from rich.console import Console
21
- from rich.markdown import Markdown
22
- from rich.live import Live
23
- from rich.panel import Panel
24
- from rich.text import Text
25
- from prompt_toolkit import PromptSession
26
- from prompt_toolkit.history import FileHistory
27
- from prompt_toolkit.auto_suggest import AutoSuggestFromHistory
28
- from prompt_toolkit.completion import WordCompleter
29
- from prompt_toolkit.key_binding import KeyBindings
30
- from prompt_toolkit.formatted_text import HTML
31
-
32
- from agentic_nav.agents import NeurIPS2025Agent
33
- from agentic_nav.utils.logger import setup_logging
34
- from agentic_nav.utils.file_handlers import save_chat_history
35
- from agentic_nav.utils.cli import open_editor, show_history, print_help
36
-
37
- try:
38
- from datetime import datetime, UTC
39
- except ImportError:
40
- from datetime import datetime, timezone
41
- UTC = timezone.utc
42
-
43
-
44
# Module-level logger; configured by setup_logging() in main().
LOGGER = logging.getLogger(__name__)

# Embedding model endpoint (used indirectly by the retrieval tools).
EMBEDDING_MODEL_NAME = os.environ.get("EMBEDDING_MODEL_NAME", "nomic-embed-text")
EMBEDDING_MODEL_API_BASE = os.environ.get("EMBEDDING_MODEL_API_BASE", "http://localhost:11435")

# Chat-model endpoint for the agent itself.
AGENT_MODEL_NAME = os.environ.get("AGENT_MODEL_NAME", "gpt-oss:20b")
AGENT_MODEL_API_BASE = os.environ.get("AGENT_MODEL_API_BASE", "http://localhost:11436")
OLLAMA_API_KEY = os.environ.get("OLLAMA_API_KEY")

# Silence litellm's verbose debug logging (private API — may break on
# litellm upgrades).
litellm._logging._disable_debugging()
console = Console(soft_wrap=True)
55
-
56
- # Command completer for auto-complete
57
# Command completer for auto-complete (Tab completion of slash commands).
command_completer = WordCompleter(
    ['/help', '/exit', '/system', '/edit', '/history', '/save', '/clear'],
    ignore_case=True,
    sentence=True
)

# Custom key bindings registered on the prompt session below.
bindings = KeyBindings()
64
-
65
@bindings.add('c-o')
def _(event):
    """Multi-line input with Ctrl+O"""
    # Insert a literal newline into the buffer instead of submitting,
    # letting the user compose multi-line prompts.
    event.current_buffer.insert_text('\n')
69
-
70
-
71
def create_prompt_session():
    """Build the interactive prompt session: persistent history file,
    history-based auto-suggestions, slash-command completion, and the
    custom Ctrl+O key binding."""
    history_path = Path.home() / ".llm_agents_history"
    session_kwargs = dict(
        history=FileHistory(str(history_path)),
        auto_suggest=AutoSuggestFromHistory(),
        completer=command_completer,
        complete_while_typing=True,
        key_bindings=bindings,
        enable_open_in_editor=True,
        multiline=False,
    )
    return PromptSession(**session_kwargs)
84
-
85
-
86
def render_markdown(text: str, title: Optional[str] = None):
    """Print *text* as rich Markdown, optionally framed in a titled panel."""
    body = Markdown(text)
    if title is None:
        console.print(body)
    else:
        console.print(Panel(body, title=title, border_style="blue"))
92
-
93
-
94
def stream_agent_response_sync(agent, message: dict):
    """
    Stream agent response with live markdown rendering.

    This function:
    1. Copies the agent's current history and appends the new message
    2. Streams the response using the interact_stateless generator
    3. Updates the live display with markdown content and tool execution status
    4. Updates the agent's history with the final message list

    Note: KeyboardInterrupt is caught to allow graceful cancellation,
    then re-raised so the caller can handle cleanup.

    Args:
        agent: The agent instance with interact_stateless support
        message: User message dict with 'role', 'content', and optional '_ts'
    """
    # Get current history and add the new message (work on a copy so the
    # agent's stored history is only replaced on success).
    messages = agent.get_history().copy()
    messages.append(message)

    accumulated_text = ""
    tool_calls_made = []
    final_messages = None

    with Live(console=console, refresh_per_second=10) as live:
        try:
            # Use interact_stateless for streaming (it's a generator)
            for updated_messages in agent.interact_stateless(
                messages=messages,
                model=agent.model,
                api_base=agent.api_base,
                api_key=agent.api_key,
                llm_args=agent.llm_args
            ):
                final_messages = updated_messages

                # Extract the last assistant message; only the most recent
                # one is inspected per update (break after the first hit).
                for msg in reversed(updated_messages):
                    if msg.get("role") == "assistant":
                        content = msg.get("content", "")
                        if content != accumulated_text:
                            accumulated_text = content

                        # Show streaming content
                        if accumulated_text:
                            live.update(Markdown(accumulated_text))

                        # Check for tool calls (only re-render when the set
                        # of calls changed since the last update)
                        if "tool_calls" in msg and msg["tool_calls"] != tool_calls_made:
                            tool_calls_made = msg["tool_calls"]
                            # Show tool execution
                            tool_names = [tc["function"]["name"] for tc in tool_calls_made]
                            tool_info = Text(f"\n🔧 Executing tools: {', '.join(tool_names)}", style="yellow")
                            live.update(tool_info)
                        break

            # Update agent's history with final messages
            if final_messages:
                agent.set_history(final_messages)

        except KeyboardInterrupt:
            live.stop()
            console.print("\n[yellow]⚠ Response cancelled by user[/yellow]")
            raise
        except Exception as e:
            live.stop()
            console.print(f"\n[red]❌ Error: {e}[/red]")
            LOGGER.error(f"Streaming error: {e}", exc_info=True)
            raise
164
-
165
-
166
async def async_interact(agent, message: dict):
    """
    Async wrapper for agent interaction with streaming.

    Runs the blocking streamer in a worker thread so the event loop stays
    responsive. KeyboardInterrupt from stream_agent_response_sync is caught
    here to prevent it from propagating up; the user-facing feedback for
    cancellation already happens inside stream_agent_response_sync.
    """
    try:
        await asyncio.to_thread(stream_agent_response_sync, agent, message)
    except KeyboardInterrupt:
        # Feedback already printed by stream_agent_response_sync.
        LOGGER.info("Agent interaction cancelled by user")
    except Exception as exc:
        LOGGER.error(f"Agent interaction failed: {exc}")
        console.print(f"[red]Error: {exc}[/red]")
183
-
184
-
185
def print_welcome():
    """Print welcome message"""
    # Assemble a styled rich Text banner; shown at startup and after /clear.
    welcome = Text()
    welcome.append("╔═══════════════════════════════════════╗\n", style="bold blue")
    welcome.append("║ ", style="bold blue")
    welcome.append("LLM Agent Chat Interface", style="bold white")
    welcome.append(" ║\n", style="bold blue")
    welcome.append("╚═══════════════════════════════════════╝\n", style="bold blue")
    welcome.append("\nCommands:\n", style="bold yellow")
    welcome.append(" /help - Show help\n", style="cyan")
    welcome.append(" /edit - Multi-line input\n", style="cyan")
    welcome.append(" /history - Show conversation history\n", style="cyan")
    welcome.append(" /system - Set system prompt\n", style="cyan")
    welcome.append(" /save - Save conversation\n", style="cyan")
    welcome.append(" /clear - Clear screen\n", style="cyan")
    welcome.append(" /exit - Exit (or Ctrl+D)\n", style="cyan")
    welcome.append("\nShortcuts:\n", style="bold yellow")
    welcome.append(" Ctrl+O - New line in input\n", style="cyan")
    welcome.append(" Ctrl+C - Cancel current response\n", style="cyan")
    welcome.append(" Ctrl+D - Exit\n", style="cyan")
    welcome.append(" ↑/↓ - Navigate history\n", style="cyan")
    welcome.append(" Tab - Auto-complete commands\n", style="cyan")

    console.print(welcome)
209
-
210
-
211
@click.command()
@click.option("-t", "--temperature", default=0.2, type=float,
              help="Specify the model temperature.")
@click.option("--max-tokens", default=6000, type=int,
              help="Specify the max. number of model output tokens.")
@click.option("-c", "--num-ctx", default=131072, type=int,
              help="Specify the model context window.")
@click.option("-l", "--max-num-papers", default=50, type=int,
              help="Specify the maximum number of papers to retrieve.")
def main(temperature, max_tokens, num_ctx, max_num_papers):
    """Enhanced LLM Agent CLI with async streaming and rich terminal features"""

    # Setup logging
    setup_logging(
        log_dir="logs",
        level=os.environ.get("AGENTIC_NAV_LOG_LEVEL", "INFO")
    )

    print_welcome()
    LOGGER.info("Agent runtime started")

    # Config for the LLM messages
    llm_config = {
        "model": f"ollama_chat/{AGENT_MODEL_NAME}",
        "api_base": AGENT_MODEL_API_BASE,
        "temperature": temperature,
        "max_tokens": max_tokens,
        "num_ctx": num_ctx
    }
    LOGGER.info(f"LLM configuration: {llm_config}")

    # Parameters to limit the tool calling scope.
    # NOTE(review): key is "num_records" here while the web UI passes
    # "max_num_papers" — confirm which name the tools actually expect.
    tool_args = {
        "num_records": max_num_papers
    }
    LOGGER.info(f"Global tool arguments: {tool_args}")

    # Initialize agent (model is passed via llm_config/llm_args)
    agent = NeurIPS2025Agent(
        api_base=AGENT_MODEL_API_BASE,
        api_key=OLLAMA_API_KEY,
        llm_args=llm_config,
        global_tool_args=tool_args,
    )

    agent.setup_session()
    console.print("[green]✓ Agent initialized successfully[/green]\n")

    # Create prompt session
    session = create_prompt_session()

    # Main interaction loop
    while True:
        try:
            # Get user input with rich prompt
            line = session.prompt(
                HTML('<ansiyellow><b>You></b></ansiyellow> '),
                multiline=False,
            ).strip()

            LOGGER.debug(f"USER PROMPT: {line}")

        except (EOFError, KeyboardInterrupt):
            console.print("\n[yellow]Goodbye! 👋[/yellow]")
            LOGGER.info("User exited")
            break

        if not line:
            continue

        # Handle commands
        if line.startswith("/"):
            parts = line.split(maxsplit=1)
            cmd = parts[0].lower()
            arg = parts[1] if len(parts) > 1 else ""

            if cmd == "/help":
                print_help()
                continue

            elif cmd == "/exit":
                console.print("[yellow]Goodbye! 👋[/yellow]")
                LOGGER.info("User exited via /exit command")
                break

            elif cmd == "/clear":
                console.clear()
                print_welcome()
                continue

            elif cmd == "/edit":
                content = open_editor()
                if content:
                    # Intentionally no `continue`: the composed message
                    # falls through to the send block below.
                    next_message = {
                        "role": "user",
                        "content": content,
                        "_ts": str(datetime.now(UTC))
                    }
                else:
                    console.print("[yellow]⚠ No content provided[/yellow]")
                    continue

            elif cmd == "/system":
                content = open_editor()
                if content:
                    messages = agent.set_system_prompt(
                        messages=agent.get_history(),
                        new_system_prompt=content
                    )
                    agent.set_history(messages=messages)
                    console.print("[green]✓ System prompt updated[/green]")
                    continue
                else:
                    console.print("[yellow]⚠ No content provided[/yellow]")
                    continue

            elif cmd == "/history":
                show_history(agent.get_history())
                continue

            elif cmd == "/save":
                Path("chat_histories/").mkdir(exist_ok=True, parents=True)
                time_now = datetime.now().strftime("%Y-%m-%d_%H-%M")
                path = arg.strip() or f"chat_histories/{time_now}_chat_history.json"

                try:
                    save_chat_history(agent.get_history(), path)
                    console.print(f"[green]✓ Chat saved to {path}[/green]")
                except Exception as e:
                    console.print(f"[red]❌ Failed to save: {e}[/red]")
                    LOGGER.error(f"Save failed: {e}")
                continue

            else:
                console.print(f"[red]❌ Unknown command: {cmd}[/red]")
                console.print("[yellow]Type /help for available commands[/yellow]")
                continue
        else:
            # Regular single-line user message
            next_message = {
                "role": "user",
                "content": line,
                "_ts": str(datetime.now(UTC))
            }

        # Send the message (from /edit or a plain line) and stream the reply.
        try:
            console.print()
            asyncio.run(async_interact(agent, next_message))
            console.print()

        except KeyboardInterrupt:
            console.print("\n[yellow]⚠ Interrupted[/yellow]")
            continue
        except Exception as e:
            console.print(f"\n[red]❌ Error: {e}[/red]")
            LOGGER.error(f"Interaction error: {e}", exc_info=True)
            continue
368
-
369
-
370
- if __name__ == "__main__":
371
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/tools/__init__.py DELETED
@@ -1,15 +0,0 @@
1
- from agentic_nav.tools.knowledge_graph import search_similar_papers, find_neighboring_papers, traverse_graph
2
- from agentic_nav.tools.session_routing import build_visit_schedule
3
-
4
-
5
# Public tool API re-exported for agent registration; get_all_tools() below
# resolves these names in order.
__all__ = [
    'search_similar_papers',
    'find_neighboring_papers',
    'traverse_graph',
    'build_visit_schedule',
]
11
-
12
-
13
def get_all_tools():
    """Return all registered tool callables as a list, in __all__ order."""
    return [globals()[name] for name in __all__]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/tools/knowledge_graph/__init__.py DELETED
@@ -1,326 +0,0 @@
1
- """
2
- This file defines the tools that can be made available to an agent.
3
- The idea is to put the actual functions into wrappers that provide LLM-friendly and token efficient outputs.
4
- """
5
- import os
6
- import random
7
-
8
- from toon_format import encode as toon_encode
9
- from typing import List, Optional, Union
10
-
11
- from agentic_nav.tools.knowledge_graph.retriever import Neo4jGraphWorker, LOGGER
12
-
13
- NEO4J_DB_URI = os.environ.get("NEO4J_DB_URI", "bolt://neo4j_db:7687")
14
- NEO4J_USERNAME = os.environ.get("NEO4J_USERNAME", "neo4j")
15
- NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD")
16
-
17
-
18
def search_similar_papers(
    user_query: str,
    num_papers_to_return: int = 50,
    min_similarity: Optional[float] = None,
    day: Optional[str] = None,
    timeslots: Optional[List[str]] = None
) -> str:
    """
    Search for research papers semantically similar to a natural language query.

    Performs vector similarity search against the Neo4j knowledge graph and is
    the typical entry point of a paper-discovery workflow; results can be
    explored further with find_neighboring_papers() or traverse_graph().

    Args:
        user_query (str): Natural language description of the research interest.
            It is embedded and compared against paper embeddings in the database.
        num_papers_to_return (int, optional): Maximum number of papers to return,
            ranked by similarity score. Defaults to 50.
        min_similarity (float, optional): Minimum similarity threshold in
            [0.0, 1.0]. Defaults to None (no filtering).
        day (str, optional): Conference day in ISO format (e.g. "2024-12-10").
            When provided, only papers scheduled on this day are searched.
        timeslots (List[str], optional): Time ranges formatted as
            "HH:MM:SS-HH:MM:SS" (e.g. ["09:00:00-12:00:00"]). Papers whose
            session start time falls in any range are included.

    Returns:
        str: Token-efficient toon-encoded representation of the matching papers,
            typically ordered by descending similarity.

    Restrictions:
        - Requires a running Neo4j instance reachable at NEO4J_DB_URI
          (default "bolt://neo4j_db:7687") with credentials taken from the
          NEO4J_USERNAME / NEO4J_PASSWORD environment variables.
        - The database must have pre-computed paper embeddings and a vector
          index for similarity queries.
        - A new database connection is created per call; TODO: wrap
          Neo4jGraphWorker in a session / connection pool for concurrent use.

    Raises:
        Connection errors if the Neo4j database is not accessible.
        ValueError for an out-of-range min_similarity, a malformed ISO day,
            or malformed timeslot strings.
        Embedding errors if the query cannot be embedded.

    Example:
        >>> papers = search_similar_papers(
        ...     user_query="federated learning for privacy-preserving ML",
        ...     num_papers_to_return=15
        ... )
        >>> morning_papers = search_similar_papers(
        ...     user_query="computer vision applications",
        ...     day="2024-12-10",
        ...     timeslots=["09:00:00-12:00:00"]
        ... )
    """
    # Type coercion for parameters that may come as strings from LLM tool calls
    if num_papers_to_return is not None and not isinstance(num_papers_to_return, int):
        num_papers_to_return = int(num_papers_to_return)
    if min_similarity is not None and not isinstance(min_similarity, float):
        min_similarity = float(min_similarity)

    # Handle timeslots - ensure it's a list or None
    if timeslots is not None and isinstance(timeslots, str):
        # If a single string is provided, wrap it in a list
        timeslots = [timeslots]

    worker = Neo4jGraphWorker(
        uri=NEO4J_DB_URI,
        username=NEO4J_USERNAME,
        password=NEO4J_PASSWORD
    )

    # Fetch papers with optional day and time filtering; the database filters
    # by session time BEFORE the vector search for better performance.
    papers = worker.similarity_search(
        user_query=user_query,
        top_k=num_papers_to_return,
        min_similarity=min_similarity,
        day=day,
        timeslots=timeslots
    )

    # Format outputs to be more token efficient
    formatted_papers = toon_encode(papers)

    return formatted_papers
137
-
138
-
139
def find_neighboring_papers(
    paper_id: str,
    relationship_types: Union[List[str], str, None] = None,
    num_neighbors_to_return: int = 10,
    min_similarity: float = 0.75
) -> str:
    """
    Retrieve immediate (one-hop) neighbors of a paper from the Neo4j knowledge graph.

    Intended to be used after an initial similarity search, to explore specific
    relationships (similar papers, authors, topics) of a paper of interest.

    Args:
        paper_id (str): Unique identifier (Neo4j UUID) of the target paper node.
        relationship_types (List[str] | str, optional): Relationship types to
            query. Defaults to ["SIMILAR_TO"].
            Valid options: ["SIMILAR_TO", "IS_AUTHOR_OF", "BELONGS_TO_TOPIC"]
        num_neighbors_to_return (int, optional): Maximum number of neighbors to
            return. Defaults to 10. Results are shuffled before truncation so
            repeated calls surface diverse neighbors.
        min_similarity (float, optional): Minimum similarity threshold for
            returned neighbors. Defaults to 0.75.

    Returns:
        str: Token-efficient toon-encoded representation of the neighbors.

    Restrictions:
        - Requires a running Neo4j instance reachable at NEO4J_DB_URI with
          credentials from NEO4J_USERNAME / NEO4J_PASSWORD.
        - The paper_id must exist in the graph; only one-hop searches are done.

    Raises:
        Connection errors if the Neo4j database is not accessible.
        ValueError if invalid relationship_types are provided.
    """
    # Avoid a mutable default argument: resolve the default inside the body.
    if relationship_types is None:
        relationship_types = ["SIMILAR_TO"]
    if isinstance(relationship_types, str):
        relationship_types = [relationship_types]

    # Type coercion for parameters that may come as strings from LLM tool calls
    # (min_similarity coercion added for consistency with search_similar_papers).
    if num_neighbors_to_return is not None and not isinstance(num_neighbors_to_return, int):
        num_neighbors_to_return = int(num_neighbors_to_return)
    if min_similarity is not None and not isinstance(min_similarity, float):
        min_similarity = float(min_similarity)

    worker = Neo4jGraphWorker(
        uri=NEO4J_DB_URI,
        username=NEO4J_USERNAME,
        password=NEO4J_PASSWORD
    )

    neighbors = worker.neighborhood_search(
        paper_id=paper_id,
        relationship_types=relationship_types,
        min_similarity=min_similarity,
    )

    # BUG FIX: the original compared the per-entry key against the whole list
    # (`rel_type != relationship_types`), which is always True for a str vs list,
    # so no filtering ever happened. Keep only the requested relationship types.
    # NOTE(review): each value appears to be the neighbor payload per relationship
    # type — confirm against Neo4jGraphWorker.neighborhood_search's return shape.
    relevant_neighbors = []
    for rel_type, neighbor in neighbors.items():
        if rel_type in relationship_types:
            relevant_neighbors.append(neighbor)

    # Shuffle before truncating so repeated calls give diverse responses.
    random.shuffle(relevant_neighbors)

    if num_neighbors_to_return is not None and isinstance(num_neighbors_to_return, int):
        relevant_neighbors = relevant_neighbors[:num_neighbors_to_return]

    # Format outputs to be more token efficient
    formatted_neighbors = toon_encode(relevant_neighbors)

    return formatted_neighbors
240
-
241
-
242
def traverse_graph(
    start_paper_id: str,
    n_hops: int = 2,
    relationship_type: Optional[str] = "BELONGS_TO_TOPIC",
    max_results: Optional[int] = 30,
    strategy: str = "breadth_first_random",
    max_branches: Optional[int] = 2,
    random_seed: Optional[int] = 42
) -> str:
    """
    Explore the Neo4j knowledge graph from a seed paper to discover related papers.

    Performs multi-hop traversal along a chosen relationship type, using one of
    several traversal strategies. Intended as a follow-up to an initial
    similarity search.

    Args:
        start_paper_id (str): Neo4j UUID of the paper node to start from.
        n_hops (int, optional): How many relationship hops to traverse. Defaults to 2.
        relationship_type (str, optional): Relationship to follow.
            Defaults to "BELONGS_TO_TOPIC".
            Valid options: ["SIMILAR_TO", "AUTHORED_BY", "BELONGS_TO_TOPIC"]
        max_results (int, optional): Maximum number of papers returned. Defaults to 30.
        strategy (str, optional): One of ["breadth_first", "depth_first",
            "breadth_first_random", "depth_first_random"]. Defaults to
            "breadth_first_random".
        max_branches (int, optional): Branching factor per node. Defaults to 2.
        random_seed (int, optional): Seed for the randomized strategies,
            ensuring reproducible traversals. Defaults to 42.

    Returns:
        str: Token-efficient toon-encoded representation of the discovered papers.

    Raises:
        Connection errors if the Neo4j database is not accessible.
        ValueError for an unknown relationship_type or strategy.
    """
    def _coerce_int(value):
        # LLM tool calls sometimes deliver integers as strings; normalize them.
        if value is not None and not isinstance(value, int):
            return int(value)
        return value

    n_hops = _coerce_int(n_hops)
    max_results = _coerce_int(max_results)
    max_branches = _coerce_int(max_branches)
    random_seed = _coerce_int(random_seed)

    graph_worker = Neo4jGraphWorker(
        uri=NEO4J_DB_URI,
        username=NEO4J_USERNAME,
        password=NEO4J_PASSWORD
    )

    discovered_papers = graph_worker.graph_traversal(
        start_paper_id=start_paper_id,
        n_hops=n_hops,
        relationship_type=relationship_type,
        max_results=max_results,
        strategy=strategy,
        max_branches=max_branches,
        random_seed=random_seed
    )

    # Encode compactly for LLM consumption.
    return toon_encode(discovered_papers)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/tools/knowledge_graph/file_handler.py DELETED
@@ -1,29 +0,0 @@
1
- import pickle
2
- import networkx as nx
3
-
4
-
5
def save_graph(graph: "nx.Graph", output_path: str):
    """
    Save a graph to disk using pickle.

    Args:
        graph: The graph to serialize (any picklable object works).
        output_path: Destination file path.
    """
    # The `with` block closes the file automatically; the original explicit
    # f.close() inside the block was redundant and has been removed.
    with open(output_path, 'wb') as f:
        pickle.dump(graph, f)
    print(f"Graph saved to {output_path}")
16
-
17
-
18
def load_graph(input_path: str) -> "nx.Graph":
    """
    Load a previously pickled graph from disk.

    Args:
        input_path: Path to a pickle file produced by save_graph().

    Returns:
        The deserialized graph object.

    Security note:
        pickle.load can execute arbitrary code on malicious input — only load
        files from trusted sources.
    """
    # The `with` block closes the file automatically; the original explicit
    # f.close() inside the block was redundant and has been removed.
    with open(input_path, 'rb') as f:
        graph = pickle.load(f)
    print(f"Graph loaded from {input_path}")
    return graph
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/tools/knowledge_graph/graph_generator.py DELETED
@@ -1,446 +0,0 @@
1
- import json
2
- import logging
3
- import os
4
-
5
- import click
6
- import networkx as nx
7
- import numpy as np
8
- import litellm
9
- from typing import List, Dict, Any, Union
10
- from litellm import embedding
11
- from concurrent.futures import ThreadPoolExecutor, as_completed
12
- from tqdm import tqdm
13
-
14
- from pathlib import Path
15
-
16
- from agentic_nav.utils.embedding_generator import batch_embed_documents
17
- from agentic_nav.utils.logging import setup_logging
18
- from agentic_nav.tools.knowledge_graph.file_handler import save_graph
19
-
20
-
21
- # Setup logging
22
- setup_logging(
23
- log_dir="logs",
24
- level=os.environ.get("AGENTIC_NAV_LOG_LEVEL", "INFO")
25
- )
26
- LOGGER = logging.getLogger(__name__)
27
- litellm._logging._disable_debugging()
28
- PROJECT_ROOT = Path(__file__).parent.parent.parent.parent
29
- EMBEDDING_MODEL_NAME = os.environ.get("EMBEDDING_MODEL_NAME", "ollama/nomic-embed-text")
30
- EMBEDDING_MODEL_API_BASE = os.environ.get("EMBEDDING_MODEL_API_BASE", "http://localhost:11435")
31
-
32
-
33
class PaperKnowledgeGraph:
    """
    A knowledge graph builder for academic papers focusing on:
    - Paper names (nodes)
    - Topics (nodes)
    - Abstract embeddings (stored as node attributes)
    Uses litellm with ollama for local embedding generation with parallel processing.
    """
    def __init__(
        self,
        embedding_model: str = EMBEDDING_MODEL_NAME,
        ollama_base_url: str = EMBEDDING_MODEL_API_BASE,
        embedding_gen_batch_size: int = 32,
        max_parallel_workers: int = 8,
        limit_num_papers: Union[int, None] = None
    ):
        """
        Initialize the knowledge graph builder.

        Args:
            embedding_model: Name of the ollama embedding model (e.g., 'nomic-embed-text')
            ollama_base_url: Base URL for the ollama server
            embedding_gen_batch_size: Batch size for generating text embeddings
            max_parallel_workers: Number of parallel workers for embedding generation
            limit_num_papers: Optional cap on how many papers are loaded (None = all)
        """
        self.graph = nx.Graph()  # undirected graph holding paper/topic/author nodes
        self.embedding_model = embedding_model
        self.ollama_base_url = ollama_base_url
        self.batch_size = embedding_gen_batch_size
        self.max_workers = max_parallel_workers
        self.papers_data = []  # raw paper dicts as loaded from JSON
        self.limit_num_papers = limit_num_papers

        # Test connection (fails fast if the embedding backend is unreachable)
        LOGGER.info(f"Initializing with model: {embedding_model}")
        LOGGER.info(f"Ollama server: {ollama_base_url}")
        self._test_embedding_connection()

    def _test_embedding_connection(self):
        """Test connection to ollama server."""
        try:
            # A tiny probe request; also reveals the embedding dimension.
            response = embedding(
                model=self.embedding_model,
                input=["test connection"],
                api_base=self.ollama_base_url
            )
            LOGGER.info(f"Successfully connected to ollama server")
            LOGGER.info(f"Embedding dimension: {len(response.data[0]['embedding'])}")
        except Exception as e:
            LOGGER.error(f"❌ Error connecting to ollama server: {e}")
            LOGGER.error(f"Please ensure ollama is running and the model '{self.embedding_model}' is available")
            LOGGER.error(f"Run: ollama pull nomic-embed-text")
            raise

    def load_papers_from_json(self, json_file_path: str, paper_dict_key: str = "results"):
        """
        Load papers from a JSON file or JSONL file.

        Tries to parse the whole file as JSON first (expecting a list under
        ``paper_dict_key``); on a JSON decode error falls back to JSONL
        (one JSON object per line).

        Args:
            json_file_path: Path to the JSON/JSONL file
            paper_dict_key: Key under which the paper list lives in a JSON object
        """
        self.papers_data = []

        with open(json_file_path, 'r') as f:
            # Try to parse as regular JSON first
            try:
                content = f.read()
                # Try parsing as a single JSON object
                try:
                    data = json.loads(content)
                    if isinstance(data[paper_dict_key], list):
                        self.papers_data = data[paper_dict_key]
                    else:
                        raise TypeError("File importer expects a list of papers.")
                except json.JSONDecodeError:
                    # Try parsing as JSONL (one JSON object per line)
                    f.seek(0)
                    for line in f:
                        line = line.strip()
                        if line:
                            self.papers_data.append(json.loads(line))
            except Exception as e:
                # NOTE(review): a missing paper_dict_key (KeyError) also lands
                # here and is reported as a parse error — confirm intent.
                raise ValueError(f"Error parsing JSON file: {e}")

        if self.limit_num_papers is not None and self.limit_num_papers > 0:
            LOGGER.warning(f"WARNING: Number of papers limited to {self.limit_num_papers} items. Set to 'None' for all papers")
            self.papers_data = self.papers_data[:self.limit_num_papers]

        LOGGER.info(f"Loaded {len(self.papers_data)} papers from {json_file_path}")

    def build_graph(self):
        """
        Build the knowledge graph from loaded papers.
        Creates nodes for papers and topics, and edges between them.
        Computes embeddings for abstracts in parallel.
        """
        topic_nodes = set()   # topic names already added as nodes
        author_nodes = set()  # author uids already added as nodes

        LOGGER.info(f"\nPreparing to process {len(self.papers_data)} papers...")

        # Extract all abstracts and paper info
        paper_info = []
        abstracts = []

        for paper in self.papers_data:
            paper_id = paper.get('uid', paper.get('id'))
            paper_name = paper.get('name', 'Unnamed Paper')
            abstract = paper.get('abstract', '')
            topic = paper.get('topic', 'Unknown')
            authors = paper.get('authors', [])
            keywords = paper.get("keywords", [])
            decision = paper.get("decision", "")
            session = paper.get("session", "")
            session_start_time = paper.get("starttime", "")
            session_end_time = paper.get("endtime", "")
            presentation_type = paper.get("eventtype", "")
            room_name = paper.get("room_name", "")
            project_url = paper.get("url", "")
            poster_position = paper.get("poster_position", "")
            paper_url = paper.get("paper_url", "")
            sourceid = paper.get("sourceid", "")
            virtualsite_url = paper.get("virtualsite_url", "")

            paper_info.append({
                "id": paper_id,
                "name": paper_name,
                "abstract": abstract,
                "topic": topic,
                "authors": authors,
                "keywords": keywords,
                "decisions": decision,
                "session": session,
                "session_start_time": session_start_time,
                "session_end_time": session_end_time,
                "presentation_type": presentation_type,
                "room_name": room_name,
                "project_url": project_url,
                "poster_position": poster_position,
                "paper_url": paper_url,
                "sourceid": sourceid,
                "virtualsite_url": virtualsite_url

            })
            abstracts.append(abstract)

        # Generate all embeddings in parallel
        LOGGER.info(f"\nGenerating embeddings with batch size {self.batch_size}...")
        embeddings = batch_embed_documents(
            abstracts,
            batch_size=self.batch_size,
            embedding_model=self.embedding_model,
            api_base=self.ollama_base_url
        )

        # Convert to list so that embeddings can be mapped to samples properly
        embeddings = embeddings.tolist()

        # Add nodes to graph
        LOGGER.info("\nBuilding graph structure...")
        with tqdm(total=len(paper_info), desc="Adding nodes") as pbar:
            # NOTE(review): the loop variable `embedding` shadows the litellm
            # `embedding` function imported at module level — harmless inside
            # this loop, but worth renaming.
            for info, embedding in zip(paper_info, embeddings):

                # Extract author information (store as list of dicts)
                author_list = []
                if info['authors']:
                    for author in info['authors']:
                        author_info = {
                            'id': author.get('id'),
                            'fullname': author.get('fullname', ''),
                            'institution': author.get('institution', ''),
                            'url': author.get('url', '')
                        }

                        # Composite uid disambiguates authors with equal ids.
                        author_uid = f"{author_info['id']} - {author_info['fullname']}"
                        if author_uid not in author_nodes:
                            self.graph.add_node(
                                author_uid,
                                **author_info
                            )
                            author_nodes.add(author_uid)

                        author_list.append(author_info)

                # Add paper node with attributes
                paper_attrs = info.copy()
                del paper_attrs["authors"]

                self.graph.add_node(
                    info["id"],
                    **paper_attrs,
                    embedding=embedding,
                    authors=author_list,
                    node_type="paper"
                )

                for author in author_list:
                    self.graph.add_edge(f"{author['id']} - {author['fullname']}", info["id"], relationship="is_author_of")

                # Add topic node if it doesn't exist
                if info['topic'] and info['topic'] not in topic_nodes:
                    self.graph.add_node(
                        info['topic'],
                        node_type='topic',
                        name=info['topic']
                    )
                    topic_nodes.add(info['topic'])

                # Add edge between paper and topic
                if info['topic']:
                    self.graph.add_edge(info['id'], info['topic'], relationship='belongs_to_topic')

                pbar.update(1)

        LOGGER.info(f"Built graph with {self.graph.number_of_nodes()} nodes and {self.graph.number_of_edges()} edges")
        LOGGER.info(f" Papers: {len([n for n, d in self.graph.nodes(data=True) if d.get('node_type') == 'paper'])}")
        LOGGER.info(f" Topics: {len([n for n, d in self.graph.nodes(data=True) if d.get('node_type') == 'topic'])}")

    def connect_similar_papers(self, similarity_threshold: float = 0.7):
        """
        Connect papers based on abstract embedding similarity using parallel processing.

        Note: this compares all O(n^2) paper pairs; cost grows quadratically
        with the number of papers.

        Args:
            similarity_threshold: Minimum cosine similarity to create an edge (0-1)
        """
        paper_nodes = [(n, d) for n, d in self.graph.nodes(data=True) if d.get('node_type') == 'paper']
        LOGGER.info(f"\nComputing similarities for {len(paper_nodes)} papers...")

        # Create pairs to compare (fast!)
        pairs = [(i, j) for i in range(len(paper_nodes)) for j in range(i + 1, len(paper_nodes))]
        LOGGER.info(f"Created {len(pairs)} pairs to compare")

        connections_added = 0

        def compute_similarity(pair_idx):
            """Compute cosine similarity for one paper pair; None if below threshold."""
            i, j = pair_idx
            node1, data1 = paper_nodes[i]
            node2, data2 = paper_nodes[j]
            emb1 = data1['embedding']
            emb2 = data2['embedding']
            similarity = np.dot(emb1, emb2) / (np.linalg.norm(emb1) * np.linalg.norm(emb2))
            if similarity >= similarity_threshold:
                return (node1, node2, float(similarity))
            return None

        # Compute similarities in parallel
        edges_to_add = []
        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            futures = {executor.submit(compute_similarity, pair): pair for pair in pairs}
            with tqdm(total=len(pairs), desc="Computing similarities", unit="pair") as pbar:
                for future in as_completed(futures):
                    result = future.result()
                    if result is not None:
                        edges_to_add.append(result)
                    pbar.update(1)

        # Add edges to graph (done serially after the parallel phase)
        for node1, node2, similarity in edges_to_add:
            self.graph.add_edge(
                node1,
                node2,
                relationship='similar_to',
                similarity=similarity
            )
            connections_added += 1

        LOGGER.info(f"Added {connections_added} similarity edges with threshold {similarity_threshold}")

    def get_papers_by_topic(self, topic: str) -> List[Dict[str, Any]]:
        """
        Get all papers belonging to a specific topic.

        Args:
            topic: Topic name

        Returns:
            List of paper information dictionaries (empty if topic is unknown)
        """
        if topic not in self.graph:
            return []

        papers = []
        for neighbor in self.graph.neighbors(topic):
            node_data = self.graph.nodes[neighbor]
            if node_data.get('node_type') == 'paper':
                papers.append({
                    'id': neighbor,
                    'name': node_data.get('name'),
                    'abstract': node_data.get('abstract'),
                    'embedding': node_data.get('embedding')
                })
        return papers

    def find_similar_papers(self, paper_id: str, top_k: int = 5) -> List[tuple]:
        """
        Find the most similar papers to a given paper.

        Args:
            paper_id: ID of the paper
            top_k: Number of similar papers to return

        Returns:
            List of (paper_id, similarity_score, paper_name) tuples; empty if
            the id is unknown or not a paper node.
        """
        if paper_id not in self.graph:
            return []

        paper_data = self.graph.nodes[paper_id]
        if paper_data.get('node_type') != 'paper':
            return []

        target_embedding = paper_data['embedding']
        similarities = []

        # Linear scan over all paper nodes: O(n) cosine similarities.
        for node, data in self.graph.nodes(data=True):
            if data.get('node_type') == 'paper' and node != paper_id:
                similarity = np.dot(target_embedding, data['embedding']) / \
                    (np.linalg.norm(target_embedding) * np.linalg.norm(data['embedding']))
                similarities.append((node, float(similarity), data.get('name')))

        # Sort by similarity and return top_k
        similarities.sort(key=lambda x: x[1], reverse=True)
        return similarities[:top_k]

    def get_graph_statistics(self) -> Dict[str, Any]:
        """
        Get statistics about the knowledge graph.

        Returns:
            Dictionary with graph statistics (diameter and average shortest
            path are included only for connected graphs).
        """
        paper_nodes = [n for n, d in self.graph.nodes(data=True)
                       if d.get('node_type') == 'paper']
        topic_nodes = [n for n, d in self.graph.nodes(data=True)
                       if d.get('node_type') == 'topic']

        stats = {
            'total_nodes': self.graph.number_of_nodes(),
            'total_edges': self.graph.number_of_edges(),
            'paper_nodes': len(paper_nodes),
            'topic_nodes': len(topic_nodes),
            'average_degree': sum(dict(self.graph.degree()).values()) / self.graph.number_of_nodes(),
            'density': nx.density(self.graph),
            'is_connected': nx.is_connected(self.graph),
        }

        # NOTE(review): nx.is_connected is evaluated twice; reusing
        # stats['is_connected'] would avoid the repeated traversal.
        if nx.is_connected(self.graph):
            stats['diameter'] = nx.diameter(self.graph)
            stats['average_shortest_path'] = nx.average_shortest_path_length(self.graph)

        return stats
384
-
385
-
386
# CLI entry point: builds the paper knowledge graph end-to-end —
# load papers from JSON, embed abstracts, link similar papers, save to disk.
# (Comments only here: click would surface a function docstring as help text.)
@click.command()
@click.option("-m", "--embedding-model", default="nomic-embed-text")
@click.option("-l", "--ollama-server-url", default="http://localhost:11434")
@click.option("-b", "--embedding-gen-batch-size", default=32)
@click.option("-w", "--max-parallel-workers", default=16)
@click.option("-p", "--limit-num-papers", default=None, type=int)
@click.option("-f", "--input-json-file", default=f"{PROJECT_ROOT}/data/neurips-2025-orals-posters.json")
@click.option("-o", "--output-file", default=f"{PROJECT_ROOT}/graphs/knowledge_graph.pkl")
@click.option("-s", "--similarity-threshold", default=0.8)
def main(
    embedding_model: str,
    ollama_server_url: str,
    embedding_gen_batch_size: int,
    max_parallel_workers: int,
    limit_num_papers: int,
    input_json_file: str,
    output_file: str,
    similarity_threshold: float
):

    # The "ollama/" prefix routes the model through litellm's ollama provider.
    kg = PaperKnowledgeGraph(
        embedding_model=f"ollama/{embedding_model}",
        ollama_base_url=ollama_server_url,
        embedding_gen_batch_size=embedding_gen_batch_size,
        max_parallel_workers=max_parallel_workers,
        limit_num_papers=limit_num_papers
    )

    # Load papers from JSON file
    kg.load_papers_from_json(input_json_file)

    # Build the graph (parallel embedding generation)
    kg.build_graph()

    # Optionally connect similar papers based on embeddings (parallel)
    kg.connect_similar_papers(similarity_threshold=similarity_threshold)

    # Save the graph to disk
    save_graph(
        graph=kg.graph,
        output_path=output_file
    )

    # Print statistics
    stats = kg.get_graph_statistics()
    LOGGER.info("\nGraph Statistics:")
    for key, value in stats.items():
        LOGGER.info(f" {key}: {value}")

    # Test run: Find similar papers
    if kg.papers_data:
        first_paper_id = kg.papers_data[0].get('uid', kg.papers_data[0].get('id'))
        LOGGER.debug(f"\nPapers similar to '{kg.graph.nodes[first_paper_id]['name']}':")
        similar = kg.find_similar_papers(first_paper_id, top_k=3)
        for pid, sim, name in similar:
            LOGGER.debug(f" - {name} (similarity: {sim:.3f})")


# Run
if __name__ == "__main__":
    main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/tools/knowledge_graph/graph_traversal_strategies/__init__.py DELETED
@@ -1,15 +0,0 @@
1
- from enum import Enum
2
-
3
- from agentic_nav.tools.knowledge_graph.graph_traversal_strategies.breadth_first_random import _graph_traversal_bfs_random
4
- from agentic_nav.tools.knowledge_graph.graph_traversal_strategies.depth_first_random import _graph_traversal_dfs_random
5
- from agentic_nav.tools.knowledge_graph.graph_traversal_strategies.neo4j_builtin import _graph_traversal_cypher
6
-
7
-
8
- class TraversalStrategy(Enum):
9
- """Traversal strategy options"""
10
- BFS = "breadth_first"
11
- DFS = "depth_first"
12
- BFS_RANDOM = "breadth_first_random"
13
- DFS_RANDOM = "depth_first_random"
14
-
15
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/tools/knowledge_graph/graph_traversal_strategies/breadth_first_random.py DELETED
@@ -1,80 +0,0 @@
1
- import neo4j
2
- from typing import List, Dict, Any, Optional, Set
3
- from collections import deque
4
- import random
5
-
6
-
7
- def _graph_traversal_bfs_random(
8
- db_driver: neo4j.Driver,
9
- start_paper_id: str,
10
- n_hops: int,
11
- relationship_type: Optional[str],
12
- max_results: Optional[int],
13
- max_branches: int
14
- ) -> List[Dict[str, Any]]:
15
- """
16
- BFS traversal with random neighbor sampling.
17
- Explores level by level, randomly sampling neighbors at each level.
18
- """
19
- with db_driver.session() as session:
20
- visited: Set[str] = {start_paper_id}
21
- queue = deque([(start_paper_id, 0)]) # (paper_id, distance)
22
- papers = []
23
-
24
- # Build relationship type filter
25
- if relationship_type:
26
- rel_filter = f":{':'.join([relationship_type])}"
27
- else:
28
- rel_filter = ""
29
-
30
- while queue:
31
- if max_results and type(max_results) is int and len(papers) >= max_results:
32
- break
33
-
34
- current_id, distance = queue.popleft()
35
-
36
- # Stop if we've reached max depth
37
- if distance >= n_hops:
38
- continue
39
-
40
- # Query to get all neighbors
41
- query = f"""
42
- MATCH (p:Paper {{id: $paper_id}})-[r{rel_filter}]->(neighbor:Paper)
43
- RETURN neighbor.id as id,
44
- neighbor.name as name,
45
- neighbor.abstract as abstract,
46
- neighbor.topic as topic
47
- """
48
-
49
- result = session.run(query, paper_id=current_id)
50
- neighbors = list(result)
51
-
52
- # Randomly sample neighbors
53
- if neighbors:
54
- sampled_neighbors = random.sample(
55
- neighbors,
56
- min(max_branches, len(neighbors))
57
- )
58
-
59
- for record in sampled_neighbors:
60
- neighbor_id = record['id']
61
-
62
- if neighbor_id not in visited:
63
- visited.add(neighbor_id)
64
-
65
- paper = {
66
- 'id': neighbor_id,
67
- 'name': record['name'],
68
- 'abstract': record['abstract'],
69
- 'topic': record['topic'],
70
- 'distance': distance + 1
71
- }
72
- papers.append(paper)
73
-
74
- # Add to queue for next level
75
- queue.append((neighbor_id, distance + 1))
76
-
77
- if max_results and type(max_results) is int and len(papers) >= max_results:
78
- break
79
-
80
- return papers
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/tools/knowledge_graph/graph_traversal_strategies/depth_first_random.py DELETED
@@ -1,78 +0,0 @@
1
- from typing import List, Dict, Any, Optional, Set
2
- import random
3
-
4
- import neo4j
5
-
6
-
7
- def _graph_traversal_dfs_random(
8
- db_driver: neo4j.Driver,
9
- start_paper_id: str,
10
- n_hops: int,
11
- relationship_type: Optional[str],
12
- max_results: Optional[int],
13
- max_branches: int
14
- ) -> List[Dict[str, Any]]:
15
- """
16
- DFS traversal with random neighbor sampling.
17
- Explores deeply along random branches before backtracking.
18
- """
19
- with db_driver.session() as session:
20
- visited: Set[str] = {start_paper_id}
21
- papers = []
22
-
23
- # Build relationship type filter
24
- if relationship_type:
25
- rel_filter = f":{':'.join([relationship_type])}"
26
- else:
27
- rel_filter = ""
28
-
29
- def dfs_traverse(paper_id: str, distance: int):
30
- """Recursive DFS helper"""
31
- if max_results and len(papers) >= max_results:
32
- return
33
-
34
- if distance >= n_hops:
35
- return
36
-
37
- # Query to get all neighbors
38
- query = f"""
39
- MATCH (p:Paper {{id: $paper_id}})-[r{rel_filter}]->(neighbor:Paper)
40
- RETURN neighbor.id as id,
41
- neighbor.name as name,
42
- neighbor.abstract as abstract,
43
- neighbor.topic as topic
44
- """
45
-
46
- result = session.run(query, paper_id=paper_id)
47
- neighbors = list(result)
48
-
49
- # Randomly sample neighbors
50
- if neighbors:
51
- sampled_neighbors = random.sample(
52
- neighbors,
53
- min(max_branches, len(neighbors))
54
- )
55
-
56
- for record in sampled_neighbors:
57
- neighbor_id = record['id']
58
-
59
- if neighbor_id not in visited:
60
- if max_results and len(papers) >= max_results:
61
- return
62
-
63
- visited.add(neighbor_id)
64
-
65
- paper = {
66
- 'id': neighbor_id,
67
- 'name': record['name'],
68
- 'abstract': record['abstract'],
69
- 'topic': record['topic'],
70
- 'distance': distance + 1
71
- }
72
- papers.append(paper)
73
-
74
- # Recursively explore this branch
75
- dfs_traverse(neighbor_id, distance + 1)
76
-
77
- dfs_traverse(start_paper_id, 0)
78
- return papers
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/tools/knowledge_graph/graph_traversal_strategies/neo4j_builtin.py DELETED
@@ -1,50 +0,0 @@
1
- from typing import List, Dict, Any, Optional
2
-
3
- import neo4j
4
-
5
-
6
- _DB_GRAPH_TRAVERSAL_QUERY = lambda rel_filter, n_hops: f"""
7
- MATCH path = (start:Paper)-[{rel_filter}*1..{n_hops}]-(related:Paper)
8
- WHERE start.id IN $start_paper_ids
9
- AND related.id <> start.id
10
- WITH related, min(length(path)) as min_distance
11
- RETURN DISTINCT related.id as id,
12
- related.name as name,
13
- related.abstract as abstract,
14
- related.topic as topic,
15
- min_distance as distance
16
- ORDER BY min_distance, related.name
17
- """
18
-
19
-
20
- def _graph_traversal_cypher(
21
- db_driver: neo4j.Driver,
22
- start_paper_id: str,
23
- n_hops: int,
24
- relationship_type: Optional[str],
25
- max_results: Optional[int]
26
- ) -> List[Dict[str, Any]]:
27
- """Original Cypher-based traversal (BFS/DFS handled by Neo4j)"""
28
- with db_driver.session() as session:
29
- if relationship_type:
30
- rel_filter = f":{':'.join([relationship_type])}"
31
- else:
32
- rel_filter = ""
33
-
34
- query = _DB_GRAPH_TRAVERSAL_QUERY(rel_filter=rel_filter, n_hops=n_hops)
35
- if max_results:
36
- query += f" LIMIT {max_results}"
37
-
38
- result = session.run(query, start_paper_ids=[start_paper_id])
39
- papers = []
40
- for record in result:
41
- paper = {
42
- 'id': record['id'],
43
- 'name': record['name'],
44
- 'abstract': record['abstract'],
45
- 'topic': record['topic'],
46
- 'distance': record['distance']
47
- }
48
- papers.append(paper)
49
-
50
- return papers
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/tools/knowledge_graph/neo4j_db_importer.py DELETED
@@ -1,537 +0,0 @@
1
- """
2
- Neo4j exporter for PaperKnowledgeGraph
3
- Exports NetworkX graph to Neo4j database with proper handling of embeddings and relationships
4
- """
5
- import logging
6
- import os
7
-
8
- import click
9
- import networkx as nx
10
- from neo4j import GraphDatabase
11
- from typing import Dict, Any
12
- import numpy as np
13
- from tqdm import tqdm
14
- from pathlib import Path
15
-
16
- from agentic_nav.tools.knowledge_graph.file_handler import load_graph
17
- from agentic_nav.utils.logger import setup_logging
18
-
19
-
20
- # Setup logging
21
- setup_logging(
22
- log_dir="logs",
23
- level=os.environ.get("AGENTIC_NAV_LOG_LEVEL", "INFO")
24
- )
25
- LOGGER = logging.getLogger(__name__)
26
- PROJECT_ROOT = Path(__file__).parent.parent.parent.parent
27
- NEO4J_USERNAME = os.environ.get("NEO4J_USERNAME", "neo4j")
28
- NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD")
29
- NEO4J_DB_URI = os.environ.get("NEO4J_DB_URI", "bolt://neo4j_db:7687")
30
-
31
-
32
- class Neo4jImporter:
33
- """Import PaperKnowledgeGraph to Neo4j database."""
34
-
35
- def __init__(
36
- self,
37
- uri: str = NEO4J_DB_URI,
38
- username: str = NEO4J_USERNAME,
39
- password: str = NEO4J_PASSWORD
40
- ):
41
- """Initialize Neo4j connection."""
42
- self.driver = GraphDatabase.driver(uri, auth=(username, password))
43
- self.driver.verify_connectivity()
44
- LOGGER.info(f"Connected to Neo4j at {uri}")
45
-
46
- def close(self):
47
- """Close the Neo4j driver connection."""
48
- self.driver.close()
49
-
50
- def clear_database(self, batch_size=500):
51
- with self.driver.session() as session:
52
- deleted_total = 0
53
- while True:
54
- result = session.run("""
55
- MATCH (n)
56
- WITH n LIMIT $batch_size
57
- DETACH DELETE n
58
- RETURN count(n) as deleted
59
- """,
60
- batch_size=batch_size
61
- )
62
-
63
- deleted = result.single()["deleted"]
64
- deleted_total += deleted
65
- LOGGER.info(f"Deleted {deleted} nodes (total: {deleted_total})")
66
-
67
- if deleted == 0:
68
- break
69
-
70
- def create_indexes(self, embedding_dimension: int = 768):
71
- """Create indexes for better query performance, including vector index."""
72
- with self.driver.session() as session:
73
- # Create index on paper IDs
74
- session.run("CREATE INDEX paper_id IF NOT EXISTS FOR (p:Paper) ON (p.id)")
75
-
76
- # Create index on topic names
77
- session.run("CREATE INDEX topic_name IF NOT EXISTS FOR (t:Topic) ON (t.name)")
78
-
79
- # Create index on author IDs
80
- session.run("CREATE INDEX author_id IF NOT EXISTS FOR (a:Author) ON (a.author_id)")
81
-
82
- # Create index on author names (useful for searching)
83
- session.run("CREATE INDEX author_name IF NOT EXISTS FOR (a:Author) ON (a.fullname)")
84
-
85
- # Create vector index for embeddings (Neo4j 5.11+)
86
- try:
87
- session.run("""
88
- CREATE VECTOR INDEX paper_embeddings IF NOT EXISTS
89
- FOR (p:Paper)
90
- ON p.embedding
91
- OPTIONS {
92
- indexConfig: {
93
- `vector.dimensions`: $dimension,
94
- `vector.similarity_function`: 'cosine'
95
- }
96
- }
97
- """, dimension=embedding_dimension)
98
- LOGGER.info(f"Created vector index for {embedding_dimension}-dimensional embeddings")
99
- except Exception as e:
100
- LOGGER.warning(f"Warning: Could not create vector index: {e}")
101
- LOGGER.warning("Vector indexes require Neo4j 5.11+ or Enterprise Edition")
102
-
103
- LOGGER.info("Created standard indexes")
104
-
105
- def _export_paper_nodes(self, kg: nx.Graph, batch_size: int):
106
- """Export paper nodes to Neo4j with all attributes."""
107
- paper_nodes = [(n, d) for n, d in kg.nodes(data=True)
108
- if d.get('node_type') == 'paper']
109
-
110
- LOGGER.info(f"\nExporting {len(paper_nodes)} paper nodes...")
111
-
112
- with self.driver.session() as session:
113
- for i in tqdm(range(0, len(paper_nodes), batch_size), desc="Paper nodes"):
114
- batch = paper_nodes[i:i + batch_size]
115
- papers_data = []
116
-
117
- for node_id, data in batch:
118
- # Convert embedding to list if it's numpy array
119
- embedding = data.get('embedding', [])
120
- if isinstance(embedding, np.ndarray):
121
- embedding = embedding.tolist()
122
-
123
- paper_dict = {
124
- "id": node_id,
125
- "name": data.get('name', ''),
126
- "abstract": data.get('abstract', ''),
127
- "topic": data.get('topic', ''),
128
- "keywords": data.get('keywords', []),
129
- "decision": data.get('decision', ''),
130
- "session": data.get('session', ''),
131
- "session_start_time": data.get('session_start_time', ''),
132
- "session_end_time": data.get('session_end_time', ''),
133
- "presentation_type": data.get('presentation_type', ''),
134
- "room_name": data.get('room_name', ''),
135
- "project_url": data.get('project_url', ''),
136
- "poster_position": data.get('poster_position', ''),
137
- "paper_url": data.get("paper_url", ""),
138
- "sourceid": data.get("sourceid", ""),
139
- "virtualsite_url": data.get("virtualsite_url", ""),
140
- 'embedding': embedding
141
- }
142
- papers_data.append(paper_dict)
143
-
144
- # Batch create paper nodes
145
- session.run("""
146
- UNWIND $papers AS paper
147
- CREATE (p:Paper {
148
- id: paper.id,
149
- name: paper.name,
150
- abstract: paper.abstract,
151
- topic: paper.topic,
152
- keywords: paper.keywords,
153
- decision: paper.decision,
154
- session: paper.session,
155
- session_start_time: paper.session_start_time,
156
- session_end_time: paper.session_end_time,
157
- presentation_type: paper.presentation_type,
158
- room_name: paper.room_name,
159
- project_url: paper.project_url,
160
- poster_position: paper.poster_position,
161
- paper_url: paper.paper_url,
162
- sourceid: paper.sourceid,
163
- virtualsite_url: paper.virtualsite_url,
164
- embedding: paper.embedding
165
- })
166
- """, papers=papers_data)
167
-
168
- LOGGER.info(f"Exported {len(paper_nodes)} paper nodes")
169
-
170
- def _export_topic_hierarchy(self, kg: nx.Graph):
171
- """
172
- Export topic nodes with hierarchical structure to Neo4j.
173
- Splits topics like "Deep Learning->Theory" into separate nodes with parent-child relationships.
174
- """
175
- # Collect all unique topic paths from paper nodes
176
- topic_paths = set()
177
- for node_id, data in kg.nodes(data=True):
178
- if data.get('node_type') == 'paper':
179
- topic = data.get('topic', '')
180
- if topic:
181
- topic_paths.add(topic)
182
-
183
- LOGGER.info(f"Processing {len(topic_paths)} unique topic paths...")
184
-
185
- # Parse topic paths and create hierarchy
186
- all_topics = set()
187
- topic_relationships = []
188
-
189
- for path in topic_paths:
190
- parts = [p.strip() for p in path.split('->')]
191
-
192
- # Add all topic parts
193
- for part in parts:
194
- all_topics.add(part)
195
-
196
- # Create parent-child relationships
197
- for i in range(len(parts) - 1):
198
- topic_relationships.append({
199
- 'parent': parts[i],
200
- 'child': parts[i + 1]
201
- })
202
-
203
- LOGGER.info(
204
- f"Creating {len(all_topics)} topic nodes with {len(set(tuple(r.items()) for r in topic_relationships))} "
205
- f"hierarchical relationships..."
206
- )
207
-
208
- with self.driver.session() as session:
209
- # Create all topic nodes (using MERGE to avoid duplicates)
210
- topics_data = [{'name': topic} for topic in all_topics]
211
- session.run("""
212
- UNWIND $topics AS topic
213
- MERGE (t:Topic {name: topic.name})
214
- """, topics=topics_data)
215
-
216
- # Create hierarchical relationships between topics (deduplicate first)
217
- if topic_relationships:
218
- # Remove duplicates
219
- unique_rels = list({(r['parent'], r['child']): r for r in topic_relationships}.values())
220
- session.run("""
221
- UNWIND $rels AS rel
222
- MATCH (parent:Topic {name: rel.parent})
223
- MATCH (child:Topic {name: rel.child})
224
- MERGE (child)-[:SUBTOPIC_OF]->(parent)
225
- """, rels=unique_rels)
226
-
227
- LOGGER.info(f"Exported {len(all_topics)} topic nodes with hierarchy")
228
-
229
- def _connect_papers_to_topics(self, kg: nx.Graph, batch_size: int):
230
- """
231
- Connect papers to their leaf topic nodes.
232
- For "Deep Learning->Theory", connects paper to "Theory" node.
233
- """
234
- paper_topic_connections = []
235
-
236
- for node_id, data in kg.nodes(data=True):
237
- if data.get('node_type') == 'paper':
238
- topic = data.get('topic', '')
239
- if topic:
240
- # Get the leaf topic (last part after splitting)
241
- parts = [p.strip() for p in topic.split('->')]
242
- leaf_topic = parts[-1]
243
-
244
- paper_topic_connections.append({
245
- 'paper_id': node_id,
246
- 'topic_name': leaf_topic,
247
- 'full_path': topic # Store full path as property
248
- })
249
-
250
- LOGGER.info(f"Connecting {len(paper_topic_connections)} papers to topics...")
251
-
252
- with self.driver.session() as session:
253
- for i in tqdm(range(0, len(paper_topic_connections), batch_size),
254
- desc="Paper-Topic connections"):
255
- batch = paper_topic_connections[i:i + batch_size]
256
-
257
- session.run("""
258
- UNWIND $connections AS conn
259
- MATCH (p:Paper {id: conn.paper_id})
260
- MATCH (t:Topic {name: conn.topic_name})
261
- MERGE (p)-[r:BELONGS_TO_TOPIC]->(t)
262
- SET r.full_path = conn.full_path
263
- """, connections=batch)
264
-
265
- LOGGER.info(f"Connected papers to leaf topics")
266
-
267
- def _export_similarity_relationships(self, kg: nx.Graph, batch_size: int):
268
- """Export similarity relationships between papers to Neo4j."""
269
- # Filter only similarity edges
270
- similarity_edges = [
271
- (source, target, data)
272
- for source, target, data in kg.edges(data=True)
273
- if data.get('relationship') == 'similar_to'
274
- ]
275
-
276
- LOGGER.info(f"Exporting {len(similarity_edges)} similarity relationships...")
277
-
278
- with self.driver.session() as session:
279
- for i in tqdm(range(0, len(similarity_edges), batch_size),
280
- desc="Similarity relationships"):
281
- batch = similarity_edges[i:i + batch_size]
282
-
283
- edges_data = [{
284
- 'source': source,
285
- 'target': target,
286
- 'similarity': data.get('similarity', 0.0)
287
- } for source, target, data in batch]
288
-
289
- session.run("""
290
- UNWIND $edges AS edge
291
- MATCH (p1:Paper {id: edge.source})
292
- MATCH (p2:Paper {id: edge.target})
293
- MERGE (p1)-[:SIMILAR_TO {similarity: edge.similarity}]->(p2)
294
- """, edges=edges_data)
295
-
296
- LOGGER.info(f"Exported {len(similarity_edges)} similarity relationships")
297
-
298
- def _export_authors_and_relationships(self, kg: nx.Graph, batch_size: int):
299
- """
300
- Export author nodes from NetworkX graph (where they already exist as separate nodes)
301
- and create IS_AUTHOR_OF relationships between authors and papers.
302
-
303
- Author nodes in NetworkX have composite IDs like "12345 - John Doe"
304
- """
305
- # Collect author nodes from the graph
306
- author_nodes = [
307
- (node_id, data)
308
- for node_id, data in kg.nodes(data=True)
309
- if data.get('node_type') != 'paper' and data.get('node_type') != 'topic'
310
- ]
311
-
312
- LOGGER.info(f"Found {len(author_nodes)} author nodes in graph...")
313
-
314
- # Extract author data
315
- all_authors = []
316
- for node_id, data in author_nodes:
317
- # Parse composite ID "12345 - John Doe"
318
- parts = node_id.split(' - ', 1)
319
- author_id = parts[0].strip() if len(parts) > 0 else ""
320
-
321
- author_dict = {
322
- 'composite_id': node_id, # Store the full composite ID
323
- 'author_id': author_id,
324
- 'fullname': data.get('fullname', ''),
325
- 'institution': data.get('institution', ''),
326
- 'url': data.get('url', '')
327
- }
328
- all_authors.append(author_dict)
329
-
330
- LOGGER.info(f"Exporting {len(all_authors)} unique authors...")
331
-
332
- with self.driver.session() as session:
333
- # Create author nodes in batches
334
- for i in tqdm(range(0, len(all_authors), batch_size), desc="Author nodes"):
335
- batch = all_authors[i:i + batch_size]
336
-
337
- session.run("""
338
- UNWIND $authors AS author
339
- MERGE (a:Author {composite_id: author.composite_id})
340
- ON CREATE SET
341
- a.author_id = author.author_id,
342
- a.fullname = author.fullname,
343
- a.institution = author.institution,
344
- a.url = author.url
345
- ON MATCH SET
346
- a.author_id = author.author_id,
347
- a.fullname = author.fullname,
348
- a.institution = author.institution,
349
- a.url = author.url
350
- """, authors=batch)
351
-
352
- LOGGER.info(f"Exported {len(all_authors)} author nodes")
353
-
354
- # Method 1: Try to collect author-paper relationships from graph edges
355
- author_paper_edges = [
356
- (source, target, data)
357
- for source, target, data in kg.edges(data=True)
358
- if data.get('relationship') == 'is_author_of'
359
- ]
360
-
361
- LOGGER.info(f"Found {len(author_paper_edges)} IS_AUTHOR_OF edges in graph")
362
-
363
- # Method 2: If no edges found, extract from paper node 'authors' attribute
364
- if len(author_paper_edges) == 0:
365
- LOGGER.warning("No IS_AUTHOR_OF edges found in graph. Extracting from paper 'authors' attribute...")
366
-
367
- paper_author_relationships = []
368
- for node_id, data in kg.nodes(data=True):
369
- if data.get('node_type') == 'paper':
370
- authors = data.get('authors', [])
371
-
372
- if authors and isinstance(authors, list) and len(authors) > 0:
373
- # Check if authors are stored as dicts
374
- if isinstance(authors[0], dict):
375
- for author in authors:
376
- author_id = str(author.get('id', ''))
377
- fullname = author.get('fullname', '')
378
- if author_id and fullname:
379
- composite_id = f"{author_id} - {fullname}"
380
- paper_author_relationships.append({
381
- 'author_id': composite_id,
382
- 'paper_id': node_id
383
- })
384
-
385
- LOGGER.info(f"Extracted {len(paper_author_relationships)} relationships from paper attributes")
386
-
387
- # Create relationships from extracted data
388
- with self.driver.session() as session:
389
- for i in tqdm(range(0, len(paper_author_relationships), batch_size),
390
- desc="Author-Paper relationships"):
391
- batch = paper_author_relationships[i:i + batch_size]
392
-
393
- session.run("""
394
- UNWIND $edges AS edge
395
- MATCH (a:Author {composite_id: edge.author_id})
396
- MATCH (p:Paper {id: edge.paper_id})
397
- MERGE (a)-[:IS_AUTHOR_OF]->(p)
398
- """, edges=batch)
399
-
400
- LOGGER.info(f"Created {len(paper_author_relationships)} author-paper relationships")
401
- else:
402
- # Create relationships from graph edges
403
- with self.driver.session() as session:
404
- for i in tqdm(range(0, len(author_paper_edges), batch_size),
405
- desc="Author-Paper relationships"):
406
- batch = author_paper_edges[i:i + batch_size]
407
-
408
- edges_data = [{
409
- 'author_id': source, # composite ID like "12345 - John Doe"
410
- 'paper_id': target
411
- } for source, target, data in batch]
412
-
413
- session.run("""
414
- UNWIND $edges AS edge
415
- MATCH (a:Author {composite_id: edge.author_id})
416
- MATCH (p:Paper {id: edge.paper_id})
417
- MERGE (a)-[:IS_AUTHOR_OF]->(p)
418
- """, edges=edges_data)
419
-
420
- LOGGER.info(f"Created {len(author_paper_edges)} author-paper relationships")
421
-
422
- def import_graph(self, kg_path: str, batch_size: int = 100, embedding_dimension: int = 768):
423
- """Import the entire knowledge graph to Neo4j."""
424
- LOGGER.info(f"Loading graph from path {kg_path}")
425
- kg = load_graph(kg_path)
426
-
427
- LOGGER.info("Starting Neo4j export...")
428
-
429
- # Clear and prepare database
430
- self.clear_database()
431
- self.create_indexes(embedding_dimension)
432
-
433
- # Export paper nodes
434
- self._export_paper_nodes(kg, batch_size)
435
-
436
- # Export authors and author-paper relationships
437
- self._export_authors_and_relationships(kg, batch_size)
438
-
439
- # Export topic hierarchy
440
- self._export_topic_hierarchy(kg)
441
-
442
- # Connect papers to topics
443
- self._connect_papers_to_topics(kg, batch_size)
444
-
445
- # Export similarity relationships
446
- self._export_similarity_relationships(kg, batch_size)
447
-
448
- LOGGER.info("Export completed successfully!")
449
-
450
- def verify_export(self) -> Dict[str, Any]:
451
- """Verify the export by checking node and relationship counts."""
452
- with self.driver.session() as session:
453
- # Count papers
454
- result = session.run("MATCH (p:Paper) RETURN count(p) as count")
455
- paper_count = result.single()['count']
456
-
457
- # Count topics
458
- result = session.run("MATCH (t:Topic) RETURN count(t) as count")
459
- topic_count = result.single()['count']
460
-
461
- # Count authors
462
- result = session.run("MATCH (a:Author) RETURN count(a) as count")
463
- author_count = result.single()['count']
464
-
465
- # Count relationships
466
- result = session.run("MATCH ()-[r]->() RETURN count(r) as count")
467
- rel_count = result.single()['count']
468
-
469
- # Count similarity relationships
470
- result = session.run("MATCH ()-[r:SIMILAR_TO]->() RETURN count(r) as count")
471
- similarity_count = result.single()['count']
472
-
473
- # Count topic hierarchy relationships
474
- result = session.run("MATCH ()-[r:SUBTOPIC_OF]->() RETURN count(r) as count")
475
- subtopic_count = result.single()['count']
476
-
477
- # Count author relationships (updated relationship name)
478
- result = session.run("MATCH ()-[r:IS_AUTHOR_OF]->() RETURN count(r) as count")
479
- is_author_of_count = result.single()['count']
480
-
481
- stats = {
482
- 'papers': paper_count,
483
- 'topics': topic_count,
484
- 'authors': author_count,
485
- 'total_relationships': rel_count,
486
- 'similarity_relationships': similarity_count,
487
- 'subtopic_relationships': subtopic_count,
488
- 'is_author_of_relationships': is_author_of_count
489
- }
490
-
491
- LOGGER.info("Neo4j Database Statistics:")
492
- for key, value in stats.items():
493
- LOGGER.info(f" {key}: {value}")
494
-
495
- return stats
496
-
497
-
498
- @click.command()
499
- @click.option("-g", "--graph-path", help="Path to the knowledge graph file (pickle).", default=f"{PROJECT_ROOT}/graphs/knowledge_graph.pkl")
500
- @click.option("-l", "--neo4j-uri", help="Database URI", default="bolt://localhost:7687")
501
- @click.option("-u", "--neo4j-username", help="Database user", default=NEO4J_USERNAME)
502
- @click.option("-p", "--neo4j-password", help="Database password", default=NEO4J_PASSWORD)
503
- @click.option("-b", "--batch-size", help="Batch size for node insertion", default=100)
504
- @click.option("-e", "--embedding-dimension", help="Vector embedding dimensions", default=768)
505
- def main(
506
- graph_path: str,
507
- neo4j_uri: str,
508
- neo4j_username: str,
509
- neo4j_password: str,
510
- batch_size: int = 100,
511
- embedding_dimension: int = 768
512
- ):
513
- """
514
- Convenience function to export a knowledge graph to Neo4j.
515
-
516
- Args:
517
- graph_path: PaperKnowledgeGraph instance
518
- neo4j_uri: Neo4j connection URI
519
- neo4j_username: Neo4j username
520
- neo4j_password: Neo4j password
521
- batch_size: Batch size for processing
522
- embedding_dimension: Dimension of embedding vectors (default: 768)
523
- """
524
- importer = Neo4jImporter(neo4j_uri, neo4j_username, neo4j_password)
525
- try:
526
- importer.import_graph(
527
- graph_path,
528
- batch_size,
529
- embedding_dimension
530
- )
531
- importer.verify_export()
532
- finally:
533
- importer.close()
534
-
535
-
536
- if __name__ == "__main__":
537
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/tools/knowledge_graph/retriever.py DELETED
@@ -1,612 +0,0 @@
1
- import logging
2
- import numpy as np
3
- import random
4
- import os
5
-
6
- from neo4j import GraphDatabase
7
- from pathlib import Path
8
-
9
- from typing import List, Dict, Any, Optional
10
-
11
- from agentic_nav.tools.knowledge_graph.graph_traversal_strategies import (
12
- TraversalStrategy,
13
- _graph_traversal_dfs_random,
14
- _graph_traversal_cypher,
15
- _graph_traversal_bfs_random
16
- )
17
-
18
- from agentic_nav.utils.embedding_generator import batch_embed_documents
19
-
20
-
21
- PROJECT_ROOT = Path(__file__).parent.parent.parent.parent
22
- LOGGER = logging.getLogger(__name__)
23
- EMBEDDING_MODEL_NAME = os.environ.get("EMBEDDING_MODEL_NAME", "nomic-embed-text")
24
- EMBEDDING_MODEL_API_BASE = os.environ.get("EMBEDDING_MODEL_API_BASE", "http://localhost:11435")
25
- NEO4J_DB_URI = os.environ.get("NEO4J_DB_URI", "bolt://neo4j_db:7687")
26
- NEO4J_DB_NODE_RETURN_LIMIT = int(os.environ.get("NEO4J_DB_NODE_RETURN_LIMIT", 200))
27
-
28
-
29
- class Neo4jGraphWorker:
30
- """Search and traversal operations for Neo4j paper knowledge graph."""
31
-
32
- _DB_SIMILARITY_SEARCH_QUERY = """
33
- MATCH (node:Paper)
34
- WHERE ($day IS NULL OR node.session_start_time IS NOT NULL)
35
- WITH node
36
- WHERE ($day IS NULL OR date(datetime(node.session_start_time)).dayOfWeek = $day)
37
- AND ($time_ranges IS NULL OR
38
- any(range IN $time_ranges WHERE
39
- time(datetime(node.session_start_time)) >= time(range.start)
40
- AND time(datetime(node.session_start_time)) <= time(range.end)))
41
- WITH collect(node) as filtered_nodes
42
- CALL db.index.vector.queryNodes('paper_embeddings', $top_k, $query_embedding)
43
- YIELD node, score
44
- WHERE node IN filtered_nodes OR ($day IS NULL AND $time_ranges IS NULL)
45
- RETURN node.id as id,
46
- node.name as name,
47
- node.abstract as abstract,
48
- node.topic as topic,
49
- node.paper_url as paper_url,
50
- node.session as session,
51
- node.session_start_time as session_start_time,
52
- node.session_end_time as session_end_time,
53
- node.presentation_type as presentation_type,
54
- node.room_name as room_name,
55
- node.project_url as project_url,
56
- node.poster_position as poster_position,
57
- node.sourceid as sourceid,
58
- node.virtualsite_url as virtualsite_url,
59
- node.decision as decision,
60
- [(a:Author)-[:IS_AUTHOR_OF]->(node) | a] as authors,
61
- score
62
- ORDER BY score DESC
63
- LIMIT $limit
64
- """
65
-
66
- _DB_NEIGHBORHOOD_SEARCH_QUERY = """
67
- MATCH (p:Paper)-[r]-(neighbor)
68
- WHERE p.id IN $paper_ids
69
- AND type(r) IN $allowed_rel_types
70
- AND 'Paper' IN labels(neighbor)
71
- AND (type(r) <> 'SIMILAR_TO' OR r.similarity >= $min_similarity)
72
- RETURN neighbor.id as id,
73
- neighbor.name as name,
74
- neighbor.abstract as abstract,
75
- neighbor.topic as topic,
76
- neighbor.paper_url as paper_url,
77
- neighbor.session as session,
78
- neighbor.session_start_time as session_start_time,
79
- neighbor.session_end_time as session_end_time,
80
- neighbor.presentation_type as presentation_type,
81
- neighbor.room_name as room_name,
82
- neighbor.project_url as project_url,
83
- neighbor.poster_position as poster_position,
84
- neighbor.sourceid as sourceid,
85
- neighbor.virtualsite_url as virtualsite_url,
86
- neighbor.decision as decision,
87
- [(a:Author)-[:IS_AUTHOR_OF]->(neighbor) | a] as authors,
88
- p.id as source_paper_id,
89
- type(r) as relationship_type,
90
- CASE WHEN type(r) = 'SIMILAR_TO' THEN r.similarity ELSE null END as similarity
91
- ORDER BY similarity DESC
92
- LIMIT $limit
93
- """
94
-
95
- # Find the DB query for graph traversal in the graph_traversal sub-folder.
96
- _DB_PAPERS_BY_AUTHOR = """
97
- MATCH (a:Author)-[:IS_AUTHOR_OF]->(p:Paper)
98
- WHERE a.fullname = $author_name
99
- WITH p, collect(DISTINCT a) as all_authors
100
- RETURN p.id as id,
101
- p.name as name,
102
- p.abstract as abstract,
103
- p.topic as topic,
104
- p.paper_url as paper_url,
105
- p.decision as decision,
106
- p.session as session,
107
- p.session_start_time as session_start_time,
108
- p.session_end_time as session_end_time,
109
- p.presentation_type as presentation_type,
110
- p.room_name as room_name,
111
- p.project_url as project_url,
112
- p.poster_position as poster_position,
113
- p.sourceid as sourceid,
114
- p.virtualsite_url as virtualsite_url,
115
- all_authors as authors
116
- ORDER BY p.name
117
- LIMIT $limit
118
- """
119
-
120
- _DB_PAPERS_BY_AUTHOR_FUZZY = """
121
- MATCH (a:Author)-[:IS_AUTHOR_OF]->(p:Paper)
122
- WHERE toLower(a.fullname) CONTAINS toLower($author_name)
123
- WITH p, collect(DISTINCT a) as all_authors
124
- RETURN p.id as id,
125
- p.name as name,
126
- p.abstract as abstract,
127
- p.topic as topic,
128
- p.paper_url as paper_url,
129
- p.decision as decision,
130
- p.session as session,
131
- p.session_start_time as session_start_time,
132
- p.session_end_time as session_end_time,
133
- p.presentation_type as presentation_type,
134
- p.room_name as room_name,
135
- p.project_url as project_url,
136
- p.poster_position as poster_position,
137
- p.sourceid as sourceid,
138
- p.virtualsite_url as virtualsite_url,
139
- all_authors as authors
140
- ORDER BY p.name
141
- LIMIT $limit
142
- """
143
-
144
- _DB_PAPERS_BY_TOPIC = """
145
- MATCH (p:Paper)-[:BELONGS_TO_TOPIC]->(t:Topic {name: $topic_name})
146
- RETURN p.id as id,
147
- p.name as name,
148
- p.abstract as abstract,
149
- p.topic as topic,
150
- p.paper_url as paper_url,
151
- p.decision as decision,
152
- p.session as session,
153
- p.session_start_time as session_start_time,
154
- p.session_end_time as session_end_time,
155
- p.presentation_type as presentation_type,
156
- p.room_name as room_name,
157
- p.project_url as project_url,
158
- p.poster_position as poster_position,
159
- p.sourceid as sourceid,
160
- p.virtualsite_url as virtualsite_url,
161
- [(a:Author)-[:IS_AUTHOR_OF]->(p) | a] as authors
162
- ORDER BY p.name
163
- LIMIT $limit
164
- """
165
-
166
- _DB_PAPERS_BY_TOPIC_AND_SUBTOPIC = """
167
- MATCH (t:Topic {name: $topic_name})
168
- OPTIONAL MATCH (subtopic:Topic)-[:SUBTOPIC_OF*]->(t)
169
- WITH t, collect(DISTINCT subtopic) + t as all_topics
170
- UNWIND all_topics as topic
171
- MATCH (p:Paper)-[:BELONGS_TO_TOPIC]->(topic)
172
- WITH DISTINCT p
173
- RETURN p.id as id,
174
- p.name as name,
175
- p.abstract as abstract,
176
- p.topic as topic,
177
- p.paper_url as paper_url,
178
- p.decision as decision,
179
- p.session as session,
180
- p.session_start_time as session_start_time,
181
- p.session_end_time as session_end_time,
182
- p.presentation_type as presentation_type,
183
- p.room_name as room_name,
184
- p.project_url as project_url,
185
- p.poster_position as poster_position,
186
- p.sourceid as sourceid,
187
- p.virtualsite_url as virtualsite_url,
188
- [(a:Author)-[:IS_AUTHOR_OF]->(p) | a] as authors
189
- ORDER BY p.name
190
- LIMIT $limit
191
- """
192
-
193
- def __init__(
194
- self,
195
- uri: str = NEO4J_DB_URI,
196
- username: str = "neo4j",
197
- password: str = "password"
198
- ):
199
- """Initialize Neo4j connection."""
200
- self.driver = GraphDatabase.driver(uri, auth=(username, password))
201
- self.driver.verify_connectivity()
202
- LOGGER.info(f"Connected to Neo4j at {uri}")
203
-
204
- def close(self):
205
- """Close the Neo4j driver connection."""
206
- self.driver.close()
207
-
208
- @staticmethod
209
- def embed_user_query(
210
- text: str,
211
- embedding_model: str = f"ollama/{EMBEDDING_MODEL_NAME}",
212
- api_base: str = EMBEDDING_MODEL_API_BASE
213
- ):
214
- emb = batch_embed_documents(
215
- texts=[text],
216
- batch_size=1,
217
- api_base=api_base,
218
- embedding_model=embedding_model
219
- ).tolist()[0]
220
-
221
- return emb
222
-
223
- def similarity_search(
224
- self,
225
- user_query: str,
226
- day: Optional[str] = None,
227
- timeslots: Optional[List[str]] = None,
228
- top_k: int = 5,
229
- min_similarity: Optional[float] = None
230
- ) -> List[Dict[str, Any]]:
231
- """
232
- Perform vector similarity search on paper embeddings.
233
-
234
- Args:
235
- user_query: User query (str)
236
- day: Conference day as date string (e.g., "2024-12-10") or None
237
- timeslots: List of time ranges as strings (e.g., ["09:00:00-12:00:00"]) or None
238
- top_k: Number of top results to return
239
- min_similarity: Optional minimum similarity threshold (0-1)
240
-
241
- Returns:
242
- List of dictionaries containing paper information and similarity scores
243
- """
244
-
245
- # Generate text embedding
246
- query_embedding = self.embed_user_query(
247
- text=user_query
248
- )
249
-
250
- # Convert numpy array to list if needed
251
- if isinstance(query_embedding, np.ndarray):
252
- query_embedding = query_embedding.tolist()
253
-
254
- # Parse day and timeslots for the query
255
- day_filter = None
256
- time_ranges = []
257
-
258
- if day:
259
- # Convert date string to day of week (1=Monday, 7=Sunday)
260
- from datetime import datetime
261
- date_obj = datetime.strptime(day, "%Y-%m-%d")
262
- day_filter = date_obj.isoweekday()
263
-
264
- if timeslots:
265
- # Parse timeslot ranges (e.g., "09:00:00-12:00:00")
266
- for slot in timeslots:
267
- if '-' in slot:
268
- start, end = slot.split('-')
269
- time_ranges.append({'start': start.strip(), 'end': end.strip()})
270
- else:
271
- # If no range, assume it's a single time point with some buffer
272
- time_ranges.append({'start': slot.strip(), 'end': slot.strip()})
273
-
274
- with self.driver.session() as session:
275
- result = session.run(
276
- self._DB_SIMILARITY_SEARCH_QUERY,
277
- query_embedding=query_embedding,
278
- top_k=top_k,
279
- limit=NEO4J_DB_NODE_RETURN_LIMIT,
280
- day=day_filter,
281
- time_ranges=time_ranges if time_ranges else None
282
- )
283
- papers = []
284
- for record in result:
285
- paper = {
286
- 'id': record['id'],
287
- 'name': record['name'],
288
- 'abstract': record['abstract'],
289
- 'topic': record['topic'],
290
- 'similarity_score': record['score'],
291
- 'paper_url': record['paper_url'],
292
- 'decision': record['decision'],
293
- 'session': record['session'],
294
- 'session_start_time': record['session_start_time'],
295
- 'session_end_time': record['session_end_time'],
296
- 'presentation_type': record['presentation_type'],
297
- 'room_name': record['room_name'],
298
- 'github_url': record['project_url'],
299
- 'poster_position': record['poster_position'],
300
- 'sourceid': record['sourceid'],
301
- 'virtualsite_url': record['virtualsite_url'],
302
- 'authors': [a['fullname'] for a in record['authors']]
303
- }
304
-
305
- # Apply minimum similarity filter if specified
306
- if min_similarity is None or paper['similarity_score'] >= min_similarity:
307
- papers.append(paper)
308
-
309
- return papers
310
-
311
- def neighborhood_search(
312
- self,
313
- paper_id: str,
314
- relationship_types: List[str] = ["SIMILAR_TO"],
315
- min_similarity: float = 0.7
316
- ) -> Dict[str, Any]:
317
- """
318
- Find immediate neighbors of given paper nodes.
319
-
320
- Args:
321
- paper_id: Paper ID to find neighbors for
322
- relationship_types: Optional list of relationship types to filter
323
- (e.g., ['SIMILAR_TO', 'IS_AUTHOR_OF', 'BELONGS_TO_TOPIC', 'SUBTOPIC_OF'])
324
- min_similarity (float): A minimum similarity score in the range of 0 - 1. Often a good value is 0.75 or 0.8.
325
-
326
-
327
- Returns:
328
- Dictionary with neighbors grouped by relationship type
329
- """
330
- allowed_rel_types = ['SIMILAR_TO', 'IS_AUTHOR_OF', 'BELONGS_TO_TOPIC', 'SUBTOPIC_OF']
331
- for rel_type in relationship_types:
332
- if rel_type not in allowed_rel_types:
333
- raise ValueError(f"Unsupported relationship type: {rel_type}. Supported relationship types: {allowed_rel_types}")
334
-
335
- with self.driver.session() as session:
336
- result = session.run(
337
- self._DB_NEIGHBORHOOD_SEARCH_QUERY,
338
- paper_ids=[paper_id],
339
- allowed_rel_types=relationship_types,
340
- min_similarity=min_similarity,
341
- limit=NEO4J_DB_NODE_RETURN_LIMIT
342
- )
343
-
344
- # Organize results by relationship type
345
- neighbors = {}
346
-
347
- for record in result:
348
- rel_type = record["relationship_type"]
349
- if rel_type not in neighbors.keys():
350
- neighbors[rel_type] = []
351
- else:
352
- neighbors[rel_type].append(record)
353
-
354
- return neighbors
355
-
356
- def graph_traversal(
357
- self,
358
- start_paper_id: str,
359
- n_hops: int = 2,
360
- relationship_type: Optional[str] = None,
361
- max_results: Optional[int] = None,
362
- strategy: str = "breadth_first_random",
363
- max_branches: Optional[int] = None,
364
- random_seed: Optional[int] = None
365
- ) -> List[Dict[str, Any]]:
366
- """
367
- Traverse the graph for n hops from starting paper nodes.
368
-
369
- Args:
370
- start_paper_id: Paper ID to start traversal from
371
- n_hops: Number of hops to traverse (1-5 recommended)
372
- relationship_type: Optional list of relationship types to traverse
373
- max_results: Optional maximum number of results to return
374
- strategy: Traversal strategy (breadth_first, depth_first, breadth_first_random, depth_first_random)
375
- max_branches: Maximum number of random neighbors to explore per node (only for random strategies)
376
- random_seed: Optional seed for reproducible random sampling
377
-
378
- Returns:
379
- List of papers found through traversal with distance information
380
- """
381
- if random_seed is not None:
382
- random.seed(random_seed)
383
-
384
- # Use original Cypher-based approach for non-random strategies
385
- if strategy in ["breadth_first", "depth_first"]:
386
- LOGGER.debug(f"Doing a graph traversal with neo4j's built-in strategy")
387
- return _graph_traversal_cypher(
388
- self.driver,
389
- start_paper_id,
390
- n_hops,
391
- relationship_type,
392
- max_results
393
- )
394
-
395
- # Use Python-based traversal for random strategies
396
- elif strategy == "breadth_first_random":
397
- LOGGER.debug(f"Doing a graph traversal with a random sampling breadth first strategy")
398
- return _graph_traversal_bfs_random(
399
- self.driver,
400
- start_paper_id,
401
- n_hops,
402
- relationship_type,
403
- max_results,
404
- max_branches or 3
405
- )
406
-
407
- elif strategy == "depth_first_random":
408
- LOGGER.debug(f"Doing a graph traversal with a random sampling depth first strategy")
409
- return _graph_traversal_dfs_random(
410
- self.driver,
411
- start_paper_id,
412
- n_hops,
413
- relationship_type,
414
- max_results,
415
- max_branches or 3
416
- )
417
-
418
- else:
419
- raise ValueError(f"Unsupported traversal strategy: {strategy}. "
420
- f"Supported strategies: breadth_first, depth_first, breadth_first_random, depth_first_random")
421
-
422
- def search_papers_by_author(
423
- self,
424
- author_name: str,
425
- fuzzy: bool = True
426
- ) -> List[Dict[str, Any]]:
427
- """
428
- Find all papers by a specific author.
429
-
430
- Args:
431
- author_name: Author name or partial name
432
- fuzzy: Whether to use fuzzy matching (CONTAINS vs exact match)
433
-
434
- Returns:
435
- List of papers by the author
436
- """
437
- with self.driver.session() as session:
438
- if fuzzy:
439
- query = self._DB_PAPERS_BY_AUTHOR_FUZZY
440
- else:
441
- query = self._DB_PAPERS_BY_AUTHOR
442
-
443
- result = session.run(query, author_name=author_name)
444
-
445
- papers = []
446
- for record in result:
447
- paper = {
448
- 'id': record['id'],
449
- 'name': record['name'],
450
- 'abstract': record['abstract'],
451
- 'topic': record['topic'],
452
- 'author_name': record['author_name'],
453
- 'paper_url': record['paper_url'],
454
- 'decision': record['decision'],
455
- 'session': record['session'],
456
- 'session_start_time': record['session_start_time'],
457
- 'session_end_time': record['session_end_time'],
458
- 'presentation_type': record['presentation_type'],
459
- 'room_name': record['room_name'],
460
- 'github_url': record['project_url'],
461
- 'poster_position': record['poster_position'],
462
- 'sourceid': record['sourceid'],
463
- 'virtualsite_url': record['virtualsite_url'],
464
- }
465
- papers.append(paper)
466
-
467
- return papers
468
-
469
- def search_papers_by_topic(
470
- self,
471
- topic_name: str,
472
- include_subtopics: bool = True
473
- ) -> List[Dict[str, Any]]:
474
- """
475
- Find all papers in a specific topic.
476
-
477
- Args:
478
- topic_name: Topic name
479
- include_subtopics: Whether to include papers from subtopics
480
-
481
- Returns:
482
- List of papers in the topic
483
- """
484
- with self.driver.session() as session:
485
- if include_subtopics:
486
- # Find topic and all its subtopics
487
- query = self._DB_PAPERS_BY_TOPIC_AND_SUBTOPIC
488
- else:
489
- query = self._DB_PAPERS_BY_TOPIC
490
-
491
- result = session.run(query, topic_name=topic_name, limit=NEO4J_DB_NODE_RETURN_LIMIT)
492
-
493
- papers = []
494
- for record in result:
495
- paper = {
496
- 'id': record['id'],
497
- 'name': record['name'],
498
- 'abstract': record['abstract'],
499
- 'topic': record['topic'],
500
- 'paper_url': record['paper_url'],
501
- 'decision': record['decision'],
502
- 'session': record['session'],
503
- 'session_start_time': record['session_start_time'],
504
- 'session_end_time': record['session_end_time'],
505
- 'presentation_type': record['presentation_type'],
506
- 'room_name': record['room_name'],
507
- 'github_url': record['project_url'],
508
- 'poster_position': record['poster_position'],
509
- 'sourceid': record['sourceid'],
510
- 'virtualsite_url': record['virtualsite_url'],
511
- }
512
- papers.append(paper)
513
-
514
- return papers
515
-
516
- def get_collaboration_network(
517
- self,
518
- author_name: str,
519
- n_hops: int = 2
520
- ) -> Dict[str, Any]:
521
- """
522
- Find collaboration network: authors who co-authored papers.
523
-
524
- Args:
525
- author_name: Starting author name
526
- n_hops: Degrees of separation to explore
527
-
528
- Returns:
529
- Dictionary with collaborators and shared papers
530
- """
531
- with self.driver.session() as session:
532
- query = f"""
533
- MATCH (a1:Author)
534
- WHERE toLower(a1.fullname) CONTAINS toLower($author_name)
535
- MATCH path = (a1)<-[:AUTHORED_BY]-(p:Paper)-[:AUTHORED_BY]->(a2:Author)
536
- WHERE a1 <> a2
537
- WITH a1, a2, collect(DISTINCT p) as shared_papers, length(path) as distance
538
- RETURN a1.fullname as source_author,
539
- a2.fullname as collaborator,
540
- a2.institution as institution,
541
- [p IN shared_papers | {{id: p.id, name: p.name}}] as papers,
542
- size(shared_papers) as paper_count
543
- ORDER BY paper_count DESC
544
- """
545
-
546
- result = session.run(query, author_name=author_name)
547
-
548
- collaborations = []
549
- for record in result:
550
- collab = {
551
- 'source_author': record['source_author'],
552
- 'collaborator': record['collaborator'],
553
- 'institution': record['institution'],
554
- 'shared_papers': record['papers'],
555
- 'paper_count': record['paper_count']
556
- }
557
- collaborations.append(collab)
558
-
559
- return {
560
- 'author': author_name,
561
- 'collaborators': collaborations,
562
- 'total_collaborators': len(collaborations)
563
- }
564
-
565
-
566
- # Test
567
- if __name__ == "__main__":
568
- # Initialize searcher
569
- searcher = Neo4jGraphWorker(
570
- uri="bolt://localhost:7687",
571
- username="neo4j",
572
- password="llm_agents"
573
- )
574
-
575
- try:
576
- # Example 1: Similarity search
577
- print("\n" + "=" * 60)
578
- print("Example 1: Similarity Search")
579
- print("=" * 60)
580
- user_query = "Reinforcement learning"
581
- similar_papers = searcher.similarity_search(user_query, top_k=30)
582
- for i, paper in enumerate(similar_papers, 1):
583
- print(f"\n{i}. {paper['name']}")
584
- print(f" Topic: {paper['topic']}")
585
- print(f" Similarity: {paper['similarity_score']:.4f}")
586
-
587
- # Example 2: Neighborhood search
588
- if similar_papers:
589
- print("\n" + "=" * 60)
590
- print("Example 2: Neighborhood Search")
591
- print("=" * 60)
592
- paper_id = similar_papers[0]['id']
593
- neighbors = searcher.neighborhood_search(paper_id, min_similarity=0.75)
594
- print(f"\nNeighbors of: {similar_papers[0]['name']}")
595
- for rel_type, neighbors in neighbors.items():
596
- print(f" \n{rel_type.upper()} RELATIONSHIPS:")
597
- for neighbor in neighbors:
598
- print(f" - {neighbor['name']} (similarity: {neighbor['similarity']:.4f})")
599
-
600
- # Example 3: Graph traversal
601
- print("\n" + "=" * 60)
602
- print("Example 3: Graph Traversal (2 hops)")
603
- print("=" * 60)
604
- if similar_papers:
605
- paper_ids = similar_papers[0]['id']
606
- related = searcher.graph_traversal(paper_ids, n_hops=2)
607
- print(f"\nFound {len(related)} related papers through traversal")
608
- for paper in related[:5]: # Show first 5
609
- print(f" - {paper['name']} (distance: {paper['distance']})")
610
-
611
- finally:
612
- searcher.close()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/tools/session_routing/__init__.py DELETED
@@ -1,210 +0,0 @@
1
- """
2
- Session routing tool for building personalized conference visiting schedules.
3
-
4
- This tool helps NeurIPS 2025 conference attendees create optimized schedules
5
- for visiting poster sessions based on their research interests, preferred dates,
6
- and time slots.
7
- """
8
-
9
- import os
10
- from typing import Union, List, Optional
11
- from neo4j import GraphDatabase
12
-
13
- from agentic_nav.tools.knowledge_graph import search_similar_papers
14
- from agentic_nav.tools.session_routing.scheduler import ScheduleBuilder
15
- from agentic_nav.tools.session_routing.utils import parse_date_input, parse_time_preference
16
-
17
-
18
- # Environment variables for Neo4j connection
19
- NEO4J_DB_URI = os.getenv("NEO4J_DB_URI", "bolt://localhost:7687")
20
- NEO4J_USERNAME = os.getenv("NEO4J_USERNAME", "neo4j")
21
- NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "")
22
-
23
-
24
- def build_visit_schedule(
25
- topics: Union[str, List[str]],
26
- dates: Union[str, List[str]] = None,
27
- time_preferences: str = None,
28
- max_papers: int = 20,
29
- min_similarity: float = 0.6
30
- ) -> str:
31
- # TODO: Add filter for ["poster", "oral"]. Make sure to match orals with poster counterpart.
32
- """
33
- Build a personalized visiting schedule for NeurIPS 2025 conference poster sessions.
34
-
35
- This tool helps you create an optimized schedule by:
36
- 1. Finding papers relevant to your research interests (topics)
37
- 2. Filtering by your preferred dates and time slots
38
- 3. Scoring papers by relevance to your topics
39
- 4. Clustering papers by room location to minimize walking
40
- 5. Organizing chronologically for easy navigation
41
-
42
- The schedule includes paper titles, locations, poster positions, and relevance scores.
43
-
44
- Args:
45
- topics: Research topic(s) of interest. Can be a single topic string or a list of topics.
46
- Examples: "transformer architectures", ["reinforcement learning", "multi-agent systems"]
47
- dates: Conference date(s) to include. Can be:
48
- - ISO format: "2025-12-02" or ["2025-12-02", "2025-12-03"]
49
- - Day names: "Tuesday", "Wednesday"
50
- - None (default): include all conference days (Dec 2-7, 2025)
51
- time_preferences: Preferred time slot(s). Can be:
52
- - Preset: "morning" (8am-12pm), "afternoon" (12pm-5pm), "evening" (5pm-9pm)
53
- - Range: "9:00-12:00" or "14-17"
54
- - None (default): include all time slots
55
- max_papers: Maximum number of papers to include in schedule (default: 20)
56
- min_similarity: Minimum similarity score for paper relevance (0.0-1.0, default: 0.6)
57
-
58
- Returns:
59
- Formatted markdown schedule organized by date, time slot, and room location.
60
- All times are displayed in conference local time (PST/UTC-8).
61
-
62
- Restrictions:
63
- - Requires Neo4j database connection (NEO4J_DB_URI, NEO4J_USERNAME, NEO4J_PASSWORD)
64
- - Requires Paper nodes with session timing and location fields
65
- - Conference dates: December 2-7, 2025 in San Diego/Mexico City (UTC-8)
66
-
67
- Notes:
68
- - Papers are scored by similarity to your topics using embedding search
69
- - Schedule optimizes for both relevance and room clustering
70
- - Time zones are automatically converted from UTC to PST
71
- - Poster positions help you quickly locate papers in exhibition halls
72
-
73
- Raises:
74
- ValueError: If topics is empty or dates cannot be parsed
75
- Exception: If Neo4j connection fails
76
-
77
- Example:
78
- >>> build_visit_schedule(
79
- ... topics=["machine learning", "computer vision"],
80
- ... dates="2025-12-02",
81
- ... time_preferences="morning",
82
- ... max_papers=15
83
- ... )
84
- # Your NeurIPS 2025 Conference Schedule
85
-
86
- ## Tuesday, December 2, 2025
87
-
88
- ### 9:00 AM - 11:00 AM PST
89
-
90
- **Hall A**
91
- - **Poster #123** | Attention Mechanisms in Vision Transformers
92
- - Authors: John Doe, Jane Doe, et al.
93
- - Topic: Computer Vision
94
- - Relevance: 0.92
95
- ...
96
- """
97
- # Type coercion for parameters that may come as strings from LLM tool calls
98
- if isinstance(topics, str):
99
- # If topics is a single string, treat as one topic
100
- topics = [topics]
101
- elif topics is None:
102
- raise ValueError("Topics parameter is required. Please provide at least one research topic.")
103
-
104
- if max_papers is not None and not isinstance(max_papers, int):
105
- max_papers = int(max_papers)
106
-
107
- if min_similarity is not None and not isinstance(min_similarity, float):
108
- min_similarity = float(min_similarity)
109
-
110
- # Parse dates
111
- parsed_dates = None
112
- if dates:
113
- if isinstance(dates, str):
114
- dates = [dates]
115
-
116
- parsed_dates = []
117
- for date_str in dates:
118
- parsed = parse_date_input(date_str)
119
- if parsed:
120
- parsed_dates.append(parsed)
121
-
122
- if not parsed_dates:
123
- parsed_dates = None # Fall back to all dates if parsing fails
124
-
125
- # Parse time preferences (convert to UTC for database query)
126
- time_range = None
127
- if time_preferences:
128
- local_time_range = parse_time_preference(time_preferences)
129
- if local_time_range:
130
- # Convert PST to UTC (add 8 hours)
131
- start_utc = (local_time_range[0] + 8) % 24
132
- end_utc = (local_time_range[1] + 8) % 24
133
- time_range = (start_utc, end_utc)
134
-
135
- # Step 1: Search for papers matching each topic using existing tool
136
- all_paper_ids = set()
137
- relevance_scores = {}
138
-
139
- for topic in topics:
140
- try:
141
-
142
- from llm_agents.tools.knowledge_graph.retriever import Neo4jGraphWorker
143
-
144
- worker = Neo4jGraphWorker(
145
- uri=NEO4J_DB_URI,
146
- username=NEO4J_USERNAME,
147
- password=NEO4J_PASSWORD
148
- )
149
-
150
- papers = worker.similarity_search(
151
- user_query=topic,
152
- top_k=max_papers * 2,
153
- min_similarity=min_similarity
154
- )
155
-
156
- worker.close()
157
-
158
- # Extract paper IDs and scores
159
- for paper in papers:
160
- paper_id = paper.get('id')
161
- score = paper.get('score', 0.0)
162
-
163
- if paper_id:
164
- all_paper_ids.add(paper_id)
165
- # Keep highest score if paper matches multiple topics
166
- if paper_id not in relevance_scores or score > relevance_scores[paper_id]:
167
- relevance_scores[paper_id] = score
168
-
169
- except Exception as e:
170
- # If search fails for one topic, continue with others
171
- continue
172
-
173
- if not all_paper_ids:
174
- return "No papers found matching your topics. Try broadening your search criteria or adjusting the minimum similarity threshold."
175
-
176
- # Step 2: Initialize schedule builder
177
- driver = GraphDatabase.driver(NEO4J_DB_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))
178
- builder = ScheduleBuilder(driver)
179
-
180
- try:
181
- # Step 3: Filter papers by date and time
182
- filtered_papers = builder.filter_by_datetime(
183
- paper_ids=list(all_paper_ids),
184
- dates=parsed_dates,
185
- time_range=time_range
186
- )
187
-
188
- if not filtered_papers:
189
- return "No papers found matching your date and time preferences. Try expanding your time range or selecting different dates."
190
-
191
- # Step 4: Score papers by relevance
192
- scored_papers = builder.score_papers(filtered_papers, relevance_scores)
193
-
194
- # Step 5: Optimize schedule (chronological + room clustering)
195
- schedule = builder.optimize_schedule(scored_papers, max_papers=max_papers)
196
-
197
- # Step 6: Format as markdown
198
- markdown_output = builder.format_as_markdown(schedule, include_abstracts=False)
199
-
200
- return markdown_output
201
-
202
- finally:
203
- builder.close()
204
-
205
-
206
- __all__ = ['build_visit_schedule']
207
-
208
-
209
- if __name__ == "__main__":
210
- print(build_visit_schedule(topics=["federated learning"], max_papers=200, dates=["Wednesday"]))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/tools/session_routing/scheduler.py DELETED
@@ -1,377 +0,0 @@
1
- """
2
- Schedule builder for NeurIPS 2025 conference paper sessions.
3
-
4
- This module provides the ScheduleBuilder class that handles filtering,
5
- scoring, and organizing papers into optimized visiting schedules.
6
- """
7
-
8
- from datetime import datetime
9
- from typing import List, Dict, Any, Optional, Tuple
10
- from collections import defaultdict
11
- import neo4j
12
-
13
- from agentic_nav.tools.session_routing.utils import (
14
- convert_utc_to_local,
15
- format_time_slot,
16
- format_date_header,
17
- cluster_papers_by_room,
18
- parse_time_preference
19
- )
20
-
21
-
22
- class ScheduleBuilder:
23
- """
24
- Build optimized conference visiting schedules.
25
-
26
- This class handles filtering papers by date/time, scoring by relevance,
27
- clustering by room location, and formatting the final schedule.
28
- """
29
-
30
- def __init__(self, neo4j_driver: neo4j.Driver):
31
- """
32
- Initialize the schedule builder.
33
-
34
- Args:
35
- neo4j_driver: Neo4j database driver for querying papers
36
- """
37
- self.driver = neo4j_driver
38
-
39
- def filter_by_datetime(
40
- self,
41
- paper_ids: List[str],
42
- dates: Optional[List[datetime]] = None,
43
- time_range: Optional[Tuple[int, int]] = None
44
- ) -> List[Dict[str, Any]]:
45
- """
46
- Filter papers by date and time preferences.
47
-
48
- Args:
49
- paper_ids: List of paper IDs to filter
50
- dates: List of conference dates to include (None = all dates)
51
- time_range: Tuple of (start_hour, end_hour) in UTC (None = all times)
52
-
53
- Returns:
54
- List of paper dictionaries with full details including session times
55
-
56
- Example:
57
- >>> builder.filter_by_datetime(['paper1', 'paper2'], dates=[datetime(2025,12,2)])
58
- """
59
- if not paper_ids:
60
- return []
61
-
62
- # Deduplicate paper_ids to ensure we only query each paper once
63
- unique_paper_ids = list(set(paper_ids))
64
-
65
- # Build Cypher query to get full paper details including authors via relationship
66
- # Relationship is IS_AUTHOR_OF (uppercase) and author property is 'fullname'
67
- query = """
68
- MATCH (p:Paper)
69
- WHERE p.id IN $paper_ids
70
- OPTIONAL MATCH (a:Author)-[:IS_AUTHOR_OF]-(p)
71
- WITH p, collect(a.fullname) as authors
72
- RETURN DISTINCT p.id as id,
73
- p.name as name,
74
- p.abstract as abstract,
75
- p.topic as topic,
76
- p.session as session,
77
- p.session_start_time as session_start_time,
78
- p.session_end_time as session_end_time,
79
- p.room_name as room_name,
80
- p.poster_position as poster_position,
81
- p.presentation_type as presentation_type,
82
- p.url as url,
83
- authors
84
- """
85
-
86
- with self.driver.session() as session:
87
- result = session.run(query, paper_ids=unique_paper_ids)
88
- papers = [dict(record) for record in result]
89
-
90
- # Deduplicate papers by ID (just in case)
91
- seen_ids = set()
92
- unique_papers = []
93
- for paper in papers:
94
- paper_id = paper.get('id')
95
- if paper_id and paper_id not in seen_ids:
96
- seen_ids.add(paper_id)
97
- unique_papers.append(paper)
98
-
99
- papers = unique_papers
100
-
101
- # Filter by date if specified
102
- if dates:
103
- date_strs = [d.strftime("%Y-%m-%d") for d in dates]
104
- papers = [
105
- p for p in papers
106
- if p.get('session_start_time') and
107
- any(date_str in p['session_start_time'] for date_str in date_strs)
108
- ]
109
-
110
- # Filter by time range if specified (convert UTC time range)
111
- if time_range:
112
- start_hour, end_hour = time_range
113
- filtered_papers = []
114
-
115
- for paper in papers:
116
- try:
117
- start_time_str = paper.get('session_start_time', '')
118
- if not start_time_str:
119
- continue
120
-
121
- # Parse UTC time
122
- if 'T' in start_time_str:
123
- dt = datetime.fromisoformat(start_time_str.replace('Z', ''))
124
- else:
125
- continue
126
-
127
- # Check if paper session falls within time range (UTC)
128
- if start_hour <= dt.hour < end_hour:
129
- filtered_papers.append(paper)
130
-
131
- except (ValueError, AttributeError):
132
- # If we can't parse time, include the paper to be safe
133
- filtered_papers.append(paper)
134
-
135
- papers = filtered_papers
136
-
137
- return papers
138
-
139
- def score_papers(
140
- self,
141
- papers: List[Dict[str, Any]],
142
- relevance_scores: Dict[str, float]
143
- ) -> List[Dict[str, Any]]:
144
- """
145
- Add relevance scores to papers.
146
-
147
- Args:
148
- papers: List of paper dictionaries
149
- relevance_scores: Dict mapping paper_id to relevance score
150
-
151
- Returns:
152
- Papers with added 'relevance_score' field, sorted by score descending
153
-
154
- Example:
155
- >>> builder.score_papers(papers, {'paper1': 0.95, 'paper2': 0.87})
156
- """
157
- scored_papers = []
158
-
159
- for paper in papers:
160
- paper_id = paper.get('id')
161
- score = relevance_scores.get(paper_id, 0.0)
162
-
163
- paper_with_score = paper.copy()
164
- paper_with_score['relevance_score'] = score
165
- scored_papers.append(paper_with_score)
166
-
167
- # Sort by relevance score (highest first)
168
- scored_papers.sort(key=lambda p: p['relevance_score'], reverse=True)
169
-
170
- return scored_papers
171
-
172
- def optimize_schedule(
173
- self,
174
- papers: List[Dict[str, Any]],
175
- max_papers: int = 20
176
- ) -> Dict[str, Dict[str, List[Dict[str, Any]]]]:
177
- """
178
- Optimize schedule by grouping papers chronologically and by room.
179
-
180
- Args:
181
- papers: List of scored paper dictionaries
182
- max_papers: Maximum number of papers to include
183
-
184
- Returns:
185
- Nested dict: {date: {time_slot: {room: [papers]}}}
186
-
187
- Example:
188
- >>> schedule = builder.optimize_schedule(papers, max_papers=15)
189
- """
190
- # Deduplicate papers by ID first
191
- seen_ids = set()
192
- unique_papers = []
193
- for paper in papers:
194
- paper_id = paper.get('id')
195
- if paper_id and paper_id not in seen_ids:
196
- seen_ids.add(paper_id)
197
- unique_papers.append(paper)
198
-
199
- # Limit to top papers by relevance
200
- top_papers = unique_papers[:max_papers]
201
-
202
- # Group by date and time
203
- schedule = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
204
-
205
- for paper in top_papers:
206
- try:
207
- start_time = paper.get('session_start_time', '')
208
- if not start_time:
209
- continue
210
-
211
- # Extract date
212
- date_str = start_time.split('T')[0]
213
-
214
- # Create time slot key
215
- end_time = paper.get('session_end_time', '')
216
- time_slot = format_time_slot(start_time, end_time) if end_time else start_time
217
-
218
- # Get room (handle None values, fallback to session for Mexico City papers)
219
- room = paper.get('room_name')
220
- if not room:
221
- # Use session as fallback (e.g., for Mexico City papers)
222
- room = paper.get('session') or 'N/A'
223
-
224
- # Add to schedule
225
- schedule[date_str][time_slot][room].append(paper)
226
-
227
- except (ValueError, AttributeError, IndexError):
228
- # Skip papers with invalid time data
229
- continue
230
-
231
- return schedule
232
-
233
- def format_as_markdown(
234
- self,
235
- schedule: Dict[str, Dict[str, List[Dict[str, Any]]]],
236
- include_abstracts: bool = False
237
- ) -> str:
238
- """
239
- Format schedule as structured markdown.
240
-
241
- Args:
242
- schedule: Nested schedule dictionary
243
- include_abstracts: Whether to include paper abstracts (default: False)
244
-
245
- Returns:
246
- Formatted markdown string with format:
247
- "Date (MM dd, yyyy) - Time Slot - Session Name - Location"
248
-
249
- Example:
250
- >>> markdown = builder.format_as_markdown(schedule)
251
- """
252
- if not schedule:
253
- return "No papers found matching your criteria."
254
-
255
- output = ["# Your NeurIPS 2025 Conference Schedule\n"]
256
-
257
- # Flatten schedule into list of blocks for better formatting
258
- schedule_blocks = []
259
-
260
- for date_str in sorted(schedule.keys()):
261
- time_slots = schedule[date_str]
262
-
263
- for time_slot in sorted(time_slots.keys()):
264
- rooms = time_slots[time_slot]
265
-
266
- for room_or_session in sorted(rooms.keys()):
267
- papers_in_block = rooms[room_or_session]
268
-
269
- # Sort papers by poster position ID (numerically)
270
- def poster_sort_key(paper):
271
- poster_pos = paper.get('poster_position')
272
- if not poster_pos:
273
- return float('inf') # Put papers without position at end
274
-
275
- # Remove '#' prefix if present
276
- if isinstance(poster_pos, str) and poster_pos.startswith('#'):
277
- poster_pos = poster_pos[1:]
278
-
279
- # Convert to integer for numerical sorting
280
- try:
281
- return int(poster_pos)
282
- except (ValueError, TypeError):
283
- return float('inf') # Put invalid positions at end
284
-
285
- papers_in_block.sort(key=poster_sort_key)
286
-
287
- schedule_blocks.append({
288
- 'date': date_str,
289
- 'time_slot': time_slot,
290
- 'room_or_session': room_or_session,
291
- 'papers': papers_in_block
292
- })
293
-
294
- # Format each schedule block
295
- total_papers = 0
296
- for block in schedule_blocks:
297
- date_str = block['date']
298
- time_slot = block['time_slot']
299
- room_or_session = block['room_or_session']
300
- papers = block['papers']
301
-
302
- total_papers += len(papers)
303
-
304
- # Get session and location from first paper (all papers in block share these)
305
- if papers:
306
- first_paper = papers[0]
307
- session_name = first_paper.get('session', 'N/A')
308
- actual_room = first_paper.get('room_name')
309
-
310
- # Determine location: use room if available, otherwise indicate session-based location
311
- if actual_room:
312
- location = actual_room
313
- else:
314
- location = "Mexico City" # Papers without room are from Mexico City
315
-
316
- else:
317
- session_name = room_or_session
318
- location = room_or_session
319
-
320
- # Format date as "Month DD, YYYY"
321
- try:
322
- from datetime import datetime
323
- dt = datetime.fromisoformat(date_str)
324
- formatted_date = dt.strftime("%B %d, %Y")
325
- except:
326
- formatted_date = date_str
327
-
328
- # Create a comprehensive header
329
- header = f"## {formatted_date} - {time_slot} - {session_name} - {location}\n"
330
- output.append(f"\n{header}")
331
-
332
- # List papers in this block
333
- for paper in papers:
334
- title = paper.get('name', 'Untitled')
335
- poster_pos = paper.get('poster_position', 'N/A')
336
- # TODO: This needs to be the distance between the user input query and the paper embedding, i.e.,
337
- # compare encoded user_input with "embedding" in database.
338
- relevance = paper.get('relevance_score', 0)
339
- topic = paper.get('topic', 'General')
340
- pres_type = paper.get('presentation_type', 'Poster')
341
- authors = paper.get('authors', 'N/A')
342
-
343
- # Format authors for display
344
- if isinstance(authors, list):
345
- authors_str = ', '.join(authors) if authors else 'N/A'
346
- elif authors and authors != 'N/A':
347
- authors_str = str(authors)
348
- else:
349
- authors_str = 'N/A'
350
-
351
- # Format paper entry
352
- output.append(f"- **{pres_type} {poster_pos.replace('#', '') if poster_pos is not None else ''}** | {title}")
353
- output.append(f" - Authors: {authors_str}")
354
- output.append(f" - Topic: {topic}")
355
-
356
- # Add paper URL if available
357
- paper_url = paper.get('url')
358
- if paper_url:
359
- output.append(f" - URL: {paper_url}")
360
-
361
- output.append(f" - Relevance: {relevance:.2f}")
362
-
363
- if include_abstracts and paper.get('abstract'):
364
- abstract = paper['abstract'][:200] + "..." if len(paper['abstract']) > 200 else paper['abstract']
365
- output.append(f" - Abstract: {abstract}")
366
-
367
- output.append("") # Blank line between papers
368
-
369
- # Add summary footer
370
- output.append(f"\n---\n**Total Papers in Schedule: {total_papers}**")
371
-
372
- return "\n".join(output)
373
-
374
- def close(self):
375
- """Close the Neo4j driver connection."""
376
- if self.driver:
377
- self.driver.close()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/tools/session_routing/utils.py DELETED
@@ -1,253 +0,0 @@
1
- """
2
- Utility functions for session routing and schedule building.
3
-
4
- This module provides helper functions for time zone conversion,
5
- date parsing, and formatting schedule outputs.
6
- """
7
-
8
- from datetime import datetime, timedelta
9
- from typing import Optional, Tuple
10
- import re
11
-
12
-
13
- def convert_utc_to_local(utc_time_str: str, timezone_offset: int = -8) -> str:
14
- """
15
- Convert UTC time string to local conference time.
16
-
17
- Args:
18
- utc_time_str: ISO format UTC time string (e.g., "2025-12-02T17:00:00Z")
19
- timezone_offset: Hours offset from UTC (default: -8 for PST/Mexico City)
20
-
21
- Returns:
22
- Local time string in format "9:00 AM PST"
23
-
24
- Raises:
25
- ValueError: If time string cannot be parsed
26
-
27
- Example:
28
- >>> convert_utc_to_local("2025-12-02T17:00:00Z")
29
- "9:00 AM PST"
30
- """
31
- try:
32
- # Handle various UTC time formats
33
- utc_time_str = utc_time_str.strip()
34
- if utc_time_str.endswith('Z'):
35
- utc_time_str = utc_time_str[:-1]
36
- elif '+' in utc_time_str or utc_time_str.count('-') > 2:
37
- # Has timezone info, extract just the datetime part
38
- utc_time_str = utc_time_str.split('+')[0].split('T')[0] + 'T' + utc_time_str.split('T')[1].split('+')[0].split('-')[0]
39
-
40
- # Parse the UTC time
41
- if 'T' in utc_time_str:
42
- utc_dt = datetime.fromisoformat(utc_time_str)
43
- else:
44
- # Try parsing without T separator
45
- utc_dt = datetime.strptime(utc_time_str, "%Y-%m-%d %H:%M:%S")
46
-
47
- # Apply timezone offset
48
- local_dt = utc_dt + timedelta(hours=timezone_offset)
49
-
50
- # Format as human-readable time
51
- hour = local_dt.hour
52
- minute = local_dt.minute
53
- am_pm = "AM" if hour < 12 else "PM"
54
- hour_12 = hour if hour <= 12 else hour - 12
55
- hour_12 = 12 if hour_12 == 0 else hour_12
56
-
57
- if minute == 0:
58
- time_str = f"{hour_12}:00 {am_pm} PST"
59
- else:
60
- time_str = f"{hour_12}:{minute:02d} {am_pm} PST"
61
-
62
- return time_str
63
- except (ValueError, AttributeError) as e:
64
- raise ValueError(f"Could not parse time string '{utc_time_str}': {e}")
65
-
66
-
67
- def parse_date_input(date_str: str) -> Optional[datetime]:
68
- """
69
- Parse flexible date input formats.
70
-
71
- Supports:
72
- - ISO format: "2025-12-02"
73
- - Day names: "Monday", "Tuesday", etc.
74
- - Relative: "today", "tomorrow"
75
-
76
- Args:
77
- date_str: Date string in various formats
78
-
79
- Returns:
80
- Datetime object or None if parsing fails
81
-
82
- Example:
83
- >>> parse_date_input("2025-12-02")
84
- datetime.datetime(2025, 12, 2, 0, 0)
85
- """
86
- if not date_str:
87
- return None
88
-
89
- date_str = date_str.strip().lower()
90
-
91
- # Try ISO format first
92
- try:
93
- return datetime.fromisoformat(date_str)
94
- except ValueError:
95
- pass
96
-
97
- # Try common date formats
98
- for fmt in ["%Y-%m-%d", "%m/%d/%Y", "%d/%m/%Y", "%B %d, %Y", "%b %d, %Y"]:
99
- try:
100
- return datetime.strptime(date_str, fmt)
101
- except ValueError:
102
- continue
103
-
104
- # Handle day names (for NeurIPS 2025: Dec 2-7, 2025)
105
- conference_start = datetime(2025, 12, 2) # Tuesday
106
- day_mapping = {
107
- 'monday': conference_start - timedelta(days=1),
108
- 'tuesday': conference_start,
109
- 'wednesday': conference_start + timedelta(days=1),
110
- 'thursday': conference_start + timedelta(days=2),
111
- 'friday': conference_start + timedelta(days=3),
112
- 'saturday': conference_start + timedelta(days=4),
113
- 'sunday': conference_start + timedelta(days=5),
114
- }
115
-
116
- if date_str in day_mapping:
117
- return day_mapping[date_str]
118
-
119
- return None
120
-
121
-
122
- def parse_time_preference(time_pref: str) -> Optional[Tuple[int, int]]:
123
- """
124
- Parse time preference string into hour range.
125
-
126
- Args:
127
- time_pref: Time preference like "morning", "afternoon", "9:00-12:00"
128
-
129
- Returns:
130
- Tuple of (start_hour, end_hour) in 24-hour format, or None
131
-
132
- Example:
133
- >>> parse_time_preference("morning")
134
- (8, 12)
135
- >>> parse_time_preference("9:00-15:00")
136
- (9, 15)
137
- """
138
- if not time_pref:
139
- return None
140
-
141
- time_pref = time_pref.strip().lower()
142
-
143
- # Predefined time slots
144
- presets = {
145
- 'morning': (8, 12),
146
- 'afternoon': (12, 17),
147
- 'evening': (17, 21),
148
- 'early': (8, 10),
149
- 'late': (19, 21),
150
- }
151
-
152
- if time_pref in presets:
153
- return presets[time_pref]
154
-
155
- # Parse time range format: "9:00-12:00" or "09:00-12:00" or "9-12"
156
- range_pattern = r'(\d{1,2})(?::(\d{2}))?[\s\-]+(\d{1,2})(?::(\d{2}))?'
157
- match = re.match(range_pattern, time_pref)
158
-
159
- if match:
160
- start_hour = int(match.group(1))
161
- end_hour = int(match.group(3))
162
- return (start_hour, end_hour)
163
-
164
- return None
165
-
166
-
167
- def format_time_slot(start_time: str, end_time: str) -> str:
168
- """
169
- Format time slot for display.
170
-
171
- Args:
172
- start_time: Start time in UTC format
173
- end_time: End time in UTC format
174
-
175
- Returns:
176
- Formatted time range string
177
-
178
- Example:
179
- >>> format_time_slot("2025-12-02T17:00:00Z", "2025-12-02T19:00:00Z")
180
- "9:00 AM - 11:00 AM PST"
181
- """
182
- try:
183
- start_local = convert_utc_to_local(start_time)
184
- end_local = convert_utc_to_local(end_time)
185
-
186
- # Remove PST from start time if both are same timezone
187
- if start_local.endswith(' PST') and end_local.endswith(' PST'):
188
- start_local = start_local[:-4]
189
-
190
- return f"{start_local} - {end_local}"
191
- except ValueError:
192
- return f"{start_time} - {end_time}"
193
-
194
-
195
- def format_date_header(date_str: str) -> str:
196
- """
197
- Format date for section headers.
198
-
199
- Args:
200
- date_str: Date string (ISO format or datetime)
201
-
202
- Returns:
203
- Formatted date like "Tuesday, December 2, 2025"
204
-
205
- Example:
206
- >>> format_date_header("2025-12-02")
207
- "Tuesday, December 2, 2025"
208
- """
209
- try:
210
- if isinstance(date_str, str):
211
- dt = datetime.fromisoformat(date_str.split('T')[0])
212
- else:
213
- dt = date_str
214
-
215
- return dt.strftime("%A, %B %d, %Y")
216
- except (ValueError, AttributeError):
217
- return str(date_str)
218
-
219
-
220
- def cluster_papers_by_room(papers: list, time_slot_key: str = 'session') -> dict:
221
- """
222
- Group papers by room within their time slots.
223
-
224
- Args:
225
- papers: List of paper dictionaries with room_name and session info
226
- time_slot_key: Key to group by time slots (default: 'session')
227
-
228
- Returns:
229
- Nested dict: {time_slot: {room_name: [papers]}}
230
-
231
- Example:
232
- >>> papers = [
233
- ... {'session': 'Morning', 'room_name': 'Hall A', 'name': 'Paper 1'},
234
- ... {'session': 'Morning', 'room_name': 'Hall A', 'name': 'Paper 2'},
235
- ... ]
236
- >>> cluster_papers_by_room(papers)
237
- {'Morning': {'Hall A': [...]}}
238
- """
239
- clustered = {}
240
-
241
- for paper in papers:
242
- time_slot = paper.get(time_slot_key, 'Unknown Session')
243
- room = paper.get('room_name', 'Unknown Room')
244
-
245
- if time_slot not in clustered:
246
- clustered[time_slot] = {}
247
-
248
- if room not in clustered[time_slot]:
249
- clustered[time_slot][room] = []
250
-
251
- clustered[time_slot][room].append(paper)
252
-
253
- return clustered
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/utils/__init__.py DELETED
@@ -1,3 +0,0 @@
1
- from agentic_nav.utils.tooling import infer_tool, _json_type
2
- from agentic_nav.utils.logger import setup_logging
3
- from agentic_nav.utils.embedding_generator import batch_embed_documents
 
 
 
 
agentic_nav/utils/cli/__init__.py DELETED
@@ -1,3 +0,0 @@
1
- from agentic_nav.utils.cli.editor import open_editor
2
- from agentic_nav.utils.cli.help import print_help
3
- from agentic_nav.utils.cli.history import show_history
 
 
 
 
agentic_nav/utils/cli/editor.py DELETED
@@ -1,29 +0,0 @@
1
- import os
2
- import tempfile
3
-
4
-
5
- def open_editor(initial_text=""):
6
- editor = os.environ.get("EDITOR")
7
- if not editor:
8
- # Minimal sensible defaults
9
- if os.name == "nt":
10
- editor = "notepad"
11
- else:
12
- editor = "nano"
13
- with tempfile.NamedTemporaryFile(suffix=".md", delete=False, mode="w+", encoding="utf-8") as tf:
14
- path = tf.name
15
- tf.write(initial_text)
16
- tf.flush()
17
- try:
18
- # Open editor and wait
19
- rc = os.system(f'{editor} "{path}"')
20
- if rc != 0:
21
- print(f"(editor exit code {rc})")
22
- with open(path, "r", encoding="utf-8") as f:
23
- content = f.read()
24
- finally:
25
- try:
26
- os.unlink(path)
27
- except Exception:
28
- pass
29
- return content.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/utils/cli/help.py DELETED
@@ -1,14 +0,0 @@
1
-
2
-
3
- def print_help():
4
- help_text = """
5
- Commands:
6
- /help Show this help
7
- /exit Exit the chat
8
- /system Set or replace system prompt (multi-line via $EDITOR)
9
- /edit Compose multi-line user message via $EDITOR
10
- /history Show conversation history (JSON)
11
- /save <path> Save conversation history to a file (JSON)
12
- Typing anything else will send it as a user message.
13
- """
14
- print(help_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/utils/cli/history.py DELETED
@@ -1,11 +0,0 @@
1
-
2
- def show_history(messages):
3
- for i, m in enumerate(messages):
4
- ts = m.get("_ts", "")
5
- role = m.get("role", "")
6
- content = m.get("content", "")
7
- header = f"[{i}] {role} {ts}"
8
- print(header)
9
- print("-" * len(header))
10
- print(content)
11
- print()
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/utils/embedding_generator.py DELETED
@@ -1,151 +0,0 @@
1
- import logging
2
-
3
- import litellm
4
- import numpy as np
5
- import spaces
6
-
7
- from litellm import embedding
8
- from sentence_transformers import SentenceTransformer
9
- from tqdm import tqdm
10
-
11
- from typing import List
12
-
13
-
14
- LOGGER = logging.getLogger(__name__)
15
- local_embedding_model = None
16
-
17
-
18
- class EmbeddingResponse:
19
- def __init__(self, embeddings):
20
- self.data = [
21
- type('obj', (), {
22
- 'embedding': emb.tolist(),
23
- 'index': idx
24
- })()
25
- for idx, emb in enumerate(embeddings)
26
- ]
27
-
28
-
29
- def _get_local_model(embedding_model_name: str = "nomic-ai/nomic-embed-text-v1.5"):
30
- """Lazy load the embedding model only once"""
31
- global local_embedding_model
32
- if local_embedding_model is None:
33
- LOGGER.info(f"Loading embedding model: {embedding_model_name}")
34
- local_embedding_model = SentenceTransformer(
35
- embedding_model_name,
36
- trust_remote_code=True
37
- )
38
- return local_embedding_model
39
-
40
-
41
- @spaces.GPU
42
- def embed_hf_spaces(input, embedding_model_name: str = "nomic-ai/nomic-embed-text-v1.5", api_base=None, **kwargs):
43
- """
44
- Drop-in replacement for litellm.embedding()
45
-
46
- Args:
47
- input: Single string or list of strings to embed
48
- embedding_model_name: HuggingFace model name to use
49
- api_base: Ignored for local embedding
50
- **kwargs: Additional args like num_ctx (ignored for local)
51
-
52
- Returns:
53
- Object with same structure as LiteLLM response
54
- """
55
- # Get model (loads only on first call)
56
- model_instance = _get_local_model(embedding_model_name)
57
-
58
- texts = [input] if isinstance(input, str) else input
59
- embeddings = model_instance.encode(
60
- texts,
61
- convert_to_tensor=True,
62
- show_progress_bar=False,
63
- normalize_embeddings=True
64
- )
65
-
66
- embeddings_np = embeddings.cpu().numpy()
67
-
68
- return EmbeddingResponse(embeddings_np)
69
-
70
-
71
- def embedding_fn(model, input, api_base, **kwargs):
72
- if api_base == "hf_spaces_local":
73
- return embed_hf_spaces(input=input, embedding_model_name=model, api_base=api_base, **kwargs)
74
- elif "localhost" in api_base or "ollama.com" in api_base:
75
- return embedding(input=input, model=model, api_base=api_base, **kwargs)
76
- else:
77
- raise NotImplementedError(f"Unknown api_base for provider {api_base}. Available options: hf_spaces_local, ollama local (http://localhost:11435), ollama cloud (https://ollama.com)")
78
-
79
-
80
- def batch_embed_documents(
81
- texts: List[str],
82
- batch_size: int = 1,
83
- embedding_model: str = "nomic-ai/nomic-embed-text-v1.5",
84
- api_base: str = "hf_spaces_local",
85
- show_progress: bool = False,
86
- ) -> np.ndarray:
87
-
88
- if not texts:
89
- return np.array([], dtype="float32").reshape(0, 0)
90
-
91
- if None in texts:
92
- LOGGER.warning(f"WARNING: Detected documents with 'None' values. Replacing 'None' with an empty string...")
93
- texts = ['' if doc is None else doc for doc in texts]
94
-
95
- vecs: List[List[float]] = []
96
- for i in tqdm(range(0, len(texts), batch_size), disable=not show_progress):
97
- chunk = texts[i:i + batch_size]
98
- try:
99
- resp = embedding_fn(
100
- model=embedding_model,
101
- input=chunk,
102
- api_base=api_base,
103
- **{"num_ctx": 2048}
104
- )
105
- except Exception as e:
106
- LOGGER.error(f"Error during embedding batch {i}-{i + batch_size}: {e}. Falling back to single sample processing")
107
- individual_responses = []
108
- ctr = i
109
- for sample in chunk:
110
- try:
111
- individual_responses.append(
112
- embedding_fn(
113
- model=embedding_model,
114
- input=sample,
115
- api_base=api_base,
116
- **{"num_ctx": 2048}
117
- )
118
- )
119
- except litellm.BadRequestError:
120
- LOGGER.error(f"Encountered error processing paper #{ctr}. Please inspect and retry afterwards.")
121
- ctr += 1
122
-
123
- LOGGER.debug(f"Single sample response from embedding model: {individual_responses}")
124
-
125
- # Extract embeddings from individual responses
126
- for individual_resp in individual_responses:
127
- vecs.extend([d["embedding"] for d in individual_resp.data])
128
- else:
129
- # Normal batch processing
130
- vecs.extend([d["embedding"] for d in resp.data])
131
-
132
- arr = np.array(vecs, dtype="float32")
133
- # cosine similarity: normalize to unit length and use IndexFlatIP
134
- norms = np.linalg.norm(arr, axis=1, keepdims=True) + 1e-12
135
- return arr / norms
136
-
137
-
138
- if __name__ == "__main__":
139
- res = batch_embed_documents(
140
- texts=[
141
- "test1",
142
- "test2",
143
- "test3",
144
- "test4",
145
- "test5"
146
- ],
147
- batch_size=2,
148
- embedding_model="ollama/nomic-embed-text",
149
- api_base="http://localhost:11435"
150
- )
151
- print(f"Result shape: {res.shape}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/utils/file_handlers.py DELETED
@@ -1,10 +0,0 @@
1
- import json
2
-
3
-
4
- def save_chat_history(messages, path):
5
- try:
6
- with open(path, "w", encoding="utf-8") as f:
7
- json.dump(messages, f, indent=2, ensure_ascii=False)
8
- print(f"Saved to {path}")
9
- except Exception as e:
10
- print("Save failed:", e)
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/utils/logger.py DELETED
@@ -1,49 +0,0 @@
1
- import logging
2
- import logging.handlers
3
-
4
- from datetime import datetime
5
- from pathlib import Path
6
-
7
-
8
-
9
- def setup_logging(log_dir: str = "logs", level: str = "INFO", console_level: str = "WARNING"):
10
- """
11
- Configure logging for the entire application.
12
-
13
- Args:
14
- log_dir: Directory for log files
15
- level: Root logger level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
16
- console_level: Console handler level - defaults to WARNING to avoid
17
- interfering with CLI display. Set to INFO for verbose output.
18
- """
19
- Path(log_dir).mkdir(exist_ok=True)
20
-
21
- # Root logger configuration
22
- root_logger = logging.getLogger()
23
- root_logger.setLevel(getattr(logging, level.upper()))
24
-
25
- # Console handler - set to WARNING by default to not interfere with CLI display
26
- console_handler = logging.StreamHandler()
27
- console_handler.setLevel(getattr(logging, console_level.upper()))
28
- console_format = logging.Formatter(
29
- "%(asctime)s - %(levelname)s - %(name)s - %(message)s",
30
- datefmt="%Y-%m-%d %H:%M:%S"
31
- )
32
- console_handler.setFormatter(console_format)
33
-
34
- # File handler - for production
35
- time_now = datetime.now().strftime("%Y-%m-%d_%H-%M")
36
-
37
- file_handler = logging.handlers.RotatingFileHandler(
38
- f"{log_dir}/{time_now}_llm_agents.log",
39
- maxBytes=10 * 1024 * 1024, # 10MB
40
- backupCount=5
41
- )
42
- file_handler.setLevel(logging.DEBUG)
43
- file_format = logging.Formatter(
44
- "%(asctime)s - %(name)s - %(levelname)s - %(funcName)s:%(lineno)d - %(message)s"
45
- )
46
- file_handler.setFormatter(file_format)
47
-
48
- root_logger.addHandler(console_handler)
49
- root_logger.addHandler(file_handler)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
agentic_nav/utils/tooling.py DELETED
@@ -1,44 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import inspect
4
-
5
- from typing import Any, Dict, List, Callable, get_args, get_origin, Literal
6
-
7
-
8
- def _json_type(t: Any) -> Dict[str, Any]:
9
- origin, args = get_origin(t), get_args(t)
10
- if origin is Literal:
11
- return {"type": "string", "enum": list(args)}
12
- if origin in (list, List):
13
- return {"type": "array", "items": {"type": "string"}}
14
- if t in (str,): return {"type": "string"}
15
- if t in (int,): return {"type": "integer"}
16
- if t in (float,): return {"type": "number"}
17
- if t in (bool,): return {"type": "boolean"}
18
- return {"type": "string"}
19
-
20
-
21
- def infer_tool(func: Callable[..., Any], tool_args: Dict[Any, Any]) -> Dict[str, Any]:
22
- sig = inspect.signature(func)
23
- hints = getattr(func, "__annotations__", {})
24
- props, required = {}, []
25
- for name, p in sig.parameters.items():
26
- if name in ("self", "cls"): continue
27
- schema = _json_type(hints.get(name, str))
28
- if p.default is inspect._empty: required.append(name)
29
- props[name] = schema
30
-
31
- parameter_values = {}
32
- for arg_name, arg_val in tool_args.items():
33
- if arg_name in props.keys():
34
- parameter_values[arg_name] = arg_val
35
-
36
- return {
37
- "type": "function",
38
- "function": {
39
- "name": func.__name__,
40
- "description": (inspect.getdoc(func) or f"Call {func.__name__}"),
41
- "parameters": {"type": "object", "properties": props, "required": required},
42
- },
43
- "parameter_properties_values": parameter_values
44
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ from agentic_nav.frontend.browser_ui import main
2
+
3
+
4
+ if __name__ == "__main__":
5
+ main()
data/.keep DELETED
File without changes
docker-compose.yaml DELETED
@@ -1,137 +0,0 @@
1
- services:
2
- neo4j_db:
3
- image: neo4j:5.26.0
4
- container_name: neo4j_db
5
- expose:
6
- - "7474"
7
- - "7687"
8
- ports:
9
- - "7474:7474" # HTTP
10
- - "7687:7687" # Bolt
11
- environment:
12
- # Authentication
13
- - NEO4J_AUTH=${NEO4J_USERNAME}/${NEO4J_PASSWORD}
14
-
15
- # Memory settings
16
- - NEO4J_server_memory_heap_initial__size=512m
17
- - NEO4J_server_memory_heap_max__size=2G
18
- - NEO4J_server_memory_pagecache_size=2G
19
- - NEO4J_db_memory_transaction_total_max=3G
20
- - NEO4J_dbms_memory_transaction_total_max=3G
21
-
22
- # APOC plugin (optional but recommended)
23
- - NEO4J_PLUGINS=["apoc"]
24
-
25
- # Accept license (required for Enterprise features, remove if using Community)
26
- # - NEO4J_ACCEPT_LICENSE_AGREEMENT=yes
27
- volumes:
28
- - neo4j_data:/data
29
- - neo4j_logs:/logs
30
- - neo4j_import:/var/lib/neo4j/import
31
- - neo4j_plugins:/plugins
32
- restart: unless-stopped
33
- healthcheck:
34
- test: [ "CMD-SHELL", "cypher-shell -u ${NEO4J_USERNAME:-neo4j} -p ${NEO4J_PASSWORD:-llm_agents} 'RETURN 1'" ]
35
- interval: 10s
36
- timeout: 5s
37
- retries: 10
38
- start_period: 30s
39
- networks:
40
- - llm_agents_net
41
-
42
- ollama_embed:
43
- image: ollama/ollama:latest
44
- container_name: ollama_embed
45
- ports:
46
- - "11435:11434"
47
- volumes:
48
- - ~/.ollama:/root/.ollama
49
- environment:
50
- - OLLAMA_HOST=0.0.0.0
51
- - NVIDIA_VISIBLE_DEVICES=all
52
- - NVIDIA_DRIVER_CAPABILITIES=compute,utility
53
- - EMBEDDING_MODEL_NAME=${EMBEDDING_MODEL_NAME}
54
- restart: always
55
- entrypoint: [ "/bin/bash", "-c", "\
56
- ollama serve & \
57
- sleep 5 && \
58
- ollama pull $EMBEDDING_MODEL_NAME && \
59
- wait" ]
60
- networks:
61
- - llm_agents_net
62
- deploy:
63
- resources:
64
- reservations:
65
- devices:
66
- - driver: nvidia
67
- count: all
68
- capabilities: [ gpu ]
69
-
70
- ollama_agent:
71
- image: ollama/ollama:latest
72
- container_name: ollama_agent
73
- ports:
74
- - "11436:11434"
75
- volumes:
76
- - ~/.ollama:/root/.ollama
77
- environment:
78
- - OLLAMA_HOST=0.0.0.0
79
- - NVIDIA_VISIBLE_DEVICES=all
80
- - NVIDIA_DRIVER_CAPABILITIES=compute,utility
81
- - AGENT_MODEL_NAME=${AGENT_MODEL_NAME}
82
- restart: always
83
- entrypoint: [ "/bin/bash", "-c", "\
84
- ollama serve & \
85
- sleep 5 && \
86
- ollama pull $AGENT_MODEL_NAME && \
87
- wait" ]
88
- networks:
89
- - llm_agents_net
90
- deploy:
91
- resources:
92
- reservations:
93
- devices:
94
- - driver: nvidia
95
- count: all
96
- capabilities: [ gpu ]
97
-
98
- webinterface:
99
- build:
100
- context: .
101
- dockerfile: Dockerfile
102
- container_name: llm-agents-web
103
- ports:
104
- - "7860:7860"
105
- environment:
106
- - PYTHONUNBUFFERED=1
107
- - OLLAMA_API_KEY=${OLLAMA_API_KEY}
108
- - NEO4J_USERNAME=${NEO4J_USERNAME:-neo4j}
109
- - NEO4J_PASSWORD=${NEO4J_PASSWORD:-llm_agents}
110
- - NEO4J_DB_URI=${NEO4J_DB_URI}
111
- - POPULATE_DATABASE_NIPS2025=false
112
- - EMBEDDING_MODEL_NAME=${EMBEDDING_MODEL_NAME}
113
- - EMBEDDING_MODEL_API_BASE=http://ollama_embed:11434
114
- - AGENT_MODEL_NAME=${AGENT_MODEL_NAME}
115
- - AGENT_MODEL_API_BASE=http://ollama_agent:11434
116
- - NEO4J_DB_NODE_RETURN_LIMIT=${NEO4J_DB_NODE_RETURN_LIMIT}
117
- restart: unless-stopped
118
- networks:
119
- - llm_agents_net
120
- depends_on:
121
- neo4j_db:
122
- condition: service_healthy
123
- ollama_embed:
124
- condition: service_started
125
- ollama_agent:
126
- condition: service_started
127
-
128
-
129
- networks:
130
- llm_agents_net:
131
-
132
-
133
- volumes:
134
- neo4j_data:
135
- neo4j_logs:
136
- neo4j_import:
137
- neo4j_plugins:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
graphs/.gitkeep DELETED
File without changes
pyproject.toml DELETED
@@ -1,59 +0,0 @@
1
- [project]
2
- name = "agentic-nav"
3
- version = "0.1.0"
4
- description = "Conference navigation agent leveraging graph databases and semantic search to provide paper recommendations, research network exploration, and automated schedule generation for NeurIPS 2025 attendees."
5
- readme = "README.md"
6
- authors = [
7
- {name = "Shiqiang Wang", email = "s.wang9@exeter.ac.uk"},
8
- {name = "Herbert Woisetschläger", email = "herbert.woisetschlaeger@tum.de"}
9
- ]
10
-
11
- requires-python = ">=3.10"
12
- dependencies = [
13
- "aiofiles",
14
- "einops",
15
- "flask",
16
- "gradio[mcp,oauth]",
17
- "hatchling",
18
- "httpx",
19
- "kaleido",
20
- "litellm",
21
- "neo4j",
22
- "prompt-toolkit",
23
- "pydantic",
24
- "pydantic-settings",
25
- "pyvis>=0.3.2",
26
- "rich>=13.0.0",
27
- "sentence-transformers",
28
- "toon-format",
29
- "torch==2.8.0",
30
- "typer",
31
- ]
32
-
33
- [tool.uv.workspace]
34
- members = [
35
- "litellm",
36
- ]
37
-
38
- [tool.uv.sources]
39
- litellm = { git = "https://github.com/shiqiangw/litellm.git" }
40
- toon-format = { git = "https://github.com/toon-format/toon-python.git" }
41
-
42
- [build-system]
43
- requires = ["hatchling"]
44
- build-backend = "hatchling.build"
45
-
46
- [dependency-groups]
47
- dev = [
48
- "pytest>=9.0.1",
49
- "pytest-asyncio>=1.3.0",
50
- "pytest-cov>=7.0.0",
51
- "pytest-mock>=3.15.1",
52
- ]
53
-
54
- [tool.hatchling.build.targets.wheel]
55
- packages = ["llm_agents"]
56
-
57
- [project.scripts]
58
- agentic-nav-cli = "agentic_nav.frontend.cli:main"
59
- agentic-nav-web = "agentic_nav.frontend.browser_ui:main"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pytest.ini DELETED
@@ -1,26 +0,0 @@
1
- [pytest]
2
- minversion = 6.0
3
- testpaths = tests
4
- python_files = test_*.py
5
- python_classes = Test*
6
- python_functions = test_*
7
- addopts =
8
- --strict-markers
9
- --strict-config
10
- --verbose
11
- markers =
12
- unit: Unit tests
13
- integration: Integration tests (currently skipped, require full setup)
14
- slow: Slow tests that require external services
15
- neo4j: Tests requiring Neo4j database
16
- ollama: Tests requiring Ollama service
17
- no_auto_env: Tests that should not use automatic environment variable loading
18
- asyncio_mode = auto
19
- asyncio_default_fixture_loop_scope = function
20
-
21
- [coverage:run]
22
- source = .
23
- omit =
24
- */tests/*
25
- */test_*
26
- setup.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,6 +1,6 @@
1
  # This file was autogenerated by uv via the following command:
2
  # uv pip compile pyproject.toml
3
- agentiv_nav @ git+https://${GH_USER}:${GH_TOKEN}@github.com/core-aix/agentic-nav.git@dev
4
  aiofiles==24.1.0
5
  # via
6
  # llm-agents (pyproject.toml)
 
1
  # This file was autogenerated by uv via the following command:
2
  # uv pip compile pyproject.toml
3
+ agentic_nav @ git+https://${GH_USER}:${GH_TOKEN}@github.com/core-aix/agentic-nav.git@dev
4
  aiofiles==24.1.0
5
  # via
6
  # llm-agents (pyproject.toml)
scripts/docker-entrypoint.sh DELETED
@@ -1,14 +0,0 @@
1
- #!/bin/bash
2
- set -e
3
-
4
- echo "Neo4j is up - executing command"
5
-
6
- if [ "${POPULATE_DATABASE_NIPS2025}" = "true" ]; then
7
- echo "Importing NeurIPS 2025 papers..."
8
- bash scripts/import_neurips2025_kg.sh
9
- else
10
- echo "Skipping NeurIPS 2025 paper import (POPULATE_DATABASE_NIPS2025 is not set to 'true')"
11
- fi
12
-
13
- echo "Starting main application..."
14
- exec "$@"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/import_neurips2025_kg.sh DELETED
@@ -1,13 +0,0 @@
1
- #!/bin/bash
2
-
3
- # Download the pre-built knowledge graph
4
- wget -O graphs/neurips2025_knowledge_graph.pkl https://syncandshare.lrz.de/dl/fiJPiUkKp1SZAqRX2m76S6/knowledge_graph_thresh_0.6_v3.pkl
5
-
6
- # Import the knowledge graph to the database
7
- uv run llm_agents/tools/knowledge_graph/neo4j_db_importer.py \
8
- --graph-path graphs/neurips2025_knowledge_graph.pkl \
9
- --neo4j-uri bolt://neo4j_db:7687 \
10
- --neo4j-username $NEO4J_USERNAME \
11
- --neo4j-password $NEO4J_PASSWORD \
12
- --batch-size 100 \
13
- --embedding-dimension 768
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/prepare_gradio.sh DELETED
@@ -1,18 +0,0 @@
1
- #!/bin/bash
2
-
3
- set -e
4
-
5
- # Only initialize submodules if not in Docker (gradio folder not present)
6
- if [ ! -d "gradio/.git" ]; then
7
- echo "Initializing and updating git submodules..."
8
- git submodule update --init --recursive
9
- cd gradio
10
- echo "Pinned gradio version to GIT revision 648169d85fbeeffc184115c4c92b12957f2a162f (Nov. 12, 2025)"
11
- git checkout 648169d85fbeeffc184115c4c92b12957f2a162f
12
- cd ..
13
- fi
14
-
15
- echo "Building Gradio frontend..."
16
- cd gradio
17
- bash scripts/build_frontend.sh
18
- cd ..
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tests/__init__.py DELETED
@@ -1 +0,0 @@
1
- # Test package