Upload 35 files
Browse files- .gitignore +178 -0
- Dockerfile +23 -0
- alembic.ini +116 -0
- app/__init__.py +0 -0
- app/api/__init__.py +0 -0
- app/api/v1/__init__.py +0 -0
- app/api/v1/chat.py +370 -0
- app/api/v1/repositories.py +277 -0
- app/api/v1/router.py +9 -0
- app/core/config.py +24 -0
- app/core/database.py +28 -0
- app/core/security.py +39 -0
- app/database.py +30 -0
- app/main.py +72 -0
- app/models/__init__.py +5 -0
- app/models/conversation.py +38 -0
- app/models/repository.py +31 -0
- app/schemas/__init__.py +0 -0
- app/schemas/chat.py +42 -0
- app/schemas/repository.py +28 -0
- app/services/__init__.py +6 -0
- app/services/chat_service.py +128 -0
- app/services/embedding_service.py +63 -0
- app/services/github_service.py +150 -0
- app/services/vector_service.py +118 -0
- app/utils/__init__.py +0 -0
- migrations/README +1 -0
- migrations/env.py +57 -0
- migrations/script.py.mako +26 -0
- migrations/versions/16e292816c22_initial_migration.py +83 -0
- migrations/versions/2e8f053488b9_clean_qodex_architecture_no_user_.py +46 -0
- render.yaml +11 -0
- requirements.txt +27 -0
- run_server.py +10 -0
- tests/__init__.py +0 -0
.gitignore
ADDED
|
@@ -0,0 +1,178 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Byte-compiled / optimized / DLL files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
|
| 6 |
+
# C extensions
|
| 7 |
+
*.so
|
| 8 |
+
|
| 9 |
+
# Distribution / packaging
|
| 10 |
+
.Python
|
| 11 |
+
build/
|
| 12 |
+
develop-eggs/
|
| 13 |
+
dist/
|
| 14 |
+
downloads/
|
| 15 |
+
eggs/
|
| 16 |
+
.eggs/
|
| 17 |
+
lib/
|
| 18 |
+
lib64/
|
| 19 |
+
parts/
|
| 20 |
+
sdist/
|
| 21 |
+
var/
|
| 22 |
+
wheels/
|
| 23 |
+
share/python-wheels/
|
| 24 |
+
*.egg-info/
|
| 25 |
+
.installed.cfg
|
| 26 |
+
*.egg
|
| 27 |
+
MANIFEST
|
| 28 |
+
|
| 29 |
+
# PyInstaller
|
| 30 |
+
# Usually these files are written by a python script from a template
|
| 31 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 32 |
+
*.manifest
|
| 33 |
+
*.spec
|
| 34 |
+
|
| 35 |
+
# Installer logs
|
| 36 |
+
pip-log.txt
|
| 37 |
+
pip-delete-this-directory.txt
|
| 38 |
+
|
| 39 |
+
# Unit test / coverage reports
|
| 40 |
+
htmlcov/
|
| 41 |
+
.tox/
|
| 42 |
+
.nox/
|
| 43 |
+
.coverage
|
| 44 |
+
.coverage.*
|
| 45 |
+
.cache
|
| 46 |
+
nosetests.xml
|
| 47 |
+
coverage.xml
|
| 48 |
+
*.cover
|
| 49 |
+
*.py,cover
|
| 50 |
+
.hypothesis/
|
| 51 |
+
.pytest_cache/
|
| 52 |
+
cover/
|
| 53 |
+
|
| 54 |
+
# Translations
|
| 55 |
+
*.mo
|
| 56 |
+
*.pot
|
| 57 |
+
|
| 58 |
+
# Django stuff:
|
| 59 |
+
*.log
|
| 60 |
+
local_settings.py
|
| 61 |
+
db.sqlite3
|
| 62 |
+
db.sqlite3-journal
|
| 63 |
+
|
| 64 |
+
# Flask stuff:
|
| 65 |
+
instance/
|
| 66 |
+
.webassets-cache
|
| 67 |
+
|
| 68 |
+
# Scrapy stuff:
|
| 69 |
+
.scrapy
|
| 70 |
+
|
| 71 |
+
# Sphinx documentation
|
| 72 |
+
docs/_build/
|
| 73 |
+
|
| 74 |
+
# PyBuilder
|
| 75 |
+
.pybuilder/
|
| 76 |
+
target/
|
| 77 |
+
|
| 78 |
+
# Jupyter Notebook
|
| 79 |
+
.ipynb_checkpoints
|
| 80 |
+
|
| 81 |
+
# IPython
|
| 82 |
+
profile_default/
|
| 83 |
+
ipython_config.py
|
| 84 |
+
|
| 85 |
+
# pyenv
|
| 86 |
+
# For a library or package, you might want to ignore these files since the code is
|
| 87 |
+
# intended to run in multiple environments; otherwise, check them in:
|
| 88 |
+
# .python-version
|
| 89 |
+
|
| 90 |
+
# pipenv
|
| 91 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
| 92 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
| 93 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
| 94 |
+
# install all needed dependencies.
|
| 95 |
+
#Pipfile.lock
|
| 96 |
+
|
| 97 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
| 98 |
+
__pypackages__/
|
| 99 |
+
|
| 100 |
+
# Celery stuff
|
| 101 |
+
celerybeat-schedule
|
| 102 |
+
celerybeat.pid
|
| 103 |
+
|
| 104 |
+
# SageMath parsed files
|
| 105 |
+
*.sage.py
|
| 106 |
+
|
| 107 |
+
# Environments
|
| 108 |
+
.env
|
| 109 |
+
.venv
|
| 110 |
+
env/
|
| 111 |
+
venv/
|
| 112 |
+
ENV/
|
| 113 |
+
env.bak/
|
| 114 |
+
venv.bak/
|
| 115 |
+
|
| 116 |
+
# Spyder project settings
|
| 117 |
+
.spyderproject
|
| 118 |
+
.spyproject
|
| 119 |
+
|
| 120 |
+
# Rope project settings
|
| 121 |
+
.ropeproject
|
| 122 |
+
|
| 123 |
+
# mkdocs documentation
|
| 124 |
+
/site
|
| 125 |
+
|
| 126 |
+
# mypy
|
| 127 |
+
.mypy_cache/
|
| 128 |
+
.dmypy.json
|
| 129 |
+
dmypy.json
|
| 130 |
+
|
| 131 |
+
# Pyre type checker
|
| 132 |
+
.pyre/
|
| 133 |
+
|
| 134 |
+
# pytype static type analyzer
|
| 135 |
+
.pytype/
|
| 136 |
+
|
| 137 |
+
# Cython debug symbols
|
| 138 |
+
cython_debug/
|
| 139 |
+
|
| 140 |
+
# PyCharm
|
| 141 |
+
# JetBrains specific template is maintainted in a separate JetBrains.gitignore that can
|
| 142 |
+
# be found at https://github.com/github/gitignore/blob/master/Global/JetBrains.gitignore
|
| 143 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
| 144 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
| 145 |
+
#.idea/
|
| 146 |
+
# ChromaDB Vector Database (local data)
|
| 147 |
+
chroma_db/
|
| 148 |
+
*.bin
|
| 149 |
+
*.sqlite3
|
| 150 |
+
|
| 151 |
+
# Environment files
|
| 152 |
+
.env
|
| 153 |
+
.env.local
|
| 154 |
+
|
| 155 |
+
# Python cache
|
| 156 |
+
__pycache__/
|
| 157 |
+
*.pyc
|
| 158 |
+
*.pyo
|
| 159 |
+
|
| 160 |
+
# Virtual environment
|
| 161 |
+
venv/
|
| 162 |
+
env/
|
| 163 |
+
|
| 164 |
+
# IDE files
|
| 165 |
+
.vscode/
|
| 166 |
+
.idea/
|
| 167 |
+
|
| 168 |
+
# OS files
|
| 169 |
+
.DS_Store
|
| 170 |
+
Thumbs.db
|
| 171 |
+
|
| 172 |
+
# Logs
|
| 173 |
+
*.log
|
| 174 |
+
|
| 175 |
+
# Temporary files
|
| 176 |
+
temp/
|
| 177 |
+
tmp/
|
| 178 |
+
|
Dockerfile
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Container image for the QODEX FastAPI backend.
# Targets HuggingFace Spaces, which expects the app to listen on port 7860.
FROM python:3.10.13-slim

WORKDIR /app

# Install system dependencies
# git is needed at runtime to clone user repositories (see GitHubService).
RUN apt-get update && apt-get install -y \
    git \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first (for better caching)
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . .

# Expose port (HuggingFace Spaces uses 7860)
EXPOSE 7860

# Run application (change port to 7860)
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
|
alembic.ini
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# A generic, single database configuration.
|
| 2 |
+
|
| 3 |
+
[alembic]
|
| 4 |
+
# path to migration scripts
|
| 5 |
+
script_location = migrations
|
| 6 |
+
|
| 7 |
+
# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
|
| 8 |
+
# Uncomment the line below if you want the files to be prepended with date and time
|
| 9 |
+
# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
|
| 10 |
+
# for all available tokens
|
| 11 |
+
# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
|
| 12 |
+
|
| 13 |
+
# sys.path path, will be prepended to sys.path if present.
|
| 14 |
+
# defaults to the current working directory.
|
| 15 |
+
prepend_sys_path = .
|
| 16 |
+
|
| 17 |
+
# timezone to use when rendering the date within the migration file
|
| 18 |
+
# as well as the filename.
|
| 19 |
+
# If specified, requires the python-dateutil library that can be
|
| 20 |
+
# installed by adding `alembic[tz]` to the pip requirements
|
| 21 |
+
# string value is passed to dateutil.tz.gettz()
|
| 22 |
+
# leave blank for localtime
|
| 23 |
+
# timezone =
|
| 24 |
+
|
| 25 |
+
# max length of characters to apply to the
|
| 26 |
+
# "slug" field
|
| 27 |
+
# truncate_slug_length = 40
|
| 28 |
+
|
| 29 |
+
# set to 'true' to run the environment during
|
| 30 |
+
# the 'revision' command, regardless of autogenerate
|
| 31 |
+
# revision_environment = false
|
| 32 |
+
|
| 33 |
+
# set to 'true' to allow .pyc and .pyo files without
|
| 34 |
+
# a source .py file to be detected as revisions in the
|
| 35 |
+
# versions/ directory
|
| 36 |
+
# sourceless = false
|
| 37 |
+
|
| 38 |
+
# version location specification; This defaults
|
| 39 |
+
# to migrations/versions. When using multiple version
|
| 40 |
+
# directories, initial revisions must be specified with --version-path.
|
| 41 |
+
# The path separator used here should be the separator specified by "version_path_separator" below.
|
| 42 |
+
# version_locations = %(here)s/bar:%(here)s/bat:migrations/versions
|
| 43 |
+
|
| 44 |
+
# version path separator; As mentioned above, this is the character used to split
|
| 45 |
+
# version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
|
| 46 |
+
# If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
|
| 47 |
+
# Valid values for version_path_separator are:
|
| 48 |
+
#
|
| 49 |
+
# version_path_separator = :
|
| 50 |
+
# version_path_separator = ;
|
| 51 |
+
# version_path_separator = space
|
| 52 |
+
version_path_separator = os # Use os.pathsep. Default configuration used for new projects.
|
| 53 |
+
|
| 54 |
+
# set to 'true' to search source files recursively
|
| 55 |
+
# in each "version_locations" directory
|
| 56 |
+
# new in Alembic version 1.10
|
| 57 |
+
# recursive_version_locations = false
|
| 58 |
+
|
| 59 |
+
# the output encoding used when revision files
|
| 60 |
+
# are written from script.py.mako
|
| 61 |
+
# output_encoding = utf-8
|
| 62 |
+
|
| 63 |
+
# NOTE(review): database credentials are committed to version control here.
# Prefer sourcing the URL from an environment variable in migrations/env.py
# and leaving a placeholder in this file.
sqlalchemy.url = postgresql://codequery_user:codequery_pass_2025@localhost:5432/codequery_dev
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
[post_write_hooks]
|
| 67 |
+
# post_write_hooks defines scripts or Python functions that are run
|
| 68 |
+
# on newly generated revision scripts. See the documentation for further
|
| 69 |
+
# detail and examples
|
| 70 |
+
|
| 71 |
+
# format using "black" - use the console_scripts runner, against the "black" entrypoint
|
| 72 |
+
# hooks = black
|
| 73 |
+
# black.type = console_scripts
|
| 74 |
+
# black.entrypoint = black
|
| 75 |
+
# black.options = -l 79 REVISION_SCRIPT_FILENAME
|
| 76 |
+
|
| 77 |
+
# lint with attempts to fix using "ruff" - use the exec runner, execute a binary
|
| 78 |
+
# hooks = ruff
|
| 79 |
+
# ruff.type = exec
|
| 80 |
+
# ruff.executable = %(here)s/.venv/bin/ruff
|
| 81 |
+
# ruff.options = --fix REVISION_SCRIPT_FILENAME
|
| 82 |
+
|
| 83 |
+
# Logging configuration
|
| 84 |
+
[loggers]
|
| 85 |
+
keys = root,sqlalchemy,alembic
|
| 86 |
+
|
| 87 |
+
[handlers]
|
| 88 |
+
keys = console
|
| 89 |
+
|
| 90 |
+
[formatters]
|
| 91 |
+
keys = generic
|
| 92 |
+
|
| 93 |
+
[logger_root]
|
| 94 |
+
level = WARN
|
| 95 |
+
handlers = console
|
| 96 |
+
qualname =
|
| 97 |
+
|
| 98 |
+
[logger_sqlalchemy]
|
| 99 |
+
level = WARN
|
| 100 |
+
handlers =
|
| 101 |
+
qualname = sqlalchemy.engine
|
| 102 |
+
|
| 103 |
+
[logger_alembic]
|
| 104 |
+
level = INFO
|
| 105 |
+
handlers =
|
| 106 |
+
qualname = alembic
|
| 107 |
+
|
| 108 |
+
[handler_console]
|
| 109 |
+
class = StreamHandler
|
| 110 |
+
args = (sys.stderr,)
|
| 111 |
+
level = NOTSET
|
| 112 |
+
formatter = generic
|
| 113 |
+
|
| 114 |
+
[formatter_generic]
|
| 115 |
+
format = %(levelname)-5.5s [%(name)s] %(message)s
|
| 116 |
+
datefmt = %H:%M:%S
|
app/__init__.py
ADDED
|
File without changes
|
app/api/__init__.py
ADDED
|
File without changes
|
app/api/v1/__init__.py
ADDED
|
File without changes
|
app/api/v1/chat.py
ADDED
|
@@ -0,0 +1,370 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, Depends, HTTPException, Header
|
| 2 |
+
from sqlalchemy.orm import Session
|
| 3 |
+
from typing import List
|
| 4 |
+
from app.database import get_db
|
| 5 |
+
from app.models.repository import Repository, RepositoryStatusEnum
|
| 6 |
+
from app.models.conversation import Conversation, Message
|
| 7 |
+
from app.core.config import settings
|
| 8 |
+
from app.services import EmbeddingService, VectorService, ChatService
|
| 9 |
+
from pydantic import BaseModel
|
| 10 |
+
import logging
|
| 11 |
+
|
| 12 |
+
logger = logging.getLogger(__name__)
|
| 13 |
+
|
| 14 |
+
router = APIRouter()
|
| 15 |
+
|
| 16 |
+
# Define the models directly in this file
|
| 17 |
+
class ChatRequest(BaseModel):
    """Request model for chat with repository."""
    query: str          # natural-language question about the repository's code
    repository_id: int  # primary key of the target Repository row
|
| 22 |
+
class ChatResponse(BaseModel):
    """Response model for chat."""
    response: str             # the generated answer text (or a fallback message)
    sources: List[dict]       # citation dicts for the code chunks used as context
    repository_name: str      # display name of the repository queried
    context_chunks_used: int  # number of retrieved chunks fed to the model (0 on no hits)
    model_used: str           # model identifier; "no_results" when retrieval found nothing
    success: bool             # False when generation failed or no chunks matched
|
| 31 |
+
def verify_client_secret(x_client_secret: str = Header(..., alias="X-Client-Secret")):
    """FastAPI dependency: admit only requests carrying the shared client secret.

    Raises:
        HTTPException: 403 when the X-Client-Secret header does not match
            ``settings.nextjs_secret``.
    """
    if x_client_secret == settings.nextjs_secret:
        return True
    raise HTTPException(
        status_code=403,
        detail="Unauthorized client - invalid secret"
    )
|
| 40 |
+
def get_user_id(x_user_id: str = Header(..., alias="X-User-ID")):
    """FastAPI dependency: return the trimmed user id from the X-User-ID header.

    Raises:
        HTTPException: 400 when the header value is empty or whitespace-only.
    """
    trimmed = x_user_id.strip() if x_user_id else ""
    if not trimmed:
        raise HTTPException(status_code=400, detail="User ID required")
    return trimmed
|
| 46 |
+
def verify_repository_ownership(repository_id: int, user_id: str, db: Session):
    """Return the repository iff it exists and is owned by *user_id*.

    Responds 404 (not 403) on an ownership mismatch so callers cannot probe
    for the existence of other users' repositories.

    Raises:
        HTTPException: 404 when no matching row is found.
    """
    repo = (
        db.query(Repository)
        .filter(Repository.id == repository_id)
        .filter(Repository.user_id == user_id)
        .first()
    )

    if repo is None:
        raise HTTPException(status_code=404, detail="Repository not found or access denied")

    return repo
|
| 58 |
+
@router.post("/", response_model=ChatResponse)
async def chat_with_repository(
    request: ChatRequest,
    db: Session = Depends(get_db),
    user_id: str = Depends(get_user_id),
    _: bool = Depends(verify_client_secret)
):
    """Answer a natural-language question about a repository (RAG pipeline).

    Pipeline: embed the query, retrieve the top matching code chunks from the
    vector store, then generate an answer with those chunks as context. The
    exchange is persisted best-effort; a storage failure never fails the
    response.

    Raises:
        HTTPException: 404 if the repository is not owned by the caller,
            400 if it is not in READY state, 500 on pipeline errors.
    """
    logger.info(f"💬 QODEX Chat: '{request.query[:60]}...' for repo {request.repository_id} (user: {user_id})")

    # Ownership check doubles as the existence check (404 on miss).
    repository = verify_repository_ownership(request.repository_id, user_id, db)

    if repository.status != RepositoryStatusEnum.READY:
        status_messages = {
            RepositoryStatusEnum.PENDING: "Repository is pending processing. Please wait.",
            RepositoryStatusEnum.PROCESSING: "Repository is currently being processed. Please wait.",
            RepositoryStatusEnum.FAILED: f"Repository processing failed: {repository.error_message}"
        }
        raise HTTPException(
            status_code=400,
            detail=status_messages.get(repository.status, "Repository not ready for chat")
        )

    try:
        # Per-request service construction mirrors the rest of the API.
        embedding_service = EmbeddingService()
        vector_service = VectorService()
        chat_service = ChatService()

        # f-strings with no placeholders replaced by plain strings.
        logger.info("🔍 Generating embedding for query...")
        query_embedding = await embedding_service.generate_query_embedding(request.query)

        logger.info("🔎 Searching for relevant code chunks...")
        similar_chunks = await vector_service.search_similar_code(
            repository_id=request.repository_id,
            query_embedding=query_embedding,
            top_k=5
        )

        if not similar_chunks:
            # No retrieval hits: return 200 with success=False rather than an
            # error so the client can render a friendly message.
            logger.warning(f"⚠️ No relevant chunks found for query in repo {request.repository_id}")
            return ChatResponse(
                response="I couldn't find any relevant code chunks for your question. Try asking about something more specific to this repository, or check if the repository was processed correctly.",
                sources=[],
                repository_name=repository.name,
                context_chunks_used=0,
                model_used="no_results",
                success=False
            )

        logger.info(f"✅ Found {len(similar_chunks)} relevant code chunks")

        logger.info("🤖 Generating AI response with Gemini...")
        ai_response = await chat_service.generate_response(
            query=request.query,
            code_chunks=similar_chunks,
            repository_name=repository.name
        )

        # Persist the exchange best-effort; a storage failure must not lose
        # the answer we already generated.
        if ai_response['success']:
            try:
                # One conversation per repository: reuse it if present.
                conversation = db.query(Conversation).filter(
                    Conversation.repository_id == request.repository_id
                ).first()

                if not conversation:
                    conversation = Conversation(
                        repository_id=request.repository_id,
                        title=f"Chat about {repository.name}"
                    )
                    db.add(conversation)
                    db.commit()
                    db.refresh(conversation)

                # Save user message
                user_message = Message(
                    conversation_id=conversation.id,
                    role="user",
                    content=request.query
                )
                db.add(user_message)

                # Save assistant response
                assistant_message = Message(
                    conversation_id=conversation.id,
                    role="assistant",
                    content=ai_response['response'],
                    citations=ai_response['sources']
                )
                db.add(assistant_message)

                db.commit()
                logger.info(f"💾 Saved conversation for repo {request.repository_id} (user: {user_id})")

            except Exception as save_error:
                # FIX: roll back so the Session is usable after a failed
                # flush/commit; without this, subsequent requests on the same
                # session can fail with PendingRollbackError.
                db.rollback()
                logger.warning(f"⚠️ Failed to save conversation: {save_error}")
                # Continue anyway - don't fail the response

        logger.info(f"🎉 QODEX chat successful for repo {request.repository_id} (user: {user_id})")

        return ChatResponse(
            response=ai_response['response'],
            sources=ai_response['sources'],
            repository_name=repository.name,
            context_chunks_used=len(similar_chunks),
            model_used=ai_response['model_used'],
            success=ai_response['success']
        )

    except Exception as e:
        logger.error(f"❌ Error in QODEX chat processing: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to process chat request: {str(e)}"
        )
|
| 180 |
+
# ✅ NEW: Direct messages route (Option 1 solution!)
|
| 181 |
+
@router.get("/{repository_id}/messages")
async def get_repository_chat_messages(
    repository_id: int,
    db: Session = Depends(get_db),
    user_id: str = Depends(get_user_id),
    _: bool = Depends(verify_client_secret)
):
    """Return the full message history of a repository's conversation.

    Responds with an empty message list (conversation_id=None) when no
    conversation exists yet for the repository.
    """
    repository = verify_repository_ownership(repository_id, user_id, db)

    conversation = (
        db.query(Conversation)
        .filter(Conversation.repository_id == repository_id)
        .first()
    )

    # Start from the "no history" shape and fill it in when data exists.
    payload = {
        "repository_id": repository_id,
        "repository_name": repository.name,
        "user_id": user_id,
        "conversation_id": None,
        "messages": [],
        "total_messages": 0
    }
    if conversation is None:
        return payload

    rows = (
        db.query(Message)
        .filter(Message.conversation_id == conversation.id)
        .order_by(Message.created_at.asc())
        .all()
    )
    serialized = [
        {
            "id": row.id,
            "role": row.role,
            "content": row.content,
            "citations": row.citations,
            "created_at": row.created_at
        }
        for row in rows
    ]

    payload.update(
        conversation_id=conversation.id,
        messages=serialized,
        total_messages=len(serialized)
    )
    return payload
|
| 231 |
+
@router.get("/{repository_id}/conversations")
async def get_repository_conversations(
    repository_id: int,
    db: Session = Depends(get_db),
    user_id: str = Depends(get_user_id),
    _: bool = Depends(verify_client_secret)
):
    """List every conversation attached to a repository the caller owns, newest first."""
    repository = verify_repository_ownership(repository_id, user_id, db)

    rows = (
        db.query(Conversation)
        .filter(Conversation.repository_id == repository_id)
        .order_by(Conversation.created_at.desc())
        .all()
    )

    # NOTE(review): raw ORM objects are returned here — presumably FastAPI's
    # default serialization handles them; confirm against the response schema.
    return {
        "repository_id": repository_id,
        "repository_name": repository.name,
        "user_id": user_id,
        "conversations": rows,
        "total_conversations": len(rows)
    }
|
| 255 |
+
@router.get("/conversations/{conversation_id}/messages")
async def get_conversation_messages(
    conversation_id: int,
    db: Session = Depends(get_db),
    user_id: str = Depends(get_user_id),
    _: bool = Depends(verify_client_secret)
):
    """Return all messages of one conversation, oldest first.

    Raises:
        HTTPException: 404 when the conversation does not exist, or when the
            caller does not own its parent repository.
    """
    conversation = db.query(Conversation).filter(Conversation.id == conversation_id).first()
    if conversation is None:
        raise HTTPException(status_code=404, detail="Conversation not found")

    # Ownership is enforced on the parent repository, not the conversation row.
    verify_repository_ownership(conversation.repository_id, user_id, db)

    rows = (
        db.query(Message)
        .filter(Message.conversation_id == conversation_id)
        .order_by(Message.created_at.asc())
        .all()
    )

    return {
        "conversation_id": conversation_id,
        "repository_id": conversation.repository_id,
        "user_id": user_id,
        "messages": rows,
        "total_messages": len(rows)
    }
|
| 283 |
+
# ✅ NEW: User-specific chat routes
|
| 284 |
+
@router.get("/users/{target_user_id}/conversations")
async def get_user_all_conversations(
    target_user_id: str,
    db: Session = Depends(get_db),
    user_id: str = Depends(get_user_id),
    _: bool = Depends(verify_client_secret)
):
    """List conversations across every repository owned by *target_user_id*.

    Raises:
        HTTPException: 403 unless the caller is asking about themselves.
    """
    # A user may only enumerate their own conversations.
    if user_id != target_user_id:
        raise HTTPException(status_code=403, detail="Access denied - can only access your own conversations")

    repo_ids = [
        repo.id
        for repo in db.query(Repository).filter(Repository.user_id == target_user_id).all()
    ]

    if not repo_ids:
        return {
            "user_id": target_user_id,
            "total_conversations": 0,
            "conversations": []
        }

    rows = (
        db.query(Conversation)
        .filter(Conversation.repository_id.in_(repo_ids))
        .order_by(Conversation.created_at.desc())
        .all()
    )

    # NOTE(review): conv.repository / conv.messages look like lazy relationship
    # loads, which may issue one extra query per conversation — confirm the
    # relationship loading strategy on the models.
    summaries = []
    for conv in rows:
        summaries.append({
            "id": conv.id,
            "repository_id": conv.repository_id,
            "repository_name": conv.repository.name,
            "title": conv.title,
            "created_at": conv.created_at,
            "message_count": len(conv.messages)
        })

    return {
        "user_id": target_user_id,
        "total_conversations": len(rows),
        "conversations": summaries
    }
|
| 329 |
+
@router.post("/{repository_id}/test")
async def test_repository_search(
    repository_id: int,
    query: str = "main function",
    db: Session = Depends(get_db),
    user_id: str = Depends(get_user_id),
    _: bool = Depends(verify_client_secret)
):
    """Smoke-test retrieval for a repository: embed *query* and report the top hits.

    Raises:
        HTTPException: 404 on missing/foreign repository, 400 if it is not
            READY, 500 when the search pipeline fails.
    """
    repository = verify_repository_ownership(repository_id, user_id, db)

    if repository.status != RepositoryStatusEnum.READY:
        raise HTTPException(status_code=400, detail="Repository not ready")

    try:
        embedding_service = EmbeddingService()
        vector_service = VectorService()

        query_embedding = await embedding_service.generate_query_embedding(query)
        results = await vector_service.search_similar_code(repository_id, query_embedding, top_k=3)

        matches = []
        for hit in results:
            matches.append({
                "file": hit['file_path'],
                "lines": f"{hit['start_line']}-{hit['end_line']}",
                "similarity": round(hit['similarity'], 3),
                "preview": hit['content'][:200] + "..."
            })

        return {
            "repository": repository.name,
            "user_id": user_id,
            "query": query,
            "results_found": len(results),
            "top_matches": matches,
            "test_successful": len(results) > 0
        }

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Test failed: {str(e)}")
|
app/api/v1/repositories.py
ADDED
|
@@ -0,0 +1,277 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, Depends, HTTPException, BackgroundTasks, Header
|
| 2 |
+
from sqlalchemy.orm import Session
|
| 3 |
+
from typing import List
|
| 4 |
+
from app.database import get_db
|
| 5 |
+
from app.models.repository import Repository, RepositoryStatusEnum
|
| 6 |
+
from app.schemas.repository import RepositoryCreate, RepositoryResponse
|
| 7 |
+
from app.core.config import settings
|
| 8 |
+
from app.services import GitHubService, EmbeddingService, VectorService
|
| 9 |
+
import logging
|
| 10 |
+
|
| 11 |
+
logging.basicConfig(level=logging.INFO)
|
| 12 |
+
logger = logging.getLogger(__name__)
|
| 13 |
+
|
| 14 |
+
router = APIRouter()
|
| 15 |
+
|
| 16 |
+
async def process_repository_background(repository_id: int, user_id: str):
    """Background task to process repository with hybrid RAG.

    Pipeline: clone -> extract code chunks -> embed locally -> store in
    ChromaDB, updating Repository.status (PROCESSING -> READY / FAILED)
    along the way. Runs outside the request cycle, so it opens (and always
    closes) its own DB session.
    """
    logger.info(f"🚀 Starting QODEX HYBRID RAG processing for repository {repository_id} (user: {user_id})")

    # Request-scoped sessions are unavailable in a background task; open a
    # dedicated session and close it in the finally block below.
    from app.database import SessionLocal
    db = SessionLocal()

    github_service = GitHubService()
    embedding_service = EmbeddingService()
    vector_service = VectorService()

    temp_dir = None  # set once the clone succeeds; cleaned up in finally

    try:
        # Ownership check: both the id AND the user must match.
        repository = db.query(Repository).filter(
            Repository.id == repository_id,
            Repository.user_id == user_id
        ).first()

        if not repository:
            logger.error(f"❌ Repository {repository_id} not found for user {user_id}")
            return

        # Mark as in-flight so clients polling /status see progress.
        repository.status = RepositoryStatusEnum.PROCESSING
        db.commit()
        logger.info(f"📊 Repository {repository_id} status: PROCESSING")

        logger.info(f"📥 Step 1: Cloning repository {repository.github_url}")
        temp_dir = await github_service.clone_repository(repository.github_url)

        logger.info(f"📁 Step 2: Extracting code files from {repository.name}")
        code_chunks = await github_service.extract_code_files(temp_dir)

        if not code_chunks:
            raise Exception("No supported code files found in repository")

        logger.info(f"✅ Found {len(code_chunks)} code chunks")

        logger.info(f"⚡ Step 3: Generating embeddings with LOCAL SentenceTransformers")
        embedded_chunks = await embedding_service.generate_embeddings_batch(code_chunks)

        if not embedded_chunks:
            raise Exception("Failed to generate local embeddings")

        logger.info(f"💾 Step 4: Storing embeddings in ChromaDB")
        await vector_service.store_embeddings(repository_id, embedded_chunks)

        # Success: clear any stale error left over from a previous failed run.
        repository.status = RepositoryStatusEnum.READY
        repository.error_message = None
        db.commit()

        logger.info(f"🎉 SUCCESS! QODEX Repository {repository_id} is READY for chat! (user: {user_id})")

    except Exception as e:
        error_message = str(e)
        logger.error(f"❌ Error processing repository {repository_id}: {error_message}")

        # Best-effort status update; guarded so a broken DB connection does
        # not mask the original pipeline failure.
        try:
            repository = db.query(Repository).filter(Repository.id == repository_id).first()
            if repository:
                repository.status = RepositoryStatusEnum.FAILED
                repository.error_message = error_message[:500]  # cap to fit the column
                db.commit()
        except Exception as db_error:
            logger.error(f"❌ Failed to update repository status: {str(db_error)}")

    finally:
        # Always remove the clone and release the session, success or failure.
        if temp_dir:
            github_service.cleanup_temp_dir(temp_dir)
        db.close()
|
| 86 |
+
|
| 87 |
+
def verify_client_secret(x_client_secret: str = Header(..., alias="X-Client-Secret")):
    """Verify request comes from an authorized Next.js client.

    Compares the X-Client-Secret header against the configured shared secret.

    Raises:
        HTTPException: 403 when the secret does not match.

    Returns:
        True when the caller is authorized (used as a FastAPI dependency).
    """
    # Local import keeps this security-sensitive helper self-contained.
    import hmac

    # Use a constant-time comparison so the shared secret cannot be probed
    # one character at a time via response-timing differences (a plain `!=`
    # short-circuits at the first mismatching byte).
    if not hmac.compare_digest(x_client_secret, settings.nextjs_secret):
        raise HTTPException(
            status_code=403,
            detail="Unauthorized client - invalid secret"
        )
    return True
|
| 95 |
+
|
| 96 |
+
def get_user_id(x_user_id: str = Header(..., alias="X-User-ID")):
    """Extract and validate user ID from header"""
    # Normalize once; an absent or whitespace-only header is rejected.
    cleaned = x_user_id.strip() if x_user_id else ""
    if not cleaned:
        raise HTTPException(status_code=400, detail="User ID required")
    return cleaned
|
| 101 |
+
|
| 102 |
+
@router.post("/", response_model=RepositoryResponse)
async def add_repository(
    repository: RepositoryCreate,
    background_tasks: BackgroundTasks,
    db: Session = Depends(get_db),
    user_id: str = Depends(get_user_id),
    _: bool = Depends(verify_client_secret)
):
    """Add new repository for QODEX processing.

    Creates a PENDING Repository row for the authenticated user and queues
    the clone/embed pipeline as a background task. Returns the created row
    immediately; clients poll /{id}/status for progress.
    """

    # Verify user_id matches between header and body
    if repository.user_id != user_id:
        raise HTTPException(status_code=400, detail="User ID mismatch between header and body")

    logger.info(f"📥 NEW QODEX REQUEST: {repository.name} - {repository.github_url} (user: {user_id})")

    # Validate GitHub URL (HTTPS or SSH form only)
    if not repository.github_url.startswith(('https://github.com/', 'git@github.com:')):
        raise HTTPException(status_code=400, detail="Invalid GitHub URL format")

    # Check for duplicates for this user.
    # NOTE(review): the Repository model declares github_url unique=True
    # *globally*, while this check is per-user — a second user adding the
    # same URL would fail at commit with an IntegrityError (500), not a
    # clean 400. Also a check-then-insert race exists between concurrent
    # requests. Confirm intended semantics.
    existing = db.query(Repository).filter(
        Repository.github_url == repository.github_url,
        Repository.user_id == user_id
    ).first()

    if existing:
        raise HTTPException(
            status_code=400,
            detail=f"Repository already exists with ID: {existing.id}. Status: {existing.status.value}"
        )

    # Create repository record (starts in PENDING until the task picks it up)
    db_repository = Repository(
        name=repository.name,
        github_url=repository.github_url,
        user_id=user_id,
        status=RepositoryStatusEnum.PENDING
    )
    db.add(db_repository)
    db.commit()
    db.refresh(db_repository)  # populate server-generated fields (id, created_at)

    # Start background processing after the response is sent
    background_tasks.add_task(process_repository_background, db_repository.id, user_id)

    logger.info(f"✅ Repository {db_repository.id} created and queued for processing (user: {user_id})")
    return db_repository
|
| 150 |
+
|
| 151 |
+
@router.get("/", response_model=List[RepositoryResponse])
async def get_user_repositories(
    db: Session = Depends(get_db),
    user_id: str = Depends(get_user_id),
    _: bool = Depends(verify_client_secret)
):
    """Get all repositories for the authenticated user"""
    # Newest first; scoped strictly to the caller's own rows.
    owned = (
        db.query(Repository)
        .filter(Repository.user_id == user_id)
        .order_by(Repository.created_at.desc())
        .all()
    )

    logger.info(f"📋 Retrieved {len(owned)} repositories for user {user_id}")
    return owned
|
| 164 |
+
|
| 165 |
+
@router.get("/{repository_id}", response_model=RepositoryResponse)
async def get_repository(
    repository_id: int,
    db: Session = Depends(get_db),
    user_id: str = Depends(get_user_id),
    _: bool = Depends(verify_client_secret)
):
    """Get specific repository by ID (user must own it)"""
    # Filtering on both id and owner enforces per-user isolation in a
    # single query; a foreign id looks identical to a missing one (404).
    repo = (
        db.query(Repository)
        .filter(Repository.id == repository_id, Repository.user_id == user_id)
        .first()
    )

    if repo is None:
        raise HTTPException(status_code=404, detail="Repository not found or access denied")

    return repo
|
| 182 |
+
|
| 183 |
+
@router.delete("/{repository_id}")
async def delete_repository(
    repository_id: int,
    db: Session = Depends(get_db),
    user_id: str = Depends(get_user_id),
    _: bool = Depends(verify_client_secret)
):
    """Delete repository and all associated data (user must own it).

    Vector-store cleanup runs first and is best-effort (a failure there is
    only logged); the SQL row delete is authoritative and returns 500 on
    failure.
    """
    repository = db.query(Repository).filter(
        Repository.id == repository_id,
        Repository.user_id == user_id
    ).first()

    if not repository:
        raise HTTPException(status_code=404, detail="Repository not found or access denied")

    try:
        # Delete vector data from ChromaDB (best-effort: orphaned vectors
        # are tolerated rather than blocking the delete).
        vector_service = VectorService()
        await vector_service.delete_repository_data(repository_id)
        logger.info(f"🗑️ Deleted vector data for repository {repository_id}")
    except Exception as e:
        logger.warning(f"⚠️ Error deleting vector data for repo {repository_id}: {e}")

    try:
        # Delete conversations and messages (CASCADE should handle this)
        db.delete(repository)
        db.commit()
        logger.info(f"🗑️ Successfully deleted repository {repository_id} (user: {user_id})")
    except Exception as e:
        # NOTE: if this fails after the vector delete above succeeded, the
        # row remains but its embeddings are gone — re-processing is needed.
        logger.error(f"❌ Error deleting repository {repository_id}: {e}")
        raise HTTPException(status_code=500, detail="Failed to delete repository")

    return {
        "message": f"Repository {repository_id} deleted successfully",
        "repository_id": repository_id,
        "user_id": user_id,
        "success": True
    }
|
| 222 |
+
|
| 223 |
+
@router.get("/{repository_id}/status")
async def get_repository_status(
    repository_id: int,
    db: Session = Depends(get_db),
    user_id: str = Depends(get_user_id),
    _: bool = Depends(verify_client_secret)
):
    """Get detailed repository status (user must own it)"""
    repo = (
        db.query(Repository)
        .filter(Repository.id == repository_id, Repository.user_id == user_id)
        .first()
    )
    if repo is None:
        raise HTTPException(status_code=404, detail="Repository not found or access denied")

    # Count conversations for this repository
    from app.models.conversation import Conversation

    conversation_count = (
        db.query(Conversation)
        .filter(Conversation.repository_id == repository_id)
        .count()
    )

    # Terminal states: the background pipeline is no longer running.
    terminal_states = [RepositoryStatusEnum.READY, RepositoryStatusEnum.FAILED]

    return {
        "id": repo.id,
        "user_id": repo.user_id,
        "name": repo.name,
        "github_url": repo.github_url,
        "status": repo.status.value,
        "error_message": repo.error_message,
        "created_at": repo.created_at,
        "updated_at": repo.updated_at,
        "is_ready_for_chat": repo.status == RepositoryStatusEnum.READY,
        "conversation_count": conversation_count,
        "processing_complete": repo.status in terminal_states,
    }
|
| 258 |
+
|
| 259 |
+
# ✅ NEW: User-specific routes
|
| 260 |
+
@router.get("/users/{target_user_id}/repositories", response_model=List[RepositoryResponse])
async def get_specific_user_repositories(
    target_user_id: str,
    db: Session = Depends(get_db),
    user_id: str = Depends(get_user_id),
    _: bool = Depends(verify_client_secret)
):
    """Get repositories for a specific user (must be same user)"""
    # Security: the authenticated caller may only read their own data.
    if target_user_id != user_id:
        raise HTTPException(status_code=403, detail="Access denied - can only access your own repositories")

    return (
        db.query(Repository)
        .filter(Repository.user_id == target_user_id)
        .order_by(Repository.created_at.desc())
        .all()
    )
|
app/api/v1/router.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter
from .repositories import router as repositories_router
from .chat import router as chat_router

# Aggregate router for the v1 API surface.
# NOTE(review): app/main.py mounts repositories.router and chat.router
# directly with explicit /api/v1/... prefixes — verify whether this
# api_router is included anywhere, or is dead wiring.
api_router = APIRouter()

# Include only core functionality
api_router.include_router(repositories_router, prefix="/repositories", tags=["repositories"])
api_router.include_router(chat_router, prefix="/chat", tags=["chat"])
|
app/core/config.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from pydantic_settings import BaseSettings
|
| 3 |
+
|
| 4 |
+
class Settings(BaseSettings):
    """Application configuration, sourced from environment variables / .env.

    NOTE(review): the os.getenv() defaults are resolved once at class
    definition; pydantic's BaseSettings would also read these env vars
    itself, so the explicit getenv calls are redundant but harmless.
    """

    # Database (falls back to a local SQLite file for dev)
    database_url: str = os.getenv("DATABASE_URL", "sqlite:///./test.db")

    # Security
    # NOTE(review): hard-coded fallback secrets are a deployment risk —
    # SECRET_KEY and NEXTJS_SECRET must always be set in production so
    # these literals never take effect.
    secret_key: str = os.getenv("SECRET_KEY", "production-secret-key-change-me")
    nextjs_secret: str = os.getenv("NEXTJS_SECRET", "qodex-production-secret-2025")
    algorithm: str = "HS256"               # JWT signing algorithm
    access_token_expire_minutes: int = 30  # default token lifetime

    # API Keys (empty string means the Gemini chat path falls back)
    gemini_api_key: str = os.getenv("GEMINI_API_KEY", "")

    # App
    environment: str = os.getenv("ENVIRONMENT", "production")
    debug: bool = os.getenv("DEBUG", "false").lower() == "true"

    class Config:
        # Also load variables from a local .env file when present.
        env_file = ".env"

settings = Settings()
|
app/core/database.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sqlalchemy import create_engine
|
| 2 |
+
from sqlalchemy.ext.declarative import declarative_base
|
| 3 |
+
from sqlalchemy.orm import sessionmaker
|
| 4 |
+
from .config import settings
|
| 5 |
+
|
| 6 |
+
# ✅ Production-ready engine configuration
engine = create_engine(
    settings.database_url,
    pool_size=5,          # Reduced for Neon free tier
    max_overflow=10,      # Reduced for free tier
    pool_pre_ping=True,   # probe connections before use; drops stale ones
    pool_recycle=3600,    # recycle connections hourly (avoids server-side idle kills)
    echo=False,           # Disable SQL logging in production
    pool_timeout=30,      # wait up to 30s for a free pooled connection
    # sslmode is a PostgreSQL-driver option; the conditional keeps the
    # SQLite fallback (default database_url) working unchanged.
    connect_args={
        "sslmode": "require"  # Required for Neon
    } if settings.database_url.startswith("postgresql") else {}
)

SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
Base = declarative_base()
|
| 22 |
+
|
| 23 |
+
def get_db():
    """FastAPI dependency: yield a DB session, always closing it afterwards."""
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
|
app/core/security.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from passlib.context import CryptContext
|
| 2 |
+
from jose import JWTError, jwt
|
| 3 |
+
from datetime import datetime, timedelta
|
| 4 |
+
from typing import Optional
|
| 5 |
+
from .config import settings
|
| 6 |
+
|
| 7 |
+
# Password hashing context
|
| 8 |
+
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
|
| 9 |
+
|
| 10 |
+
def verify_password(plain_password: str, hashed_password: str) -> bool:
    """Verify a plain password against its hash.

    Delegates to the module-level bcrypt CryptContext; returns True on match.
    """
    return pwd_context.verify(plain_password, hashed_password)
|
| 13 |
+
|
| 14 |
+
def get_password_hash(password: str) -> str:
    """Hash a password using the configured bcrypt context (salted)."""
    return pwd_context.hash(password)
|
| 17 |
+
|
| 18 |
+
def create_access_token(data: dict, expires_delta: Optional[timedelta] = None) -> str:
    """Create a signed JWT access token.

    Args:
        data: Claims to embed in the token (copied, never mutated).
        expires_delta: Optional custom lifetime; defaults to
            ``settings.access_token_expire_minutes``.

    Returns:
        The encoded JWT string.
    """
    # datetime.utcnow() is deprecated (Python 3.12+) and returns a *naive*
    # datetime; use an explicitly UTC-aware timestamp so the "exp" claim is
    # unambiguous. Local import keeps the fix contained to this function.
    from datetime import timezone

    now = datetime.now(timezone.utc)
    if expires_delta:
        expire = now + expires_delta
    else:
        expire = now + timedelta(minutes=settings.access_token_expire_minutes)

    to_encode = data.copy()
    to_encode.update({"exp": expire})
    encoded_jwt = jwt.encode(to_encode, settings.secret_key, algorithm=settings.algorithm)
    return encoded_jwt
|
| 29 |
+
|
| 30 |
+
def verify_token(token: str) -> Optional[str]:
    """Verify JWT token and return email"""
    # Decode validates signature (and standard claims); any JWT problem
    # maps to None rather than raising to the caller.
    try:
        claims = jwt.decode(token, settings.secret_key, algorithms=[settings.algorithm])
    except JWTError:
        return None
    # The subject ("sub") claim carries the user's email; missing -> None.
    return claims.get("sub")
|
app/database.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sqlalchemy import create_engine
|
| 2 |
+
from sqlalchemy.ext.declarative import declarative_base
|
| 3 |
+
from sqlalchemy.orm import sessionmaker
|
| 4 |
+
import os
|
| 5 |
+
from dotenv import load_dotenv
|
| 6 |
+
|
| 7 |
+
load_dotenv()

DATABASE_URL = os.getenv("DATABASE_URL")

# Fail fast with a clear message rather than letting create_engine() raise a
# confusing driver error when the env var is missing.
if not DATABASE_URL:
    raise RuntimeError("DATABASE_URL environment variable is not set")

# ✅ Add connection pooling here too
# NOTE(review): this module duplicates app/core/database.py (separate engine,
# SessionLocal and Base) — consider consolidating on a single engine.
engine = create_engine(
    DATABASE_URL,
    pool_size=10,        # ✅ Allow 10 concurrent connections
    max_overflow=20,     # ✅ Allow 20 more if needed
    pool_pre_ping=True,  # ✅ Verify connections are alive
    pool_recycle=3600,   # ✅ Recycle connections every hour
    pool_timeout=30,     # ✅ Wait 30s for available connection
)
|
| 20 |
+
|
| 21 |
+
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
|
| 22 |
+
|
| 23 |
+
Base = declarative_base()
|
| 24 |
+
|
| 25 |
+
def get_db():
    """Yield a request-scoped SQLAlchemy session; guarantee it is closed."""
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
|
app/main.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI
|
| 2 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 3 |
+
from app.api.v1 import repositories, chat
|
| 4 |
+
from app.core.database import engine, Base
|
| 5 |
+
from datetime import datetime
|
| 6 |
+
import os
|
| 7 |
+
import logging
|
| 8 |
+
|
| 9 |
+
# Configure logging
|
| 10 |
+
logging.basicConfig(level=logging.INFO)
|
| 11 |
+
logger = logging.getLogger(__name__)
|
| 12 |
+
|
| 13 |
+
# Application instance with interactive docs at /docs and /redoc.
app = FastAPI(
    title="QODEX API",
    description="AI-powered code repository chat system",
    version="1.0.0",
    docs_url="/docs",
    redoc_url="/redoc"
)

# CORS configuration for production.
# allow_credentials=True requires an explicit origin list (no "*" origins),
# which is satisfied here.
app.add_middleware(
    CORSMiddleware,
    allow_origins=[
        "https://qodex.vercel.app",  # Your frontend domain
        "https://qodex-frontend.vercel.app",  # Alternative frontend domain
        "http://localhost:3000",  # Local development
        "http://127.0.0.1:3000",  # Local development
    ],
    allow_credentials=True,
    allow_methods=["GET", "POST", "PUT", "DELETE"],
    allow_headers=["*"],
)
|
| 34 |
+
|
| 35 |
+
# Create tables on startup
|
| 36 |
+
@app.on_event("startup")
async def startup_event():
    """Create database tables on startup.

    create_all() is idempotent: it only creates tables that do not exist.
    NOTE(review): @app.on_event is deprecated in newer FastAPI in favour of
    lifespan handlers; it still works but consider migrating.
    """
    try:
        Base.metadata.create_all(bind=engine)
        logger.info("🗄️ Database tables created successfully")
    except Exception as e:
        # Startup proceeds even if table creation fails; the first request
        # that touches the DB will surface the underlying error.
        logger.error(f"❌ Error creating database tables: {e}")
|
| 44 |
+
|
| 45 |
+
# Health check endpoint
|
| 46 |
+
@app.get("/health")
async def health_check():
    """Health check endpoint for monitoring services.

    Returns a static status payload plus a UTC timestamp; no dependencies
    (database, vector store) are probed here.
    """
    # datetime.utcnow() is deprecated (Python 3.12+) and yields a naive
    # datetime; emit an explicitly UTC-aware ISO-8601 timestamp instead.
    from datetime import timezone

    return {
        "status": "healthy",
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "service": "QODEX API",
        "version": "1.0.0",
        "environment": os.getenv("ENVIRONMENT", "production"),
        "message": "QODEX is running smoothly! 🚀"
    }
|
| 57 |
+
|
| 58 |
+
@app.get("/")
async def root():
    """Root endpoint"""
    # Static service banner with pointers to the docs and health endpoints.
    info = {
        "message": "Welcome to QODEX API! 🚀",
        "description": "AI-powered code repository chat system",
        "docs": "/docs",
        "health": "/health",
        "status": "running",
        "version": "1.0.0",
    }
    return info
|
| 69 |
+
|
| 70 |
+
# Include routers
|
| 71 |
+
app.include_router(repositories.router, prefix="/api/v1/repositories", tags=["repositories"])
|
| 72 |
+
app.include_router(chat.router, prefix="/api/v1/chat", tags=["chat"])
|
app/models/__init__.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app/models/__init__.py
|
| 2 |
+
from .repository import Repository
|
| 3 |
+
from .conversation import Conversation, Message
|
| 4 |
+
|
| 5 |
+
__all__ = ["Repository", "Conversation", "Message"]
|
app/models/conversation.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sqlalchemy import Column, Integer, String, DateTime, ForeignKey, JSON, Text
|
| 2 |
+
from sqlalchemy.sql import func
|
| 3 |
+
from sqlalchemy.orm import relationship
|
| 4 |
+
from app.core.database import Base
|
| 5 |
+
|
| 6 |
+
class Conversation(Base):
    """Conversation model - linked to repository only.

    Access control is derived entirely from the owning repository's user_id;
    conversations carry no user column of their own.
    """
    __tablename__ = "conversations"

    id = Column(Integer, primary_key=True, index=True)
    # Owning repository (deleting the repository cascades here).
    repository_id = Column(Integer, ForeignKey("repositories.id"), nullable=False)
    title = Column(String, nullable=False, default="New Conversation")
    created_at = Column(DateTime(timezone=True), server_default=func.now())
    updated_at = Column(DateTime(timezone=True), onupdate=func.now())

    # Relationships
    repository = relationship("Repository", back_populates="conversations")
    # ORM-level cascade: deleting a conversation also deletes its messages.
    messages = relationship("Message", back_populates="conversation", cascade="all, delete-orphan")

    def __repr__(self):
        return f"<Conversation(id={self.id}, repository_id={self.repository_id}, title='{self.title}')>"
|
| 22 |
+
|
| 23 |
+
class Message(Base):
    """Message model for chat history."""
    __tablename__ = "messages"

    id = Column(Integer, primary_key=True, index=True)
    conversation_id = Column(Integer, ForeignKey("conversations.id"), nullable=False)
    role = Column(String, nullable=False)  # 'user' or 'assistant'
    content = Column(Text, nullable=False)
    citations = Column(JSON, nullable=True)  # Store code citations as JSON
    created_at = Column(DateTime(timezone=True), server_default=func.now())

    # Relationships
    conversation = relationship("Conversation", back_populates="messages")

    def __repr__(self):
        return f"<Message(id={self.id}, role='{self.role}', conversation_id={self.conversation_id})>"
|
app/models/repository.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sqlalchemy import Column, Integer, String, DateTime, Enum
|
| 2 |
+
from sqlalchemy.sql import func
|
| 3 |
+
from sqlalchemy.orm import relationship
|
| 4 |
+
from app.core.database import Base
|
| 5 |
+
import enum
|
| 6 |
+
|
| 7 |
+
class RepositoryStatusEnum(enum.Enum):
    """Repository processing status."""
    PENDING = "PENDING"        # row created; waiting for the background task
    PROCESSING = "PROCESSING"  # clone/chunk/embed pipeline in flight
    READY = "READY"            # embeddings stored; repository is chattable
    FAILED = "FAILED"          # pipeline raised; see Repository.error_message
|
| 13 |
+
|
| 14 |
+
class Repository(Base):
    """Repository model with user ownership."""
    __tablename__ = "repositories"

    id = Column(Integer, primary_key=True, index=True)
    # Owner's external user id (string, supplied via X-User-ID header).
    user_id = Column(String, nullable=False, index=True)  # ✅ Added back!
    # NOTE(review): unique=True is *global*, but the API's duplicate check in
    # add_repository is per-user — a second user adding the same URL hits an
    # IntegrityError (500) instead of a clean 400. Confirm intended semantics.
    github_url = Column(String, nullable=False, unique=True)
    name = Column(String, nullable=False)
    status = Column(Enum(RepositoryStatusEnum), default=RepositoryStatusEnum.PENDING)
    # Failure detail from the processing pipeline (truncated to 500 chars there).
    error_message = Column(String, nullable=True)
    created_at = Column(DateTime(timezone=True), server_default=func.now())
    updated_at = Column(DateTime(timezone=True), onupdate=func.now())

    # Relationships
    # Deleting a repository cascades to its conversations (and their messages).
    conversations = relationship("Conversation", back_populates="repository", cascade="all, delete-orphan")

    def __repr__(self):
        return f"<Repository(id={self.id}, user_id='{self.user_id}', name='{self.name}', status={self.status.value})>"
|
app/schemas/__init__.py
ADDED
|
File without changes
|
app/schemas/chat.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel
|
| 2 |
+
from datetime import datetime
|
| 3 |
+
from typing import List, Optional, Dict, Any
|
| 4 |
+
|
| 5 |
+
class CodeCitation(BaseModel):
    """A code location cited as evidence in an assistant answer."""
    file_path: str     # repo-relative path of the cited file
    start_line: int
    end_line: int
    code_snippet: str  # the cited code itself
|
| 10 |
+
|
| 11 |
+
class MessageCreate(BaseModel):
    """Request payload for posting a new user message."""
    content: str
|
| 13 |
+
|
| 14 |
+
class MessageResponse(BaseModel):
    """A single chat message as returned by the API."""
    id: int
    role: str  # 'user' or 'assistant' (mirrors the Message ORM column)
    content: str
    citations: Optional[List[CodeCitation]] = None  # code citations, if any
    created_at: datetime

    class Config:
        # Allow construction directly from ORM Message instances.
        from_attributes = True
|
| 23 |
+
|
| 24 |
+
class ConversationResponse(BaseModel):
    """A conversation together with its full message history."""
    id: int
    repository_id: int
    title: str
    messages: List[MessageResponse]
    created_at: datetime

    class Config:
        # Allow construction directly from ORM Conversation instances.
        from_attributes = True
|
| 33 |
+
|
| 34 |
+
class QueryRequest(BaseModel):
    """A chat question; omit conversation_id to start a new conversation."""
    question: str
    conversation_id: Optional[int] = None
|
| 37 |
+
|
| 38 |
+
class QueryResponse(BaseModel):
    """Answer to a QueryRequest, with supporting code citations."""
    answer_id: str
    natural_language_answer: str
    citations: List[CodeCitation]
    conversation_id: int  # id of the (possibly newly created) conversation
|
app/schemas/repository.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel
|
| 2 |
+
from typing import Optional
|
| 3 |
+
from datetime import datetime
|
| 4 |
+
from enum import Enum
|
| 5 |
+
|
| 6 |
+
class RepositoryStatus(str, Enum):
    """API-facing mirror of the ORM RepositoryStatusEnum values."""
    PENDING = "PENDING"
    PROCESSING = "PROCESSING"
    READY = "READY"
    FAILED = "FAILED"
|
| 11 |
+
|
| 12 |
+
class RepositoryCreate(BaseModel):
    """Request body for registering a repository."""
    name: str
    # Must start with https://github.com/ or git@github.com: (validated in the route).
    github_url: str
    # ✅ Added back! Must match the X-User-ID header (route rejects mismatch).
    user_id: str
|
| 16 |
+
|
| 17 |
+
class RepositoryResponse(BaseModel):
    """Repository row as returned by the API."""
    id: int
    user_id: str  # ✅ Added back!
    name: str
    github_url: str
    status: RepositoryStatus
    error_message: Optional[str] = None  # populated only after a FAILED run
    created_at: datetime
    updated_at: Optional[datetime] = None  # None until the first update

    class Config:
        # Allow construction directly from ORM Repository instances.
        from_attributes = True
|
app/services/__init__.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .github_service import GitHubService
|
| 2 |
+
from .embedding_service import EmbeddingService
|
| 3 |
+
from .vector_service import VectorService
|
| 4 |
+
from .chat_service import ChatService
|
| 5 |
+
|
| 6 |
+
__all__ = ['GitHubService', 'EmbeddingService', 'VectorService', 'ChatService']
|
app/services/chat_service.py
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import google.generativeai as genai
|
| 2 |
+
import os
|
| 3 |
+
from typing import List, Dict
|
| 4 |
+
import logging
|
| 5 |
+
from dotenv import load_dotenv
|
| 6 |
+
|
| 7 |
+
load_dotenv()
|
| 8 |
+
logger = logging.getLogger(__name__)
|
| 9 |
+
|
| 10 |
+
class ChatService:
|
| 11 |
+
def __init__(self):
|
| 12 |
+
api_key = os.getenv("GEMINI_API_KEY")
|
| 13 |
+
if not api_key:
|
| 14 |
+
logger.warning("⚠️ GEMINI_API_KEY not found - chat will use fallback responses")
|
| 15 |
+
self.model = None
|
| 16 |
+
self.gemini_available = False
|
| 17 |
+
else:
|
| 18 |
+
try:
|
| 19 |
+
genai.configure(api_key=api_key)
|
| 20 |
+
self.model = genai.GenerativeModel('gemini-2.0-flash')
|
| 21 |
+
self.gemini_available = True
|
| 22 |
+
logger.info("🤖 Gemini chat service initialized")
|
| 23 |
+
except Exception as e:
|
| 24 |
+
logger.error(f"❌ Failed to initialize Gemini: {e}")
|
| 25 |
+
self.model = None
|
| 26 |
+
self.gemini_available = False
|
| 27 |
+
|
| 28 |
+
async def generate_response(self, query: str, code_chunks: List[Dict], repository_name: str) -> Dict:
|
| 29 |
+
if not self.gemini_available:
|
| 30 |
+
return self.generate_fallback_response(query, code_chunks, repository_name)
|
| 31 |
+
|
| 32 |
+
try:
|
| 33 |
+
context = self.prepare_context(code_chunks)
|
| 34 |
+
|
| 35 |
+
prompt = f"""You are an expert code assistant analyzing the {repository_name} repository.
|
| 36 |
+
|
| 37 |
+
User Question: {query}
|
| 38 |
+
|
| 39 |
+
Relevant Code Context:
|
| 40 |
+
{context}
|
| 41 |
+
|
| 42 |
+
Instructions:
|
| 43 |
+
1. Answer the user's question based on the provided code context
|
| 44 |
+
2. Reference specific files and line numbers when relevant
|
| 45 |
+
3. Explain code functionality clearly
|
| 46 |
+
4. If context is insufficient, say so clearly
|
| 47 |
+
5. Be specific and technical but also clear
|
| 48 |
+
|
| 49 |
+
Your Expert Analysis:"""
|
| 50 |
+
|
| 51 |
+
response = self.model.generate_content(prompt)
|
| 52 |
+
|
| 53 |
+
sources = []
|
| 54 |
+
for chunk in code_chunks:
|
| 55 |
+
sources.append({
|
| 56 |
+
'file_path': chunk['file_path'],
|
| 57 |
+
'start_line': chunk['start_line'],
|
| 58 |
+
'end_line': chunk['end_line'],
|
| 59 |
+
'similarity': round(chunk['similarity'], 3),
|
| 60 |
+
'preview': chunk['content'][:200] + "..."
|
| 61 |
+
})
|
| 62 |
+
|
| 63 |
+
return {
|
| 64 |
+
'response': response.text,
|
| 65 |
+
'sources': sources,
|
| 66 |
+
'context_chunks_used': len(code_chunks),
|
| 67 |
+
'repository_name': repository_name,
|
| 68 |
+
'model_used': 'gemini-2.0-flash',
|
| 69 |
+
'success': True
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
except Exception as e:
|
| 73 |
+
logger.error(f"❌ Gemini error: {e}")
|
| 74 |
+
if "429" in str(e) or "quota" in str(e).lower():
|
| 75 |
+
return self.generate_quota_response(query, code_chunks, repository_name)
|
| 76 |
+
return self.generate_fallback_response(query, code_chunks, repository_name)
|
| 77 |
+
|
| 78 |
+
def prepare_context(self, code_chunks: List[Dict]) -> str:
    """Render retrieved code chunks as a numbered, human-readable context string."""
    rendered_sections = [
        f"\nCode Reference {ref_number}:\n"
        f"File: {chunk['file_path']}\n"
        f"Lines: {chunk['start_line']}-{chunk['end_line']}\n"
        f"Similarity: {chunk['similarity']:.2f}\n"
        f"{chunk['content']}\n"
        for ref_number, chunk in enumerate(code_chunks, start=1)
    ]
    return "\n".join(rendered_sections)
|
| 89 |
+
|
| 90 |
+
def generate_quota_response(self, query: str, code_chunks: List[Dict], repository_name: str) -> Dict:
    """Build a response for the rate-limited case: show the raw search results.

    Fix: the original called min()/max() unconditionally, which raises
    ValueError when `code_chunks` is empty; also computes similarities once.
    """
    context = self.prepare_context(code_chunks)

    if code_chunks:
        similarities = [c['similarity'] for c in code_chunks]
        score_note = (
            f"The search found relevant code with similarity scores from "
            f"{min(similarities):.2f} to {max(similarities):.2f}."
        )
    else:
        score_note = "No relevant code sections were found."

    response = f"""🚫 Gemini quota exceeded, but I found {len(code_chunks)} relevant code sections:

{context}

{score_note} Please try again in a few minutes when quota resets."""

    return self.create_response_dict(response, code_chunks, repository_name, 'quota_exceeded')
|
| 99 |
+
|
| 100 |
+
def generate_fallback_response(self, query: str, code_chunks: List[Dict], repository_name: str) -> Dict:
    """Build a non-AI answer that simply presents the retrieved code context."""
    chunk_context = self.prepare_context(code_chunks)
    header = f'Found {len(code_chunks)} relevant code sections for: "{query}"'
    footer = "Note: AI analysis requires API configuration. The search results above show the most relevant code."
    message = f"{header}\n\n{chunk_context}\n\n{footer}"
    return self.create_response_dict(message, code_chunks, repository_name, 'fallback')
|
| 109 |
+
|
| 110 |
+
def create_response_dict(self, response: str, code_chunks: List[Dict], repository_name: str, model_used: str) -> Dict:
    """Package a response text plus source attributions into the standard payload.

    Args:
        response: The final answer text to return to the client.
        code_chunks: Chunks used as context; each contributes a source entry.
        repository_name: Display name of the repository.
        model_used: Identifier for how the answer was produced
            (e.g. 'gemini-2.0-flash', 'fallback', 'quota_exceeded').

    Returns:
        Dict with 'response', 'sources', 'context_chunks_used',
        'repository_name', 'model_used' and 'success' keys.
    """
    sources = []
    for chunk in code_chunks:
        content = chunk['content']
        # Fix: only append the ellipsis when the preview is actually truncated;
        # the original added "..." even to content shorter than 200 chars.
        preview = content[:200] + "..." if len(content) > 200 else content
        sources.append({
            'file_path': chunk['file_path'],
            'start_line': chunk['start_line'],
            'end_line': chunk['end_line'],
            'similarity': round(chunk['similarity'], 3),
            'preview': preview,
        })

    return {
        'response': response,
        'sources': sources,
        'context_chunks_used': len(code_chunks),
        'repository_name': repository_name,
        'model_used': model_used,
        'success': True,
    }
|
app/services/embedding_service.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sentence_transformers import SentenceTransformer
|
| 2 |
+
import numpy as np
|
| 3 |
+
from typing import List, Dict
|
| 4 |
+
import logging
|
| 5 |
+
|
| 6 |
+
logger = logging.getLogger(__name__)
|
| 7 |
+
|
| 8 |
+
class EmbeddingService:
    """Generates vector embeddings locally with a SentenceTransformer model."""

    def __init__(self):
        try:
            # Small general-purpose model; downloaded and cached on first use.
            self.model = SentenceTransformer('all-MiniLM-L6-v2')
            logger.info("🤖 Local embedding service initialized (all-MiniLM-L6-v2)")
        except Exception as e:
            logger.error(f"❌ Failed to load SentenceTransformer model: {e}")
            # Chain the original cause so the root failure stays visible.
            raise Exception("Failed to initialize local embedding model") from e

    async def generate_embedding(self, text: str, title: str = "") -> List[float]:
        """Embed a single text, optionally prefixed with a file title for context.

        Returns:
            The embedding as a plain list of floats.
        """
        try:
            content = f"File: {title}\n\nCode:\n{text}" if title else text
            embedding = self.model.encode(content)
            return embedding.tolist()
        except Exception as e:
            logger.error(f"❌ Error generating local embedding: {e}")
            raise

    async def generate_embeddings_batch(self, chunks: List[Dict]) -> List[Dict]:
        """Embed many code chunks in one batched encode call.

        Each chunk must carry 'file_path', 'start_line', 'end_line',
        'chunk_type' and 'content'. Returns copies of the chunks extended
        with 'embedding' and 'content_length'.
        """
        logger.info(f"🔄 Generating LOCAL embeddings for {len(chunks)} chunks...")

        # Build one descriptive text per chunk so file/location metadata
        # participates in the embedding.
        texts = []
        for chunk in chunks:
            content = f"""File: {chunk['file_path']}
Lines: {chunk['start_line']}-{chunk['end_line']}
Type: {chunk['chunk_type']}

Code:
{chunk['content']}"""
            texts.append(content)

        try:
            embeddings = self.model.encode(texts, show_progress_bar=True, batch_size=32)

            # zip, not enumerate: the original loop index was unused.
            embedded_chunks = [
                {
                    **chunk,
                    'embedding': embedding.tolist(),
                    'content_length': len(chunk['content']),
                }
                for chunk, embedding in zip(chunks, embeddings)
            ]
        except Exception as e:
            logger.error(f"❌ Failed to generate batch embeddings: {e}")
            raise

        logger.info(f"✅ Generated {len(embedded_chunks)} LOCAL embeddings successfully")
        return embedded_chunks

    async def generate_query_embedding(self, query: str) -> List[float]:
        """Embed a free-text search query for similarity search."""
        try:
            embedding = self.model.encode(query)
            return embedding.tolist()
        except Exception as e:
            logger.error(f"❌ Error generating query embedding: {e}")
            raise
|
app/services/github_service.py
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import git
|
| 2 |
+
import os
|
| 3 |
+
import tempfile
|
| 4 |
+
import shutil
|
| 5 |
+
from typing import List, Dict
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
import logging
|
| 8 |
+
|
| 9 |
+
logger = logging.getLogger(__name__)
|
| 10 |
+
|
| 11 |
+
class GitHubService:
    """Clones GitHub repositories and extracts/chunks their source files."""

    def __init__(self):
        # File extensions treated as source code worth indexing.
        self.supported_extensions = {
            '.py', '.js', '.ts', '.jsx', '.tsx', '.java', '.cpp', '.c',
            '.cs', '.go', '.rs', '.php', '.rb', '.swift', '.kt', '.scala',
            '.html', '.css', '.scss', '.sass', '.vue', '.svelte', '.dart',
            '.r', '.m', '.mm', '.h', '.hpp', '.cc', '.cxx', '.sql'
        }
        # Directories that never contain first-party source we want to index.
        self.ignore_dirs = {
            '.git', 'node_modules', '__pycache__', '.venv', 'venv',
            'build', 'dist', '.next', '.nuxt', 'coverage', '.pytest_cache',
            'vendor', 'target', 'bin', 'obj', '.gradle', '.idea', '.vscode'
        }

    async def clone_repository(self, github_url: str) -> str:
        """Clone repository to a temporary directory and return its path.

        Raises:
            Exception: if cloning fails (the temp directory is removed first).
        """
        temp_dir = tempfile.mkdtemp(prefix="codequery_")
        logger.info(f"🔄 Cloning {github_url} to {temp_dir}")

        try:
            # depth=1: shallow clone of only the latest commit — much faster.
            git.Repo.clone_from(github_url, temp_dir, depth=1)
            logger.info(f"✅ Successfully cloned repository")
            return temp_dir
        except Exception as e:
            # Clean up the partially-created directory on failure.
            if os.path.exists(temp_dir):
                shutil.rmtree(temp_dir)
            raise Exception(f"Failed to clone repository: {str(e)}")

    def chunk_code_content(self, content: str, file_path: str, max_chunk_size: int = 1000) -> List[Dict]:
        """Split code into meaningful chunks.

        Small files become one 'full_file' chunk; larger files are split at
        function/class boundaries or when a chunk reaches max_chunk_size.
        """
        chunks = []
        lines = content.split('\n')

        # For small files, return as single chunk
        if len(content) <= max_chunk_size:
            return [{
                'content': content,
                'file_path': file_path,
                'chunk_index': 0,
                'start_line': 1,
                'end_line': len(lines),
                'chunk_type': 'full_file'
            }]

        # For larger files, split by functions/classes or line count
        current_chunk = []
        current_size = 0
        chunk_index = 0
        start_line = 1

        for i, line in enumerate(lines, 1):
            current_chunk.append(line)
            current_size += len(line) + 1  # +1 for newline

            # Split on function/class definitions or when chunk gets too large
            is_function_start = any(line.strip().startswith(keyword) for keyword in
                                    ['def ', 'function ', 'class ', 'interface ', 'public class'])

            if (current_size >= max_chunk_size) or (is_function_start and len(current_chunk) > 1):
                if len(current_chunk) > 1:  # Don't create empty chunks
                    chunks.append({
                        # When splitting at a definition, keep that line for
                        # the next chunk so the definition stays whole.
                        'content': '\n'.join(current_chunk[:-1] if is_function_start else current_chunk),
                        'file_path': file_path,
                        'chunk_index': chunk_index,
                        'start_line': start_line,
                        'end_line': i - (1 if is_function_start else 0),
                        'chunk_type': 'code_block'
                    })
                    chunk_index += 1
                    start_line = i if is_function_start else i + 1
                    current_chunk = [line] if is_function_start else []
                    current_size = len(line) + 1 if is_function_start else 0

        # Add remaining chunk
        if current_chunk:
            chunks.append({
                'content': '\n'.join(current_chunk),
                'file_path': file_path,
                'chunk_index': chunk_index,
                'start_line': start_line,
                'end_line': len(lines),
                'chunk_type': 'code_block'
            })

        return chunks

    async def extract_code_files(self, repo_path: str) -> List[Dict]:
        """Extract and chunk all supported code files from the repository.

        Fix vs. original: the extension filter now runs before stat(), and
        stat() is inside the try block — a dangling symlink or permission
        error no longer aborts the whole extraction.
        """
        code_chunks = []
        total_files = 0

        logger.info(f"📁 Extracting code files from {repo_path}")

        for root, dirs, files in os.walk(repo_path):
            # Prune ignored directories in place so os.walk never descends.
            dirs[:] = [d for d in dirs if d not in self.ignore_dirs]

            for file in files:
                file_path = Path(root) / file

                # Only stat files we might actually index.
                if file_path.suffix not in self.supported_extensions:
                    continue

                try:
                    # Skip large files (>1MB); stat may raise on broken links.
                    if file_path.stat().st_size > 1024 * 1024:
                        continue

                    with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                        content = f.read()

                    # Skip empty files
                    if not content.strip():
                        continue

                    relative_path = str(file_path.relative_to(repo_path))

                    # Chunk the file content
                    chunks = self.chunk_code_content(content, relative_path)
                    code_chunks.extend(chunks)
                    total_files += 1

                    if total_files % 50 == 0:
                        logger.info(f"📊 Processed {total_files} files, {len(code_chunks)} chunks so far...")

                except Exception as e:
                    logger.warning(f"⚠️ Error reading file {file_path}: {e}")
                    continue

        logger.info(f"✅ Extracted {len(code_chunks)} code chunks from {total_files} files")
        return code_chunks

    def cleanup_temp_dir(self, temp_dir: str):
        """Clean up temporary directory (best-effort; failures only warn)."""
        try:
            if os.path.exists(temp_dir):
                shutil.rmtree(temp_dir)
                logger.info(f"🧹 Cleaned up temporary directory: {temp_dir}")
        except Exception as e:
            logger.warning(f"⚠️ Failed to cleanup {temp_dir}: {e}")
|
app/services/vector_service.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import chromadb
|
| 2 |
+
from chromadb.config import Settings
|
| 3 |
+
import os
|
| 4 |
+
from typing import List, Dict, Optional
|
| 5 |
+
import logging
|
| 6 |
+
import numpy as np
|
| 7 |
+
|
| 8 |
+
logger = logging.getLogger(__name__)
|
| 9 |
+
|
| 10 |
+
class VectorService:
    """Stores and searches code-chunk embeddings in a local ChromaDB instance."""

    def __init__(self):
        # Persistent on-disk store; survives process restarts.
        self.client = chromadb.PersistentClient(
            path="./chroma_db",
            settings=Settings(
                anonymized_telemetry=False,
                allow_reset=True
            )
        )
        logger.info("🗄️ ChromaDB client initialized")

    def create_collection(self, repository_id: int) -> chromadb.Collection:
        """Return the per-repository collection, creating it if missing."""
        collection_name = f"repo_{repository_id}"

        try:
            collection = self.client.get_collection(collection_name)
            logger.info(f"📚 Using existing collection: {collection_name}")
        except Exception:
            # Fix: bare `except:` narrowed — never swallow KeyboardInterrupt/SystemExit.
            collection = self.client.create_collection(
                name=collection_name,
                metadata={"repository_id": repository_id}
            )
            logger.info(f"🆕 Created new collection: {collection_name}")

        return collection

    async def store_embeddings(self, repository_id: int, embedded_chunks: List[Dict]):
        """Persist embedded chunks (documents + vectors + metadata) in batches."""
        logger.info(f"💾 Storing {len(embedded_chunks)} embeddings for repository {repository_id}")

        collection = self.create_collection(repository_id)

        documents = []
        embeddings = []
        metadatas = []
        ids = []

        for i, chunk in enumerate(embedded_chunks):
            # Include the enumeration index so IDs stay unique even if two
            # chunks share a chunk_index (e.g. same index in different files).
            chunk_id = f"chunk_{repository_id}_{chunk['chunk_index']}_{i}"

            documents.append(chunk['content'])
            embeddings.append(chunk['embedding'])
            metadatas.append({
                'file_path': chunk['file_path'],
                'start_line': chunk['start_line'],
                'end_line': chunk['end_line'],
                'chunk_type': chunk['chunk_type'],
                'content_length': chunk['content_length'],
                'repository_id': repository_id
            })
            ids.append(chunk_id)

        # Add in batches to keep individual requests small.
        batch_size = 100
        for i in range(0, len(documents), batch_size):
            end_idx = min(i + batch_size, len(documents))

            collection.add(
                documents=documents[i:end_idx],
                embeddings=embeddings[i:end_idx],
                metadatas=metadatas[i:end_idx],
                ids=ids[i:end_idx]
            )

        logger.info(f"✅ Successfully stored all embeddings for repository {repository_id}")

    async def search_similar_code(self, repository_id: int, query_embedding: List[float], top_k: int = 5) -> List[Dict]:
        """Return up to top_k chunks most similar to the query embedding.

        Returns an empty list when the repository has no collection.
        """
        collection_name = f"repo_{repository_id}"

        try:
            collection = self.client.get_collection(collection_name)
        except Exception:
            # Fix: bare `except:` narrowed to Exception.
            logger.warning(f"⚠️ Collection {collection_name} not found")
            return []

        results = collection.query(
            query_embeddings=[query_embedding],
            n_results=top_k,
            include=['documents', 'metadatas', 'distances']
        )

        search_results = []
        for i in range(len(results['documents'][0])):
            distance = results['distances'][0][i]
            # Convert distance to similarity (higher is better), clamped at 0.
            similarity = max(0.0, 1.0 - distance)

            search_results.append({
                'content': results['documents'][0][i],
                'metadata': results['metadatas'][0][i],
                'similarity': similarity,
                'file_path': results['metadatas'][0][i]['file_path'],
                'start_line': results['metadatas'][0][i]['start_line'],
                'end_line': results['metadatas'][0][i]['end_line']
            })

        # Sort by similarity (highest first)
        search_results.sort(key=lambda x: x['similarity'], reverse=True)

        logger.info(f"🔍 Found {len(search_results)} similar code chunks")
        return search_results

    def delete_repository_data(self, repository_id: int):
        """Drop a repository's collection; warns (does not raise) if absent."""
        collection_name = f"repo_{repository_id}"

        try:
            self.client.delete_collection(collection_name)
            logger.info(f"🗑️ Deleted collection: {collection_name}")
        except Exception:
            # Fix: bare `except:` narrowed to Exception.
            logger.warning(f"⚠️ Collection {collection_name} not found for deletion")
|
app/utils/__init__.py
ADDED
|
File without changes
|
migrations/README
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Generic single-database configuration.
|
migrations/env.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from logging.config import fileConfig
|
| 2 |
+
from sqlalchemy import engine_from_config
|
| 3 |
+
from sqlalchemy import pool
|
| 4 |
+
from alembic import context
|
| 5 |
+
import os
|
| 6 |
+
import sys
|
| 7 |
+
|
| 8 |
+
# Add the app directory to the path
|
| 9 |
+
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
|
| 10 |
+
|
| 11 |
+
from app.core.database import Base
|
| 12 |
+
# Remove user import - we don't need it anymore
|
| 13 |
+
# from app.models.user import User # ❌ REMOVED
|
| 14 |
+
from app.models.repository import Repository
|
| 15 |
+
from app.models.conversation import Conversation, Message
|
| 16 |
+
|
| 17 |
+
# this is the Alembic Config object
|
| 18 |
+
config = context.config
|
| 19 |
+
|
| 20 |
+
# Interpret the config file for Python logging
|
| 21 |
+
if config.config_file_name is not None:
|
| 22 |
+
fileConfig(config.config_file_name)
|
| 23 |
+
|
| 24 |
+
# Set the target metadata
|
| 25 |
+
target_metadata = Base.metadata
|
| 26 |
+
|
| 27 |
+
def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode: emit SQL without a live DB connection."""
    db_url = config.get_main_option("sqlalchemy.url")
    context.configure(
        url=db_url,
        target_metadata=target_metadata,
        literal_binds=True,
        dialect_opts={"paramstyle": "named"},
    )

    with context.begin_transaction():
        context.run_migrations()
|
| 38 |
+
|
| 39 |
+
def run_migrations_online() -> None:
    """Run migrations in 'online' mode against a live database connection."""
    engine = engine_from_config(
        config.get_section(config.config_ini_section, {}),
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    with engine.connect() as connection:
        context.configure(
            connection=connection, target_metadata=target_metadata
        )

        with context.begin_transaction():
            context.run_migrations()
|
| 53 |
+
|
| 54 |
+
# Entry point: Alembic selects offline (SQL emit) vs. online (live DB) mode.
if context.is_offline_mode():
    run_migrations_offline()
else:
    run_migrations_online()
|
migrations/script.py.mako
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""${message}
|
| 2 |
+
|
| 3 |
+
Revision ID: ${up_revision}
|
| 4 |
+
Revises: ${down_revision | comma,n}
|
| 5 |
+
Create Date: ${create_date}
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
from typing import Sequence, Union
|
| 9 |
+
|
| 10 |
+
from alembic import op
|
| 11 |
+
import sqlalchemy as sa
|
| 12 |
+
${imports if imports else ""}
|
| 13 |
+
|
| 14 |
+
# revision identifiers, used by Alembic.
|
| 15 |
+
revision: str = ${repr(up_revision)}
|
| 16 |
+
down_revision: Union[str, None] = ${repr(down_revision)}
|
| 17 |
+
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
|
| 18 |
+
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def upgrade() -> None:
|
| 22 |
+
${upgrades if upgrades else "pass"}
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def downgrade() -> None:
|
| 26 |
+
${downgrades if downgrades else "pass"}
|
migrations/versions/16e292816c22_initial_migration.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Initial migration
|
| 2 |
+
|
| 3 |
+
Revision ID: 16e292816c22
|
| 4 |
+
Revises:
|
| 5 |
+
Create Date: 2025-10-23 20:12:28.092984
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
from typing import Sequence, Union
|
| 9 |
+
|
| 10 |
+
from alembic import op
|
| 11 |
+
import sqlalchemy as sa
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
# revision identifiers, used by Alembic.
|
| 15 |
+
revision: str = '16e292816c22'
|
| 16 |
+
down_revision: Union[str, None] = None
|
| 17 |
+
branch_labels: Union[str, Sequence[str], None] = None
|
| 18 |
+
depends_on: Union[str, Sequence[str], None] = None
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def upgrade() -> None:
    """Create the initial schema: users, repositories, conversations, messages."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table('users',
    sa.Column('id', sa.Integer(), nullable=False),
    sa.Column('email', sa.String(), nullable=False),
    sa.Column('name', sa.String(), nullable=False),
    sa.Column('hashed_password', sa.String(), nullable=False),
    sa.Column('is_active', sa.Boolean(), nullable=True),
    sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True),
    sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True),
    sa.PrimaryKeyConstraint('id')
    )
    op.create_index(op.f('ix_users_email'), 'users', ['email'], unique=True)
    op.create_index(op.f('ix_users_id'), 'users', ['id'], unique=False)
    op.create_table('repositories',
    sa.Column('id', sa.Integer(), nullable=False),
    sa.Column('user_id', sa.Integer(), nullable=False),
    sa.Column('github_url', sa.String(), nullable=False),
    sa.Column('name', sa.String(), nullable=False),
    sa.Column('status', sa.Enum('PENDING', 'PROCESSING', 'READY', 'FAILED', name='repositorystatus'), nullable=True),
    sa.Column('error_message', sa.String(), nullable=True),
    sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True),
    sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True),
    sa.ForeignKeyConstraint(['user_id'], ['users.id'], ),
    sa.PrimaryKeyConstraint('id')
    )
    op.create_index(op.f('ix_repositories_id'), 'repositories', ['id'], unique=False)
    op.create_table('conversations',
    sa.Column('id', sa.Integer(), nullable=False),
    sa.Column('repository_id', sa.Integer(), nullable=False),
    sa.Column('title', sa.String(), nullable=False),
    sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True),
    sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True),
    sa.ForeignKeyConstraint(['repository_id'], ['repositories.id'], ),
    sa.PrimaryKeyConstraint('id')
    )
    op.create_index(op.f('ix_conversations_id'), 'conversations', ['id'], unique=False)
    op.create_table('messages',
    sa.Column('id', sa.Integer(), nullable=False),
    sa.Column('conversation_id', sa.Integer(), nullable=False),
    sa.Column('role', sa.String(), nullable=False),
    sa.Column('content', sa.String(), nullable=False),
    sa.Column('citations', sa.JSON(), nullable=True),
    sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True),
    sa.ForeignKeyConstraint(['conversation_id'], ['conversations.id'], ),
    sa.PrimaryKeyConstraint('id')
    )
    op.create_index(op.f('ix_messages_id'), 'messages', ['id'], unique=False)
    # ### end Alembic commands ###
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def downgrade() -> None:
    """Drop all initial tables in reverse dependency order (FKs first)."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_index(op.f('ix_messages_id'), table_name='messages')
    op.drop_table('messages')
    op.drop_index(op.f('ix_conversations_id'), table_name='conversations')
    op.drop_table('conversations')
    op.drop_index(op.f('ix_repositories_id'), table_name='repositories')
    op.drop_table('repositories')
    op.drop_index(op.f('ix_users_id'), table_name='users')
    op.drop_index(op.f('ix_users_email'), table_name='users')
    op.drop_table('users')
    # ### end Alembic commands ###
|
migrations/versions/2e8f053488b9_clean_qodex_architecture_no_user_.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Clean QODEX architecture - no user dependencies
|
| 2 |
+
|
| 3 |
+
Revision ID: 2e8f053488b9
|
| 4 |
+
Revises: 16e292816c22
|
| 5 |
+
Create Date: 2025-10-25 19:08:49.834310
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
from typing import Sequence, Union
|
| 9 |
+
|
| 10 |
+
from alembic import op
|
| 11 |
+
import sqlalchemy as sa
|
| 12 |
+
from sqlalchemy.dialects import postgresql
|
| 13 |
+
|
| 14 |
+
# revision identifiers, used by Alembic.
|
| 15 |
+
revision: str = '2e8f053488b9'
|
| 16 |
+
down_revision: Union[str, None] = '16e292816c22'
|
| 17 |
+
branch_labels: Union[str, Sequence[str], None] = None
|
| 18 |
+
depends_on: Union[str, Sequence[str], None] = None
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def upgrade() -> None:
    """Widen message content to Text, rename the status enum, add github_url uniqueness."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.alter_column('messages', 'content',
               existing_type=sa.VARCHAR(),
               type_=sa.Text(),
               existing_nullable=False)
    op.alter_column('repositories', 'status',
               existing_type=postgresql.ENUM('PENDING', 'PROCESSING', 'READY', 'FAILED', name='repositorystatus'),
               type_=sa.Enum('PENDING', 'PROCESSING', 'READY', 'FAILED', name='repositorystatusenum'),
               existing_nullable=True)
    # NOTE(review): the constraint name is None, so the backend auto-names it;
    # the matching drop_constraint(None, ...) in downgrade may fail on some
    # backends — consider an explicit constraint name. TODO confirm.
    op.create_unique_constraint(None, 'repositories', ['github_url'])
    # ### end Alembic commands ###
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def downgrade() -> None:
    """Revert: drop github_url uniqueness, restore old enum name and VARCHAR content."""
    # ### commands auto generated by Alembic - please adjust! ###
    # NOTE(review): drop_constraint(None, ...) relies on the backend resolving
    # the auto-generated constraint name; this can fail (e.g. on SQLite) —
    # TODO confirm and consider naming the constraint explicitly in upgrade().
    op.drop_constraint(None, 'repositories', type_='unique')
    op.alter_column('repositories', 'status',
               existing_type=sa.Enum('PENDING', 'PROCESSING', 'READY', 'FAILED', name='repositorystatusenum'),
               type_=postgresql.ENUM('PENDING', 'PROCESSING', 'READY', 'FAILED', name='repositorystatus'),
               existing_nullable=True)
    op.alter_column('messages', 'content',
               existing_type=sa.Text(),
               type_=sa.VARCHAR(),
               existing_nullable=False)
    # ### end Alembic commands ###
|
render.yaml
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
services:
|
| 2 |
+
- type: web
|
| 3 |
+
name: qodex-api
|
| 4 |
+
env: python
|
| 5 |
+
buildCommand: pip install -r requirements.txt
|
| 6 |
+
startCommand: uvicorn app.main:app --host 0.0.0.0 --port $PORT
|
| 7 |
+
envVars:
|
| 8 |
+
- key: ENVIRONMENT
|
| 9 |
+
value: production
|
| 10 |
+
- key: DEBUG
|
| 11 |
+
value: false
|
requirements.txt
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Core FastAPI stack (keep versions)
|
| 2 |
+
fastapi==0.104.1
|
| 3 |
+
uvicorn[standard]==0.24.0
|
| 4 |
+
pydantic==2.5.0
|
| 5 |
+
pydantic-settings==2.1.0
|
| 6 |
+
sqlalchemy==2.0.23
|
| 7 |
+
psycopg2-binary==2.9.9
|
| 8 |
+
|
| 9 |
+
# AI/ML stack (NO VERSION PINS - let pip resolve)
|
| 10 |
+
sentence-transformers
|
| 11 |
+
transformers
|
| 12 |
+
huggingface-hub
|
| 13 |
+
torch
|
| 14 |
+
numpy
|
| 15 |
+
chromadb
|
| 16 |
+
|
| 17 |
+
# Google AI
|
| 18 |
+
google-generativeai==0.3.1
|
| 19 |
+
|
| 20 |
+
# Utils (keep versions)
|
| 21 |
+
python-dotenv==1.0.0
|
| 22 |
+
python-multipart==0.0.6
|
| 23 |
+
aiofiles==23.2.1
|
| 24 |
+
requests==2.31.0
|
| 25 |
+
gitpython==3.1.40
|
| 26 |
+
python-jose[cryptography]==3.3.0
|
| 27 |
+
passlib[bcrypt]==1.7.4
|
run_server.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import uvicorn
|
| 2 |
+
|
| 3 |
+
if __name__ == "__main__":
    # Launch the FastAPI app for local development: bind to localhost only,
    # with auto-reload on code changes.
    server_options = {
        "host": "127.0.0.1",
        "port": 8000,
        "reload": True,
        "log_level": "info",
    }
    uvicorn.run("app.main:app", **server_options)
|
tests/__init__.py
ADDED
|
File without changes
|