garvitcpp commited on
Commit
a496aae
·
verified ·
1 Parent(s): 18ba1a0

Upload 35 files

Browse files
.gitignore ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
98
+ __pypackages__/
99
+
100
+ # Celery stuff
101
+ celerybeat-schedule
102
+ celerybeat.pid
103
+
104
+ # SageMath parsed files
105
+ *.sage.py
106
+
107
+ # Environments
108
+ .env
109
+ .venv
110
+ env/
111
+ venv/
112
+ ENV/
113
+ env.bak/
114
+ venv.bak/
115
+
116
+ # Spyder project settings
117
+ .spyderproject
118
+ .spyproject
119
+
120
+ # Rope project settings
121
+ .ropeproject
122
+
123
+ # mkdocs documentation
124
+ /site
125
+
126
+ # mypy
127
+ .mypy_cache/
128
+ .dmypy.json
129
+ dmypy.json
130
+
131
+ # Pyre type checker
132
+ .pyre/
133
+
134
+ # pytype static type analyzer
135
+ .pytype/
136
+
137
+ # Cython debug symbols
138
+ cython_debug/
139
+
140
+ # PyCharm
141
+ # JetBrains specific template is maintainted in a separate JetBrains.gitignore that can
142
+ # be found at https://github.com/github/gitignore/blob/master/Global/JetBrains.gitignore
143
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
144
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
145
+ #.idea/
146
+ # ChromaDB Vector Database (local data)
147
+ chroma_db/
148
+ *.bin
149
+ *.sqlite3
150
+
151
+ # Environment files
152
+ .env
153
+ .env.local
154
+
155
+ # Python cache
156
+ __pycache__/
157
+ *.pyc
158
+ *.pyo
159
+
160
+ # Virtual environment
161
+ venv/
162
+ env/
163
+
164
+ # IDE files
165
+ .vscode/
166
+ .idea/
167
+
168
+ # OS files
169
+ .DS_Store
170
+ Thumbs.db
171
+
172
+ # Logs
173
+ *.log
174
+
175
+ # Temporary files
176
+ temp/
177
+ tmp/
178
+
Dockerfile ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10.13-slim
2
+
3
+ WORKDIR /app
4
+
5
+ # Install system dependencies
6
+ RUN apt-get update && apt-get install -y \
7
+ git \
8
+ && rm -rf /var/lib/apt/lists/*
9
+
10
+ # Copy requirements first (for better caching)
11
+ COPY requirements.txt .
12
+
13
+ # Install Python dependencies
14
+ RUN pip install --no-cache-dir -r requirements.txt
15
+
16
+ # Copy application code
17
+ COPY . .
18
+
19
+ # Expose port (HuggingFace Spaces uses 7860)
20
+ EXPOSE 7860
21
+
22
+ # Run application (change port to 7860)
23
+ CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
alembic.ini ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # A generic, single database configuration.
2
+
3
+ [alembic]
4
+ # path to migration scripts
5
+ script_location = migrations
6
+
7
+ # template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
8
+ # Uncomment the line below if you want the files to be prepended with date and time
9
+ # see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
10
+ # for all available tokens
11
+ # file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
12
+
13
+ # sys.path path, will be prepended to sys.path if present.
14
+ # defaults to the current working directory.
15
+ prepend_sys_path = .
16
+
17
+ # timezone to use when rendering the date within the migration file
18
+ # as well as the filename.
19
+ # If specified, requires the python-dateutil library that can be
20
+ # installed by adding `alembic[tz]` to the pip requirements
21
+ # string value is passed to dateutil.tz.gettz()
22
+ # leave blank for localtime
23
+ # timezone =
24
+
25
+ # max length of characters to apply to the
26
+ # "slug" field
27
+ # truncate_slug_length = 40
28
+
29
+ # set to 'true' to run the environment during
30
+ # the 'revision' command, regardless of autogenerate
31
+ # revision_environment = false
32
+
33
+ # set to 'true' to allow .pyc and .pyo files without
34
+ # a source .py file to be detected as revisions in the
35
+ # versions/ directory
36
+ # sourceless = false
37
+
38
+ # version location specification; This defaults
39
+ # to migrations/versions. When using multiple version
40
+ # directories, initial revisions must be specified with --version-path.
41
+ # The path separator used here should be the separator specified by "version_path_separator" below.
42
+ # version_locations = %(here)s/bar:%(here)s/bat:migrations/versions
43
+
44
+ # version path separator; As mentioned above, this is the character used to split
45
+ # version_locations. The default within new alembic.ini files is "os", which uses os.pathsep.
46
+ # If this key is omitted entirely, it falls back to the legacy behavior of splitting on spaces and/or commas.
47
+ # Valid values for version_path_separator are:
48
+ #
49
+ # version_path_separator = :
50
+ # version_path_separator = ;
51
+ # version_path_separator = space
52
+ version_path_separator = os # Use os.pathsep. Default configuration used for new projects.
53
+
54
+ # set to 'true' to search source files recursively
55
+ # in each "version_locations" directory
56
+ # new in Alembic version 1.10
57
+ # recursive_version_locations = false
58
+
59
+ # the output encoding used when revision files
60
+ # are written from script.py.mako
61
+ # output_encoding = utf-8
62
+
63
+ sqlalchemy.url = postgresql://codequery_user:codequery_pass_2025@localhost:5432/codequery_dev
64
+
65
+
66
+ [post_write_hooks]
67
+ # post_write_hooks defines scripts or Python functions that are run
68
+ # on newly generated revision scripts. See the documentation for further
69
+ # detail and examples
70
+
71
+ # format using "black" - use the console_scripts runner, against the "black" entrypoint
72
+ # hooks = black
73
+ # black.type = console_scripts
74
+ # black.entrypoint = black
75
+ # black.options = -l 79 REVISION_SCRIPT_FILENAME
76
+
77
+ # lint with attempts to fix using "ruff" - use the exec runner, execute a binary
78
+ # hooks = ruff
79
+ # ruff.type = exec
80
+ # ruff.executable = %(here)s/.venv/bin/ruff
81
+ # ruff.options = --fix REVISION_SCRIPT_FILENAME
82
+
83
+ # Logging configuration
84
+ [loggers]
85
+ keys = root,sqlalchemy,alembic
86
+
87
+ [handlers]
88
+ keys = console
89
+
90
+ [formatters]
91
+ keys = generic
92
+
93
+ [logger_root]
94
+ level = WARN
95
+ handlers = console
96
+ qualname =
97
+
98
+ [logger_sqlalchemy]
99
+ level = WARN
100
+ handlers =
101
+ qualname = sqlalchemy.engine
102
+
103
+ [logger_alembic]
104
+ level = INFO
105
+ handlers =
106
+ qualname = alembic
107
+
108
+ [handler_console]
109
+ class = StreamHandler
110
+ args = (sys.stderr,)
111
+ level = NOTSET
112
+ formatter = generic
113
+
114
+ [formatter_generic]
115
+ format = %(levelname)-5.5s [%(name)s] %(message)s
116
+ datefmt = %H:%M:%S
app/__init__.py ADDED
File without changes
app/api/__init__.py ADDED
File without changes
app/api/v1/__init__.py ADDED
File without changes
app/api/v1/chat.py ADDED
@@ -0,0 +1,370 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, Depends, HTTPException, Header
2
+ from sqlalchemy.orm import Session
3
+ from typing import List
4
+ from app.database import get_db
5
+ from app.models.repository import Repository, RepositoryStatusEnum
6
+ from app.models.conversation import Conversation, Message
7
+ from app.core.config import settings
8
+ from app.services import EmbeddingService, VectorService, ChatService
9
+ from pydantic import BaseModel
10
+ import logging
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+ router = APIRouter()
15
+
16
# Define the models directly in this file
class ChatRequest(BaseModel):
    """Request model for chat with repository."""
    # Natural-language question to answer against the repository's code.
    query: str
    # Primary key of the target repository; ownership is enforced per request.
    repository_id: int

22
class ChatResponse(BaseModel):
    """Response model for chat."""
    # AI-generated answer text (or a fallback message when no chunks matched).
    response: str
    # Source citations as returned by the chat service (shape defined there).
    sources: List[dict]
    # Display name of the repository that was queried.
    repository_name: str
    # Number of retrieved code chunks fed to the model (0 on no-result fallback).
    context_chunks_used: int
    # Identifier of the model that produced the answer ("no_results" on fallback).
    model_used: str
    # False when retrieval or generation did not produce a usable answer.
    success: bool

31
def verify_client_secret(x_client_secret: str = Header(..., alias="X-Client-Secret")):
    """Verify request comes from authorized Next.js client.

    Compares the ``X-Client-Secret`` header against the configured shared
    secret and raises HTTP 403 on mismatch.
    """
    import hmac  # local import keeps the module's import block untouched

    # FIX: use a constant-time comparison so attackers cannot recover the
    # secret byte-by-byte via response-timing differences ("!=" short-circuits).
    if not hmac.compare_digest(x_client_secret, settings.nextjs_secret):
        raise HTTPException(
            status_code=403,
            detail="Unauthorized client - invalid secret"
        )
    return True
40
def get_user_id(x_user_id: str = Header(..., alias="X-User-ID")):
    """Extract and validate user ID from header"""
    # Normalize once; an absent/blank header is rejected the same way.
    cleaned = x_user_id.strip() if x_user_id else ""
    if not cleaned:
        raise HTTPException(status_code=400, detail="User ID required")
    return cleaned
46
def verify_repository_ownership(repository_id: int, user_id: str, db: Session):
    """Verify user owns the repository"""
    # A single query enforces both existence and ownership, so the 404
    # deliberately does not reveal whether the repo exists for another user.
    owned = (
        db.query(Repository)
        .filter(
            Repository.id == repository_id,
            Repository.user_id == user_id,
        )
        .first()
    )
    if owned is None:
        raise HTTPException(status_code=404, detail="Repository not found or access denied")
    return owned
58
@router.post("/", response_model=ChatResponse)
async def chat_with_repository(
    request: ChatRequest,
    db: Session = Depends(get_db),
    user_id: str = Depends(get_user_id),
    _: bool = Depends(verify_client_secret)
):
    """Chat with a repository using QODEX AI.

    Pipeline: verify ownership -> embed the query -> vector search -> LLM
    answer -> persist the exchange (best effort).

    Raises:
        HTTPException 400: repository is not in READY state.
        HTTPException 404: repository missing or not owned by the caller.
        HTTPException 500: unexpected failure in the retrieval/generation pipeline.
    """
    logger.info(f"💬 QODEX Chat: '{request.query[:60]}...' for repo {request.repository_id} (user: {user_id})")

    # Verify repository ownership (raises 404 on miss).
    repository = verify_repository_ownership(request.repository_id, user_id, db)

    if repository.status != RepositoryStatusEnum.READY:
        # Map each non-ready state to a caller-friendly explanation.
        status_messages = {
            RepositoryStatusEnum.PENDING: "Repository is pending processing. Please wait.",
            RepositoryStatusEnum.PROCESSING: "Repository is currently being processed. Please wait.",
            RepositoryStatusEnum.FAILED: f"Repository processing failed: {repository.error_message}"
        }
        raise HTTPException(
            status_code=400,
            detail=status_messages.get(repository.status, "Repository not ready for chat")
        )

    try:
        # Initialize services per request (they are lightweight wrappers).
        embedding_service = EmbeddingService()
        vector_service = VectorService()
        chat_service = ChatService()

        # Generate query embedding
        logger.info("🔍 Generating embedding for query...")
        query_embedding = await embedding_service.generate_query_embedding(request.query)

        # Search for similar code chunks
        logger.info("🔎 Searching for relevant code chunks...")
        similar_chunks = await vector_service.search_similar_code(
            repository_id=request.repository_id,
            query_embedding=query_embedding,
            top_k=5
        )

        if not similar_chunks:
            # No retrieval hits: return a helpful fallback instead of erroring.
            logger.warning(f"⚠️ No relevant chunks found for query in repo {request.repository_id}")
            return ChatResponse(
                response="I couldn't find any relevant code chunks for your question. Try asking about something more specific to this repository, or check if the repository was processed correctly.",
                sources=[],
                repository_name=repository.name,
                context_chunks_used=0,
                model_used="no_results",
                success=False
            )

        logger.info(f"✅ Found {len(similar_chunks)} relevant code chunks")

        # Generate AI response
        logger.info("🤖 Generating AI response with Gemini...")
        ai_response = await chat_service.generate_response(
            query=request.query,
            code_chunks=similar_chunks,
            repository_name=repository.name
        )

        # Save conversation if successful (best effort: persistence failures
        # must never block delivering the answer to the user).
        if ai_response['success']:
            try:
                # One conversation per repository; create it lazily on first chat.
                conversation = db.query(Conversation).filter(
                    Conversation.repository_id == request.repository_id
                ).first()

                if not conversation:
                    conversation = Conversation(
                        repository_id=request.repository_id,
                        title=f"Chat about {repository.name}"
                    )
                    db.add(conversation)
                    db.commit()
                    db.refresh(conversation)

                # Save user message
                user_message = Message(
                    conversation_id=conversation.id,
                    role="user",
                    content=request.query
                )
                db.add(user_message)

                # Save assistant response
                assistant_message = Message(
                    conversation_id=conversation.id,
                    role="assistant",
                    content=ai_response['response'],
                    citations=ai_response['sources']
                )
                db.add(assistant_message)

                db.commit()
                logger.info(f"💾 Saved conversation for repo {request.repository_id} (user: {user_id})")

            except Exception as save_error:
                # FIX: roll back the failed transaction. Without this, a failed
                # commit leaves the SQLAlchemy session in an aborted state and
                # any later use of `db` in this request would also fail.
                db.rollback()
                logger.warning(f"⚠️ Failed to save conversation: {save_error}")
                # Continue anyway - don't fail the response

        logger.info(f"🎉 QODEX chat successful for repo {request.repository_id} (user: {user_id})")

        return ChatResponse(
            response=ai_response['response'],
            sources=ai_response['sources'],
            repository_name=repository.name,
            context_chunks_used=len(similar_chunks),
            model_used=ai_response['model_used'],
            success=ai_response['success']
        )

    except Exception as e:
        logger.error(f"❌ Error in QODEX chat processing: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to process chat request: {str(e)}"
        )
180
# ✅ NEW: Direct messages route (Option 1 solution!)
@router.get("/{repository_id}/messages")
async def get_repository_chat_messages(
    repository_id: int,
    db: Session = Depends(get_db),
    user_id: str = Depends(get_user_id),
    _: bool = Depends(verify_client_secret)
):
    """Get all chat messages for a repository directly - SINGLE API CALL!"""

    # Ownership gate: raises 404 if the repo isn't the caller's.
    repo = verify_repository_ownership(repository_id, user_id, db)

    # Start from the empty payload and fill it in when a conversation exists.
    payload = {
        "repository_id": repository_id,
        "repository_name": repo.name,
        "user_id": user_id,
        "conversation_id": None,
        "messages": [],
        "total_messages": 0,
    }

    convo = (
        db.query(Conversation)
        .filter(Conversation.repository_id == repository_id)
        .first()
    )
    if convo is None:
        return payload

    history = (
        db.query(Message)
        .filter(Message.conversation_id == convo.id)
        .order_by(Message.created_at.asc())
        .all()
    )

    payload["conversation_id"] = convo.id
    payload["messages"] = [
        {
            "id": m.id,
            "role": m.role,
            "content": m.content,
            "citations": m.citations,
            "created_at": m.created_at,
        }
        for m in history
    ]
    payload["total_messages"] = len(history)
    return payload
231
@router.get("/{repository_id}/conversations")
async def get_repository_conversations(
    repository_id: int,
    db: Session = Depends(get_db),
    user_id: str = Depends(get_user_id),
    _: bool = Depends(verify_client_secret)
):
    """Get all conversations for a repository (user must own it)"""

    # Ownership gate: raises 404 if the repo isn't the caller's.
    repo = verify_repository_ownership(repository_id, user_id, db)

    # Newest conversations first.
    convos = (
        db.query(Conversation)
        .filter(Conversation.repository_id == repository_id)
        .order_by(Conversation.created_at.desc())
        .all()
    )

    return {
        "repository_id": repository_id,
        "repository_name": repo.name,
        "user_id": user_id,
        "conversations": convos,
        "total_conversations": len(convos),
    }
255
@router.get("/conversations/{conversation_id}/messages")
async def get_conversation_messages(
    conversation_id: int,
    db: Session = Depends(get_db),
    user_id: str = Depends(get_user_id),
    _: bool = Depends(verify_client_secret)
):
    """Get all messages in a conversation (user must own the repository)"""

    convo = db.query(Conversation).filter(Conversation.id == conversation_id).first()
    if convo is None:
        raise HTTPException(status_code=404, detail="Conversation not found")

    # Delegated ownership check: raises 404 if the caller doesn't own the repo.
    verify_repository_ownership(convo.repository_id, user_id, db)

    # Chronological order, oldest first.
    ordered = (
        db.query(Message)
        .filter(Message.conversation_id == conversation_id)
        .order_by(Message.created_at.asc())
        .all()
    )

    return {
        "conversation_id": conversation_id,
        "repository_id": convo.repository_id,
        "user_id": user_id,
        "messages": ordered,
        "total_messages": len(ordered),
    }
283
# ✅ NEW: User-specific chat routes
@router.get("/users/{target_user_id}/conversations")
async def get_user_all_conversations(
    target_user_id: str,
    db: Session = Depends(get_db),
    user_id: str = Depends(get_user_id),
    _: bool = Depends(verify_client_secret)
):
    """Get all conversations for a specific user across all their repositories"""

    # Callers may only read their own conversation history.
    if user_id != target_user_id:
        raise HTTPException(status_code=403, detail="Access denied - can only access your own conversations")

    # Collect the ids of every repository owned by this user.
    owned_repo_ids = [
        repo.id
        for repo in db.query(Repository).filter(Repository.user_id == target_user_id).all()
    ]

    if not owned_repo_ids:
        return {
            "user_id": target_user_id,
            "total_conversations": 0,
            "conversations": []
        }

    # All conversations across those repositories, newest first.
    recent_first = (
        db.query(Conversation)
        .filter(Conversation.repository_id.in_(owned_repo_ids))
        .order_by(Conversation.created_at.desc())
        .all()
    )

    return {
        "user_id": target_user_id,
        "total_conversations": len(recent_first),
        "conversations": [
            {
                "id": conv.id,
                "repository_id": conv.repository_id,
                "repository_name": conv.repository.name,
                "title": conv.title,
                "created_at": conv.created_at,
                "message_count": len(conv.messages),
            }
            for conv in recent_first
        ],
    }
329
@router.post("/{repository_id}/test")
async def test_repository_search(
    repository_id: int,
    query: str = "main function",
    db: Session = Depends(get_db),
    user_id: str = Depends(get_user_id),
    _: bool = Depends(verify_client_secret)
):
    """Test endpoint to verify repository search functionality (user must own it).

    Embeds *query*, runs a top-3 vector search against the repository, and
    returns a small report of the matches.

    Raises:
        HTTPException 400: repository is not READY.
        HTTPException 404: repository missing or not owned by the caller.
        HTTPException 500: the embedding/search pipeline failed.
    """

    # Verify repository ownership
    repository = verify_repository_ownership(repository_id, user_id, db)

    if repository.status != RepositoryStatusEnum.READY:
        raise HTTPException(status_code=400, detail="Repository not ready")

    try:
        embedding_service = EmbeddingService()
        vector_service = VectorService()

        query_embedding = await embedding_service.generate_query_embedding(query)
        results = await vector_service.search_similar_code(repository_id, query_embedding, top_k=3)

        top_matches = []
        for result in results:
            content = result['content']
            # FIX: only append the ellipsis when the preview actually truncated
            # the content (the original always appended "...", misrepresenting
            # short chunks as truncated).
            preview = content[:200] + "..." if len(content) > 200 else content
            top_matches.append({
                "file": result['file_path'],
                "lines": f"{result['start_line']}-{result['end_line']}",
                "similarity": round(result['similarity'], 3),
                "preview": preview
            })

        return {
            "repository": repository.name,
            "user_id": user_id,
            "query": query,
            "results_found": len(results),
            "top_matches": top_matches,
            "test_successful": len(results) > 0
        }

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Test failed: {str(e)}")
app/api/v1/repositories.py ADDED
@@ -0,0 +1,277 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, Depends, HTTPException, BackgroundTasks, Header
2
+ from sqlalchemy.orm import Session
3
+ from typing import List
4
+ from app.database import get_db
5
+ from app.models.repository import Repository, RepositoryStatusEnum
6
+ from app.schemas.repository import RepositoryCreate, RepositoryResponse
7
+ from app.core.config import settings
8
+ from app.services import GitHubService, EmbeddingService, VectorService
9
+ import logging
10
+
11
+ logging.basicConfig(level=logging.INFO)
12
+ logger = logging.getLogger(__name__)
13
+
14
+ router = APIRouter()
15
+
16
async def process_repository_background(repository_id: int, user_id: str):
    """Background task to process repository with hybrid RAG.

    Steps: clone -> extract code files -> generate local embeddings -> store
    in ChromaDB, updating the repository status (PROCESSING -> READY/FAILED)
    along the way. Owns its own DB session because it runs outside the
    request/response lifecycle.
    """
    logger.info(f"🚀 Starting QODEX HYBRID RAG processing for repository {repository_id} (user: {user_id})")

    # Local import avoids a circular import at module load time —
    # presumably; confirm against app.database's import graph.
    from app.database import SessionLocal
    db = SessionLocal()

    github_service = GitHubService()
    embedding_service = EmbeddingService()
    vector_service = VectorService()

    temp_dir = None

    try:
        # Ownership-scoped lookup: silently bail if the repo vanished or
        # belongs to someone else (nothing to report back to a caller here).
        repository = db.query(Repository).filter(
            Repository.id == repository_id,
            Repository.user_id == user_id
        ).first()

        if not repository:
            logger.error(f"❌ Repository {repository_id} not found for user {user_id}")
            return

        repository.status = RepositoryStatusEnum.PROCESSING
        db.commit()
        logger.info(f"📊 Repository {repository_id} status: PROCESSING")

        logger.info(f"📥 Step 1: Cloning repository {repository.github_url}")
        temp_dir = await github_service.clone_repository(repository.github_url)

        logger.info(f"📁 Step 2: Extracting code files from {repository.name}")
        code_chunks = await github_service.extract_code_files(temp_dir)

        if not code_chunks:
            raise Exception("No supported code files found in repository")

        logger.info(f"✅ Found {len(code_chunks)} code chunks")

        logger.info("⚡ Step 3: Generating embeddings with LOCAL SentenceTransformers")
        embedded_chunks = await embedding_service.generate_embeddings_batch(code_chunks)

        if not embedded_chunks:
            raise Exception("Failed to generate local embeddings")

        logger.info("💾 Step 4: Storing embeddings in ChromaDB")
        await vector_service.store_embeddings(repository_id, embedded_chunks)

        repository.status = RepositoryStatusEnum.READY
        repository.error_message = None
        db.commit()

        logger.info(f"🎉 SUCCESS! QODEX Repository {repository_id} is READY for chat! (user: {user_id})")

    except Exception as e:
        error_message = str(e)
        logger.error(f"❌ Error processing repository {repository_id}: {error_message}")

        try:
            # FIX: reset the session before reusing it — if the failure happened
            # mid-transaction (e.g. a failed commit), the session is in an
            # aborted state and the status update below would also fail.
            db.rollback()
            repository = db.query(Repository).filter(Repository.id == repository_id).first()
            if repository:
                repository.status = RepositoryStatusEnum.FAILED
                repository.error_message = error_message[:500]  # guard against oversized column value
                db.commit()
        except Exception as db_error:
            logger.error(f"❌ Failed to update repository status: {str(db_error)}")

    finally:
        # Always reclaim the clone's temp directory and the DB session.
        if temp_dir:
            github_service.cleanup_temp_dir(temp_dir)
        db.close()
87
def verify_client_secret(x_client_secret: str = Header(..., alias="X-Client-Secret")):
    """Verify request comes from authorized Next.js client.

    Compares the ``X-Client-Secret`` header against the configured shared
    secret and raises HTTP 403 on mismatch.
    """
    import hmac  # local import keeps the module's import block untouched

    # FIX: use a constant-time comparison so attackers cannot recover the
    # secret byte-by-byte via response-timing differences ("!=" short-circuits).
    if not hmac.compare_digest(x_client_secret, settings.nextjs_secret):
        raise HTTPException(
            status_code=403,
            detail="Unauthorized client - invalid secret"
        )
    return True
96
def get_user_id(x_user_id: str = Header(..., alias="X-User-ID")):
    """Extract and validate user ID from header"""
    # Normalize once; an absent/blank header is rejected the same way.
    cleaned = x_user_id.strip() if x_user_id else ""
    if not cleaned:
        raise HTTPException(status_code=400, detail="User ID required")
    return cleaned
102
@router.post("/", response_model=RepositoryResponse)
async def add_repository(
    repository: RepositoryCreate,
    background_tasks: BackgroundTasks,
    db: Session = Depends(get_db),
    user_id: str = Depends(get_user_id),
    _: bool = Depends(verify_client_secret)
):
    """Add new repository for QODEX processing"""

    # The identity in the header must match the identity claimed in the body.
    if repository.user_id != user_id:
        raise HTTPException(status_code=400, detail="User ID mismatch between header and body")

    logger.info(f"📥 NEW QODEX REQUEST: {repository.name} - {repository.github_url} (user: {user_id})")

    # Accept HTTPS and SSH GitHub remotes only.
    valid_prefixes = ('https://github.com/', 'git@github.com:')
    if not repository.github_url.startswith(valid_prefixes):
        raise HTTPException(status_code=400, detail="Invalid GitHub URL format")

    # Reject duplicates, scoped to this user.
    existing = (
        db.query(Repository)
        .filter(
            Repository.github_url == repository.github_url,
            Repository.user_id == user_id,
        )
        .first()
    )
    if existing is not None:
        raise HTTPException(
            status_code=400,
            detail=f"Repository already exists with ID: {existing.id}. Status: {existing.status.value}"
        )

    # Persist the new record in PENDING state.
    new_repo = Repository(
        name=repository.name,
        github_url=repository.github_url,
        user_id=user_id,
        status=RepositoryStatusEnum.PENDING
    )
    db.add(new_repo)
    db.commit()
    db.refresh(new_repo)

    # Heavy work (clone/embed/index) happens outside the request cycle.
    background_tasks.add_task(process_repository_background, new_repo.id, user_id)

    logger.info(f"✅ Repository {new_repo.id} created and queued for processing (user: {user_id})")
    return new_repo
151
@router.get("/", response_model=List[RepositoryResponse])
async def get_user_repositories(
    db: Session = Depends(get_db),
    user_id: str = Depends(get_user_id),
    _: bool = Depends(verify_client_secret)
):
    """Get all repositories for the authenticated user"""
    # Newest first; scoped strictly to the authenticated user.
    owned = (
        db.query(Repository)
        .filter(Repository.user_id == user_id)
        .order_by(Repository.created_at.desc())
        .all()
    )
    logger.info(f"📋 Retrieved {len(owned)} repositories for user {user_id}")
    return owned
165
@router.get("/{repository_id}", response_model=RepositoryResponse)
async def get_repository(
    repository_id: int,
    db: Session = Depends(get_db),
    user_id: str = Depends(get_user_id),
    _: bool = Depends(verify_client_secret)
):
    """Get specific repository by ID (user must own it)"""
    # Existence and ownership enforced in one query; 404 either way.
    repo = (
        db.query(Repository)
        .filter(
            Repository.id == repository_id,
            Repository.user_id == user_id,
        )
        .first()
    )
    if repo is None:
        raise HTTPException(status_code=404, detail="Repository not found or access denied")
    return repo
183
@router.delete("/{repository_id}")
async def delete_repository(
    repository_id: int,
    db: Session = Depends(get_db),
    user_id: str = Depends(get_user_id),
    _: bool = Depends(verify_client_secret)
):
    """Delete repository and all associated data (user must own it).

    Vector-store cleanup is best effort (orphaned vectors are tolerable);
    the DB row is then removed, with conversations/messages expected to go
    via CASCADE.
    """
    repository = db.query(Repository).filter(
        Repository.id == repository_id,
        Repository.user_id == user_id
    ).first()

    if not repository:
        raise HTTPException(status_code=404, detail="Repository not found or access denied")

    try:
        # Delete vector data from ChromaDB (best effort)
        vector_service = VectorService()
        await vector_service.delete_repository_data(repository_id)
        logger.info(f"🗑️ Deleted vector data for repository {repository_id}")
    except Exception as e:
        logger.warning(f"⚠️ Error deleting vector data for repo {repository_id}: {e}")

    try:
        # Delete conversations and messages (CASCADE should handle this)
        db.delete(repository)
        db.commit()
        logger.info(f"🗑️ Successfully deleted repository {repository_id} (user: {user_id})")
    except Exception as e:
        # FIX: roll back the failed delete so the session is not left in an
        # aborted transaction when the 500 is raised.
        db.rollback()
        logger.error(f"❌ Error deleting repository {repository_id}: {e}")
        raise HTTPException(status_code=500, detail="Failed to delete repository")

    return {
        "message": f"Repository {repository_id} deleted successfully",
        "repository_id": repository_id,
        "user_id": user_id,
        "success": True
    }
223
@router.get("/{repository_id}/status")
async def get_repository_status(
    repository_id: int,
    db: Session = Depends(get_db),
    user_id: str = Depends(get_user_id),
    _: bool = Depends(verify_client_secret)
):
    """Get detailed repository status (user must own it)"""
    repo = (
        db.query(Repository)
        .filter(
            Repository.id == repository_id,
            Repository.user_id == user_id,
        )
        .first()
    )
    if repo is None:
        raise HTTPException(status_code=404, detail="Repository not found or access denied")

    # Local import mirrors the original; Conversation is only needed here.
    from app.models.conversation import Conversation
    conversation_count = (
        db.query(Conversation)
        .filter(Conversation.repository_id == repository_id)
        .count()
    )

    # Terminal states: processing is finished, successfully or not.
    terminal_states = [RepositoryStatusEnum.READY, RepositoryStatusEnum.FAILED]

    return {
        "id": repo.id,
        "user_id": repo.user_id,
        "name": repo.name,
        "github_url": repo.github_url,
        "status": repo.status.value,
        "error_message": repo.error_message,
        "created_at": repo.created_at,
        "updated_at": repo.updated_at,
        "is_ready_for_chat": repo.status == RepositoryStatusEnum.READY,
        "conversation_count": conversation_count,
        "processing_complete": repo.status in terminal_states
    }
259
# ✅ NEW: User-specific routes
@router.get("/users/{target_user_id}/repositories", response_model=List[RepositoryResponse])
async def get_specific_user_repositories(
    target_user_id: str,
    db: Session = Depends(get_db),
    user_id: str = Depends(get_user_id),
    _: bool = Depends(verify_client_secret)
):
    """Get repositories for a specific user (must be same user)"""

    # Callers may only list their own repositories.
    if user_id != target_user_id:
        raise HTTPException(status_code=403, detail="Access denied - can only access your own repositories")

    # Newest first.
    return (
        db.query(Repository)
        .filter(Repository.user_id == target_user_id)
        .order_by(Repository.created_at.desc())
        .all()
    )
app/api/v1/router.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
from fastapi import APIRouter
from .repositories import router as repositories_router
from .chat import router as chat_router

# Aggregate v1 router.
# NOTE(review): app/main.py includes the sub-routers directly with full
# "/api/v1/..." prefixes — confirm this aggregate router is still mounted
# anywhere, or it may be dead code.
api_router = APIRouter()

# Include only core functionality
api_router.include_router(repositories_router, prefix="/repositories", tags=["repositories"])
api_router.include_router(chat_router, prefix="/chat", tags=["chat"])
app/core/config.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from pydantic_settings import BaseSettings

class Settings(BaseSettings):
    """Application configuration, read from environment variables / .env.

    NOTE(review): the ``os.getenv(...)`` defaults duplicate what
    ``BaseSettings`` already does (it resolves env vars by field name);
    confirm whether both layers are intentional before simplifying.
    """
    # Database
    database_url: str = os.getenv("DATABASE_URL", "sqlite:///./test.db")

    # Security
    # NOTE(review): hard-coded fallback secrets are unsafe if the service is
    # deployed without SECRET_KEY/NEXTJS_SECRET set — consider failing fast
    # in production instead of silently using these defaults.
    secret_key: str = os.getenv("SECRET_KEY", "production-secret-key-change-me")
    nextjs_secret: str = os.getenv("NEXTJS_SECRET", "qodex-production-secret-2025")
    algorithm: str = "HS256"  # JWT signing algorithm (see app/core/security.py)
    access_token_expire_minutes: int = 30  # default JWT lifetime

    # API Keys
    gemini_api_key: str = os.getenv("GEMINI_API_KEY", "")

    # App
    environment: str = os.getenv("ENVIRONMENT", "production")
    debug: bool = os.getenv("DEBUG", "false").lower() == "true"

    class Config:
        env_file = ".env"

# Shared settings instance imported across the application.
settings = Settings()
app/core/database.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from .config import settings

# ✅ Production-ready engine configuration
# Pool sizes are deliberately small for the Neon free tier; pre_ping and
# recycle guard against stale connections on serverless Postgres.
engine = create_engine(
    settings.database_url,
    pool_size=5,  # Reduced for Neon free tier
    max_overflow=10,  # Reduced for free tier
    pool_pre_ping=True,
    pool_recycle=3600,
    echo=False,  # Disable SQL logging in production
    pool_timeout=30,
    # TLS is mandatory for Neon; non-Postgres URLs (e.g. SQLite) get no extras.
    connect_args={
        "sslmode": "require"  # Required for Neon
    } if settings.database_url.startswith("postgresql") else {}
)

SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
# Declarative base shared by all ORM models in app/models.
Base = declarative_base()

def get_db():
    """FastAPI dependency: yield a DB session and always close it afterwards."""
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()
app/core/security.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from datetime import datetime, timedelta, timezone
from typing import Optional

from jose import JWTError, jwt
from passlib.context import CryptContext

from .config import settings
6
+
7
# Password hashing context
# bcrypt via passlib; deprecated="auto" re-hashes passwords stored with
# older schemes transparently on verification.
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")

def verify_password(plain_password: str, hashed_password: str) -> bool:
    """Verify a plain password against its hash"""
    return pwd_context.verify(plain_password, hashed_password)

def get_password_hash(password: str) -> str:
    """Hash a password"""
    return pwd_context.hash(password)
17
+
18
def create_access_token(data: dict, expires_delta: Optional[timedelta] = None) -> str:
    """Create a signed JWT access token.

    Args:
        data: Claims to embed (e.g. ``{"sub": email}``); copied, not mutated.
        expires_delta: Optional custom lifetime; falls back to
            ``settings.access_token_expire_minutes``.

    Returns:
        The encoded JWT string.
    """
    to_encode = data.copy()
    # Fixed: use timezone-aware UTC — datetime.utcnow() is naive and
    # deprecated since Python 3.12; jose encodes aware datetimes correctly.
    now = datetime.now(timezone.utc)
    if expires_delta:
        expire = now + expires_delta
    else:
        expire = now + timedelta(minutes=settings.access_token_expire_minutes)

    to_encode.update({"exp": expire})
    return jwt.encode(to_encode, settings.secret_key, algorithm=settings.algorithm)
29
+
30
def verify_token(token: str) -> Optional[str]:
    """Decode *token* and return the subject email, or None when the token
    is invalid, expired, or carries no "sub" claim."""
    try:
        claims = jwt.decode(token, settings.secret_key, algorithms=[settings.algorithm])
    except JWTError:
        return None
    # dict.get yields None for a missing subject, matching the error path.
    return claims.get("sub")
app/database.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
import os
from dotenv import load_dotenv

load_dotenv()

DATABASE_URL = os.getenv("DATABASE_URL")

# Fixed: fail fast with a clear message instead of letting
# create_engine(None) raise a cryptic ArgumentError at import time.
if not DATABASE_URL:
    raise RuntimeError("DATABASE_URL environment variable is not set")

# NOTE(review): this module duplicates app/core/database.py (different pool
# sizes, no SSL connect_args) — confirm which engine the app should use and
# consolidate on one.
# ✅ Add connection pooling here too
engine = create_engine(
    DATABASE_URL,
    pool_size=10,        # ✅ Allow 10 concurrent connections
    max_overflow=20,     # ✅ Allow 20 more if needed
    pool_pre_ping=True,  # ✅ Verify connections are alive
    pool_recycle=3600,   # ✅ Recycle connections every hour
    pool_timeout=30,     # ✅ Wait 30s for available connection
)

SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

Base = declarative_base()

def get_db():
    """FastAPI dependency: yield a DB session and always close it afterwards."""
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()
app/main.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from app.api.v1 import repositories, chat
from app.core.database import engine, Base
from datetime import datetime
import os
import logging

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# FastAPI application; interactive docs served at /docs and /redoc.
app = FastAPI(
    title="QODEX API",
    description="AI-powered code repository chat system",
    version="1.0.0",
    docs_url="/docs",
    redoc_url="/redoc"
)

# CORS configuration for production
# Explicit origin allow-list: allow_credentials=True requires concrete
# origins (a "*" wildcard would be rejected by browsers with credentials).
app.add_middleware(
    CORSMiddleware,
    allow_origins=[
        "https://qodex.vercel.app",  # Your frontend domain
        "https://qodex-frontend.vercel.app",  # Alternative frontend domain
        "http://localhost:3000",  # Local development
        "http://127.0.0.1:3000",  # Local development
    ],
    allow_credentials=True,
    allow_methods=["GET", "POST", "PUT", "DELETE"],
    allow_headers=["*"],
)
34
+
35
# Create tables on startup
@app.on_event("startup")
async def startup_event():
    """Create database tables on startup.

    Errors are logged but not re-raised, so the app still boots when the
    database is temporarily unreachable.
    NOTE(review): ``@app.on_event`` is deprecated in newer FastAPI in favour
    of lifespan handlers — confirm the installed version before migrating.
    """
    try:
        Base.metadata.create_all(bind=engine)
        logger.info("🗄️ Database tables created successfully")
    except Exception as e:
        logger.error(f"❌ Error creating database tables: {e}")
44
+
45
# Health check endpoint
@app.get("/health")
async def health_check():
    """Health check endpoint for monitoring services"""
    # All keys are valid identifiers, so the dict() keyword form works here.
    return dict(
        status="healthy",
        timestamp=datetime.utcnow().isoformat(),
        service="QODEX API",
        version="1.0.0",
        environment=os.getenv("ENVIRONMENT", "production"),
        message="QODEX is running smoothly! 🚀",
    )
57
+
58
@app.get("/")
async def root():
    """Root endpoint"""
    # Static welcome payload pointing at the docs and health endpoints.
    return dict(
        message="Welcome to QODEX API! 🚀",
        description="AI-powered code repository chat system",
        docs="/docs",
        health="/health",
        status="running",
        version="1.0.0",
    )
69
+
70
# Include routers
# Mount versioned API routes; tags group the endpoints in the OpenAPI docs.
app.include_router(repositories.router, prefix="/api/v1/repositories", tags=["repositories"])
app.include_router(chat.router, prefix="/api/v1/chat", tags=["chat"])
app/models/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
# app/models/__init__.py
# Re-export ORM models so callers can import them from app.models directly.
from .repository import Repository
from .conversation import Conversation, Message

__all__ = ["Repository", "Conversation", "Message"]
app/models/conversation.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sqlalchemy import Column, Integer, String, DateTime, ForeignKey, JSON, Text
2
+ from sqlalchemy.sql import func
3
+ from sqlalchemy.orm import relationship
4
+ from app.core.database import Base
5
+
6
class Conversation(Base):
    """Conversation model - linked to repository only.

    There is no user FK here; ownership checks go through the repository's
    ``user_id`` (see the repositories API).
    """
    __tablename__ = "conversations"

    id = Column(Integer, primary_key=True, index=True)
    repository_id = Column(Integer, ForeignKey("repositories.id"), nullable=False)
    title = Column(String, nullable=False, default="New Conversation")
    created_at = Column(DateTime(timezone=True), server_default=func.now())
    updated_at = Column(DateTime(timezone=True), onupdate=func.now())  # set only on UPDATE

    # Relationships
    repository = relationship("Repository", back_populates="conversations")
    # Deleting a conversation also deletes its messages (delete-orphan).
    messages = relationship("Message", back_populates="conversation", cascade="all, delete-orphan")

    def __repr__(self):
        return f"<Conversation(id={self.id}, repository_id={self.repository_id}, title='{self.title}')>"
22
+
23
class Message(Base):
    """Message model for chat history"""
    __tablename__ = "messages"

    id = Column(Integer, primary_key=True, index=True)
    conversation_id = Column(Integer, ForeignKey("conversations.id"), nullable=False)
    role = Column(String, nullable=False)  # 'user' or 'assistant'
    content = Column(Text, nullable=False)
    citations = Column(JSON, nullable=True)  # Store code citations as JSON
    created_at = Column(DateTime(timezone=True), server_default=func.now())

    # Relationships
    conversation = relationship("Conversation", back_populates="messages")

    def __repr__(self):
        return f"<Message(id={self.id}, role='{self.role}', conversation_id={self.conversation_id})>"
app/models/repository.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sqlalchemy import Column, Integer, String, DateTime, Enum
2
+ from sqlalchemy.sql import func
3
+ from sqlalchemy.orm import relationship
4
+ from app.core.database import Base
5
+ import enum
6
+
7
class RepositoryStatusEnum(enum.Enum):
    """Repository processing status.

    READY marks a repository as chat-ready; READY and FAILED both mean
    processing has finished (see the repository status endpoint).
    """
    PENDING = "PENDING"
    PROCESSING = "PROCESSING"
    READY = "READY"
    FAILED = "FAILED"
13
+
14
class Repository(Base):
    """Repository model with user ownership"""
    __tablename__ = "repositories"

    id = Column(Integer, primary_key=True, index=True)
    user_id = Column(String, nullable=False, index=True)  # ✅ Added back! Owner id; indexed for per-user listing
    # NOTE(review): unique across ALL users — two users cannot register the
    # same GitHub URL; confirm this is intended.
    github_url = Column(String, nullable=False, unique=True)
    name = Column(String, nullable=False)
    status = Column(Enum(RepositoryStatusEnum), default=RepositoryStatusEnum.PENDING)
    error_message = Column(String, nullable=True)  # presumably set when processing fails — confirm in pipeline
    created_at = Column(DateTime(timezone=True), server_default=func.now())
    updated_at = Column(DateTime(timezone=True), onupdate=func.now())

    # Relationships
    # Deleting a repository cascades to its conversations (and their messages).
    conversations = relationship("Conversation", back_populates="repository", cascade="all, delete-orphan")

    def __repr__(self):
        return f"<Repository(id={self.id}, user_id='{self.user_id}', name='{self.name}', status={self.status.value})>"
app/schemas/__init__.py ADDED
File without changes
app/schemas/chat.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from pydantic import BaseModel
from datetime import datetime
from typing import List, Optional, Dict, Any

class CodeCitation(BaseModel):
    """A code reference attached to an answer: a file span plus its snippet."""
    file_path: str
    start_line: int
    end_line: int
    code_snippet: str

class MessageCreate(BaseModel):
    """Payload for posting a new chat message."""
    content: str

class MessageResponse(BaseModel):
    """A stored chat message, optionally with code citations."""
    id: int
    role: str  # 'user' or 'assistant' (see app.models.conversation.Message)
    content: str
    citations: Optional[List[CodeCitation]] = None
    created_at: datetime

    class Config:
        from_attributes = True  # allow construction from ORM objects

class ConversationResponse(BaseModel):
    """A conversation with its full message history."""
    id: int
    repository_id: int
    title: str
    messages: List[MessageResponse]
    created_at: datetime

    class Config:
        from_attributes = True

class QueryRequest(BaseModel):
    """A question about a repository.

    conversation_id is optional — presumably None starts a new conversation;
    confirm against the chat router.
    """
    question: str
    conversation_id: Optional[int] = None

class QueryResponse(BaseModel):
    """Answer payload: natural-language text plus supporting citations."""
    answer_id: str
    natural_language_answer: str
    citations: List[CodeCitation]
    conversation_id: int
app/schemas/repository.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from pydantic import BaseModel
from typing import Optional
from datetime import datetime
from enum import Enum

class RepositoryStatus(str, Enum):
    """API-facing mirror of the ORM RepositoryStatusEnum values."""
    PENDING = "PENDING"
    PROCESSING = "PROCESSING"
    READY = "READY"
    FAILED = "FAILED"

class RepositoryCreate(BaseModel):
    """Payload to register a repository for indexing."""
    name: str
    github_url: str
    user_id: str  # ✅ Added back!

class RepositoryResponse(BaseModel):
    """Repository as returned by the API."""
    id: int
    user_id: str  # ✅ Added back!
    name: str
    github_url: str
    status: RepositoryStatus
    error_message: Optional[str] = None
    created_at: datetime
    updated_at: Optional[datetime] = None

    class Config:
        from_attributes = True  # allow construction from ORM objects
app/services/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
# Re-export service classes for convenient `from app.services import ...` use.
from .github_service import GitHubService
from .embedding_service import EmbeddingService
from .vector_service import VectorService
from .chat_service import ChatService

__all__ = ['GitHubService', 'EmbeddingService', 'VectorService', 'ChatService']
app/services/chat_service.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import google.generativeai as genai
2
+ import os
3
+ from typing import List, Dict
4
+ import logging
5
+ from dotenv import load_dotenv
6
+
7
+ load_dotenv()
8
+ logger = logging.getLogger(__name__)
9
+
10
class ChatService:
    """Answer questions about a repository with Gemini, degrading gracefully
    to raw search results when the model or its quota is unavailable."""

    def __init__(self):
        # Gemini is optional: without an API key (or on init failure) the
        # service still works, returning fallback responses built from the
        # retrieved code chunks.
        api_key = os.getenv("GEMINI_API_KEY")
        if not api_key:
            logger.warning("⚠️ GEMINI_API_KEY not found - chat will use fallback responses")
            self.model = None
            self.gemini_available = False
        else:
            try:
                genai.configure(api_key=api_key)
                self.model = genai.GenerativeModel('gemini-2.0-flash')
                self.gemini_available = True
                logger.info("🤖 Gemini chat service initialized")
            except Exception as e:
                logger.error(f"❌ Failed to initialize Gemini: {e}")
                self.model = None
                self.gemini_available = False

    async def generate_response(self, query: str, code_chunks: List[Dict], repository_name: str) -> Dict:
        """Generate an answer for *query* grounded in *code_chunks*.

        Falls back to a search-results-only response when Gemini is
        unavailable, and to a quota message on 429/quota errors.
        """
        if not self.gemini_available:
            return self.generate_fallback_response(query, code_chunks, repository_name)

        try:
            context = self.prepare_context(code_chunks)

            prompt = f"""You are an expert code assistant analyzing the {repository_name} repository.

User Question: {query}

Relevant Code Context:
{context}

Instructions:
1. Answer the user's question based on the provided code context
2. Reference specific files and line numbers when relevant
3. Explain code functionality clearly
4. If context is insufficient, say so clearly
5. Be specific and technical but also clear

Your Expert Analysis:"""

            response = self.model.generate_content(prompt)

            # Fixed: reuse the shared response builder instead of duplicating
            # the sources/payload assembly inline (was copy-pasted before).
            return self.create_response_dict(response.text, code_chunks, repository_name, 'gemini-2.0-flash')

        except Exception as e:
            logger.error(f"❌ Gemini error: {e}")
            if "429" in str(e) or "quota" in str(e).lower():
                return self.generate_quota_response(query, code_chunks, repository_name)
            return self.generate_fallback_response(query, code_chunks, repository_name)

    def prepare_context(self, code_chunks: List[Dict]) -> str:
        """Render chunks as numbered, file/line-annotated context sections."""
        context_sections = []
        for i, chunk in enumerate(code_chunks, 1):
            context_sections.append(f"""
Code Reference {i}:
File: {chunk['file_path']}
Lines: {chunk['start_line']}-{chunk['end_line']}
Similarity: {chunk['similarity']:.2f}
{chunk['content']}
""")
        return "\n".join(context_sections)

    def generate_quota_response(self, query: str, code_chunks: List[Dict], repository_name: str) -> Dict:
        """Build the response used when the Gemini quota is exhausted."""
        # NOTE(review): min()/max() raise on an empty code_chunks list —
        # confirm callers never reach this path with no search results.
        context = self.prepare_context(code_chunks)
        response = f"""🚫 Gemini quota exceeded, but I found {len(code_chunks)} relevant code sections:

{context}

The search found relevant code with similarity scores from {min(c['similarity'] for c in code_chunks):.2f} to {max(c['similarity'] for c in code_chunks):.2f}. Please try again in a few minutes when quota resets."""

        return self.create_response_dict(response, code_chunks, repository_name, 'quota_exceeded')

    def generate_fallback_response(self, query: str, code_chunks: List[Dict], repository_name: str) -> Dict:
        """Build the response used when Gemini is not configured/available."""
        context = self.prepare_context(code_chunks)
        response = f"""Found {len(code_chunks)} relevant code sections for: "{query}"

{context}

Note: AI analysis requires API configuration. The search results above show the most relevant code."""

        return self.create_response_dict(response, code_chunks, repository_name, 'fallback')

    def create_response_dict(self, response: str, code_chunks: List[Dict], repository_name: str, model_used: str) -> Dict:
        """Assemble the uniform response payload (text + source previews)."""
        sources = []
        for chunk in code_chunks:
            sources.append({
                'file_path': chunk['file_path'],
                'start_line': chunk['start_line'],
                'end_line': chunk['end_line'],
                'similarity': round(chunk['similarity'], 3),
                'preview': chunk['content'][:200] + "..."
            })

        return {
            'response': response,
            'sources': sources,
            'context_chunks_used': len(code_chunks),
            'repository_name': repository_name,
            'model_used': model_used,
            'success': True
        }
app/services/embedding_service.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sentence_transformers import SentenceTransformer
2
+ import numpy as np
3
+ from typing import List, Dict
4
+ import logging
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
class EmbeddingService:
    """Local text-embedding service backed by sentence-transformers
    (all-MiniLM-L6-v2) — no external API calls."""

    def __init__(self):
        try:
            self.model = SentenceTransformer('all-MiniLM-L6-v2')
            logger.info("🤖 Local embedding service initialized (all-MiniLM-L6-v2)")
        except Exception as e:
            logger.error(f"❌ Failed to load SentenceTransformer model: {e}")
            raise Exception("Failed to initialize local embedding model")

    async def generate_embedding(self, text: str, title: str = "") -> List[float]:
        """Embed a single text, optionally prefixed with its file title."""
        try:
            content = f"File: {title}\n\nCode:\n{text}" if title else text
            embedding = self.model.encode(content)
            return embedding.tolist()
        except Exception as e:
            logger.error(f"❌ Error generating local embedding: {e}")
            raise

    async def generate_embeddings_batch(self, chunks: List[Dict]) -> List[Dict]:
        """Embed many code chunks in one batched encode() call.

        Each chunk needs file_path/start_line/end_line/chunk_type/content;
        the same dicts are returned extended with 'embedding' and
        'content_length'.
        """
        logger.info(f"🔄 Generating LOCAL embeddings for {len(chunks)} chunks...")

        texts = []
        for chunk in chunks:
            # Prefix with location metadata so the embedding captures context.
            content = f"""File: {chunk['file_path']}
Lines: {chunk['start_line']}-{chunk['end_line']}
Type: {chunk['chunk_type']}

Code:
{chunk['content']}"""
            texts.append(content)

        try:
            embeddings = self.model.encode(texts, show_progress_bar=True, batch_size=32)

            embedded_chunks = []
            # Fixed: dropped the unused enumerate() index from this loop.
            for chunk, embedding in zip(chunks, embeddings):
                embedded_chunks.append({
                    **chunk,
                    'embedding': embedding.tolist(),
                    'content_length': len(chunk['content'])
                })
        except Exception as e:
            logger.error(f"❌ Failed to generate batch embeddings: {e}")
            raise

        logger.info(f"✅ Generated {len(embedded_chunks)} LOCAL embeddings successfully")
        return embedded_chunks

    async def generate_query_embedding(self, query: str) -> List[float]:
        """Embed a user query for similarity search."""
        try:
            embedding = self.model.encode(query)
            return embedding.tolist()
        except Exception as e:
            logger.error(f"❌ Error generating query embedding: {e}")
            raise
app/services/github_service.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import git
2
+ import os
3
+ import tempfile
4
+ import shutil
5
+ from typing import List, Dict
6
+ from pathlib import Path
7
+ import logging
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
class GitHubService:
    """Clone GitHub repositories and split their source files into
    embedding-sized chunks."""

    def __init__(self):
        # File extensions treated as indexable source code.
        self.supported_extensions = {
            '.py', '.js', '.ts', '.jsx', '.tsx', '.java', '.cpp', '.c',
            '.cs', '.go', '.rs', '.php', '.rb', '.swift', '.kt', '.scala',
            '.html', '.css', '.scss', '.sass', '.vue', '.svelte', '.dart',
            '.r', '.m', '.mm', '.h', '.hpp', '.cc', '.cxx', '.sql'
        }
        # Dependency/build-output directories that are never indexed.
        self.ignore_dirs = {
            '.git', 'node_modules', '__pycache__', '.venv', 'venv',
            'build', 'dist', '.next', '.nuxt', 'coverage', '.pytest_cache',
            'vendor', 'target', 'bin', 'obj', '.gradle', '.idea', '.vscode'
        }

    async def clone_repository(self, github_url: str) -> str:
        """Clone repository to temporary directory and return its path.

        Raises:
            Exception: when the clone fails (the temp dir is removed first).
        """
        temp_dir = tempfile.mkdtemp(prefix="codequery_")
        logger.info(f"🔄 Cloning {github_url} to {temp_dir}")

        try:
            # Clone with depth=1 for faster cloning (only latest commit)
            git.Repo.clone_from(github_url, temp_dir, depth=1)
            logger.info(f"✅ Successfully cloned repository")
            return temp_dir
        except Exception as e:
            # Clean up on failure
            if os.path.exists(temp_dir):
                shutil.rmtree(temp_dir)
            raise Exception(f"Failed to clone repository: {str(e)}")

    def chunk_code_content(self, content: str, file_path: str, max_chunk_size: int = 1000) -> List[Dict]:
        """Split code into meaningful chunks.

        Files up to *max_chunk_size* bytes become a single 'full_file'
        chunk; larger files are split at function/class starts or size
        boundaries into 'code_block' chunks. Line numbers are 1-based and
        inclusive.
        """
        chunks = []
        lines = content.split('\n')

        # For small files, return as single chunk
        if len(content) <= max_chunk_size:
            return [{
                'content': content,
                'file_path': file_path,
                'chunk_index': 0,
                'start_line': 1,
                'end_line': len(lines),
                'chunk_type': 'full_file'
            }]

        # For larger files, split by functions/classes or line count
        current_chunk = []
        current_size = 0
        chunk_index = 0
        start_line = 1

        for i, line in enumerate(lines, 1):
            current_chunk.append(line)
            current_size += len(line) + 1  # +1 for newline

            # Split on function/class definitions or when chunk gets too large
            is_function_start = any(line.strip().startswith(keyword) for keyword in
                                    ['def ', 'function ', 'class ', 'interface ', 'public class'])

            if (current_size >= max_chunk_size) or (is_function_start and len(current_chunk) > 1):
                if len(current_chunk) > 1:  # Don't create empty chunks
                    chunks.append({
                        # A definition line starts the NEXT chunk, so drop it here.
                        'content': '\n'.join(current_chunk[:-1] if is_function_start else current_chunk),
                        'file_path': file_path,
                        'chunk_index': chunk_index,
                        'start_line': start_line,
                        'end_line': i - (1 if is_function_start else 0),
                        'chunk_type': 'code_block'
                    })
                    chunk_index += 1
                    start_line = i if is_function_start else i + 1
                    current_chunk = [line] if is_function_start else []
                    current_size = len(line) + 1 if is_function_start else 0

        # Add remaining chunk
        if current_chunk:
            chunks.append({
                'content': '\n'.join(current_chunk),
                'file_path': file_path,
                'chunk_index': chunk_index,
                'start_line': start_line,
                'end_line': len(lines),
                'chunk_type': 'code_block'
            })

        return chunks

    async def extract_code_files(self, repo_path: str) -> List[Dict]:
        """Extract and chunk all code files from repository.

        Skips ignored directories, non-source extensions, files over 1 MB,
        and empty files; unreadable files are logged and skipped.
        """
        code_chunks = []
        total_files = 0

        logger.info(f"📁 Extracting code files from {repo_path}")

        for root, dirs, files in os.walk(repo_path):
            # Skip ignored directories (prune in place so os.walk won't descend)
            dirs[:] = [d for d in dirs if d not in self.ignore_dirs]

            for file in files:
                file_path = Path(root) / file

                # Fixed: check the extension first so only candidate files are
                # stat()ed at all.
                if file_path.suffix not in self.supported_extensions:
                    continue

                try:
                    # Skip large files (>1MB). Fixed: stat() now runs inside
                    # the try block — a broken symlink / vanished file no
                    # longer aborts the whole walk.
                    if file_path.stat().st_size > 1024 * 1024:
                        continue

                    with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                        content = f.read()

                    # Skip empty files
                    if not content.strip():
                        continue

                    relative_path = str(file_path.relative_to(repo_path))

                    # Chunk the file content
                    chunks = self.chunk_code_content(content, relative_path)
                    code_chunks.extend(chunks)
                    total_files += 1

                    if total_files % 50 == 0:
                        logger.info(f"📊 Processed {total_files} files, {len(code_chunks)} chunks so far...")

                except Exception as e:
                    logger.warning(f"⚠️ Error reading file {file_path}: {e}")
                    continue

        logger.info(f"✅ Extracted {len(code_chunks)} code chunks from {total_files} files")
        return code_chunks

    def cleanup_temp_dir(self, temp_dir: str):
        """Clean up temporary directory"""
        try:
            if os.path.exists(temp_dir):
                shutil.rmtree(temp_dir)
                logger.info(f"🧹 Cleaned up temporary directory: {temp_dir}")
        except Exception as e:
            logger.warning(f"⚠️ Failed to cleanup {temp_dir}: {e}")
app/services/vector_service.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import chromadb
2
+ from chromadb.config import Settings
3
+ import os
4
+ from typing import List, Dict, Optional
5
+ import logging
6
+ import numpy as np
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
class VectorService:
    """Persistent ChromaDB store: one collection per repository
    (named ``repo_<id>``)."""

    def __init__(self):
        self.client = chromadb.PersistentClient(
            path="./chroma_db",
            settings=Settings(
                anonymized_telemetry=False,
                allow_reset=True
            )
        )
        logger.info("🗄️ ChromaDB client initialized")

    def create_collection(self, repository_id: int) -> chromadb.Collection:
        """Get the repository's collection, creating it on first use."""
        collection_name = f"repo_{repository_id}"

        try:
            collection = self.client.get_collection(collection_name)
            logger.info(f"📚 Using existing collection: {collection_name}")
        except Exception:
            # Fixed: was a bare `except:` — that also swallowed
            # KeyboardInterrupt/SystemExit. Chroma raises when the
            # collection does not exist, so create it here.
            collection = self.client.create_collection(
                name=collection_name,
                metadata={"repository_id": repository_id}
            )
            logger.info(f"🆕 Created new collection: {collection_name}")

        return collection

    async def store_embeddings(self, repository_id: int, embedded_chunks: List[Dict]):
        """Persist pre-computed chunk embeddings in batches of 100."""
        logger.info(f"💾 Storing {len(embedded_chunks)} embeddings for repository {repository_id}")

        collection = self.create_collection(repository_id)

        documents = []
        embeddings = []
        metadatas = []
        ids = []

        for i, chunk in enumerate(embedded_chunks):
            # Include the running index so ids stay unique even if two chunks
            # share a chunk_index (e.g. same index in different files).
            chunk_id = f"chunk_{repository_id}_{chunk['chunk_index']}_{i}"

            documents.append(chunk['content'])
            embeddings.append(chunk['embedding'])
            metadatas.append({
                'file_path': chunk['file_path'],
                'start_line': chunk['start_line'],
                'end_line': chunk['end_line'],
                'chunk_type': chunk['chunk_type'],
                'content_length': chunk['content_length'],
                'repository_id': repository_id
            })
            ids.append(chunk_id)

        batch_size = 100
        for i in range(0, len(documents), batch_size):
            end_idx = min(i + batch_size, len(documents))

            collection.add(
                documents=documents[i:end_idx],
                embeddings=embeddings[i:end_idx],
                metadatas=metadatas[i:end_idx],
                ids=ids[i:end_idx]
            )

        logger.info(f"✅ Successfully stored all embeddings for repository {repository_id}")

    async def search_similar_code(self, repository_id: int, query_embedding: List[float], top_k: int = 5) -> List[Dict]:
        """Return up to *top_k* chunks most similar to *query_embedding*,
        sorted by descending similarity; empty list when the repository has
        no collection."""
        collection_name = f"repo_{repository_id}"

        try:
            collection = self.client.get_collection(collection_name)
        except Exception:
            # Fixed: was a bare `except:` (see create_collection).
            logger.warning(f"⚠️ Collection {collection_name} not found")
            return []

        results = collection.query(
            query_embeddings=[query_embedding],
            n_results=top_k,
            include=['documents', 'metadatas', 'distances']
        )

        search_results = []
        for i in range(len(results['documents'][0])):
            # Fix similarity calculation
            distance = results['distances'][0][i]
            # Convert distance to similarity (higher is better)
            similarity = max(0.0, 1.0 - distance)  # Ensure positive similarity

            search_results.append({
                'content': results['documents'][0][i],
                'metadata': results['metadatas'][0][i],
                'similarity': similarity,
                'file_path': results['metadatas'][0][i]['file_path'],
                'start_line': results['metadatas'][0][i]['start_line'],
                'end_line': results['metadatas'][0][i]['end_line']
            })

        # Sort by similarity (highest first)
        search_results.sort(key=lambda x: x['similarity'], reverse=True)

        logger.info(f"🔍 Found {len(search_results)} similar code chunks")
        return search_results

    def delete_repository_data(self, repository_id: int):
        """Drop the repository's collection; missing collections only warn."""
        collection_name = f"repo_{repository_id}"

        try:
            self.client.delete_collection(collection_name)
            logger.info(f"🗑️ Deleted collection: {collection_name}")
        except Exception:
            # Fixed: was a bare `except:` (see create_collection).
            logger.warning(f"⚠️ Collection {collection_name} not found for deletion")
app/utils/__init__.py ADDED
File without changes
migrations/README ADDED
@@ -0,0 +1 @@
 
 
1
+ Generic single-database configuration.
migrations/env.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from logging.config import fileConfig
from sqlalchemy import engine_from_config
from sqlalchemy import pool
from alembic import context
import os
import sys

# Add the app directory to the path
sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))

from app.core.database import Base
# Remove user import - we don't need it anymore
# from app.models.user import User # ❌ REMOVED
# Models must be imported so their tables register on Base.metadata,
# otherwise autogenerate would see an empty schema.
from app.models.repository import Repository
from app.models.conversation import Conversation, Message

# this is the Alembic Config object
config = context.config

# Interpret the config file for Python logging
if config.config_file_name is not None:
    fileConfig(config.config_file_name)

# Set the target metadata
target_metadata = Base.metadata

def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode: emit SQL without a live DB
    connection, using the URL from the Alembic config."""
    url = config.get_main_option("sqlalchemy.url")
    context.configure(
        url=url,
        target_metadata=target_metadata,
        literal_binds=True,
        dialect_opts={"paramstyle": "named"},
    )

    with context.begin_transaction():
        context.run_migrations()

def run_migrations_online() -> None:
    """Run migrations in 'online' mode against a real engine/connection
    (NullPool: one short-lived connection, no pooling needed here)."""
    connectable = engine_from_config(
        config.get_section(config.config_ini_section, {}),
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    with connectable.connect() as connection:
        context.configure(
            connection=connection, target_metadata=target_metadata
        )

        with context.begin_transaction():
            context.run_migrations()

if context.is_offline_mode():
    run_migrations_offline()
else:
    run_migrations_online()
migrations/script.py.mako ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """${message}
2
+
3
+ Revision ID: ${up_revision}
4
+ Revises: ${down_revision | comma,n}
5
+ Create Date: ${create_date}
6
+
7
+ """
8
+ from typing import Sequence, Union
9
+
10
+ from alembic import op
11
+ import sqlalchemy as sa
12
+ ${imports if imports else ""}
13
+
14
+ # revision identifiers, used by Alembic.
15
+ revision: str = ${repr(up_revision)}
16
+ down_revision: Union[str, None] = ${repr(down_revision)}
17
+ branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
18
+ depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
19
+
20
+
21
+ def upgrade() -> None:
22
+ ${upgrades if upgrades else "pass"}
23
+
24
+
25
+ def downgrade() -> None:
26
+ ${downgrades if downgrades else "pass"}
migrations/versions/16e292816c22_initial_migration.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Initial migration
2
+
3
+ Revision ID: 16e292816c22
4
+ Revises:
5
+ Create Date: 2025-10-23 20:12:28.092984
6
+
7
+ """
8
+ from typing import Sequence, Union
9
+
10
+ from alembic import op
11
+ import sqlalchemy as sa
12
+
13
+
14
+ # revision identifiers, used by Alembic.
15
+ revision: str = '16e292816c22'
16
+ down_revision: Union[str, None] = None
17
+ branch_labels: Union[str, Sequence[str], None] = None
18
+ depends_on: Union[str, Sequence[str], None] = None
19
+
20
+
21
def upgrade() -> None:
    """Create the initial tables: users, repositories, conversations, messages."""
    # ### commands auto generated by Alembic - please adjust! ###
    # Users table (pre-dates the user-less architecture; dropped conceptually
    # in a later revision's cleanup — still created here for history replay).
    op.create_table('users',
    sa.Column('id', sa.Integer(), nullable=False),
    sa.Column('email', sa.String(), nullable=False),
    sa.Column('name', sa.String(), nullable=False),
    sa.Column('hashed_password', sa.String(), nullable=False),
    sa.Column('is_active', sa.Boolean(), nullable=True),
    sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True),
    sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True),
    sa.PrimaryKeyConstraint('id')
    )
    op.create_index(op.f('ix_users_email'), 'users', ['email'], unique=True)
    op.create_index(op.f('ix_users_id'), 'users', ['id'], unique=False)
    # Repositories: one row per ingested GitHub repo, owned by a user.
    op.create_table('repositories',
    sa.Column('id', sa.Integer(), nullable=False),
    sa.Column('user_id', sa.Integer(), nullable=False),
    sa.Column('github_url', sa.String(), nullable=False),
    sa.Column('name', sa.String(), nullable=False),
    sa.Column('status', sa.Enum('PENDING', 'PROCESSING', 'READY', 'FAILED', name='repositorystatus'), nullable=True),
    sa.Column('error_message', sa.String(), nullable=True),
    sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True),
    sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True),
    sa.ForeignKeyConstraint(['user_id'], ['users.id'], ),
    sa.PrimaryKeyConstraint('id')
    )
    op.create_index(op.f('ix_repositories_id'), 'repositories', ['id'], unique=False)
    # Conversations: chat threads scoped to a repository.
    op.create_table('conversations',
    sa.Column('id', sa.Integer(), nullable=False),
    sa.Column('repository_id', sa.Integer(), nullable=False),
    sa.Column('title', sa.String(), nullable=False),
    sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True),
    sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True),
    sa.ForeignKeyConstraint(['repository_id'], ['repositories.id'], ),
    sa.PrimaryKeyConstraint('id')
    )
    op.create_index(op.f('ix_conversations_id'), 'conversations', ['id'], unique=False)
    # Messages: individual chat turns; `citations` holds structured JSON refs.
    op.create_table('messages',
    sa.Column('id', sa.Integer(), nullable=False),
    sa.Column('conversation_id', sa.Integer(), nullable=False),
    sa.Column('role', sa.String(), nullable=False),
    sa.Column('content', sa.String(), nullable=False),
    sa.Column('citations', sa.JSON(), nullable=True),
    sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True),
    sa.ForeignKeyConstraint(['conversation_id'], ['conversations.id'], ),
    sa.PrimaryKeyConstraint('id')
    )
    op.create_index(op.f('ix_messages_id'), 'messages', ['id'], unique=False)
    # ### end Alembic commands ###
70
+
71
+
72
def downgrade() -> None:
    """Drop all initial tables in reverse dependency order (FK children first)."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_index(op.f('ix_messages_id'), table_name='messages')
    op.drop_table('messages')
    op.drop_index(op.f('ix_conversations_id'), table_name='conversations')
    op.drop_table('conversations')
    op.drop_index(op.f('ix_repositories_id'), table_name='repositories')
    op.drop_table('repositories')
    op.drop_index(op.f('ix_users_id'), table_name='users')
    op.drop_index(op.f('ix_users_email'), table_name='users')
    op.drop_table('users')
    # ### end Alembic commands ###
migrations/versions/2e8f053488b9_clean_qodex_architecture_no_user_.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Clean QODEX architecture - no user dependencies
2
+
3
+ Revision ID: 2e8f053488b9
4
+ Revises: 16e292816c22
5
+ Create Date: 2025-10-25 19:08:49.834310
6
+
7
+ """
8
+ from typing import Sequence, Union
9
+
10
+ from alembic import op
11
+ import sqlalchemy as sa
12
+ from sqlalchemy.dialects import postgresql
13
+
14
+ # revision identifiers, used by Alembic.
15
+ revision: str = '2e8f053488b9'
16
+ down_revision: Union[str, None] = '16e292816c22'
17
+ branch_labels: Union[str, Sequence[str], None] = None
18
+ depends_on: Union[str, Sequence[str], None] = None
19
+
20
+
21
def upgrade() -> None:
    """Widen messages.content to Text, rename the status enum type, and
    enforce uniqueness of repositories.github_url.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.alter_column('messages', 'content',
               existing_type=sa.VARCHAR(),
               type_=sa.Text(),
               existing_nullable=False)
    # NOTE(review): ALTER COLUMN ... TYPE does not create the target enum
    # type on PostgreSQL — this assumes 'repositorystatusenum' already
    # exists in the database. Confirm against the deployment DB.
    op.alter_column('repositories', 'status',
               existing_type=postgresql.ENUM('PENDING', 'PROCESSING', 'READY', 'FAILED', name='repositorystatus'),
               type_=sa.Enum('PENDING', 'PROCESSING', 'READY', 'FAILED', name='repositorystatusenum'),
               existing_nullable=True)
    # Fix: name the constraint explicitly. The autogenerated `None` name
    # left the constraint unnamed, which made the paired drop_constraint()
    # in downgrade() unrunnable (Alembic requires a name to drop).
    op.create_unique_constraint('uq_repositories_github_url', 'repositories', ['github_url'])
    # ### end Alembic commands ###
33
+
34
+
35
def downgrade() -> None:
    """Revert upgrade(): drop the github_url uniqueness, restore the old
    status enum type name, and return messages.content to VARCHAR.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    # Fix: drop by explicit name. The autogenerated `op.drop_constraint(None,
    # ...)` raises ValueError("Constraint must have a name") at runtime.
    op.drop_constraint('uq_repositories_github_url', 'repositories', type_='unique')
    op.alter_column('repositories', 'status',
               existing_type=sa.Enum('PENDING', 'PROCESSING', 'READY', 'FAILED', name='repositorystatusenum'),
               type_=postgresql.ENUM('PENDING', 'PROCESSING', 'READY', 'FAILED', name='repositorystatus'),
               existing_nullable=True)
    op.alter_column('messages', 'content',
               existing_type=sa.Text(),
               type_=sa.VARCHAR(),
               existing_nullable=False)
    # ### end Alembic commands ###
render.yaml ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
# Render.com deployment manifest for the QODEX API web service.
services:
  - type: web
    name: qodex-api
    env: python
    # Install Python dependencies at build time.
    buildCommand: pip install -r requirements.txt
    # Bind to all interfaces on the platform-assigned $PORT.
    startCommand: uvicorn app.main:app --host 0.0.0.0 --port $PORT
    envVars:
      - key: ENVIRONMENT
        value: production
      - key: DEBUG
        value: false
requirements.txt ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core FastAPI stack (keep versions)
2
+ fastapi==0.104.1
3
+ uvicorn[standard]==0.24.0
4
+ pydantic==2.5.0
5
+ pydantic-settings==2.1.0
6
+ sqlalchemy==2.0.23
7
+ psycopg2-binary==2.9.9
8
+
9
+ # AI/ML stack (NO VERSION PINS - let pip resolve)
10
+ sentence-transformers
11
+ transformers
12
+ huggingface-hub
13
+ torch
14
+ numpy
15
+ chromadb
16
+
17
+ # Google AI
18
+ google-generativeai==0.3.1
19
+
20
+ # Utils (keep versions)
21
+ python-dotenv==1.0.0
22
+ python-multipart==0.0.6
23
+ aiofiles==23.2.1
24
+ requests==2.31.0
25
+ gitpython==3.1.40
26
+ python-jose[cryptography]==3.3.0
27
+ passlib[bcrypt]==1.7.4
run_server.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
"""Local development entry point.

Runs the FastAPI application under uvicorn with auto-reload, bound to
the loopback interface on port 8000.
"""
import uvicorn

if __name__ == "__main__":
    # Options gathered into a dict purely for readability; the call is
    # behaviourally identical to passing them as keyword arguments.
    server_options = {
        "host": "127.0.0.1",
        "port": 8000,
        "reload": True,
        "log_level": "info",
    }
    uvicorn.run("app.main:app", **server_options)
tests/__init__.py ADDED
File without changes