Update app/api/v1/repositories.py
Browse files- app/api/v1/repositories.py +38 -13
app/api/v1/repositories.py
CHANGED
|
@@ -7,15 +7,39 @@ from app.schemas.repository import RepositoryCreate, RepositoryResponse
|
|
| 7 |
from app.core.config import settings
|
| 8 |
from app.services import GitHubService, EmbeddingService, VectorService
|
| 9 |
import logging
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
-
logging.basicConfig(level=logging.INFO)
|
| 12 |
logger = logging.getLogger(__name__)
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
router = APIRouter()
|
| 15 |
|
| 16 |
async def process_repository_background(repository_id: int, user_id: str):
|
| 17 |
"""Background task to process repository with hybrid RAG"""
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
from app.database import SessionLocal
|
| 21 |
db = SessionLocal()
|
|
@@ -33,42 +57,42 @@ async def process_repository_background(repository_id: int, user_id: str):
|
|
| 33 |
).first()
|
| 34 |
|
| 35 |
if not repository:
|
| 36 |
-
|
| 37 |
return
|
| 38 |
|
| 39 |
repository.status = RepositoryStatusEnum.PROCESSING
|
| 40 |
db.commit()
|
| 41 |
-
|
| 42 |
|
| 43 |
-
|
| 44 |
temp_dir = await github_service.clone_repository(repository.github_url)
|
| 45 |
|
| 46 |
-
|
| 47 |
code_chunks = await github_service.extract_code_files(temp_dir)
|
| 48 |
|
| 49 |
if not code_chunks:
|
| 50 |
raise Exception("No supported code files found in repository")
|
| 51 |
|
| 52 |
-
|
| 53 |
|
| 54 |
-
|
| 55 |
embedded_chunks = await embedding_service.generate_embeddings_batch(code_chunks)
|
| 56 |
|
| 57 |
if not embedded_chunks:
|
| 58 |
raise Exception("Failed to generate local embeddings")
|
| 59 |
|
| 60 |
-
|
| 61 |
await vector_service.store_embeddings(repository_id, embedded_chunks)
|
| 62 |
|
| 63 |
repository.status = RepositoryStatusEnum.READY
|
| 64 |
repository.error_message = None
|
| 65 |
db.commit()
|
| 66 |
|
| 67 |
-
|
| 68 |
|
| 69 |
except Exception as e:
|
| 70 |
error_message = str(e)
|
| 71 |
-
|
| 72 |
|
| 73 |
try:
|
| 74 |
repository = db.query(Repository).filter(Repository.id == repository_id).first()
|
|
@@ -77,13 +101,14 @@ async def process_repository_background(repository_id: int, user_id: str):
|
|
| 77 |
repository.error_message = error_message[:500]
|
| 78 |
db.commit()
|
| 79 |
except Exception as db_error:
|
| 80 |
-
|
| 81 |
|
| 82 |
finally:
|
| 83 |
if temp_dir:
|
| 84 |
github_service.cleanup_temp_dir(temp_dir)
|
| 85 |
db.close()
|
| 86 |
-
|
|
|
|
| 87 |
def verify_client_secret(x_client_secret: str = Header(..., alias="X-Client-Secret")):
|
| 88 |
"""Verify request comes from authorized Next.js client"""
|
| 89 |
if x_client_secret != settings.nextjs_secret:
|
|
|
|
| 7 |
from app.core.config import settings
|
| 8 |
from app.services import GitHubService, EmbeddingService, VectorService
|
| 9 |
import logging
|
| 10 |
+
import logging
|
| 11 |
+
import sys
|
| 12 |
+
|
| 13 |
+
# Force logging to stdout for HuggingFace visibility
|
| 14 |
+
logging.basicConfig(
|
| 15 |
+
level=logging.INFO,
|
| 16 |
+
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
| 17 |
+
handlers=[
|
| 18 |
+
logging.StreamHandler(sys.stdout)
|
| 19 |
+
],
|
| 20 |
+
force=True # Override any existing config
|
| 21 |
+
)
|
| 22 |
|
|
|
|
| 23 |
logger = logging.getLogger(__name__)
|
| 24 |
|
| 25 |
+
# Ensure all loggers use stdout
|
| 26 |
+
for handler in logging.root.handlers:
|
| 27 |
+
handler.stream = sys.stdout
|
| 28 |
+
|
| 29 |
router = APIRouter()
|
| 30 |
|
| 31 |
async def process_repository_background(repository_id: int, user_id: str):
|
| 32 |
"""Background task to process repository with hybrid RAG"""
|
| 33 |
+
|
| 34 |
+
# Force stdout for this specific task
|
| 35 |
+
import sys
|
| 36 |
+
|
| 37 |
+
def force_log(message):
|
| 38 |
+
"""Force log to stdout and flush immediately"""
|
| 39 |
+
print(f"[QODEX-BG] {message}", file=sys.stdout, flush=True)
|
| 40 |
+
logger.info(message)
|
| 41 |
+
|
| 42 |
+
force_log(f"π Starting QODEX HYBRID RAG processing for repository {repository_id} (user: {user_id})")
|
| 43 |
|
| 44 |
from app.database import SessionLocal
|
| 45 |
db = SessionLocal()
|
|
|
|
| 57 |
).first()
|
| 58 |
|
| 59 |
if not repository:
|
| 60 |
+
force_log(f"❌ Repository {repository_id} not found for user {user_id}")
|
| 61 |
return
|
| 62 |
|
| 63 |
repository.status = RepositoryStatusEnum.PROCESSING
|
| 64 |
db.commit()
|
| 65 |
+
force_log(f"π Repository {repository_id} status: PROCESSING")
|
| 66 |
|
| 67 |
+
force_log(f"📥 Step 1: Cloning repository {repository.github_url}")
|
| 68 |
temp_dir = await github_service.clone_repository(repository.github_url)
|
| 69 |
|
| 70 |
+
force_log(f"π Step 2: Extracting code files from {repository.name}")
|
| 71 |
code_chunks = await github_service.extract_code_files(temp_dir)
|
| 72 |
|
| 73 |
if not code_chunks:
|
| 74 |
raise Exception("No supported code files found in repository")
|
| 75 |
|
| 76 |
+
force_log(f"✅ Found {len(code_chunks)} code chunks")
|
| 77 |
|
| 78 |
+
force_log(f"⚡ Step 3: Generating embeddings with LOCAL SentenceTransformers")
|
| 79 |
embedded_chunks = await embedding_service.generate_embeddings_batch(code_chunks)
|
| 80 |
|
| 81 |
if not embedded_chunks:
|
| 82 |
raise Exception("Failed to generate local embeddings")
|
| 83 |
|
| 84 |
+
force_log(f"💾 Step 4: Storing embeddings in ChromaDB")
|
| 85 |
await vector_service.store_embeddings(repository_id, embedded_chunks)
|
| 86 |
|
| 87 |
repository.status = RepositoryStatusEnum.READY
|
| 88 |
repository.error_message = None
|
| 89 |
db.commit()
|
| 90 |
|
| 91 |
+
force_log(f"π SUCCESS! QODEX Repository {repository_id} is READY for chat! (user: {user_id})")
|
| 92 |
|
| 93 |
except Exception as e:
|
| 94 |
error_message = str(e)
|
| 95 |
+
force_log(f"❌ Error processing repository {repository_id}: {error_message}")
|
| 96 |
|
| 97 |
try:
|
| 98 |
repository = db.query(Repository).filter(Repository.id == repository_id).first()
|
|
|
|
| 101 |
repository.error_message = error_message[:500]
|
| 102 |
db.commit()
|
| 103 |
except Exception as db_error:
|
| 104 |
+
force_log(f"❌ Failed to update repository status: {str(db_error)}")
|
| 105 |
|
| 106 |
finally:
|
| 107 |
if temp_dir:
|
| 108 |
github_service.cleanup_temp_dir(temp_dir)
|
| 109 |
db.close()
|
| 110 |
+
force_log(f"π Finished processing repository {repository_id}")
|
| 111 |
+
|
| 112 |
def verify_client_secret(x_client_secret: str = Header(..., alias="X-Client-Secret")):
|
| 113 |
"""Verify request comes from authorized Next.js client"""
|
| 114 |
if x_client_secret != settings.nextjs_secret:
|