Sonu Prasad committed on
Commit ·
4c6e0cc
1
Parent(s): 825dc14
Optimize backend for production
Browse files
- Dockerfile +9 -1
- __pycache__/ai_core.cpython-314.pyc +0 -0
- __pycache__/main.cpython-314.pyc +0 -0
- __pycache__/shared.cpython-314.pyc +0 -0
- ai_core.py +115 -31
- main.py +176 -73
- requirements.txt +17 -14
- shared.py +57 -0
Dockerfile
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
FROM python:3.11-slim
|
| 2 |
|
|
|
|
| 3 |
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 4 |
libmagic1 \
|
| 5 |
git \
|
|
@@ -10,15 +11,22 @@ WORKDIR /code
|
|
| 10 |
# Set environment variables for cache directories
|
| 11 |
ENV HF_HOME=/tmp/huggingface_cache
|
| 12 |
ENV SENTENCE_TRANSFORMERS_HOME=/tmp/huggingface_cache
|
|
|
|
| 13 |
|
|
|
|
| 14 |
COPY ./requirements.txt /code/requirements.txt
|
| 15 |
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
|
| 16 |
|
| 17 |
-
# Pre-download
|
| 18 |
RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('all-MiniLM-L6-v2', cache_folder='/tmp/huggingface_cache/sentence_transformers')"
|
| 19 |
|
|
|
|
|
|
|
| 20 |
COPY ./ai_core.py /code/ai_core.py
|
| 21 |
COPY ./main.py /code/main.py
|
| 22 |
|
|
|
|
| 23 |
EXPOSE 7860
|
|
|
|
|
|
|
| 24 |
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
|
|
|
|
| 1 |
FROM python:3.11-slim
|
| 2 |
|
| 3 |
+
# Install system dependencies
|
| 4 |
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 5 |
libmagic1 \
|
| 6 |
git \
|
|
|
|
| 11 |
# Set environment variables for cache directories
|
| 12 |
ENV HF_HOME=/tmp/huggingface_cache
|
| 13 |
ENV SENTENCE_TRANSFORMERS_HOME=/tmp/huggingface_cache
|
| 14 |
+
ENV TRANSFORMERS_CACHE=/tmp/huggingface_cache
|
| 15 |
|
| 16 |
+
# Copy requirements first for better caching
|
| 17 |
COPY ./requirements.txt /code/requirements.txt
|
| 18 |
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
|
| 19 |
|
| 20 |
+
# Pre-download the embedding model
|
| 21 |
RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('all-MiniLM-L6-v2', cache_folder='/tmp/huggingface_cache/sentence_transformers')"
|
| 22 |
|
| 23 |
+
# Copy application code
|
| 24 |
+
COPY ./shared.py /code/shared.py
|
| 25 |
COPY ./ai_core.py /code/ai_core.py
|
| 26 |
COPY ./main.py /code/main.py
|
| 27 |
|
| 28 |
+
# Expose port for Hugging Face Spaces
|
| 29 |
EXPOSE 7860
|
| 30 |
+
|
| 31 |
+
# Run with uvicorn (single worker to avoid multiprocessing issues in HF Spaces)
|
| 32 |
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
|
__pycache__/ai_core.cpython-314.pyc
ADDED
|
Binary file (9.8 kB). View file
|
|
|
__pycache__/main.cpython-314.pyc
ADDED
|
Binary file (19.1 kB). View file
|
|
|
__pycache__/shared.cpython-314.pyc
ADDED
|
Binary file (3.59 kB). View file
|
|
|
ai_core.py
CHANGED
|
@@ -1,6 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
import tempfile
|
| 3 |
import pathlib
|
|
|
|
|
|
|
|
|
|
| 4 |
from langchain_community.document_loaders import TextLoader
|
| 5 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 6 |
from langchain_community.embeddings import SentenceTransformerEmbeddings
|
|
@@ -12,38 +24,74 @@ from langchain_core.prompts import ChatPromptTemplate
|
|
| 12 |
from langchain.docstore.document import Document
|
| 13 |
from langchain_core.messages import AIMessage, HumanMessage
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
CACHE_DIR = os.path.join(tempfile.gettempdir(), "huggingface_cache", "sentence_transformers")
|
| 16 |
|
| 17 |
-
|
| 18 |
-
|
|
|
|
|
|
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
try:
|
|
|
|
|
|
|
| 21 |
chroma_db_path = os.path.join(tempfile.gettempdir(), "chroma_db_cache", session_id)
|
| 22 |
|
|
|
|
| 23 |
documents = []
|
| 24 |
if file_paths:
|
| 25 |
for file_path in file_paths:
|
| 26 |
try:
|
| 27 |
loader = TextLoader(file_path, encoding='utf-8')
|
| 28 |
documents.extend(loader.load())
|
|
|
|
| 29 |
except Exception as e:
|
| 30 |
-
|
| 31 |
continue
|
| 32 |
|
|
|
|
| 33 |
if not documents:
|
| 34 |
documents = [Document(page_content="No text files were provided for initial analysis.")]
|
|
|
|
| 35 |
|
|
|
|
| 36 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=200)
|
| 37 |
texts = text_splitter.split_documents(documents)
|
|
|
|
| 38 |
|
| 39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
|
|
|
|
| 41 |
db = Chroma.from_documents(texts, embeddings, persist_directory=chroma_db_path)
|
|
|
|
| 42 |
|
|
|
|
| 43 |
retriever = db.as_retriever(search_kwargs={"k": 5})
|
| 44 |
|
| 45 |
-
|
|
|
|
| 46 |
|
|
|
|
| 47 |
system_prompt = (
|
| 48 |
"You are an expert software developer assistant. Your goal is to help users "
|
| 49 |
"understand a GitHub repository. Use the following pieces of retrieved context "
|
|
@@ -58,45 +106,77 @@ def create_conversational_chain(file_paths: list[str], session_id: str):
|
|
| 58 |
("human", "{input}"),
|
| 59 |
])
|
| 60 |
|
|
|
|
| 61 |
question_answer_chain = create_stuff_documents_chain(llm, prompt)
|
| 62 |
rag_chain = create_retrieval_chain(retriever, question_answer_chain)
|
| 63 |
|
| 64 |
-
# Store vectorstore in
|
| 65 |
-
|
| 66 |
|
|
|
|
| 67 |
return rag_chain
|
| 68 |
|
| 69 |
except Exception as e:
|
| 70 |
-
|
| 71 |
return None
|
| 72 |
|
| 73 |
-
|
| 74 |
-
|
|
|
|
|
|
|
| 75 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
try:
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
loader = TextLoader(file_path, encoding='utf-8')
|
| 84 |
-
documents.extend(loader.load())
|
| 85 |
-
except Exception:
|
| 86 |
-
continue
|
| 87 |
-
|
| 88 |
-
if documents:
|
| 89 |
-
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=200)
|
| 90 |
-
texts = text_splitter.split_documents(documents)
|
| 91 |
-
vectorstore.add_documents(texts)
|
| 92 |
|
| 93 |
-
|
| 94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
except Exception as e:
|
| 96 |
-
|
|
|
|
| 97 |
|
| 98 |
-
def query_with_context(rag_chain, chat_history: list, query: str, pinned_files:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
try:
|
|
|
|
| 100 |
context_str = ""
|
| 101 |
if pinned_files:
|
| 102 |
context_str += "The user has pinned the following files for primary context. Prioritize information from these files:\n\n"
|
|
@@ -105,21 +185,25 @@ def query_with_context(rag_chain, chat_history: list, query: str, pinned_files:
|
|
| 105 |
if file_p.is_file():
|
| 106 |
context_str += f"--- START OF FILE: {file} ---\n"
|
| 107 |
try:
|
|
|
|
| 108 |
context_str += file_p.read_text(encoding="utf-8")[:4000]
|
| 109 |
except Exception:
|
| 110 |
context_str += "(Could not read file content)"
|
| 111 |
context_str += f"\n--- END OF FILE: {file} ---\n\n"
|
| 112 |
|
|
|
|
| 113 |
final_query = f"{context_str}Based on the context and our history, answer the question: {query}"
|
| 114 |
|
|
|
|
| 115 |
response = rag_chain.invoke({"input": final_query, "chat_history": chat_history})
|
| 116 |
|
| 117 |
answer = response.get("answer", "I could not find an answer.")
|
| 118 |
|
| 119 |
-
#
|
| 120 |
chat_history.extend([HumanMessage(content=query), AIMessage(content=answer)])
|
| 121 |
|
| 122 |
return answer
|
|
|
|
| 123 |
except Exception as e:
|
| 124 |
-
|
| 125 |
return f"An error occurred while processing your request: {str(e)}"
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
AI Core Module for GitHub Companion
|
| 3 |
+
|
| 4 |
+
Handles:
|
| 5 |
+
- Document embedding with ChromaDB
|
| 6 |
+
- Conversational RAG chain creation
|
| 7 |
+
- Context-aware query processing
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
import os
|
| 11 |
import tempfile
|
| 12 |
import pathlib
|
| 13 |
+
import logging
|
| 14 |
+
from typing import List
|
| 15 |
+
|
| 16 |
from langchain_community.document_loaders import TextLoader
|
| 17 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 18 |
from langchain_community.embeddings import SentenceTransformerEmbeddings
|
|
|
|
| 24 |
from langchain.docstore.document import Document
|
| 25 |
from langchain_core.messages import AIMessage, HumanMessage
|
| 26 |
|
| 27 |
+
from shared import analysis_jobs, update_session, get_session
|
| 28 |
+
|
| 29 |
+
# Configure logging
|
| 30 |
+
logging.basicConfig(
|
| 31 |
+
level=logging.INFO,
|
| 32 |
+
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
| 33 |
+
)
|
| 34 |
+
logger = logging.getLogger(__name__)
|
| 35 |
+
|
| 36 |
+
# Cache directory for embeddings model
|
| 37 |
CACHE_DIR = os.path.join(tempfile.gettempdir(), "huggingface_cache", "sentence_transformers")
|
| 38 |
|
| 39 |
+
|
| 40 |
+
def create_conversational_chain(file_paths: List[str], session_id: str):
|
| 41 |
+
"""
|
| 42 |
+
Create a conversational RAG chain from the provided files.
|
| 43 |
|
| 44 |
+
Args:
|
| 45 |
+
file_paths: List of file paths to embed for initial context
|
| 46 |
+
session_id: Unique session identifier
|
| 47 |
+
|
| 48 |
+
Returns:
|
| 49 |
+
A LangChain retrieval chain or None if creation fails
|
| 50 |
+
"""
|
| 51 |
try:
|
| 52 |
+
logger.info(f"Creating conversational chain for session {session_id}")
|
| 53 |
+
|
| 54 |
chroma_db_path = os.path.join(tempfile.gettempdir(), "chroma_db_cache", session_id)
|
| 55 |
|
| 56 |
+
# Load documents
|
| 57 |
documents = []
|
| 58 |
if file_paths:
|
| 59 |
for file_path in file_paths:
|
| 60 |
try:
|
| 61 |
loader = TextLoader(file_path, encoding='utf-8')
|
| 62 |
documents.extend(loader.load())
|
| 63 |
+
logger.debug(f"Loaded file: {file_path}")
|
| 64 |
except Exception as e:
|
| 65 |
+
logger.warning(f"Skipping file {file_path}: {e}")
|
| 66 |
continue
|
| 67 |
|
| 68 |
+
# Fallback if no documents
|
| 69 |
if not documents:
|
| 70 |
documents = [Document(page_content="No text files were provided for initial analysis.")]
|
| 71 |
+
logger.warning("No documents loaded, using fallback.")
|
| 72 |
|
| 73 |
+
# Split documents
|
| 74 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=200)
|
| 75 |
texts = text_splitter.split_documents(documents)
|
| 76 |
+
logger.info(f"Split into {len(texts)} text chunks")
|
| 77 |
|
| 78 |
+
# Create embeddings
|
| 79 |
+
embeddings = SentenceTransformerEmbeddings(
|
| 80 |
+
model_name="all-MiniLM-L6-v2",
|
| 81 |
+
cache_folder=CACHE_DIR
|
| 82 |
+
)
|
| 83 |
|
| 84 |
+
# Create vector store
|
| 85 |
db = Chroma.from_documents(texts, embeddings, persist_directory=chroma_db_path)
|
| 86 |
+
logger.info(f"Created ChromaDB at {chroma_db_path}")
|
| 87 |
|
| 88 |
+
# Create retriever
|
| 89 |
retriever = db.as_retriever(search_kwargs={"k": 5})
|
| 90 |
|
| 91 |
+
# Create LLM
|
| 92 |
+
llm = GoogleGenerativeAI(model="gemini-2.0-flash", temperature=0.7)
|
| 93 |
|
| 94 |
+
# System prompt
|
| 95 |
system_prompt = (
|
| 96 |
"You are an expert software developer assistant. Your goal is to help users "
|
| 97 |
"understand a GitHub repository. Use the following pieces of retrieved context "
|
|
|
|
| 106 |
("human", "{input}"),
|
| 107 |
])
|
| 108 |
|
| 109 |
+
# Create chains
|
| 110 |
question_answer_chain = create_stuff_documents_chain(llm, prompt)
|
| 111 |
rag_chain = create_retrieval_chain(retriever, question_answer_chain)
|
| 112 |
|
| 113 |
+
# Store vectorstore in session
|
| 114 |
+
update_session(session_id, "vectorstore", db)
|
| 115 |
|
| 116 |
+
logger.info(f"✅ Conversational chain created for session {session_id}")
|
| 117 |
return rag_chain
|
| 118 |
|
| 119 |
except Exception as e:
|
| 120 |
+
logger.error(f"❌ Error creating conversational chain: {e}")
|
| 121 |
return None
|
| 122 |
|
| 123 |
+
|
| 124 |
+
def embed_entire_repository(session_id: str, all_file_paths: List[str], batch_size: int = 500):
    """
    Background task to embed all text files in the repository.

    Each path is loaded as UTF-8 text, the loaded documents are split into
    chunks, and the chunks are added to the vectorstore stored in the session
    under the "vectorstore" key. When the run finishes without error the
    session key "embedding_complete" is set to True. Errors are logged and
    never propagate to the caller.

    Args:
        session_id: Unique session identifier
        all_file_paths: List of all text file paths to embed
        batch_size: Maximum number of chunks handed to the vectorstore per
            ``add_documents`` call (values below 1 are treated as 1)
    """
    try:
        logger.info(f"Starting background embedding for session {session_id} ({len(all_file_paths)} files)")

        job = get_session(session_id)
        if not job or "vectorstore" not in job:
            logger.error(f"No vectorstore found for session {session_id}")
            return

        vectorstore = job["vectorstore"]

        # Load all documents. Unreadable files are skipped on purpose
        # (best effort), but the skip is recorded instead of being silent.
        documents = []
        skipped = 0
        for file_path in all_file_paths:
            try:
                loader = TextLoader(file_path, encoding='utf-8')
                documents.extend(loader.load())
            except Exception as e:
                skipped += 1
                logger.debug(f"Skipping file {file_path}: {e}")

        if skipped:
            logger.warning(f"Skipped {skipped} of {len(all_file_paths)} files that could not be loaded as UTF-8 text")

        if documents:
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=200)
            texts = text_splitter.split_documents(documents)

            # Insert in bounded batches rather than one huge call.
            # NOTE(review): very large single inserts are reported to exceed
            # the vector store's per-call batch limit - confirm the limit for
            # the installed chromadb version.
            step = max(1, batch_size)
            for start in range(0, len(texts), step):
                vectorstore.add_documents(texts[start:start + step])
            logger.info(f"Added {len(texts)} chunks to vectorstore")

        update_session(session_id, "embedding_complete", True)
        logger.info(f"✅ Background embedding complete for session {session_id}")

    except Exception as e:
        # logger.exception keeps the original message and adds the traceback.
        logger.exception(f"❌ Error in background embedding for session {session_id}: {e}")
|
| 162 |
+
|
| 163 |
|
| 164 |
+
def query_with_context(rag_chain, chat_history: list, query: str, pinned_files: List[str], repo_path: str) -> str:
|
| 165 |
+
"""
|
| 166 |
+
Query the RAG chain with additional context from pinned files.
|
| 167 |
+
|
| 168 |
+
Args:
|
| 169 |
+
rag_chain: The LangChain retrieval chain
|
| 170 |
+
chat_history: List of previous chat messages
|
| 171 |
+
query: The user's query
|
| 172 |
+
pinned_files: List of file paths the user has pinned for context
|
| 173 |
+
repo_path: Path to the repository root
|
| 174 |
+
|
| 175 |
+
Returns:
|
| 176 |
+
The AI's response as a string
|
| 177 |
+
"""
|
| 178 |
try:
|
| 179 |
+
# Build context from pinned files
|
| 180 |
context_str = ""
|
| 181 |
if pinned_files:
|
| 182 |
context_str += "The user has pinned the following files for primary context. Prioritize information from these files:\n\n"
|
|
|
|
| 185 |
if file_p.is_file():
|
| 186 |
context_str += f"--- START OF FILE: {file} ---\n"
|
| 187 |
try:
|
| 188 |
+
# Limit file content to prevent token overflow
|
| 189 |
context_str += file_p.read_text(encoding="utf-8")[:4000]
|
| 190 |
except Exception:
|
| 191 |
context_str += "(Could not read file content)"
|
| 192 |
context_str += f"\n--- END OF FILE: {file} ---\n\n"
|
| 193 |
|
| 194 |
+
# Build final query
|
| 195 |
final_query = f"{context_str}Based on the context and our history, answer the question: {query}"
|
| 196 |
|
| 197 |
+
# Invoke the chain
|
| 198 |
response = rag_chain.invoke({"input": final_query, "chat_history": chat_history})
|
| 199 |
|
| 200 |
answer = response.get("answer", "I could not find an answer.")
|
| 201 |
|
| 202 |
+
# Update chat history
|
| 203 |
chat_history.extend([HumanMessage(content=query), AIMessage(content=answer)])
|
| 204 |
|
| 205 |
return answer
|
| 206 |
+
|
| 207 |
except Exception as e:
|
| 208 |
+
logger.error(f"Error during query invocation: {e}")
|
| 209 |
return f"An error occurred while processing your request: {str(e)}"
|
main.py
CHANGED
|
@@ -1,245 +1,348 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
import uuid
|
| 3 |
import shutil
|
| 4 |
import pathlib
|
| 5 |
import tempfile
|
| 6 |
-
|
|
|
|
|
|
|
|
|
|
| 7 |
from fastapi import FastAPI, BackgroundTasks, HTTPException, Query
|
| 8 |
from fastapi.responses import FileResponse, JSONResponse
|
| 9 |
from fastapi.middleware.cors import CORSMiddleware
|
| 10 |
-
from pydantic import BaseModel
|
| 11 |
from git import Repo
|
|
|
|
| 12 |
from langchain_core.messages import AIMessage, HumanMessage
|
| 13 |
from ai_core import create_conversational_chain, query_with_context, embed_entire_repository
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
SESSIONS_BASE_DIR = pathlib.Path(tempfile.gettempdir()) / "repo_sessions"
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
app = FastAPI(
|
| 18 |
-
title="
|
| 19 |
description="API for high-performance analysis and contextual chat with GitHub repositories.",
|
| 20 |
-
version="
|
|
|
|
| 21 |
)
|
| 22 |
|
| 23 |
-
origins
|
| 24 |
app.add_middleware(
|
| 25 |
CORSMiddleware,
|
| 26 |
-
allow_origins=
|
| 27 |
allow_credentials=True,
|
| 28 |
allow_methods=["*"],
|
| 29 |
allow_headers=["*"],
|
| 30 |
)
|
| 31 |
|
| 32 |
-
analysis_jobs: Dict[str, Dict[str, Any]] = {}
|
| 33 |
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
f.read(512)
|
| 38 |
-
return True
|
| 39 |
-
except Exception:
|
| 40 |
-
return False
|
| 41 |
|
| 42 |
class RepoRequest(BaseModel):
|
| 43 |
repo_url: str
|
| 44 |
|
|
|
|
| 45 |
class AnalysisResponse(BaseModel):
|
| 46 |
session_id: str
|
| 47 |
|
|
|
|
| 48 |
class StatusResponse(BaseModel):
|
| 49 |
session_id: str
|
| 50 |
status: Literal["pending", "cloning", "summarizing", "embedding_background", "completed", "failed"]
|
| 51 |
message: str | None = None
|
| 52 |
|
|
|
|
| 53 |
class FileDetail(BaseModel):
|
| 54 |
path: str
|
| 55 |
size_bytes: int
|
| 56 |
|
|
|
|
| 57 |
class AnalysisResult(BaseModel):
|
| 58 |
repo_url: str
|
| 59 |
directory_structure: List[FileDetail]
|
| 60 |
initial_summary: str
|
| 61 |
|
|
|
|
| 62 |
class FileContentResponse(BaseModel):
|
| 63 |
path: str
|
| 64 |
content: str
|
| 65 |
|
|
|
|
| 66 |
class ChatRequest(BaseModel):
|
| 67 |
query: str
|
| 68 |
pinned_files: List[str] = []
|
| 69 |
|
|
|
|
| 70 |
class ChatResponse(BaseModel):
|
| 71 |
answer: str
|
| 72 |
|
|
|
|
| 73 |
class ModifiedFile(BaseModel):
|
| 74 |
path: str
|
| 75 |
content: str
|
| 76 |
|
|
|
|
| 77 |
class DownloadRequest(BaseModel):
|
| 78 |
modified_files: List[ModifiedFile]
|
| 79 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
def initial_analysis_task(session_id: str, repo_url: str, background_tasks: BackgroundTasks):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
session_repo_path = SESSIONS_BASE_DIR / session_id
|
| 82 |
-
|
| 83 |
try:
|
|
|
|
| 84 |
if session_repo_path.exists():
|
| 85 |
shutil.rmtree(session_repo_path)
|
| 86 |
-
|
| 87 |
SESSIONS_BASE_DIR.mkdir(exist_ok=True)
|
| 88 |
-
|
| 89 |
-
|
|
|
|
| 90 |
Repo.clone_from(repo_url, str(session_repo_path), depth=1)
|
| 91 |
-
|
| 92 |
-
|
| 93 |
repo_name = repo_url.split('/')[-1].replace('.git', '')
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
|
|
|
| 98 |
all_file_details = []
|
| 99 |
key_file_paths_for_summary = []
|
| 100 |
all_text_file_paths_for_embedding = []
|
| 101 |
-
summary_candidate_names =
|
| 102 |
-
|
|
|
|
| 103 |
for root, dirs, files in os.walk(str(session_repo_path), topdown=True):
|
| 104 |
dirs[:] = [d for d in dirs if d not in ignore_patterns]
|
| 105 |
for name in files:
|
| 106 |
-
if name in ignore_patterns:
|
| 107 |
continue
|
| 108 |
file_path = os.path.join(root, name)
|
| 109 |
if not os.path.islink(file_path):
|
| 110 |
-
# Fixed relative path calculation using pathlib
|
| 111 |
try:
|
| 112 |
relative_path = pathlib.Path(file_path).relative_to(session_repo_path).as_posix()
|
| 113 |
except ValueError:
|
| 114 |
-
# Fallback to os.path.relpath with proper escaping
|
| 115 |
relative_path = os.path.relpath(file_path, str(session_repo_path)).replace("\\", "/")
|
| 116 |
-
|
| 117 |
file_size = os.path.getsize(file_path)
|
| 118 |
all_file_details.append(FileDetail(path=relative_path, size_bytes=file_size))
|
| 119 |
-
|
| 120 |
if is_text_file(file_path):
|
| 121 |
all_text_file_paths_for_embedding.append(file_path)
|
| 122 |
if name.lower() in summary_candidate_names:
|
| 123 |
key_file_paths_for_summary.append(file_path)
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
# Initialize chat history
|
| 128 |
-
|
| 129 |
-
|
|
|
|
| 130 |
rag_chain = create_conversational_chain(key_file_paths_for_summary, session_id)
|
| 131 |
-
|
| 132 |
if not rag_chain:
|
| 133 |
raise Exception("Failed to create initial AI chain.")
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
|
|
|
|
|
|
|
|
|
| 137 |
summary_query = "Based on the provided files (like README, package.json, etc.), what is the primary purpose of this software project? Provide a concise, one-paragraph summary."
|
| 138 |
-
initial_summary = query_with_context(rag_chain,
|
| 139 |
-
|
|
|
|
| 140 |
result = AnalysisResult(
|
| 141 |
repo_url=repo_url,
|
| 142 |
directory_structure=sorted(all_file_details, key=lambda x: x.path),
|
| 143 |
initial_summary=initial_summary
|
| 144 |
)
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
|
|
|
| 148 |
background_tasks.add_task(embed_entire_repository, session_id, all_text_file_paths_for_embedding)
|
| 149 |
-
|
| 150 |
except Exception as e:
|
| 151 |
-
|
| 152 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
|
| 154 |
@app.get("/")
|
| 155 |
def read_root():
|
| 156 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
|
| 158 |
@app.post("/analyze", response_model=AnalysisResponse, status_code=202)
|
| 159 |
def submit_analysis(request: RepoRequest, background_tasks: BackgroundTasks):
|
|
|
|
| 160 |
session_id = str(uuid.uuid4())
|
| 161 |
-
|
| 162 |
background_tasks.add_task(initial_analysis_task, session_id, request.repo_url, background_tasks)
|
| 163 |
return AnalysisResponse(session_id=session_id)
|
| 164 |
|
|
|
|
| 165 |
@app.get("/status/{session_id}", response_model=StatusResponse)
|
| 166 |
def get_analysis_status(session_id: str):
|
| 167 |
-
|
|
|
|
| 168 |
if not job:
|
| 169 |
raise HTTPException(status_code=404, detail="Session ID not found.")
|
| 170 |
-
|
| 171 |
-
|
|
|
|
|
|
|
| 172 |
job["status"] = "completed"
|
| 173 |
-
|
| 174 |
return StatusResponse(session_id=session_id, status=job["status"], message=job.get("message"))
|
| 175 |
|
|
|
|
| 176 |
@app.get("/result/{session_id}", response_model=AnalysisResult)
|
| 177 |
def get_analysis_result(session_id: str):
|
| 178 |
-
|
|
|
|
| 179 |
if not job or job.get("status") not in ["embedding_background", "completed"]:
|
| 180 |
raise HTTPException(status_code=400, detail="Job not found or not ready.")
|
| 181 |
return job["result"]
|
| 182 |
|
|
|
|
| 183 |
@app.get("/file-content/{session_id}", response_model=FileContentResponse)
|
| 184 |
def get_file_content(session_id: str, file_path: str = Query(..., alias="path")):
|
| 185 |
-
|
|
|
|
| 186 |
if not job or "repo_path" not in job:
|
| 187 |
raise HTTPException(status_code=404, detail="Session not found.")
|
| 188 |
-
|
| 189 |
repo_base_path = pathlib.Path(job["repo_path"]).resolve()
|
| 190 |
requested_file_path = (repo_base_path / file_path).resolve()
|
| 191 |
-
|
|
|
|
| 192 |
if not requested_file_path.is_relative_to(repo_base_path):
|
| 193 |
raise HTTPException(status_code=403, detail="Access denied.")
|
| 194 |
-
|
| 195 |
if not requested_file_path.is_file():
|
| 196 |
raise HTTPException(status_code=404, detail="File not found.")
|
| 197 |
-
|
| 198 |
try:
|
| 199 |
content = requested_file_path.read_text(encoding="utf-8")
|
| 200 |
return FileContentResponse(path=file_path, content=content)
|
| 201 |
except Exception as e:
|
| 202 |
raise HTTPException(status_code=500, detail=f"Error reading file: {str(e)}")
|
| 203 |
|
|
|
|
| 204 |
@app.post("/chat/{session_id}", response_model=ChatResponse)
|
| 205 |
def chat_with_repo(session_id: str, request: ChatRequest):
|
| 206 |
-
|
|
|
|
| 207 |
if not job or "rag_chain" not in job:
|
| 208 |
raise HTTPException(status_code=404, detail="Chat session not ready.")
|
| 209 |
-
|
| 210 |
rag_chain = job["rag_chain"]
|
| 211 |
chat_history = job.get("chat_history", [])
|
| 212 |
repo_path = job["repo_path"]
|
| 213 |
-
|
| 214 |
answer = query_with_context(rag_chain, chat_history, request.query, request.pinned_files, repo_path)
|
| 215 |
-
|
| 216 |
return ChatResponse(answer=answer)
|
| 217 |
|
|
|
|
| 218 |
@app.post("/download-zip/{session_id}")
|
| 219 |
async def download_zip(session_id: str, request: DownloadRequest, background_tasks: BackgroundTasks):
|
| 220 |
-
|
|
|
|
| 221 |
if not job or "repo_path" not in job:
|
| 222 |
raise HTTPException(status_code=404, detail="Session not found.")
|
| 223 |
-
|
| 224 |
repo_base_path = pathlib.Path(job["repo_path"]).resolve()
|
| 225 |
repo_name = job.get("repo_name", session_id)
|
| 226 |
temp_zip_dir = pathlib.Path(tempfile.gettempdir()) / "temp_zips"
|
| 227 |
-
|
|
|
|
| 228 |
for modified_file in request.modified_files:
|
| 229 |
file_to_update = (repo_base_path / modified_file.path).resolve()
|
| 230 |
-
|
| 231 |
if not file_to_update.is_relative_to(repo_base_path):
|
| 232 |
continue
|
| 233 |
-
|
| 234 |
file_to_update.parent.mkdir(parents=True, exist_ok=True)
|
| 235 |
file_to_update.write_text(modified_file.content, encoding="utf-8")
|
| 236 |
-
|
|
|
|
| 237 |
temp_zip_dir.mkdir(exist_ok=True)
|
| 238 |
zip_path_base = temp_zip_dir / f"{repo_name}-{session_id}"
|
| 239 |
zip_path_final = shutil.make_archive(str(zip_path_base), 'zip', str(repo_base_path))
|
| 240 |
-
|
|
|
|
| 241 |
background_tasks.add_task(os.remove, zip_path_final)
|
| 242 |
-
|
| 243 |
return FileResponse(
|
| 244 |
path=zip_path_final,
|
| 245 |
media_type='application/zip',
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
GitHub Companion API - Main FastAPI Application
|
| 3 |
+
|
| 4 |
+
A high-performance API for analyzing and chatting with GitHub repositories.
|
| 5 |
+
Optimized for Hugging Face Spaces deployment with multi-user support.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
import os
|
| 9 |
import uuid
|
| 10 |
import shutil
|
| 11 |
import pathlib
|
| 12 |
import tempfile
|
| 13 |
+
import asyncio
|
| 14 |
+
from typing import List, Literal
|
| 15 |
+
from contextlib import asynccontextmanager
|
| 16 |
+
|
| 17 |
from fastapi import FastAPI, BackgroundTasks, HTTPException, Query
|
| 18 |
from fastapi.responses import FileResponse, JSONResponse
|
| 19 |
from fastapi.middleware.cors import CORSMiddleware
|
| 20 |
+
from pydantic import BaseModel
|
| 21 |
from git import Repo
|
| 22 |
+
|
| 23 |
from langchain_core.messages import AIMessage, HumanMessage
|
| 24 |
from ai_core import create_conversational_chain, query_with_context, embed_entire_repository
|
| 25 |
+
from shared import analysis_jobs, get_session, set_session, update_session
|
| 26 |
+
|
| 27 |
+
# ============================================================================
|
| 28 |
+
# Configuration
|
| 29 |
+
# ============================================================================
|
| 30 |
|
| 31 |
SESSIONS_BASE_DIR = pathlib.Path(tempfile.gettempdir()) / "repo_sessions"
|
| 32 |
|
| 33 |
+
|
| 34 |
+
# ============================================================================
|
| 35 |
+
# Lifespan Context Manager (Startup/Shutdown)
|
| 36 |
+
# ============================================================================
|
| 37 |
+
|
| 38 |
+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run startup work before the app serves requests and shutdown work after.

    Startup creates the sessions base directory if it is missing and prints a
    banner; shutdown only prints a banner.
    """
    # --- startup ---
    sessions_dir = SESSIONS_BASE_DIR
    sessions_dir.mkdir(exist_ok=True)
    print(f"✅ GitHub Companion API started. Sessions dir: {SESSIONS_BASE_DIR}")

    yield

    # --- shutdown --- (no cleanup is performed here at present)
    print("🛑 GitHub Companion API shutting down.")
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
# ============================================================================
|
| 50 |
+
# FastAPI App Initialization
|
| 51 |
+
# ============================================================================
|
| 52 |
+
|
| 53 |
# Application instance; the `lifespan` context manager supplies the
# startup/shutdown hooks.
app = FastAPI(
    title="GitHub Companion API",
    description="API for high-performance analysis and contextual chat with GitHub repositories.",
    version="5.0.0",
    lifespan=lifespan
)

# CORS Configuration (allows all origins for Hugging Face Spaces)
# NOTE(review): a wildcard origin is combined with allow_credentials=True.
# Browsers do not accept "Access-Control-Allow-Origin: *" on credentialed
# requests - confirm whether credentials are needed, or list explicit origins.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
| 68 |
|
|
|
|
| 69 |
|
| 70 |
+
# ============================================================================
|
| 71 |
+
# Pydantic Models
|
| 72 |
+
# ============================================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
|
| 74 |
class RepoRequest(BaseModel):
    """Request body for POST /analyze."""

    # URL of the Git repository; handed to Repo.clone_from by the analysis task.
    repo_url: str
|
| 76 |
|
| 77 |
+
|
| 78 |
class AnalysisResponse(BaseModel):
    """Response body for POST /analyze."""

    # Identifier generated for the new analysis session (a uuid4 string).
    session_id: str
|
| 80 |
|
| 81 |
+
|
| 82 |
class StatusResponse(BaseModel):
    """Response body for GET /status/{session_id}."""

    # Session the status belongs to.
    session_id: str
    # Current stage of the analysis job; restricted to the listed values.
    status: Literal["pending", "cloning", "summarizing", "embedding_background", "completed", "failed"]
    # Optional message taken from the job record, if one was stored.
    message: str | None = None
|
| 86 |
|
| 87 |
+
|
| 88 |
class FileDetail(BaseModel):
    """One file entry in the analyzed repository's directory listing."""

    # Path relative to the cloned repository root, using "/" separators.
    path: str
    # File size in bytes as reported by os.path.getsize.
    size_bytes: int
|
| 91 |
|
| 92 |
+
|
| 93 |
class AnalysisResult(BaseModel):
    """Response body for GET /result/{session_id}."""

    # URL of the repository that was analyzed.
    repo_url: str
    # File entries of the repository; the analysis task sorts them by path.
    directory_structure: List[FileDetail]
    # Answer returned by the RAG chain for the initial summary question.
    initial_summary: str
|
| 97 |
|
| 98 |
+
|
| 99 |
class FileContentResponse(BaseModel):
    """Response body for GET /file-content/{session_id}."""

    # The path exactly as it was requested by the client.
    path: str
    # File content read as UTF-8 text.
    content: str
|
| 102 |
|
| 103 |
+
|
| 104 |
class ChatRequest(BaseModel):
    """Request body for POST /chat/{session_id}."""

    # The user's question; forwarded to query_with_context.
    query: str
    # Files the user pinned as primary context; forwarded to query_with_context.
    pinned_files: List[str] = []
|
| 107 |
|
| 108 |
+
|
| 109 |
class ChatResponse(BaseModel):
    """Response body for POST /chat/{session_id}."""

    # String returned by query_with_context (an answer or an error message).
    answer: str
|
| 111 |
|
| 112 |
+
|
| 113 |
class ModifiedFile(BaseModel):
    """A file whose content should be overwritten before the repository is zipped."""

    # Path joined onto the repository root; paths resolving outside it are skipped.
    path: str
    # New content, written to the file as UTF-8 text.
    content: str
|
| 116 |
|
| 117 |
+
|
| 118 |
class DownloadRequest(BaseModel):
    """Request body for POST /download-zip/{session_id}."""

    # Files to write into the session's repository copy before archiving.
    modified_files: List[ModifiedFile]
|
| 120 |
|
| 121 |
+
|
| 122 |
+
# ============================================================================
|
| 123 |
+
# Utility Functions
|
| 124 |
+
# ============================================================================
|
| 125 |
+
|
| 126 |
+
def is_text_file(file_path: str) -> bool:
    """Check if a file is readable as UTF-8 text.

    Only the first 512 bytes are inspected. The file counts as text when that
    prefix contains no NUL byte and decodes as UTF-8; a multibyte character
    cut off at the 512-byte boundary is tolerated. An empty file counts as
    text.

    Args:
        file_path: Path of the file to probe.

    Returns:
        True if the prefix looks like UTF-8 text, False if it looks binary or
        the file cannot be opened.
    """
    import codecs  # local import so the block does not depend on file-level imports

    try:
        with open(file_path, 'rb') as f:
            head = f.read(512)
    except (OSError, TypeError, ValueError):
        # Missing file, directory, permission problem or unusable path value.
        return False

    # NUL bytes are valid UTF-8 but are used here as a sign of binary content.
    if b"\x00" in head:
        return False

    try:
        # final=False: an incomplete trailing sequence is buffered, not an error.
        codecs.getincrementaldecoder('utf-8')().decode(head, final=False)
    except UnicodeDecodeError:
        return False
    return True
|
| 134 |
+
|
| 135 |
+
|
| 136 |
def initial_analysis_task(session_id: str, repo_url: str, background_tasks: BackgroundTasks):
    """
    Background task to clone and analyze a repository.

    Progress is recorded on the session through ``update_session``: the status
    moves through "cloning", "summarizing" and "embedding_background". If any
    step raises, the status becomes "failed" and the error text is stored
    under "message"; the exception is not re-raised.

    Args:
        session_id: Key of the session to update; also names the clone
            directory under ``SESSIONS_BASE_DIR``.
        repo_url: URL passed to ``Repo.clone_from``.
        background_tasks: Task queue on which the full-repository embedding
            job is scheduled once the summary has been stored.
    """
    session_repo_path = SESSIONS_BASE_DIR / session_id

    try:
        # Cleanup if exists
        if session_repo_path.exists():
            shutil.rmtree(session_repo_path)

        # parents=True so a missing intermediate directory does not abort the job.
        SESSIONS_BASE_DIR.mkdir(parents=True, exist_ok=True)
        update_session(session_id, "status", "cloning")

        # Clone repository (shallow clone for speed)
        Repo.clone_from(repo_url, str(session_repo_path), depth=1)
        update_session(session_id, "repo_path", str(session_repo_path))

        # Take the last URL segment, tolerating a trailing slash, and strip
        # only a trailing ".git" (a plain replace would also mangle names
        # such as "user.github.io").
        repo_name = repo_url.rstrip('/').split('/')[-1].removesuffix('.git')
        update_session(session_id, "repo_name", repo_name)

        # Define ignore patterns
        ignore_patterns = {'.git', '.gitignore', '__pycache__', 'node_modules', 'dist', 'build', '.venv', 'venv'}

        all_file_details = []
        key_file_paths_for_summary = []
        all_text_file_paths_for_embedding = []
        summary_candidate_names = {"readme.md", "package.json", "pyproject.toml", "requirements.txt", "pom.xml", "build.gradle", "cargo.toml"}

        # Walk the repository
        for root, dirs, files in os.walk(str(session_repo_path), topdown=True):
            # Prune ignored directories in place so os.walk never descends into them.
            dirs[:] = [d for d in dirs if d not in ignore_patterns]
            for name in files:
                if name in ignore_patterns:
                    continue
                file_path = os.path.join(root, name)
                if os.path.islink(file_path):
                    # Symlinks are skipped entirely.
                    continue

                try:
                    relative_path = pathlib.Path(file_path).relative_to(session_repo_path).as_posix()
                except ValueError:
                    relative_path = os.path.relpath(file_path, str(session_repo_path)).replace("\\", "/")

                file_size = os.path.getsize(file_path)
                all_file_details.append(FileDetail(path=relative_path, size_bytes=file_size))

                if is_text_file(file_path):
                    all_text_file_paths_for_embedding.append(file_path)
                    if name.lower() in summary_candidate_names:
                        key_file_paths_for_summary.append(file_path)

        update_session(session_id, "status", "summarizing")

        # Initialize chat history
        update_session(session_id, "chat_history", [])

        # Create RAG chain with key files
        rag_chain = create_conversational_chain(key_file_paths_for_summary, session_id)

        if not rag_chain:
            raise Exception("Failed to create initial AI chain.")

        update_session(session_id, "rag_chain", rag_chain)

        # Generate initial summary
        job = get_session(session_id)
        chat_history = job.get("chat_history", [])
        summary_query = "Based on the provided files (like README, package.json, etc.), what is the primary purpose of this software project? Provide a concise, one-paragraph summary."
        initial_summary = query_with_context(rag_chain, chat_history, summary_query, [], str(session_repo_path))

        # Store result
        result = AnalysisResult(
            repo_url=repo_url,
            directory_structure=sorted(all_file_details, key=lambda x: x.path),
            initial_summary=initial_summary
        )
        update_session(session_id, "result", result)
        update_session(session_id, "status", "embedding_background")

        # Start background embedding
        background_tasks.add_task(embed_entire_repository, session_id, all_text_file_paths_for_embedding)

    except Exception as e:
        # Top-level boundary of the background job: record the failure on the
        # session so the status endpoint can report it.
        update_session(session_id, "status", "failed")
        update_session(session_id, "message", str(e))
        print(f"❌ Analysis failed for session {session_id}: {e}")
|
| 222 |
+
|
| 223 |
+
|
| 224 |
+
# ============================================================================
|
| 225 |
+
# API Endpoints
|
| 226 |
+
# ============================================================================
|
| 227 |
|
| 228 |
@app.get("/")
def read_root():
    """Return a JSON banner with a running message, the API version and the docs path."""
    service_info = {
        "message": "GitHub Companion Backend is Running",
        "version": "5.0.0",
        "docs": "/docs",
    }
    return JSONResponse(content=service_info)
|
| 236 |
+
|
| 237 |
+
|
| 238 |
+
@app.get("/health")
def health_check():
    """Return a fixed "healthy" payload for health probes (Hugging Face Spaces)."""
    payload = {"status": "healthy"}
    return JSONResponse(content=payload)
|
| 242 |
+
|
| 243 |
|
| 244 |
@app.post("/analyze", response_model=AnalysisResponse, status_code=202)
def submit_analysis(request: RepoRequest, background_tasks: BackgroundTasks):
    """Register a new session and schedule the repository analysis for it.

    Returns the generated session id; the analysis itself runs as a
    background task.
    """
    new_session_id = str(uuid.uuid4())
    # Store the session before scheduling the task that updates it.
    set_session(new_session_id, {"status": "pending"})
    # The task receives the same queue so it can schedule the embedding step.
    background_tasks.add_task(
        initial_analysis_task,
        new_session_id,
        request.repo_url,
        background_tasks,
    )
    return AnalysisResponse(session_id=new_session_id)
|
| 251 |
|
| 252 |
+
|
| 253 |
@app.get("/status/{session_id}", response_model=StatusResponse)
def get_analysis_status(session_id: str):
    """Report the current status of an analysis job.

    Raises:
        HTTPException: 404 when no session data exists for ``session_id``.
    """
    job = get_session(session_id)
    if not job:
        raise HTTPException(status_code=404, detail="Session ID not found.")

    # Promote the job to "completed" once the embedding flag has been set.
    embedding_finished = (
        job.get("status") == "embedding_background"
        and job.get("embedding_complete")
    )
    if embedding_finished:
        update_session(session_id, "status", "completed")
        job["status"] = "completed"

    current_status = job["status"]
    detail_message = job.get("message")
    return StatusResponse(session_id=session_id, status=current_status, message=detail_message)
|
| 266 |
|
| 267 |
+
|
| 268 |
@app.get("/result/{session_id}", response_model=AnalysisResult)
def get_analysis_result(session_id: str):
    """Return the stored analysis result once the job has reached a ready state.

    Raises:
        HTTPException: 400 when the session is missing or its status is not
            "embedding_background" or "completed".
    """
    ready_states = ("embedding_background", "completed")
    job = get_session(session_id)
    is_ready = bool(job) and job.get("status") in ready_states
    if not is_ready:
        raise HTTPException(status_code=400, detail="Job not found or not ready.")
    return job["result"]
|
| 275 |
|
| 276 |
+
|
| 277 |
@app.get("/file-content/{session_id}", response_model=FileContentResponse)
def get_file_content(session_id: str, file_path: str = Query(..., alias="path")):
    """Return the UTF-8 text of one file inside the session's cloned repository.

    Raises:
        HTTPException: 404 when the session or file is missing, 403 when the
            resolved path falls outside the repository, 500 when reading fails.
    """
    job = get_session(session_id)
    if not (job and "repo_path" in job):
        raise HTTPException(status_code=404, detail="Session not found.")

    repo_root = pathlib.Path(job["repo_path"]).resolve()
    target = repo_root.joinpath(file_path).resolve()

    # Security: resolve first, then check containment, so ".." segments
    # cannot lead outside the clone.
    inside_repo = target.is_relative_to(repo_root)
    if not inside_repo:
        raise HTTPException(status_code=403, detail="Access denied.")

    if not target.is_file():
        raise HTTPException(status_code=404, detail="File not found.")

    try:
        text = target.read_text(encoding="utf-8")
        return FileContentResponse(path=file_path, content=text)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error reading file: {e!s}")
|
| 299 |
|
| 300 |
+
|
| 301 |
@app.post("/chat/{session_id}", response_model=ChatResponse)
def chat_with_repo(session_id: str, request: ChatRequest):
    """Answer a question about the repository using the session's RAG chain.

    Raises:
        HTTPException: 404 when the session is missing or has no chain yet.
    """
    job = get_session(session_id)
    if not (job and "rag_chain" in job):
        raise HTTPException(status_code=404, detail="Chat session not ready.")

    # Pass the session's chain, history and clone path straight to the query helper.
    reply = query_with_context(
        job["rag_chain"],
        job.get("chat_history", []),
        request.query,
        request.pinned_files,
        job["repo_path"],
    )
    return ChatResponse(answer=reply)
|
| 315 |
|
| 316 |
+
|
| 317 |
@app.post("/download-zip/{session_id}")
|
| 318 |
async def download_zip(session_id: str, request: DownloadRequest, background_tasks: BackgroundTasks):
|
| 319 |
+
"""Download the repository as a ZIP file with any modifications applied."""
|
| 320 |
+
job = get_session(session_id)
|
| 321 |
if not job or "repo_path" not in job:
|
| 322 |
raise HTTPException(status_code=404, detail="Session not found.")
|
| 323 |
+
|
| 324 |
repo_base_path = pathlib.Path(job["repo_path"]).resolve()
|
| 325 |
repo_name = job.get("repo_name", session_id)
|
| 326 |
temp_zip_dir = pathlib.Path(tempfile.gettempdir()) / "temp_zips"
|
| 327 |
+
|
| 328 |
+
# Apply modifications
|
| 329 |
for modified_file in request.modified_files:
|
| 330 |
file_to_update = (repo_base_path / modified_file.path).resolve()
|
| 331 |
+
|
| 332 |
if not file_to_update.is_relative_to(repo_base_path):
|
| 333 |
continue
|
| 334 |
+
|
| 335 |
file_to_update.parent.mkdir(parents=True, exist_ok=True)
|
| 336 |
file_to_update.write_text(modified_file.content, encoding="utf-8")
|
| 337 |
+
|
| 338 |
+
# Create ZIP
|
| 339 |
temp_zip_dir.mkdir(exist_ok=True)
|
| 340 |
zip_path_base = temp_zip_dir / f"{repo_name}-{session_id}"
|
| 341 |
zip_path_final = shutil.make_archive(str(zip_path_base), 'zip', str(repo_base_path))
|
| 342 |
+
|
| 343 |
+
# Cleanup ZIP after download
|
| 344 |
background_tasks.add_task(os.remove, zip_path_final)
|
| 345 |
+
|
| 346 |
return FileResponse(
|
| 347 |
path=zip_path_final,
|
| 348 |
media_type='application/zip',
|
requirements.txt
CHANGED
|
@@ -1,14 +1,17 @@
|
|
| 1 |
-
fastapi
|
| 2 |
-
uvicorn[standard]
|
| 3 |
-
GitPython
|
| 4 |
-
|
| 5 |
-
langchain
|
| 6 |
-
langchain-
|
| 7 |
-
langchain-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi==0.115.14
|
| 2 |
+
uvicorn[standard]==0.35.0
|
| 3 |
+
GitPython==3.1.44
|
| 4 |
+
|
| 5 |
+
langchain==0.3.24
|
| 6 |
+
langchain-core==0.3.66
|
| 7 |
+
langchain-community==0.4.1
|
| 8 |
+
langchain-google-genai==2.0.8
|
| 9 |
+
|
| 10 |
+
chromadb==0.6.3
|
| 11 |
+
sentence-transformers==4.1.0
|
| 12 |
+
unstructured==0.16.17
|
| 13 |
+
python-magic==0.4.27
|
| 14 |
+
|
| 15 |
+
torch==2.7.0
|
| 16 |
+
transformers==4.53.0
|
| 17 |
+
huggingface-hub==0.33.1
|
shared.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Shared state module for GitHub Companion Backend.
|
| 3 |
+
|
| 4 |
+
This module provides thread-safe shared state for managing analysis sessions
|
| 5 |
+
across multiple concurrent users. It uses threading locks to ensure safe
|
| 6 |
+
access to the shared dictionary in a multi-threaded environment.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import threading
|
| 10 |
+
from typing import Dict, Any
|
| 11 |
+
|
| 12 |
+
# Thread-safe lock for accessing analysis_jobs
|
| 13 |
+
_lock = threading.Lock()
|
| 14 |
+
|
| 15 |
+
# Global dictionary to store analysis job states
|
| 16 |
+
# Each session_id maps to a dictionary containing:
|
| 17 |
+
# - status: The current job status
|
| 18 |
+
# - repo_path: Path to cloned repository
|
| 19 |
+
# - rag_chain: The LangChain retrieval chain
|
| 20 |
+
# - vectorstore: ChromaDB vectorstore
|
| 21 |
+
# - chat_history: List of chat messages
|
| 22 |
+
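# - result: Analysis result data
# - repo_name: Repository name derived from the clone URL
# - message: Error text recorded when a job fails
# - embedding_complete: Flag the status endpoint checks to mark a job completed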
|
| 23 |
+
analysis_jobs: Dict[str, Dict[str, Any]] = {}
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def get_session(session_id: str) -> Dict[str, Any] | None:
    """Look up a session under the module lock.

    Returns the stored session object itself (not a copy), or None when the
    id is unknown.
    """
    with _lock:
        try:
            return analysis_jobs[session_id]
        except KeyError:
            return None
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def set_session(session_id: str, data: Dict[str, Any]) -> None:
    """Store ``data`` as the session for ``session_id`` under the module lock.

    Any existing entry for the same id is replaced.
    """
    with _lock:
        analysis_jobs.update({session_id: data})
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def update_session(session_id: str, key: str, value: Any) -> None:
    """Set one key on an existing session under the module lock.

    Does nothing when ``session_id`` is not present.
    """
    with _lock:
        try:
            session = analysis_jobs[session_id]
        except KeyError:
            # Unknown session: nothing to update.
            return
        session[key] = value
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def delete_session(session_id: str) -> bool:
    """Remove a session under the module lock.

    Returns True when an entry was removed, False when the id was unknown.
    """
    # Private sentinel so a stored value of None still counts as "present".
    missing = object()
    with _lock:
        removed = analysis_jobs.pop(session_id, missing)
    return removed is not missing
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def session_exists(session_id: str) -> bool:
    """Report, under the module lock, whether ``session_id`` has an entry."""
    with _lock:
        found = session_id in analysis_jobs
    return found
|