""" Quick end-to-end check of the code-aware retrieval pipeline on a real repo. Usage: python smoke_test.py "your question" Example: python smoke_test.py C:\\Users\\aisha\\ai_projects\\studysync_backend\\studysync\\backend "where are JWT tokens created" """ import sys from src.agent.tools import CodeTools from src.agent.workflow import TestAgent from src.ingestion.scanner import scan_python_files from src.ingestion.chunker import chunk_repo from src.rag.embedder import Embedder from src.rag.bm25_search import BM25Retriever from src.rag.vector_store import VectorStore from src.rag.hybrid_search import HybridRetriever from src.rag.reranker import Reranker from src.rag.answerer import Answerer def main(): if len(sys.argv) < 3: print('usage: python smoke_test.py "your question"') sys.exit(1) repo_dir = sys.argv[1] query = sys.argv[2] print("Scanning + chunking...") chunks = chunk_repo(scan_python_files(repo_dir)) print(f" {len(chunks)} chunks") print("Embedding + indexing...") embedder = Embedder() embeddings = embedder.create_embeddings([c["chunk_text"] for c in chunks]) vector_store = VectorStore() vector_store.build(embeddings, chunks) hybrid = HybridRetriever(vector_store, BM25Retriever(chunks)) print(f'Searching: "{query}"\n') query_emb = embedder.create_embeddings([query])[0] results = hybrid.search(query, query_emb, k=10) results = Reranker().rerank(query, results) print("Top 5 results:") for r in results[:5]: d = r["document"] print(f" {d['name']:40} {d['file']}:{d['start_line']}-{d['end_line']}") print("\n--- Answer ---") result = Answerer().answer(query, results[:5]) print(result["answer"]) tools = CodeTools(chunks, embedder, hybrid, Reranker()) agent = TestAgent(tools) print("\n--- Generated tests for create_access_token ---") print(agent.generate_tests("create_access_token")) if __name__ == "__main__": main()