File size: 2,035 Bytes
8e72e1f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
"""
Quick end-to-end check of the code-aware retrieval pipeline on a real repo.

Usage:
    python smoke_test.py <repo_dir> "your question"

Example:
    python smoke_test.py C:\\Users\\aisha\\ai_projects\\studysync_backend\\studysync\\backend "where are JWT tokens created"
"""
import sys
from src.agent.tools import CodeTools
from src.agent.workflow import TestAgent
from src.ingestion.scanner import scan_python_files
from src.ingestion.chunker import chunk_repo
from src.rag.embedder import Embedder
from src.rag.bm25_search import BM25Retriever
from src.rag.vector_store import VectorStore
from src.rag.hybrid_search import HybridRetriever
from src.rag.reranker import Reranker
from src.rag.answerer import Answerer


def main():
    """Run the code-aware retrieval pipeline end to end and print each stage.

    Arguments are read directly from ``sys.argv``:

    * ``sys.argv[1]`` -- directory of the repository to scan.
    * ``sys.argv[2]`` -- the question to search for and answer.
    * ``sys.argv[3]`` -- optional name handed to ``TestAgent.generate_tests``;
      defaults to ``"create_access_token"`` when omitted.

    Stages, in order: scan + chunk the repo, embed + index the chunks,
    hybrid search (k=10), rerank, print the top 5 hits, print the generated
    answer, then print the generated tests for the target name.

    Exits with status 1 when fewer than two arguments are supplied or when
    the scan produces no chunks.
    """
    if len(sys.argv) < 3:
        print(
            'usage: python smoke_test.py <repo_dir> "your question" '
            "[function_name]"
        )
        sys.exit(1)

    repo_dir = sys.argv[1]
    query = sys.argv[2]
    # The test-generation target used to be hard-coded; keep the old value
    # as the default so existing two-argument invocations behave the same.
    target = sys.argv[3] if len(sys.argv) > 3 else "create_access_token"

    print("Scanning + chunking...")
    chunks = chunk_repo(scan_python_files(repo_dir))
    print(f"  {len(chunks)} chunks")
    if not chunks:
        # Nothing to embed or index; stop here with a clear message rather
        # than handing an empty corpus to the embedder and retrievers.
        print("No chunks found; nothing to index.")
        sys.exit(1)

    print("Embedding + indexing...")
    embedder = Embedder()
    embeddings = embedder.create_embeddings([c["chunk_text"] for c in chunks])
    vector_store = VectorStore()
    vector_store.build(embeddings, chunks)
    hybrid = HybridRetriever(vector_store, BM25Retriever(chunks))
    # Build the reranker once and share it between the search step and the
    # agent tools instead of constructing it twice.
    reranker = Reranker()

    print(f'Searching: "{query}"\n')
    query_emb = embedder.create_embeddings([query])[0]
    results = hybrid.search(query, query_emb, k=10)
    results = reranker.rerank(query, results)
    top_results = results[:5]

    print("Top 5 results:")
    for r in top_results:
        d = r["document"]
        print(f"  {d['name']:40}  {d['file']}:{d['start_line']}-{d['end_line']}")

    print("\n--- Answer ---")
    result = Answerer().answer(query, top_results)
    print(result["answer"])

    tools = CodeTools(chunks, embedder, hybrid, reranker)
    agent = TestAgent(tools)
    print(f"\n--- Generated tests for {target} ---")
    print(agent.generate_tests(target))


# Run the smoke test only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()