Spaces:
Running
Running
| import os | |
| import hashlib | |
| import streamlit as st | |
# Page configuration must be the first Streamlit call in the script.
st.set_page_config(
    page_title="Codebase Intelligence Agent",
    page_icon="🧭",
    layout="wide",
)

# Copy the OpenAI key from Streamlit / Hugging Face secrets into the process
# environment so downstream code can read it with os.getenv. The environment
# is checked first so st.secrets is only touched when the key is not already
# set. Any failure while reading secrets is ignored on purpose: this bridge is
# best-effort and the app must still start when no secrets are configured.
_api_key_var = "OPENAI_API_KEY"
try:
    key_already_set = bool(os.getenv(_api_key_var))
    if not key_already_set and _api_key_var in st.secrets:
        os.environ[_api_key_var] = st.secrets[_api_key_var]
except Exception:
    pass

st.title("🧭 Codebase Intelligence Agent")
st.write(
    "Upload a Python repository (ZIP) and ask questions — answers come with exact file/line citations."
)
# ---- models: built once and reused across reruns ----
@st.cache_resource
def load_models():
    """Instantiate the embedder, reranker and answerer used by the app.

    Decorated with ``st.cache_resource`` so the three objects are constructed
    on the first call only; later script reruns receive the same instances
    instead of rebuilding them on every widget interaction.

    Returns:
        dict: ``{"embedder": Embedder, "reranker": Reranker,
        "answerer": Answerer}``.
    """
    # Imported lazily so the project modules are only loaded on first use.
    from src.rag.embedder import Embedder
    from src.rag.reranker import Reranker
    from src.rag.answerer import Answerer

    return {
        "embedder": Embedder(),
        "reranker": Reranker(),
        "answerer": Answerer(),
    }
# ---- ingest + chunk, cached per uploaded file ----
@st.cache_data
def ingest(file_bytes, file_name):
    """Write the uploaded ZIP to a temp directory, scan it and chunk it.

    Decorated with ``st.cache_data`` so the same upload (same bytes and file
    name) is scanned and chunked only once across script reruns.

    Args:
        file_bytes: Raw bytes of the uploaded ZIP archive.
        file_name: Name reported for the upload; only its final path
            component is used when writing the temporary file.

    Returns:
        tuple: ``(chunks, n_files)`` where ``chunks`` is the result of
        ``chunk_repo`` and ``n_files`` is the number of files returned by
        ``scan_repo``.
    """
    import tempfile

    from src.ingestion.scanner import scan_repo
    from src.ingestion.chunker import chunk_repo

    # The name comes from the client, so strip any directory parts to keep the
    # write inside the freshly created temp directory.
    safe_name = os.path.basename(file_name) or "upload.zip"
    tmp_zip = os.path.join(tempfile.mkdtemp(), safe_name)
    with open(tmp_zip, "wb") as f:
        f.write(file_bytes)

    # NOTE(review): the temp directory is intentionally left in place; it is
    # not visible here whether scan_repo extracts next to the archive or
    # whether later stages re-read those files — confirm before adding cleanup.
    files, _ = scan_repo(tmp_zip)
    chunks = chunk_repo(files)
    return chunks, len(files)
# ---- build index, cached per file (underscore args skip hashing) ----
@st.cache_resource
def build_index(file_hash, _chunks, _embeddings):
    """Build the hybrid (vector + BM25) retriever for one uploaded archive.

    Decorated with ``st.cache_resource``: ``file_hash`` is the only hashed
    argument and therefore acts as the cache key, while the underscore-prefixed
    arguments are excluded from hashing by Streamlit. The retriever is built
    once per distinct upload and reused on subsequent reruns.

    Args:
        file_hash: Digest of the uploaded bytes; used solely as the cache key.
        _chunks: Chunk records produced by ingestion.
        _embeddings: Embeddings for ``_chunks``, passed to ``VectorStore.build``
            together with the chunks.

    Returns:
        HybridRetriever: Retriever wrapping the vector store and a
        ``BM25Retriever`` built over the same chunks.
    """
    from src.rag.vector_store import VectorStore
    from src.rag.bm25_search import BM25Retriever
    from src.rag.hybrid_search import HybridRetriever

    vs = VectorStore()
    vs.build(_embeddings, _chunks)
    return HybridRetriever(vs, BM25Retriever(_chunks))
@st.cache_data
def embed(file_hash, _texts, _embedder):
    """Embed every chunk text of an uploaded archive, cached per upload.

    Decorated with ``st.cache_data``: ``file_hash`` is the only hashed argument
    and acts as the cache key; the underscore-prefixed arguments are excluded
    from hashing by Streamlit. Without the cache the whole repository would be
    re-embedded on every widget interaction.

    Args:
        file_hash: Digest of the uploaded bytes; used solely as the cache key.
        _texts: List of chunk texts to embed.
        _embedder: Object exposing ``create_embeddings(texts)``.

    Returns:
        Whatever ``_embedder.create_embeddings(_texts)`` returns.
        NOTE(review): ``st.cache_data`` pickles the return value — confirm the
        embeddings are a picklable type (e.g. a list or NumPy array).
    """
    return _embedder.create_embeddings(_texts)
# ---- main flow: upload -> index -> ask / generate tests ----
uploaded = st.file_uploader("Upload repository ZIP", type=["zip"])
if uploaded:
    file_bytes = uploaded.getvalue()
    # The digest is only used as a cache key for the indexing helpers,
    # not for any security purpose.
    file_hash = hashlib.md5(file_bytes).hexdigest()

    models = load_models()
    chunks, n_files = ingest(file_bytes, uploaded.name)

    # An archive without any extractable definitions would otherwise be passed
    # to the embedder and index builders with empty input; stop early with a
    # clear message instead.
    if not chunks:
        st.warning(
            "No Python definitions were found in this archive. "
            "Upload a ZIP that contains Python source files."
        )
        st.stop()

    texts = [c["chunk_text"] for c in chunks]
    embeddings = embed(file_hash, texts, models["embedder"])
    hybrid = build_index(file_hash, chunks, embeddings)
    st.success(f"Indexed {len(chunks)} definitions from {n_files} files. Ask away.")

    ask_tab, test_tab = st.tabs(["💬 Ask the codebase", "🧪 Generate tests"])

    with ask_tab:
        query = st.text_input(
            "Ask about the codebase",
            placeholder="e.g. where are JWT tokens created?",
        )
        if query:
            with st.spinner("Searching and answering..."):
                # Retrieve 10 hybrid candidates, rerank them, then answer
                # from the top 5.
                query_embedding = models["embedder"].create_embeddings([query])[0]
                results = hybrid.search(query, query_embedding, k=10)
                results = models["reranker"].rerank(query, results)
                result = models["answerer"].answer(query, results[:5])

            st.subheader("Answer")
            st.write(result["answer"])

            st.subheader("Sources")
            for s in result["sources"]:
                label = f"📄 {s['file']}:{s['start_line']}-{s['end_line']} · {s['type']} {s['name']}"
                with st.expander(label):
                    st.code(s["code"], language="python")

    with test_tab:
        st.write("Enter a function or class name and the agent will read its real source and write pytest tests.")
        target = st.text_input(
            "Function / class name",
            placeholder="e.g. create_access_token",
        )
        if target and st.button("Generate tests"):
            from src.agent.tools import CodeTools
            from src.agent.workflow import TestAgent

            with st.spinner("Agent reading the code and writing tests..."):
                tools = CodeTools(chunks, models["embedder"], hybrid, models["reranker"])
                tests = TestAgent(tools).generate_tests(target)

            # Strip Markdown code fences from the generated text before
            # rendering it as code.
            st.code(
                tests.replace("```python", "").replace("```", "").strip(),
                language="python",
            )

    with st.expander("Repository overview"):
        # Count chunks per definition type.
        by_type = {}
        for c in chunks:
            by_type[c["type"]] = by_type.get(c["type"], 0) + 1
        st.write({"files": n_files, "definitions": len(chunks), **by_type})