"""Gradio app: ingest a GitHub repo into a code-graph index and query it."""

from dotenv import load_dotenv

# Load environment variables first, before importing modules that may read
# them at import time.
load_dotenv(".env")

import ast
import uuid

import gradio as gr
from llama_index.core.schema import TextNode
from structlog import get_logger

from embedding_service import EmbeddingService
from github_repo_downloader import GitHubRepoDownloader
from graph_converter import pyan_to_networkx
from level_computer import compute_node_levels
from prompt_generator import generate_explaination_by_level  # spelling matches the module API
from pyan_insperation.analyzer import CallGraphVisitor  # spelling matches the package name

logger = get_logger(__name__)

# In-memory registry mapping project_id -> EmbeddingService for this session.
local_db = {}


def ingest(repo_url, branch="main"):
    """
    Clone a GitHub repo, parse Python code, build the code graph, and index it.

    Args:
        repo_url: public GitHub repo URL.
        branch: branch to index, defaults to "main".

    Yields:
        Progress messages, then the project_id used to reference the indexed project.
    """
    repo = GitHubRepoDownloader(repo_url=repo_url, branch=branch)
    yield "repo downloaded"

    # Only Python sources go into the call-graph analysis.
    files = repo.read_files(file_filter=lambda path: path.endswith(".py"))
    yield "Python files loaded"

    # Build a pyan call graph and convert it to a networkx graph.
    pyan_graph = CallGraphVisitor(files=files)
    graph = pyan_to_networkx(pyan_graph=pyan_graph)
    yield "graph built"

    # Compute each node's level in the graph; explanations are generated
    # level by level.
    levels = compute_node_levels(graph=graph)
    yield "generating explanations"
    # Annotates graph nodes with explanations (see the hasattr check below);
    # the returned prompts are not used here.
    generate_explaination_by_level(graph=graph, levels=levels)
    yield "embedding nodes"

    nodes = []
    for node in graph.nodes:
        # Skip nodes with no namespace, lambdas, and nodes without source.
        if node.namespace is None or node.get_short_name() in ["lambda"] or node.ast_node is None:
            continue
        metadata = {
            "name": node.name,
            "filename": node.filename,
            "type": node.flavor.name,
            "namespace": node.namespace,
        }
        # Prefer the generated explanation; fall back to the node's source code.
        # (The attribute name matches the field set by generate_explaination_by_level.)
        if hasattr(node, "explination"):
            nodes.append(TextNode(text=node.explination, metadata=metadata))
        else:
            nodes.append(TextNode(text=ast.unparse(node.ast_node), metadata=metadata))

    # Embed the nodes and build the vector index.
    embedding = EmbeddingService("test")
    embedding.prepare_index(nodes)

    project_id = uuid.uuid4().hex
    local_db[project_id] = embedding
    yield project_id


def query(project_id, question, top_k=10):
    """
    Retrieve relevant nodes and send them to the reasoning LLM.

    Args:
        project_id: ID returned from ingest().
        question: user question about the codebase.
        top_k: number of nodes to retrieve.

    Yields:
        answer: generated explanation or context.
    """
    answer = local_db[project_id].infer(question, top_k=top_k)
    yield answer


demo = gr.TabbedInterface(
    [
        gr.Interface(
            ingest,
            [
                gr.Textbox(label="repo_url"),
                gr.Textbox(label="branch", value="main"),
            ],
            gr.Textbox(label="project_id"),
        ),
        gr.Interface(
            query,
            [
                gr.Textbox(label="project_id"),
                gr.Textbox(label="query"),
                gr.Number(value=5, label="top_k", minimum=2, maximum=20),
            ],
            gr.Textbox(label="answer"),
        ),
    ],
    [
        "Ingest Repo",
        "Query Project",
    ],
)

demo.launch(mcp_server=True)
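
# A minimal sketch of programmatic use, bypassing the Gradio UI. The repo URL
# and question below are placeholders. Both functions are generators: the last
# value yielded by ingest() is the project_id, and query() yields the answer.
#
#   for status in ingest("https://github.com/user/repo"):
#       print(status)
#   project_id = status
#   for answer in query(project_id, "What does the ingest pipeline do?"):
#       print(answer)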