from dotenv import load_dotenv

# Load .env before the imports below, since some of them may read environment
# variables at import time.
load_dotenv(".env")

import ast
import uuid

import gradio as gr
from structlog import get_logger

from llama_index.core.schema import TextNode

from embedding_service import EmbeddingService
from github_repo_downloader import GitHubRepoDownloader
from graph_converter import pyan_to_networkx
from level_computer import compute_node_levels
from prompt_generator import generate_explaination_by_level
from pyan_insperation.analyzer import CallGraphVisitor

logger = get_logger(__name__)
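
# In-memory registry of indexed projects: project_id -> EmbeddingService.
# Single-process only; indexed projects are lost when the app restarts.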
local_db = {}


def ingest(repo_url, branch="main"):
    """
    Clone a GitHub repo, parse its Python code, build a code graph, and index it.

    Args:
        repo_url: public GitHub repo URL.
        branch: branch to index, defaults to "main".

    Yields:
        Progress messages, then the project_id used to reference the
        indexed project in query().
    """
    repo = GitHubRepoDownloader(repo_url=repo_url, branch=branch)
    yield "repo downloaded"
    files = repo.read_files(file_filter=lambda path: path.endswith(".py"))
    yield "python files loaded"
    pyan_graph = CallGraphVisitor(files=files)
    graph = pyan_to_networkx(pyan_graph=pyan_graph)
    yield "graph built"
    levels = compute_node_levels(graph=graph)
    yield "start generating explanations"
    # Appears to attach an `explination` attribute to graph nodes as a side
    # effect; the hasattr check below relies on it.
    prompts_by_level = generate_explaination_by_level(graph=graph, levels=levels)
    yield "start embedding"
    nodes = []
    for node in graph.nodes:
        # Skip nodes with nothing to embed: no namespace, no AST, or lambdas.
        if node.namespace is None or node.get_short_name() == "lambda" or node.ast_node is None:
            continue
        # Prefer the generated explanation; fall back to the node's raw source.
        text = node.explination if hasattr(node, "explination") else ast.unparse(node.ast_node)
        nodes.append(TextNode(
            text=text,
            metadata={
                "name": node.name,
                "filename": node.filename,
                "type": node.flavor.name,
                "namespace": node.namespace,
            },
        ))
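    # NOTE: "test" is a hard-coded index name; if EmbeddingService persists
    # indexes by name, repeated ingests would presumably share or overwrite it.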
    embedding = EmbeddingService("test")
    embedding.prepare_index(nodes)
    project_id = uuid.uuid4().hex
    local_db[project_id] = embedding
    yield project_id
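
# Example of driving ingestion without the UI (the URL is illustrative):
#
#     for update in ingest("https://github.com/psf/requests"):
#         print(update)  # progress messages, then the project_id
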
def query(project_id, question, top_k=10):
    """
    Retrieve relevant nodes and pass them to the reasoning LLM.

    Args:
        project_id: ID yielded by ingest().
        question: user question about the codebase.
        top_k: number of nodes to retrieve.

    Yields:
        answer: generated explanation or retrieved context.
    """
    answer = local_db[project_id].infer(question, top_k=top_k)
    yield answer
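
# Example (the question is illustrative):
#
#     for answer in query(project_id, "Where is the repo cloned to?", top_k=5):
#         print(answer)
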
demo = gr.TabbedInterface(
    [
        gr.Interface(
            ingest,
            [
                gr.Textbox(label="repo_url"),
                gr.Textbox(label="branch", value="main"),
            ],
            gr.Textbox(label="project_id"),
        ),
        gr.Interface(
            query,
            [
                gr.Textbox(label="project_id"),
                gr.Textbox(label="query"),
                gr.Number(value=5, label="top_k", minimum=2, maximum=20),
            ],
            gr.Textbox(label="answer"),
        ),
    ],
    [
        "Ingest Repo",
        "Query Project",
    ],
)
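# mcp_server=True additionally exposes ingest/query as MCP tools; this needs
# the gradio[mcp] extra, and plain demo.launch() works without it.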
demo.launch(mcp_server=True)