from dotenv import load_dotenv

load_dotenv(".env")

import uuid
import ast

import gradio as gr
from llama_index.core.schema import TextNode
from structlog import get_logger

from github_repo_downloader import GitHubRepoDownloader
from pyan_insperation.analyzer import CallGraphVisitor
from graph_converter import pyan_to_networkx
from level_computer import compute_node_levels
from prompt_generator import generate_explaination_by_level
from embedding_service import EmbeddingService

logger = get_logger(__name__)
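
# In-memory registry of indexed projects: maps a project_id to the
# EmbeddingService holding that repo's index. It is process-local, so indexed
# projects are lost when the app restarts.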
local_db = {}


def ingest(repo_url, branch="main"):
    """
    Clone a GitHub repo, parse its Python files, build a code graph, and index it.

    Args:
        repo_url: public GitHub repo URL.
        branch: branch to index, defaults to "main".

    Yields:
        Progress messages while indexing, then the project_id used to
        reference the indexed project in query().
    """
    repo = GitHubRepoDownloader(
        repo_url=repo_url,
        branch=branch,
    )
    yield "repo downloaded"

    files = repo.read_files(
        file_filter=lambda path: path.endswith(".py")
    )
    yield "python files loaded"

    pyan_graph = CallGraphVisitor(files=files)
    graph = pyan_to_networkx(pyan_graph=pyan_graph)
    yield "graph built"
    levels = compute_node_levels(graph=graph)
    yield "start generating explanations"

    prompts_by_level = generate_explaination_by_level(graph=graph, levels=levels)
    yield "start embedding"
    nodes = []
    for node in graph.nodes:
        # Skip lambdas and nodes that lack a resolved namespace or an AST body.
        if node.namespace is None or node.get_short_name() in ["lambda"] or node.ast_node is None:
            continue
        if hasattr(node, "explination"):
            nodes.append(TextNode(
                text=node.explination,
                metadata={
                    "name": node.name,
                    "filename": node.filename,
                    "type": node.flavor.name,
                    "namespace": node.namespace,
                },
            ))
        else:
            nodes.append(TextNode(
                text=ast.unparse(node.ast_node),
                metadata={
                    "name": node.name,
                    "filename": node.filename,
                    "type": node.flavor.name,
                    "namespace": node.namespace,
                },
            ))

    embedding = EmbeddingService("test")
    embedding.prepare_index(nodes)

    project_id = uuid.uuid4().hex
    local_db[project_id] = embedding
    yield project_id
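
# Example (sketch) of consuming ingest() outside the Gradio UI: the generator
# streams progress strings and ends with the project_id. The repo URL below is
# hypothetical.
#
#   *progress, project_id = ingest("https://github.com/someuser/somerepo")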


def query(project_id, question, top_k=10):
    """
    Retrieve the graph nodes most relevant to a question and pass them to the
    reasoning LLM.

    Args:
        project_id: ID returned from ingest().
        question: user question about the codebase.
        top_k: number of nodes to retrieve.

    Yields:
        answer: generated explanation or retrieved context.
    """
    retrievers = local_db[project_id].infer(question, top_k=top_k)
    yield retrievers
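
# Example (sketch) of a direct call outside Gradio; next() pulls the single
# value the generator yields for the given (hypothetical) question.
#
#   answer = next(query(project_id, "What does ingest() do?", top_k=5))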


demo = gr.TabbedInterface(
    [
        gr.Interface(
            ingest,
            [
                gr.Textbox(label="repo_url"),
                gr.Textbox(label="branch", value="main"),
            ],
            gr.Textbox(label="project_id"),
        ),
        gr.Interface(
            query,
            [
                gr.Textbox(label="project_id"),
                gr.Textbox(label="query"),
                gr.Number(value=5, label="top_k", maximum=20, minimum=2),
            ],
            gr.Textbox(label="answer"),
        ),
    ],
    [
        "Ingest Repo",
        "Query Project",
    ],
)
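
# mcp_server=True also exposes ingest and query as MCP tools alongside the web
# UI (supported in recent Gradio releases; may require the gradio[mcp] extra).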
demo.launch(mcp_server=True)