# NOTE: The original paste carried Hugging Face Space page residue here
# (user "lafifi-24", commit 933c2fa). Preserved as a comment so the module parses.
# Load environment variables (e.g. API keys) before importing services that read them.
from dotenv import load_dotenv
load_dotenv(".env")
import uuid
import ast
from llama_index.core.schema import TextNode
from github_repo_downloader import GitHubRepoDownloader
from pyan_insperation.analyzer import CallGraphVisitor
from graph_converter import pyan_to_networkx
from level_computer import compute_node_levels
from prompt_generator import generate_explaination_by_level
from embedding_service import EmbeddingService
from structlog import get_logger
logger = get_logger(__name__)
import gradio as gr
# In-memory registry mapping project_id -> EmbeddingService.
# NOTE(review): process-local only — indexed projects are lost on restart.
local_db = {}
def ingest(repo_url, branch="main"):
    """
    Clone a GitHub repo, parse Python code, build a code graph, and index it.

    Args:
        repo_url: public GitHub repo URL.
        branch: branch to index, defaults to "main".

    Yields:
        Progress status strings while each stage runs, then finally the
        project_id (internal ID used to reference the indexed project).
    """
    repo = GitHubRepoDownloader(repo_url=repo_url, branch=branch)
    yield "repo downloaded"
    files = repo.read_files(file_filter=lambda path: path.endswith(".py"))
    yield "python files loaded"
    pyan_graph = CallGraphVisitor(files=files)
    graph = pyan_to_networkx(pyan_graph=pyan_graph)
    yield "graph built"
    levels = compute_node_levels(graph=graph)
    yield "start generating explanation"
    # Called for its side effect: it attaches an `explination` attribute to
    # graph nodes (spelling as set by the prompt generator — do not "fix" here).
    generate_explaination_by_level(graph=graph, levels=levels)
    yield "start embedding"
    nodes = []
    for node in graph.nodes:
        # Skip nodes with no namespace, lambdas, and nodes with no AST to unparse.
        if node.namespace is None or node.ast_node is None or node.get_short_name() == "lambda":
            continue
        # Prefer the generated explanation; fall back to the raw source text.
        text = node.explination if hasattr(node, "explination") else ast.unparse(node.ast_node)
        nodes.append(TextNode(
            text=text,
            metadata={
                "name": node.name,
                "filename": node.filename,
                "type": node.flavor.name,
                "namespace": node.namespace,
            },
        ))
    embedding = EmbeddingService("test")
    embedding.prepare_index(nodes)
    project_id = uuid.uuid4().hex
    local_db[project_id] = embedding
    yield project_id
def query(project_id, question, top_k=10):
    """
    Retrieve relevant nodes and send to reasoning LLM.

    Args:
        project_id: ID returned from ingest().
        question: user question about the codebase.
        top_k: number of nodes to retrieve.

    Yields:
        answer: generated explanation or context.
    """
    embedding_service = local_db[project_id]
    result = embedding_service.infer(question, top_k=top_k)
    yield result
# Build each tab as its own interface, then combine them in a tabbed layout.
ingest_tab = gr.Interface(
    fn=ingest,
    inputs=[
        gr.Textbox(label="repo_url"),
        gr.Textbox(label="branch", value="main"),
    ],
    outputs=gr.Textbox(label="project_id"),
)
query_tab = gr.Interface(
    fn=query,
    inputs=[
        gr.Textbox(label="project_id"),
        gr.Textbox(label="query"),
        gr.Number(value=5, label="top_k", maximum=20, minimum=2),
    ],
    outputs=gr.Textbox(label="answer"),
)
demo = gr.TabbedInterface(
    [ingest_tab, query_tab],
    ["Ingest Repo", "Query Project"],
)
demo.launch(mcp_server=True)