File size: 3,511 Bytes
933c2fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
from dotenv import load_dotenv
load_dotenv(".env")

import uuid
import ast

from llama_index.core.schema import TextNode


from github_repo_downloader import GitHubRepoDownloader
from pyan_insperation.analyzer import CallGraphVisitor
from graph_converter import pyan_to_networkx
from level_computer import compute_node_levels
from prompt_generator import generate_explaination_by_level
from embedding_service import EmbeddingService


from structlog import get_logger
logger = get_logger(__name__)


import gradio as gr

local_db = {}

def ingest(repo_url, branch="main"):
    """
    Clone a GitHub repo, parse Python code, build a code graph, and index it.

    This is a generator so Gradio can stream progress messages to the UI;
    the final value yielded is the project ID.

    Args:
        repo_url: public GitHub repo URL.
        branch: branch to index, defaults to "main".

    Yields:
        Progress status strings, then the internal project ID used to
        reference the indexed project in query().
    """
    repo = GitHubRepoDownloader(repo_url=repo_url, branch=branch)
    yield "repo downloaded"

    # Only Python sources are parseable by the call-graph visitor.
    files = repo.read_files(file_filter=lambda path: path.endswith(".py"))
    yield "Python files loaded"

    pyan_graph = CallGraphVisitor(files=files)
    graph = pyan_to_networkx(pyan_graph=pyan_graph)
    yield "graph built"

    # Levels order nodes bottom-up so explanations of callees exist
    # before their callers are explained.
    levels = compute_node_levels(graph=graph)
    yield "started generating explanations"
    generate_explaination_by_level(graph=graph, levels=levels)
    yield "started embedding"

    nodes = []
    for node in graph.nodes:
        # Skip synthetic nodes: no namespace, lambdas, or nodes without
        # a backing AST (nothing meaningful to embed for those).
        if node.namespace is None or node.get_short_name() in ["lambda"] or node.ast_node is None:
            continue
        # Prefer the LLM-generated explanation when the prompt step
        # attached one; otherwise fall back to the raw source text.
        if hasattr(node, "explination"):
            text = node.explination
        else:
            text = ast.unparse(node.ast_node)
        nodes.append(TextNode(
            text=text,
            metadata={
                "name": node.name,
                "filename": node.filename,
                "type": node.flavor.name,
                "namespace": node.namespace,
            },
        ))

    embedding = EmbeddingService("test")
    embedding.prepare_index(nodes)

    # NOTE(review): local_db is in-process only — indexed projects are
    # lost on restart; confirm whether persistence is needed.
    project_id = uuid.uuid4().hex
    local_db[project_id] = embedding

    yield project_id

def query(project_id, question, top_k=10):
    """
    Retrieve relevant code nodes for a question about an indexed project.

    Args:
        project_id: ID returned from ingest().
        question: user question about the codebase.
        top_k: maximum number of retrieved nodes to consider, defaults to 10.

    Yields:
        The retrieval/inference result for the question.

    Raises:
        KeyError: if project_id was not produced by ingest() in this process.
    """
    answer = local_db[project_id].infer(question, top_k=top_k)
    yield answer


# Build the two-tab UI: one tab to ingest a repo, one to query it.
ingest_tab = gr.Interface(
    ingest,
    inputs=[
        gr.Textbox(label="repo_url"),
        gr.Textbox(label="branch", value="main"),
    ],
    outputs=gr.Textbox(label="project_id"),
)

query_tab = gr.Interface(
    query,
    inputs=[
        gr.Textbox(label="project_id"),
        gr.Textbox(label="query"),
        gr.Number(value=5, label="top_k", maximum=20, minimum=2),
    ],
    outputs=gr.Textbox(label="answer"),
)

demo = gr.TabbedInterface(
    [ingest_tab, query_tab],
    ["Ingest Repo", "Query Project"],
)

# Expose the interfaces as MCP tools as well as the web UI.
demo.launch(mcp_server=True)