File size: 7,008 Bytes
c43538b
e147c33
c43538b
e147c33
 
c43538b
 
 
 
e147c33
 
 
 
 
c43538b
e147c33
 
c43538b
e147c33
c43538b
e147c33
c43538b
 
 
e147c33
 
c43538b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e147c33
 
 
 
 
 
c43538b
e147c33
 
 
 
c43538b
e147c33
 
 
 
 
c43538b
 
e147c33
c43538b
e147c33
 
c43538b
e147c33
 
c43538b
e147c33
 
c43538b
e147c33
 
 
 
 
c43538b
e147c33
 
 
 
c43538b
e147c33
 
c43538b
e147c33
c43538b
e147c33
 
 
 
 
 
c43538b
 
e147c33
 
 
 
 
 
 
c43538b
 
e147c33
 
 
 
 
 
 
c43538b
 
e147c33
 
 
 
 
 
 
c43538b
 
e147c33
 
 
 
 
 
 
c43538b
 
e147c33
 
 
 
 
 
 
c43538b
e147c33
c43538b
e147c33
 
c43538b
e147c33
 
c43538b
 
e147c33
 
 
 
c43538b
 
e147c33
 
 
 
c43538b
 
e147c33
 
 
 
c43538b
 
e147c33
 
 
 
c43538b
 
e147c33
 
 
 
 
c43538b
e147c33
c43538b
e147c33
 
 
 
 
 
 
 
 
 
 
 
c43538b
e147c33
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
# agent.py
import os
from typing import Optional, List
from crewai import Agent, Task, Crew, Process
from crewai_tools import GithubSearchTool
from google import genai  # Gemini client (google-genai package)
from dotenv import load_dotenv

# Run python-dotenv's loader first so the environment lookups below can see
# whatever it loads.
load_dotenv()

# ---------------------------
# CONFIG
# ---------------------------
# Content types handed to every GithubSearchTool built in this module.
DEFAULT_CONTENT_TYPES = ["code", "pr", "issue", "repo"]

# Model used for text generation; overridable through GEMINI_MODEL.
MODEL_NAME = os.environ.get("GEMINI_MODEL", "gemini-1.5-flash")

# Credentials. GITHUB_TOKEN may be absent at import time; it is only checked
# when a GitHub tool is actually requested.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN")

# Fail at import time when the Gemini key is unset or empty.
if GOOGLE_API_KEY is None or GOOGLE_API_KEY == "":
    raise RuntimeError("❌ Missing GOOGLE_API_KEY (get one from https://aistudio.google.com)")

# Gemini Client
client = genai.Client(api_key=GOOGLE_API_KEY)

# ---------------------------
# Gemini Embedding Adapter (Free Embeddings)
# ---------------------------
class GeminiEmbedding:
    """Embedding adapter backed by a Gemini embedding model.

    Exposes an ``embed_documents`` / ``embed_query`` pair. Both methods are
    best-effort: a failed API call is reported on stdout and yields an empty
    vector instead of raising.

    Args:
        model: Name of the embedding model to call.
        api_key: API key for the client; falls back to the module-level
            ``GOOGLE_API_KEY`` when omitted or empty.
    """

    def __init__(self, model="text-embedding-004", api_key=None):
        self.model = model
        self.client = genai.Client(api_key=api_key or GOOGLE_API_KEY)

    def _embed(self, text: str) -> List[float]:
        """Return the embedding vector for a single text; may raise."""
        res = self.client.models.embed_content(model=self.model, contents=text)
        # The google-genai response exposes a plural ``embeddings`` list (one
        # entry per input); there is no singular ``embedding`` attribute, so
        # the previous ``res.embedding.values`` always failed and was
        # swallowed by the callers' ``except``.
        return res.embeddings[0].values

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed each text in order.

        Returns:
            One vector per input, in input order. An input whose embedding
            call fails contributes ``[]`` so positions stay aligned.
        """
        vectors = []
        for text in texts:
            try:
                vectors.append(self._embed(text))
            except Exception as e:  # best-effort: one bad input must not abort the batch
                print(f"⚠️ Embedding error: {e}")
                vectors.append([])
        return vectors

    def embed_query(self, text: str) -> List[float]:
        """Embed a single query string; returns ``[]`` if the call fails."""
        try:
            return self._embed(text)
        except Exception as e:
            print(f"⚠️ Embedding query error: {e}")
            return []


# ---------------------------
# Gemini LLM Wrapper
# ---------------------------
class GeminiLLM:
    """Thin text-generation wrapper around a google-genai client.

    NOTE(review): whether CrewAI actually invokes ``generate`` on an arbitrary
    object passed as ``llm=`` is not visible in this file; confirm against
    the CrewAI LLM documentation.

    Args:
        model: Name of the Gemini model to call.
        api_client: Optional object exposing ``models.generate_content``.
            When omitted, the module-level ``client`` is used at call time.
    """

    def __init__(self, model: str, *, api_client=None):
        self.model = model
        self._client = api_client

    def generate(self, prompt: str) -> str:
        """Generate text for *prompt*.

        Returns:
            The response text (``""`` when the response carries no text), or a
            string starting with ``"⚠️ Gemini API error:"`` if the call raised.
        """
        # Resolve lazily so the default keeps pointing at the module client.
        api = self._client if self._client is not None else client
        try:
            response = api.models.generate_content(
                model=self.model,
                contents=prompt,
                # google-genai takes generation settings through ``config``.
                # The previous ``generation_config`` keyword is not accepted
                # by this method, so every call raised TypeError and the
                # error string below was returned instead of model output.
                config={"temperature": 0.7, "max_output_tokens": 1024},
            )
            # ``text`` can be None; keep the declared ``str`` return type.
            return response.text or ""
        except Exception as e:
            return f"⚠️ Gemini API error: {e}"


# Shared module-level instances: the embedder passed to the GitHub tool and
# the LLM wrapper handed to every agent.
embedder = GeminiEmbedding(api_key=GOOGLE_API_KEY)
gemini_llm = GeminiLLM(model=MODEL_NAME)

# ---------------------------
# GitHub Tool using free embeddings
# ---------------------------
def github_tool(repo_url: Optional[str] = None) -> GithubSearchTool:
    """Build a ``GithubSearchTool`` wired to the module's token and embedder.

    Args:
        repo_url: Repository to scope the tool to. When falsy (``None`` or
            ``""``) the tool is constructed without a ``github_repo``.

    Returns:
        The configured ``GithubSearchTool``.

    Raises:
        RuntimeError: If ``GITHUB_TOKEN`` is unset or empty.
    """
    if not GITHUB_TOKEN:
        raise RuntimeError("Missing GITHUB_TOKEN in environment.")

    # Arguments common to both the scoped and the unscoped tool.
    # NOTE(review): confirm that GithubSearchTool honours an ``embedder``
    # keyword in the installed crewai_tools version.
    tool_kwargs = {
        "gh_token": GITHUB_TOKEN,
        "content_types": DEFAULT_CONTENT_TYPES,
        "embedder": embedder,
    }
    # Only pin the search to a repository when one was supplied.
    if repo_url:
        tool_kwargs["github_repo"] = repo_url
    return GithubSearchTool(**tool_kwargs)


# ---------------------------
# AGENTS
# ---------------------------
def make_agents(repo_url: str):
    """Create the four tool-using specialist agents plus the manager.

    Args:
        repo_url: Passed to ``github_tool`` to build the search tool that all
            four specialists receive.

    Returns:
        Tuple ``(repo_mapper, code_reviewer, security_auditor, doc_explainer,
        manager)``. The manager gets no tools and has delegation enabled.
    """
    search_tool = github_tool(repo_url)

    # (role, goal, backstory) for each specialist, in return order.
    specialist_profiles = (
        (
            "Repository Mapper",
            "Map repo structure, dependencies, and frameworks.",
            "Understands directory trees, tech stacks, and configuration files.",
        ),
        (
            "Code Reviewer",
            "Perform code review for quality, structure, and clarity.",
            "A senior engineer giving actionable review comments with examples.",
        ),
        (
            "Security Auditor",
            "Identify secrets, vulnerabilities, unsafe APIs, and dependencies.",
            "A white-hat hacker finding issues and giving fixes.",
        ),
        (
            "Documentation Explainer",
            "Summarize architecture, data flow, and how to run the project.",
            "Explains tech systems simply and clearly with examples.",
        ),
    )

    # Each specialist gets its own one-element tools list holding the shared tool.
    repo_mapper, code_reviewer, security_auditor, doc_explainer = (
        Agent(
            role=role,
            goal=goal,
            backstory=backstory,
            tools=[search_tool],
            llm=gemini_llm,
            verbose=True,
        )
        for role, goal, backstory in specialist_profiles
    )

    manager = Agent(
        role="Engineering Manager",
        goal="Coordinate all agents and compile a clear, cohesive final report.",
        backstory="Ensures a professional, well-structured final document.",
        allow_delegation=True,
        llm=gemini_llm,
        verbose=True,
    )

    return repo_mapper, code_reviewer, security_auditor, doc_explainer, manager


# ---------------------------
# TASKS
# ---------------------------
def make_tasks(repo_url: str, brief: str = ""):
    """Create the four analysis tasks and the final merge task.

    Args:
        repo_url: Repository URL embedded in each analysis task description.
        brief: Free-form brief embedded in each analysis task description.

    Returns:
        Tuple ``(t_map, t_review, t_sec, t_doc, t_merge)``. The merge task's
        description does not include the repository/brief header.
    """
    prefix = f"Target Repository: {repo_url}\nBrief: {brief}\nInclude file paths where relevant."

    # (description, expected_output, agent_role) per task, in return order.
    # NOTE(review): tasks are tagged with ``agent_role`` strings rather than an
    # ``agent=`` instance; confirm that Task recognises this keyword.
    task_specs = [
        (
            f"{prefix}\nMap the repository structure, dependencies, languages, and build tools.",
            "Markdown-formatted repository map with bullets and paths.",
            "Repository Mapper",
        ),
        (
            f"{prefix}\nPerform detailed code review (readability, refactors, testing, etc.).",
            "Actionable review bullets grouped by issue type.",
            "Code Reviewer",
        ),
        (
            f"{prefix}\nPerform security audit (secrets, vulnerabilities, dependencies).",
            "Security findings table (Issue | Evidence | Risk | Fix).",
            "Security Auditor",
        ),
        (
            f"{prefix}\nExplain architecture, modules, data flow, setup, and usage.",
            "Readable explanation with Quickstart and architecture overview.",
            "Documentation Explainer",
        ),
        (
            "Merge all outputs into a clean, single Markdown report with clear sections and TOC.",
            "Final cohesive Markdown report.",
            "Engineering Manager",
        ),
    ]

    t_map, t_review, t_sec, t_doc, t_merge = [
        Task(description=description, expected_output=expected, agent_role=role)
        for description, expected, role in task_specs
    ]
    return t_map, t_review, t_sec, t_doc, t_merge


# ---------------------------
# RUNNER
# ---------------------------
def run_repo_review(repo_url: str, brief: str = "") -> str:
    """Run the hierarchical review crew and return its result as text.

    Args:
        repo_url: Repository to analyse; forwarded to ``make_agents`` and
            ``make_tasks``.
        brief: Optional extra instructions forwarded to ``make_tasks``.

    Returns:
        ``str()`` of whatever ``Crew.kickoff()`` returns.
    """
    repo_mapper, reviewer, auditor, explainer, manager = make_agents(repo_url)
    t_map, t_review, t_sec, t_doc, t_merge = make_tasks(repo_url, brief)

    crew = Crew(
        # The manager is supplied only through ``manager_agent``: CrewAI's
        # hierarchical process rejects a manager that also appears in the
        # worker ``agents`` list.
        agents=[repo_mapper, reviewer, auditor, explainer],
        tasks=[t_map, t_review, t_sec, t_doc, t_merge],
        process=Process.hierarchical,
        manager_agent=manager,
        verbose=True,
    )

    result = crew.kickoff()
    return str(result)