PraneshJs committed on
Commit
b87b5f6
·
verified ·
1 Parent(s): afca07c

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +76 -114
agent.py CHANGED
@@ -1,150 +1,112 @@
1
- # agent.py
2
  import os
3
- from typing import Optional, List
 
4
  from crewai import Agent, Task, Crew, Process
5
- from crewai_tools import GithubSearchTool
6
- from google import genai # Gemini client (google-genai package)
7
  from dotenv import load_dotenv
8
 
9
  load_dotenv()
10
-
11
- # ---------------------------
12
  # CONFIG
13
- # ---------------------------
14
# Settings that do not depend on the Gemini key.
DEFAULT_CONTENT_TYPES = ["code", "pr", "issue", "repo"]
MODEL_NAME = os.environ.get("GEMINI_MODEL", "gemini-1.5-flash")
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN")

# The Gemini key is mandatory: abort at import time when it is absent or empty.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    raise RuntimeError("❌ Missing GOOGLE_API_KEY (get one from https://aistudio.google.com)")

# Shared Gemini client used by the wrappers defined below.
client = genai.Client(api_key=GOOGLE_API_KEY)
25
-
26
- # ---------------------------
27
- # Gemini Embedding Adapter (Free Embeddings)
28
- # ---------------------------
29
class GeminiEmbedding:
    """Embedding adapter backed by a Gemini embedding model.

    Provides ``embed_documents`` (many texts) and ``embed_query`` (one text).
    Both are best-effort: an API failure is printed and an empty vector is
    returned in place of the failed embedding instead of raising.
    """

    def __init__(self, model="text-embedding-004", api_key=None):
        self.model = model
        # Fall back to the module-level key when no explicit key is supplied.
        self.client = genai.Client(api_key=api_key or GOOGLE_API_KEY)

    def _embed(self, text: str) -> List[float]:
        """Embed a single string and return its vector; API errors propagate."""
        res = self.client.models.embed_content(model=self.model, contents=text)
        # The google-genai response exposes an `embeddings` list (one entry per
        # input); it has no singular `embedding` attribute.
        return res.embeddings[0].values

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Return one vector per entry of ``texts``, in order.

        A text whose embedding fails contributes ``[]`` so the output stays
        aligned with the input.
        """
        vectors = []
        for text in texts:
            try:
                vectors.append(self._embed(text))
            except Exception as e:
                print(f"⚠️ Embedding error: {e}")
                vectors.append([])
        return vectors

    def embed_query(self, text: str) -> List[float]:
        """Return the vector for ``text``, or ``[]`` when the call fails."""
        try:
            return self._embed(text)
        except Exception as e:
            print(f"⚠️ Embedding query error: {e}")
            return []
54
-
55
 
56
- # ---------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  # Gemini LLM Wrapper
58
- # ---------------------------
59
class GeminiLLM:
    """Thin text-generation wrapper around the module-level Gemini ``client``."""

    def __init__(self, model: str):
        self.model = model

    def generate(self, prompt: str) -> str:
        """Generate a completion for ``prompt`` with the configured model.

        Returns the response text. On any failure the error is not raised;
        a string starting with "⚠️ Gemini API error:" is returned instead.
        """
        try:
            response = client.models.generate_content(
                model=self.model,
                contents=prompt,
                # google-genai takes sampling options through `config`; it has
                # no `generation_config` keyword, so the old call always failed.
                config={"temperature": 0.7, "max_output_tokens": 1024},
            )
            # `text` can be None when the response carries no text part.
            return response.text or ""
        except Exception as e:
            return f"⚠️ Gemini API error: {e}"
74
-
75
 
76
- # Instantiate LLM + embedder
77
  gemini_llm = GeminiLLM(MODEL_NAME)
78
- embedder = GeminiEmbedding(api_key=GOOGLE_API_KEY)
79
-
80
- # ---------------------------
81
- # GitHub Tool using free embeddings
82
- # ---------------------------
83
def github_tool(repo_url: Optional[str] = None) -> GithubSearchTool:
    """Build a GithubSearchTool that uses the module-level Gemini embedder.

    When ``repo_url`` is truthy it is passed as ``github_repo``; otherwise the
    tool is created without a ``github_repo`` argument.

    Raises:
        RuntimeError: if ``GITHUB_TOKEN`` is not set.
    """
    if not GITHUB_TOKEN:
        raise RuntimeError("Missing GITHUB_TOKEN in environment.")

    # Arguments common to both variants; the repository is added only on request.
    tool_kwargs = {
        "gh_token": GITHUB_TOKEN,
        "content_types": DEFAULT_CONTENT_TYPES,
        "embedder": embedder,
    }
    if repo_url:
        tool_kwargs["github_repo"] = repo_url
    return GithubSearchTool(**tool_kwargs)
100
-
101
 
102
- # ---------------------------
103
  # AGENTS
104
- # ---------------------------
105
def make_agents(repo_url: str):
    """Create the four specialist agents and the manager for ``repo_url``.

    Every specialist receives the GitHub search tool built for the repository;
    the manager has no tools but may delegate.

    Returns:
        A 5-tuple ``(repo_mapper, code_reviewer, security_auditor,
        doc_explainer, manager)``.
    """
    search_tool = github_tool(repo_url)

    # (role, goal, backstory) for each specialist, in return order.
    specialist_profiles = (
        (
            "Repository Mapper",
            "Map repo structure, dependencies, and frameworks.",
            "Understands directory trees, tech stacks, and configuration files.",
        ),
        (
            "Code Reviewer",
            "Perform code review for quality, structure, and clarity.",
            "A senior engineer giving actionable review comments with examples.",
        ),
        (
            "Security Auditor",
            "Identify secrets, vulnerabilities, unsafe APIs, and dependencies.",
            "A white-hat hacker finding issues and giving fixes.",
        ),
        (
            "Documentation Explainer",
            "Summarize architecture, data flow, and how to run the project.",
            "Explains tech systems simply and clearly with examples.",
        ),
    )
    specialists = [
        Agent(
            role=role,
            goal=goal,
            backstory=backstory,
            tools=[search_tool],
            llm=gemini_llm,
            verbose=True,
        )
        for role, goal, backstory in specialist_profiles
    ]

    lead = Agent(
        role="Engineering Manager",
        goal="Coordinate all agents and compile a clear, cohesive final report.",
        backstory="Ensures a professional, well-structured final document.",
        allow_delegation=True,
        llm=gemini_llm,
        verbose=True,
    )

    return (*specialists, lead)
154
 
155
-
156
- # ---------------------------
157
  # TASKS
158
- # ---------------------------
159
def make_tasks(repo_url: str, brief: str = ""):
    """Create the four analysis tasks and the final merge task.

    Each analysis task description starts with a shared header naming the
    repository and the caller's brief; the merge task has a fixed description.

    Returns:
        A 5-tuple ``(t_map, t_review, t_sec, t_doc, t_merge)``.
    """
    header = "\n".join(
        (
            f"Target Repository: {repo_url}",
            f"Brief: {brief}",
            "Include file paths where relevant.",
        )
    )

    # (agent role, instruction, expected output) for each analysis task.
    # NOTE(review): `agent_role` is handed to Task as-is; confirm the installed
    # CrewAI version accepts that keyword.
    analysis_specs = (
        (
            "Repository Mapper",
            "Map the repository structure, dependencies, languages, and build tools.",
            "Markdown-formatted repository map with bullets and paths.",
        ),
        (
            "Code Reviewer",
            "Perform detailed code review (readability, refactors, testing, etc.).",
            "Actionable review bullets grouped by issue type.",
        ),
        (
            "Security Auditor",
            "Perform security audit (secrets, vulnerabilities, dependencies).",
            "Security findings table (Issue | Evidence | Risk | Fix).",
        ),
        (
            "Documentation Explainer",
            "Explain architecture, modules, data flow, setup, and usage.",
            "Readable explanation with Quickstart and architecture overview.",
        ),
    )
    analysis_tasks = [
        Task(
            description=header + "\n" + instruction,
            expected_output=expected,
            agent_role=role,
        )
        for role, instruction, expected in analysis_specs
    ]

    merge_task = Task(
        description="Merge all outputs into a clean, single Markdown report with clear sections and TOC.",
        expected_output="Final cohesive Markdown report.",
        agent_role="Engineering Manager",
    )

    return (*analysis_tasks, merge_task)
193
 
194
-
195
- # ---------------------------
196
  # RUNNER
197
- # ---------------------------
198
  def run_repo_review(repo_url: str, brief: str = "") -> str:
199
- repo_mapper, reviewer, auditor, explainer, manager = make_agents(repo_url)
200
  t_map, t_review, t_sec, t_doc, t_merge = make_tasks(repo_url, brief)
201
 
202
  crew = Crew(
@@ -206,6 +169,5 @@ def run_repo_review(repo_url: str, brief: str = "") -> str:
206
  manager_agent=manager,
207
  verbose=True,
208
  )
209
-
210
  result = crew.kickoff()
211
  return str(result)
 
 
1
  import os
2
+ import requests
3
+ from typing import List
4
  from crewai import Agent, Task, Crew, Process
5
+ from google import genai # Gemini client
 
6
  from dotenv import load_dotenv
7
 
8
  load_dotenv()
9
+ # ---------------------------------
 
10
  # CONFIG
11
+ # ---------------------------------
 
12
# Optional settings: GitHub token for API requests and the Gemini model name.
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN")
MODEL_NAME = os.environ.get("GEMINI_MODEL", "gemini-1.5-flash")

# The Gemini key is mandatory: abort at import time when it is absent or empty.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    raise RuntimeError("❌ Missing GOOGLE_API_KEY get one at https://aistudio.google.com")

# Shared Gemini client used by GeminiLLM below.
client = genai.Client(api_key=GOOGLE_API_KEY)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
+ # ---------------------------------
22
+ # HELPER: Simple GitHub Repo Fetcher (no embeddings)
23
+ # ---------------------------------
24
# File extensions treated as reviewable code/text.
_SOURCE_SUFFIXES = (".py", ".js", ".ts", ".md")


def _parse_owner_repo(repo_url: str) -> str:
    """Reduce a GitHub URL (or a bare ``owner/repo``) to its ``owner/repo`` slug."""
    path = repo_url.strip().split("github.com/")[-1]
    # Drop any query string or fragment, then keep only the first two path
    # segments so links such as ".../owner/repo/tree/main" or a trailing "/"
    # still resolve to the repository itself.
    path = path.split("?", 1)[0].split("#", 1)[0]
    segments = [part for part in path.split("/") if part]
    if len(segments) < 2:
        raise ValueError(
            f"expected a GitHub URL like https://github.com/owner/repo, got {repo_url!r}"
        )
    owner, repo = segments[:2]
    if repo.endswith(".git"):
        repo = repo[: -len(".git")]
    return f"{owner}/{repo}"


def fetch_repo_files(repo_url: str, max_files: int = 10) -> List[str]:
    """List download URLs of code/text files in a GitHub repository's root.

    Queries the GitHub REST "contents" endpoint for the repository root only
    (sub-directories are not traversed) and keeps files ending in ``.py``,
    ``.js``, ``.ts`` or ``.md``.

    Args:
        repo_url: Repository URL or ``owner/repo`` slug.
        max_files: Upper bound on the number of URLs returned.

    Returns:
        Up to ``max_files`` download URLs. On any failure, a one-element list
        holding a message that starts with "⚠️ Error fetching repo:".
    """
    try:
        owner_repo = _parse_owner_repo(repo_url)
        api_url = f"https://api.github.com/repos/{owner_repo}/contents"
        headers = {"Authorization": f"token {GITHUB_TOKEN}"} if GITHUB_TOKEN else {}
        # A timeout keeps a stalled connection from hanging the whole run.
        response = requests.get(api_url, headers=headers, timeout=15)
        response.raise_for_status()

        files = []
        for entry in response.json():
            # Checked before appending so max_files <= 0 yields an empty list.
            if len(files) >= max_files:
                break
            # download_url can be null for some entries; skip those.
            download_url = entry.get("download_url")
            if (
                entry["type"] == "file"
                and download_url
                and entry["name"].endswith(_SOURCE_SUFFIXES)
            ):
                files.append(download_url)
        return files
    except Exception as e:
        # Deliberately broad: callers embed the result in a prompt and expect
        # an error message in the list rather than an exception.
        return [f"⚠️ Error fetching repo: {e}"]
42
+
43
+
44
def fetch_file_content(url: str) -> str:
    """Download ``url`` and return the response body as text.

    Never raises: on any failure (non-http(s) input, network error, HTTP error
    status) a message starting with "⚠️ Could not fetch file:" is returned.
    """
    try:
        # Reject non-URLs up front, e.g. an error message passed along in
        # place of a download URL.
        if not url.lstrip().lower().startswith(("http://", "https://")):
            raise ValueError("not an http(s) URL")
        response = requests.get(url, timeout=15)
        # Without this, a 404/403 error page would be returned as file content.
        response.raise_for_status()
        return response.text
    except Exception as e:
        # Deliberately broad: the result is embedded in a prompt, so failures
        # are reported as text rather than raised.
        return f"⚠️ Could not fetch file: {url}\nError: {e}"
49
+
50
+ # ---------------------------------
51
  # Gemini LLM Wrapper
52
+ # ---------------------------------
53
class GeminiLLM:
    """Thin text-generation wrapper around the module-level Gemini ``client``."""

    def __init__(self, model):
        self.model = model

    def generate(self, prompt: str) -> str:
        """Generate a completion for ``prompt`` with the configured model.

        Returns the response text. On any failure the error is not raised;
        a string starting with "⚠️ Gemini Error:" is returned instead.
        """
        try:
            res = client.models.generate_content(
                model=self.model,
                contents=prompt,
                # google-genai takes sampling options through `config`; it has
                # no `generation_config` keyword, so the old call always failed.
                config={"temperature": 0.7, "max_output_tokens": 2048},
            )
            # `text` can be None when the response carries no text part.
            return res.text or ""
        except Exception as e:
            return f"⚠️ Gemini Error: {e}"
 
67
 
 
68
  gemini_llm = GeminiLLM(MODEL_NAME)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
+ # ---------------------------------
71
  # AGENTS
72
+ # ---------------------------------
73
def make_agents():
    """Create the four specialist agents and the manager.

    All agents share the module-level ``gemini_llm``; only the manager is
    allowed to delegate.

    Returns:
        A 5-tuple ``(repo_mapper, code_reviewer, security_auditor,
        doc_explainer, manager)``.
    """
    # (role, goal, backstory) for each specialist, in return order.
    specialist_profiles = (
        (
            "Repository Mapper",
            "Map the project’s structure and identify its core technologies.",
            "You are skilled at reading GitHub repositories and summarizing their structure.",
        ),
        (
            "Code Reviewer",
            "Perform code reviews to identify potential issues and refactors.",
            "A senior engineer reviewing open-source codebases with actionable advice.",
        ),
        (
            "Security Auditor",
            "Find potential security risks in code and suggest fixes.",
            "You think like an attacker but report like a professional auditor.",
        ),
        (
            "Documentation Explainer",
            "Explain what the repo does and how to run it.",
            "You make technical systems understandable.",
        ),
    )
    specialists = [
        Agent(role=role, goal=goal, backstory=backstory, llm=gemini_llm, verbose=True)
        for role, goal, backstory in specialist_profiles
    ]

    lead = Agent(
        role="Engineering Manager",
        goal="Merge all insights into a final cohesive report.",
        backstory="You coordinate team outputs into a polished result.",
        allow_delegation=True,
        llm=gemini_llm,
        verbose=True,
    )

    return (*specialists, lead)
116
 
117
+ # ---------------------------------
 
118
  # TASKS
119
+ # ---------------------------------
120
def make_tasks(repo_url: str, brief: str = ""):
    """Create the four analysis tasks and the final merge task.

    Fetches the repository file list, downloads the first five entries, and
    prepends a shared context (repository, brief, fetched URLs and the first
    6000 characters of the joined contents) to each analysis task description.
    The merge task has a fixed description.

    Returns:
        A 5-tuple ``(t_map, t_review, t_sec, t_doc, t_merge)``.
    """
    sample_urls = fetch_repo_files(repo_url)[:5]
    joined_sources = "\n\n".join(map(fetch_file_content, sample_urls))

    context = (
        f"Repository: {repo_url}\n"
        f"{brief}\n"
        "Fetched files:\n"
        f"{', '.join(sample_urls)}\n\n"
        f"{joined_sources[:6000]}"
    )

    # (agent role, instruction, expected output) for each analysis task.
    # NOTE(review): `agent_role` is handed to Task as-is; confirm the installed
    # CrewAI version accepts that keyword.
    analysis_specs = (
        (
            "Repository Mapper",
            "Create a summary of the repository’s structure, dependencies, and frameworks.",
            "Markdown repo overview (sections: Structure, Tech, Dependencies).",
        ),
        (
            "Code Reviewer",
            "Perform a detailed code review and suggest refactors and improvements.",
            "Actionable code review notes with example snippets.",
        ),
        (
            "Security Auditor",
            "Perform a security audit on visible files.",
            "Table of Security Issues | Risk | Mitigation.",
        ),
        (
            "Documentation Explainer",
            "Explain what this repo does and how to run it.",
            "Simple explanation + setup instructions.",
        ),
    )
    analysis_tasks = [
        Task(
            description=context + "\n\n" + instruction,
            expected_output=expected,
            agent_role=role,
        )
        for role, instruction, expected in analysis_specs
    ]

    merge_task = Task(
        description="Combine all reports into one well-structured Markdown summary with a title and TOC.",
        expected_output="Final comprehensive Markdown report.",
        agent_role="Engineering Manager",
    )

    return (*analysis_tasks, merge_task)
157
 
158
+ # ---------------------------------
 
159
  # RUNNER
160
+ # ---------------------------------
161
  def run_repo_review(repo_url: str, brief: str = "") -> str:
162
+ repo_mapper, reviewer, auditor, explainer, manager = make_agents()
163
  t_map, t_review, t_sec, t_doc, t_merge = make_tasks(repo_url, brief)
164
 
165
  crew = Crew(
 
169
  manager_agent=manager,
170
  verbose=True,
171
  )
 
172
  result = crew.kickoff()
173
  return str(result)