Spaces:

PraneshJs
/

CodeAgentbyCrewAi

Paused

App Files Files Community

PraneshJs committed on Nov 12, 2025

Commit

b87b5f6

verified ·

1 Parent(s): afca07c

Update agent.py

Browse files

Files changed (1) hide show

agent.py +76 -114

agent.py CHANGED Viewed

@@ -1,150 +1,112 @@
-# agent.py
 import os
-from typing import Optional, List
 from crewai import Agent, Task, Crew, Process
-from crewai_tools import GithubSearchTool
-from google import genai  # Gemini client (google-genai package)
 from dotenv import load_dotenv
 load_dotenv()
-# ---------------------------
 # CONFIG
-# ---------------------------
-GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
 GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
 if not GOOGLE_API_KEY:
-    raise RuntimeError("❌ Missing GOOGLE_API_KEY (get one from https://aistudio.google.com)")
-# Gemini Client
 client = genai.Client(api_key=GOOGLE_API_KEY)
-MODEL_NAME = os.getenv("GEMINI_MODEL", "gemini-1.5-flash")
-DEFAULT_CONTENT_TYPES = ["code", "pr", "issue", "repo"]
-# ---------------------------
-# Gemini Embedding Adapter (Free Embeddings)
-# ---------------------------
-class GeminiEmbedding:
-    """Uses Google Gemini text-embedding-004 model (free tier)"""
-    def __init__(self, model="text-embedding-004", api_key=None):
-        self.model = model
-        self.client = genai.Client(api_key=api_key or GOOGLE_API_KEY)
-    def embed_documents(self, texts: List[str]) -> List[List[float]]:
-        vectors = []
-        for text in texts:
-            try:
-                res = self.client.models.embed_content(model=self.model, contents=text)
-                vectors.append(res.embedding.values)
-            except Exception as e:
-                print(f"⚠️ Embedding error: {e}")
-                vectors.append([])
-        return vectors
-    def embed_query(self, text: str) -> List[float]:
-        try:
-            res = self.client.models.embed_content(model=self.model, contents=text)
-            return res.embedding.values
-        except Exception as e:
-            print(f"⚠️ Embedding query error: {e}")
-            return []
-# ---------------------------
 # Gemini LLM Wrapper
-# ---------------------------
 class GeminiLLM:
-    def __init__(self, model: str):
         self.model = model
     def generate(self, prompt: str) -> str:
-        """CrewAI-compatible LLM generate method."""
         try:
-            response = client.models.generate_content(
                 model=self.model,
                 contents=prompt,
-                generation_config={"temperature": 0.7, "max_output_tokens": 1024}
             )
-            return response.text
         except Exception as e:
-            return f"⚠️ Gemini API error: {e}"
-# Instantiate LLM + embedder
 gemini_llm = GeminiLLM(MODEL_NAME)
-embedder = GeminiEmbedding(api_key=GOOGLE_API_KEY)
-# ---------------------------
-# GitHub Tool using free embeddings
-# ---------------------------
-def github_tool(repo_url: Optional[str] = None) -> GithubSearchTool:
-    """Create a GitHub Search Tool with free Gemini embeddings (no OpenAI key)."""
-    if not GITHUB_TOKEN:
-        raise RuntimeError("Missing GITHUB_TOKEN in environment.")
-    if repo_url:
-        return GithubSearchTool(
-            github_repo=repo_url,
-            gh_token=GITHUB_TOKEN,
-            content_types=DEFAULT_CONTENT_TYPES,
-            embedder=embedder,
-        )
-    return GithubSearchTool(
-        gh_token=GITHUB_TOKEN,
-        content_types=DEFAULT_CONTENT_TYPES,
-        embedder=embedder,
-    )
-# ---------------------------
 # AGENTS
-# ---------------------------
-def make_agents(repo_url: str):
-    repo_search = github_tool(repo_url)
     repo_mapper = Agent(
         role="Repository Mapper",
-        goal="Map repo structure, dependencies, and frameworks.",
-        backstory="Understands directory trees, tech stacks, and configuration files.",
-        tools=[repo_search],
         llm=gemini_llm,
         verbose=True,
     )
     code_reviewer = Agent(
         role="Code Reviewer",
-        goal="Perform code review for quality, structure, and clarity.",
-        backstory="A senior engineer giving actionable review comments with examples.",
-        tools=[repo_search],
         llm=gemini_llm,
         verbose=True,
     )
     security_auditor = Agent(
         role="Security Auditor",
-        goal="Identify secrets, vulnerabilities, unsafe APIs, and dependencies.",
-        backstory="A white-hat hacker finding issues and giving fixes.",
-        tools=[repo_search],
         llm=gemini_llm,
         verbose=True,
     )
     doc_explainer = Agent(
         role="Documentation Explainer",
-        goal="Summarize architecture, data flow, and how to run the project.",
-        backstory="Explains tech systems simply and clearly with examples.",
-        tools=[repo_search],
         llm=gemini_llm,
         verbose=True,
     )
     manager = Agent(
         role="Engineering Manager",
-        goal="Coordinate all agents and compile a clear, cohesive final report.",
-        backstory="Ensures a professional, well-structured final document.",
         allow_delegation=True,
         llm=gemini_llm,
         verbose=True,
@@ -152,51 +114,52 @@ def make_agents(repo_url: str):
     return repo_mapper, code_reviewer, security_auditor, doc_explainer, manager
-# ---------------------------
 # TASKS
-# ---------------------------
 def make_tasks(repo_url: str, brief: str = ""):
-    prefix = f"Target Repository: {repo_url}\nBrief: {brief}\nInclude file paths where relevant."
     t_map = Task(
-        description=f"{prefix}\nMap the repository structure, dependencies, languages, and build tools.",
-        expected_output="Markdown-formatted repository map with bullets and paths.",
         agent_role="Repository Mapper",
     )
     t_review = Task(
-        description=f"{prefix}\nPerform detailed code review (readability, refactors, testing, etc.).",
-        expected_output="Actionable review bullets grouped by issue type.",
         agent_role="Code Reviewer",
     )
     t_sec = Task(
-        description=f"{prefix}\nPerform security audit (secrets, vulnerabilities, dependencies).",
-        expected_output="Security findings table (Issue | Evidence | Risk | Fix).",
         agent_role="Security Auditor",
     )
     t_doc = Task(
-        description=f"{prefix}\nExplain architecture, modules, data flow, setup, and usage.",
-        expected_output="Readable explanation with Quickstart and architecture overview.",
         agent_role="Documentation Explainer",
     )
     t_merge = Task(
-        description="Merge all outputs into a clean, single Markdown report with clear sections and TOC.",
-        expected_output="Final cohesive Markdown report.",
         agent_role="Engineering Manager",
     )
     return t_map, t_review, t_sec, t_doc, t_merge
-# ---------------------------
 # RUNNER
-# ---------------------------
 def run_repo_review(repo_url: str, brief: str = "") -> str:
-    repo_mapper, reviewer, auditor, explainer, manager = make_agents(repo_url)
     t_map, t_review, t_sec, t_doc, t_merge = make_tasks(repo_url, brief)
     crew = Crew(
@@ -206,6 +169,5 @@ def run_repo_review(repo_url: str, brief: str = "") -> str:
         manager_agent=manager,
         verbose=True,
     )
     result = crew.kickoff()
     return str(result)

 import os
+import requests
+from typing import List
 from crewai import Agent, Task, Crew, Process
+from google import genai  # Gemini client
 from dotenv import load_dotenv
 load_dotenv()
+# ---------------------------------
 # CONFIG
+# ---------------------------------
 GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
+GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
+MODEL_NAME = os.getenv("GEMINI_MODEL", "gemini-1.5-flash")
 if not GOOGLE_API_KEY:
+    raise RuntimeError("❌ Missing GOOGLE_API_KEY — get one at https://aistudio.google.com")
 client = genai.Client(api_key=GOOGLE_API_KEY)
+# ---------------------------------
+# HELPER: Simple GitHub Repo Fetcher (no embeddings)
+# ---------------------------------
def fetch_repo_files(repo_url: str, max_files: int = 10) -> List[str]:
    """Fetch download URLs for the first few code/text files in a GitHub repo root.

    Only the top-level directory listing is queried (GitHub REST "contents"
    endpoint); sub-directories are not traversed.

    Args:
        repo_url: Repository URL or ``owner/repo`` shorthand. A trailing slash,
            a ``.git`` suffix, a query string/fragment, and extra path segments
            such as ``/tree/main`` are tolerated.
        max_files: Maximum number of URLs to return. Values <= 0 yield ``[]``
            without contacting GitHub.

    Returns:
        Download URLs of ``.py``/``.js``/``.ts``/``.md`` files, in listing
        order. On any failure a single-element list holding a
        ``"⚠️ Error fetching repo: ..."`` message is returned instead of
        raising, so callers can embed the message in a prompt.
    """
    if max_files <= 0:
        return []
    try:
        # Reduce the input to exactly "<owner>/<repo>"; anything after the
        # second path segment would corrupt the API path.
        tail = repo_url.strip().split("github.com/")[-1]
        tail = tail.split("?", 1)[0].split("#", 1)[0]
        segments = [part for part in tail.split("/") if part]
        if len(segments) < 2:
            raise ValueError(f"cannot find owner/repo in {repo_url!r}")
        owner, repo = segments[0], segments[1]
        if repo.endswith(".git"):
            repo = repo[:-4]

        api_url = f"https://api.github.com/repos/{owner}/{repo}/contents"
        headers = {"Authorization": f"token {GITHUB_TOKEN}"} if GITHUB_TOKEN else {}
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(api_url, headers=headers, timeout=10)
        response.raise_for_status()
        data = response.json()
        if not isinstance(data, list):
            raise ValueError("unexpected response shape from the contents API")

        files = []
        for entry in data:
            if len(files) >= max_files:
                break
            is_wanted = entry["type"] == "file" and entry["name"].endswith(
                (".py", ".js", ".ts", ".md")
            )
            # download_url can be null in the API payload; skip such entries.
            if is_wanted and entry.get("download_url"):
                files.append(entry["download_url"])
        return files
    except Exception as e:
        # Deliberate best-effort boundary: report the problem as data.
        return [f"⚠️ Error fetching repo: {e}"]
def fetch_file_content(url: str) -> str:
    """Download a text file and return its body, or an error message.

    Args:
        url: An ``http://`` or ``https://`` URL to fetch.

    Returns:
        The response text on success. If ``url`` is not an http(s) URL, the
        request fails, or the server answers with an error status, a
        ``"⚠️ Could not fetch file: ..."`` message is returned instead of
        raising.
    """
    # Reject non-URLs up front (e.g. an error string handed over in place of
    # a download URL) instead of relying on the HTTP library to choke on them.
    if not isinstance(url, str) or not url.startswith(("http://", "https://")):
        return f"⚠️ Could not fetch file: {url}\nError: not an http(s) URL"
    try:
        response = requests.get(url, timeout=10)
        # Otherwise a 404/500 error page would be returned as file content.
        response.raise_for_status()
        return response.text
    except Exception as e:
        # Deliberate best-effort boundary: report the problem as data.
        return f"⚠️ Could not fetch file: {url}\nError: {e}"
+# ---------------------------------
 # Gemini LLM Wrapper
+# ---------------------------------
 class GeminiLLM:
+    def __init__(self, model):
         self.model = model
     def generate(self, prompt: str) -> str:
         try:
+            res = client.models.generate_content(
                 model=self.model,
                 contents=prompt,
+                generation_config={"temperature": 0.7, "max_output_tokens": 2048}
             )
+            return res.text
         except Exception as e:
+            return f"⚠️ Gemini Error: {e}"
 gemini_llm = GeminiLLM(MODEL_NAME)
+# ---------------------------------
 # AGENTS
+# ---------------------------------
+def make_agents():
     repo_mapper = Agent(
         role="Repository Mapper",
+        goal="Map the project’s structure and identify its core technologies.",
+        backstory="You are skilled at reading GitHub repositories and summarizing their structure.",
         llm=gemini_llm,
         verbose=True,
     )
     code_reviewer = Agent(
         role="Code Reviewer",
+        goal="Perform code reviews to identify potential issues and refactors.",
+        backstory="A senior engineer reviewing open-source codebases with actionable advice.",
         llm=gemini_llm,
         verbose=True,
     )
     security_auditor = Agent(
         role="Security Auditor",
+        goal="Find potential security risks in code and suggest fixes.",
+        backstory="You think like an attacker but report like a professional auditor.",
         llm=gemini_llm,
         verbose=True,
     )
     doc_explainer = Agent(
         role="Documentation Explainer",
+        goal="Explain what the repo does and how to run it.",
+        backstory="You make technical systems understandable.",
         llm=gemini_llm,
         verbose=True,
     )
     manager = Agent(
         role="Engineering Manager",
+        goal="Merge all insights into a final cohesive report.",
+        backstory="You coordinate team outputs into a polished result.",
         allow_delegation=True,
         llm=gemini_llm,
         verbose=True,
     return repo_mapper, code_reviewer, security_auditor, doc_explainer, manager
+# ---------------------------------
 # TASKS
+# ---------------------------------
 def make_tasks(repo_url: str, brief: str = ""):
+    repo_files = fetch_repo_files(repo_url)
+    file_contents = "\n\n".join([fetch_file_content(u) for u in repo_files[:5]])
+    context = f"Repository: {repo_url}\n{brief}\nFetched files:\n{', '.join(repo_files[:5])}\n\n{file_contents[:6000]}"
     t_map = Task(
+        description=f"{context}\n\nCreate a summary of the repository’s structure, dependencies, and frameworks.",
+        expected_output="Markdown repo overview (sections: Structure, Tech, Dependencies).",
         agent_role="Repository Mapper",
     )
     t_review = Task(
+        description=f"{context}\n\nPerform a detailed code review and suggest refactors and improvements.",
+        expected_output="Actionable code review notes with example snippets.",
         agent_role="Code Reviewer",
     )
     t_sec = Task(
+        description=f"{context}\n\nPerform a security audit on visible files.",
+        expected_output="Table of Security Issues | Risk | Mitigation.",
         agent_role="Security Auditor",
     )
     t_doc = Task(
+        description=f"{context}\n\nExplain what this repo does and how to run it.",
+        expected_output="Simple explanation + setup instructions.",
         agent_role="Documentation Explainer",
     )
     t_merge = Task(
+        description="Combine all reports into one well-structured Markdown summary with a title and TOC.",
+        expected_output="Final comprehensive Markdown report.",
         agent_role="Engineering Manager",
     )
     return t_map, t_review, t_sec, t_doc, t_merge
+# ---------------------------------
 # RUNNER
+# ---------------------------------
 def run_repo_review(repo_url: str, brief: str = "") -> str:
+    repo_mapper, reviewer, auditor, explainer, manager = make_agents()
     t_map, t_review, t_sec, t_doc, t_merge = make_tasks(repo_url, brief)
     crew = Crew(
         manager_agent=manager,
         verbose=True,
     )
     result = crew.kickoff()
     return str(result)