File size: 4,886 Bytes
c0a24ec
 
 
45cd58a
 
 
c0a24ec
a4feaf2
 
c0a24ec
664e9b5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c0a24ec
 
a4feaf2
c0a24ec
 
a4feaf2
45cd58a
9455009
c0a24ec
9455009
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a4feaf2
 
 
9455009
 
a4feaf2
 
c0a24ec
9455009
 
 
a4feaf2
 
 
 
c0a24ec
 
 
 
 
 
a4feaf2
 
 
 
 
 
 
 
 
 
 
c0a24ec
 
45cd58a
 
a4feaf2
 
45cd58a
 
a4feaf2
c0a24ec
a4feaf2
 
45cd58a
c0a24ec
45cd58a
 
 
 
 
 
 
 
 
 
 
 
 
a4feaf2
45cd58a
c0a24ec
a4feaf2
45cd58a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import os
import json
import httpx
import logging

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# The API key is read from the environment once, at import time; os.getenv
# returns None when GEMINI_API_KEY is not set.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
# generateContent endpoint for the gemini-2.0-flash model, with the key
# embedded as a query parameter.
# NOTE(review): when the key is unset this URL is built with the literal text
# "key=None". Because the key is part of the URL, anything that logs or
# reports the URL also exposes the key.
GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key={GEMINI_API_KEY}"

async def generate_change_summary(files: list) -> str:
    """
    Generate a concise summary of what changed in the PR using AI.

    Args:
        files: Per-file change records (dicts). The keys ``status``,
            ``filename``, ``additions`` and ``deletions`` are read, each
            falling back to a default when the key is missing.

    Returns:
        The summary text produced by the model, or the fallback string
        ``"Modified N file(s)"`` when there are no files or when the request
        or response handling fails. API failures are logged, never raised.
    """
    logger.info(f"πŸ“ Generating change summary for {len(files)} files...")

    fallback = f"Modified {len(files)} file(s)"
    if not files:
        # Nothing to describe; asking the model would only invite a
        # made-up summary.
        return fallback

    # One bullet per file, each terminated by a newline.
    changes_text = "".join(
        f"- {f.get('status', 'modified').upper()}: {f.get('filename', 'unknown')} "
        f"(+{f.get('additions', 0)}/-{f.get('deletions', 0)})\n"
        for f in files
    )

    prompt = f"""You are a code reviewer. Summarize what changed in this pull request in 1-2 short sentences.
Focus on WHAT was changed, not HOW. Be concise and clear.

Files changed:
{changes_text}

Respond with ONLY the summary text (no markdown, no extra formatting):"""

    # Send the key in a header instead of the query string: httpx includes
    # the request URL in its error messages, and the except-branch below logs
    # that message, which would otherwise write the key into the logs.
    endpoint = GEMINI_API_URL.partition("?")[0]
    headers = {"Content-Type": "application/json"}
    if GEMINI_API_KEY:
        headers["x-goog-api-key"] = GEMINI_API_KEY

    payload = {
        "contents": [{"parts": [{"text": prompt}]}],
        "generationConfig": {
            "temperature": 0.5,
            "maxOutputTokens": 150,
        },
    }

    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(endpoint, headers=headers, json=payload)
            response.raise_for_status()
            data = response.json()
        summary = data["candidates"][0]["content"]["parts"][0]["text"].strip()
    except Exception as e:
        # Deliberately broad: the summary is best-effort, so any transport,
        # HTTP-status or response-shape problem degrades to the fallback.
        logger.warning(f"⚠️  Failed to generate summary: {str(e)}")
        return fallback

    logger.info(f"βœ… Generated summary: {summary[:100]}...")
    return summary

def _strip_code_fence(text: str) -> str:
    """Remove one leading and one trailing Markdown code fence, if present."""
    body = text.strip()
    if not body.startswith("```"):
        return body
    body = body[3:]
    # Optional language tag directly after the opening fence.
    if body.startswith("json"):
        body = body[4:]
    # Only the closing fence is dropped; backticks inside the payload (for
    # example inside a review comment's text) are left untouched.
    if body.endswith("```"):
        body = body[:-3]
    return body.strip()


async def analyze_code(file_name: str, patch: str) -> list:
    """
    Analyze a single file diff using Google Gemini AI model.

    Args:
        file_name: Name of the file the diff belongs to; interpolated into
            the prompt.
        patch: The diff text to review. An empty or missing patch yields an
            empty result without calling the API.

    Returns:
        A list of structured comments as parsed from the model's JSON reply.
        The prompt asks for entries with ``line``, ``severity`` and
        ``comment`` keys. If the reply is not valid JSON, or is JSON of an
        unexpected type, a single-element list is returned that carries the
        raw reply text with severity ``"info"``.

    Raises:
        Exception: Any error from the HTTP request or from reading the
            expected response structure is logged and re-raised unchanged.
    """
    if not patch:
        # Nothing to review (and len(None) would fail in the log line below).
        logger.info(f"πŸ€– Sending to Gemini AI: {file_name} (0 chars)")
        return []

    logger.info(f"πŸ€– Sending to Gemini AI: {file_name} ({len(patch)} chars)")

    prompt = f"""You are a senior code reviewer focused on finding REAL issues.

Review the following diff from `{file_name}` and provide feedback ONLY for:
- Security vulnerabilities
- Bugs or logic errors
- Performance issues
- Code that will break in production
- Missing error handling for critical operations
- Resource leaks (memory, connections, files)

DO NOT comment on:
- Code style or formatting
- Comments or documentation
- Variable naming (unless critically confusing)
- Minor suggestions or preferences
- Things that are already working fine

Respond ONLY with a JSON array (no markdown, no explanation):
[
  {{
    "line": 42,
    "severity": "high",
    "comment": "Potential SQL injection vulnerability - use parameterized queries"
  }}
]

Severity levels: "high" (critical bugs/security), "medium" (bugs/performance), "low" (minor issues)

If no REAL issues found, return an empty array: []

Code Diff:
{patch}
"""

    # Send the key in a header instead of the query string: httpx includes
    # the request URL in its error messages, and those messages are logged
    # and re-raised below, which would otherwise expose the key.
    endpoint = GEMINI_API_URL.partition("?")[0]
    headers = {"Content-Type": "application/json"}
    if GEMINI_API_KEY:
        headers["x-goog-api-key"] = GEMINI_API_KEY

    payload = {
        "contents": [{"parts": [{"text": prompt}]}],
        "generationConfig": {
            "temperature": 0.3,
            "maxOutputTokens": 2048,
        },
    }

    try:
        async with httpx.AsyncClient(timeout=60.0) as client:
            logger.info("⏳ Waiting for Gemini response...")
            response = await client.post(endpoint, headers=headers, json=payload)
            response.raise_for_status()
            data = response.json()
            logger.info("βœ… Gemini response received")

        # Extract text from Gemini response structure
        text_output = data["candidates"][0]["content"]["parts"][0]["text"].strip()
    except Exception as e:
        # Boundary handler: record the failure, then let the caller decide.
        logger.error(f"❌ Gemini API error: {str(e)}")
        raise

    logger.info(f"πŸ“„ Response length: {len(text_output)} chars")

    # Defensive: the model may wrap its JSON in a Markdown code block.
    text_output = _strip_code_fence(text_output)

    try:
        parsed = json.loads(text_output)
    except ValueError as e:  # json.JSONDecodeError is a ValueError
        logger.warning(f"⚠️  Failed to parse JSON, returning raw text: {str(e)}")
        logger.warning(f"Raw response: {text_output[:200]}")
        return [{"line": 1, "severity": "info", "comment": text_output}]

    # The contract is a list; a lone comment object is wrapped rather than
    # returned as a dict that callers would iterate key by key.
    if isinstance(parsed, dict):
        parsed = [parsed]
    if not isinstance(parsed, list):
        logger.warning(
            f"⚠️  Failed to parse JSON, returning raw text: "
            f"expected a JSON array, got {type(parsed).__name__}"
        )
        logger.warning(f"Raw response: {text_output[:200]}")
        return [{"line": 1, "severity": "info", "comment": text_output}]

    logger.info(f"βœ… Parsed {len(parsed)} review comments")
    return parsed