nikhmr1235 commited on
Commit
158410c
·
verified ·
1 Parent(s): 224666a

Create langgraph_pr_review_bot.py

Browse files
Files changed (1) hide show
  1. langgraph_pr_review_bot.py +782 -0
langgraph_pr_review_bot.py ADDED
@@ -0,0 +1,782 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Any, Optional
2
+ from pydantic import BaseModel, Field
3
+ from uuid import uuid4
4
+
5
class PRReviewState(BaseModel):
    """Shared state threaded through the LangGraph PR-review workflow.

    Only ``pr_id`` and ``repo_name`` are required to start a run; every other
    field is filled in by downstream nodes as the review progresses.
    """

    # --- GitHub PR information (mandatory) ---
    pr_id: int  # Pull Request number
    repo_name: str  # e.g., "owner/repo"

    # --- GitHub PR information (optional) ---
    diff_url: Optional[str] = None
    pr_title: Optional[str] = None
    pr_author: Optional[str] = None
    # review_run_id: str = Field(default_factory=lambda: str(uuid4()))  # Optional: unique ID for this specific review run

    # --- Code content (optional) ---
    code_diff: Optional[str] = None  # The fetched raw diff content
    # Map of filename -> full file content, used as extra LLM context.
    # Field(default_factory=dict) is the idiomatic pydantic way to declare a
    # mutable default (avoids a shared class-level dict literal).
    file_contents: Dict[str, str] = Field(default_factory=dict)

    # --- LLM review outputs ---
    llm_markdown_review: Optional[str] = None  # Raw Markdown output from the LLM (generate_code_review_markdown)
    parsed_llm_review_data: Optional[Dict[str, Any]] = None  # Structured dict from parse_llm_review_markdown

    # --- Human-in-the-loop (simplified for phase 1) ---
    require_human_approval: bool = False  # Config flag, set at graph initialization
    human_approval_status: Optional[bool] = None  # True if approved, False if rejected
    human_feedback_message: Optional[str] = None  # Any message from human rejection

    # --- System status ---
    # Lifecycle values: "initiated", "fetching_code", "code_retrieved",
    # "generating_llm_review", "llm_review_generated", "parsing_llm_review",
    # "review_parsed", "awaiting_human_approval", "posting_review", "posted",
    # "rejected", "failed"
    review_status: str = "initiated"
    last_error: Optional[str] = None  # Stores the last encountered error message
    # error_traceback: Optional[str] = None  # Optional: for more detailed error debugging
    review_id: Optional[int] = None  # Pull-request-review id (pending -> approved/discarded based on HIL)
    review_comment_url: Optional[str] = None  # URL of the main posted GitHub review comment
35
+
36
+ '''
37
+ from kaggle_secrets import UserSecretsClient
38
+ user_secrets = UserSecretsClient()
39
+ git_hub_token = user_secrets.get_secret("GITHUB_token_ID")
40
+ google_api_key = user_secrets.get_secret("GOOGLE_API_KEY")
41
+ '''
42
+
43
+ import os
44
+ from typing import Dict, Any, List, Optional, Tuple # Ensure Tuple is imported
45
+ from github import Github, PullRequest
46
+ from github.GithubException import GithubException, UnknownObjectException
47
+ import requests # Make sure requests is imported for patch_url
48
+ from dotenv import load_dotenv
49
+
50
# For local testing, environment variables may come from a .env file; in a
# deployed environment they are normally set directly on the process.
# Load the .env only when the token is not already present, so production
# setups (or a main script that already called load_dotenv) are not
# redundantly re-loaded.
if not os.getenv("GITHUB_TOKEN"): # Only load if token not already set
    load_dotenv() # Load environment variables from .env file
57
+
58
+ # Assuming 'git_hub_token' is defined globally or passed in a larger context
59
+ # If git_hub_token is expected to be a global variable, ensure it's imported or declared.
60
+ # For better practice, pass it as an argument or rely solely on os.getenv.
61
+ # Let's adjust to purely rely on os.getenv for this function.
62
+ # github_token = os.getenv("GITHUB_TOKEN") # Moved inside function for safety
63
+
64
+
65
def fetch_pr_code_changes(repo_name: str, pr_id: int) -> Tuple[Optional[str], Optional[Dict[str, str]], Optional[str], Optional[str]]:
    """
    Fetches the raw diff content, the full contents of changed files,
    and the head commit SHA for a given PR.

    Args:
        repo_name (str): The full name of the repository (e.g., "octocat/Spoon-Knife").
        pr_id (int): The ID of the Pull Request.

    Returns:
        Tuple[Optional[str], Optional[Dict[str, str]], Optional[str], Optional[str]]:
            - raw_diff_content (str or None): The raw diff content of the PR.
            - file_contents (Dict[str, str] or None): Dictionary mapping filename to its full content (after changes).
            - head_commit_sha (str or None): The SHA of the head commit of the PR.
            - error_message (str or None): An error message if something went wrong.
    """
    # BUGFIX: previously this read the undefined global `git_hub_token`
    # (defined only inside a commented-out Kaggle snippet), which raised
    # NameError at call time. Read the token from the environment instead.
    github_token = os.getenv("GITHUB_TOKEN")

    if not github_token:
        print("Error: GITHUB_TOKEN environment variable not set.")
        return None, None, None, "GitHub token not found in environment variables."

    try:
        g = Github(github_token)
        repo = g.get_repo(repo_name)
        pull_request = repo.get_pull(pr_id)

        # Pin everything we fetch to the PR's head commit.
        head_commit_sha = pull_request.head.sha
        print(f"Fetched PR {pr_id} head commit SHA: {head_commit_sha}")

        # 1. Fetch raw diff content (patch) via plain HTTP.
        # A timeout prevents the node from hanging on a stalled connection,
        # and raise_for_status keeps an HTML error page from being silently
        # treated as diff text (any HTTPError is handled by the outer except).
        patch_url = pull_request.patch_url
        headers = {"Authorization": f"token {github_token}"}
        patch_response = requests.get(patch_url, headers=headers, timeout=30)
        patch_response.raise_for_status()
        raw_diff_content = patch_response.text

        # 2. Fetch full content of each changed file at the PR head.
        file_contents: Dict[str, str] = {}
        for file in pull_request.get_files():
            # Deleted files have no content at the head commit.
            if file.status == 'deleted':
                file_contents[file.filename] = "[FILE DELETED]"
                continue

            try:
                # Use the head SHA (not the branch ref) so the content is
                # fixed to the exact commit under review even if the branch
                # moves while we run.
                file_content_obj = repo.get_contents(file.filename, ref=pull_request.head.sha)

                # get_contents returns a list for directories.
                if isinstance(file_content_obj, list):
                    print(f"Warning: '{file.filename}' is a directory or multiple files, skipping content retrieval for now.")
                    file_contents[file.filename] = "[DIRECTORY OR MULTIPLE FILES]"
                    continue

                file_contents[file.filename] = file_content_obj.decoded_content.decode('utf-8')

            except GithubException as e:
                # Per-file failures are recorded inline rather than aborting
                # the whole fetch — the review can still proceed on the rest.
                print(f"Warning: GitHub API error fetching content for {file.filename} (PR {pr_id}, Repo {repo_name}): {e.status} - {e.data.get('message', 'No message')}")
                file_contents[file.filename] = f"[ERROR: Could not fetch content. Status: {e.status}, Message: {e.data.get('message', 'No message')}]"
            except Exception as e:
                print(f"Unexpected error fetching content for {file.filename} (PR {pr_id}, Repo {repo_name}): {e}")
                file_contents[file.filename] = f"[ERROR: Unexpected error fetching content: {e}]"

        return raw_diff_content, file_contents, head_commit_sha, None  # No error message if successful

    except UnknownObjectException as e:
        error_msg = f"GitHub object not found (repo or PR): {e.data.get('message', 'No message')}"
        print(f"Error in fetch_pr_code_changes: {error_msg}")
        return None, None, None, error_msg
    except GithubException as e:
        error_msg = f"GitHub API error for PR {pr_id} from {repo_name}: {e.status} - {e.data.get('message', 'No message')}"
        print(f"Error in fetch_pr_code_changes: {error_msg}")
        return None, None, None, error_msg
    except Exception as e:
        error_msg = f"An unexpected error occurred while fetching PR {pr_id} from {repo_name}: {e}"
        print(f"Error in fetch_pr_code_changes: {error_msg}")
        return None, None, None, error_msg
148
+
149
def code_retriever_node(state: PRReviewState):
    """LangGraph node: fetch the PR diff and changed-file contents into state.

    BUGFIX: the original discarded the error returned by
    fetch_pr_code_changes and always reported "code_retrieved"; failures are
    now recorded in ``last_error`` with a "failed" status so downstream
    routing can react.
    """
    repo_name = state.repo_name
    pull_req_id = state.pr_id

    print(f"repo_name :{repo_name}-------- pull_req_id:{pull_req_id}")

    diff, contents, head_commit_sha, error = fetch_pr_code_changes(repo_name, pull_req_id)

    if error:
        # Surface the failure instead of silently proceeding with None data.
        return state.model_copy(update={
            "review_status": "failed",
            "last_error": error,
        })

    # Nodes in LangGraph should always return an updated state.
    return state.model_copy(update={
        "review_status": "code_retrieved",
        "code_diff": diff,
        "file_contents": contents,
    })
165
+
166
+ import os
167
+ from typing import Dict, Any
168
+ from langchain_core.prompts import ChatPromptTemplate
169
+ # Ensure you have your LLM provider installed, e.g., pip install langchain-google-genai
170
+ from langchain_google_genai import ChatGoogleGenerativeAI # Using Gemini as per your preference
171
+
172
# Initialize the LLM once at import time.
# BUGFIX: the original passed the undefined global `google_api_key` (it only
# existed inside a commented-out Kaggle snippet), raising NameError at import.
# Read the key from the GOOGLE_API_KEY environment variable instead.
# temperature=0.0 keeps the review output as deterministic as possible.
# Other models such as "gemini-1.5-flash" or "gemini-1.5-pro" can be configured here.
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0.0, api_key=os.getenv("GOOGLE_API_KEY"))
175
+
176
def generate_code_review_markdown(code_diff: str, file_contents: Dict[str, str]) -> str:
    """
    Generates a detailed, human-readable code review in Markdown format from the LLM.

    The prompt is designed to elicit structured Markdown output that can then be
    parsed for GitHub PR comments, grouped by file and function.

    Args:
        code_diff (str): The string representation of the code diff.
        file_contents (Dict[str, str]): A dictionary where keys are file paths
                                        and values are their full content.

    Returns:
        str: A Markdown string representing the code review (or an error
             message string if the LLM call fails).
    """

    # Prepare full-file context: one separated, fenced block per changed file.
    full_contents_str = ""
    if file_contents:
        for filename, content in file_contents.items():
            # BUGFIX: the header previously hard-coded "(unknown)" and never
            # used the loop's `filename`, so every file block was unlabeled
            # and the LLM could not attribute context to files.
            full_contents_str += f"--- Full Content of {filename} ---\n```python\n{content}\n```\n\n"
    else:
        full_contents_str = "No full file contents provided for additional context."

    # Construct the Prompt Template
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system",
             "You are an expert Senior Software Engineer and a meticulous code reviewer.\n"
             "Your task is to review the provided code changes in a Pull Request.\n"
             "Analyze the `code_diff` for potential bugs, performance issues, security vulnerabilities, code style violations, maintainability concerns, and missing tests or documentation.\n"
             "Refer to the `full_file_contents` for additional context if the diff alone is insufficient to understand the changes or their implications.\n"
             "Provide a comprehensive, actionable, and constructive review.\n"
             "Format your review clearly using Markdown. Structure it with the following top-level sections:\n"
             "1. **Overall Impression:** A brief summary of the PR's purpose and overall quality.\n"
             "2. **Specific Observations and Suggestions:** Detailed feedback, grouped by file.\n"
             "   - Within each file's section, group related comments, ideally by function or logical block.\n"
             "   - For each observation/suggestion, include relevant line numbers from the *new* file for context (e.g., 'Line X-Y:').\n"
             "3. **Potential Issues and Edge Cases:** Discuss any missed scenarios or potential problems.\n"
             "4. **Security Implications:** Highlight any security concerns.\n"
             "5. **Adherence to Best Practices (PEP 8):** Comment on style and best practice compliance.\n"
             "6. **Performance Considerations:** Discuss performance aspects.\n"
             "7. **Unit Testing Suggestions:** Recommend additional tests.\n"
             "8. **Docstring/Comment Improvements:** Suggest documentation enhancements.\n"
             "9. **Clarity and Conciseness:** Feedback on code readability.\n"
             "10. **Summary:** A concise conclusion and recommended action (e.g., 'Approve', 'Request Changes', 'Comment').\n\n"
             "For code suggestions, use GitHub's Markdown code block with 'suggestion' annotation, like this:\n"
             "```suggestion\n"
             "your_suggested_code_here\n"
             "```\n"
             "Ensure file paths are correctly formatted (e.g., `src/utils/data_processor.py`)."
             ),
            ("human",
             "Here are the code changes (diff):\n"
             "```diff\n"
             "{code_diff}\n"
             "```\n\n"
             "Here are the full contents of the changed files (for additional context, use only if necessary to understand the diff):\n"
             "{full_contents_context}\n\n"
             "Please provide your structured code review in Markdown."
             ),
        ]
    )

    # Create the Chain
    review_chain = prompt | llm

    # Invoke the Chain; diff and file context are passed as template values,
    # so braces inside them are not re-interpreted by the template.
    try:
        review_markdown = review_chain.invoke({
            "code_diff": code_diff,
            "full_contents_context": full_contents_str
        }).content  # Access the content attribute for Chat model output
        return review_markdown
    except Exception as e:
        # Best-effort: return an error string rather than raising, so the
        # graph can continue and record the failure.
        print(f"Error generating code review: {e}")
        return f"Error: Could not generate code review. {e}\n\n" \
               f"Please check the LLM API call or token limits."
255
+
256
def code_reviewer_node(state: PRReviewState):
    """LangGraph node: run the LLM reviewer over the state's diff and files.

    Stores the raw Markdown review on the state and advances the status.
    """
    markdown = generate_code_review_markdown(state.code_diff, state.file_contents)

    # Every LangGraph node must hand back an updated state object.
    return state.model_copy(update={
        "review_status": "code_reviewed",
        "llm_markdown_review": markdown,
    })
269
+
270
+
271
+ import re
272
+ from typing import List, Dict, Tuple, Optional, Any
273
+
274
class ParsedComment:
    """
    A single review comment parsed out of the LLM's Markdown output,
    ready to be grouped by file/function.
    """

    def __init__(self, message: str, suggestion: Optional[str] = None):
        # `suggestion` holds code from a ```suggestion``` fence, if any.
        self.message = message
        self.suggestion = suggestion

    def __repr__(self):
        preview = self.message[:50]
        return f"ParsedComment(msg='{preview}...', has_suggestion={self.suggestion is not None})"
285
+
286
class ParsedReviewSection:
    """
    A named, free-text section of the review (e.g. 'Potential Issues').
    """

    def __init__(self, title: str, content: str):
        self.title = title
        self.content = content

    def __repr__(self):
        snippet = self.content[:50]
        return f"ParsedReviewSection(title='{self.title}', content='{snippet}...')"
296
+
297
+
298
+ # Helper to extract suggestion block and clean message
299
+ def _extract_suggestion(text: str) -> Tuple[Optional[str], str]:
300
+ """Helper to extract suggestion block and clean message."""
301
+ suggestion_match = re.search(r"```suggestion\n([\s\S]*?)\n```", text, re.MULTILINE)
302
+ suggestion_code = suggestion_match.group(1).strip() if suggestion_match else None
303
+
304
+ # Remove suggestion from the main message
305
+ cleaned_message = re.sub(r"```suggestion[\s\S]*?```", "", text).strip()
306
+ return suggestion_code, cleaned_message
307
+
308
def _parse_bullet_comments(text_block: str) -> List[ParsedComment]:
    """Parse the bullet-point comments out of one block of review text.

    Each bullet runs from its `-`/`*` marker up to the next bullet (or the
    end of the block), so multi-line comments and embedded suggestion
    fences stay attached to their bullet.
    """
    bullet_re = re.compile(r"(^ *[-*]\s*[\s\S]*?)(?=\n *[-*]\s*|\Z)", re.MULTILINE | re.DOTALL)

    parsed: List[ParsedComment] = []
    for match in bullet_re.finditer(text_block):
        raw = match.group(1).strip()
        if not raw:
            continue
        code, message = _extract_suggestion(raw)
        parsed.append(ParsedComment(message=message, suggestion=code))
    return parsed
321
+
322
+
323
def parse_llm_review_markdown(markdown_review: str) -> Dict[str, Any]:
    """
    Parses the LLM-generated Markdown review into a structured dictionary.
    It extracts the overall summary, file-specific/function-specific comments,
    and other general review sections.

    Args:
        markdown_review (str): The full Markdown string generated by the LLM.

    Returns:
        Dict[str, Any]: A dictionary containing structured review data:
            - 'overall_impression': str
            - 'file_comments': Dict[str, Dict[str, List[ParsedComment]]]
              (file_path -> function_name -> List[ParsedComment])
            - 'general_sections': List[ParsedReviewSection]
            - 'summary': str
            - 'approval_status': str (extracted from summary, if present)
    """
    structured_review: Dict[str, Any] = {
        'overall_impression': '',
        'file_comments': {},
        'general_sections': [],
        'summary': '',
        'approval_status': 'Comment'  # Default status when no explicit action is found
    }

    # Helper to extract content between two headers.
    # Flexible on purpose: allows optional numbering ("## 1. Title:") and
    # either '##' or '###' as the header level for top sections.
    def extract_section_content(text: str, start_header_text: str, end_header_text: str) -> Optional[str]:
        # Pattern to match headers with optional numbering and flexible spacing
        start_pattern = r"^(?:##|###)\s*\d*\.?\s*" + re.escape(start_header_text) + r":\s*$"
        end_pattern = r"^(?:##|###)\s*\d*\.?\s*" + re.escape(end_header_text) + r":\s*$"

        # Lookahead stops at the end header (or end of string) without consuming it.
        match = re.search(f"{start_pattern}([\\s\\S]*?)(?={end_pattern}|\\Z)", text, re.MULTILINE | re.DOTALL)
        if match:
            return match.group(1).strip()
        return None

    # --- 1. Extract Overall Impression ---
    overall_impression_content = extract_section_content(markdown_review, "Overall Impression", "Specific Observations and Suggestions")
    if overall_impression_content:
        structured_review['overall_impression'] = overall_impression_content

    # --- 2. Extract Specific Observations and Suggestions (File/Function Comments) ---
    specific_obs_section_content = extract_section_content(markdown_review, "Specific Observations and Suggestions", "Potential Issues and Edge Cases")

    # Debug prints for specific_obs_section_content (kept for verification)
    print(f"\n--- DEBUG: specific_obs_section_content (extracted from markdown_review) ---")
    if specific_obs_section_content is None:
        print("specific_obs_section_content is None")
    elif not specific_obs_section_content.strip():
        print("specific_obs_section_content is empty or only whitespace")
    else:
        print(specific_obs_section_content[:500] + "..." if len(specific_obs_section_content) > 500 else specific_obs_section_content)
    print(f"--- END DEBUG: specific_obs_section_content ---\n")

    print(f"\n--- DEBUG: Raw specific_obs_section_content (using repr()):")
    if specific_obs_section_content is not None:
        print(repr(specific_obs_section_content))
        print(f"Length of specific_obs_section_content: {len(specific_obs_section_content)}")
        # NOTE(review): 'data_processor.py' here is a leftover from a specific
        # test fixture — it only affects this debug print, not parsing.
        print(f"Does it start with '### `data_processor.py`'? {specific_obs_section_content.startswith('### `data_processor.py`')}")
        starts_as_file_header = False
        if specific_obs_section_content.startswith('### `') or specific_obs_section_content.startswith('**File:'):
            starts_as_file_header = True
        print(f"Does it start with a common file header pattern? {starts_as_file_header}")
    else:
        print("specific_obs_section_content is None.")
    print(f"--- END DEBUG: Raw specific_obs_section_content ---\n")


    if specific_obs_section_content:
        # File-block parsing strategy:
        # Step 1: find every file header line first, then slice the text
        # between consecutive headers into per-file content blocks.
        # Accepts either '**File: `path`**' or '### `path`' header styles.
        file_header_line_pattern = re.compile(
            r"^(?:\*\*File:\s*`?([\w\/\.\-_]+\.\w+)`?\*\*|###\s*`?([\w\/\.\-_]+\.\w+)`?)\s*$",
            re.MULTILINE
        )

        header_matches = list(file_header_line_pattern.finditer(specific_obs_section_content))

        print(f"--- DEBUG: Number of file_header_line_pattern matches found (New Strategy): {len(header_matches)} ---")
        if not header_matches:
            print("No file headers were found. Cannot parse file blocks.")
            pass
        else:
            # Step 2: iterate through header matches and extract content blocks
            for i, header_match in enumerate(header_matches):
                # The filename lives in group 1 ('**File:' form) or group 2 ('###' form).
                file_name = (header_match.group(1) or header_match.group(2)).strip().replace('`', '')

                # Content for this file starts right after its header line...
                content_start_index = header_match.end()

                # ...and ends at the next file header (or end of the section).
                content_end_index = len(specific_obs_section_content)
                if i + 1 < len(header_matches):
                    content_end_index = header_matches[i+1].start()

                file_content_block = specific_obs_section_content[content_start_index:content_end_index].strip()

                print(f"\n--- DEBUG: Processing file (new strategy): {file_name} ---")
                print(f"File content block (first 200 chars):\n{file_content_block[:200]}..." if len(file_content_block) > 200 else file_content_block)

                if not file_name: continue

                structured_review['file_comments'][file_name] = {}
                general_comments_for_file: List[ParsedComment] = []

                # Split the file block into sub-sections:
                # matches '#### Function: `func_name`' (group 2 = name) OR any
                # other '#### ...' section title.
                sub_section_header_pattern = re.compile(
                    r"^(####\s*(?:Function:\s*`?([\w_]+)`?|[\s\S]+?))\s*$",
                    re.MULTILINE
                )

                sub_section_matches_list = list(sub_section_header_pattern.finditer(file_content_block))
                print(f"--- DEBUG: Number of sub-section (####) matches for {file_name}: {len(sub_section_matches_list)} ---")
                if not sub_section_matches_list:
                    # No '####' headers: treat the whole block as file-level comments.
                    print(f"No '####' sub-sections were found in the block for {file_name}. All content will be general comments or missed.")
                    if file_content_block.strip():
                        parsed_general_comments = _parse_bullet_comments(file_content_block.strip())
                        structured_review['file_comments'][file_name]["General_File_Comments"] = parsed_general_comments
                        print(f" - DEBUG: Parsed {len(parsed_general_comments)} general comments for {file_name}.")
                    continue

                # Sub-sections exist: text before the first '####' header is
                # treated as file-level (general) comments.
                first_match_start_index = sub_section_matches_list[0].start()
                pre_section_comments_content = file_content_block[:first_match_start_index].strip()
                if pre_section_comments_content:
                    general_comments_for_file.extend(_parse_bullet_comments(pre_section_comments_content))
                    print(f" - DEBUG: Added {len(general_comments_for_file)} general comments (before first sub-section) for {file_name}.")


                # Process each sub-section: slice its body up to the next
                # sub-section header (or the end of the file block).
                for k, current_match in enumerate(sub_section_matches_list):
                    section_header_raw = current_match.group(1).strip()
                    func_name_from_group = current_match.group(2)

                    section_title_key = ""
                    if func_name_from_group:
                        section_title_key = func_name_from_group.replace('`', '')
                    else:
                        # Fall back to the header text after '####' as the key.
                        section_title_key = section_header_raw[section_header_raw.find('####') + 4:].strip().replace('`', '')

                    content_start_index = current_match.end()
                    content_end_index = (sub_section_matches_list[k+1].start()
                                         if k + 1 < len(sub_section_matches_list)
                                         else len(file_content_block))

                    sub_section_content = file_content_block[content_start_index:content_end_index].strip()

                    print(f" - DEBUG: Sub-section '{section_title_key}' content (first 100 chars): {sub_section_content[:100]}..." if len(sub_section_content) > 100 else sub_section_content)
                    if sub_section_content:
                        parsed_comments_for_section = _parse_bullet_comments(sub_section_content)
                        structured_review['file_comments'][file_name][section_title_key] = parsed_comments_for_section
                        print(f" - DEBUG: Parsed {len(parsed_comments_for_section)} comments for '{section_title_key}'.")
                    else:
                        structured_review['file_comments'][file_name][section_title_key] = []
                        print(f" - DEBUG: No content for sub-section '{section_title_key}'.")

                if general_comments_for_file:
                    structured_review['file_comments'][file_name]["General_File_Comments"] = general_comments_for_file


    # --- 3. Extract General Sections ---
    # (title to store, header text to search for) — kept identical here, but
    # separated so display titles could diverge from header text if needed.
    general_section_headers = [
        ("Potential Issues and Edge Cases", "Potential Issues and Edge Cases"),
        ("Security Implications", "Security Implications"),
        ("Adherence to Best Practices (PEP 8)", "Adherence to Best Practices (PEP 8)"),
        ("Performance Considerations", "Performance Considerations"),
        ("Unit Testing Suggestions", "Unit Testing Suggestions"),
        ("Docstring/Comment Improvements", "Docstring/Comment Improvements"),
        ("Clarity and Conciseness", "Clarity and Conciseness"),
        ("Summary", "Summary"),
    ]

    current_markdown_to_parse = markdown_review

    # Fast-forward past the file-specific sections so general-section headers
    # inside file comments cannot be matched by mistake.
    start_parsing_from_match = re.search(r"^##\s*\d*\.?\s*Potential Issues and Edge Cases:\s*$", current_markdown_to_parse, re.MULTILINE)
    if not start_parsing_from_match:
        specific_obs_end_idx = 0
        specific_obs_match = re.search(r"^##\s*\d*\.?\s*Specific Observations and Suggestions:\s*([\s\S]*?)(?=^##\s*\d*\.?\s*[\w\s\(\)\/]+:|\Z)", current_markdown_to_parse, re.MULTILINE | re.DOTALL)
        if specific_obs_match:
            current_markdown_to_parse = current_markdown_to_parse[specific_obs_match.end():].strip()
        else:
            pass
    else:
        current_markdown_to_parse = current_markdown_to_parse[start_parsing_from_match.start():].strip()


    # Walk the expected section order; current_markdown_to_parse shrinks as
    # each section is consumed, so the order of headers matters.
    for i, (title, header_text) in enumerate(general_section_headers):
        current_header_pattern = r"^##\s*\d*\.?\s*" + re.escape(header_text) + r":\s*$"

        start_match = re.search(current_header_pattern, current_markdown_to_parse, re.MULTILINE)
        if not start_match:
            continue

        section_start_idx = start_match.end()

        section_end_idx = len(current_markdown_to_parse)

        # Stop at the next expected section header, if it is present.
        if i + 1 < len(general_section_headers):
            next_header_text = general_section_headers[i+1][1]
            next_header_pattern = r"^##\s*\d*\.?\s*" + re.escape(next_header_text) + r":\s*$"
            next_match = re.search(next_header_pattern, current_markdown_to_parse[section_start_idx:], re.MULTILINE)
            if next_match:
                section_end_idx = section_start_idx + next_match.start()

        content_raw = current_markdown_to_parse[section_start_idx:section_end_idx].strip()

        if title == "Summary":
            structured_review['summary'] = content_raw
            # Strip a trailing code fence the LLM sometimes appends.
            structured_review['summary'] = re.sub(r'(`{3,})\s*$', '', structured_review['summary']).strip()

            # Pull the recommended action out of the summary, e.g.
            # '**Recommended Action:** Request Changes'.
            approval_match = re.search(r"^\s*\*\*(?:Action|Recommended Action|Status):\*\*\s*(Approve|Request Changes|Comment|No action required)", structured_review['summary'], re.IGNORECASE | re.MULTILINE)
            if approval_match:
                # NOTE(review): this normalizes e.g. 'Request Changes' to
                # 'Requestchanges' (spaces removed, then capitalized) —
                # confirm downstream consumers expect that form.
                structured_review['approval_status'] = approval_match.group(1).strip().replace(' ', '').capitalize()
            else:
                structured_review['approval_status'] = 'Comment'
        else:
            structured_review['general_sections'].append(ParsedReviewSection(title=title, content=content_raw))

        # Consume this section before searching for the next one.
        current_markdown_to_parse = current_markdown_to_parse[section_end_idx:].strip()

    # Fallback: if the ordered walk never filled the summary, search the whole
    # original Markdown for a Summary header.
    if not structured_review['summary']:
        summary_match = re.search(r"^##\s*\d*\.?\s*Summary:\s*([\s\S]*)$", markdown_review, re.MULTILINE | re.DOTALL)
        if summary_match:
            structured_review['summary'] = summary_match.group(1).strip()
            structured_review['summary'] = re.sub(r'(`{3,})\s*$', '', structured_review['summary']).strip()

            approval_match = re.search(r"^\s*\*\*(?:Action|Recommended Action|Status):\*\*\s*(Approve|Request Changes|Comment|No action required)", structured_review['summary'], re.IGNORECASE | re.MULTILINE)
            if approval_match:
                structured_review['approval_status'] = approval_match.group(1).strip().replace(' ', '').capitalize()
            else:
                structured_review['approval_status'] = 'Comment'
        else:
            structured_review['summary'] = "Automated review completed."

    return structured_review
562
+
563
def feedback_formatter_node(state: PRReviewState):
    """LangGraph node: parse the raw Markdown review into structured data."""
    structured = parse_llm_review_markdown(state.llm_markdown_review)

    # Every LangGraph node must hand back an updated state object.
    return state.model_copy(update={
        "review_status": "review_parsed",
        "parsed_llm_review_data": structured,
    })
575
+
576
+
577
+ from github import Github, PullRequest
578
+ from github.GithubException import GithubException, UnknownObjectException
579
+ from github.Commit import Commit # Import Commit type for clarity and correctness
580
+ from typing import Dict, Any, List, Optional
581
+ import os
582
+ import re
583
+ import logging
584
+
585
+ # IMPORTANT: These classes should be imported from src.utils.markdown_parser
586
+ # For standalone execution or if import paths are complex, ensure they are correctly defined or imported.
587
class ParsedComment:
    # NOTE(review): duplicate of the ParsedComment defined earlier in this
    # file; per the comment above, both should live in (and be imported
    # from) src.utils.markdown_parser instead of being redefined here.
    def __init__(self, message: str, suggestion: Optional[str] = None):
        self.message = message
        self.suggestion = suggestion

    def __repr__(self):
        has_code = self.suggestion is not None
        return f"ParsedComment(msg='{self.message[:50]}...', has_suggestion={has_code})"
593
+
594
class ParsedReviewSection:
    """A titled free-text section extracted from the LLM review markdown."""

    def __init__(self, title: str, content: str):
        self.title = title      # section heading text
        self.content = content  # markdown body of the section

    def __repr__(self):
        snippet = self.content[:50]
        return f"ParsedReviewSection(title='{self.title}', content='{snippet}...')"
600
+
601
+
602
# Configure logging (optional, but good practice)
# NOTE: basicConfig only takes effect if the root logger has no handlers yet
# (e.g. in a notebook that already configured logging, this is a no-op).
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
604
+
605
+
606
def post_review_comments_on_github(
    repo_name: str,
    pr_id: int,
    parsed_review_data: Dict[str, Any],
    github_token: Optional[str] = None
) -> Dict[str, Any]:
    """
    Posts a structured code review to a GitHub Pull Request.

    Args:
        repo_name (str): The full name of the repository (e.g., "owner/repo").
        pr_id (int): The Pull Request number.
        parsed_review_data (Dict[str, Any]): The structured review data
                                             as returned by parse_llm_review_markdown.
        github_token (str, optional): GitHub Personal Access Token.
                                      If None, tries to read from GITHUB_TOKEN env var.

    Returns:
        Dict[str, Any]: A dictionary containing details of the posted review,
                        e.g., {'status': 'success', 'review_url': '...', 'review_id': ...,
                        'main_comment_body': '...'}.

    Raises:
        ValueError: if no token is available, or the repo/PR cannot be found.
        RuntimeError: on GitHub API errors or any unexpected failure.
    """
    if github_token is None:
        github_token = os.getenv("GITHUB_TOKEN")
    if github_token is None:
        logging.error("GitHub token not provided and GITHUB_TOKEN environment variable not set.")
        raise ValueError("GitHub token not provided and GITHUB_TOKEN environment variable not set.")

    try:
        g = Github(github_token)
        repo = g.get_repo(repo_name)
        pr = repo.get_pull(pr_id)
        logging.info(f"Connected to GitHub repo '{repo_name}', PR #{pr_id}.")

        # --- 1. Prepare the Main Review Body ---
        overall_impression = parsed_review_data.get('overall_impression', '')
        general_sections = parsed_review_data.get('general_sections', [])
        summary = parsed_review_data.get('summary', '')
        approval_status = parsed_review_data.get('approval_status', 'COMMENT').upper()

        main_review_body = f"### 🤖 Automated Code Review\n\n"

        if overall_impression.strip():
            main_review_body += f"**Overall Impression:**\n{overall_impression}\n\n---\n\n"

        for section in general_sections:
            # Skip sections whose content is only whitespace.
            if section.content.strip():
                main_review_body += f"### {section.title}\n{section.content}\n\n---\n\n"

        if summary.strip():
            main_review_body += f"### Summary\n{summary}\n\n"

        main_review_body += f"**Recommended Action:** {approval_status}\n"

        # FIX: the upstream parser strips spaces from the status (e.g.
        # "Request Changes" -> "Requestchanges"), so the old exact comparison
        # with "REQUEST CHANGES" never matched and such reviews were silently
        # posted as COMMENT. Normalize spaces/underscores before matching.
        normalized_status = approval_status.replace(" ", "").replace("_", "")
        if normalized_status == "APPROVE":
            github_event = "APPROVE"
        elif normalized_status == "REQUESTCHANGES":
            github_event = "REQUEST_CHANGES"
        else:
            github_event = "COMMENT"

        logging.info(f"Calculated GitHub review event: {github_event}")

        # --- 2. Prepare Line/File Comments ---
        github_comments: List[Dict[str, Any]] = []
        file_comments_data = parsed_review_data.get('file_comments', {})

        head_commit_sha = pr.head.sha
        # create_review expects a Commit object, not a bare SHA string.
        pr_commit_obj = repo.get_commit(head_commit_sha)
        logging.info(f"Using head commit SHA: {head_commit_sha} (as Commit object)")

        if file_comments_data:
            logging.info(f"Preparing {len(file_comments_data)} file-specific comments.")
            for file_path, functions_data in file_comments_data.items():
                consolidated_file_comment_body = f"### Review for `{file_path}`\n\n"
                has_content = False  # tracks whether any comment text was added

                # "General_File_Comments" sorts first, then functions alphabetically.
                sorted_func_names = sorted(
                    functions_data.keys(),
                    key=lambda x: (0 if x == "General_File_Comments" else 1, x)
                )

                for func_name in sorted_func_names:
                    comments_for_func = functions_data[func_name]

                    if not comments_for_func:
                        continue
                    has_content = True

                    if func_name != "General_File_Comments":
                        consolidated_file_comment_body += f"#### ⚙️ Function: `{func_name}`\n\n"
                    else:
                        # (The original length-based guard here was always true
                        # in this branch, so the header is added unconditionally.)
                        consolidated_file_comment_body += f"#### 📄 General File Comments\n\n"

                    for comment in comments_for_func:
                        consolidated_file_comment_body += f"{comment.message}\n"
                        if comment.suggestion:
                            consolidated_file_comment_body += f"\n```suggestion\n{comment.suggestion}\n```\n\n"
                    consolidated_file_comment_body += "\n---\n\n"

                if has_content:
                    github_comments.append({
                        "path": file_path,
                        # NOTE(review): position 1 anchors the comment to the first
                        # position of the file's diff; the API call will fail if that
                        # file has no diff position 1 — TODO map comments to real
                        # diff positions instead of hard-coding 1.
                        "position": 1,
                        "body": consolidated_file_comment_body.strip(),
                    })

        # --- 3. Submit the Review ---
        # Pass the Commit object to the 'commit' parameter.
        review = pr.create_review(
            commit=pr_commit_obj,
            body=main_review_body,
            event=github_event,
            comments=github_comments
        )

        logging.info(f"Successfully posted GitHub review. URL: {review.html_url}")
        return {
            'status': 'success',
            'review_url': review.html_url,
            'review_id': review.id,
            'main_comment_body': main_review_body
        }

    except UnknownObjectException as e:
        logging.error(f"GitHub object not found (repo or PR): {e}")
        raise ValueError(f"GitHub object not found (repo or PR): {e}")
    except GithubException as e:
        logging.error(f"GitHub API error: {e}")
        raise RuntimeError(f"GitHub API error: {e}")
    except Exception as e:
        logging.critical(f"An unexpected error occurred while posting review: {e}", exc_info=True)
        raise RuntimeError(f"An unexpected error occurred while posting review: {e}")
738
+
739
+
740
def post_review_coments_on_github_node(state: PRReviewState):
    """LangGraph node: publish the parsed review back to the GitHub PR.

    Calls ``post_review_comments_on_github`` with the repo/PR identifiers held
    on the state and returns a state copy recording the posting outcome.
    """
    repo_name = state.repo_name
    pr_id = state.pr_id
    parsed_llm_review_data = state.parsed_llm_review_data

    # FIX: `git_hub_token` was referenced as a bare global and raised
    # NameError when the surrounding notebook/session had not defined it.
    # Fall back to None so the helper reads the GITHUB_TOKEN env var instead.
    try:
        token = git_hub_token  # noqa: F821 — may be defined by the caller's session
    except NameError:
        token = None

    result = post_review_comments_on_github(repo_name, pr_id, parsed_llm_review_data, token)

    # LangGraph nodes must return an updated state, never mutate in place.
    return state.model_copy(update={
        "review_status": "posted",
        "review_comment_url": result['review_url'],
        "review_id": result['review_id'],
        "last_error": result['status'],  # TODO: repurpose this field for real errors
    })
757
+
758
+
759
from IPython.display import Image, display
from langgraph.graph import StateGraph, START, END

# Assemble the linear PR-review pipeline:
# retrieve code -> LLM review -> parse feedback -> post review to GitHub.
builder = StateGraph(PRReviewState)

_pipeline = [
    ("code_retriever_node", code_retriever_node),
    ("code_reviewer_node", code_reviewer_node),
    ("feedback_formatter_node", feedback_formatter_node),
    ("post_review_coments_on_github_node", post_review_coments_on_github_node),
]
for _name, _fn in _pipeline:
    builder.add_node(_name, _fn)

# Wire the nodes in sequence: START -> n1 -> n2 -> ... -> END.
builder.add_edge(START, _pipeline[0][0])
for (_src, _), (_dst, _) in zip(_pipeline, _pipeline[1:]):
    builder.add_edge(_src, _dst)
builder.add_edge(_pipeline[-1][0], END)

# need to fix ParsedComment serializable error
#graph = builder.compile(checkpointer=memory)
graph = builder.compile()


# View
#display(Image(graph.get_graph().draw_mermaid_png()))