Spaces:

nikhmr1235
/

PR_Review_Bot

Paused

App Files Files Community

PR_Review_Bot / langgraph_pr_review_bot.py

nikhmr1235

Update langgraph_pr_review_bot.py

21842f3 verified 6 months ago

raw

history blame contribute delete

40 kB

	from typing import Dict, Any, Optional
	from pydantic import BaseModel, Field
	from uuid import uuid4
	import os
	from typing import Dict, Any, List, Optional, Tuple # Ensure Tuple is imported
	from github import Github, PullRequest
	from github.GithubException import GithubException, UnknownObjectException
	import requests # Make sure requests is imported for patch_url
	from dotenv import load_dotenv

	class PRReviewState(BaseModel):
	    """State object passed between the nodes of the PR review graph.

	    Only ``pr_id`` and ``repo_name`` are required. Every other field has a
	    default and is filled in as the review moves through the graph.
	    """

	    # --- Pull request identity (required) ---
	    pr_id: int
	    repo_name: str  # full repository name, e.g. "owner/repo"

	    # --- Pull request metadata (optional) ---
	    diff_url: Optional[str] = None
	    pr_title: Optional[str] = None
	    pr_author: Optional[str] = None

	    # --- Code content ---
	    code_diff: Optional[str] = None  # raw diff text fetched for the PR
	    file_contents: Dict[str, str] = {}  # filename -> full file content, used as extra context

	    # --- LLM review outputs ---
	    # Raw Markdown returned by the LLM (see generate_code_review_markdown).
	    llm_markdown_review: Optional[str] = None
	    # Structured form of that Markdown (see parse_llm_review_markdown).
	    parsed_llm_review_data: Optional[Dict[str, Any]] = None

	    # --- Human-in-the-loop (simplified for phase 1) ---
	    require_human_approval: bool = False  # configuration flag set when the graph is started
	    human_approval_status: Optional[bool] = None  # True = approved, False = rejected
	    human_feedback_message: Optional[str] = None  # message accompanying a rejection

	    # --- System status ---
	    # Progress marker, e.g. "initiated", "fetching_code", "code_retrieved",
	    # "generating_llm_review", "llm_review_generated", "parsing_llm_review",
	    # "review_parsed", "awaiting_human_approval", "posting_review", "posted",
	    # "rejected", "failed".
	    review_status: str = "initiated"
	    last_error: Optional[str] = None  # most recent error message, if any
	    review_id: Optional[int] = None  # id of the GitHub pull-request review
	    review_comment_url: Optional[str] = None  # URL of the posted GitHub review

	# Secrets are read from environment variables. On Kaggle the same values can
	# be obtained from kaggle_secrets instead:
	#     from kaggle_secrets import UserSecretsClient
	#     user_secrets = UserSecretsClient()
	#     git_hub_token = user_secrets.get_secret("GITHUB_token_ID")
	#     google_api_key = user_secrets.get_secret("GOOGLE_API_KEY")

	# Load a local .env file BEFORE reading the variables below, otherwise values
	# defined only in .env never reach google_api_key / git_hub_token.
	# load_dotenv() does not override variables that are already set, so calling
	# it unconditionally is safe in deployed environments as well.
	load_dotenv()

	google_api_key = os.getenv("GOOGLE_API_KEY")
	if google_api_key:
	    print(f"Using Google API key: {google_api_key[:4]}... (truncated for security)")
	else:
	    # Do not slice a missing key: None[:4] would raise TypeError at import time.
	    print("Google API key not found in environment variables.")

	git_hub_token = os.getenv("GITHUB_token_ID")
	if git_hub_token:
	    print(f"git_hub_token : {git_hub_token[:4]}... (truncated for security)")
	else:
	    print("git_hub_token not found in environment variables.")


	def _github_error_message(exc) -> str:
	    """Return the message GitHub attached to a PyGithub exception, if any."""
	    data = getattr(exc, "data", None)
	    if isinstance(data, dict):
	        return data.get('message', 'No message')
	    # `data` may be None or a plain string; never call .get() on those.
	    return str(data) if data else 'No message'


	def _read_changed_file(repo, changed_file, head_commit_sha: str, repo_name: str, pr_id: int) -> str:
	    """Return the content of one changed file at the PR head, or a placeholder."""
	    filename = changed_file.filename

	    # Deleted files have no content at the PR head.
	    if changed_file.status == 'deleted':
	        return "[FILE DELETED]"

	    try:
	        # Read at the head SHA (not the branch name) so the content matches
	        # the diff even if the branch moves while we are fetching.
	        content_obj = repo.get_contents(filename, ref=head_commit_sha)
	        if isinstance(content_obj, list):
	            print(f"Warning: '{filename}' is a directory or multiple files, skipping content retrieval for now.")
	            return "[DIRECTORY OR MULTIPLE FILES]"
	        return content_obj.decoded_content.decode('utf-8')
	    except GithubException as e:
	        message = _github_error_message(e)
	        print(f"Warning: GitHub API error fetching content for {filename} (PR {pr_id}, Repo {repo_name}): {e.status} - {message}")
	        return f"[ERROR: Could not fetch content. Status: {e.status}, Message: {message}]"
	    except Exception as e:
	        # Best effort: one unreadable file (e.g. binary content) must not
	        # abort the whole retrieval.
	        print(f"Unexpected error fetching content for {filename} (PR {pr_id}, Repo {repo_name}): {e}")
	        return f"[ERROR: Unexpected error fetching content: {e}]"


	def fetch_pr_code_changes(repo_name: str, pr_id: int) -> Tuple[Optional[str], Optional[Dict[str, str]], Optional[str], Optional[str]]:
	    """
	    Fetches the raw patch of a PR, the full contents of its changed files,
	    and the head commit SHA.

	    The GitHub token is read from the ``GITHUB_token_ID`` environment variable.
	    Errors are reported through the last tuple element rather than raised.

	    Args:
	        repo_name (str): The full name of the repository (e.g., "octocat/Spoon-Knife").
	        pr_id (int): The number of the Pull Request.

	    Returns:
	        Tuple[Optional[str], Optional[Dict[str, str]], Optional[str], Optional[str]]:
	            - raw_diff_content: The patch text of the PR, or None on failure.
	            - file_contents: Maps each changed filename to its content at the PR
	              head, or to a bracketed placeholder when it could not be read.
	              None on failure.
	            - head_commit_sha: The SHA of the PR head commit, or None on failure.
	            - error_message: None on success, otherwise a description of the error.
	    """
	    github_token = os.getenv("GITHUB_token_ID")

	    if not github_token:
	        print("Error: GITHUB_token_ID environment variable not set.")
	        return None, None, None, "GitHub token not found in environment variables."

	    try:
	        repo = Github(github_token).get_repo(repo_name)
	        pull_request = repo.get_pull(pr_id)

	        head_commit_sha = pull_request.head.sha
	        print(f"Fetched PR {pr_id} head commit SHA: {head_commit_sha}")

	        # 1. Raw patch. A timeout keeps a stalled connection from hanging the
	        #    graph, and raise_for_status() prevents an HTTP error page from
	        #    being handed to the LLM as if it were the diff.
	        response = requests.get(
	            pull_request.patch_url,
	            headers={"Authorization": f"token {github_token}"},
	            timeout=30,
	        )
	        response.raise_for_status()
	        raw_diff_content = response.text

	        # 2. Full content of every changed file, as of the PR head commit.
	        file_contents: Dict[str, str] = {
	            changed_file.filename: _read_changed_file(repo, changed_file, head_commit_sha, repo_name, pr_id)
	            for changed_file in pull_request.get_files()
	        }

	        return raw_diff_content, file_contents, head_commit_sha, None

	    except UnknownObjectException as e:
	        error_msg = f"GitHub object not found (repo or PR): {_github_error_message(e)}"
	    except GithubException as e:
	        error_msg = f"GitHub API error for PR {pr_id} from {repo_name}: {e.status} - {_github_error_message(e)}"
	    except Exception as e:
	        error_msg = f"An unexpected error occurred while fetching PR {pr_id} from {repo_name}: {e}"

	    print(f"Error in fetch_pr_code_changes: {error_msg}")
	    return None, None, None, error_msg

	def code_retriever_node(state:PRReviewState):
	    """Graph node: fetch the diff and changed-file contents for the PR in `state`.

	    Returns a copy of the state. On success `review_status` becomes
	    "code_retrieved" and `code_diff` / `file_contents` are populated. If the
	    fetch reports an error, `review_status` becomes "failed" and the message is
	    stored in `last_error`, instead of reporting a successful retrieval with
	    empty data.
	    """
	    repo_name = state.repo_name
	    pull_req_id = state.pr_id

	    print(f"repo_name :{repo_name}-------- pull_req_id:{pull_req_id}")

	    # The head commit SHA is returned too, but this node has no state field for it.
	    diff, contents, _head_commit_sha, error = fetch_pr_code_changes(repo_name, pull_req_id)

	    if error:
	        return state.model_copy(update={
	            "review_status": "failed",
	            "last_error": error,
	        })

	    return state.model_copy(update={
	        "review_status": "code_retrieved",
	        "code_diff": diff,
	        # file_contents is declared as a dict on the state; never store None.
	        "file_contents": contents or {},
	    })

	import os
	from typing import Dict, Any
	from langchain_core.prompts import ChatPromptTemplate
	# Ensure you have your LLM provider installed, e.g., pip install langchain-google-genai
	from langchain_google_genai import ChatGoogleGenerativeAI # Using Gemini as per your preference

	# Shared Gemini chat model used by the review chain. The key comes from the
	# GOOGLE_API_KEY environment variable (read into google_api_key above); other
	# models such as "gemini-1.5-flash" or "gemini-1.5-pro" can be substituted.
	llm = ChatGoogleGenerativeAI(
	    model="gemini-2.0-flash",
	    temperature=0.0,  # low temperature for more deterministic output
	    api_key=google_api_key,
	)

	def _format_full_contents(file_contents: Dict[str, str]) -> str:
	    """Render the changed files as labelled, fenced blocks for the prompt."""
	    if not file_contents:
	        return "No full file contents provided for additional context."
	    # join() instead of repeated += keeps this linear in the total size.
	    return "".join(
	        f"--- Full Content of {filename} ---\n```python\n{content}\n```\n\n"
	        for filename, content in file_contents.items()
	    )


	def generate_code_review_markdown(code_diff: str, file_contents: Dict[str, str]) -> str:
	    """
	    Generates a detailed, human-readable code review in Markdown format from the LLM.

	    The prompt asks for a fixed set of numbered top-level sections so that the
	    result can afterwards be parsed into GitHub PR comments, grouped by file
	    and function.

	    Args:
	        code_diff (str): The string representation of the code diff.
	        file_contents (Dict[str, str]): A dictionary where keys are file paths
	            and values are their full content.

	    Returns:
	        str: A Markdown string representing the code review. If no diff is
	        supplied or the LLM call fails, a string starting with "Error:" is
	        returned instead of raising.
	    """
	    if not code_diff:
	        # Without a diff the model would be asked to review the text "None".
	        print("Error generating code review: no code diff was provided.")
	        return "Error: Could not generate code review. No code diff was provided.\n\n" \
	               "Please check that the PR changes were retrieved successfully."

	    full_contents_str = _format_full_contents(file_contents)

	    # Construct the Prompt Template
	    prompt = ChatPromptTemplate.from_messages(
	        [
	            ("system",
	             "You are an expert Senior Software Engineer and a meticulous code reviewer.\n"
	             "Your task is to review the provided code changes in a Pull Request.\n"
	             "Analyze the `code_diff` for potential bugs, performance issues, security vulnerabilities, code style violations, maintainability concerns, and missing tests or documentation.\n"
	             "Refer to the `full_file_contents` for additional context if the diff alone is insufficient to understand the changes or their implications.\n"
	             "Provide a comprehensive, actionable, and constructive review.\n"
	             "Format your review clearly using Markdown. Structure it with the following top-level sections:\n"
	             "1. Overall Impression: A brief summary of the PR's purpose and overall quality.\n"
	             "2. Specific Observations and Suggestions: Detailed feedback, grouped by file.\n"
	             " - Within each file's section, group related comments, ideally by function or logical block.\n"
	             " - For each observation/suggestion, include relevant line numbers from the new file for context (e.g., 'Line X-Y:').\n"
	             "3. Potential Issues and Edge Cases: Discuss any missed scenarios or potential problems.\n"
	             "4. Security Implications: Highlight any security concerns.\n"
	             "5. Adherence to Best Practices (PEP 8): Comment on style and best practice compliance.\n"
	             "6. Performance Considerations: Discuss performance aspects.\n"
	             "7. Unit Testing Suggestions: Recommend additional tests.\n"
	             "8. Docstring/Comment Improvements: Suggest documentation enhancements.\n"
	             "9. Clarity and Conciseness: Feedback on code readability.\n"
	             "10. Summary: A concise conclusion and recommended action (e.g., 'Approve', 'Request Changes', 'Comment').\n\n"
	             "For code suggestions, use GitHub's Markdown code block with 'suggestion' annotation, like this:\n"
	             "```suggestion\n"
	             "your_suggested_code_here\n"
	             "```\n"
	             "Ensure file paths are correctly formatted (e.g., `src/utils/data_processor.py`)."
	             ),
	            ("human",
	             "Here are the code changes (diff):\n"
	             "```diff\n"
	             "{code_diff}\n"
	             "```\n\n"
	             "Here are the full contents of the changed files (for additional context, use only if necessary to understand the diff):\n"
	             "{full_contents_context}\n\n"
	             "Please provide your structured code review in Markdown."
	             ),
	        ]
	    )

	    # Pipe the prompt into the module-level chat model.
	    review_chain = prompt | llm

	    try:
	        response = review_chain.invoke({
	            "code_diff": code_diff,
	            "full_contents_context": full_contents_str,
	        })
	        # Chat models return a message object; the text is in `.content`.
	        return response.content
	    except Exception as e:
	        print(f"Error generating code review: {e}")
	        return f"Error: Could not generate code review. {e}\n\n" \
	               f"Please check the LLM API call or token limits."

	def code_reviewer_node(state:PRReviewState):
	    """Graph node: ask the LLM to review the diff held in `state`.

	    Returns a copy of the state with `llm_markdown_review` set to the generated
	    Markdown and `review_status` set to "code_reviewed".
	    """
	    markdown = generate_code_review_markdown(state.code_diff, state.file_contents)

	    # LangGraph nodes hand back an updated state rather than mutating the input.
	    changes = {
	        "review_status": "code_reviewed",
	        "llm_markdown_review": markdown,
	    }
	    return state.model_copy(update=changes)


	import re
	from typing import List, Dict, Tuple, Optional, Any

	class ParsedComment:
	    """
	    A single remark parsed out of the LLM's review, kept so that remarks can
	    be grouped by file/function.

	    Attributes:
	        message: Text of the remark.
	        suggestion: Suggested replacement code attached to the remark, or None.
	    """

	    def __init__(self, message: str, suggestion: Optional[str] = None):
	        self.message, self.suggestion = message, suggestion

	    def __repr__(self):
	        # Show only the first 50 characters of the message to keep logs short.
	        preview = self.message[:50]
	        has_suggestion = self.suggestion is not None
	        return f"ParsedComment(msg='{preview}...', has_suggestion={has_suggestion})"

	class ParsedReviewSection:
	    """
	    One titled, free-text section of the review, e.g. 'Potential Issues'.

	    Attributes:
	        title: Section heading.
	        content: Section body text.
	    """

	    def __init__(self, title: str, content: str):
	        self.title, self.content = title, content

	    def __repr__(self):
	        # Show only the first 50 characters of the body to keep logs short.
	        preview = self.content[:50]
	        return f"ParsedReviewSection(title='{self.title}', content='{preview}...')"


	def _extract_suggestion(text: str) -> Tuple[Optional[str], str]:
	    """Split a comment into its suggested code and its remaining message.

	    Args:
	        text: Comment text that may contain a ```suggestion fenced block.

	    Returns:
	        A ``(suggestion_code, cleaned_message)`` tuple. ``suggestion_code`` is
	        the body of the first suggestion block, or None if there is no block or
	        the block is blank. ``cleaned_message`` is ``text`` with every
	        suggestion block removed and surrounding whitespace stripped.

	    The code's own indentation is preserved, because a GitHub suggestion
	    replaces source lines verbatim. Only the indentation of the fence itself
	    (for a block nested under a list item) is removed.
	    """
	    suggestion_code = None

	    # The closing fence may be indented, so anchor it to a line start instead
	    # of requiring it to follow the newline immediately.
	    fence = re.search(r"```suggestion[ \t]*\r?\n([\s\S]*?)^[ \t]*```", text, re.MULTILINE)
	    if fence:
	        # Whitespace between the start of the line and the opening fence.
	        line_start = text.rfind("\n", 0, fence.start()) + 1
	        fence_indent = text[line_start:fence.start()]

	        code_lines = fence.group(1).splitlines()
	        if fence_indent and not fence_indent.strip():
	            code_lines = [
	                line[len(fence_indent):] if line.startswith(fence_indent) else line
	                for line in code_lines
	            ]

	        # Drop surrounding blank lines and trailing spaces, but keep the
	        # leading indentation of the first code line.
	        code = "\n".join(code_lines).strip("\n").rstrip()
	        suggestion_code = code if code.strip() else None

	    # Remove suggestion blocks from the main message.
	    cleaned_message = re.sub(r"```suggestion[\s\S]*?```", "", text).strip()
	    return suggestion_code, cleaned_message

	# A Markdown list item: optional indentation, a '-', '*' or '+' marker, then
	# whitespace. The required whitespace keeps '**bold**' lines from matching.
	_BULLET_LINE = re.compile(r"[ \t]*[-*+]\s")


	def _parse_bullet_comments(text_block: str) -> "List[ParsedComment]":
	    """Split a block of Markdown bullet points into ParsedComment objects.

	    Each list item (a line starting with '-', '*' or '+') begins a new comment
	    that runs until the next list item or the end of the block, so multi-line
	    items stay together. Lines inside a fenced code block are never treated as
	    list items, which keeps an embedded suggestion block intact even when the
	    code itself contains lines that look like bullets. Text before the first
	    list item is ignored.

	    Args:
	        text_block: Markdown text containing zero or more list items.

	    Returns:
	        One ParsedComment per non-empty list item, in order. The message keeps
	        its bullet marker; any suggestion block is moved to `suggestion`.
	    """
	    comments = []
	    current_lines = None  # lines of the list item being collected, if any
	    inside_fence = False

	    def _flush(lines):
	        if not lines:
	            return
	        full_comment_text = "\n".join(lines).strip()
	        if full_comment_text:
	            suggestion_code, cleaned_message = _extract_suggestion(full_comment_text)
	            comments.append(ParsedComment(message=cleaned_message, suggestion=suggestion_code))

	    for line in (text_block or "").splitlines():
	        if line.lstrip().startswith("```"):
	            # Opening or closing fence: belongs to the current item.
	            inside_fence = not inside_fence
	        elif not inside_fence and _BULLET_LINE.match(line):
	            _flush(current_lines)
	            current_lines = []
	        if current_lines is not None:
	            current_lines.append(line)

	    _flush(current_lines)
	    return comments


	# Top-level section titles requested from the LLM, in prompt order.
	_TOP_LEVEL_SECTION_TITLES = (
	    "Overall Impression",
	    "Specific Observations and Suggestions",
	    "Potential Issues and Edge Cases",
	    "Security Implications",
	    "Adherence to Best Practices (PEP 8)",
	    "Performance Considerations",
	    "Unit Testing Suggestions",
	    "Docstring/Comment Improvements",
	    "Clarity and Conciseness",
	    "Summary",
	)

	# Sections reported verbatim as ParsedReviewSection objects.
	_GENERAL_SECTION_TITLES = _TOP_LEVEL_SECTION_TITLES[2:-1]


	def _section_header_pattern(title: str) -> "re.Pattern":
	    """Build the regex for a '##'/'###' header line carrying `title`.

	    Accepts an optional number of any length ("1.", "10."), optional bold
	    markers and an optional trailing colon.
	    """
	    return re.compile(
	        r"^#{2,3}[ \t]*\**[ \t]*(?:\d+[.)]?[ \t]*)?\**[ \t]*"
	        + re.escape(title)
	        + r"[ \t]*:?[ \t]*\**[ \t]*:?[ \t]*$",
	        re.MULTILINE | re.IGNORECASE,
	    )


	_SECTION_HEADER_PATTERNS = {
	    title: _section_header_pattern(title) for title in _TOP_LEVEL_SECTION_TITLES
	}

	# File header inside "Specific Observations": '**File: `path`**' or '### `path`'.
	_FILE_HEADER_PATTERN = re.compile(
	    r"^(?:\*\*File:[ \t]*`?([\w/.\-]+\.\w+)`?\*\*|###[ \t]*`?([\w/.\-]+\.\w+)`?)[ \t]*$",
	    re.MULTILINE,
	)

	# Sub-section header inside a file block: '#### Function: `name`' or '#### <title>'.
	_SUB_SECTION_HEADER_PATTERN = re.compile(
	    r"^####[ \t]*(?:Function:[ \t]*`?(\w+)`?|(.+?))[ \t]*$",
	    re.MULTILINE,
	)

	# 'Recommended Action: X' style line in the summary; bold markers optional.
	_APPROVAL_PATTERN = re.compile(
	    r"^[ \t]*(?:[-*+][ \t]+)?\**[ \t]*(?:Recommended Action|Action|Status)"
	    r"[ \t]*:?[ \t]*\**[ \t]*:?[ \t]*\**[ \t]*"
	    r"(Approve|Request Changes|Comment|No action required)",
	    re.IGNORECASE | re.MULTILINE,
	)

	_CANONICAL_APPROVAL_STATUS = {
	    "approve": "Approve",
	    "request changes": "Request Changes",
	    "comment": "Comment",
	    "no action required": "No action required",
	}


	def _split_top_level_sections(markdown: str) -> Dict[str, str]:
	    """Map each top-level section title found in `markdown` to its body text."""
	    found = []
	    for title, pattern in _SECTION_HEADER_PATTERNS.items():
	        header = pattern.search(markdown)
	        if header:
	            found.append((header.start(), header.end(), title))
	    found.sort()

	    sections = {}
	    for index, (_start, body_start, title) in enumerate(found):
	        # A section ends where the next recognised header begins.
	        body_end = found[index + 1][0] if index + 1 < len(found) else len(markdown)
	        sections[title] = markdown[body_start:body_end].strip()
	    return sections


	def _parse_file_block(file_content_block: str) -> Dict[str, Any]:
	    """Group the comments of one file block by '####' sub-section."""
	    grouped = {}
	    sub_headers = list(_SUB_SECTION_HEADER_PATTERN.finditer(file_content_block))

	    if not sub_headers:
	        # No sub-sections: everything is a file-level comment.
	        if file_content_block:
	            grouped["General_File_Comments"] = _parse_bullet_comments(file_content_block)
	        return grouped

	    # Bullets before the first sub-section header are file-level comments.
	    preamble = file_content_block[:sub_headers[0].start()].strip()
	    general_comments = _parse_bullet_comments(preamble) if preamble else []

	    for index, sub_header in enumerate(sub_headers):
	        key = (sub_header.group(1) or sub_header.group(2)).replace('`', '').strip()
	        body_end = (sub_headers[index + 1].start()
	                    if index + 1 < len(sub_headers)
	                    else len(file_content_block))
	        body = file_content_block[sub_header.end():body_end].strip()
	        grouped[key] = _parse_bullet_comments(body) if body else []

	    if general_comments:
	        grouped["General_File_Comments"] = general_comments
	    return grouped


	def _parse_file_comments(section_text: str) -> Dict[str, Any]:
	    """Parse the 'Specific Observations' body into file -> section -> comments."""
	    file_comments = {}
	    file_headers = list(_FILE_HEADER_PATTERN.finditer(section_text))
	    for index, file_header in enumerate(file_headers):
	        file_name = (file_header.group(1) or file_header.group(2)).strip()
	        block_end = (file_headers[index + 1].start()
	                     if index + 1 < len(file_headers)
	                     else len(section_text))
	        file_block = section_text[file_header.end():block_end].strip()
	        file_comments[file_name] = _parse_file_block(file_block)
	    return file_comments


	def _extract_approval_status(summary: str) -> str:
	    """Return the recommended action named in `summary`, defaulting to 'Comment'."""
	    found = _APPROVAL_PATTERN.search(summary)
	    if not found:
	        return 'Comment'
	    key = " ".join(found.group(1).lower().split())
	    return _CANONICAL_APPROVAL_STATUS.get(key, 'Comment')


	def parse_llm_review_markdown(markdown_review: str) -> Dict[str, Any]:
	    """
	    Parses the LLM-generated Markdown review into a structured dictionary.

	    The review is split on its numbered '##'/'###' top-level headers. The
	    "Specific Observations and Suggestions" section is further split by file
	    header and by '####' sub-section, and its bullet points become
	    ParsedComment objects.

	    Args:
	        markdown_review (str): The full Markdown string generated by the LLM.
	            None is treated like an empty string.

	    Returns:
	        Dict[str, Any]: A dictionary containing structured review data:
	            - 'overall_impression': str ('' if the section is missing)
	            - 'file_comments': Dict[str, Dict[str, List[ParsedComment]]]
	              (file_path -> function or section name -> comments; file-level
	              comments are stored under "General_File_Comments")
	            - 'general_sections': List[ParsedReviewSection], one per general
	              section found, in prompt order
	            - 'summary': str ("Automated review completed." if there is no
	              Summary section)
	            - 'approval_status': str, one of 'Approve', 'Request Changes',
	              'Comment' or 'No action required'; 'Comment' when the summary
	              names no action
	    """
	    structured_review: Dict[str, Any] = {
	        'overall_impression': '',
	        'file_comments': {},
	        'general_sections': [],
	        'summary': '',
	        'approval_status': 'Comment'  # Default status
	    }

	    # Normalise line endings so '$' in the header patterns matches CRLF input.
	    markdown = (markdown_review or "").replace("\r\n", "\n")
	    sections = _split_top_level_sections(markdown)

	    # --- 1. Overall impression ---
	    structured_review['overall_impression'] = sections.get("Overall Impression", "")

	    # --- 2. File / function comments ---
	    specific_observations = sections.get("Specific Observations and Suggestions")
	    if specific_observations:
	        structured_review['file_comments'] = _parse_file_comments(specific_observations)

	    # --- 3. General sections ---
	    for title in _GENERAL_SECTION_TITLES:
	        if title in sections:
	            structured_review['general_sections'].append(
	                ParsedReviewSection(title=title, content=sections[title])
	            )

	    # --- 4. Summary and recommended action ---
	    if "Summary" in sections:
	        # Drop a stray closing code fence the LLM may leave at the very end.
	        summary = re.sub(r'(`{3,})\s*$', '', sections["Summary"]).strip()
	        structured_review['summary'] = summary
	        structured_review['approval_status'] = _extract_approval_status(summary)
	    else:
	        structured_review['summary'] = "Automated review completed."

	    return structured_review

	def feedback_formatter_node(state: PRReviewState):
	    """Graph node: turn the LLM's Markdown review into structured data.

	    Returns a copy of the state with `parsed_llm_review_data` set to the parsed
	    dictionary and `review_status` set to "review_parsed".
	    """
	    structured = parse_llm_review_markdown(state.llm_markdown_review)

	    # LangGraph nodes hand back an updated state rather than mutating the input.
	    changes = {
	        "review_status": "review_parsed",
	        "parsed_llm_review_data": structured,
	    }
	    return state.model_copy(update=changes)


	from github import Github, PullRequest
	from github.GithubException import GithubException, UnknownObjectException
	from github.Commit import Commit # Import Commit type for clarity and correctness
	from typing import Dict, Any, List, Optional
	import os
	import re
	import logging

	# IMPORTANT: these classes are meant to be imported from src.utils.markdown_parser.
	# They are defined here so the file also works standalone or when that import
	# path is not available.
	class ParsedComment:
	    """A single review remark with an optional code suggestion."""

	    def __init__(self, message: str, suggestion: Optional[str] = None):
	        self.message, self.suggestion = message, suggestion

	    def __repr__(self):
	        # Show only the first 50 characters of the message to keep logs short.
	        preview = self.message[:50]
	        has_suggestion = self.suggestion is not None
	        return f"ParsedComment(msg='{preview}...', has_suggestion={has_suggestion})"

	class ParsedReviewSection:
	    """A titled, free-text section of the review."""

	    def __init__(self, title: str, content: str):
	        self.title, self.content = title, content

	    def __repr__(self):
	        # Show only the first 50 characters of the body to keep logs short.
	        preview = self.content[:50]
	        return f"ParsedReviewSection(title='{self.title}', content='{preview}...')"


	# Root logger setup: INFO level, records formatted as "time - level - message".
	logging.basicConfig(
	    level=logging.INFO,
	    format='%(asctime)s - %(levelname)s - %(message)s',
	)


	def _resolve_review_event(approval_status: str) -> str:
	    """Map a parsed approval status onto a GitHub review event.

	    Spaces, underscores and hyphens are ignored, so "Request Changes",
	    "REQUEST_CHANGES" and "Requestchanges" all yield "REQUEST_CHANGES".
	    Anything that is not an approval or a change request yields "COMMENT".
	    """
	    normalized = re.sub(r"[\s_-]+", "", approval_status or "").upper()
	    if normalized == "APPROVE":
	        return "APPROVE"
	    if normalized == "REQUESTCHANGES":
	        return "REQUEST_CHANGES"
	    return "COMMENT"


	def _build_main_review_body(parsed_review_data: Dict[str, Any], approval_status: str) -> str:
	    """Assemble the top-level review text from the parsed review sections."""
	    parts = ["### 🤖 Automated Code Review\n\n"]

	    overall_impression = parsed_review_data.get('overall_impression') or ''
	    if overall_impression.strip():
	        parts.append(f"Overall Impression:\n{overall_impression}\n\n---\n\n")

	    for section in parsed_review_data.get('general_sections') or []:
	        if section.content.strip():
	            parts.append(f"### {section.title}\n{section.content}\n\n---\n\n")

	    summary = parsed_review_data.get('summary') or ''
	    if summary.strip():
	        parts.append(f"### Summary\n{summary}\n\n")

	    parts.append(f"Recommended Action: {approval_status}\n")
	    return "".join(parts)


	def _build_file_comment_body(file_path: str, functions_data: Dict[str, Any]) -> Optional[str]:
	    """Merge all comments for one file into a single body; None if there are none."""
	    parts = []
	    # File-level comments first, then the remaining sections alphabetically.
	    ordered_names = sorted(functions_data, key=lambda name: (name != "General_File_Comments", name))

	    for func_name in ordered_names:
	        comments_for_func = functions_data[func_name]
	        if not comments_for_func:
	            continue

	        if func_name == "General_File_Comments":
	            parts.append("#### 📄 General File Comments\n\n")
	        else:
	            parts.append(f"#### ⚙️ Function: `{func_name}`\n\n")

	        for comment in comments_for_func:
	            parts.append(f"{comment.message}\n")
	            if comment.suggestion:
	                parts.append(f"\n```suggestion\n{comment.suggestion}\n```\n\n")
	        parts.append("\n---\n\n")

	    if not parts:
	        return None
	    return (f"### Review for `{file_path}`\n\n" + "".join(parts)).strip()


	def _match_changed_path(file_path: str, changed_paths) -> Optional[str]:
	    """Resolve a path named in the review to a commentable file of the PR.

	    Returns the exact match, or the single changed path ending in
	    '/<file_path>' (the review may name only a trailing part of the path),
	    or None when there is no unambiguous match.
	    """
	    if file_path in changed_paths:
	        return file_path
	    candidates = [path for path in changed_paths if path.endswith("/" + file_path)]
	    return candidates[0] if len(candidates) == 1 else None


	def post_review_comments_on_github(
	    repo_name: str,
	    pr_id: int,
	    parsed_review_data: Dict[str, Any],
	    github_token: Optional[str] = None
	) -> Dict[str, Any]:
	    """
	    Posts a structured code review to a GitHub Pull Request.

	    The overall impression, general sections and summary form the main review
	    body. Comments for each file are merged into one review comment attached to
	    the first diff position of that file. Comments whose path cannot be matched
	    to a file with a diff in the PR are appended to the main body instead,
	    because an invalid path would make GitHub reject the whole review.

	    Args:
	        repo_name (str): The full name of the repository (e.g., "owner/repo").
	        pr_id (int): The Pull Request number.
	        parsed_review_data (Dict[str, Any]): The structured review data
	            as returned by parse_llm_review_markdown.
	        github_token (str, optional): GitHub Personal Access Token.
	            If None, it is read from the GITHUB_token_ID environment variable.

	    Returns:
	        Dict[str, Any]: Details of the posted review with the keys 'status'
	        ('success'), 'review_url', 'review_id' and 'main_comment_body'.

	    Raises:
	        ValueError: If no token is available, or the repository or PR does not exist.
	        RuntimeError: On any other GitHub API or unexpected error.
	    """
	    if github_token is None:
	        github_token = os.getenv("GITHUB_token_ID")
	    if github_token is None:
	        logging.error("GitHub token not provided and GITHUB_token_ID environment variable not set.")
	        raise ValueError("GitHub token not provided and GITHUB_token_ID environment variable not set.")

	    try:
	        g = Github(github_token)
	        repo = g.get_repo(repo_name)
	        pr = repo.get_pull(pr_id)
	        logging.info("Connected to GitHub repo '%s', PR #%s.", repo_name, pr_id)

	        # --- 1. Prepare the main review body and event ---
	        approval_status = (parsed_review_data.get('approval_status') or 'COMMENT').upper()
	        main_review_body = _build_main_review_body(parsed_review_data, approval_status)
	        github_event = _resolve_review_event(approval_status)
	        logging.info("Calculated GitHub review event: %s", github_event)

	        # --- 2. Prepare per-file comments ---
	        head_commit_sha = pr.head.sha
	        # create_review() expects a Commit object, not a bare SHA string.
	        pr_commit_obj = repo.get_commit(head_commit_sha)
	        logging.info("Using head commit SHA: %s (as Commit object)", head_commit_sha)

	        github_comments = []
	        unplaced_comment_bodies = []
	        file_comments_data = parsed_review_data.get('file_comments') or {}

	        if file_comments_data:
	            logging.info("Preparing %d file-specific comments.", len(file_comments_data))
	            # Only files that have a patch in this PR accept a diff position.
	            commentable_paths = {f.filename for f in pr.get_files() if f.patch}

	            for file_path, functions_data in file_comments_data.items():
	                comment_body = _build_file_comment_body(file_path, functions_data)
	                if comment_body is None:
	                    continue

	                target_path = _match_changed_path(file_path, commentable_paths)
	                if target_path is None:
	                    logging.warning(
	                        "'%s' is not a commentable file of PR #%s; moving its comments to the review body.",
	                        file_path, pr_id,
	                    )
	                    unplaced_comment_bodies.append(comment_body)
	                    continue

	                github_comments.append({
	                    "path": target_path,
	                    "position": 1,  # first line of the file's diff
	                    "body": comment_body,
	                })

	        if unplaced_comment_bodies:
	            main_review_body += "\n---\n\n" + "\n\n".join(unplaced_comment_bodies) + "\n"

	        # --- 3. Submit the review ---
	        review = pr.create_review(
	            commit=pr_commit_obj,
	            body=main_review_body,
	            event=github_event,
	            comments=github_comments
	        )

	        logging.info("Successfully posted GitHub review. URL: %s", review.html_url)
	        return {
	            'status': 'success',
	            'review_url': review.html_url,
	            'review_id': review.id,
	            'main_comment_body': main_review_body
	        }

	    except UnknownObjectException as e:
	        logging.error("GitHub object not found (repo or PR): %s", e)
	        raise ValueError(f"GitHub object not found (repo or PR): {e}") from e
	    except GithubException as e:
	        logging.error("GitHub API error: %s", e)
	        raise RuntimeError(f"GitHub API error: {e}") from e
	    except Exception as e:
	        logging.critical("An unexpected error occurred while posting review: %s", e, exc_info=True)
	        raise RuntimeError(f"An unexpected error occurred while posting review: {e}") from e


	def post_review_coments_on_github_node(state:PRReviewState):
	    """Graph node: publish the parsed review on the pull request.

	    Returns a copy of the state. On success `review_status` becomes "posted",
	    `review_comment_url` and `review_id` describe the created review, and
	    `last_error` is cleared. If posting fails, `review_status` becomes "failed"
	    and the error message is stored in `last_error`, so the failure is visible
	    in the final state.
	    """
	    try:
	        result = post_review_comments_on_github(
	            state.repo_name,
	            state.pr_id,
	            state.parsed_llm_review_data,
	            git_hub_token,
	        )
	    except (ValueError, RuntimeError) as exc:
	        # These are the exception types post_review_comments_on_github raises.
	        return state.model_copy(update={
	            "review_status": "failed",
	            "last_error": str(exc),
	        })

	    return state.model_copy(update={
	        "review_status": "posted",
	        "review_comment_url": result['review_url'],
	        "review_id": result['review_id'],
	        # last_error holds error messages only; a success must not be stored in it.
	        "last_error": None,
	    })


	#from IPython.display import Image, display
	from langgraph.graph import StateGraph, START, END

	# Assemble the review pipeline as a linear LangGraph state machine.
	builder = StateGraph(PRReviewState)

	# Nodes, listed in execution order.
	_pipeline_nodes = [
	    ("code_retriever_node", code_retriever_node),
	    ("code_reviewer_node", code_reviewer_node),
	    ("feedback_formatter_node", feedback_formatter_node),
	    ("post_review_coments_on_github_node", post_review_coments_on_github_node),
	]
	for _node_name, _node_fn in _pipeline_nodes:
	    builder.add_node(_node_name, _node_fn)

	# Chain the nodes: START -> each node in turn -> END.
	_stage_order = [START, *(name for name, _ in _pipeline_nodes), END]
	for _source, _target in zip(_stage_order, _stage_order[1:]):
	    builder.add_edge(_source, _target)

	# Compiled without a checkpointer for now: the ParsedComment serialization
	# error has to be fixed before this can be used:
	#graph = builder.compile(checkpointer=memory)
	graph = builder.compile()


	# To render the graph (needs IPython's Image and display):
	#display(Image(graph.get_graph().draw_mermaid_png()))