langgraph_PR_Review_Bot

Sleeping

App Files Files Community

nikhmr1235 committed on Sep 17, 2025

Commit

269a5f3

verified ·

1 Parent(s): d669e92

update parse_llm_review_markdown() to be complete instead of skeleton code

Browse files

Files changed (1) hide show

src/langgraph_logic/nodes.py +103 -4

src/langgraph_logic/nodes.py CHANGED Viewed

@@ -184,10 +184,109 @@ def _parse_bullet_comments(text_block: str) -> List[ParsedComment]:
     return comments
 def parse_llm_review_markdown(markdown_review: str) -> LLMReviewOutput:
-    """Parses the LLM-generated Markdown review into a structured LLMReviewOutput Pydantic model."""
-    # Skeleton only: no section parsing is performed in this version.
-    # The raw Markdown is passed through unchanged as `overall_impression`.
-    return LLMReviewOutput(overall_impression=markdown_review) # Placeholder for actual parsing
 # --- Graph Nodes ---

     return comments
 def parse_llm_review_markdown(markdown_review: str) -> LLMReviewOutput:
+    """
+    Parses the LLM-generated Markdown review into a structured LLMReviewOutput Pydantic model.
+    This version is designed to be robust against formatting variations by dynamically finding
+    section headers and extracting content between them.
+    Args:
+        markdown_review (str): The full Markdown string generated by the LLM.
+    Returns:
+        LLMReviewOutput: A Pydantic model containing structured review data.
+    """
+    temp_structured_data: Dict[str, Any] = {
+        'overall_impression': None,
+        'file_reviews': [],
+        'general_sections': [],
+        'summary': None,
+        'approval_status': 'Comment'
+    }
+    # --- 1. Find all major section headers and their positions ---
+    # This pattern recognizes "## Section Title:" and "1. **Section Title:**"
+    section_header_pattern = re.compile(
+        r"^(?:##\s+([\w\s/()]+):|(\d+)\.\s+\*\*([\w\s/()]+):\*\*)\s*$",
+        re.MULTILINE
+    )
+    sections = []
+    for match in section_header_pattern.finditer(markdown_review):
+        # Consolidate title from group 1 (for '## Title:') or group 3 (for '1. **Title:**')
+        title = match.group(1) or match.group(3)
+        if title:
+            sections.append({
+                'title': title.strip(),
+                'content_start': match.end(),
+                'header_start': match.start()
+            })
+    if not sections:
+        if markdown_review.strip():
+             temp_structured_data['summary'] = "Could not parse the review markdown. The format was not recognized."
+             temp_structured_data['overall_impression'] = markdown_review
+        return LLMReviewOutput(**temp_structured_data)
+    # --- 2. Process each identified section by extracting content between headers ---
+    for i in range(len(sections)):
+        current_section = sections[i]
+        title = current_section['title']
+        content_start = current_section['content_start']
+        # The content ends where the next section's header begins.
+        # For the last section, it ends at the end of the string.
+        content_end = sections[i+1]['header_start'] if i + 1 < len(sections) else len(markdown_review)
+        content = markdown_review[content_start:content_end].strip()
+        if "Overall Impression" in title:
+            temp_structured_data['overall_impression'] = content
+        elif "Specific Observations and Suggestions" in title:
+            # This pattern handles "- **`file.py`:**", "### file.py", and "**File: file.py**"
+            file_header_line_pattern = re.compile(
+                r"^\s*(?:-\s+\*\*(?:`?)([\w\/\.\-_]+\.\w+)(?:`?):\*\*|###\s*`?([\w\/\.\-_]+\.\w+)`?|\*\*File:\s*`?([\w\/\.\-_]+\.\w+)`?\*\*)\s*$",
+                re.MULTILINE
+            )
+            file_matches = list(file_header_line_pattern.finditer(content))
+            for j, match in enumerate(file_matches):
+                file_name = next((g for g in match.groups() if g is not None), None)
+                if not file_name: continue
+                file_name = file_name.strip().replace('`', '')
+                start_idx = match.end()
+                end_idx = file_matches[j+1].start() if j + 1 < len(file_matches) else len(content)
+                file_content_block = content[start_idx:end_idx].strip()
+                # Assumes _parse_bullet_comments is defined elsewhere and works correctly
+                parsed_comments = _parse_bullet_comments(file_content_block)
+                if parsed_comments:
+                    temp_structured_data['file_reviews'].append(FileReviewComments(
+                        file_path=file_name,
+                        sections={"General_File_Comments": parsed_comments}
+                    ))
+        elif "Summary" in title:
+            temp_structured_data['summary'] = content
+            # Extract approval status from the summary
+            approval_match = re.search(r"^\s*\*\*(?:Action|Recommended Action|Status):\*\*\s*(Approve|Request Changes|Comment)", content, re.IGNORECASE | re.MULTILINE)
+            if approval_match:
+                temp_structured_data['approval_status'] = approval_match.group(1).strip().capitalize()
+        else: # Any other section is treated as a general section
+            if content:
+                temp_structured_data['general_sections'].append(ParsedReviewSection(
+                    title=title,
+                    content=content
+                ))
+    # --- 3. Final fallbacks and cleanup ---
+    if not temp_structured_data['summary']:
+        temp_structured_data['summary'] = "Automated review completed."
+    return LLMReviewOutput(**temp_structured_data)
 # --- Graph Nodes ---