# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""
Doc Quality Env Environment Implementation.

A real-world environment for technical documentation quality assessment.
Agents evaluate documentation for clarity, completeness, accuracy, and structure.
"""

from uuid import uuid4
from typing import List, Dict
from openenv.core.env_server.interfaces import Environment
from openenv.core.env_server.types import State

try:
    from ..models import DocQualityAction, DocQualityObservation
except ImportError:
    from models import DocQualityAction, DocQualityObservation


# Task definitions with ground-truth issues, keyed by task id.
#
# Each entry has the same fields:
#   difficulty   - difficulty label ("easy" / "medium" / "hard"), passed
#                  through to the observation as ``task_difficulty``.
#   name         - human-readable task title (observation ``task_name``).
#   description  - one-line summary of what the reviewer should look for.
#   doc          - the Markdown document under review, returned verbatim as
#                  the observation's ``current_doc``.
#   known_issues - ground-truth issue descriptions that submitted issues are
#                  compared against with ``calculate_issue_overlap``.
#   max_steps    - step budget; ``step()`` marks the episode done once the
#                  step count reaches this value.
#
# NOTE(review): within this file only "easy_api_doc" is ever selected (by
# ``__init__`` and ``reset``); the medium and hard tasks are defined but not
# reachable from the code visible here.
TASKS = {
    # Easy: a short API reference whose ground-truth issues are all missing
    # sections.
    "easy_api_doc": {
        "difficulty": "easy",
        "name": "Simple API Documentation Review",
        "description": "Review API documentation for missing sections",
        "doc": """
# User API Documentation

## Overview
The User API provides endpoints to manage user accounts and profiles.

## Endpoints

### GET /users/{id}
Retrieves user information.
- Parameter: user_id (integer)

### POST /users
Creates a new user account.

### PUT /users/{id}
Updates user information.
""",
        "known_issues": [
            "Missing response format documentation (what fields are returned)",
            "Missing error codes documentation",
            "Missing authentication requirements",
            "Missing rate limiting information"
        ],
        "max_steps": 8,
    },
    # Medium: a fuller API reference; ground-truth issues cover vague wording,
    # missing details, and inconsistent formatting.
    "medium_api_doc": {
        "difficulty": "medium",
        "name": "Complex API Documentation Review",
        "description": "Identify clarity, completeness, and consistency issues",
        "doc": """
# Payment Processing API

## Overview
The Payment API handles all transactions. It's robust and secure.

## Authentication
Use Bearer token in headers.

## Endpoints

### POST /transactions
Process a payment transaction.
Parameters:
- amount: decimal
- currency: string
- account_id: integer
Response: transaction_id, status

### GET /transactions/{id}
Get transaction details.
Returns: All transaction information

### POST /refunds
Issues a refund for a transaction.
Parameter: original_transaction_id
Returns: refund_id, refund_status

## Error Handling
Errors are returned as JSON with an error field containing the error message.

## Rate Limiting
API has rate limits but details in another document.
""",
        "known_issues": [
            "Vague language: 'robust and secure' - needs specifics",
            "Missing required vs optional parameters",
            "Inconsistent response documentation format",
            "Missing timeout values",
            "No example requests/responses shown",
            "Unclear which endpoints require authentication",
            "Missing field type specifications for responses"
        ],
        "max_steps": 10,
    },
    # Hard: a multi-chapter guide; ground-truth issues focus on structure,
    # cross-references between chapters, and incomplete content.
    "hard_guide_review": {
        "difficulty": "hard",
        "name": "Comprehensive Documentation Guide Review",
        "description": "Identify structural, consistency, and cross-reference issues",
        "doc": """
# Complete Developer Guide

## Chapter 1: Getting Started
Install the SDK: `pip install myservice-sdk`

## Chapter 2: Authentication
Three authentication methods:
1. API Keys
2. OAuth 2.0
3. Service Accounts

See Chapter 5 for detailed implementation.

## Chapter 3: Making Requests
All requests use HTTP/REST. Use the format from Chapter 4.

## Chapter 4: Response Formats
JSON responses include:
- data: object with results
- error: null on success
- timestamp: ISO 8601 format

See examples in Chapter 6.

## Chapter 5: Authentication Deep Dive
API Keys: Generate in dashboard, pass as 'Authorization: Bearer <key>'
OAuth: See Chapter 2 for overview
Service Accounts: Use for server-to-server auth

## Chapter 6: Examples
Coming soon...

## Chapter 7: Error Codes
200: Success
400: Bad request
401: Unauthorized
429: Rate limited
See error handling in Chapter 3.

## Appendix: FAQ
Q: How do I authenticate?
A: Use one of the three methods in Chapter 2
""",
        "known_issues": [
            "Chapter 6 (Examples) is incomplete - promised but marked 'Coming soon'",
            "Circular reference: Ch2 says see Ch5, Ch5 says see Ch2 for OAuth",
            "Missing chapter numbers: mentions 'Chapter 3' error handling but spread across multiple chapters",
            "Inconsistent terminology: sometimes 'API Key', sometimes 'key'",
            "Response format in Ch4 missing success status field definition",
            "No versioning information mentioned",
            "Missing SLA/availability information",
            "Chapter 5 OAuth description incomplete vs Chapter 2",
            "No mention of SDK vs REST trade-offs",
            "FAQ too brief - needs more questions"
        ],
        "max_steps": 12,
    }
}


def calculate_issue_overlap(identified: List[str], known: List[str]) -> float:
    """Return the fraction of known issues covered by the identified issues.

    Matching is a case-insensitive comparison of whitespace-separated word
    sets: an identified issue matches a known issue when the number of shared
    words, divided by the size of the larger word set, exceeds 0.4. Only
    descriptions longer than 20 characters take part in matching.

    Each known issue is counted at most once, so repeating the same finding
    (or rephrasing it several times) cannot inflate the result.

    Args:
        identified: Issue descriptions submitted by the agent.
        known: Ground-truth issue descriptions.

    Returns:
        A value in [0.0, 1.0]: the number of distinct known issues matched,
        divided by ``len(known)``. Returns 1.0 when ``known`` is empty and
        0.0 when ``identified`` is empty.
    """
    if not known:
        return 1.0
    if not identified:
        return 0.0

    # Tokenize the known issues once; ``None`` marks entries that are too
    # short to take part in matching.
    known_word_sets = [
        set(issue.lower().split()) if len(issue) > 20 else None
        for issue in known
    ]

    matched_indices = set()
    for identified_issue in identified:
        if len(identified_issue) <= 20:
            continue
        identified_words = set(identified_issue.lower().split())

        for index, known_words in enumerate(known_word_sets):
            if known_words is None or index in matched_indices:
                continue
            larger = max(len(identified_words), len(known_words))
            # ``larger`` is 0 only when both strings are pure whitespace;
            # treat that as "no match" instead of dividing by zero.
            if larger and len(identified_words & known_words) / larger > 0.4:
                matched_indices.add(index)
                break  # one identified issue covers at most one known issue

    return len(matched_indices) / len(known)


class DocQualityEnvironment(Environment):
    """
    Documentation Quality Assessment Environment.

    Agents evaluate technical documentation and identify quality issues.
    This is a real-world task: technical writers and product teams use similar processes
    to improve their documentation.

    The task table defines three tasks of increasing difficulty:
    1. Simple API docs (easy) - find missing sections
    2. Complex API docs (medium) - find clarity and completeness issues
    3. Guide structure (hard) - find structural and cross-reference issues

    Supported ``action_type`` values for ``step``:
    - ``identify_issue``: report an issue found in the document.
    - ``suggest_improvement``: propose a fix.
    - ``rate_quality``: give a final score in [0, 1]; ends the episode.
    """

    SUPPORTS_CONCURRENT_SESSIONS: bool = True

    def __init__(self):
        """Initialize the environment with the default (easy) task."""
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self._current_task_key: str = "easy_api_doc"
        self._current_task: Dict = TASKS[self._current_task_key]  # Initialize with default task
        self._identified_issues: List[str] = []
        self._final_score: float = 0.0
        # True once the agent has submitted a valid rating. Tracked separately
        # from ``_final_score`` because 0.0 is a legitimate rating.
        self._rated: bool = False
        self._episode_rewards: List[float] = []

    def reset(self) -> DocQualityObservation:
        """Start a new episode on the easy task and return the first observation.

        Clears all per-episode bookkeeping and assigns a fresh episode id.
        The initial observation exposes only the first two known issues as
        hints.
        """
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self._identified_issues = []
        self._final_score = 0.0
        self._rated = False
        self._episode_rewards = []

        # Start with easy task
        self._current_task_key = "easy_api_doc"
        self._current_task = TASKS[self._current_task_key]

        return DocQualityObservation(
            task_name=self._current_task["name"],
            task_difficulty=self._current_task["difficulty"],  # type: ignore
            current_doc=self._current_task["doc"],
            doc_section="Overview",
            issues_identified=[],
            known_issues=self._current_task["known_issues"][:2],  # Show 2 hints initially
            quality_score=0.0,
            step_count=0,
            max_steps=self._current_task["max_steps"],
            feedback="Task started. Analyze the documentation and identify quality issues.",
            done=False,
            reward=0.0,
        )

    def step(self, action: DocQualityAction) -> DocQualityObservation:  # type: ignore[override]
        """Execute one step of documentation review.

        Rewards:
        - ``identify_issue``: 0.2 if the description matches a known issue,
          0.1 otherwise; 0.0 if the description is 10 characters or shorter.
        - ``suggest_improvement``: 0.15 if longer than 15 characters, else 0.0.
        - ``rate_quality``: up to 0.25, scaled by how close the rating is to
          the internally computed score; a valid rating ends the episode.
        - Any other action type: 0.0.

        The episode also ends once the step count reaches the task's
        ``max_steps``; if no valid rating was given by then, the final score
        is computed automatically.

        Args:
            action: The agent's action; ``action_type`` and ``content`` are read.

        Returns:
            The observation after applying the action.
        """
        self._state.step_count += 1
        reward = 0.0
        feedback = ""
        done = False

        if action.action_type == "identify_issue":
            # Agent found an issue
            if action.content and len(action.content) > 10:
                self._identified_issues.append(action.content)

                # calculate_issue_overlap returns matches / len(known_issues),
                # so a single issue can never score above 1 / len(known_issues).
                # Any positive value therefore means "matched a known issue".
                known_issues = self._current_task["known_issues"]
                overlap = calculate_issue_overlap([action.content], known_issues)

                if overlap > 0.0:
                    reward = 0.2  # Good identification
                    feedback = "Valid issue identified."
                else:
                    reward = 0.1  # Partial credit for effort
                    feedback = "Issue noted, but may not be significant."
            else:
                feedback = "Issue description too vague."

        elif action.action_type == "suggest_improvement":
            # Agent suggests how to fix an issue
            if action.content and len(action.content) > 15:
                reward = 0.15  # Credit for constructive suggestions
                feedback = "Good improvement suggestion."
            else:
                feedback = "Suggestion too vague."

        elif action.action_type == "rate_quality":
            # Agent provides final quality assessment; the score is the first
            # whitespace-separated token of the content.
            try:
                score = float(action.content.split()[0]) if action.content else 0.0
            except (ValueError, IndexError, AttributeError, TypeError):
                # Non-numeric token, whitespace-only content, or non-string
                # content: report it and let the episode continue.
                feedback = "Invalid quality rating format."
            else:
                score = max(0.0, min(1.0, score))

                # Reward is proportional to how close the rating is to the
                # internally computed score.
                true_score = self._calculate_true_score()
                accuracy = 1.0 - abs(score - true_score)
                reward = accuracy * 0.25

                self._final_score = score
                self._rated = True
                feedback = f"Quality rated at {score:.2f}. Episode complete."
                done = True

        else:
            feedback = "Unknown action type."

        self._episode_rewards.append(reward)

        # Check if max steps reached
        if self._state.step_count >= self._current_task["max_steps"]:
            done = True
            if not self._rated:
                # Auto-score based on issues found. Keyed on the flag rather
                # than the score so an explicit rating of 0.0 is preserved.
                self._final_score = self._calculate_true_score()

        # NOTE(review): unlike reset(), which exposes two hints, this returns
        # the full ground-truth list on every step - confirm that is intended.
        return DocQualityObservation(
            task_name=self._current_task["name"],
            task_difficulty=self._current_task["difficulty"],  # type: ignore
            current_doc=self._current_task["doc"],
            doc_section=f"Section {self._state.step_count}",
            # Snapshot so later steps cannot mutate an already-returned observation.
            issues_identified=list(self._identified_issues),
            known_issues=self._current_task["known_issues"],
            quality_score=self._final_score,
            step_count=self._state.step_count,
            max_steps=self._current_task["max_steps"],
            feedback=feedback,
            done=done,
            reward=reward,
            metadata={
                "issues_count": len(self._identified_issues),
                "step": self._state.step_count,
                "total_reward": sum(self._episode_rewards),
            }
        )

    def _calculate_true_score(self) -> float:
        """Compute the reference score from the issues identified so far.

        The score is ``found / len(known) + 0.2 * overlap``, capped at 1.0,
        where ``found`` is the number of submitted issues (capped at the
        number of known issues) and ``overlap`` is the value returned by
        ``calculate_issue_overlap``. Returns 1.0 when the task has no known
        issues and 0.0 when nothing has been identified yet.
        """
        known_issues = self._current_task["known_issues"]

        if not known_issues:
            return 1.0

        if not self._identified_issues:
            return 0.0

        overlap = calculate_issue_overlap(self._identified_issues, known_issues)

        # NOTE(review): the base term counts every submitted issue, matched or
        # not; only the bonus term depends on agreement with the known issues.
        found_count = min(len(self._identified_issues), len(known_issues))
        base_score = found_count / len(known_issues)

        # Bonus for quality overlap
        bonus = overlap * 0.2

        return min(1.0, base_score + bonus)

    @property
    def state(self) -> State:
        """Get the current environment state."""
        return self._state