"""Research task — pitch deck extraction + web research."""
from __future__ import annotations

from crewai import Agent, Task


def build_research_task(
    researcher: Agent,
    pdf_path: str,
    company_name: str,
) -> Task:
    """Create the due-diligence research task for a single startup.

    The prompt instructs the agent to extract the pitch deck located at
    ``pdf_path``, perform web research about ``company_name``, and answer
    with a JSON report following the schema embedded in the prompt.

    Args:
        researcher: Agent the task is assigned to.
        pdf_path: Pitch deck location, inserted verbatim into the prompt.
        company_name: Startup name, inserted verbatim into the prompt.

    Returns:
        A ``Task`` holding the rendered prompt, the expected-output
        summary, and the assigned agent.
    """
    # Doubled braces render as the literal braces of the JSON example;
    # only the two named fields below are substituted.
    prompt_template = """
You are researching the startup **{company_name}** for investment due diligence.

## Step 1 — Extract Pitch Deck
Use the pdf_extractor tool on the file at: `{pdf_path}`
Extract:
- Company name and tagline
- Problem statement
- Proposed solution / product
- Business model
- Market size claims (TAM / SAM / SOM)
- Revenue / traction metrics
- Team members
- Funding ask and use of funds

## Step 2 — Web Research
Using SerperDevTool and ScrapeWebsiteTool, research:
1. **Market validation**: Is the TAM claim credible? Find 2-3 independent sources.
2. **Competitor landscape**: Who are the top 5 competitors? What are their funding stages?
3. **Team credibility**: Search LinkedIn/Crunchbase for the founding team.
4. **Recent news**: Any relevant regulatory, technology, or market shifts affecting this space?

## Output Format
Return a structured JSON report:
```json
{{
  "company_name": "...",
  "tagline": "...",
  "problem": "...",
  "solution": "...",
  "business_model": "...",
  "market": {{
    "tam": "...",
    "sam": "...",
    "som": "...",
    "validation_sources": ["source1", "source2"]
  }},
  "traction": {{
    "revenue": "...",
    "users": "...",
    "growth_rate": "..."
  }},
  "team": [{{"name": "...", "role": "...", "background": "..."}}],
  "competitors": [{{"name": "...", "stage": "...", "key_diff": "..."}}],
  "funding_ask": "...",
  "key_claims": ["claim1", "claim2"],
  "red_flags": ["flag1", "flag2"],
  "positive_signals": ["signal1", "signal2"]
}}
```
"""
    rendered_prompt = prompt_template.format(
        company_name=company_name,
        pdf_path=pdf_path,
    )

    # One-sentence summary of the deliverable the agent must produce.
    deliverable = (
        "A comprehensive JSON research report covering the pitch deck summary, "
        "market validation, competitor landscape, team analysis, and key findings."
    )

    return Task(
        description=rendered_prompt,
        expected_output=deliverable,
        agent=researcher,
    )