"""Research task — pitch deck extraction + web research.""" from __future__ import annotations from crewai import Agent, Task def build_research_task( researcher: Agent, pdf_path: str, company_name: str, ) -> Task: return Task( description=f""" You are researching the startup **{company_name}** for investment due diligence. ## Step 1 — Extract Pitch Deck Use the pdf_extractor tool on the file at: `{pdf_path}` Extract: - Company name and tagline - Problem statement - Proposed solution / product - Business model - Market size claims (TAM / SAM / SOM) - Revenue / traction metrics - Team members - Funding ask and use of funds ## Step 2 — Web Research Using SerperDevTool and ScrapeWebsiteTool, research: 1. **Market validation**: Is the TAM claim credible? Find 2-3 independent sources. 2. **Competitor landscape**: Who are the top 5 competitors? What are their funding stages? 3. **Team credibility**: Search LinkedIn/Crunchbase for the founding team. 4. **Recent news**: Any relevant regulatory, technology, or market shifts affecting this space? ## Output Format Return a structured JSON report: ```json {{ "company_name": "...", "tagline": "...", "problem": "...", "solution": "...", "business_model": "...", "market": {{ "tam": "...", "sam": "...", "som": "...", "validation_sources": ["source1", "source2"] }}, "traction": {{ "revenue": "...", "users": "...", "growth_rate": "..." }}, "team": [{{"name": "...", "role": "...", "background": "..."}}], "competitors": [{{"name": "...", "stage": "...", "key_diff": "..."}}], "funding_ask": "...", "key_claims": ["claim1", "claim2"], "red_flags": ["flag1", "flag2"], "positive_signals": ["signal1", "signal2"] }} ``` """, expected_output=( "A comprehensive JSON research report covering the pitch deck summary, " "market validation, competitor landscape, team analysis, and key findings." ), agent=researcher, )