dealflow-ai / src /tasks /research_tasks.py
PeterBot22's picture
feat: DealFlow AI MVP — 3-agent CrewAI due diligence system on HF Spaces
8dcf472 verified
"""Research task — pitch deck extraction + web research."""
from __future__ import annotations
from crewai import Agent, Task
def build_research_task(
    researcher: Agent,
    pdf_path: str,
    company_name: str,
) -> Task:
    """Build the research task for a startup's due-diligence run.

    The task prompt instructs the agent to (1) extract the pitch deck at
    ``pdf_path`` and (2) do web research, then return a JSON report following
    the template embedded in the prompt.

    Args:
        researcher: Agent the task is assigned to (passed as ``agent=``).
        pdf_path: Path of the pitch-deck PDF; interpolated verbatim into the
            prompt as the file to hand to the ``pdf_extractor`` tool.
        company_name: Startup name; interpolated verbatim into the prompt.

    Returns:
        A ``Task`` carrying the rendered prompt, the expected-output summary,
        and ``researcher`` as its agent.
    """
    # NOTE(review): the prompt refers to pdf_extractor, SerperDevTool and
    # ScrapeWebsiteTool by name only; nothing here attaches them, so they are
    # presumably configured on the agent — confirm against the agent factory.
    #
    # The prompt lines are kept at column 0 on purpose: any leading whitespace
    # inside the triple-quoted literal would become part of the prompt text.
    # Doubled braces ({{ / }}) are f-string escapes that render as the literal
    # braces of the JSON template.
    description = f"""
You are researching the startup **{company_name}** for investment due diligence.
## Step 1 — Extract Pitch Deck
Use the pdf_extractor tool on the file at: `{pdf_path}`
Extract:
- Company name and tagline
- Problem statement
- Proposed solution / product
- Business model
- Market size claims (TAM / SAM / SOM)
- Revenue / traction metrics
- Team members
- Funding ask and use of funds
## Step 2 — Web Research
Using SerperDevTool and ScrapeWebsiteTool, research:
1. **Market validation**: Is the TAM claim credible? Find 2-3 independent sources.
2. **Competitor landscape**: Who are the top 5 competitors? What are their funding stages?
3. **Team credibility**: Search LinkedIn/Crunchbase for the founding team.
4. **Recent news**: Any relevant regulatory, technology, or market shifts affecting this space?
## Output Format
Return a structured JSON report:
```json
{{
"company_name": "...",
"tagline": "...",
"problem": "...",
"solution": "...",
"business_model": "...",
"market": {{
"tam": "...",
"sam": "...",
"som": "...",
"validation_sources": ["source1", "source2"]
}},
"traction": {{
"revenue": "...",
"users": "...",
"growth_rate": "..."
}},
"team": [{{"name": "...", "role": "...", "background": "..."}}],
"competitors": [{{"name": "...", "stage": "...", "key_diff": "..."}}],
"funding_ask": "...",
"key_claims": ["claim1", "claim2"],
"red_flags": ["flag1", "flag2"],
"positive_signals": ["signal1", "signal2"]
}}
```
"""
    expected_output = (
        "A comprehensive JSON research report covering the pitch deck summary, "
        "market validation, competitor landscape, team analysis, and key findings."
    )
    return Task(
        description=description,
        expected_output=expected_output,
        agent=researcher,
    )