# Snapshot metadata: file size 7,274 bytes; revision 76c3b0a.
"""
Opportunity Scanner for Award Identification
Scans funding databases and announcement sources for opportunities.
PLANNED DATA SOURCES:
---------------------
- Horizon Europe / CORDIS
- European Research Council (ERC)
- National funding agencies (DFG, ANR, UKRI, etc.)
- Foundation databases
- Corporate R&D partnerships
- Innovation prizes and awards
SCANNING STRATEGY:
-----------------
1. KEYWORD MATCHING:
- Technology-specific terms
- Research area keywords
- Institution eligibility terms
2. SEMANTIC SEARCH:
- Vector similarity to capability descriptions
- Cross-lingual matching for EU opportunities
3. FILTERING:
- Deadline filtering (exclude expired)
- Amount thresholds
- Eligibility pre-screening
Author: SPARKNET Team
Project: VISTA/Horizon EU
Status: Placeholder - In Development
"""
from typing import Optional, Dict, Any, List
from dataclasses import dataclass, field
from datetime import datetime, date
from enum import Enum
from loguru import logger
class DataSource(str, Enum):
    """Funding data sources.

    Members mix in ``str``, so each one compares equal to its lowercase
    value (``DataSource.CORDIS == "cordis"``) and can be rebuilt from that
    value (``DataSource("cordis")``).
    """

    # Horizon Europe calls published through CORDIS.
    CORDIS = "cordis"
    # European Research Council.
    ERC = "erc"
    # National agencies (the module docstring lists DFG, ANR, UKRI as examples).
    NATIONAL = "national"
    # Foundation databases.
    FOUNDATION = "foundation"
    # Corporate R&D partnerships.
    CORPORATE = "corporate"
    # Any other source, e.g. institution-specific feeds.
    CUSTOM = "custom"
@dataclass
class ScanConfiguration:
    """
    Configuration for opportunity scanning.

    Defines what and how to scan for opportunities. The dataclass performs
    no validation; apart from ``config_id`` and ``name`` (used when a
    configuration is registered with ``OpportunityScanner``), no code in
    this module reads these fields yet.
    """

    # Unique identifier; OpportunityScanner keys its registry on this value.
    config_id: str
    # Human-readable label, written to the log on registration.
    name: str
    # Data sources this configuration should cover.
    sources: List[DataSource]
    # Keyword terms to match against opportunities.
    keywords: List[str]
    # Research areas to match against opportunities.
    research_areas: List[str]

    # Optional funding-amount bounds; None means no bound on that side.
    # NOTE(review): presumably expressed in `currency` - confirm once the
    # filtering logic exists.
    min_amount: Optional[float] = None
    max_amount: Optional[float] = None
    currency: str = "EUR"

    # Deadline filtering switches.
    exclude_expired: bool = True
    include_rolling: bool = True  # Include opportunities with no fixed deadline

    # Scheduling metadata.
    # NOTE(review): nothing in this module updates `last_scan` or honours
    # `scan_frequency_hours` / `enabled` yet.
    scan_frequency_hours: int = 24
    last_scan: Optional[datetime] = None
    enabled: bool = True
@dataclass
class ScanResult:
    """
    Result of an opportunity scan.

    Contains discovered-opportunity counters and scan metadata. Only
    ``scan_id``, ``config_id`` and ``started_at`` are required; every other
    field starts from an "empty, still running" default.
    """

    # Identifier of this scan run.
    scan_id: str
    # Identifier of the configuration the scan was run for.
    config_id: str
    # When the scan started.
    started_at: datetime
    # When the scan finished; None until it has completed.
    completed_at: Optional[datetime] = None
    # Names of the sources that were scanned (fresh list per instance).
    sources_scanned: List[str] = field(default_factory=list)
    # Counters, all starting at zero.
    opportunities_found: int = 0
    new_opportunities: int = 0
    updated_opportunities: int = 0
    # Error messages collected during the scan (fresh list per instance).
    errors: List[str] = field(default_factory=list)
    # Free-form status string; defaults to "in_progress".
    status: str = "in_progress"
class OpportunityScanner:
    """
    Scans funding databases for opportunities.

    This component:
    - Connects to funding data sources
    - Runs periodic scans
    - Identifies new opportunities
    - Updates existing opportunity data

    Current status: every scan/search method is a placeholder that logs the
    request and returns an empty list (or a ScanResult with status
    "placeholder"); no external source is contacted.

    INTEGRATION NOTES:
    -----------------
    For production deployment, integrate with:

    1. HORIZON EUROPE (CORDIS):
       - Use CORDIS API for call announcements
       - Parse work programme documents
       - Track topic deadlines

    2. NATIONAL AGENCIES:
       - DFG (Germany): RSS feeds
       - ANR (France): Open data portal
       - UKRI (UK): Gateway API

    3. FOUNDATIONS:
       - Scrape foundation websites
       - Monitor RSS/newsletter feeds
       - Parse PDF announcements

    4. CUSTOM SOURCES:
       - Support for institution-specific sources
       - Private funding networks
       - Industry partnership programs
    """

    # Maximum number of query characters echoed to the log by semantic_search.
    _QUERY_LOG_PREVIEW_CHARS = 50

    def __init__(
        self,
        database_url: Optional[str] = None,
        embedding_client: Optional[Any] = None,
    ):
        """
        Initialize Opportunity Scanner.

        Both arguments are only stored on the instance; no connection is
        opened here.

        Args:
            database_url: Database for storing opportunities
            embedding_client: Client for semantic search embeddings
        """
        self.database_url = database_url
        self.embedding_client = embedding_client
        self.name = "OpportunityScanner"

        # Registered scan configurations, keyed by ScanConfiguration.config_id
        self.configurations: Dict[str, ScanConfiguration] = {}

        logger.info(f"Initialized {self.name} (placeholder)")

    async def register_configuration(
        self,
        config: ScanConfiguration,
    ) -> None:
        """
        Register a scan configuration.

        A configuration registered under an already-used ``config_id``
        replaces the previous entry.

        Args:
            config: Scan configuration to register
        """
        self.configurations[config.config_id] = config
        logger.info(f"Registered scan configuration: {config.name}")

    async def run_scan(
        self,
        config_id: Optional[str] = None,
    ) -> ScanResult:
        """
        Run an opportunity scan.

        Args:
            config_id: Specific configuration to use (or all if None)

        Returns:
            Scan result with discovered opportunities. For now this is a
            placeholder: all counters are zero, no sources are listed and
            ``status`` is "placeholder".

        TODO: Implement actual scanning logic
        """
        # Read the clock once for the start so that scan_id and started_at
        # always describe the same instant.
        started_at = datetime.now()
        logger.info(f"Running opportunity scan (config: {config_id or 'all'})")

        # Placeholder response
        return ScanResult(
            scan_id=f"scan_{started_at.strftime('%Y%m%d_%H%M%S')}",
            config_id=config_id or "all",
            started_at=started_at,
            completed_at=datetime.now(),
            sources_scanned=[],
            opportunities_found=0,
            new_opportunities=0,
            updated_opportunities=0,
            status="placeholder",
        )

    async def scan_cordis(
        self,
        keywords: List[str],
    ) -> List[Dict[str, Any]]:
        """
        Scan CORDIS for Horizon Europe opportunities.

        Args:
            keywords: Keywords to search for

        Returns:
            List of opportunities from CORDIS (currently always empty)

        TODO: Implement CORDIS API integration
        """
        logger.info(f"Scanning CORDIS with keywords: {keywords}")

        # Placeholder - would use CORDIS API
        return []

    async def scan_erc(
        self,
        research_areas: List[str],
    ) -> List[Dict[str, Any]]:
        """
        Scan ERC for grant opportunities.

        Args:
            research_areas: Research areas to match

        Returns:
            List of ERC opportunities (currently always empty)

        TODO: Implement ERC portal integration
        """
        logger.info(f"Scanning ERC for research areas: {research_areas}")

        # Placeholder - would scrape ERC portal
        return []

    async def semantic_search(
        self,
        query: str,
        sources: Optional[List[DataSource]] = None,
        top_k: int = 10,
    ) -> List[Dict[str, Any]]:
        """
        Semantic search for relevant opportunities.

        Uses vector similarity to find opportunities
        matching natural language descriptions.

        Args:
            query: Natural language query
            sources: Data sources to search
            top_k: Number of results to return

        Returns:
            List of matching opportunities with scores (currently always
            empty; ``sources`` and ``top_k`` are not used yet)

        TODO: Implement embedding-based search
        """
        # Log at most a short preview of the query, and mark it with an
        # ellipsis only when something was actually cut off.
        preview = query[: self._QUERY_LOG_PREVIEW_CHARS]
        ellipsis = "..." if len(query) > self._QUERY_LOG_PREVIEW_CHARS else ""
        logger.info(f"Semantic search: {preview}{ellipsis}")

        # Placeholder - would use embedding similarity
        return []

    async def get_scan_history(
        self,
        limit: int = 10,
    ) -> List[ScanResult]:
        """
        Get history of recent scans.

        Args:
            limit: Maximum number of results

        Returns:
            List of recent scan results (currently always empty)

        TODO: Implement scan history retrieval
        """
        logger.info(f"Getting scan history (limit: {limit})")

        # Placeholder
        return []
# (end of module)