B1acB1rd committed on
Commit ·
4d92cd5
0
Parent(s):
PIOE 2.0 ready for deployment
Browse files- .env.example +62 -0
- .gitignore +63 -0
- Dockerfile +31 -0
- Procfile +2 -0
- README.md +136 -0
- backend/__init__.py +17 -0
- backend/config.py +76 -0
- backend/database.py +32 -0
- backend/delivery/__init__.py +6 -0
- backend/delivery/digest.py +171 -0
- backend/ingestion/__init__.py +27 -0
- backend/ingestion/arxiv_client.py +124 -0
- backend/ingestion/careers_client.py +403 -0
- backend/ingestion/github_client.py +154 -0
- backend/ingestion/grants_client.py +385 -0
- backend/ingestion/jobboard_client.py +472 -0
- backend/ingestion/reddit_client.py +185 -0
- backend/ingestion/rss_client.py +220 -0
- backend/ingestion/scheduler.py +371 -0
- backend/ingestion/superteam_client.py +178 -0
- backend/ingestion/web_scraper.py +227 -0
- backend/intelligence/__init__.py +22 -0
- backend/intelligence/classifier.py +214 -0
- backend/intelligence/credibility.py +125 -0
- backend/intelligence/llm_client.py +352 -0
- backend/intelligence/novelty.py +118 -0
- backend/intelligence/roi_scorer.py +340 -0
- backend/intelligence/scorer.py +101 -0
- backend/intelligence/silent_detector.py +313 -0
- backend/main.py +481 -0
- backend/models.py +237 -0
- config/sources.yaml +135 -0
- frontend/app.js +660 -0
- frontend/index.html +162 -0
- frontend/styles.css +905 -0
- render.yaml +25 -0
- requirements.txt +18 -0
.env.example
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ===========================================
|
| 2 |
+
# PIOE 2.0 Environment Configuration
|
| 3 |
+
# ===========================================
|
| 4 |
+
# Copy this file to .env and fill in your values
|
| 5 |
+
|
| 6 |
+
# ===========================================
|
| 7 |
+
# AI Provider (Required - pick one)
|
| 8 |
+
# ===========================================
|
| 9 |
+
AI_PROVIDER=gemini
|
| 10 |
+
|
| 11 |
+
# Gemini API (Free: https://makersuite.google.com/app/apikey)
|
| 12 |
+
GEMINI_API_KEY=your_gemini_api_key_here
|
| 13 |
+
|
| 14 |
+
# OpenAI API (Alternative to Gemini)
|
| 15 |
+
OPENAI_API_KEY=
|
| 16 |
+
|
| 17 |
+
# ===========================================
|
| 18 |
+
# Job Board APIs (Optional - add keys to pull from more job sources)
|
| 19 |
+
# ===========================================
|
| 20 |
+
|
| 21 |
+
# Adzuna API (Free: 250 requests/day)
|
| 22 |
+
# Sign up at: https://developer.adzuna.com/
|
| 23 |
+
ADZUNA_APP_ID=
|
| 24 |
+
ADZUNA_API_KEY=
|
| 25 |
+
|
| 26 |
+
# Jooble API (Free tier, aggregates LinkedIn/Indeed/Glassdoor)
|
| 27 |
+
# Sign up at: https://jooble.org/api/about
|
| 28 |
+
JOOBLE_API_KEY=
|
| 29 |
+
|
| 30 |
+
# RapidAPI for LinkedIn Jobs (Free: 100 requests/month)
|
| 31 |
+
# Sign up at: https://rapidapi.com/jaypat87/api/linkedin-jobs-search
|
| 32 |
+
RAPIDAPI_KEY=
|
| 33 |
+
|
| 34 |
+
# ===========================================
|
| 35 |
+
# Social APIs (Optional - for more sources)
|
| 36 |
+
# ===========================================
|
| 37 |
+
|
| 38 |
+
# Reddit API (get from reddit.com/prefs/apps)
|
| 39 |
+
REDDIT_CLIENT_ID=
|
| 40 |
+
REDDIT_CLIENT_SECRET=
|
| 41 |
+
REDDIT_USER_AGENT=PIOE/2.0
|
| 42 |
+
|
| 43 |
+
# GitHub API (for higher rate limits)
|
| 44 |
+
# Get at: https://github.com/settings/tokens
|
| 45 |
+
GITHUB_TOKEN=
|
| 46 |
+
|
| 47 |
+
# ===========================================
|
| 48 |
+
# Database
|
| 49 |
+
# ===========================================
|
| 50 |
+
DATABASE_URL=sqlite:///./pioe.db
|
| 51 |
+
|
| 52 |
+
# ===========================================
|
| 53 |
+
# Ingestion Schedule
|
| 54 |
+
# ===========================================
|
| 55 |
+
INGESTION_INTERVAL_HOURS=6
|
| 56 |
+
|
| 57 |
+
# ===========================================
|
| 58 |
+
# Scoring Thresholds (Lower = More Results)
|
| 59 |
+
# ===========================================
|
| 60 |
+
MIN_RELEVANCE_SCORE=0.3
|
| 61 |
+
MIN_NOVELTY_SCORE=0.3
|
| 62 |
+
MIN_CREDIBILITY_SCORE=0.5
|
.gitignore
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# PIOE .gitignore
|
| 2 |
+
|
| 3 |
+
# Environment files (contains secrets!)
|
| 4 |
+
.env
|
| 5 |
+
.env.local
|
| 6 |
+
|
| 7 |
+
# Database
|
| 8 |
+
*.db
|
| 9 |
+
*.sqlite
|
| 10 |
+
*.sqlite3
|
| 11 |
+
|
| 12 |
+
# Python
|
| 13 |
+
__pycache__/
|
| 14 |
+
*.py[cod]
|
| 15 |
+
*$py.class
|
| 16 |
+
*.so
|
| 17 |
+
.Python
|
| 18 |
+
build/
|
| 19 |
+
develop-eggs/
|
| 20 |
+
dist/
|
| 21 |
+
downloads/
|
| 22 |
+
eggs/
|
| 23 |
+
.eggs/
|
| 24 |
+
lib/
|
| 25 |
+
lib64/
|
| 26 |
+
parts/
|
| 27 |
+
sdist/
|
| 28 |
+
var/
|
| 29 |
+
wheels/
|
| 30 |
+
*.egg-info/
|
| 31 |
+
.installed.cfg
|
| 32 |
+
*.egg
|
| 33 |
+
|
| 34 |
+
# Virtual environments
|
| 35 |
+
venv/
|
| 36 |
+
ENV/
|
| 37 |
+
env/
|
| 38 |
+
.venv/
|
| 39 |
+
|
| 40 |
+
# IDE
|
| 41 |
+
.idea/
|
| 42 |
+
.vscode/
|
| 43 |
+
*.swp
|
| 44 |
+
*.swo
|
| 45 |
+
*~
|
| 46 |
+
|
| 47 |
+
# OS
|
| 48 |
+
.DS_Store
|
| 49 |
+
Thumbs.db
|
| 50 |
+
|
| 51 |
+
# Logs
|
| 52 |
+
*.log
|
| 53 |
+
logs/
|
| 54 |
+
|
| 55 |
+
# Testing
|
| 56 |
+
.pytest_cache/
|
| 57 |
+
.coverage
|
| 58 |
+
htmlcov/
|
| 59 |
+
|
| 60 |
+
# Misc
|
| 61 |
+
*.bak
|
| 62 |
+
tmp/
|
| 63 |
+
temp/
|
Dockerfile
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# PIOE Docker Image
FROM python:3.11-slim

# Set working directory
WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y \
    gcc \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first so the dependency layer is cached across code changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . .

# Create non-root user for security
RUN useradd -m appuser && chown -R appuser:appuser /app
USER appuser

# Expose port
EXPOSE 8000

# Health check
# The slim base image does not ship curl (and it is not installed above), so
# probe the API with the Python interpreter that is guaranteed to be present.
# urlopen raises on connection errors and on HTTP 4xx/5xx, which makes the
# command exit non-zero just like `curl -f` would.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/api/stats', timeout=5)" || exit 1

# Run the application
CMD ["uvicorn", "backend.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
Procfile
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Procfile for Render/Heroku
|
| 2 |
+
web: uvicorn backend.main:app --host 0.0.0.0 --port ${PORT:-8000}
|
README.md
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# PIOE 2.0 - Personal Intelligence & Opportunity Engine
|
| 2 |
+
|
| 3 |
+
Signal intelligence system for detecting early opportunities in AI, Robotics, Computer Vision, Finance, Scholarships, and Hackathons.
|
| 4 |
+
|
| 5 |
+
## Features
|
| 6 |
+
|
| 7 |
+
- **Multi-Source Ingestion**: arXiv, GitHub, RSS, Superteam, Web scraping
|
| 8 |
+
- **Job Board Aggregators**: Arbeitnow, TheMuse, Remotive, Adzuna, Jooble, LinkedIn
|
| 9 |
+
- **AI Classification**: Gemini-powered categorization and summarization
|
| 10 |
+
- **Smart Scoring**: Relevance, novelty, and credibility scoring with ROI analysis
|
| 11 |
+
- **Anti-Noise Filters**: Rejects recycled content and discussion posts
|
| 12 |
+
- **Modern Dashboard**: Real-time opportunity feed with filters
|
| 13 |
+
|
| 14 |
+
## Quick Start
|
| 15 |
+
|
| 16 |
+
### 1. Install Dependencies
|
| 17 |
+
|
| 18 |
+
```bash
|
| 19 |
+
cd PIOE
|
| 20 |
+
pip install -r requirements.txt
|
| 21 |
+
```
|
| 22 |
+
|
| 23 |
+
### 2. Configure Environment
|
| 24 |
+
|
| 25 |
+
```bash
|
| 26 |
+
cp .env.example .env
|
| 27 |
+
# Edit .env with your API keys
|
| 28 |
+
```
|
| 29 |
+
|
| 30 |
+
**Required:**
|
| 31 |
+
- `GEMINI_API_KEY` - Get from [Google AI Studio](https://makersuite.google.com/app/apikey)
|
| 32 |
+
|
| 33 |
+
**Optional (More Jobs):**
|
| 34 |
+
- `ADZUNA_APP_ID` / `ADZUNA_API_KEY` - [Adzuna Developer](https://developer.adzuna.com/) (Free: 250 req/day)
|
| 35 |
+
- `JOOBLE_API_KEY` - [Jooble API](https://jooble.org/api/about) (Free, aggregates LinkedIn/Indeed/Glassdoor)
|
| 36 |
+
- `RAPIDAPI_KEY` - [RapidAPI LinkedIn](https://rapidapi.com/jaypat87/api/linkedin-jobs-search) (Free: 100 req/month)
|
| 37 |
+
- `GITHUB_TOKEN` - For higher rate limits
|
| 38 |
+
|
| 39 |
+
### 3. Run the Server
|
| 40 |
+
|
| 41 |
+
```bash
|
| 42 |
+
uvicorn backend.main:app --reload
|
| 43 |
+
```
|
| 44 |
+
|
| 45 |
+
Open http://localhost:8000 in your browser.
|
| 46 |
+
|
| 47 |
+
### 4. Trigger First Ingestion
|
| 48 |
+
|
| 49 |
+
Click "Run Ingestion" in the dashboard or:
|
| 50 |
+
|
| 51 |
+
```bash
|
| 52 |
+
curl -X POST http://localhost:8000/api/ingest/run
|
| 53 |
+
```
|
| 54 |
+
|
| 55 |
+
## Data Sources
|
| 56 |
+
|
| 57 |
+
### Free (No API Key)
|
| 58 |
+
| Source | Type | Coverage |
|
| 59 |
+
|--------|------|----------|
|
| 60 |
+
| Arbeitnow | Jobs | Tech jobs worldwide |
|
| 61 |
+
| TheMuse | Jobs | Data Science, Engineering |
|
| 62 |
+
| Remotive | Remote Jobs | Software, DevOps, Data |
|
| 63 |
+
| ProFellow | Fellowships | Scholarships & Fellowships |
|
| 64 |
+
| RemoteOK | Remote Jobs | AI, ML, Internships |
|
| 65 |
+
| arXiv | Research | CS.CV, CS.RO, CS.AI papers |
|
| 66 |
+
| HN Jobs | Jobs | Startup jobs |
|
| 67 |
+
|
| 68 |
+
### With Free API Keys
|
| 69 |
+
| Source | Type | Coverage |
|
| 70 |
+
|--------|------|----------|
|
| 71 |
+
| Adzuna | Jobs | Indeed, Monster, CareerBuilder |
|
| 72 |
+
| Jooble | Jobs | LinkedIn, Indeed, Glassdoor (70+ sources) |
|
| 73 |
+
| RapidAPI LinkedIn | Jobs | Direct LinkedIn job listings |
|
| 74 |
+
| Superteam | Web3 | Bounties, grants |
|
| 75 |
+
|
| 76 |
+
## API Endpoints
|
| 77 |
+
|
| 78 |
+
| Endpoint | Method | Description |
|
| 79 |
+
|----------|--------|-------------|
|
| 80 |
+
| `/api/opportunities` | GET | List opportunities with filters |
|
| 81 |
+
| `/api/opportunities/{id}` | GET | Get single opportunity |
|
| 82 |
+
| `/api/opportunities/{id}/status` | PATCH | Update status (save, apply, dismiss) |
|
| 83 |
+
| `/api/digest/daily` | GET | Get daily intelligence brief |
|
| 84 |
+
| `/api/digest/weekly` | GET | Get weekly report |
|
| 85 |
+
| `/api/digest/urgent` | GET | Get opportunities with deadlines |
|
| 86 |
+
| `/api/ingest/run` | POST | Trigger full ingestion |
|
| 87 |
+
| `/api/stats` | GET | Get system statistics |
|
| 88 |
+
|
| 89 |
+
## Deployment
|
| 90 |
+
|
| 91 |
+
### Local Development
|
| 92 |
+
```bash
|
| 93 |
+
uvicorn backend.main:app --reload
|
| 94 |
+
```
|
| 95 |
+
|
| 96 |
+
### Production (with Gunicorn)
|
| 97 |
+
```bash
|
| 98 |
+
gunicorn backend.main:app -w 4 -k uvicorn.workers.UvicornWorker -b 0.0.0.0:8000
|
| 99 |
+
```
|
| 100 |
+
|
| 101 |
+
### Docker (Optional)
|
| 102 |
+
```dockerfile
|
| 103 |
+
FROM python:3.11-slim
|
| 104 |
+
WORKDIR /app
|
| 105 |
+
COPY requirements.txt .
|
| 106 |
+
RUN pip install -r requirements.txt
|
| 107 |
+
COPY . .
|
| 108 |
+
CMD ["uvicorn", "backend.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
| 109 |
+
```
|
| 110 |
+
|
| 111 |
+
## Opportunity Categories
|
| 112 |
+
|
| 113 |
+
- Scholarships & Fellowships
|
| 114 |
+
- Internships & Jobs
|
| 115 |
+
- Hackathons & Competitions
|
| 116 |
+
- Research Opportunities
|
| 117 |
+
- Grants & Funding
|
| 118 |
+
- Open Source Programs
|
| 119 |
+
- Web3 Bounties
|
| 120 |
+
|
| 121 |
+
## Anti-Noise Rules
|
| 122 |
+
|
| 123 |
+
PIOE automatically filters out:
|
| 124 |
+
- Discussion posts ("How do I get an internship?")
|
| 125 |
+
- Opinion-only content
|
| 126 |
+
- Reposted/recycled news
|
| 127 |
+
- "Top 10 tools" listicles
|
| 128 |
+
- Low engagement social posts
|
| 129 |
+
|
| 130 |
+
## License
|
| 131 |
+
|
| 132 |
+
MIT
|
| 133 |
+
|
| 134 |
+
---
|
| 135 |
+
|
| 136 |
+
**Most people search. You detect.**
|
backend/__init__.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
PIOE Backend - Init
|
| 3 |
+
"""
|
| 4 |
+
from .config import get_settings
|
| 5 |
+
from .database import SessionLocal, init_db, get_db
|
| 6 |
+
from .models import Opportunity, Source, OpportunityCategory, OpportunityStatus
|
| 7 |
+
|
| 8 |
+
__all__ = [
|
| 9 |
+
"get_settings",
|
| 10 |
+
"SessionLocal",
|
| 11 |
+
"init_db",
|
| 12 |
+
"get_db",
|
| 13 |
+
"Opportunity",
|
| 14 |
+
"Source",
|
| 15 |
+
"OpportunityCategory",
|
| 16 |
+
"OpportunityStatus"
|
| 17 |
+
]
|
backend/config.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
PIOE Configuration Management
|
| 3 |
+
"""
|
| 4 |
+
from pydantic_settings import BaseSettings
|
| 5 |
+
from functools import lru_cache
|
| 6 |
+
from typing import Literal
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class Settings(BaseSettings):
    """Application settings loaded from environment variables.

    Values are read from the process environment and from a local ``.env``
    file (see ``model_config`` below). Every field has a default, so the
    class can be instantiated without any configuration present.
    """

    # pydantic-settings (v2) configuration. This replaces the deprecated
    # nested ``class Config`` form while keeping the same options:
    # read ``.env`` as UTF-8 and ignore variables that are not declared here.
    model_config = {
        "env_file": ".env",
        "env_file_encoding": "utf-8",
        "extra": "ignore",
    }

    # AI Configuration
    ai_provider: Literal["gemini", "openai"] = "gemini"
    gemini_api_key: str = ""
    openai_api_key: str = ""

    # Reddit API
    reddit_client_id: str = ""
    reddit_client_secret: str = ""
    # NOTE(review): .env.example ships REDDIT_USER_AGENT=PIOE/2.0 while this
    # fallback still says 1.0 - confirm which value is intended.
    reddit_user_agent: str = "PIOE/1.0"

    # GitHub API
    github_token: str = ""

    # ===========================================
    # JOB BOARD APIs (Optional - get free keys)
    # ===========================================

    # Adzuna API (Free: 250 req/day)
    # Get at: https://developer.adzuna.com/
    adzuna_app_id: str = ""
    adzuna_api_key: str = ""

    # Jooble API (Free tier available)
    # Get at: https://jooble.org/api/about
    jooble_api_key: str = ""

    # RapidAPI LinkedIn Jobs (Free: 100 req/month)
    # Get at: https://rapidapi.com/jaypat87/api/linkedin-jobs-search
    rapidapi_key: str = ""

    # ===========================================
    # Database
    # ===========================================
    database_url: str = "sqlite:///./pioe.db"

    # Ingestion
    ingestion_interval_hours: int = 6

    # Scoring Thresholds (lower = more results saved)
    min_relevance_score: float = 0.3  # Lowered from 0.4 for more results
    min_novelty_score: float = 0.3
    min_credibility_score: float = 0.5

    # Keywords for relevance scoring
    high_priority_keywords: list[str] = [
        "computer vision", "robotics", "ROS", "PyTorch", "TensorFlow",
        "machine learning", "deep learning", "neural network",
        "internship", "fellowship", "scholarship", "grant", "funding",
        "hackathon", "competition", "challenge", "bounty",
        "research assistant", "PhD", "postdoc", "hiring",
        "early-stage", "seed", "Series A", "startup",
        "AI", "artificial intelligence", "data science", "NLP",
    ]
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
@lru_cache
def get_settings() -> Settings:
    """Return the application settings, building them only on the first call.

    The ``lru_cache`` decorator memoises the result, so later calls hand back
    the same ``Settings`` object instead of constructing a new one.
    """
    cached_settings = Settings()
    return cached_settings
|
| 76 |
+
|
backend/database.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
PIOE Database Configuration
|
| 3 |
+
"""
|
| 4 |
+
from sqlalchemy import create_engine
|
| 5 |
+
from sqlalchemy.orm import sessionmaker, declarative_base
|
| 6 |
+
from .config import get_settings
|
| 7 |
+
|
| 8 |
+
settings = get_settings()

# ``check_same_thread`` is only passed for SQLite URLs. Match on the URL
# prefix rather than a substring so that another backend whose URL merely
# contains the text "sqlite" (for example in a database name) is not handed
# a SQLite-only connect argument.
engine = create_engine(
    settings.database_url,
    connect_args=(
        {"check_same_thread": False}
        if settings.database_url.startswith("sqlite")
        else {}
    ),
)

# Session factory bound to the engine; commits and flushes are explicit.
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

# Declarative base class for the ORM models.
Base = declarative_base()
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def get_db():
    """Yield a database session for FastAPI dependency injection.

    A new session is created from ``SessionLocal`` and handed to the caller;
    it is closed once the caller is done with it, whether or not an error
    occurred in between.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        # Always release the session, even when the consumer raised.
        session.close()
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def init_db():
    """Create all tables declared on ``Base`` using the module's engine."""
    # Imported here (and otherwise unused) so the model classes are loaded
    # before the tables are created.
    from . import models  # noqa: F401

    metadata = Base.metadata
    metadata.create_all(bind=engine)
|
backend/delivery/__init__.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
PIOE Delivery Layer - Init
|
| 3 |
+
"""
|
| 4 |
+
from .digest import DigestGenerator
|
| 5 |
+
|
| 6 |
+
__all__ = ["DigestGenerator"]
|
backend/delivery/digest.py
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
PIOE Delivery Layer - Daily Digest Generator
|
| 3 |
+
"""
|
| 4 |
+
from datetime import datetime, timedelta
|
| 5 |
+
from typing import Optional
|
| 6 |
+
from sqlalchemy.orm import Session
|
| 7 |
+
|
| 8 |
+
from ..models import Opportunity, OpportunityCategory, OpportunityStatus
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class DigestGenerator:
    """
    Generates daily/weekly opportunity digests.

    Every public ``generate_*`` method selects opportunities whose status is
    ``OpportunityStatus.NEW`` and renders them as a markdown string.
    """

    def __init__(self, db: Session):
        self.db = db

    def generate_daily(self, limit: int = 10) -> str:
        """Generate today's top opportunities digest (last 24 hours)."""
        since = datetime.utcnow() - timedelta(days=1)
        return self._format_digest(
            self._top_new_since(since, limit),
            "Daily Intelligence Brief",
        )

    def generate_weekly(self, limit: int = 25) -> str:
        """Generate weekly opportunities digest (last 7 days)."""
        since = datetime.utcnow() - timedelta(days=7)
        return self._format_digest(
            self._top_new_since(since, limit),
            "Weekly Intelligence Report",
        )

    def generate_by_category(
        self,
        category: OpportunityCategory,
        limit: int = 10
    ) -> str:
        """Generate digest for a specific category (last 7 days)."""
        since = datetime.utcnow() - timedelta(days=7)
        opportunities = self._top_new_since(
            since, limit, Opportunity.category == category
        )
        # Use the shared label helper so underscores in the enum value are
        # rendered as spaces, matching the per-opportunity category label.
        return self._format_digest(
            opportunities,
            f"{self._enum_label(category)} Opportunities"
        )

    def generate_urgent(self, limit: int = 10) -> str:
        """Generate digest for opportunities whose deadline is within 14 days."""
        now = datetime.utcnow()
        soon = now + timedelta(days=14)

        opportunities = self.db.query(Opportunity).filter(
            Opportunity.deadline.isnot(None),
            Opportunity.deadline > now,
            Opportunity.deadline <= soon,
            Opportunity.status == OpportunityStatus.NEW
        ).order_by(
            Opportunity.deadline.asc()
        ).limit(limit).all()

        return self._format_digest(opportunities, "⚡ Urgent - Deadlines Approaching")

    def _top_new_since(self, since: datetime, limit: int, *extra_filters) -> list:
        """Return up to ``limit`` NEW opportunities discovered since ``since``.

        Results are ordered by ``combined_score`` descending. Any
        ``extra_filters`` are ANDed with the base conditions.
        """
        return self.db.query(Opportunity).filter(
            Opportunity.discovered_at >= since,
            *extra_filters,
            Opportunity.status == OpportunityStatus.NEW
        ).order_by(
            Opportunity.combined_score.desc()
        ).limit(limit).all()

    @staticmethod
    def _enum_label(member) -> str:
        """Turn an enum member's value into a display label.

        ``open_source`` becomes ``Open Source``. A missing (``None``) member
        yields ``Unknown`` instead of raising.
        """
        if member is None:
            return "Unknown"
        return member.value.replace("_", " ").title()

    def _format_digest(self, opportunities: list[Opportunity], title: str) -> str:
        """Format opportunities into a markdown digest headed by ``title``."""
        lines = [
            f"# {title}",
            f"*Generated: {datetime.utcnow().strftime('%Y-%m-%d %H:%M UTC')}*",
            "",
            f"**{len(opportunities)} opportunities detected**",
            "",
            "---",
            ""
        ]

        if not opportunities:
            lines.append("*No new opportunities matching your criteria.*")
            return "\n".join(lines)

        for i, opp in enumerate(opportunities, 1):
            lines.extend(self._format_opportunity(opp, i))

        # Summary stats
        lines.extend([
            "",
            "---",
            "",
            "## Quick Stats",
            "",
            self._generate_stats(opportunities)
        ])

        return "\n".join(lines)

    def _format_opportunity(self, opp: Opportunity, index: int) -> list[str]:
        """Format a single opportunity as a list of markdown lines."""
        # Category emoji
        cat_emoji = {
            OpportunityCategory.SCHOLARSHIP: "🎓",
            OpportunityCategory.FELLOWSHIP: "🏆",
            OpportunityCategory.INTERNSHIP: "💼",
            OpportunityCategory.JOB: "👔",
            OpportunityCategory.HACKATHON: "🚀",
            OpportunityCategory.COMPETITION: "🏅",
            OpportunityCategory.GRANT: "💰",
            OpportunityCategory.RESEARCH: "🔬",
            OpportunityCategory.OPEN_SOURCE: "💻",
            OpportunityCategory.CONFERENCE: "📅",
        }.get(opp.category, "📌")

        # Score indicator: 0-5 stars. A missing score is treated as 0 so an
        # unscored row cannot crash the whole digest.
        score = opp.combined_score or 0.0
        score_stars = "⭐" * max(0, min(int(score * 5), 5))

        lines = [
            f"### {index}. {cat_emoji} {opp.title}",
            "",
            f"**Category:** {self._enum_label(opp.category)}",
            f"**Domain:** {self._enum_label(opp.domain)}",
            f"**Source:** {opp.source_name}",
            f"**Score:** {score_stars} ({score:.2f})",
        ]

        if opp.deadline:
            # Subtracting naive and aware datetimes raises TypeError, so pick
            # a "now" that matches the deadline's timezone-awareness.
            if opp.deadline.tzinfo is not None:
                now = datetime.now(opp.deadline.tzinfo)
            else:
                now = datetime.utcnow()
            days_left = (opp.deadline - now).days
            urgency = "🔴" if days_left < 7 else "🟡" if days_left < 14 else "🟢"
            lines.append(f"**Deadline:** {urgency} {opp.deadline.strftime('%Y-%m-%d')} ({days_left} days)")

        # Excerpt: at most 300 characters; missing text renders as an empty
        # quote rather than the literal string "None".
        text = opp.raw_text or ""
        excerpt = f"{text[:300]}..." if len(text) > 300 else text

        lines.extend([
            "",
            f"> {excerpt}",
            "",
            f"🔗 [View Opportunity]({opp.url})",
            "",
            "---",
            ""
        ])

        return lines

    def _generate_stats(self, opportunities: list[Opportunity]) -> str:
        """Generate a markdown table with the five most common categories."""
        from collections import Counter

        categories = Counter(
            o.category.value if o.category is not None else "unknown"
            for o in opportunities
        )

        stats = ["| Metric | Value |", "|--------|-------|"]

        for cat, count in categories.most_common(5):
            stats.append(f"| {cat.replace('_', ' ').title()} | {count} |")

        return "\n".join(stats)
|
backend/ingestion/__init__.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
PIOE Ingestion Layer - Version 2.0
|
| 3 |
+
"""
|
| 4 |
+
from .arxiv_client import ArxivClient
|
| 5 |
+
from .github_client import GitHubClient
|
| 6 |
+
from .rss_client import RSSClient
|
| 7 |
+
from .reddit_client import RedditClient
|
| 8 |
+
from .superteam_client import SuperteamClient
|
| 9 |
+
from .web_scraper import WebScraper
|
| 10 |
+
from .careers_client import CareersClient, InternshipClient
|
| 11 |
+
from .grants_client import GrantsClient, NigeriaGrantsClient
|
| 12 |
+
from .scheduler import IngestionScheduler
|
| 13 |
+
|
| 14 |
+
__all__ = [
|
| 15 |
+
"ArxivClient",
|
| 16 |
+
"GitHubClient",
|
| 17 |
+
"RSSClient",
|
| 18 |
+
"RedditClient",
|
| 19 |
+
"SuperteamClient",
|
| 20 |
+
"WebScraper",
|
| 21 |
+
"CareersClient",
|
| 22 |
+
"InternshipClient",
|
| 23 |
+
"GrantsClient",
|
| 24 |
+
"NigeriaGrantsClient",
|
| 25 |
+
"IngestionScheduler"
|
| 26 |
+
]
|
| 27 |
+
|
backend/ingestion/arxiv_client.py
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
PIOE arXiv Client
|
| 3 |
+
|
| 4 |
+
Fetches papers from arXiv API for CS.CV, CS.RO, CS.AI, CS.LG categories.
|
| 5 |
+
"""
|
| 6 |
+
import httpx
|
| 7 |
+
from datetime import datetime
|
| 8 |
+
from typing import Optional
|
| 9 |
+
import xml.etree.ElementTree as ET
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class ArxivClient:
    """
    Client for arXiv API to fetch recent papers.
    High credibility source for academic research.
    """

    BASE_URL = "https://export.arxiv.org/api/query"

    # Target categories for PIOE
    CATEGORIES = [
        "cs.CV",  # Computer Vision
        "cs.RO",  # Robotics
        "cs.AI",  # Artificial Intelligence
        "cs.LG",  # Machine Learning
        "cs.CL",  # Computation and Language (NLP)
    ]

    # Namespace map used for every lookup in the Atom feed.
    _ATOM_NS = {"atom": "http://www.w3.org/2005/Atom"}

    def __init__(self, max_results: int = 50):
        self.max_results = max_results

    async def fetch(self, categories: Optional[list[str]] = None) -> list[dict]:
        """
        Fetch recent papers from specified categories.

        Args:
            categories: arXiv category codes to query; falls back to
                ``CATEGORIES`` when omitted or empty.

        Returns list of normalized opportunity dicts.

        Raises whatever ``httpx`` raises for transport errors or a non-2xx
        response (``raise_for_status``).
        """
        categories = categories or self.CATEGORIES

        # Build query for multiple categories
        cat_query = " OR ".join(f"cat:{cat}" for cat in categories)

        params = {
            "search_query": cat_query,
            "start": 0,
            "max_results": self.max_results,
            "sortBy": "submittedDate",
            "sortOrder": "descending"
        }

        async with httpx.AsyncClient() as client:
            response = await client.get(
                self.BASE_URL,
                params=params,
                timeout=30,
                follow_redirects=True
            )
            response.raise_for_status()

        return self._parse_response(response.text)

    def _parse_response(self, xml_content: str) -> list[dict]:
        """Parse arXiv Atom feed into normalized opportunities."""
        opportunities = []

        # Parse XML
        root = ET.fromstring(xml_content)

        for entry in root.findall("atom:entry", self._ATOM_NS):
            try:
                opportunities.append(self._parse_entry(entry))
            except Exception as e:
                # Deliberate best-effort: one bad entry must not discard the
                # rest of the feed.
                print(f"Error parsing arXiv entry: {e}")
                continue

        return opportunities

    def _parse_entry(self, entry) -> dict:
        """Convert one Atom ``<entry>`` element into an opportunity dict.

        Missing or empty child elements yield empty strings (or ``None`` for
        the date) instead of raising, so sparse entries are kept.
        """
        ns = self._ATOM_NS

        # Get authors, skipping entries without a usable name
        authors = []
        for author in entry.findall("atom:author", ns):
            name = self._clean_text(
                author.findtext("atom:name", default="", namespaces=ns)
            )
            if name:
                authors.append(name)

        # Get categories
        categories = [
            cat.get("term") for cat in entry.findall("atom:category", ns)
        ]

        published = entry.findtext("atom:published", default="", namespaces=ns)

        return {
            "title": self._clean_text(
                entry.findtext("atom:title", default="", namespaces=ns)
            ),
            "raw_text": self._clean_text(
                entry.findtext("atom:summary", default="", namespaces=ns)
            ),
            "url": (entry.findtext("atom:id", default="", namespaces=ns) or "").strip(),
            "source_type": "arxiv",
            "source_name": "arXiv",
            "published_at": self._parse_date(published) if published else None,
            "metadata": {
                "authors": authors,
                "categories": categories
            }
        }

    @staticmethod
    def _clean_text(text: Optional[str]) -> str:
        """Collapse all whitespace runs (including newlines) to single spaces."""
        return " ".join((text or "").split())

    def _parse_date(self, date_str: str) -> Optional[datetime]:
        """Parse an ISO-8601 timestamp; return ``None`` if it cannot be parsed."""
        try:
            # A trailing "Z" is rewritten to an explicit UTC offset because
            # older ``fromisoformat`` implementations reject it.
            return datetime.fromisoformat(date_str.strip().replace("Z", "+00:00"))
        except (AttributeError, TypeError, ValueError):
            return None
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
# Sync wrapper for non-async usage
|
| 120 |
+
def fetch_arxiv_sync(max_results: int = 50) -> list[dict]:
    """Blocking convenience wrapper around ``ArxivClient.fetch``.

    Builds a client with ``max_results`` and drives its ``fetch`` coroutine
    to completion with ``asyncio.run``, returning whatever it returns.
    """
    import asyncio

    return asyncio.run(ArxivClient(max_results).fetch())
|
backend/ingestion/careers_client.py
ADDED
|
@@ -0,0 +1,403 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
PIOE Careers Client
|
| 3 |
+
|
| 4 |
+
Tracks job/internship opportunities from major tech companies.
|
| 5 |
+
Microsoft, NVIDIA, Google, Meta, OpenAI, DeepMind, etc.
|
| 6 |
+
"""
|
| 7 |
+
import httpx
|
| 8 |
+
from datetime import datetime
|
| 9 |
+
from typing import Optional
|
| 10 |
+
from bs4 import BeautifulSoup
|
| 11 |
+
import re
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class CareersClient:
    """
    Scrapes career pages from major tech companies.
    Focuses on AI, robotics, and computer vision roles.

    Every fetch method returns a list of normalized opportunity dicts with
    the keys ``title``, ``raw_text``, ``url``, ``source_type``,
    ``source_name``, ``published_at`` and ``metadata``.
    """

    # Target companies with their career page configurations
    COMPANIES = [
        # Microsoft
        {
            "name": "Microsoft",
            "search_url": "https://careers.microsoft.com/v2/global/en/search.json",
            "type": "api",
            "keywords": ["computer vision", "robotics", "machine learning", "AI", "research"],
            "filters": {"lc": "United States", "exp": "Internship"}
        },
        # NVIDIA
        {
            "name": "NVIDIA",
            "search_url": "https://nvidia.wd5.myworkdayjobs.com/wday/cxs/nvidia/NVIDIAExternalCareerSite/jobs",
            "type": "workday",
            "keywords": ["computer vision", "robotics", "deep learning", "AI research", "intern"]
        },
        # Google
        {
            "name": "Google",
            "rss_url": "https://careers.google.com/jobs/rss",
            "type": "rss",
            "keywords": ["machine learning", "research", "robotics", "computer vision", "intern"]
        },
        # Meta
        {
            "name": "Meta",
            "search_url": "https://www.metacareers.com/jobs",
            "type": "scrape",
            "keywords": ["AI", "research", "robotics", "computer vision", "intern"]
        },
        # OpenAI
        {
            "name": "OpenAI",
            "careers_url": "https://openai.com/careers",
            "type": "scrape",
            "keywords": ["research", "engineering", "intern"]
        },
        # DeepMind
        {
            "name": "DeepMind",
            "careers_url": "https://deepmind.google/about/careers/",
            "type": "scrape",
            "keywords": ["research", "intern", "robotics"]
        },
        # Boston Dynamics
        {
            "name": "Boston Dynamics",
            "careers_url": "https://bostondynamics.wd1.myworkdayjobs.com/Boston_Dynamics",
            "type": "workday",
            "keywords": ["robotics", "perception", "control", "intern"]
        },
        # Tesla (Optimus/AI)
        {
            "name": "Tesla AI",
            "careers_url": "https://www.tesla.com/careers/search/?query=AI%20robotics",
            "type": "scrape",
            "keywords": ["autopilot", "optimus", "robotics", "computer vision", "intern"]
        },
    ]

    # Internship-specific keywords
    INTERNSHIP_KEYWORDS = [
        "intern", "internship", "co-op", "summer", "student",
        "graduate", "new grad", "entry level", "early career"
    ]

    def __init__(self):
        # Browser-like User-Agent sent with every request made by this client.
        self._headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
        }

    async def fetch_all(self, internship_only: bool = False) -> list[dict]:
        """
        Fetch opportunities from all configured companies.

        A failure for one company is printed and does not stop the others.

        Args:
            internship_only: If True, filter to only internship positions

        Returns:
            The concatenated opportunities of every company that succeeded.
        """
        all_opportunities = []

        for company in self.COMPANIES:
            try:
                opps = await self.fetch_company(company)

                if internship_only:
                    opps = [o for o in opps if self._is_internship(o)]

                all_opportunities.extend(opps)
            except Exception as e:
                print(f"Error fetching {company['name']}: {e}")

        return all_opportunities

    async def fetch_company(self, company: dict) -> list[dict]:
        """Fetch jobs from a specific company, dispatching on ``company["type"]``.

        ``"rss"`` and ``"workday"`` have dedicated fetchers; ``"scrape"`` and
        any other type (e.g. ``"api"``) use the generic HTML scraper.
        """
        kind = company["type"]
        if kind == "rss":
            return await self._fetch_rss_careers(company)
        if kind == "workday":
            return await self._fetch_workday(company)
        return await self._scrape_careers_page(company)

    async def _scrape_careers_page(self, company: dict) -> list[dict]:
        """Scrape a generic careers page.

        Tries a list of common job-card CSS selectors, uses the first one
        that matches anything, and inspects at most 30 cards. Cards without
        a heading or whose title matches none of the company keywords are
        skipped.

        Raises:
            httpx.HTTPStatusError: If the page responds with an error status.
        """
        from urllib.parse import urljoin

        url = company.get("careers_url") or company.get("search_url")

        async with httpx.AsyncClient() as client:
            response = await client.get(
                url,
                headers=self._headers,
                timeout=30,
                follow_redirects=True
            )
            response.raise_for_status()

        soup = BeautifulSoup(response.text, "html.parser")
        opportunities = []

        # Look for job listing elements (common patterns)
        job_selectors = [
            "article", ".job-listing", ".job-card", ".position",
            "[data-job]", ".career-item", ".opening"
        ]

        jobs = []
        for selector in job_selectors:
            jobs = soup.select(selector)
            if jobs:
                break

        for job in jobs[:30]:
            try:
                title_el = job.select_one("h2, h3, h4, .title, .job-title")
                link_el = job.select_one("a[href]")
                location_el = job.select_one(".location, .job-location")

                if not title_el:
                    continue

                title = title_el.get_text(strip=True)

                # Filter by keywords
                if not self._matches_keywords(title, company.get("keywords", [])):
                    continue

                # urljoin leaves absolute hrefs untouched and resolves
                # relative ones against the page URL.
                link = ""
                if link_el and link_el.get("href"):
                    link = urljoin(url, link_el["href"])

                opportunities.append({
                    "title": f"[{company['name']}] {title}",
                    "raw_text": job.get_text(strip=True)[:500],
                    "url": link or url,
                    "source_type": "web_scrape",
                    "source_name": f"{company['name']} Careers",
                    "published_at": datetime.utcnow(),
                    "metadata": {
                        "company": company["name"],
                        "location": location_el.get_text(strip=True) if location_el else None,
                        "is_internship": self._is_internship({"title": title})
                    }
                })

            except Exception as e:
                print(f"Error parsing job listing: {e}")

        return opportunities

    @staticmethod
    def _workday_job_url(api_url: str, external_path: Optional[str]) -> str:
        """Turn a Workday ``externalPath`` into an absolute posting URL.

        ``externalPath`` is a site-relative path, so storing it verbatim
        gives consumers a link they cannot open. Returns ``api_url``
        unchanged when there is no path.
        """
        from urllib.parse import urljoin, urlsplit

        if not external_path:
            return api_url

        parts = urlsplit(api_url)
        segments = [segment for segment in parts.path.split("/") if segment]
        # Search endpoints are shaped /wday/cxs/<tenant>/<site>/jobs.
        # NOTE(review): assumes the public posting lives at
        # https://<host>/<site><externalPath> -- confirm against a live posting.
        if len(segments) >= 5 and segments[0] == "wday" and segments[1] == "cxs":
            return f"{parts.scheme}://{parts.netloc}/{segments[3]}{external_path}"
        return urljoin(api_url, external_path)

    async def _fetch_workday(self, company: dict) -> list[dict]:
        """Fetch from Workday-based career sites.

        POSTs a search built from the first three company keywords. If the
        request or the response parsing fails for any reason, the error is
        printed and the generic page scraper is used instead.
        """
        url = company.get("search_url") or company.get("careers_url")

        # Workday API format
        payload = {
            "limit": 20,
            "offset": 0,
            "searchText": " ".join(company.get("keywords", [])[:3])
        }

        try:
            async with httpx.AsyncClient() as client:
                response = await client.post(
                    url,
                    json=payload,
                    headers={**self._headers, "Content-Type": "application/json"},
                    timeout=30
                )
                response.raise_for_status()

            data = response.json()
            jobs = data.get("jobPostings", [])

            return [
                {
                    "title": f"[{company['name']}] {job.get('title', '')}",
                    "raw_text": job.get("bulletFields", [""])[0] if job.get("bulletFields") else "",
                    "url": self._workday_job_url(url, job.get("externalPath")),
                    "source_type": "web_scrape",
                    "source_name": f"{company['name']} Careers",
                    "published_at": datetime.utcnow(),
                    "metadata": {
                        "company": company["name"],
                        "location": job.get("locationsText"),
                        "is_internship": self._is_internship({"title": job.get("title", "")})
                    }
                }
                for job in jobs
            ]
        except Exception as e:
            print(f"Workday fetch error: {e}")
            return await self._scrape_careers_page(company)

    async def _fetch_rss_careers(self, company: dict) -> list[dict]:
        """Fetch from RSS-based career feeds.

        Only the first 20 feed entries are considered, and only those whose
        title matches a company keyword are returned.

        Raises:
            httpx.HTTPStatusError: If the feed responds with an error status.
        """
        import feedparser

        url = company.get("rss_url")

        async with httpx.AsyncClient() as client:
            response = await client.get(
                url,
                headers=self._headers,
                timeout=30,
                follow_redirects=True
            )
            # Same as the other fetchers: an error page is surfaced to the
            # caller rather than parsed as an empty feed.
            response.raise_for_status()
            content = response.text

        feed = feedparser.parse(content)
        opportunities = []

        for entry in feed.entries[:20]:
            title = entry.get("title", "")

            if not self._matches_keywords(title, company.get("keywords", [])):
                continue

            opportunities.append({
                "title": f"[{company['name']}] {title}",
                "raw_text": entry.get("summary", "")[:500],
                "url": entry.get("link", ""),
                "source_type": "rss",
                "source_name": f"{company['name']} Careers",
                "published_at": datetime.utcnow(),
                "metadata": {
                    "company": company["name"],
                    "is_internship": self._is_internship({"title": title})
                }
            })

        return opportunities

    def _matches_keywords(self, text: str, keywords: list[str]) -> bool:
        """Check if text matches any keyword (case-insensitive).

        An empty keyword list matches everything. Keywords of up to three
        characters (acronyms such as "AI") must appear as a whole word;
        longer keywords match as substrings, so "intern" still matches
        "Internship".
        """
        if not keywords:
            return True

        text_lower = text.lower()
        for keyword in keywords:
            needle = keyword.lower()
            if len(needle) <= 3:
                # As a bare substring "ai" would match "retail",
                # "maintenance", "email", ... so require word boundaries.
                if re.search(rf"\b{re.escape(needle)}\b", text_lower):
                    return True
            elif needle in text_lower:
                return True
        return False

    def _is_internship(self, opportunity: dict) -> bool:
        """Check if opportunity is an internship.

        Looks for any ``INTERNSHIP_KEYWORDS`` entry, optionally followed by
        a plural "s", as a whole word or phrase in the title plus
        ``raw_text``. Whole-word matching keeps "Internal" and
        "International" from being read as "intern". Missing or ``None``
        fields count as empty.
        """
        title = (opportunity.get("title") or "").lower()
        text = (opportunity.get("raw_text") or "").lower()
        combined = f"{title} {text}"

        alternatives = "|".join(re.escape(kw) for kw in self.INTERNSHIP_KEYWORDS)
        if not alternatives:
            return False
        return re.search(rf"\b(?:{alternatives})s?\b", combined) is not None
|
| 290 |
+
|
| 291 |
+
|
| 292 |
+
class InternshipClient:
    """
    Dedicated client for finding internship opportunities.
    Aggregates from multiple sources with internship focus.

    Combines the internship-filtered results of ``CareersClient`` with
    cards scraped from the first three ``INTERNSHIP_SOURCES``.
    """

    # Internship-focused sites
    INTERNSHIP_SOURCES = [
        {
            "name": "LinkedIn Internships",
            "url": "https://www.linkedin.com/jobs/search/?keywords=AI%20robotics%20internship",
            "type": "scrape"
        },
        {
            "name": "Indeed Internships",
            "url": "https://www.indeed.com/jobs?q=machine+learning+intern",
            "type": "scrape"
        },
        {
            "name": "Glassdoor Internships",
            "url": "https://www.glassdoor.com/Job/computer-vision-intern-jobs-SRCH_KO0,22.htm",
            "type": "scrape"
        },
        {
            "name": "WayUp",
            "url": "https://www.wayup.com/s/internships/computer-science/",
            "type": "scrape"
        },
        {
            "name": "Handshake",
            "url": "https://joinhandshake.com",
            "type": "scrape"
        }
    ]

    def __init__(self):
        self.careers_client = CareersClient()
        # Browser-like User-Agent sent with every scrape request.
        self._headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
        }

    async def fetch_all(self) -> list[dict]:
        """Fetch internships from all sources.

        Company results come first, then one batch per scraped site. A
        failing source is printed and skipped.
        """
        opportunities = []

        # Get internships from major companies
        try:
            company_internships = await self.careers_client.fetch_all(internship_only=True)
            opportunities.extend(company_internships)
        except Exception as e:
            print(f"Careers client error: {e}")

        # Scrape internship-focused sites
        for source in self.INTERNSHIP_SOURCES[:3]:  # Limit to avoid rate limiting
            try:
                opps = await self._scrape_internship_site(source)
                opportunities.extend(opps)
            except Exception as e:
                print(f"Error fetching {source['name']}: {e}")

        return opportunities

    async def _scrape_internship_site(self, source: dict) -> list[dict]:
        """Scrape an internship-focused site.

        Args:
            source: An ``INTERNSHIP_SOURCES`` entry (``name`` and ``url``).

        Returns:
            Up to 15 opportunities, one per job card that has a heading.
            Returns ``[]`` when the page cannot be fetched. The failure is
            printed rather than raised, so an unreachable site leaves a
            trace in the logs.
        """
        from urllib.parse import urljoin

        try:
            async with httpx.AsyncClient() as client:
                response = await client.get(
                    source["url"],
                    headers=self._headers,
                    timeout=30,
                    follow_redirects=True
                )
                response.raise_for_status()
        except Exception as e:
            print(f"HTTP error for {source['name']}: {e}")
            return []

        soup = BeautifulSoup(response.text, "html.parser")
        opportunities = []

        # Find job cards
        cards = soup.select(".job-card, .job-listing, article, .result")[:15]

        for card in cards:
            try:
                title_el = card.select_one("h2, h3, .title, .job-title")
                if not title_el:
                    continue

                title = title_el.get_text(strip=True)
                link_el = card.select_one("a[href]")

                link = ""
                if link_el and link_el.get("href"):
                    link = urljoin(source["url"], link_el["href"])

                opportunities.append({
                    "title": f"[Internship] {title}",
                    "raw_text": card.get_text(strip=True)[:500],
                    "url": link or source["url"],
                    "source_type": "web_scrape",
                    "source_name": source["name"],
                    "published_at": datetime.utcnow(),
                    "metadata": {
                        "is_internship": True,
                        "source_site": source["name"]
                    }
                })
            except Exception as e:
                # Best effort: one broken card must not discard the rest.
                print(f"Error parsing {source['name']} listing: {e}")
                continue

        return opportunities
|
backend/ingestion/github_client.py
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
PIOE GitHub Client
|
| 3 |
+
|
| 4 |
+
Tracks trending repositories and star velocity for AI/Robotics/CV projects.
|
| 5 |
+
"""
|
| 6 |
+
import httpx
|
| 7 |
+
from datetime import datetime, timedelta
|
| 8 |
+
from typing import Optional
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class GitHubClient:
    """
    Client for GitHub API to discover trending repositories.

    Searches repositories by topic through the search endpoint and
    normalizes each hit into an opportunity dict that records the current
    star, fork and open-issue counts.
    """

    BASE_URL = "https://api.github.com"

    # Search queries for relevant topics
    SEARCH_TOPICS = [
        "computer-vision",
        "robotics",
        "machine-learning",
        "deep-learning",
        "ros",
        "pytorch",
        "transformers",
        "llm"
    ]

    def __init__(self, token: Optional[str] = None, max_results: int = 30):
        """Store settings and build the request headers.

        Args:
            token: Optional API token; when given it is sent as a Bearer
                ``Authorization`` header.
            max_results: Upper bound on the length of ``fetch_trending``'s
                result.
        """
        self.token = token
        self.max_results = max_results
        self._headers = {
            "Accept": "application/vnd.github+json",
            "X-GitHub-Api-Version": "2022-11-28"
        }
        if token:
            self._headers["Authorization"] = f"Bearer {token}"

    async def fetch_trending(self, topics: Optional[list[str]] = None) -> list[dict]:
        """
        Fetch recently popular repositories in target topics.

        Only the first five topics are queried. A topic whose search fails
        is printed and skipped. Results are de-duplicated by URL, keeping
        the first occurrence, then truncated to ``max_results``.

        Args:
            topics: Topics to search; defaults to ``SEARCH_TOPICS``.

        Returns list of normalized opportunity dicts.
        """
        topics = topics or self.SEARCH_TOPICS
        opportunities = []

        # Repos pushed to within the last 7 days
        week_ago = (datetime.utcnow() - timedelta(days=7)).strftime("%Y-%m-%d")

        for topic in topics[:5]:  # Limit to avoid rate limiting
            try:
                repos = await self._search_repos(topic, week_ago)
                opportunities.extend(repos)
            except Exception as e:
                print(f"GitHub search error for {topic}: {e}")

        # Deduplicate by URL
        seen_urls = set()
        unique = []
        for opp in opportunities:
            if opp["url"] not in seen_urls:
                seen_urls.add(opp["url"])
                unique.append(opp)

        return unique[:self.max_results]

    async def _search_repos(self, topic: str, since_date: str) -> list[dict]:
        """Search for repositories by topic.

        Queries repos tagged ``topic`` with more than 50 stars that were
        pushed after ``since_date`` (``YYYY-MM-DD``), top 10 by stars.

        Raises:
            httpx.HTTPStatusError: If the API responds with an error status.
        """
        query = f"topic:{topic} pushed:>{since_date} stars:>50"

        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"{self.BASE_URL}/search/repositories",
                params={
                    "q": query,
                    "sort": "stars",
                    "order": "desc",
                    "per_page": 10
                },
                headers=self._headers,
                timeout=30,
                follow_redirects=True
            )
            response.raise_for_status()

        data = response.json()
        return self._parse_repos(data.get("items", []), topic)

    def _parse_repos(self, repos: list, topic: str) -> list[dict]:
        """Parse GitHub repos into normalized opportunities.

        Args:
            repos: Repository objects taken from a search response.
            topic: Topic the search was run for; recorded in ``source_name``.

        Returns:
            One opportunity per repo. Repos missing ``full_name``,
            ``html_url`` or ``owner.login`` are printed and skipped.
        """
        opportunities = []

        for repo in repos:
            try:
                # The API sends `"description": null` for repos without
                # one, so .get()'s default never applies; coerce to ""
                # before slicing instead of losing the repo to a TypeError.
                description = repo.get("description") or ""

                opportunity = {
                    "title": f"[GitHub] {repo['full_name']}: {description[:100]}",
                    "raw_text": description,
                    "url": repo["html_url"],
                    "source_type": "github",
                    "source_name": f"GitHub/{topic}",
                    "published_at": self._parse_date(repo.get("created_at")),
                    "social_engagement": repo.get("stargazers_count", 0),
                    "metadata": {
                        "owner": repo["owner"]["login"],
                        "stars": repo.get("stargazers_count", 0),
                        "forks": repo.get("forks_count", 0),
                        "language": repo.get("language"),
                        "topics": repo.get("topics", []),
                        "open_issues": repo.get("open_issues_count", 0),
                        "updated_at": repo.get("updated_at")
                    }
                }
                opportunities.append(opportunity)
            except Exception as e:
                print(f"Error parsing repo: {e}")

        return opportunities

    async def fetch_gsoc_repos(self) -> list[dict]:
        """Fetch Google Summer of Code related repositories.

        Returns up to 20 repos sorted by last update, with the title prefix
        ``[GitHub]`` replaced by ``[GSoC]``.

        Raises:
            httpx.HTTPStatusError: If the API responds with an error status.
        """
        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"{self.BASE_URL}/search/repositories",
                params={
                    "q": "topic:gsoc OR topic:google-summer-of-code",
                    "sort": "updated",
                    "per_page": 20
                },
                headers=self._headers,
                timeout=30,
                follow_redirects=True
            )
            response.raise_for_status()

        data = response.json()
        repos = self._parse_repos(data.get("items", []), "gsoc")

        # Mark as open source opportunity. Only the leading marker is
        # swapped, so a description that itself contains "[GitHub] " is
        # left intact.
        for repo in repos:
            repo["title"] = f"[GSoC] {repo['title'].removeprefix('[GitHub] ')}"

        return repos

    def _parse_date(self, date_str: Optional[str]) -> Optional[datetime]:
        """Parse GitHub date format.

        Returns ``None`` for a missing or unparseable value; a trailing
        ``Z`` is read as UTC.
        """
        if not date_str:
            return None
        try:
            return datetime.fromisoformat(date_str.replace("Z", "+00:00"))
        except Exception:
            return None
|
backend/ingestion/grants_client.py
ADDED
|
@@ -0,0 +1,385 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
PIOE Grants Client - Version 2.0
|
| 3 |
+
|
| 4 |
+
Fetches grant opportunities from crypto ecosystems and funding platforms.
|
| 5 |
+
High-leverage opportunities with money + credibility + access.
|
| 6 |
+
"""
|
| 7 |
+
import httpx
|
| 8 |
+
from datetime import datetime
|
| 9 |
+
from typing import Optional
|
| 10 |
+
from bs4 import BeautifulSoup
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class GrantsClient:
|
| 14 |
+
"""
|
| 15 |
+
Client for fetching grants from crypto ecosystems and funding platforms.
|
| 16 |
+
Prioritizes: Ethereum, Solana, Base, Starknet, Gitcoin.
|
| 17 |
+
"""
|
| 18 |
+
|
| 19 |
+
# Grant sources with their configurations
|
| 20 |
+
GRANT_SOURCES = [
|
| 21 |
+
# Ethereum Ecosystem
|
| 22 |
+
{
|
| 23 |
+
"name": "Ethereum Foundation Grants",
|
| 24 |
+
"url": "https://esp.ethereum.foundation/",
|
| 25 |
+
"ecosystem": "ethereum",
|
| 26 |
+
"type": "ecosystem_grant",
|
| 27 |
+
"typical_size": (5000, 100000),
|
| 28 |
+
},
|
| 29 |
+
# Solana Ecosystem
|
| 30 |
+
{
|
| 31 |
+
"name": "Solana Foundation Grants",
|
| 32 |
+
"url": "https://solana.org/grants",
|
| 33 |
+
"ecosystem": "solana",
|
| 34 |
+
"type": "ecosystem_grant",
|
| 35 |
+
"typical_size": (5000, 50000),
|
| 36 |
+
},
|
| 37 |
+
# Base (Coinbase L2)
|
| 38 |
+
{
|
| 39 |
+
"name": "Base Builder Grants",
|
| 40 |
+
"url": "https://base.org/builders",
|
| 41 |
+
"ecosystem": "base",
|
| 42 |
+
"type": "ecosystem_grant",
|
| 43 |
+
"typical_size": (5000, 25000),
|
| 44 |
+
},
|
| 45 |
+
# Starknet
|
| 46 |
+
{
|
| 47 |
+
"name": "Starknet Grants",
|
| 48 |
+
"url": "https://www.starknet.io/ecosystem/grants/",
|
| 49 |
+
"ecosystem": "starknet",
|
| 50 |
+
"type": "ecosystem_grant",
|
| 51 |
+
"typical_size": (5000, 50000),
|
| 52 |
+
},
|
| 53 |
+
# Gitcoin
|
| 54 |
+
{
|
| 55 |
+
"name": "Gitcoin Grants",
|
| 56 |
+
"url": "https://gitcoin.co/grants",
|
| 57 |
+
"ecosystem": "gitcoin",
|
| 58 |
+
"type": "micro_grant",
|
| 59 |
+
"typical_size": (500, 10000),
|
| 60 |
+
},
|
| 61 |
+
# Protocol-specific
|
| 62 |
+
{
|
| 63 |
+
"name": "Uniswap Grants",
|
| 64 |
+
"url": "https://www.uniswapfoundation.org/grants",
|
| 65 |
+
"ecosystem": "ethereum",
|
| 66 |
+
"type": "ecosystem_grant",
|
| 67 |
+
"typical_size": (10000, 100000),
|
| 68 |
+
},
|
| 69 |
+
{
|
| 70 |
+
"name": "Aave Grants DAO",
|
| 71 |
+
"url": "https://aavegrants.org/",
|
| 72 |
+
"ecosystem": "ethereum",
|
| 73 |
+
"type": "ecosystem_grant",
|
| 74 |
+
"typical_size": (5000, 100000),
|
| 75 |
+
},
|
| 76 |
+
{
|
| 77 |
+
"name": "Polygon Grants",
|
| 78 |
+
"url": "https://polygon.technology/village/grants",
|
| 79 |
+
"ecosystem": "polygon",
|
| 80 |
+
"type": "ecosystem_grant",
|
| 81 |
+
"typical_size": (5000, 50000),
|
| 82 |
+
},
|
| 83 |
+
]
|
| 84 |
+
|
| 85 |
+
# RSS/API sources for grants
|
| 86 |
+
GRANT_RSS_FEEDS = [
|
| 87 |
+
{
|
| 88 |
+
"name": "Ethereum Blog - Grants",
|
| 89 |
+
"url": "https://blog.ethereum.org/feed.xml",
|
| 90 |
+
"filter_keywords": ["grant", "funding", "ecosystem"],
|
| 91 |
+
},
|
| 92 |
+
]
|
| 93 |
+
|
| 94 |
+
def __init__(self):
|
| 95 |
+
self._headers = {
|
| 96 |
+
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
|
| 97 |
+
}
|
| 98 |
+
|
| 99 |
+
async def fetch_all(self) -> list[dict]:
|
| 100 |
+
"""Fetch grants from all configured sources."""
|
| 101 |
+
opportunities = []
|
| 102 |
+
|
| 103 |
+
# Fetch from grant pages
|
| 104 |
+
for source in self.GRANT_SOURCES:
|
| 105 |
+
try:
|
| 106 |
+
grants = await self._scrape_grant_page(source)
|
| 107 |
+
opportunities.extend(grants)
|
| 108 |
+
except Exception as e:
|
| 109 |
+
print(f"Error fetching {source['name']}: {e}")
|
| 110 |
+
|
| 111 |
+
return opportunities
|
| 112 |
+
|
| 113 |
+
async def _scrape_grant_page(self, source: dict) -> list[dict]:
    """Turn one grant program page into a single opportunity.

    Args:
        source: A ``GRANT_SOURCES`` entry (``name``, ``url``,
            ``ecosystem``, ``type``, ``typical_size``).

    Returns:
        A one-element list describing the program, or ``[]`` when the
        request fails (the error is printed) or the status is not 200.
    """
    try:
        async with httpx.AsyncClient() as http:
            page = await http.get(
                source["url"],
                headers=self._headers,
                timeout=30,
                follow_redirects=True
            )

        if page.status_code != 200:
            return []

        html = page.text
    except Exception as e:
        print(f"HTTP error for {source['name']}: {e}")
        return []

    soup = BeautifulSoup(html, "html.parser")

    # These pages describe the program as a whole rather than individual
    # grants, so exactly one opportunity is produced per source.
    program_metadata = {
        "ecosystem": source["ecosystem"],
        "grant_type": source["type"],
        "grant_size_min": source["typical_size"][0],
        "grant_size_max": source["typical_size"][1],
        "region": "global",
        "technical_depth": "intermediate",
    }
    program = {
        "title": f"[{source['ecosystem'].upper()}] {source['name']}",
        "raw_text": self._extract_page_text(soup)[:2000],
        "url": source["url"],
        "source_type": "grant_platform",
        "source_name": source["name"],
        "published_at": datetime.utcnow(),
        "metadata": program_metadata
    }

    return [program]
|
| 154 |
+
|
| 155 |
+
def _extract_page_text(self, soup: BeautifulSoup) -> str:
|
| 156 |
+
"""Extract meaningful text from page."""
|
| 157 |
+
# Remove scripts and styles
|
| 158 |
+
for tag in soup(["script", "style", "nav", "footer", "header"]):
|
| 159 |
+
tag.decompose()
|
| 160 |
+
|
| 161 |
+
# Get text
|
| 162 |
+
text = soup.get_text(separator=" ", strip=True)
|
| 163 |
+
return " ".join(text.split())[:2000]
|
| 164 |
+
|
| 165 |
+
async def fetch_active_rounds(self) -> list[dict]:
    """
    Ask Gitcoin for its currently active grant rounds.

    NOTE(review): the original author marked this request as a simplified
    version whose endpoint may not match the real Gitcoin API - confirm
    before relying on it.

    Returns:
        The opportunities produced by ``_parse_gitcoin_rounds`` for a 200
        response; an empty list for any other status or on any error
        (which is printed, not raised).
    """
    endpoint = "https://api.gitcoin.co/grants/rounds/active"

    try:
        async with httpx.AsyncClient() as http:
            reply = await http.get(
                endpoint,
                headers=self._headers,
                timeout=30,
                follow_redirects=True,
            )

        if reply.status_code != 200:
            return []

        return self._parse_gitcoin_rounds(reply.json())
    except Exception as exc:
        print(f"Error fetching Gitcoin rounds: {exc}")
        return []
|
| 185 |
+
|
| 186 |
+
def _parse_gitcoin_rounds(self, data: dict) -> list[dict]:
|
| 187 |
+
"""Parse Gitcoin API response into opportunities."""
|
| 188 |
+
opportunities = []
|
| 189 |
+
|
| 190 |
+
for round_data in data.get("rounds", []):
|
| 191 |
+
opportunity = {
|
| 192 |
+
"title": f"[GITCOIN] {round_data.get('name', 'Gitcoin Round')}",
|
| 193 |
+
"raw_text": round_data.get("description", ""),
|
| 194 |
+
"url": f"https://gitcoin.co/grants/{round_data.get('id', '')}",
|
| 195 |
+
"source_type": "grant_platform",
|
| 196 |
+
"source_name": "Gitcoin",
|
| 197 |
+
"published_at": datetime.utcnow(),
|
| 198 |
+
"deadline": self._parse_date(round_data.get("end_date")),
|
| 199 |
+
"metadata": {
|
| 200 |
+
"ecosystem": "gitcoin",
|
| 201 |
+
"grant_type": "micro_grant",
|
| 202 |
+
"matching_pool": round_data.get("matching_pool", 0),
|
| 203 |
+
"grant_size_min": 100,
|
| 204 |
+
"grant_size_max": 10000,
|
| 205 |
+
"region": "global",
|
| 206 |
+
}
|
| 207 |
+
}
|
| 208 |
+
opportunities.append(opportunity)
|
| 209 |
+
|
| 210 |
+
return opportunities
|
| 211 |
+
|
| 212 |
+
def _parse_date(self, date_str: Optional[str]) -> Optional[datetime]:
|
| 213 |
+
"""Parse date string."""
|
| 214 |
+
if not date_str:
|
| 215 |
+
return None
|
| 216 |
+
try:
|
| 217 |
+
return datetime.fromisoformat(date_str.replace("Z", "+00:00"))
|
| 218 |
+
except Exception:
|
| 219 |
+
return None
|
| 220 |
+
|
| 221 |
+
|
| 222 |
+
class NigeriaGrantsClient:
    """
    Client for Nigeria-specific funding and grant opportunities.
    Focuses on: NITDA, CcHub, BOI, Government programs.

    ``fetch_all`` combines two kinds of input:

    * the landing pages in ``NIGERIA_SOURCES`` (one opportunity per page), and
    * the RSS feeds in ``NIGERIA_RSS`` (one opportunity per entry whose
      title or summary contains one of ``OPPORTUNITY_KEYWORDS``).
    """

    # Nigeria-specific grant sources
    NIGERIA_SOURCES = [
        {
            "name": "NITDA Programs",
            "url": "https://nitda.gov.ng/",
            "type": "innovation_fund",
            "region": "nigeria",
        },
        {
            "name": "CcHub Accelerator",
            "url": "https://cchubnigeria.com/",
            "type": "grant",
            "region": "nigeria",
        },
        {
            "name": "Tony Elumelu Foundation",
            "url": "https://www.tonyelumelufoundation.org/",
            "type": "grant",
            "region": "africa",
        },
        {
            "name": "Ventures Platform",
            "url": "https://www.venturesplatform.com/",
            "type": "investment",
            "region": "africa",
        },
        {
            "name": "BoI Youth Entrepreneurship",
            "url": "https://www.boi.ng/",
            "type": "innovation_fund",
            "region": "nigeria",
        },
    ]

    # RSS feeds for Nigeria tech news
    NIGERIA_RSS = [
        {"name": "TechCabal", "url": "https://techcabal.com/feed/"},
        {"name": "Disrupt Africa", "url": "https://disrupt-africa.com/feed/"},
    ]

    # Keywords indicating opportunities. Matching is a plain lower-case
    # substring test, so e.g. "program" also matches "programming".
    OPPORTUNITY_KEYWORDS = (
        "grant", "funding", "accelerator", "apply", "opportunity",
        "fellowship", "program", "investment", "startup", "launch",
    )

    def __init__(self):
        # Browser-like User-Agent sent with every request.
        self._headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
        }

    async def fetch_all(self) -> list[dict]:
        """
        Fetch all Nigeria-specific opportunities.

        A failure in one source or feed is printed and does not stop the
        remaining ones from being fetched.

        Returns:
            Opportunities from the program pages followed by those from
            the RSS feeds.
        """
        opportunities = []

        # Fetch from Nigeria sources
        for source in self.NIGERIA_SOURCES:
            try:
                opportunities.extend(await self._fetch_source(source))
            except Exception as e:
                print(f"Error fetching {source['name']}: {e}")

        # Fetch from RSS feeds
        for feed in self.NIGERIA_RSS:
            try:
                opportunities.extend(await self._fetch_rss(feed))
            except Exception as e:
                print(f"Error fetching {feed['name']}: {e}")

        return opportunities

    async def _fetch_source(self, source: dict) -> list[dict]:
        """
        Fetch one program landing page and describe it as an opportunity.

        Args:
            source: Descriptor with ``name``, ``url``, ``type`` and
                ``region`` keys.

        Returns:
            A one-element list, or an empty list when the request fails
            or the response status is not 200.
        """
        try:
            async with httpx.AsyncClient() as client:
                response = await client.get(
                    source["url"],
                    headers=self._headers,
                    timeout=30,
                    follow_redirects=True,
                )

            if response.status_code != 200:
                return []

            html = response.text
        except Exception as e:
            print(f"HTTP error for {source['name']}: {e}")
            return []

        soup = BeautifulSoup(html, "html.parser")

        # Create opportunity for the program
        opportunity = {
            "title": f"[NIGERIA] {source['name']}",
            "raw_text": self._extract_text(soup)[:2000],
            "url": source["url"],
            "source_type": "gov_portal",
            "source_name": source["name"],
            "published_at": datetime.utcnow(),
            "metadata": {
                "region": source["region"],
                "grant_type": source["type"],
                "nigeria_specific": True,
            },
        }

        return [opportunity]

    async def _fetch_rss(self, feed: dict) -> list[dict]:
        """
        Fetch an RSS feed and keep the entries that look like opportunities.

        Only the first 20 entries of the feed are considered.

        Args:
            feed: Descriptor with ``name`` and ``url`` keys.

        Returns:
            Matching opportunities; an empty list when the request fails
            or, consistently with ``_fetch_source``, when the response
            status is not 200 (so an error page is never parsed as a feed).
        """
        import feedparser

        try:
            async with httpx.AsyncClient() as client:
                response = await client.get(
                    feed["url"],
                    headers=self._headers,
                    timeout=30,
                    follow_redirects=True,
                )

            if response.status_code != 200:
                return []

            content = response.text
        except Exception as e:
            print(f"Error fetching {feed['name']}: {e}")
            return []

        parsed = feedparser.parse(content)
        return self._entries_to_opportunities(parsed.entries[:20], feed["name"])

    def _entries_to_opportunities(self, entries, feed_name: str) -> list[dict]:
        """Convert feed entries mentioning an opportunity keyword into records."""
        opportunities = []

        for entry in entries:
            title = entry.get("title") or ""
            summary = entry.get("summary") or ""
            haystack = f"{title}\n{summary}".lower()

            # Check if contains opportunity keywords
            if not any(kw in haystack for kw in self.OPPORTUNITY_KEYWORDS):
                continue

            opportunities.append({
                "title": f"[AFRICA] {title}",
                "raw_text": summary[:2000],
                "url": entry.get("link", ""),
                "source_type": "rss",
                "source_name": feed_name,
                "published_at": self._entry_published_at(entry),
                "metadata": {
                    "region": "africa",
                    "africa_focus": True,
                },
            })

        return opportunities

    @staticmethod
    def _entry_published_at(entry) -> datetime:
        """Return the entry's own publish/update time, else the current UTC time."""
        # feedparser exposes parsed dates as UTC time tuples; the first six
        # fields are year..second. The result is naive, like utcnow().
        for key in ("published_parsed", "updated_parsed"):
            stamp = entry.get(key)
            if not stamp:
                continue
            try:
                return datetime(*stamp[:6])
            except (TypeError, ValueError):
                continue
        return datetime.utcnow()

    def _extract_text(self, soup: BeautifulSoup) -> str:
        """
        Extract whitespace-normalised text from a parsed page.

        Script, style, nav and footer elements are removed from ``soup``
        in place first. The result is not truncated here.
        """
        for tag in soup(["script", "style", "nav", "footer"]):
            tag.decompose()
        return " ".join(soup.get_text(separator=" ", strip=True).split())
|
backend/ingestion/jobboard_client.py
ADDED
|
@@ -0,0 +1,472 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
PIOE Job Board Client
|
| 3 |
+
|
| 4 |
+
Fetches REAL job opportunities from structured job board APIs.
|
| 5 |
+
These return actual job listings, not discussions.
|
| 6 |
+
|
| 7 |
+
Supports:
|
| 8 |
+
- Arbeitnow (free, no key needed)
|
| 9 |
+
- TheMuse (free, no key needed)
|
| 10 |
+
- Remotive (free, no key needed)
|
| 11 |
+
- Adzuna (free key, 250 req/day)
|
| 12 |
+
- Jooble (free key, aggregates LinkedIn/Indeed/Glassdoor)
|
| 13 |
+
- RapidAPI LinkedIn (free key, 100 req/month)
|
| 14 |
+
"""
|
| 15 |
+
import httpx
|
| 16 |
+
from datetime import datetime
|
| 17 |
+
from typing import Optional
|
| 18 |
+
import re
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class JobBoardClient:
    """
    Client for structured job board APIs.
    Returns actual job listings you can apply to.

    Every ``fetch_*`` method returns a list of normalised opportunity
    dicts (``title``, ``raw_text``, ``url``, ``source_type``,
    ``source_name``, ``published_at``, ``metadata``) and never raises:
    errors are printed and whatever was collected so far is returned.

    Usage:
        client = JobBoardClient(
            adzuna_app_id="xxx",
            adzuna_api_key="xxx",
            jooble_api_key="xxx",
            rapidapi_key="xxx"
        )
        jobs = await client.fetch_all()
    """

    def __init__(
        self,
        adzuna_app_id: str = "",
        adzuna_api_key: str = "",
        jooble_api_key: str = "",
        rapidapi_key: str = ""
    ):
        # Empty credentials simply disable the corresponding source.
        self.adzuna_app_id = adzuna_app_id
        self.adzuna_api_key = adzuna_api_key
        self.jooble_api_key = jooble_api_key
        self.rapidapi_key = rapidapi_key

    async def fetch_all(self) -> list[dict]:
        """
        Fetch from all available job board sources.

        The three key-less APIs are always queried; Adzuna needs both its
        app id and key, Jooble and RapidAPI need their respective key.
        Sources are queried one after another and a failure in one is
        printed without affecting the others.

        Returns:
            All opportunities, in source order.
        """
        # (enabled, fetcher, label for the count line, count suffix, label for errors)
        sources = [
            # === FREE APIs (no key needed) ===
            (True, self.fetch_arbeitnow, "Arbeitnow", "jobs", "Arbeitnow"),
            (True, self.fetch_themuse, "TheMuse", "jobs", "TheMuse"),
            (True, self.fetch_remotive, "Remotive", "remote jobs", "Remotive"),
            # === APIs WITH FREE KEYS ===
            (bool(self.adzuna_app_id and self.adzuna_api_key),
             self.fetch_adzuna, "Adzuna", "jobs", "Adzuna"),
            # Jooble aggregates LinkedIn, Indeed, Glassdoor
            (bool(self.jooble_api_key),
             self.fetch_jooble, "Jooble", "jobs (LinkedIn/Indeed/Glassdoor)", "Jooble"),
            (bool(self.rapidapi_key),
             self.fetch_linkedin_rapidapi, "LinkedIn (via RapidAPI)", "jobs", "LinkedIn"),
        ]

        opportunities = []
        for enabled, fetcher, label, suffix, error_label in sources:
            if not enabled:
                continue
            try:
                jobs = await fetcher()
                opportunities.extend(jobs)
                print(f" {label}: {len(jobs)} {suffix}")
            except Exception as e:
                print(f" {error_label} error: {e}")

        return opportunities

    # ===========================================
    # FREE APIs (No registration needed)
    # ===========================================

    async def fetch_arbeitnow(self) -> list[dict]:
        """
        Fetch from Arbeitnow API - free, no registration.

        Only the first 30 listings are inspected, and only those whose
        title or tags contain one of the tech keywords are kept.
        """
        opportunities = []

        # Filter for relevant tech jobs.
        # NOTE(review): this is plain substring matching, so short keywords
        # such as "ai" also match inside longer words (e.g. "maintain").
        keywords = ("machine learning", "ai", "data", "engineer", "developer",
                    "software", "python", "intern", "research", "robotics",
                    "backend", "frontend", "fullstack", "devops")

        try:
            url = "https://www.arbeitnow.com/api/job-board-api"

            async with httpx.AsyncClient() as client:
                response = await client.get(
                    url,
                    headers={"User-Agent": "PIOE/2.0"},
                    timeout=30
                )

            if response.status_code != 200:
                return []

            data = response.json()

            for job in (data.get("data") or [])[:30]:
                title = (job.get("title") or "").lower()
                tags = " ".join(job.get("tags") or []).lower()
                combined = f"{title} {tags}"

                if not any(kw in combined for kw in keywords):
                    continue

                opportunities.append({
                    "title": f"[Arbeitnow] {job.get('title') or ''}",
                    "raw_text": self._strip_html(job.get("description", ""))[:2000],
                    "url": job.get("url", ""),
                    "source_type": "job",
                    "source_name": f"Arbeitnow ({job.get('company_name') or 'Unknown'})",
                    "published_at": self._parse_date(job.get("created_at")),
                    "metadata": {
                        "company": job.get("company_name"),
                        "location": job.get("location"),
                        "remote": job.get("remote", False),
                        "tags": job.get("tags", []),
                        "region": "remote_global" if job.get("remote") else "global"
                    }
                })

        except Exception as e:
            print(f" Arbeitnow fetch error: {e}")

        return opportunities

    async def fetch_themuse(self) -> list[dict]:
        """
        Fetch from The Muse API - free, no registration.

        Page 1 of three categories is requested; at most 10 jobs are kept
        per category. A failing category is printed and skipped.
        """
        opportunities = []

        categories = ["Data Science", "Engineering", "Software Engineering"]

        for category in categories:
            try:
                url = "https://www.themuse.com/api/public/jobs"
                params = {"category": category, "page": 1}

                async with httpx.AsyncClient() as client:
                    response = await client.get(
                        url, params=params,
                        headers={"User-Agent": "PIOE/2.0"},
                        timeout=30
                    )

                if response.status_code != 200:
                    continue

                data = response.json()

                for job in (data.get("results") or [])[:10]:
                    # "or" fallbacks: a null nested object must not abort
                    # the remaining jobs of this category.
                    company = job.get("company") or {}
                    refs = job.get("refs") or {}
                    levels = job.get("levels") or []
                    opportunities.append({
                        "title": f"[TheMuse] {job.get('name') or ''}",
                        "raw_text": self._strip_html(job.get("contents", ""))[:2000],
                        "url": refs.get("landing_page", ""),
                        "source_type": "job",
                        "source_name": f"TheMuse ({company.get('name') or 'Unknown'})",
                        "published_at": self._parse_date(job.get("publication_date")),
                        "metadata": {
                            "company": company.get("name"),
                            "locations": [loc.get("name") for loc in job.get("locations") or []],
                            "level": levels[0].get("name") if levels else None,
                            "region": "global"
                        }
                    })

            except Exception as e:
                print(f" TheMuse '{category}' error: {e}")

        return opportunities

    async def fetch_remotive(self) -> list[dict]:
        """
        Fetch from Remotive API - free, no registration.

        Three categories are requested with ``limit=15``; listings whose
        title contains a non-tech keyword are dropped.
        """
        opportunities = []

        categories = ["software-dev", "data", "devops-sysadmin"]

        # Skip non-tech roles (substring match on the lower-cased title).
        skip_keywords = ("sales", "marketing", "recruiter", "hr ", "customer support")

        for category in categories:
            try:
                url = "https://remotive.com/api/remote-jobs"
                params = {"category": category, "limit": 15}

                async with httpx.AsyncClient() as client:
                    response = await client.get(url, params=params, timeout=30)

                if response.status_code != 200:
                    continue

                data = response.json()

                for job in data.get("jobs") or []:
                    title_lower = (job.get("title") or "").lower()

                    if any(skip in title_lower for skip in skip_keywords):
                        continue

                    opportunities.append({
                        "title": f"[Remote] {job.get('title') or ''}",
                        "raw_text": self._strip_html(job.get("description", ""))[:2000],
                        "url": job.get("url", ""),
                        "source_type": "job",
                        "source_name": f"Remotive ({job.get('company_name') or 'Unknown'})",
                        "published_at": self._parse_date(job.get("publication_date")),
                        "metadata": {
                            "company": job.get("company_name"),
                            "location": job.get("candidate_required_location"),
                            "job_type": job.get("job_type"),
                            "salary": job.get("salary"),
                            "tags": job.get("tags", []),
                            "region": "remote_global"
                        }
                    })

            except Exception as e:
                print(f" Remotive '{category}' error: {e}")

        return opportunities

    # ===========================================
    # APIs WITH FREE API KEYS
    # ===========================================

    async def fetch_adzuna(self) -> list[dict]:
        """
        Fetch from Adzuna API.
        Free tier: 250 requests/day
        Get key at: https://developer.adzuna.com/

        Only the first two keywords are searched (US endpoint, page 1,
        10 results each) to conserve quota.
        """
        opportunities = []

        keywords = ["machine learning", "AI engineer", "data scientist", "robotics"]

        for keyword in keywords[:2]:  # Limit to conserve quota
            try:
                url = "https://api.adzuna.com/v1/api/jobs/us/search/1"
                params = {
                    "app_id": self.adzuna_app_id,
                    "app_key": self.adzuna_api_key,
                    "what": keyword,
                    "results_per_page": 10,
                    "content-type": "application/json"
                }

                async with httpx.AsyncClient() as client:
                    response = await client.get(url, params=params, timeout=30)

                if response.status_code != 200:
                    continue

                data = response.json()

                for job in data.get("results") or []:
                    company = job.get("company") or {}
                    location = job.get("location") or {}

                    opportunities.append({
                        "title": f"[Adzuna] {job.get('title') or ''}",
                        # A null description must not raise on slicing.
                        "raw_text": (job.get("description") or "")[:2000],
                        "url": job.get("redirect_url", ""),
                        "source_type": "job",
                        "source_name": f"Adzuna ({company.get('display_name') or 'Unknown'})",
                        "published_at": self._parse_date(job.get("created")),
                        "metadata": {
                            "company": company.get("display_name"),
                            "location": location.get("display_name"),
                            "salary_min": job.get("salary_min"),
                            "salary_max": job.get("salary_max"),
                            "contract_type": job.get("contract_type"),
                            "region": "global"
                        }
                    })

            except Exception as e:
                print(f" Adzuna '{keyword}' error: {e}")

        return opportunities

    async def fetch_jooble(self) -> list[dict]:
        """
        Fetch from Jooble API - aggregates 70+ sources including:
        - LinkedIn
        - Indeed
        - Glassdoor
        - Monster
        - CareerBuilder

        Free tier available.
        Get key at: https://jooble.org/api/about

        Only the first five queries are sent, keeping at most 10 jobs each.
        """
        opportunities = []

        search_queries = [
            "machine learning engineer",
            "AI internship",
            "data scientist",
            "robotics engineer",
            "computer vision",
            "scholarship",
            "fellowship"
        ]

        for query in search_queries[:5]:  # Limit to conserve quota
            try:
                # The API key is part of the URL path.
                url = f"https://jooble.org/api/{self.jooble_api_key}"

                payload = {
                    "keywords": query,
                    "location": "",  # Worldwide
                }

                async with httpx.AsyncClient() as client:
                    response = await client.post(
                        url,
                        json=payload,
                        headers={"Content-Type": "application/json"},
                        timeout=30
                    )

                if response.status_code != 200:
                    continue

                data = response.json()

                for job in (data.get("jobs") or [])[:10]:
                    opportunities.append({
                        "title": f"[Jooble] {job.get('title') or ''}",
                        "raw_text": self._strip_html(job.get("snippet", ""))[:2000],
                        "url": job.get("link", ""),
                        "source_type": "job",
                        "source_name": f"Jooble ({job.get('company') or 'Unknown'})",
                        "published_at": self._parse_date(job.get("updated")),
                        "metadata": {
                            "company": job.get("company"),
                            "location": job.get("location"),
                            "salary": job.get("salary"),
                            "source": job.get("source"),  # Original source (LinkedIn, Indeed, etc.)
                            "region": "global"
                        }
                    })

            except Exception as e:
                print(f" Jooble '{query}' error: {e}")

        return opportunities

    async def fetch_linkedin_rapidapi(self) -> list[dict]:
        """
        Fetch LinkedIn jobs via RapidAPI.
        Free tier: 100 requests/month
        Get key at: https://rapidapi.com/jaypat87/api/linkedin-jobs-search

        Only the first two queries are sent (location "United States"),
        keeping at most 10 jobs each. A response that is not a JSON list
        yields nothing.
        """
        opportunities = []

        search_queries = [
            "machine learning",
            "AI engineer",
            "computer vision intern",
            "robotics"
        ]

        for query in search_queries[:2]:  # Limit to conserve quota
            try:
                url = "https://linkedin-jobs-search.p.rapidapi.com/"

                payload = {
                    "search_terms": query,
                    "location": "United States",
                    "page": "1"
                }

                headers = {
                    "content-type": "application/json",
                    "X-RapidAPI-Key": self.rapidapi_key,
                    "X-RapidAPI-Host": "linkedin-jobs-search.p.rapidapi.com"
                }

                async with httpx.AsyncClient() as client:
                    response = await client.post(
                        url,
                        json=payload,
                        headers=headers,
                        timeout=30
                    )

                if response.status_code != 200:
                    continue

                data = response.json()

                for job in data[:10] if isinstance(data, list) else []:
                    opportunities.append({
                        "title": f"[LinkedIn] {job.get('job_title') or ''}",
                        # A null description must not raise on slicing.
                        "raw_text": (job.get("job_description") or "")[:2000],
                        "url": job.get("linkedin_job_url_cleaned", job.get("job_url", "")),
                        "source_type": "job",
                        "source_name": f"LinkedIn ({job.get('company_name') or 'Unknown'})",
                        "published_at": self._parse_date(job.get("posted_date")),
                        "metadata": {
                            "company": job.get("company_name"),
                            "location": job.get("job_location"),
                            "linkedin_url": job.get("linkedin_job_url_cleaned"),
                            "region": "global"
                        }
                    })

            except Exception as e:
                print(f" LinkedIn '{query}' error: {e}")

        return opportunities

    # ===========================================
    # HELPER METHODS
    # ===========================================

    def _parse_date(self, date_str: Optional[str]) -> Optional[datetime]:
        """
        Parse various date formats.

        Values containing ``T`` are first tried as full ISO-8601 (a
        trailing ``Z`` is read as ``+00:00``). Everything else, and any
        value whose ISO parse fails, is parsed from its leading
        ``YYYY-MM-DD`` so a usable date is not discarded just because the
        time part is in an unsupported shape.

        Args:
            date_str: Date text (non-strings are converted with ``str``),
                or a falsy value.

        Returns:
            A ``datetime`` - timezone-aware only when the ISO text carried
            an offset - or ``None`` when nothing could be parsed.
        """
        if not date_str:
            return None

        text = str(date_str)

        if "T" in text:
            try:
                return datetime.fromisoformat(text.replace("Z", "+00:00"))
            except (ValueError, TypeError):
                pass  # fall back to the date-only prefix below

        try:
            return datetime.strptime(text[:10], "%Y-%m-%d")
        except (ValueError, TypeError):
            return None

    def _strip_html(self, text: str) -> str:
        """
        Remove HTML tags from text.

        Tags are replaced by a space (so words separated only by markup,
        e.g. ``</p><p>``, stay separate), character references such as
        ``&amp;`` are decoded, and whitespace is collapsed.

        Args:
            text: HTML fragment, or a falsy value.

        Returns:
            Plain text; ``""`` for falsy input.
        """
        if not text:
            return ""

        import html  # function-scope import: only this helper needs it

        without_tags = re.sub(r'<[^>]+>', ' ', text)
        # Decode before collapsing so &nbsp; is normalised as whitespace too.
        return " ".join(html.unescape(without_tags).split())
|
backend/ingestion/reddit_client.py
ADDED
|
@@ -0,0 +1,185 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
PIOE Reddit Client
|
| 3 |
+
|
| 4 |
+
Monitors curated subreddits for opportunities with strict filtering.
|
| 5 |
+
"""
|
| 6 |
+
from datetime import datetime
|
| 7 |
+
from typing import Optional
|
| 8 |
+
import httpx
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class RedditClient:
|
| 12 |
+
"""
|
| 13 |
+
Client for Reddit using public JSON API.
|
| 14 |
+
|
| 15 |
+
Note: For production, consider using PRAW with OAuth for better rate limits.
|
| 16 |
+
This implementation uses public endpoints which are rate-limited.
|
| 17 |
+
"""
|
| 18 |
+
|
| 19 |
+
BASE_URL = "https://www.reddit.com"
|
| 20 |
+
|
| 21 |
+
# Curated subreddits for high-signal content
|
| 22 |
+
TARGET_SUBREDDITS = [
|
| 23 |
+
"computervision",
|
| 24 |
+
"robotics",
|
| 25 |
+
"MachineLearning",
|
| 26 |
+
"artificial",
|
| 27 |
+
"learnmachinelearning",
|
| 28 |
+
"deeplearning",
|
| 29 |
+
"hackathons",
|
| 30 |
+
"scholarships",
|
| 31 |
+
"cscareerquestions",
|
| 32 |
+
"roboticsengineering",
|
| 33 |
+
]
|
| 34 |
+
|
| 35 |
+
# Keywords that indicate opportunities
|
| 36 |
+
OPPORTUNITY_KEYWORDS = [
|
| 37 |
+
"internship", "intern", "hiring", "job",
|
| 38 |
+
"hackathon", "competition", "challenge",
|
| 39 |
+
"scholarship", "fellowship", "grant", "funding",
|
| 40 |
+
"research assistant", "ra position", "phd",
|
| 41 |
+
"call for papers", "cfp", "workshop",
|
| 42 |
+
"applications open", "apply now", "deadline"
|
| 43 |
+
]
|
| 44 |
+
|
| 45 |
+
# Keywords to filter out (noise)
|
| 46 |
+
NOISE_KEYWORDS = [
|
| 47 |
+
"meme", "funny", "eli5", "rant",
|
| 48 |
+
"top 10", "best tools", "what are",
|
| 49 |
+
"vs", "versus", "comparison"
|
| 50 |
+
]
|
| 51 |
+
|
| 52 |
+
def __init__(self, user_agent: str = "PIOE/1.0"):
|
| 53 |
+
self.user_agent = user_agent
|
| 54 |
+
self._headers = {"User-Agent": user_agent}
|
| 55 |
+
|
| 56 |
+
async def fetch_all(self, subreddits: Optional[list[str]] = None) -> list[dict]:
|
| 57 |
+
"""Fetch from all target subreddits with filtering."""
|
| 58 |
+
subreddits = subreddits or self.TARGET_SUBREDDITS
|
| 59 |
+
all_opportunities = []
|
| 60 |
+
|
| 61 |
+
for subreddit in subreddits:
|
| 62 |
+
try:
|
| 63 |
+
posts = await self.fetch_subreddit(subreddit)
|
| 64 |
+
all_opportunities.extend(posts)
|
| 65 |
+
except Exception as e:
|
| 66 |
+
print(f"Error fetching r/{subreddit}: {e}")
|
| 67 |
+
|
| 68 |
+
return all_opportunities
|
| 69 |
+
|
| 70 |
+
async def fetch_subreddit(
    self,
    subreddit: str,
    sort: str = "new",
    limit: int = 25
) -> list[dict]:
    """
    Download one subreddit listing and return its opportunity-like posts.

    Requests ``{BASE_URL}/r/<subreddit>/<sort>.json`` with a 30 second
    timeout, then hands the ``data.children`` array of the JSON response to
    _filter_and_parse(), which applies the keyword/score filtering.

    Args:
        subreddit: Subreddit name without the ``r/`` prefix.
        sort: Listing name placed in the URL path (default ``"new"``).
        limit: Value sent as the ``limit`` query parameter.

    Returns:
        Normalized opportunity dicts produced by _filter_and_parse().

    Raises:
        Whatever httpx raises for transport failures, and the
        raise_for_status() error for non-success HTTP responses.
    """
    listing_url = f"{self.BASE_URL}/r/{subreddit}/{sort}.json"
    query = {"limit": limit}

    async with httpx.AsyncClient() as http:
        resp = await http.get(
            listing_url,
            params=query,
            headers=self._headers,
            timeout=30
        )
        resp.raise_for_status()
        payload = resp.json()

    # A missing "data" or "children" key degrades to "no posts".
    children = payload.get("data", {}).get("children", [])
    return self._filter_and_parse(children, subreddit)
|
| 97 |
+
|
| 98 |
+
def _filter_and_parse(self, posts: list, subreddit: str) -> list[dict]:
|
| 99 |
+
"""Filter posts for opportunities and parse to normalized format."""
|
| 100 |
+
opportunities = []
|
| 101 |
+
|
| 102 |
+
for post_wrapper in posts:
|
| 103 |
+
post = post_wrapper.get("data", {})
|
| 104 |
+
|
| 105 |
+
# Skip removed/deleted posts
|
| 106 |
+
if post.get("removed_by_category") or post.get("selftext") == "[removed]":
|
| 107 |
+
continue
|
| 108 |
+
|
| 109 |
+
title = post.get("title", "").lower()
|
| 110 |
+
text = post.get("selftext", "").lower()
|
| 111 |
+
combined = f"{title} {text}"
|
| 112 |
+
|
| 113 |
+
# Filter out noise
|
| 114 |
+
if any(noise in combined for noise in self.NOISE_KEYWORDS):
|
| 115 |
+
continue
|
| 116 |
+
|
| 117 |
+
# Check for opportunity keywords
|
| 118 |
+
has_opportunity = any(kw in combined for kw in self.OPPORTUNITY_KEYWORDS)
|
| 119 |
+
|
| 120 |
+
# Also include posts with high scores (community validated)
|
| 121 |
+
high_score = post.get("score", 0) > 50
|
| 122 |
+
|
| 123 |
+
if not has_opportunity and not high_score:
|
| 124 |
+
continue
|
| 125 |
+
|
| 126 |
+
# Calculate engagement
|
| 127 |
+
engagement = post.get("score", 0) + post.get("num_comments", 0)
|
| 128 |
+
|
| 129 |
+
opportunity = {
|
| 130 |
+
"title": f"[Reddit] {post.get('title', '')}",
|
| 131 |
+
"raw_text": post.get("selftext", "")[:2000] or post.get("title", ""),
|
| 132 |
+
"url": f"https://reddit.com{post.get('permalink', '')}",
|
| 133 |
+
"source_type": "reddit",
|
| 134 |
+
"source_name": f"r/{subreddit}",
|
| 135 |
+
"published_at": self._parse_timestamp(post.get("created_utc")),
|
| 136 |
+
"social_engagement": engagement,
|
| 137 |
+
"metadata": {
|
| 138 |
+
"subreddit": subreddit,
|
| 139 |
+
"author": post.get("author"),
|
| 140 |
+
"score": post.get("score", 0),
|
| 141 |
+
"num_comments": post.get("num_comments", 0),
|
| 142 |
+
"flair": post.get("link_flair_text"),
|
| 143 |
+
"is_self": post.get("is_self", True),
|
| 144 |
+
"external_url": post.get("url") if not post.get("is_self") else None
|
| 145 |
+
}
|
| 146 |
+
}
|
| 147 |
+
|
| 148 |
+
opportunities.append(opportunity)
|
| 149 |
+
|
| 150 |
+
return opportunities
|
| 151 |
+
|
| 152 |
+
def _parse_timestamp(self, timestamp: Optional[float]) -> Optional[datetime]:
|
| 153 |
+
"""Convert Unix timestamp to datetime."""
|
| 154 |
+
if not timestamp:
|
| 155 |
+
return None
|
| 156 |
+
try:
|
| 157 |
+
return datetime.utcfromtimestamp(timestamp)
|
| 158 |
+
except Exception:
|
| 159 |
+
return None
|
| 160 |
+
|
| 161 |
+
async def search(self, query: str, subreddit: Optional[str] = None) -> list[dict]:
    """
    Run a Reddit search and return the opportunity-like hits.

    When ``subreddit`` is given the search is sent to that subreddit's
    ``search.json`` with ``restrict_sr=on``; otherwise the site-wide
    ``search.json`` is used with ``restrict_sr=off``. Results are requested
    newest-first, 25 at most, with a 30 second timeout.

    Args:
        query: Search string sent as the ``q`` parameter.
        subreddit: Optional subreddit name (without ``r/``) to search in.

    Returns:
        Normalized opportunity dicts from _filter_and_parse(); the source
        label is the subreddit name, or ``"search"`` for site-wide queries.

    Raises:
        Whatever httpx raises for transport failures, and the
        raise_for_status() error for non-success HTTP responses.
    """
    scoped = bool(subreddit)
    if scoped:
        endpoint = f"{self.BASE_URL}/r/{subreddit}/search.json"
    else:
        endpoint = f"{self.BASE_URL}/search.json"

    query_params = {
        "q": query,
        "sort": "new",
        "limit": 25,
        "restrict_sr": "on" if scoped else "off"
    }

    async with httpx.AsyncClient() as http:
        resp = await http.get(
            endpoint,
            params=query_params,
            headers=self._headers,
            timeout=30
        )
        resp.raise_for_status()
        payload = resp.json()

    hits = payload.get("data", {}).get("children", [])
    return self._filter_and_parse(hits, subreddit or "search")
|
backend/ingestion/rss_client.py
ADDED
|
@@ -0,0 +1,220 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
PIOE RSS Client
|
| 3 |
+
|
| 4 |
+
Parses RSS/Atom feeds from blogs, news sites, and announcement pages.
|
| 5 |
+
"""
|
| 6 |
+
import feedparser
|
| 7 |
+
from datetime import datetime
|
| 8 |
+
from typing import Optional
|
| 9 |
+
import httpx
|
| 10 |
+
import re
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class RSSClient:
    """
    Client for RSS/Atom feeds.

    Downloads each configured feed with httpx, parses it with feedparser,
    drops entries that look like discussions or personal stories, and
    returns the rest as normalized opportunity dicts.
    """

    # Patterns that indicate non-actionable content (discussions, not opportunities).
    # They are matched case-insensitively against the entry TITLE only.
    FILTER_OUT_PATTERNS = [
        r'^Ask HN:',  # Hacker News discussions
        r'^Show HN:',  # Show HN posts (usually not opportunities)
        r'^Tell HN:',  # Tell HN posts
        r'my internship',  # Personal stories about internships
        r'my experience',  # Personal experiences
        r'I (got|landed|received|missed)',  # Personal stories
        r'How (do|did|can|should) I',  # Questions, not opportunities
        r'\?$',  # Questions
        r'AMA$',  # AMAs
        r'white british',  # News articles, not opportunities
        r'is (this|it) (real|fake|legit)',  # Verification questions
    ]

    # Patterns that indicate REAL opportunities.
    # They are matched case-insensitively against title + description.
    OPPORTUNITY_PATTERNS = [
        r'hiring',
        r'apply now',
        r'deadline',
        r'applications? open',
        r'we are looking',
        r'join (our|the) team',
        r'open position',
        r'fellowship program',
        r'grant program',
        r'scholarship',
        r'bounty',
        r'\$\d+',  # Money amounts
        r'remote (ok|friendly|position)',
    ]

    # Default feeds - ONLY actionable opportunity sources
    DEFAULT_FEEDS = [
        # HN Jobs - ACTUAL job postings, not discussions
        {"name": "Hacker News Jobs", "url": "https://hnrss.org/jobs", "type": "job"},

        # ArXiv RSS (research papers - always relevant)
        {"name": "ArXiv CS.CV", "url": "https://rss.arxiv.org/rss/cs.CV", "type": "research"},
        {"name": "ArXiv CS.RO", "url": "https://rss.arxiv.org/rss/cs.RO", "type": "research"},
        {"name": "ArXiv CS.AI", "url": "https://rss.arxiv.org/rss/cs.AI", "type": "research"},

        # Fellowships & Scholarships (working feeds only)
        {"name": "ProFellow", "url": "https://www.profellow.com/feed/", "type": "fellowship"},
        {"name": "Scholars4Dev", "url": "https://www.scholars4dev.com/feed/", "type": "scholarship"},
        # NOTE: OpportunityDesk, AfterSchoolAfrica, WayUp removed - broken/invalid XML

        # Remote Jobs
        {"name": "RemoteOK AI", "url": "https://remoteok.com/remote-ai-jobs.rss", "type": "job"},
        {"name": "RemoteOK Intern", "url": "https://remoteok.com/remote-intern-jobs.rss", "type": "internship"},
        {"name": "RemoteOK ML", "url": "https://remoteok.com/remote-machine-learning-jobs.rss", "type": "job"},
    ]

    def __init__(self, custom_feeds: Optional[list[dict]] = None):
        """
        Args:
            custom_feeds: Feed configs (dicts with ``name``, ``url`` and
                optional ``type``). When None or empty, DEFAULT_FEEDS is used.
        """
        # Copy the list: add_feed() appends to self.feeds, and without a copy
        # that append would land in the class-level DEFAULT_FEEDS (shared by
        # every instance) or in the caller's own list.
        self.feeds = list(custom_feeds or self.DEFAULT_FEEDS)

    async def fetch_all(self) -> list[dict]:
        """
        Fetch every configured feed, one after another.

        Returns:
            All opportunities from all feeds, concatenated in feed order.
            A feed that raises (e.g. a config missing ``url``/``name``) is
            reported on stdout and skipped.
        """
        all_opportunities = []

        for feed_config in self.feeds:
            try:
                opportunities = await self.fetch_feed(
                    feed_config["url"],
                    feed_config["name"],
                    feed_config.get("type", "rss")
                )
                all_opportunities.extend(opportunities)
            except Exception as e:
                print(f"Error fetching {feed_config['name']}: {e}")

        return all_opportunities

    async def fetch_feed(self, url: str, source_name: str, feed_type: str = "rss") -> list[dict]:
        """
        Fetch and parse a single RSS/Atom feed.

        Args:
            url: Feed URL (redirects are followed, 30 second timeout).
            source_name: Label stored in each result's ``source_name``.
            feed_type: Feed category; drives the filtering in
                _is_likely_opportunity() and _parse_entries().

        Returns:
            Normalized opportunity dicts. An empty list is returned (and a
            message printed) on any HTTP failure, including non-success
            status codes, or when the feed is malformed and has no entries.
        """
        try:
            async with httpx.AsyncClient() as client:
                response = await client.get(url, timeout=30, follow_redirects=True)
                # Do not hand 4xx/5xx error pages to the feed parser.
                response.raise_for_status()
                content = response.text
        except Exception as e:
            print(f"HTTP error for {url}: {e}")
            return []

        feed = feedparser.parse(content)

        # bozo marks a malformed feed; still use it if entries were recovered.
        if feed.bozo and not feed.entries:
            print(f"Feed parse error for {url}: {feed.bozo_exception}")
            return []

        return self._parse_entries(feed.entries, source_name, feed_type)

    def _is_discussion_not_opportunity(self, title: str, description: str) -> bool:
        """
        Check if an entry is a discussion post rather than an actionable opportunity.

        Only ``title`` is tested against FILTER_OUT_PATTERNS;
        ``description`` is accepted for interface symmetry with
        _is_likely_opportunity() but is not inspected.
        """
        return any(
            re.search(pattern, title, re.IGNORECASE)
            for pattern in self.FILTER_OUT_PATTERNS
        )

    def _is_likely_opportunity(self, title: str, description: str, feed_type: str) -> bool:
        """
        Check if content is likely a real opportunity.

        Entries from feeds typed ``research``, ``fellowship``,
        ``scholarship`` or ``job`` are trusted unconditionally. For any
        other feed type, title + description must match at least one of
        OPPORTUNITY_PATTERNS.
        """
        if feed_type in ("research", "fellowship", "scholarship", "job"):
            return True

        text = f"{title} {description}".lower()

        return any(
            re.search(pattern, text, re.IGNORECASE)
            for pattern in self.OPPORTUNITY_PATTERNS
        )

    def _parse_entries(self, entries: list, source_name: str, feed_type: str) -> list[dict]:
        """
        Parse feed entries into normalized opportunities.

        Only the first 20 entries are considered. An entry is skipped when
        its title looks like a discussion, or when it does not look like an
        opportunity and the feed type is not ``news``/``blog`` (those two
        types are passed through leniently). An entry that raises while
        being parsed is reported on stdout and skipped.

        Returns:
            Dicts with the keys ``title``, ``raw_text`` (HTML-stripped, at
            most 2000 chars), ``url``, ``source_type``, ``source_name``,
            ``published_at`` and ``metadata``.
        """
        opportunities = []

        for entry in entries[:20]:  # Limit per feed
            try:
                title = entry.get("title", "").strip()

                # Get description/summary, in order of preference
                description = ""
                if "summary" in entry:
                    description = entry.summary
                elif "description" in entry:
                    description = entry.description
                elif "content" in entry and entry.content:
                    description = entry.content[0].get("value", "")

                # Clean HTML tags (basic)
                description = self._strip_html(description)

                # QUALITY FILTER: Skip discussions and non-opportunities
                if self._is_discussion_not_opportunity(title, description):
                    continue

                # QUALITY FILTER: Only keep likely opportunities
                if not self._is_likely_opportunity(title, description, feed_type):
                    # news/blog feeds are kept even without a pattern match
                    if feed_type not in ["news", "blog"]:
                        continue

                # Get published date (falls back to the updated date)
                published = None
                if "published_parsed" in entry and entry.published_parsed:
                    published = datetime(*entry.published_parsed[:6])
                elif "updated_parsed" in entry and entry.updated_parsed:
                    published = datetime(*entry.updated_parsed[:6])

                opportunity = {
                    "title": title,
                    "raw_text": description[:2000],
                    "url": entry.get("link", ""),
                    "source_type": "rss",
                    "source_name": source_name,
                    "published_at": published,
                    "metadata": {
                        "feed_type": feed_type,
                        "author": entry.get("author"),
                        "tags": [tag.term for tag in entry.get("tags", [])]
                    }
                }

                opportunities.append(opportunity)

            except Exception as e:
                print(f"Error parsing entry: {e}")

        return opportunities

    def _strip_html(self, text: str) -> str:
        """Remove HTML tags from text and collapse runs of whitespace."""
        clean = re.sub(r'<[^>]+>', '', text)
        return " ".join(clean.split())

    def add_feed(self, name: str, url: str, feed_type: str = "rss"):
        """Add a new feed to this client's list of monitored feeds."""
        self.feeds.append({
            "name": name,
            "url": url,
            "type": feed_type
        })
|
| 220 |
+
|
backend/ingestion/scheduler.py
ADDED
|
@@ -0,0 +1,371 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
PIOE Ingestion Scheduler - Version 2.0
|
| 3 |
+
|
| 4 |
+
Orchestrates periodic data collection from all sources.
|
| 5 |
+
Now includes Grant Intelligence and ROI scoring.
|
| 6 |
+
"""
|
| 7 |
+
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
| 8 |
+
from datetime import datetime
|
| 9 |
+
from sqlalchemy.orm import Session
|
| 10 |
+
|
| 11 |
+
from ..config import get_settings
|
| 12 |
+
from ..database import SessionLocal
|
| 13 |
+
from ..models import Opportunity, Source, SourceType, OpportunityCategory, Domain, Region, RiskLevel
|
| 14 |
+
from ..intelligence import RelevanceScorer, NoveltyDetector, CredibilityScorer, OpportunityClassifier
|
| 15 |
+
from ..intelligence import ROIScorer, SilentOpportunityDetector
|
| 16 |
+
|
| 17 |
+
from .arxiv_client import ArxivClient
|
| 18 |
+
from .github_client import GitHubClient
|
| 19 |
+
from .rss_client import RSSClient
|
| 20 |
+
from .reddit_client import RedditClient
|
| 21 |
+
from .superteam_client import SuperteamClient
|
| 22 |
+
from .web_scraper import WebScraper
|
| 23 |
+
from .careers_client import CareersClient, InternshipClient
|
| 24 |
+
from .grants_client import GrantsClient, NigeriaGrantsClient
|
| 25 |
+
from .jobboard_client import JobBoardClient
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
class IngestionScheduler:
    """
    Coordinates all data ingestion and processing.

    Owns one client per source and one instance of each intelligence
    component, runs the fetches on an APScheduler ``AsyncIOScheduler``, and
    pushes every fetched item through scoring, de-duplication,
    classification and ROI estimation before saving it.

    PIOE 2.0: Now includes grant intelligence and ROI scoring.
    """

    def __init__(self, user_region: str = "nigeria"):
        """
        Args:
            user_region: Lower-case region name. Passed to the ROI scorer
                and compared with each item's region to set ``region_weight``.
        """
        self.settings = get_settings()
        self.scheduler = AsyncIOScheduler()
        self.user_region = user_region

        # Initialize clients
        self.arxiv = ArxivClient(max_results=30)
        self.github = GitHubClient(token=self.settings.github_token)
        self.rss = RSSClient()
        self.reddit = RedditClient()
        self.superteam = SuperteamClient()
        self.scraper = WebScraper()
        self.careers = CareersClient()
        self.internships = InternshipClient()

        # PIOE 2.0: Job boards (REAL opportunities, not discussions)
        self.jobboards = JobBoardClient(
            adzuna_app_id=self.settings.adzuna_app_id,
            adzuna_api_key=self.settings.adzuna_api_key,
            jooble_api_key=self.settings.jooble_api_key,
            rapidapi_key=self.settings.rapidapi_key
        )

        # PIOE 2.0: Grant clients
        self.grants = GrantsClient()
        self.nigeria_grants = NigeriaGrantsClient()

        # Initialize intelligence
        self.scorer = RelevanceScorer()
        self.novelty = NoveltyDetector()
        self.credibility = CredibilityScorer()
        self.classifier = OpportunityClassifier()

        # PIOE 2.0: Decision intelligence
        self.roi_scorer = ROIScorer(user_region=user_region)
        self.silent_detector = SilentOpportunityDetector()

    def start(self):
        """Register the two recurring jobs and start the scheduler."""
        # Run ingestion every N hours
        self.scheduler.add_job(
            self.run_full_ingestion,
            'interval',
            hours=self.settings.ingestion_interval_hours,
            id='full_ingestion'
        )

        # Run high-priority sources more frequently (every 2 hours)
        self.scheduler.add_job(
            self.run_priority_ingestion,
            'interval',
            hours=2,
            id='priority_ingestion'
        )

        self.scheduler.start()
        print(f"Scheduler started - full ingestion every {self.settings.ingestion_interval_hours}h")

    def stop(self):
        """Stop the scheduler if it is running; never raises."""
        try:
            if self.scheduler.running:
                self.scheduler.shutdown()
        except Exception as e:
            # Shutdown is best-effort, but leave a trace instead of
            # discarding the error silently.
            print(f"Scheduler shutdown error (ignored): {e}")

    async def run_full_ingestion(self):
        """
        Run ingestion from all enabled sources.

        Sources are fetched and processed one at a time; a failure in one
        source is recorded and does not stop the others.

        Returns:
            Dict with ``total_fetched``, ``total_saved`` and a ``sources``
            mapping of source name to ``{"fetched", "saved"}`` or
            ``{"error"}``.
        """
        print(f"[{datetime.utcnow()}] Starting full ingestion...")

        results = {
            "total_fetched": 0,
            "total_saved": 0,
            "sources": {}
        }

        db = SessionLocal()

        try:
            # (label, fetch callable, source type). The callables are invoked
            # lazily inside the loop so no coroutine object is created before
            # it is about to be awaited.
            # PIOE 2.0 includes grant + job board sources.
            sources = [
                ("arXiv", self.arxiv.fetch, SourceType.ARXIV),
                ("GitHub", self.github.fetch_trending, SourceType.GITHUB),
                ("RSS", self.rss.fetch_all, SourceType.RSS),
                # DISABLED: Reddit returns too many discussions, not opportunities
                # ("Reddit", self.reddit.fetch_all, SourceType.REDDIT),
                ("Superteam", self.superteam.fetch_all, SourceType.SUPERTEAM),
                # ("Web Scraper", self.scraper.fetch_all, SourceType.WEB_SCRAPE),  # Often blocked
                # ("Careers", self.careers.fetch_all, SourceType.WEB_SCRAPE),  # Often blocked
                # ("Internships", self.internships.fetch_all, SourceType.WEB_SCRAPE),  # Often blocked
                # PIOE 2.0: Job boards (REAL opportunities)
                ("Job Boards", self.jobboards.fetch_all, SourceType.WEB_SCRAPE),
                # PIOE 2.0: Grant sources
                ("Ecosystem Grants", self.grants.fetch_all, SourceType.GRANT_PLATFORM),
                ("Nigeria Grants", self.nigeria_grants.fetch_all, SourceType.GOV_PORTAL),
            ]

            for source_name, fetch, source_type in sources:
                try:
                    opportunities = await fetch()
                    saved = self._process_and_save(db, opportunities, source_type)

                    results["sources"][source_name] = {
                        "fetched": len(opportunities),
                        "saved": saved
                    }
                    results["total_fetched"] += len(opportunities)
                    results["total_saved"] += saved

                    print(f"  {source_name}: {len(opportunities)} fetched, {saved} saved")

                except Exception as e:
                    print(f"  {source_name}: ERROR - {e}")
                    results["sources"][source_name] = {"error": str(e)}

        finally:
            db.close()

        print(f"[{datetime.utcnow()}] Ingestion complete: {results['total_saved']}/{results['total_fetched']} saved")
        return results

    async def run_priority_ingestion(self):
        """Run ingestion for high-priority sources only (arXiv, GitHub, Superteam)."""
        print(f"[{datetime.utcnow()}] Starting priority ingestion...")

        db = SessionLocal()

        try:
            # Only run arXiv, GitHub, and Superteam (highest signal sources).
            # Callables, not coroutines: each fetch is started when awaited.
            sources = [
                ("arXiv", self.arxiv.fetch, SourceType.ARXIV),
                ("GitHub", self.github.fetch_trending, SourceType.GITHUB),
                ("Superteam", self.superteam.fetch_all, SourceType.SUPERTEAM),
            ]

            for source_name, fetch, source_type in sources:
                try:
                    opportunities = await fetch()
                    saved = self._process_and_save(db, opportunities, source_type)
                    print(f"  {source_name}: {saved} new")
                except Exception as e:
                    print(f"  {source_name}: ERROR - {e}")

        finally:
            db.close()

    @staticmethod
    def _to_enum(enum_cls, value, default):
        """Return the ``enum_cls`` member whose value is ``value``, else ``default``."""
        if value in [member.value for member in enum_cls]:
            return enum_cls(value)
        return default

    def _process_and_save(
        self,
        db: Session,
        raw_opportunities: list[dict],
        source_type: SourceType
    ) -> int:
        """
        Process raw opportunities through the intelligence layer and save.

        Each item is skipped when its URL is already stored (or was already
        added earlier in this batch), when its relevance or credibility
        score is below the configured minimum, or when the novelty detector
        flags it as a duplicate or as recycled content. Surviving items are
        classified, ROI-scored and added to the session; the whole batch is
        committed once at the end.

        Args:
            db: Open SQLAlchemy session; committed (or rolled back) here,
                closed by the caller.
            raw_opportunities: Normalized dicts produced by a source client.
            source_type: Source type stored on every saved record.

        Returns:
            Number of opportunities saved; 0 if the final commit fails.
        """
        saved_count = 0
        # URLs added during this call. Guards against the same URL appearing
        # twice in one batch when the pending rows are not yet visible to
        # the existence query below.
        seen_urls = set()

        for raw in raw_opportunities:
            try:
                url = raw.get("url")

                if url and url in seen_urls:
                    continue

                # Skip if already exists (by URL)
                existing = db.query(Opportunity).filter(
                    Opportunity.url == url
                ).first()

                if existing:
                    continue

                # Combine title and text for analysis
                full_text = f"{raw.get('title', '')} {raw.get('raw_text', '')}"

                # Score relevance
                scores = self.scorer.score(raw.get("raw_text", ""), raw.get("title", ""))

                # Skip low relevance
                if scores["relevance_score"] < self.settings.min_relevance_score:
                    continue

                # Get embedding for novelty detection (first 1000 chars only)
                embedding = self.scorer.get_embedding(full_text[:1000])

                # Check novelty
                novelty_result = self.novelty.calculate_novelty(embedding, db)

                # Skip duplicates
                if novelty_result["is_duplicate"]:
                    continue

                # Skip recycled content
                if self.novelty.is_recycled_content(full_text):
                    continue

                # Calculate credibility
                cred_result = self.credibility.score(
                    source_type,
                    raw.get("raw_text", ""),
                    raw.get("metadata", {}),
                    social_engagement=raw.get("social_engagement", 0)
                )

                # Skip low credibility
                if cred_result["credibility_score"] < self.settings.min_credibility_score:
                    continue

                # Classify
                classification = self.classifier.classify(
                    raw.get("raw_text", ""),
                    raw.get("title", ""),
                    source_type=raw.get("source_type", ""),
                    source_name=raw.get("source_name", "")
                )

                # PIOE 2.0: Check for silent opportunities
                silent_result = self.silent_detector.detect(
                    raw.get("raw_text", ""),
                    raw.get("title", "")
                )

                # Override category if silent opportunity detected
                final_category = classification["category"]
                if silent_result["is_silent_opportunity"]:
                    final_category = silent_result["recommended_category"]

                # PIOE 2.0: Calculate ROI score
                metadata = raw.get("metadata", {})
                roi_result = self.roi_scorer.calculate_roi(
                    category=final_category,
                    deadline=raw.get("deadline"),
                    grant_size=metadata.get("grant_size_max"),
                    region=metadata.get("region", "global"),
                    extra_data=metadata
                )

                # Calculate combined score (now includes ROI); weights sum to 1.0
                combined_score = (
                    0.3 * scores["relevance_score"] +
                    0.2 * novelty_result["novelty_score"] +
                    0.2 * cred_result["credibility_score"] +
                    0.3 * roi_result["roi_score"]  # PIOE 2.0: Weight ROI heavily
                )

                # Prepare enhanced metadata
                enhanced_metadata = {
                    **metadata,
                    "silent_opportunity": silent_result["is_silent_opportunity"],
                    "silent_type": silent_result.get("opportunity_type"),
                    "roi_reasoning": roi_result["reasoning"],
                }

                # Determine region (unknown names fall back to GLOBAL)
                region_str = (metadata.get("region") or "global").lower()
                region_map = {
                    "nigeria": Region.NIGERIA,
                    "africa": Region.AFRICA,
                    "global": Region.GLOBAL,
                    "remote_africa": Region.REMOTE_AFRICA,
                    "remote_global": Region.REMOTE_GLOBAL,
                }
                region = region_map.get(region_str, Region.GLOBAL)

                # Map risk level (unknown names fall back to MEDIUM)
                risk_map = {"low": RiskLevel.LOW, "medium": RiskLevel.MEDIUM, "high": RiskLevel.HIGH}
                risk_level = risk_map.get(roi_result["risk_level"], RiskLevel.MEDIUM)

                # Create opportunity record
                opportunity = Opportunity(
                    title=raw.get("title", "")[:500],
                    source_type=source_type,
                    source_name=raw.get("source_name", ""),
                    domain=self._to_enum(Domain, classification["domain"], Domain.MIXED),
                    category=self._to_enum(OpportunityCategory, final_category, OpportunityCategory.OTHER),
                    region=region,
                    region_weight=1.0 if region_str == self.user_region else 0.7,
                    published_at=raw.get("published_at"),
                    deadline=raw.get("deadline"),
                    raw_text=raw.get("raw_text", "")[:5000],
                    url=raw.get("url", ""),
                    relevance_score=scores["relevance_score"],
                    novelty_score=novelty_result["novelty_score"],
                    credibility_score=cred_result["credibility_score"],
                    signal_strength=cred_result["signal_strength"],
                    combined_score=combined_score,
                    roi_score=roi_result["roi_score"],
                    unlock_potential=roi_result["unlock_potential"],
                    risk_level=risk_level,
                    competition_level=roi_result["competition_level"],
                    social_engagement=raw.get("social_engagement", 0),
                    extra_data=enhanced_metadata,
                    embedding=embedding
                )

                db.add(opportunity)
                if url:
                    seen_urls.add(url)
                saved_count += 1

            except Exception as e:
                print(f"Error processing opportunity: {e}")
                continue

        # Commit batch
        try:
            db.commit()
        except Exception as e:
            print(f"Database commit error: {e}")
            db.rollback()
            saved_count = 0

        return saved_count

    async def ingest_single_source(self, source_name: str) -> dict:
        """
        Manually trigger ingestion for a single source.

        Args:
            source_name: Case-insensitive source key: ``arxiv``, ``github``,
                ``rss``, ``reddit``, ``superteam``, ``scraper``, ``careers``,
                ``internships``, ``jobboards``, ``grants`` or
                ``nigeria_grants``.

        Returns:
            ``{"source", "fetched", "saved"}`` on success, or
            ``{"error": ...}`` when the source name is unknown.
        """
        # Map to fetch CALLABLES, not coroutine objects: only the selected
        # source is ever started, so nothing is left un-awaited.
        source_map = {
            "arxiv": (self.arxiv.fetch, SourceType.ARXIV),
            "github": (self.github.fetch_trending, SourceType.GITHUB),
            "rss": (self.rss.fetch_all, SourceType.RSS),
            "reddit": (self.reddit.fetch_all, SourceType.REDDIT),
            "superteam": (self.superteam.fetch_all, SourceType.SUPERTEAM),
            "scraper": (self.scraper.fetch_all, SourceType.WEB_SCRAPE),
            "careers": (self.careers.fetch_all, SourceType.WEB_SCRAPE),
            "internships": (self.internships.fetch_all, SourceType.WEB_SCRAPE),
            # Same sources and source types as run_full_ingestion().
            "jobboards": (self.jobboards.fetch_all, SourceType.WEB_SCRAPE),
            "grants": (self.grants.fetch_all, SourceType.GRANT_PLATFORM),
            "nigeria_grants": (self.nigeria_grants.fetch_all, SourceType.GOV_PORTAL),
        }

        key = source_name.lower()

        # Validate before opening a session so the early return cannot leak one.
        if key not in source_map:
            return {"error": f"Unknown source: {source_name}"}

        fetch, source_type = source_map[key]

        db = SessionLocal()
        try:
            opportunities = await fetch()
            saved = self._process_and_save(db, opportunities, source_type)

            return {
                "source": source_name,
                "fetched": len(opportunities),
                "saved": saved
            }
        finally:
            db.close()
|
backend/ingestion/superteam_client.py
ADDED
|
@@ -0,0 +1,178 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
PIOE Superteam Client
|
| 3 |
+
|
| 4 |
+
Fetches bounties, grants, and hackathons from Superteam ecosystem.
|
| 5 |
+
High-value source for crypto/web3 opportunities.
|
| 6 |
+
"""
|
| 7 |
+
import httpx
|
| 8 |
+
from datetime import datetime
|
| 9 |
+
from typing import Optional
|
| 10 |
+
from bs4 import BeautifulSoup
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class SuperteamClient:
    """
    Client for Superteam ecosystem opportunities.

    Superteam aggregates bounties, grants, hackathons, and jobs
    across the Solana ecosystem and beyond.

    Every opportunity is returned as a dict with the keys "title",
    "raw_text", "url", "source_type", "source_name", "published_at" and
    "metadata" (API listings additionally carry "deadline").
    """

    # Known Superteam endpoints
    EARN_URL = "https://earn.superteam.fun"
    BOUNTIES_API = "https://earn.superteam.fun/api/listings"

    # Listing type (lower-cased) -> label used in the opportunity title
    _TYPE_PREFIXES = {
        "bounty": "Bounty",
        "grant": "Grant",
        "hackathon": "Hackathon",
        "job": "Job"
    }

    def __init__(self):
        self._headers = {
            "User-Agent": "PIOE/1.0",
            "Accept": "application/json"
        }

    async def fetch_all(self) -> list[dict]:
        """
        Fetch all opportunity types from Superteam.

        The JSON API is tried first; the HTML scraper is used only when the
        API call raises. Errors are printed, never propagated, so the result
        may be an empty list.
        """
        opportunities = []

        # Try API first
        try:
            api_opps = await self.fetch_from_api()
            opportunities.extend(api_opps)
        except Exception as e:
            print(f"Superteam API error: {e}")
            # Fall back to scraping
            try:
                scraped = await self.fetch_by_scraping()
                opportunities.extend(scraped)
            except Exception as e2:
                print(f"Superteam scrape error: {e2}")

        return opportunities

    async def fetch_from_api(self) -> list[dict]:
        """
        Fetch listings from Superteam API.

        Accepts either a bare JSON list or an object with a "listings" key.
        Raises on HTTP errors or an unexpected payload shape so that
        fetch_all() can fall back to scraping.
        """
        async with httpx.AsyncClient() as client:
            response = await client.get(
                self.BOUNTIES_API,
                params={"type": "all"},
                headers=self._headers,
                timeout=30
            )
            response.raise_for_status()

            data = response.json()
            listings = data if isinstance(data, list) else data.get("listings", [])

            return self._parse_listings(listings)

    async def fetch_by_scraping(self) -> list[dict]:
        """
        Fallback: scrape Superteam Earn page.

        Cards without a title element are skipped; a card that fails to
        parse is reported and skipped without aborting the whole scrape.
        """
        async with httpx.AsyncClient() as client:
            response = await client.get(
                self.EARN_URL,
                headers={"User-Agent": "PIOE/1.0"},
                timeout=30,
                follow_redirects=True
            )
            response.raise_for_status()

            soup = BeautifulSoup(response.text, "html.parser")
            opportunities = []

            # Look for listing cards (structure may vary)
            for card in soup.select("[data-testid='listing-card'], .listing-card, article"):
                try:
                    title_el = card.select_one("h3, h2, .title")
                    link_el = card.select_one("a[href]")
                    reward_el = card.select_one(".reward, .prize, [data-testid='reward']")
                    deadline_el = card.select_one(".deadline, .due-date")

                    if not title_el:
                        continue

                    opportunity = {
                        "title": f"[Superteam] {title_el.get_text(strip=True)}",
                        "raw_text": card.get_text(strip=True)[:500],
                        "url": self._resolve_url(link_el.get("href") if link_el else None),
                        "source_type": "superteam",
                        "source_name": "Superteam Earn",
                        # The page exposes no publish date here, so the
                        # scrape time is recorded instead.
                        "published_at": datetime.utcnow(),
                        "metadata": {
                            "reward": reward_el.get_text(strip=True) if reward_el else None,
                            "deadline": deadline_el.get_text(strip=True) if deadline_el else None,
                        }
                    }
                    opportunities.append(opportunity)

                except Exception as e:
                    print(f"Error parsing Superteam card: {e}")

            return opportunities

    def _resolve_url(self, href: Optional[str]) -> str:
        """
        Turn a scraped href into an absolute URL.

        Relative hrefs (with or without a leading slash) are resolved against
        EARN_URL, absolute hrefs are kept as they are, and a missing/empty
        href falls back to EARN_URL itself.
        """
        from urllib.parse import urljoin

        if not href:
            return self.EARN_URL
        return urljoin(f"{self.EARN_URL}/", href)

    def _parse_listings(self, listings: list) -> list[dict]:
        """
        Parse API listings to normalized format.

        Fields that are absent *or* explicitly null in the payload fall back
        to defaults instead of discarding the listing. A listing that still
        fails to parse is reported and skipped.
        """
        opportunities = []

        for listing in listings:
            try:
                # Determine opportunity type ("type" may be present but null)
                listing_type = (listing.get("type") or "bounty").lower()
                type_prefix = self._TYPE_PREFIXES.get(listing_type, "Opportunity")

                # Parse reward
                reward = None
                if listing.get("rewardAmount"):
                    token = listing.get("token") or "USDC"
                    reward = f"{listing['rewardAmount']} {token}"

                # Extract skills/requirements; a comma-separated string is
                # split into a list and blank entries are dropped.
                skills = listing.get("skills") or []
                if isinstance(skills, str):
                    skills = [s.strip() for s in skills.split(",") if s.strip()]

                sponsor = listing.get("sponsor")
                sponsor_name = sponsor.get("name") if isinstance(sponsor, dict) else None

                opportunity = {
                    "title": f"[Superteam {type_prefix}] {listing.get('title') or ''}",
                    "raw_text": (listing.get("description") or "")[:2000],
                    "url": listing.get("link") or f"{self.EARN_URL}/listing/{listing.get('slug') or ''}",
                    "source_type": "superteam",
                    "source_name": "Superteam Earn",
                    "published_at": self._parse_date(listing.get("publishedAt")),
                    "deadline": self._parse_date(listing.get("deadline")),
                    "metadata": {
                        "listing_type": listing_type,
                        "reward": reward,
                        "skills": skills,
                        "sponsor": sponsor_name,
                        "region": listing.get("region"),
                        "is_active": listing.get("isPublished", True)
                    }
                }

                opportunities.append(opportunity)

            except Exception as e:
                print(f"Error parsing Superteam listing: {e}")

        return opportunities

    def _parse_date(self, date_str: Optional[str]) -> Optional[datetime]:
        """
        Parse date string to datetime.

        Accepts ISO-8601 text, with a trailing "Z" treated as UTC. Returns
        None for empty or unparseable input.
        """
        if not date_str:
            return None
        try:
            return datetime.fromisoformat(date_str.replace("Z", "+00:00"))
        except Exception:
            return None
|
backend/ingestion/web_scraper.py
ADDED
|
@@ -0,0 +1,227 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
PIOE Web Scraper
|
| 3 |
+
|
| 4 |
+
Generic web scraper for scholarship sites, hackathon platforms, and university pages.
|
| 5 |
+
Uses BeautifulSoup for static pages, Playwright for dynamic content.
|
| 6 |
+
"""
|
| 7 |
+
import httpx
|
| 8 |
+
from datetime import datetime
|
| 9 |
+
from typing import Optional
|
| 10 |
+
from bs4 import BeautifulSoup
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class WebScraper:
    """
    Generic web scraper for pages without APIs.
    Supports static and dynamic (JavaScript) pages.

    Each target is a dict with "name", "url", "type" and a "selectors"
    mapping of CSS selectors ("items", "title", "link", "deadline").
    """

    # Preconfigured scrape targets
    TARGETS = [
        # Hackathon Platforms
        {
            "name": "Devpost Hackathons",
            "url": "https://devpost.com/hackathons",
            "type": "hackathon",
            "selectors": {
                "items": ".hackathon-tile, .challenge-listing",
                "title": "h2, h3, .title",
                "link": "a",
                "deadline": ".submission-period, .dates"
            }
        },
        {
            "name": "Devfolio Hackathons",
            "url": "https://devfolio.co/hackathons",
            "type": "hackathon",
            "selectors": {
                "items": "[class*='HackathonCard'], article",
                "title": "h3, h2, [class*='Name']",
                "link": "a",
                "deadline": "[class*='Date']"
            }
        },
        {
            "name": "HackerEarth Challenges",
            "url": "https://www.hackerearth.com/challenges/",
            "type": "hackathon",
            "selectors": {
                "items": ".challenge-card, .event-card",
                "title": ".challenge-name, h3",
                "link": "a",
                "deadline": ".date, .timing"
            }
        },
        # Scholarship/Fellowship Sites
        {
            "name": "FindAPhD AI",
            "url": "https://www.findaphd.com/phds/?Keywords=artificial+intelligence+machine+learning",
            "type": "scholarship",
            "selectors": {
                "items": ".phd-result",
                "title": "h4 a, .title a",
                "link": "a",
                "deadline": ".close-date"
            }
        },
        {
            "name": "FindAPhD Robotics",
            "url": "https://www.findaphd.com/phds/?Keywords=robotics+computer+vision",
            "type": "scholarship",
            "selectors": {
                "items": ".phd-result",
                "title": "h4 a, .title a",
                "link": "a",
                "deadline": ".close-date"
            }
        },
        # Grant/Fellowship
        {
            "name": "Opportunities.com",
            "url": "https://www.opportunitiescircle.com/category/fellowships/",
            "type": "fellowship",
            "selectors": {
                "items": "article, .post",
                "title": "h2, h3, .entry-title",
                "link": "a",
                "deadline": ".deadline"
            }
        },
    ]

    def __init__(self, use_playwright: bool = False):
        self.use_playwright = use_playwright
        self._headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
        }

    async def fetch_all(self, targets: Optional[list[dict]] = None) -> list[dict]:
        """
        Fetch from all configured targets.

        Targets are scraped one after another. A failing target is reported
        and skipped. Passing None or an empty list uses TARGETS.
        """
        targets = targets or self.TARGETS
        all_opportunities = []

        for target in targets:
            try:
                opps = await self.scrape_target(target)
                all_opportunities.extend(opps)
            except Exception as e:
                # .get() so that a malformed target cannot make the error
                # handler itself raise.
                print(f"Scrape error for {target.get('name', '<unnamed>')}: {e}")

        return all_opportunities

    async def scrape_target(self, target: dict) -> list[dict]:
        """
        Scrape a single target configuration.

        Returns at most 20 opportunities. Items without a title are skipped.
        Returns an empty list when the page could not be fetched.
        """
        html = await self._fetch_html(target["url"])
        if not html:
            return []

        soup = BeautifulSoup(html, "html.parser")
        selectors = target.get("selectors", {})

        opportunities = []
        items = soup.select(selectors.get("items", "article"))[:20]

        for item in items:
            try:
                # Extract title
                title_el = item.select_one(selectors.get("title", "h2, h3, .title"))
                title = title_el.get_text(strip=True) if title_el else ""

                if not title:
                    continue

                # Extract link (empty string when the item has no usable href)
                link_el = item.select_one(selectors.get("link", "a"))
                link = self._resolve_link(
                    target["url"],
                    link_el.get("href") if link_el else None
                )

                # Extract deadline if available
                deadline_el = item.select_one(selectors.get("deadline", ".deadline"))
                deadline_text = deadline_el.get_text(strip=True) if deadline_el else None

                # Get full text content
                raw_text = item.get_text(separator=" ", strip=True)[:1000]

                opportunity = {
                    "title": f"[{target['type'].title()}] {title}",
                    "raw_text": raw_text,
                    # Fall back to the listing page when no item link exists
                    "url": link or target["url"],
                    "source_type": "web_scrape",
                    "source_name": target["name"],
                    "published_at": datetime.utcnow(),
                    "metadata": {
                        "scrape_type": target["type"],
                        "deadline_text": deadline_text
                    }
                }

                opportunities.append(opportunity)

            except Exception as e:
                print(f"Error parsing item: {e}")

        return opportunities

    @staticmethod
    def _resolve_link(base_url: str, href: Optional[str]) -> str:
        """
        Return an absolute URL for href, or "" when href is missing/empty.

        Only hrefs that really start with an http(s) scheme are kept as they
        are; everything else (including relative paths that merely begin
        with the letters "http") is resolved against base_url.
        """
        from urllib.parse import urljoin

        if not href:
            return ""
        if href.startswith(("http://", "https://")):
            return href
        return urljoin(base_url, href)

    async def _fetch_html(self, url: str) -> Optional[str]:
        """
        Fetch HTML content from URL.

        Uses Playwright when use_playwright is set, plain HTTP otherwise.
        Returns None on any failure.
        """
        if self.use_playwright:
            return await self._fetch_with_playwright(url)

        try:
            async with httpx.AsyncClient() as client:
                response = await client.get(
                    url,
                    headers=self._headers,
                    timeout=30,
                    follow_redirects=True
                )
                response.raise_for_status()
                return response.text
        except Exception as e:
            print(f"HTTP fetch error: {e}")
            return None

    async def _fetch_with_playwright(self, url: str) -> Optional[str]:
        """
        Fetch dynamic content using Playwright.

        Playwright is imported lazily so it is only required when this path
        is used. Returns None on any failure (including a missing package).
        """
        try:
            from playwright.async_api import async_playwright

            async with async_playwright() as p:
                browser = await p.chromium.launch(headless=True)
                try:
                    page = await browser.new_page()
                    await page.goto(url, wait_until="networkidle", timeout=30000)
                    return await page.content()
                finally:
                    # Close the browser even when navigation fails or times out.
                    await browser.close()
        except Exception as e:
            print(f"Playwright error: {e}")
            return None

    async def scrape_custom(
        self,
        url: str,
        name: str,
        item_selector: str,
        title_selector: str = "h2, h3",
        link_selector: str = "a",
        scrape_type: str = "custom"
    ) -> list[dict]:
        """
        Scrape a custom URL with provided selectors.

        Builds an ad-hoc target and delegates to scrape_target(). No deadline
        selector is supplied, so scrape_target()'s default is used.
        """
        target = {
            "name": name,
            "url": url,
            "type": scrape_type,
            "selectors": {
                "items": item_selector,
                "title": title_selector,
                "link": link_selector
            }
        }
        return await self.scrape_target(target)
|
backend/intelligence/__init__.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
PIOE Intelligence Layer - Version 2.0
|
| 3 |
+
"""
|
| 4 |
+
from .llm_client import LLMClient
|
| 5 |
+
from .scorer import RelevanceScorer
|
| 6 |
+
from .novelty import NoveltyDetector
|
| 7 |
+
from .classifier import OpportunityClassifier
|
| 8 |
+
from .credibility import CredibilityScorer
|
| 9 |
+
from .roi_scorer import ROIScorer
|
| 10 |
+
from .silent_detector import SilentOpportunityDetector, OpportunityLanguageDetector
|
| 11 |
+
|
| 12 |
+
__all__ = [
|
| 13 |
+
"LLMClient",
|
| 14 |
+
"RelevanceScorer",
|
| 15 |
+
"NoveltyDetector",
|
| 16 |
+
"OpportunityClassifier",
|
| 17 |
+
"CredibilityScorer",
|
| 18 |
+
"ROIScorer",
|
| 19 |
+
"SilentOpportunityDetector",
|
| 20 |
+
"OpportunityLanguageDetector",
|
| 21 |
+
]
|
| 22 |
+
|
backend/intelligence/classifier.py
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
PIOE Opportunity Classifier
|
| 3 |
+
|
| 4 |
+
Classifies opportunities into categories using rules and LLM.
|
| 5 |
+
"""
|
| 6 |
+
from ..models import OpportunityCategory, Domain
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class OpportunityClassifier:
    """
    Classifies opportunities into categories and domains.
    Uses rule-based classification first, LLM for ambiguous cases.

    NOTE: keyword rules use plain substring tests, so a short pattern such
    as "ai" also matches inside longer words.
    """

    # Source type to category mapping (high priority)
    SOURCE_CATEGORY_MAP = {
        "arxiv": OpportunityCategory.RESEARCH,
        "github": OpportunityCategory.OPEN_SOURCE,
        "superteam": OpportunityCategory.BOUNTY,
        "grant_platform": OpportunityCategory.GRANT,
        "gov_portal": OpportunityCategory.GRANT,
    }

    # Keyword patterns for each category
    CATEGORY_PATTERNS = {
        OpportunityCategory.SCHOLARSHIP: [
            "scholarship", "tuition", "financial aid", "merit award"
        ],
        OpportunityCategory.FELLOWSHIP: [
            "fellowship", "fellow program", "research fellow"
        ],
        OpportunityCategory.INTERNSHIP: [
            "internship", "intern ", "summer program", "co-op"
        ],
        OpportunityCategory.JOB: [
            "hiring", "job opening", "position available", "career opportunity",
            "we're looking for", "full-time", "remote job"
        ],
        OpportunityCategory.RESEARCH: [
            "research assistant", "ra position", "research opportunity", "arxiv",
            "abstract:", "we present", "we propose", "our method"
        ],
        OpportunityCategory.HACKATHON: [
            "hackathon", "buildathon", "hackers wanted", "hack day"
        ],
        OpportunityCategory.COMPETITION: [
            "competition", "challenge", "contest", "prize pool"
        ],
        OpportunityCategory.GRANT: [
            "grant program", "grant application", "grant funding", "grant deadline"
        ],
        OpportunityCategory.CONFERENCE: [
            "conference", "call for papers", "summit", "symposium"
        ],
        OpportunityCategory.OPEN_SOURCE: [
            "open source", "gsoc", "outreachy", "contributor wanted"
        ],
        OpportunityCategory.INVESTMENT: [
            "funding round", "series a", "series b", "vc funding", "raised $"
        ],
        OpportunityCategory.BOUNTY: [
            "bounty", "bug bounty", "earn reward", "usdc reward"
        ],
    }

    # Domain patterns
    DOMAIN_PATTERNS = {
        Domain.COMPUTER_VISION: [
            "computer vision", "image", "visual", "object detection", "segmentation", "opencv"
        ],
        Domain.ROBOTICS: [
            "robot", "ros", "autonomous", "manipulation", "navigation"
        ],
        Domain.AI: [
            "ai", "artificial intelligence", "machine learning", "deep learning",
            "neural network", "llm", "transformer", "gpt"
        ],
        Domain.FINANCE: [
            "finance", "fintech", "trading", "investment", "stock", "quantitative"
        ],
        Domain.CRYPTO: [
            "crypto", "blockchain", "web3", "defi", "solana", "ethereum", "nft"
        ],
        Domain.ACADEMIA: [
            "research", "phd", "postdoc", "university", "academic", "professor"
        ],
    }

    def classify_by_source(self, source_type: str, source_name: str = "") -> OpportunityCategory | None:
        """
        Classify primarily by source type.
        Returns category or None if source doesn't determine category.

        The exact source type is checked first, then known substrings of the
        source name. Both comparisons are case-insensitive.
        """
        source_lower = (source_type or "").lower()
        source_name_lower = (source_name or "").lower()

        # Check direct source mapping
        if source_lower in self.SOURCE_CATEGORY_MAP:
            return self.SOURCE_CATEGORY_MAP[source_lower]

        # Check source name patterns
        if "arxiv" in source_name_lower:
            return OpportunityCategory.RESEARCH
        if "github" in source_name_lower:
            return OpportunityCategory.OPEN_SOURCE
        if "profellow" in source_name_lower:
            return OpportunityCategory.FELLOWSHIP
        if "remoteok" in source_name_lower:
            return OpportunityCategory.JOB
        if "hacker news" in source_name_lower:
            # Hacker News feeds are told apart by a topic word in their name;
            # a feed with none of these words falls through.
            if "internship" in source_name_lower:
                return OpportunityCategory.INTERNSHIP
            if "robotics" in source_name_lower:
                return OpportunityCategory.RESEARCH
            if "jobs" in source_name_lower:
                return OpportunityCategory.JOB
        if "devfolio" in source_name_lower:
            return OpportunityCategory.HACKATHON

        return None

    def classify_by_rules(self, text: str) -> tuple[OpportunityCategory, Domain, float]:
        """
        Classify using keyword matching.
        Returns (category, domain, confidence)

        The category with the most matching patterns wins (the first one in
        CATEGORY_PATTERNS order on a tie). Confidence is 0.3 per matched
        pattern, capped at 0.9. The domain is the single best-matching
        domain, or MIXED when nothing matches or several domains tie.
        """
        if not text:
            return OpportunityCategory.OTHER, Domain.MIXED, 0.0

        text_lower = text.lower()

        # Find matching category. Match counts are compared with match
        # counts; the capped confidence is derived only once the winner is
        # known, so a later single-match category cannot displace an earlier
        # multi-match one.
        category = OpportunityCategory.OTHER
        best_matches = 0

        for cat, patterns in self.CATEGORY_PATTERNS.items():
            matches = sum(1 for p in patterns if p in text_lower)
            if matches > best_matches:
                category = cat
                best_matches = matches

        cat_confidence = min(best_matches * 0.3, 0.9)

        # Find matching domain
        domain_counts = {
            dom: sum(1 for p in patterns if p in text_lower)
            for dom, patterns in self.DOMAIN_PATTERNS.items()
        }
        top_count = max(domain_counts.values(), default=0)
        leaders = [d for d, c in domain_counts.items() if c == top_count and c > 0]

        # If multiple domains match equally well (or none match), keep as mixed
        domain = leaders[0] if len(leaders) == 1 else Domain.MIXED

        return category, domain, cat_confidence

    def classify(
        self,
        text: str,
        title: str = "",
        source_type: str = "",
        source_name: str = "",
        use_llm: bool = False,
        llm_client = None
    ) -> dict:
        """
        Classify opportunity with optional LLM enhancement.

        Returns dict with category, domain, confidence, method

        "method" is "source", "rules" or "llm". The LLM is consulted only
        when use_llm is set, a client is supplied, no source rule applied and
        the rule confidence is below 0.5; its answer is used only if it
        reports a higher confidence. An LLM failure is reported and the rule
        result is returned instead.
        """
        full_text = f"{title} {text}".strip()

        # PRIORITY 1: Source-based classification (most reliable)
        source_category = self.classify_by_source(source_type, source_name)

        # PRIORITY 2: Rule-based keyword matching
        rule_category, domain, confidence = self.classify_by_rules(full_text)

        # Use source category if available (overrides keyword matching)
        if source_category:
            category = source_category
            confidence = 0.85  # High confidence for source-based
            method = "source"
        else:
            category = rule_category
            method = "rules"

        # Use LLM for low-confidence or ambiguous cases (only if no source match)
        if use_llm and llm_client and confidence < 0.5 and not source_category:
            try:
                llm_result = llm_client.classify(full_text)
                if llm_result.get("confidence", 0) > confidence:
                    return {
                        "category": llm_result.get("category", category.value),
                        "domain": llm_result.get("domain", domain.value),
                        "confidence": llm_result.get("confidence", confidence),
                        "method": "llm"
                    }
            except Exception as e:
                print(f"LLM classification failed: {e}")

        return {
            "category": category.value,
            "domain": domain.value,
            "confidence": confidence,
            "method": method
        }
|
| 214 |
+
|
backend/intelligence/credibility.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
PIOE Credibility Scorer
|
| 3 |
+
|
| 4 |
+
Evaluates trustworthiness of sources and authors.
|
| 5 |
+
"""
|
| 6 |
+
from ..models import SourceType
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class CredibilityScorer:
|
| 10 |
+
"""
|
| 11 |
+
Scores credibility based on source type, author history, and content signals.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
# Base credibility scores by source type
|
| 15 |
+
SOURCE_CREDIBILITY = {
|
| 16 |
+
SourceType.ARXIV: 0.95, # Academic papers - highest trust
|
| 17 |
+
SourceType.GITHUB: 0.8, # Open source - high trust
|
| 18 |
+
SourceType.RSS: 0.7, # Varies by feed
|
| 19 |
+
SourceType.SUPERTEAM: 0.85, # Official platform
|
| 20 |
+
SourceType.REDDIT: 0.5, # Community - variable
|
| 21 |
+
SourceType.TWITTER: 0.4, # Social - requires filtering
|
| 22 |
+
SourceType.LINKEDIN: 0.6, # Professional but noisy
|
| 23 |
+
SourceType.WEB_SCRAPE: 0.5, # Unknown quality
|
| 24 |
+
}
|
| 25 |
+
|
| 26 |
+
def __init__(self):
|
| 27 |
+
pass
|
| 28 |
+
|
| 29 |
+
def score_source(self, source_type: SourceType) -> float:
|
| 30 |
+
"""Get base credibility score for source type."""
|
| 31 |
+
return self.SOURCE_CREDIBILITY.get(source_type, 0.5)
|
| 32 |
+
|
| 33 |
+
def score_content_signals(self, text: str, metadata: dict = None) -> dict:
|
| 34 |
+
"""
|
| 35 |
+
Evaluate content signals that indicate credibility.
|
| 36 |
+
Returns individual signal scores.
|
| 37 |
+
"""
|
| 38 |
+
metadata = metadata or {}
|
| 39 |
+
signals = {}
|
| 40 |
+
|
| 41 |
+
text_lower = text.lower() if text else ""
|
| 42 |
+
|
| 43 |
+
# Has deadline (official announcements usually have deadlines)
|
| 44 |
+
signals["has_deadline"] = 1.0 if metadata.get("deadline") or \
|
| 45 |
+
any(kw in text_lower for kw in ["deadline", "due date", "apply by", "closes"]) else 0.0
|
| 46 |
+
|
| 47 |
+
# Has organization/institution
|
| 48 |
+
signals["has_organization"] = 1.0 if metadata.get("organization") else 0.5
|
| 49 |
+
|
| 50 |
+
# Contains action URL
|
| 51 |
+
signals["has_action_url"] = 1.0 if metadata.get("url") or \
|
| 52 |
+
any(kw in text_lower for kw in ["apply here", "register at", "sign up"]) else 0.0
|
| 53 |
+
|
| 54 |
+
# Is first announcement (not a repost)
|
| 55 |
+
signals["is_original"] = 0.0 if any(kw in text_lower for kw in [
|
| 56 |
+
"repost", "sharing", "fyi", "icymi", "in case you missed"
|
| 57 |
+
]) else 1.0
|
| 58 |
+
|
| 59 |
+
# Has specific requirements (detailed = more credible)
|
| 60 |
+
signals["has_requirements"] = 1.0 if metadata.get("requirements") or \
|
| 61 |
+
any(kw in text_lower for kw in ["requirements", "qualifications", "must have"]) else 0.0
|
| 62 |
+
|
| 63 |
+
return signals
|
| 64 |
+
|
| 65 |
+
def calculate_signal_strength(self, signals: dict) -> float:
|
| 66 |
+
"""
|
| 67 |
+
Calculate overall signal strength from content signals.
|
| 68 |
+
High signal strength = actionable, official, time-sensitive.
|
| 69 |
+
"""
|
| 70 |
+
weights = {
|
| 71 |
+
"has_deadline": 0.3,
|
| 72 |
+
"has_organization": 0.2,
|
| 73 |
+
"has_action_url": 0.2,
|
| 74 |
+
"is_original": 0.2,
|
| 75 |
+
"has_requirements": 0.1
|
| 76 |
+
}
|
| 77 |
+
|
| 78 |
+
total = sum(signals.get(k, 0) * w for k, w in weights.items())
|
| 79 |
+
return round(total, 3)
|
| 80 |
+
|
| 81 |
+
def score(
    self,
    source_type: SourceType,
    text: str = "",
    metadata: dict = None,
    author_credibility: float = 0.5,
    social_engagement: int = 0
) -> dict:
    """
    Produce the full credibility breakdown for one item.

    Blend: 50% source score, 30% content signal strength, 10% author
    credibility and 10% engagement. The engagement term is 0.5 plus a
    boost, capped at 1.0; the boost is only granted to Reddit and
    Twitter sources.

    Returns:
        Dict with source_score, signal_strength, signals (the raw
        per-signal dict) and credibility_score.
    """
    base = self.score_source(source_type)
    signals = self.score_content_signals(text, metadata)
    strength = self.calculate_signal_strength(signals)

    # Tiered boost for social sources; the first threshold exceeded wins.
    boost = 0.0
    if source_type in (SourceType.REDDIT, SourceType.TWITTER):
        for floor, bonus in ((100, 0.15), (50, 0.1), (20, 0.05)):
            if social_engagement > floor:
                boost = bonus
                break

    engagement_term = min(boost + 0.5, 1.0)
    combined = (
        0.5 * base +
        0.3 * strength +
        0.1 * author_credibility +
        0.1 * engagement_term
    )

    return {
        "source_score": round(base, 3),
        "signal_strength": strength,
        "signals": signals,
        "credibility_score": round(combined, 3)
    }
|
backend/intelligence/llm_client.py
ADDED
|
@@ -0,0 +1,352 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
PIOE LLM Client Abstraction Layer
|
| 3 |
+
|
| 4 |
+
Supports Gemini (default) and OpenAI as providers.
|
| 5 |
+
"""
|
| 6 |
+
from abc import ABC, abstractmethod
|
| 7 |
+
from typing import Optional
|
| 8 |
+
import json
|
| 9 |
+
|
| 10 |
+
from ..config import get_settings
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class BaseLLMClient(ABC):
    """Interface every LLM provider client must implement.

    All four methods are abstract, so the class cannot be instantiated
    until a subclass overrides each of them.
    """

    @abstractmethod
    def classify(self, text: str) -> dict:
        """Return the category and domain classification for ``text``."""
        return None

    @abstractmethod
    def summarize(self, text: str, max_length: int = 150) -> str:
        """Return a short summary of ``text`` bounded by ``max_length``."""
        return None

    @abstractmethod
    def recommend_action(self, opportunity: dict) -> dict:
        """Return action guidance for the given opportunity dict."""
        return None

    @abstractmethod
    def extract_metadata(self, text: str) -> dict:
        """Return structured metadata (deadline, location, reward, etc.)."""
        return None
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
class GeminiClient(BaseLLMClient):
    """Google Gemini implementation of the LLM client interface.

    Every public method builds a prompt, asks the model for a reply and
    falls back to a safe default when the call or the JSON parsing fails,
    so callers never see an exception from the provider.
    """

    def __init__(self, api_key: str):
        """Configure the Gemini SDK with ``api_key`` and select the model."""
        # Imported here rather than at module level, so importing this
        # module does not require the Gemini SDK.
        import google.generativeai as genai
        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel('gemini-1.5-flash')

    def _generate(self, prompt: str, as_json: bool = False) -> str:
        """Send ``prompt`` to Gemini and return the response text.

        ``as_json`` is accepted for interface compatibility but is not
        used by this implementation.
        """
        response = self.model.generate_content(prompt)
        return response.text

    @staticmethod
    def _parse_json_object(raw: str) -> Optional[dict]:
        """Parse the outermost ``{...}`` span of ``raw``; None if absent.

        The model may wrap its JSON in prose or code fences, so only the
        text between the first ``{`` and the last ``}`` is decoded.
        Raises whatever ``json.loads`` raises when that span is invalid.
        """
        start = raw.find('{')
        end = raw.rfind('}') + 1
        if start != -1 and end > start:
            return json.loads(raw[start:end])
        return None

    def classify(self, text: str) -> dict:
        """Classify opportunity text into category and domain.

        Returns the model's parsed JSON object, or
        ``{"category": "other", "domain": "mixed", "confidence": 0.0}``
        when the call fails or no JSON object is found.
        """
        # None-safe: the prompt is built outside the try block.
        snippet = (text or "")[:2000]
        prompt = f"""Analyze this opportunity and classify it. Return JSON only.

TEXT: {snippet}

Return this exact JSON structure:
{{
"category": "one of: scholarship, fellowship, internship, job, research, hackathon, competition, grant, conference, open_source, investment, weak_signal, other",
"domain": "one of: ai, computer_vision, robotics, finance, crypto, academia, mixed",
"confidence": 0.0 to 1.0
}}"""

        try:
            parsed = self._parse_json_object(self._generate(prompt))
            if parsed is not None:
                return parsed
        except Exception as e:
            print(f"Classification error: {e}")

        return {"category": "other", "domain": "mixed", "confidence": 0.0}

    def summarize(self, text: str, max_length: int = 150) -> str:
        """Return a summary of at most ``max_length`` characters.

        Falls back to the truncated input text when the model call fails.
        """
        source_text = text or ""
        snippet = source_text[:2000]
        prompt = f"""Summarize this opportunity in {max_length} characters or less.
Focus on: what it is, who it's for, and deadline if any.

TEXT: {snippet}

Return only the summary, no quotes or labels."""

        try:
            return self._generate(prompt).strip()[:max_length]
        except Exception as e:
            print(f"Summary error: {e}")
            return source_text[:max_length]

    def recommend_action(self, opportunity: dict) -> dict:
        """
        PIOE 2.0 enhanced action guidance.

        Builds a prompt from the opportunity fields plus a fixed user
        context and asks the model for a JSON recommendation.

        Returns:
            Dict that always contains the keys primary_action, urgency,
            timing_status, skills_to_highlight, portfolio_pieces,
            preparation_steps, networking_tips, differentiation_angle,
            success_probability, time_investment_hours, risk_level, why
            and red_flags. Keys missing from the model's reply are filled
            with defaults; a generic fallback is returned when the call
            or the parsing fails.
        """
        # Defaults for keys the model leaves out. Built per call so the
        # list values are never shared between results.
        defaults = {
            "primary_action": "save_for_later",
            "urgency": "whenever",
            "timing_status": "unknown",
            "skills_to_highlight": [],
            "portfolio_pieces": [],
            "preparation_steps": [],
            "networking_tips": "",
            "differentiation_angle": "",
            "success_probability": 0.3,
            "time_investment_hours": 10,
            "risk_level": "medium",
            "why": "Review and decide",
            "red_flags": [],
        }

        # A record may carry raw_text=None; slicing None would raise
        # before the try block below is entered.
        description = (opportunity.get('raw_text') or '')[:1500]

        prompt = f"""You are an expert career and opportunity advisor. Analyze this opportunity and provide detailed action guidance.

OPPORTUNITY DETAILS:
- Title: {opportunity.get('title', '')}
- Category: {opportunity.get('category', '')}
- Domain: {opportunity.get('domain', '')}
- Deadline: {opportunity.get('deadline', 'No deadline specified')}
- Description: {description}
- ROI Score: {opportunity.get('roi_score', 'N/A')}
- Competition Level: {opportunity.get('competition_level', 'N/A')}
- Region: {opportunity.get('region', 'global')}

USER CONTEXT:
- Location: Nigeria, Africa
- Interests: AI, Computer Vision, Robotics, Web3
- Status: Student/Early Career

Provide strategic action guidance. Return JSON only:
{{
"primary_action": "one of: apply_now, apply_prepared, track, save_for_later, deep_research, network_first, skip",
"urgency": "one of: immediate, this_week, this_month, whenever, expired",
"timing_status": "one of: early, optimal, late, unknown",

"skills_to_highlight": ["skill1", "skill2", "skill3"],
"portfolio_pieces": ["project type 1", "project type 2"],

"preparation_steps": [
"step 1",
"step 2",
"step 3"
],

"networking_tips": "who to contact or how to stand out (1 sentence)",
"differentiation_angle": "what unique angle to take (1 sentence)",

"success_probability": 0.0 to 1.0,
"time_investment_hours": estimated hours to apply well,
"risk_level": "low, medium, or high",

"why": "brief strategic reasoning (max 100 chars)",
"red_flags": ["any concerns"] or []
}}"""

        try:
            parsed = self._parse_json_object(self._generate(prompt))
            if parsed is not None:
                # Ensure every required field exists.
                return {
                    key: parsed.get(key, default)
                    for key, default in defaults.items()
                }
        except Exception as e:
            print(f"Action guidance error: {e}")

        # Fallback response: same keys, with generic preparation steps.
        return {
            **defaults,
            "preparation_steps": [
                "Review the opportunity details",
                "Assess fit with your goals",
            ],
            "why": "Needs manual review",
        }

    def extract_metadata(self, text: str) -> dict:
        """Extract structured metadata from text.

        Returns the model's parsed JSON object, or an empty dict when the
        call fails or no JSON object is found.
        """
        snippet = (text or "")[:2000]
        prompt = f"""Extract metadata from this opportunity text. Return JSON only.

TEXT: {snippet}

Return this structure (use null for missing info):
{{
"deadline": "YYYY-MM-DD or null",
"location": "location or 'remote' or null",
"reward": "amount or null",
"organization": "org name or null",
"requirements": ["skill1", "skill2"] or [],
"url": "application url or null"
}}"""

        try:
            parsed = self._parse_json_object(self._generate(prompt))
            if parsed is not None:
                return parsed
        except Exception as e:
            print(f"Metadata extraction error: {e}")

        return {}
|
| 206 |
+
|
| 207 |
+
|
| 208 |
+
class OpenAIClient(BaseLLMClient):
    """OpenAI implementation (fallback).

    Only ``classify`` and ``summarize`` call the model;
    ``recommend_action`` and ``extract_metadata`` return static defaults.
    """

    def __init__(self, api_key: str):
        """Create the OpenAI SDK client and select the chat model."""
        # Imported here rather than at module level, so importing this
        # module does not require the OpenAI SDK.
        from openai import OpenAI
        self.client = OpenAI(api_key=api_key)
        self.model = "gpt-3.5-turbo"

    def _generate(self, prompt: str) -> str:
        """Send ``prompt`` as a single user message and return the reply."""
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.3
        )
        return response.choices[0].message.content

    def classify(self, text: str) -> dict:
        """Classify opportunity text into category and domain.

        Returns the parsed JSON object from the reply, or
        ``{"category": "other", "domain": "mixed", "confidence": 0.0}``
        when the call fails or no JSON object is found.
        """
        # None-safe: the prompt is built outside the try block.
        snippet = (text or "")[:2000]
        prompt = f"""Classify this opportunity. Return JSON only with keys: category, domain, confidence.
Categories: scholarship, fellowship, internship, job, research, hackathon, competition, grant, conference, open_source, investment, weak_signal, other
Domains: ai, computer_vision, robotics, finance, crypto, academia, mixed

TEXT: {snippet}"""

        try:
            result = self._generate(prompt)
            # The reply may wrap the JSON in prose; decode the outermost
            # brace span only.
            start = result.find('{')
            end = result.rfind('}') + 1
            if start != -1 and end > start:
                return json.loads(result[start:end])
        except Exception as e:
            # Report the failure the same way GeminiClient does instead
            # of swallowing it silently.
            print(f"Classification error: {e}")
        return {"category": "other", "domain": "mixed", "confidence": 0.0}

    def summarize(self, text: str, max_length: int = 150) -> str:
        """Return a summary of at most ``max_length`` characters.

        Falls back to the truncated input text when the model call fails.
        """
        source_text = text or ""
        prompt = f"Summarize in {max_length} chars: {source_text[:2000]}"
        try:
            return self._generate(prompt).strip()[:max_length]
        except Exception as e:
            print(f"Summary error: {e}")
            return source_text[:max_length]

    def recommend_action(self, opportunity: dict) -> dict:
        """Return static "review later" guidance without calling the model.

        The result carries the same PIOE 2.0 keys the other clients
        return, so callers can read them regardless of provider. The
        original ``action`` / ``reason`` / ``urgency`` entries are kept
        unchanged for backward compatibility.
        """
        return {
            # Legacy keys, values unchanged.
            "action": "save",
            "reason": "Review later",
            # NOTE(review): "low" is not one of the urgency values the
            # Gemini prompt asks for (immediate / this_week / this_month /
            # whenever / expired); kept as-is for compatibility. Confirm
            # what consumers expect.
            "urgency": "low",
            # PIOE 2.0 guidance keys, matching the fallback values used
            # by GeminiClient.
            "primary_action": "save_for_later",
            "timing_status": "unknown",
            "skills_to_highlight": [],
            "portfolio_pieces": [],
            "preparation_steps": [
                "Review the opportunity details",
                "Assess fit with your goals",
            ],
            "networking_tips": "",
            "differentiation_angle": "",
            "success_probability": 0.3,
            "time_investment_hours": 10,
            "risk_level": "medium",
            "why": "Review later",
            "red_flags": [],
        }

    def extract_metadata(self, text: str) -> dict:
        """Return an empty dict; metadata extraction is not implemented."""
        return {}
|
| 255 |
+
|
| 256 |
+
|
| 257 |
+
class LLMClient:
    """
    Lazily-created, process-wide accessor for the configured LLM client.

    Selection order: Gemini when it is the chosen provider and has a key,
    otherwise OpenAI when it has a key, otherwise the rule-based mock.
    """

    # Cached client, shared by every caller once it has been created.
    _instance: Optional[BaseLLMClient] = None

    @classmethod
    def get_client(cls) -> BaseLLMClient:
        """Return the cached client, creating it on first use."""
        if cls._instance is not None:
            return cls._instance

        settings = get_settings()

        if settings.ai_provider == "gemini" and settings.gemini_api_key:
            client = GeminiClient(settings.gemini_api_key)
        elif settings.openai_api_key:
            client = OpenAIClient(settings.openai_api_key)
        else:
            # No API key configured: use the mock client.
            client = MockLLMClient()

        cls._instance = client
        return cls._instance
|
| 280 |
+
|
| 281 |
+
|
| 282 |
+
class MockLLMClient(BaseLLMClient):
    """Mock client for development without API keys. PIOE 2.0 compatible.

    All answers come from keyword rules and static tables; no network
    call is made.
    """

    # (regex, category, domain, confidence), checked in order; the first
    # match wins. Patterns are plain substring alternatives, except
    # "intern", which is matched as a whole word so that "international"
    # and "internal" are not classified as internships.
    _CATEGORY_RULES = (
        (r"scholarship|fellowship|grant", "scholarship", "academia", 0.7),
        (r"hackathon|competition|challenge", "hackathon", "ai", 0.7),
        (r"internship|\binterns?\b", "internship", "mixed", 0.7),
        (r"job|hiring|position", "job", "mixed", 0.7),
        (r"bounty|ecosystem|solana|ethereum", "bounty", "crypto", 0.7),
        (r"pitch|demo day|accelerator", "pitch_event", "mixed", 0.7),
        (r"collaborat|partner|looking for", "collaboration", "mixed", 0.6),
    )

    def classify(self, text: str) -> dict:
        """Classify ``text`` with basic keyword rules.

        Returns a dict with category, domain and confidence. Text that
        matches no rule (including empty or None text) is classified as
        ``other`` / ``mixed`` with confidence 0.3.
        """
        # Local import: the module's top-level imports do not include re.
        import re

        text_lower = (text or "").lower()

        for pattern, category, domain, confidence in self._CATEGORY_RULES:
            if re.search(pattern, text_lower):
                return {
                    "category": category,
                    "domain": domain,
                    "confidence": confidence,
                }

        return {"category": "other", "domain": "mixed", "confidence": 0.3}

    def summarize(self, text: str, max_length: int = 150) -> str:
        """Return the first ``max_length`` characters of ``text``."""
        return (text or "")[:max_length]

    def recommend_action(self, opportunity: dict) -> dict:
        """PIOE 2.0 action guidance - rule-based fallback.

        Looks up the opportunity's category in a static table of
        (action, urgency, skills, portfolio pieces). Unknown or missing
        categories get the "save_for_later" defaults.
        """
        # A record may carry category=None; treat it like a missing key so
        # the "why" text does not read "Standard approach for None".
        category = opportunity.get("category") or "other"

        # Category-based action mapping
        action_map = {
            "hackathon": ("apply_now", "this_week", ["Python", "ML/AI"], ["Previous hackathon project"]),
            "grant": ("apply_prepared", "this_month", ["Technical writing", "Project planning"], ["Open source contributions"]),
            "ecosystem_grant": ("apply_prepared", "this_month", ["Solidity/Rust", "Web3"], ["DApp or smart contract"]),
            "internship": ("apply_now", "this_week", ["Relevant coursework", "Projects"], ["GitHub portfolio"]),
            "scholarship": ("apply_prepared", "this_month", ["Academic excellence", "Leadership"], ["Research paper or thesis"]),
            "bounty": ("apply_now", "immediate", ["Specific tech stack"], ["Related code samples"]),
            "pitch_event": ("apply_prepared", "this_month", ["Presentation", "Business model"], ["Pitch deck", "Demo video"]),
            "collaboration": ("network_first", "whenever", ["Domain expertise"], ["Relevant projects"]),
        }

        action, urgency, skills, portfolio = action_map.get(
            category,
            ("save_for_later", "whenever", [], [])
        )

        return {
            "primary_action": action,
            "urgency": urgency,
            "timing_status": "unknown",
            "skills_to_highlight": skills,
            "portfolio_pieces": portfolio,
            "preparation_steps": [
                "Review the opportunity requirements",
                "Prepare relevant materials",
                "Submit before deadline"
            ],
            "networking_tips": "Research the organization and connect with past participants",
            "differentiation_angle": "Highlight unique projects and Africa/Nigeria perspective",
            "success_probability": 0.3,
            "time_investment_hours": 10,
            "risk_level": "medium",
            "why": f"Standard approach for {category}",
            "red_flags": [],
        }

    def extract_metadata(self, text: str) -> dict:
        """Return an empty dict; the mock extracts no metadata."""
        return {}
|
| 352 |
+
|
backend/intelligence/novelty.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
PIOE Novelty Detector
|
| 3 |
+
|
| 4 |
+
Detects if an opportunity is novel or a repeat of existing content.
|
| 5 |
+
Uses embedding similarity against historical database.
|
| 6 |
+
"""
|
| 7 |
+
from typing import Optional
|
| 8 |
+
import numpy as np
|
| 9 |
+
from sqlalchemy.orm import Session
|
| 10 |
+
|
| 11 |
+
from ..models import Opportunity
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class NoveltyDetector:
    """
    Detects novelty by comparing against historical opportunity embeddings.
    High novelty = new and unseen topics/opportunities.
    """

    def __init__(self, similarity_threshold: float = 0.85):
        """
        Args:
            similarity_threshold: If similarity > threshold, item is
                considered duplicate.
        """
        self.similarity_threshold = similarity_threshold

    def cosine_similarity(self, vec1: list[float], vec2: list[float]) -> float:
        """Calculate cosine similarity between two vectors.

        Returns 0.0 when either vector has zero norm or when the two
        vectors do not have the same shape, so one incompatible stored
        embedding cannot abort a whole comparison run. The result is
        clamped to [-1.0, 1.0] to absorb floating-point overshoot.
        """
        a = np.asarray(vec1, dtype=float)
        b = np.asarray(vec2, dtype=float)

        # np.dot would raise on mismatched lengths; treat such pairs as
        # unrelated instead.
        if a.shape != b.shape:
            return 0.0

        norm_a = np.linalg.norm(a)
        norm_b = np.linalg.norm(b)

        if norm_a == 0 or norm_b == 0:
            return 0.0

        similarity = float(np.dot(a, b) / (norm_a * norm_b))
        return max(-1.0, min(1.0, similarity))

    def calculate_novelty(
        self,
        embedding: list[float],
        db: "Session",
        limit: int = 100
    ) -> dict:
        """
        Calculate novelty score by comparing against recent opportunities.

        Args:
            embedding: Embedding of the new item; an empty value is
                treated as fully novel without querying the database.
            db: Session used to load the most recently discovered
                opportunities that have an embedding.
            limit: Maximum number of recent opportunities to compare with.

        Returns:
            dict with novelty_score, is_duplicate, most_similar_id and
            max_similarity. most_similar_id is only set when the item is
            flagged as a duplicate.
        """
        # Result used when there is nothing to compare against. It carries
        # the same keys as the main result so callers can always read
        # max_similarity.
        nothing_to_compare = {
            "novelty_score": 1.0,
            "is_duplicate": False,
            "most_similar_id": None,
            "max_similarity": 0.0
        }

        if not embedding:
            return nothing_to_compare

        # Get recent opportunities with embeddings
        recent = db.query(Opportunity).filter(
            Opportunity.embedding.isnot(None)
        ).order_by(
            Opportunity.discovered_at.desc()
        ).limit(limit).all()

        if not recent:
            return nothing_to_compare

        max_similarity = 0.0
        most_similar_id = None

        for opp in recent:
            if opp.embedding:
                similarity = self.cosine_similarity(embedding, opp.embedding)
                if similarity > max_similarity:
                    max_similarity = similarity
                    most_similar_id = opp.id

        # Novelty is inverse of maximum similarity. max_similarity starts
        # at 0.0 and cosine_similarity is clamped to 1.0, so the score
        # stays within [0, 1].
        novelty_score = 1.0 - max_similarity
        is_duplicate = max_similarity > self.similarity_threshold

        return {
            "novelty_score": round(novelty_score, 3),
            "is_duplicate": is_duplicate,
            "most_similar_id": most_similar_id if is_duplicate else None,
            "max_similarity": round(max_similarity, 3)
        }

    def is_recycled_content(self, text: str) -> bool:
        """
        Rule-based check for recycled/aggregated content.

        Returns True if the lower-cased text contains any of the listicle
        or digest phrases below; empty text is never recycled.
        """
        if not text:
            return False

        text_lower = text.lower()

        # Patterns indicating recycled content
        recycled_patterns = [
            "top 10",
            "top 5",
            "best tools",
            "complete guide",
            "everything you need to know",
            "roundup",
            "weekly digest",
            "news summary",
            "in case you missed",
            "trending this week"
        ]

        return any(pattern in text_lower for pattern in recycled_patterns)
|
backend/intelligence/roi_scorer.py
ADDED
|
@@ -0,0 +1,340 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
PIOE ROI Scorer - Version 2.0
|
| 3 |
+
|
| 4 |
+
Calculates "Is this worth my time?" score.
|
| 5 |
+
Key decision intelligence for prioritizing opportunities.
|
| 6 |
+
"""
|
| 7 |
+
from datetime import datetime, timedelta
|
| 8 |
+
from typing import Optional
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class ROIScorer:
|
| 12 |
+
"""
|
| 13 |
+
Calculates ROI (Return on Investment) score for opportunities.
|
| 14 |
+
|
| 15 |
+
Considers:
|
| 16 |
+
- Time required
|
| 17 |
+
- Probability of success
|
| 18 |
+
- Financial/career upside
|
| 19 |
+
- Opportunity chain unlocks
|
| 20 |
+
- Competition level
|
| 21 |
+
- Regional accessibility
|
| 22 |
+
"""
|
| 23 |
+
|
| 24 |
+
# Weights for ROI calculation
|
| 25 |
+
WEIGHTS = {
|
| 26 |
+
"time_efficiency": 0.15,
|
| 27 |
+
"success_probability": 0.25,
|
| 28 |
+
"upside_potential": 0.25,
|
| 29 |
+
"unlock_potential": 0.15,
|
| 30 |
+
"competition": 0.10,
|
| 31 |
+
"accessibility": 0.10,
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
# Category time requirements (hours)
|
| 35 |
+
CATEGORY_TIME = {
|
| 36 |
+
"hackathon": 40,
|
| 37 |
+
"grant": 20,
|
| 38 |
+
"micro_grant": 8,
|
| 39 |
+
"ecosystem_grant": 25,
|
| 40 |
+
"scholarship": 15,
|
| 41 |
+
"fellowship": 20,
|
| 42 |
+
"internship": 10,
|
| 43 |
+
"job": 5,
|
| 44 |
+
"research": 30,
|
| 45 |
+
"bounty": 15,
|
| 46 |
+
"pitch_event": 20,
|
| 47 |
+
"ambassador": 10,
|
| 48 |
+
"partnership": 5,
|
| 49 |
+
}
|
| 50 |
+
|
| 51 |
+
# Category upside potential (0-1)
|
| 52 |
+
CATEGORY_UPSIDE = {
|
| 53 |
+
"ecosystem_grant": 0.9,
|
| 54 |
+
"grant": 0.85,
|
| 55 |
+
"fellowship": 0.85,
|
| 56 |
+
"scholarship": 0.8,
|
| 57 |
+
"hackathon": 0.8,
|
| 58 |
+
"micro_grant": 0.6,
|
| 59 |
+
"pitch_event": 0.75,
|
| 60 |
+
"internship": 0.7,
|
| 61 |
+
"bounty": 0.5,
|
| 62 |
+
"job": 0.6,
|
| 63 |
+
"research": 0.65,
|
| 64 |
+
"ambassador": 0.4,
|
| 65 |
+
"partnership": 0.7,
|
| 66 |
+
}
|
| 67 |
+
|
| 68 |
+
# Category competition levels (0-1, higher = more competitive)
|
| 69 |
+
CATEGORY_COMPETITION = {
|
| 70 |
+
"scholarship": 0.9,
|
| 71 |
+
"fellowship": 0.85,
|
| 72 |
+
"job": 0.7,
|
| 73 |
+
"internship": 0.75,
|
| 74 |
+
"hackathon": 0.6,
|
| 75 |
+
"grant": 0.5,
|
| 76 |
+
"ecosystem_grant": 0.4,
|
| 77 |
+
"micro_grant": 0.3,
|
| 78 |
+
"bounty": 0.3,
|
| 79 |
+
"pitch_event": 0.5,
|
| 80 |
+
"ambassador": 0.35,
|
| 81 |
+
"partnership": 0.4,
|
| 82 |
+
}
|
| 83 |
+
|
| 84 |
+
# Chain unlock values (which categories open doors)
|
| 85 |
+
UNLOCK_VALUES = {
|
| 86 |
+
"hackathon": 0.8, # Opens: grants, accelerators, jobs
|
| 87 |
+
"fellowship": 0.9, # Opens: PhD, research, network
|
| 88 |
+
"ecosystem_grant": 0.85, # Opens: ecosystem jobs, more grants
|
| 89 |
+
"internship": 0.7, # Opens: full-time, network
|
| 90 |
+
"research": 0.75, # Opens: PhD, conference, collaboration
|
| 91 |
+
"pitch_event": 0.7, # Opens: investment, visibility
|
| 92 |
+
"bounty": 0.4, # Opens: ecosystem roles
|
| 93 |
+
"ambassador": 0.5, # Opens: community, ecosystem
|
| 94 |
+
}
|
| 95 |
+
|
| 96 |
+
def __init__(self, user_region: str = "nigeria"):
    """Store the user's home region, normalised to lower case."""
    normalized_region = user_region.lower()
    self.user_region = normalized_region
|
| 98 |
+
|
| 99 |
+
def calculate_roi(
    self,
    category: str,
    deadline: Optional[datetime] = None,
    grant_size: Optional[int] = None,
    region: str = "global",
    extra_data: dict = None
) -> dict:
    """
    Score how worthwhile an opportunity is.

    The category is lower-cased (a falsy category becomes "other"), six
    component scores are computed by the private helpers, and they are
    combined with the class-level WEIGHTS. Competition is inverted, so
    more competition lowers the score.

    Returns dict with:
    - roi_score: weighted score, rounded to 3 decimals
    - risk_level: value returned by _determine_risk
    - unlock_potential: rounded unlock component
    - competition_level: rounded competition component
    - time_hours: CATEGORY_TIME entry for the category (15 if unknown)
    - reasoning: text returned by _generate_reasoning
    """
    details = extra_data if extra_data else {}
    kind = category.lower() if category else "other"

    # Component scores
    time_score = self._calculate_time_efficiency(kind, deadline)
    win_chance = self._calculate_success_probability(kind, details)
    payoff = self._calculate_upside(kind, grant_size)
    door_opener = self._calculate_unlock_potential(kind)
    rivalry = self._calculate_competition(kind)
    reachability = self._calculate_accessibility(region)

    # Weighted combination
    weights = self.WEIGHTS
    weighted_total = (
        weights["time_efficiency"] * time_score +
        weights["success_probability"] * win_chance +
        weights["upside_potential"] * payoff +
        weights["unlock_potential"] * door_opener +
        weights["competition"] * (1 - rivalry) +
        weights["accessibility"] * reachability
    )

    risk = self._determine_risk(kind, rivalry, deadline)

    explanation = self._generate_reasoning(
        kind, weighted_total, risk,
        time_score, win_chance, payoff, reachability
    )

    return {
        "roi_score": round(weighted_total, 3),
        "risk_level": risk,
        "unlock_potential": round(door_opener, 3),
        "competition_level": round(rivalry, 3),
        "time_hours": self.CATEGORY_TIME.get(kind, 15),
        "reasoning": explanation,
    }
|
| 155 |
+
|
| 156 |
+
def _calculate_time_efficiency(
|
| 157 |
+
self,
|
| 158 |
+
category: str,
|
| 159 |
+
deadline: Optional[datetime]
|
| 160 |
+
) -> float:
|
| 161 |
+
"""Score based on time required and deadline pressure."""
|
| 162 |
+
base_hours = self.CATEGORY_TIME.get(category, 15)
|
| 163 |
+
|
| 164 |
+
# Lower hours = higher efficiency
|
| 165 |
+
efficiency = 1.0 - (min(base_hours, 60) / 60)
|
| 166 |
+
|
| 167 |
+
# Deadline factor
|
| 168 |
+
if deadline:
|
| 169 |
+
# Handle timezone-aware datetimes
|
| 170 |
+
try:
|
| 171 |
+
if deadline.tzinfo is not None:
|
| 172 |
+
deadline = deadline.replace(tzinfo=None)
|
| 173 |
+
days_left = (deadline - datetime.utcnow()).days
|
| 174 |
+
except Exception:
|
| 175 |
+
days_left = 30 # Default if comparison fails
|
| 176 |
+
if days_left < 3:
|
| 177 |
+
efficiency *= 0.5 # Too rushed
|
| 178 |
+
elif days_left < 7:
|
| 179 |
+
efficiency *= 0.8 # Tight
|
| 180 |
+
elif days_left > 30:
|
| 181 |
+
efficiency *= 1.0 # Good time
|
| 182 |
+
|
| 183 |
+
return min(efficiency, 1.0)
|
| 184 |
+
|
| 185 |
+
def _calculate_success_probability(
|
| 186 |
+
self,
|
| 187 |
+
category: str,
|
| 188 |
+
extra_data: dict
|
| 189 |
+
) -> float:
|
| 190 |
+
"""Estimate probability of success."""
|
| 191 |
+
base_prob = {
|
| 192 |
+
"bounty": 0.7,
|
| 193 |
+
"micro_grant": 0.5,
|
| 194 |
+
"ambassador": 0.5,
|
| 195 |
+
"hackathon": 0.3,
|
| 196 |
+
"ecosystem_grant": 0.25,
|
| 197 |
+
"grant": 0.2,
|
| 198 |
+
"internship": 0.2,
|
| 199 |
+
"job": 0.15,
|
| 200 |
+
"fellowship": 0.1,
|
| 201 |
+
"scholarship": 0.1,
|
| 202 |
+
}.get(category, 0.2)
|
| 203 |
+
|
| 204 |
+
# Adjust based on extra data
|
| 205 |
+
if extra_data.get("technical_depth") == "beginner":
|
| 206 |
+
base_prob += 0.1
|
| 207 |
+
if extra_data.get("africa_focus") or extra_data.get("nigeria_specific"):
|
| 208 |
+
base_prob += 0.15 # Regional programs often less competitive
|
| 209 |
+
|
| 210 |
+
return min(base_prob, 1.0)
|
| 211 |
+
|
| 212 |
+
def _calculate_upside(
|
| 213 |
+
self,
|
| 214 |
+
category: str,
|
| 215 |
+
grant_size: Optional[int]
|
| 216 |
+
) -> float:
|
| 217 |
+
"""Calculate potential upside."""
|
| 218 |
+
base_upside = self.CATEGORY_UPSIDE.get(category, 0.5)
|
| 219 |
+
|
| 220 |
+
# Adjust for grant size
|
| 221 |
+
if grant_size:
|
| 222 |
+
if grant_size > 50000:
|
| 223 |
+
base_upside = min(base_upside + 0.2, 1.0)
|
| 224 |
+
elif grant_size > 10000:
|
| 225 |
+
base_upside = min(base_upside + 0.1, 1.0)
|
| 226 |
+
|
| 227 |
+
return base_upside
|
| 228 |
+
|
| 229 |
+
def _calculate_unlock_potential(self, category: str) -> float:
|
| 230 |
+
"""Calculate what doors this opens."""
|
| 231 |
+
return self.UNLOCK_VALUES.get(category, 0.3)
|
| 232 |
+
|
| 233 |
+
def _calculate_competition(self, category: str) -> float:
|
| 234 |
+
"""Estimate competition level."""
|
| 235 |
+
return self.CATEGORY_COMPETITION.get(category, 0.5)
|
| 236 |
+
|
| 237 |
+
def _calculate_accessibility(self, region: str) -> float:
|
| 238 |
+
"""Calculate accessibility based on user region."""
|
| 239 |
+
region = (region or "global").lower()
|
| 240 |
+
|
| 241 |
+
# Perfect match
|
| 242 |
+
if region == self.user_region:
|
| 243 |
+
return 1.0
|
| 244 |
+
|
| 245 |
+
# Regional matches
|
| 246 |
+
if self.user_region == "nigeria":
|
| 247 |
+
if region in ["africa", "remote_africa"]:
|
| 248 |
+
return 0.9
|
| 249 |
+
elif region in ["global", "remote_global"]:
|
| 250 |
+
return 0.7
|
| 251 |
+
else:
|
| 252 |
+
return 0.3
|
| 253 |
+
|
| 254 |
+
# Global is accessible
|
| 255 |
+
if region in ["global", "remote_global"]:
|
| 256 |
+
return 0.8
|
| 257 |
+
|
| 258 |
+
return 0.5
|
| 259 |
+
|
| 260 |
+
def _determine_risk(
|
| 261 |
+
self,
|
| 262 |
+
category: str,
|
| 263 |
+
competition: float,
|
| 264 |
+
deadline: Optional[datetime]
|
| 265 |
+
) -> str:
|
| 266 |
+
"""Determine risk level (time sink risk)."""
|
| 267 |
+
risk_score = 0
|
| 268 |
+
|
| 269 |
+
# High time = high risk
|
| 270 |
+
time_hours = self.CATEGORY_TIME.get(category, 15)
|
| 271 |
+
if time_hours > 30:
|
| 272 |
+
risk_score += 2
|
| 273 |
+
elif time_hours > 15:
|
| 274 |
+
risk_score += 1
|
| 275 |
+
|
| 276 |
+
# High competition = high risk
|
| 277 |
+
if competition > 0.7:
|
| 278 |
+
risk_score += 2
|
| 279 |
+
elif competition > 0.5:
|
| 280 |
+
risk_score += 1
|
| 281 |
+
|
| 282 |
+
# Tight deadline = high risk
|
| 283 |
+
if deadline:
|
| 284 |
+
try:
|
| 285 |
+
if deadline.tzinfo is not None:
|
| 286 |
+
deadline = deadline.replace(tzinfo=None)
|
| 287 |
+
days_left = (deadline - datetime.utcnow()).days
|
| 288 |
+
except Exception:
|
| 289 |
+
days_left = 30 # Default if comparison fails
|
| 290 |
+
if days_left < 5:
|
| 291 |
+
risk_score += 2
|
| 292 |
+
|
| 293 |
+
if risk_score >= 4:
|
| 294 |
+
return "high"
|
| 295 |
+
elif risk_score >= 2:
|
| 296 |
+
return "medium"
|
| 297 |
+
else:
|
| 298 |
+
return "low"
|
| 299 |
+
|
| 300 |
+
def _generate_reasoning(
|
| 301 |
+
self,
|
| 302 |
+
category: str,
|
| 303 |
+
roi_score: float,
|
| 304 |
+
risk_level: str,
|
| 305 |
+
time_eff: float,
|
| 306 |
+
success_prob: float,
|
| 307 |
+
upside: float,
|
| 308 |
+
accessibility: float
|
| 309 |
+
) -> str:
|
| 310 |
+
"""Generate human-readable reasoning."""
|
| 311 |
+
reasons = []
|
| 312 |
+
|
| 313 |
+
if roi_score > 0.7:
|
| 314 |
+
reasons.append("High-value opportunity")
|
| 315 |
+
elif roi_score > 0.5:
|
| 316 |
+
reasons.append("Moderate value")
|
| 317 |
+
else:
|
| 318 |
+
reasons.append("Consider carefully")
|
| 319 |
+
|
| 320 |
+
if time_eff > 0.7:
|
| 321 |
+
reasons.append("time-efficient")
|
| 322 |
+
elif time_eff < 0.4:
|
| 323 |
+
reasons.append("requires significant time")
|
| 324 |
+
|
| 325 |
+
if success_prob > 0.4:
|
| 326 |
+
reasons.append("good success odds")
|
| 327 |
+
elif success_prob < 0.15:
|
| 328 |
+
reasons.append("highly competitive")
|
| 329 |
+
|
| 330 |
+
if accessibility > 0.8:
|
| 331 |
+
reasons.append("region-accessible")
|
| 332 |
+
elif accessibility < 0.5:
|
| 333 |
+
reasons.append("may have access barriers")
|
| 334 |
+
|
| 335 |
+
if risk_level == "low":
|
| 336 |
+
reasons.append("low time-sink risk")
|
| 337 |
+
elif risk_level == "high":
|
| 338 |
+
reasons.append("high time investment")
|
| 339 |
+
|
| 340 |
+
return ". ".join(reasons) + "."
|
backend/intelligence/scorer.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
PIOE Relevance Scorer
|
| 3 |
+
|
| 4 |
+
Calculates relevance score based on keyword matching and semantic similarity.
|
| 5 |
+
"""
|
| 6 |
+
from typing import Optional
|
| 7 |
+
import numpy as np
|
| 8 |
+
from sentence_transformers import SentenceTransformer
|
| 9 |
+
|
| 10 |
+
from ..config import get_settings
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class RelevanceScorer:
    """
    Scores opportunities based on relevance to user interests.

    Combines keyword matching against ``settings.high_priority_keywords``
    with semantic similarity between the text and an "interest" embedding
    built from those same keywords. The embedding model and the interest
    embedding are created lazily on first use and then cached.
    """

    def __init__(self):
        self.settings = get_settings()
        self._model: Optional["SentenceTransformer"] = None
        self._interest_embedding: Optional[np.ndarray] = None

        # Build interest text from keywords
        self.interest_text = " ".join(self.settings.high_priority_keywords)

    @property
    def model(self) -> "SentenceTransformer":
        """Lazy load the embedding model."""
        if self._model is None:
            self._model = SentenceTransformer('all-MiniLM-L6-v2')
        return self._model

    @property
    def interest_embedding(self) -> np.ndarray:
        """Get cached interest vector embedding."""
        if self._interest_embedding is None:
            self._interest_embedding = self.model.encode(self.interest_text)
        return self._interest_embedding

    def get_embedding(self, text: str) -> list[float]:
        """Generate embedding for text and return it as a plain list."""
        embedding = self.model.encode(text)
        return embedding.tolist()

    def score_keywords(self, text: str) -> float:
        """
        Score based on keyword presence.

        Counts how many configured keywords occur in ``text`` as
        case-insensitive substrings; five or more matches give a full
        score.

        Returns 0.0 to 1.0
        """
        if not text:
            return 0.0

        text_lower = text.lower()
        matches = sum(
            1 for keyword in self.settings.high_priority_keywords
            # A blank keyword is a substring of every text; don't count it.
            if keyword and keyword.lower() in text_lower
        )

        # Normalize: more matches = higher score, capped at 1.0
        max_expected = 5  # Expect 5+ matches for full score
        return min(matches / max_expected, 1.0)

    def score_semantic(self, text: str) -> float:
        """
        Score based on semantic similarity to interest vector.

        Cosine similarity is rescaled from [-1, 1] to [0, 1]. A neutral 0.5
        is returned when similarity cannot be computed: the model raised,
        or one of the embeddings has zero or non-finite norm.

        Returns 0.0 to 1.0
        """
        if not text:
            return 0.0

        try:
            text_embedding = self.model.encode(text)
            interest = self.interest_embedding
            denominator = float(
                np.linalg.norm(text_embedding) * np.linalg.norm(interest)
            )
            # NumPy division by zero yields NaN instead of raising, so the
            # degenerate case has to be checked explicitly.
            if denominator == 0.0 or not np.isfinite(denominator):
                return 0.5
            similarity = float(np.dot(text_embedding, interest)) / denominator
        except Exception:
            # Deliberate best effort: a model failure must not abort scoring.
            import logging
            logging.getLogger(__name__).exception("Semantic scoring error")
            return 0.5

        if np.isnan(similarity):
            return 0.5

        # Normalize from [-1, 1] to [0, 1]; clamp away float rounding drift.
        return min(max((similarity + 1) / 2, 0.0), 1.0)

    def score(self, text: str, title: str = "") -> dict:
        """
        Calculate combined relevance score.

        Title and text are concatenated and scored together.

        Returns dict with ``keyword_score``, ``semantic_score`` and the
        weighted ``relevance_score``, each rounded to three decimals.
        """
        full_text = f"{title} {text}".strip()

        keyword_score = self.score_keywords(full_text)
        semantic_score = self.score_semantic(full_text)

        # Weighted average: keywords 40%, semantic 60%
        combined = 0.4 * keyword_score + 0.6 * semantic_score

        return {
            "keyword_score": round(keyword_score, 3),
            "semantic_score": round(semantic_score, 3),
            "relevance_score": round(combined, 3)
        }
|
backend/intelligence/silent_detector.py
ADDED
|
@@ -0,0 +1,313 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
PIOE Silent Opportunities Detector - Version 2.0
|
| 3 |
+
|
| 4 |
+
Detects implicit/hidden opportunities that are never announced clearly.
|
| 5 |
+
These appear in blog posts, tweets, Discord messages, research updates.
|
| 6 |
+
|
| 7 |
+
Examples:
|
| 8 |
+
- "We're exploring ideas around..."
|
| 9 |
+
- "We're looking for collaborators..."
|
| 10 |
+
- "If anyone is interested..."
|
| 11 |
+
- "We're building something new..."
|
| 12 |
+
"""
|
| 13 |
+
import re
|
| 14 |
+
from typing import Optional
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class SilentOpportunityDetector:
    """
    Detects implicit opportunities from content that doesn't
    explicitly announce them as opportunities.

    Detection is regex based: the text is matched against per-type signal
    patterns, rejected outright if any noise pattern matches, and scored
    by the number of matches plus urgency "booster" words.
    """

    # Patterns for implicit opportunities
    SIGNAL_PATTERNS = {
        # Pre-hiring signals
        "pre_hiring": [
            r"we(?:'re| are) (?:actively )?(?:looking|searching) for",
            r"we need (?:a |someone|people)",
            r"hiring (?:soon|next|this)",
            r"building (?:a |our |the )?team",
            r"if you(?:'re| are) interested in joining",
            r"open roles? (?:coming|soon)",
            r"dm (?:me|us) if (?:you(?:'re| are)|interested)",
            r"reach out if",
        ],

        # Pre-grant signals
        "pre_grant": [
            r"(?:we(?:'re| are)|we will be) (?:funding|supporting|backing)",
            r"grants? (?:coming|opening|soon|next)",
            r"ecosystem fund",
            r"builder(?:s)? program",
            r"retroactive (?:funding|rewards)",
            r"announcing.{0,30}funding",
            r"accepting applications",
        ],

        # Collaboration signals
        "collaboration": [
            r"looking for (?:collaborators?|partners?|co-founder)",
            r"seeking (?:collaborat|partner)",
            r"open to (?:collaborat|partner|work)",
            r"anyone (?:want|interested).{0,30}(?:build|work|collaborat)",
            r"let(?:'s| us) (?:build|work|create) together",
            r"who wants to",
            r"exploring.{0,30}partnership",
        ],

        # Project/research signals
        "research": [
            r"we(?:'re| are) (?:exploring|researching|investigating)",
            r"new (?:research|project|initiative)",
            r"call for (?:papers?|proposals?|abstracts?)",
            r"(?:research|academic) (?:collaboration|partnership)",
            r"phd (?:position|opportunity|student)",
            r"postdoc",
            r"looking for (?:interns?|students?)",
        ],

        # Community/ambassador signals
        "ambassador": [
            r"ambassador program",
            r"community (?:lead|manager|role)",
            r"help (?:us )?(?:grow|build|spread)",
            r"join (?:our|the) (?:community|team|movement)",
            r"early (?:adopter|supporter)",
        ],

        # Investment/demo signals
        "investment": [
            r"demo day",
            r"pitch (?:competition|event|day)",
            r"investor (?:meeting|demo|call)",
            r"raising (?:a |our )?(?:seed|round|series)",
            r"open to (?:investment|investors)",
        ],
    }

    # Strength indicators (modifiers)
    STRENGTH_BOOSTERS = [
        r"immediately",
        r"urgently",
        r"actively",
        r"now",
        r"today",
        r"this week",
        r"asap",
        r"serious",
        r"exciting",
    ]

    # Negative patterns (reduce signal)
    NOISE_PATTERNS = [
        r"not (?:looking|hiring|seeking)",
        r"no longer",
        r"was (?:looking|hiring)",
        r"used to",
        r"back in",
        r"years? ago",
        r"hypothetically",
        r"if only",
    ]

    # Maps a detected signal type to the category suggested to callers.
    _CATEGORY_MAP = {
        "pre_hiring": "pre_hiring_signal",
        "pre_grant": "pre_grant_signal",
        "collaboration": "collaboration",
        "research": "research",
        "ambassador": "ambassador",
        "investment": "pitch_event",
    }

    @staticmethod
    def _no_signal() -> dict:
        """Return a fresh result dict meaning "nothing detected"."""
        return {
            "is_silent_opportunity": False,
            "opportunity_type": None,
            "signal_strength": 0.0,
            "detected_patterns": [],
            "recommended_category": None,
        }

    def detect(self, text: str, title: str = "") -> dict:
        """
        Analyze text for silent opportunity signals.

        Title and text are joined and lower-cased before matching, so the
        returned context snippets are lower-case. ``None`` for either
        argument is treated as an empty string.

        Returns:
        - is_silent_opportunity: bool
        - opportunity_type: str (pre_hiring, pre_grant, etc.)
        - signal_strength: float (0.0 to 1.0)
        - detected_patterns: list
        - recommended_category: str
        """
        # Guard against None so it is not interpolated as the word "none".
        full_text = f"{title or ''} {text or ''}".lower()

        # Check for noise patterns first
        if self._has_noise(full_text):
            return self._no_signal()

        # Detect patterns
        detected = {}
        for opp_type, patterns in self.SIGNAL_PATTERNS.items():
            matches = self._find_matches(full_text, patterns)
            if matches:
                detected[opp_type] = matches

        if not detected:
            return self._no_signal()

        # Primary type = the one with most matching patterns; ties go to
        # the type listed first in SIGNAL_PATTERNS.
        primary_type = max(detected, key=lambda k: len(detected[k]))

        # Calculate signal strength
        signal_strength = self._calculate_strength(
            full_text, detected, primary_type
        )

        return {
            "is_silent_opportunity": True,
            "opportunity_type": primary_type,
            "signal_strength": round(signal_strength, 3),
            "detected_patterns": detected[primary_type],
            "recommended_category": self._CATEGORY_MAP.get(primary_type, "weak_signal"),
        }

    def _find_matches(self, text: str, patterns: list) -> list:
        """Return a context snippet for each pattern found in ``text``.

        Each snippet is the first match of a pattern plus up to 20
        characters on either side, stripped of surrounding whitespace.
        """
        matches = []
        for pattern in patterns:
            match = re.search(pattern, text, re.IGNORECASE)
            if match is None:
                continue
            # Get surrounding context
            start = max(0, match.start() - 20)
            end = min(len(text), match.end() + 20)
            matches.append(text[start:end].strip())
        return matches

    def _has_noise(self, text: str) -> bool:
        """Check if text contains noise patterns."""
        return any(
            re.search(pattern, text, re.IGNORECASE)
            for pattern in self.NOISE_PATTERNS
        )

    def _calculate_strength(
        self,
        text: str,
        detected: dict,
        primary_type: str
    ) -> float:
        """Calculate signal strength, capped at 1.0.

        Starts at 0.5, adds 0.1 per matched pattern of the primary type
        (at most 0.3), 0.05 per booster word present, and 0.1 when more
        than one signal type was detected.
        """
        base_strength = 0.5

        # More patterns = stronger signal
        pattern_count = len(detected[primary_type])
        base_strength += min(pattern_count * 0.1, 0.3)

        # Check for strength boosters. Word boundaries keep short boosters
        # such as "now" from firing inside "know" or "snow".
        for booster in self.STRENGTH_BOOSTERS:
            if re.search(rf"\b(?:{booster})\b", text, re.IGNORECASE):
                base_strength += 0.05

        # Multiple types of signals = stronger
        if len(detected) > 1:
            base_strength += 0.1

        # Cap at 1.0
        return min(base_strength, 1.0)

    def reclassify_opportunity(
        self,
        opportunity: dict
    ) -> tuple[Optional[str], float]:
        """
        Re-evaluate an existing opportunity for silent signals.

        Reads the ``title`` and ``raw_text`` keys of ``opportunity``.

        Returns (new_category, confidence), or (None, 0.0) when no silent
        signal is found.
        """
        title = opportunity.get("title") or ""
        text = opportunity.get("raw_text") or ""

        result = self.detect(text, title)

        if result["is_silent_opportunity"]:
            return (
                result["recommended_category"],
                result["signal_strength"]
            )

        return (None, 0.0)
|
| 247 |
+
|
| 248 |
+
|
| 249 |
+
class OpportunityLanguageDetector:
    """
    Spots timing and call-to-action wording in opportunity text
    using fixed regular-expression tables.
    """

    TIMING_PATTERNS = {
        "early": [
            r"early (?:bird|access|application)",
            r"just (?:launched|announced|opened)",
            r"applications? (?:now )?open",
            r"first (?:round|batch|cohort)",
            r"founding",
            r"new program",
        ],
        "optimal": [
            r"applications? (?:open|accepted)",
            r"deadline (?:is )?(?:soon|approaching)",
            r"apply (?:now|today)",
            r"last call",
            r"extended deadline",
        ],
        "late": [
            r"deadline (?:in )?(?:days?|hours?)",
            r"closes? (?:soon|tomorrow|today)",
            r"final (?:day|hour|chance)",
            r"last (?:day|chance)",
        ],
    }

    def detect_timing(self, text: str) -> str:
        """Classify where in the application window the text sits.

        Timing labels are tried in the order they appear in
        ``TIMING_PATTERNS``; the first label with any matching pattern is
        returned, or ``"unknown"`` when nothing matches.
        """
        lowered = text.lower()
        return next(
            (
                label
                for label, regexes in self.TIMING_PATTERNS.items()
                if any(re.search(rx, lowered, re.IGNORECASE) for rx in regexes)
            ),
            "unknown",
        )

    def extract_action_items(self, text: str) -> list:
        """Collect snippets of text that tell the reader what to do.

        For every call-to-action pattern that occurs, the first match is
        returned with up to 10 characters before and 30 after it, stripped
        of surrounding whitespace. At most five snippets are returned, in
        pattern order.
        """
        # Common action patterns
        call_to_action = (
            r"apply (?:at|via|through|here)",
            r"visit (?:our|the) (?:website|page|link)",
            r"(?:fill|submit).{0,20}(?:form|application)",
            r"send.{0,20}(?:email|resume|cv|portfolio)",
            r"register (?:at|on|here)",
            r"sign up",
            r"join.{0,20}(?:discord|telegram|slack)",
            r"dm (?:me|us)",
            r"follow.{0,10}on",
        )

        snippets = []
        for rx in call_to_action:
            hit = re.search(rx, text, re.IGNORECASE)
            if hit is None:
                continue
            lo = max(0, hit.start() - 10)
            hi = min(len(text), hit.end() + 30)
            snippets.append(text[lo:hi].strip())

        # Limit to 5 actions
        return snippets[:5]
|
backend/main.py
ADDED
|
@@ -0,0 +1,481 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
PIOE - Personal Intelligence & Opportunity Engine
|
| 3 |
+
|
| 4 |
+
FastAPI Backend Application
|
| 5 |
+
"""
|
| 6 |
+
from fastapi import FastAPI, Depends, HTTPException, Query, BackgroundTasks
|
| 7 |
+
from fastapi.staticfiles import StaticFiles
|
| 8 |
+
from fastapi.responses import HTMLResponse, JSONResponse
|
| 9 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 10 |
+
from sqlalchemy.orm import Session
|
| 11 |
+
from datetime import datetime
|
| 12 |
+
from typing import Optional
|
| 13 |
+
from pathlib import Path
|
| 14 |
+
|
| 15 |
+
from .database import get_db, init_db
|
| 16 |
+
from .models import Opportunity, OpportunityCategory, OpportunityStatus, Domain
|
| 17 |
+
from .delivery import DigestGenerator
|
| 18 |
+
from .ingestion import IngestionScheduler
|
| 19 |
+
|
| 20 |
+
# Initialize app
# NOTE(review): version is "1.0.0" although the module docstrings elsewhere
# in this commit refer to PIOE 2.0 - confirm which is intended.
app = FastAPI(
    title="PIOE - Personal Intelligence & Opportunity Engine",
    description="Signal intelligence system for opportunities in AI, Robotics, and more",
    version="1.0.0"
)

# CORS middleware
# NOTE(review): wildcard origins combined with allow_credentials=True lets
# any site make credentialed requests - confirm this is acceptable for the
# deployed environment or restrict allow_origins.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Global scheduler instance
# Stays None until the startup handler assigns an IngestionScheduler.
scheduler: Optional[IngestionScheduler] = None
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
@app.on_event("startup")
async def startup():
    """Prepare the database and create the module-level scheduler.

    The scheduler is only constructed here, not started; ingestion begins
    when the user triggers it.
    """
    global scheduler

    init_db()
    # Don't auto-start scheduler - let user trigger manually first
    scheduler = IngestionScheduler()

    banner = "PIOE Backend started. Run /api/ingest/start to begin ingestion."
    print(banner)
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
@app.on_event("shutdown")
async def shutdown():
    """Stop the module-level scheduler, if one was created."""
    global scheduler

    active = scheduler
    if not active:
        # Nothing was created, so there is nothing to stop.
        return
    active.stop()
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
# ============== API Routes ==============
|
| 59 |
+
|
| 60 |
+
@app.get("/", response_class=HTMLResponse)
async def serve_dashboard():
    """Serve the frontend dashboard.

    Reads ``frontend/index.html`` located one directory above this
    package. A placeholder page is returned (still with status 200) when
    the file is missing.
    """
    frontend_path = Path(__file__).parent.parent / "frontend" / "index.html"
    try:
        # Read with an explicit encoding so the result does not depend on
        # the platform's default locale.
        # NOTE(review): assumes index.html is UTF-8 encoded - confirm.
        page = frontend_path.read_text(encoding="utf-8")
    except (FileNotFoundError, NotADirectoryError):
        return HTMLResponse(content="<h1>PIOE Dashboard - Frontend not found</h1>", status_code=200)
    return HTMLResponse(content=page, status_code=200)
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
# ---------- Opportunities ----------
|
| 70 |
+
|
| 71 |
+
@app.get("/api/opportunities")
async def get_opportunities(
    db: Session = Depends(get_db),
    category: Optional[str] = None,
    domain: Optional[str] = None,
    status: Optional[str] = None,
    min_score: float = 0.0,
    limit: int = Query(default=50, le=200),
    offset: int = 0
):
    """Return one page of opportunities, best combined score first.

    Rows are kept when ``combined_score >= min_score``. ``category``,
    ``domain`` and ``status`` narrow the result when they name a valid
    enum member; an unrecognised value is ignored rather than rejected.
    ``total`` in the response counts all matching rows, not just the page.
    """

    def enum_value(member):
        # Enum columns may be unset.
        return member.value if member else None

    def iso(moment):
        # Datetime columns may be unset.
        return moment.isoformat() if moment else None

    def summarize(o):
        # List view: raw_text is truncated to its first 500 characters.
        return {
            "id": o.id,
            "title": o.title,
            "category": enum_value(o.category),
            "domain": enum_value(o.domain),
            "source_name": o.source_name,
            "url": o.url,
            "deadline": iso(o.deadline),
            "relevance_score": o.relevance_score,
            "novelty_score": o.novelty_score,
            "credibility_score": o.credibility_score,
            "combined_score": o.combined_score,
            # PIOE 2.0 fields
            "roi_score": getattr(o, 'roi_score', None),
            "risk_level": o.risk_level.value if hasattr(o, 'risk_level') and o.risk_level else "medium",
            "region": o.region.value if hasattr(o, 'region') and o.region else "global",
            "status": enum_value(o.status),
            "discovered_at": iso(o.discovered_at),
            "raw_text": o.raw_text[:500] if o.raw_text else None
        }

    query = db.query(Opportunity).filter(
        Opportunity.combined_score >= min_score
    )

    # (requested value, enum type, column) for each optional filter.
    enum_filters = (
        (category, OpportunityCategory, Opportunity.category),
        (domain, Domain, Opportunity.domain),
        (status, OpportunityStatus, Opportunity.status),
    )
    for requested, enum_cls, column in enum_filters:
        if not requested:
            continue
        try:
            query = query.filter(column == enum_cls(requested))
        except ValueError:
            # Unknown enum value: leave the query unfiltered on this field.
            continue

    total = query.count()

    ranked = query.order_by(Opportunity.combined_score.desc())
    page = ranked.offset(offset).limit(limit).all()

    return {
        "total": total,
        "limit": limit,
        "offset": offset,
        "opportunities": [summarize(o) for o in page]
    }
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
@app.get("/api/opportunities/{opportunity_id}")
async def get_opportunity(opportunity_id: str, db: Session = Depends(get_db)):
    """Return one opportunity, including every PIOE 2.0 field, by its ID.

    Raises:
        HTTPException: 404 when no opportunity has ``opportunity_id``.
    """
    record = db.query(Opportunity).filter(Opportunity.id == opportunity_id).first()
    if not record:
        raise HTTPException(status_code=404, detail="Opportunity not found")

    def enum_text(member, fallback=None):
        # Enum-typed columns can be empty; fall back instead of calling .value on None.
        return member.value if member else fallback

    def iso(moment):
        # Datetime columns are optional, so only format the ones that are set.
        return moment.isoformat() if moment else None

    payload = {
        "id": record.id,
        "title": record.title,
        "category": enum_text(record.category),
        "domain": enum_text(record.domain),
        "source_name": record.source_name,
        "source_type": enum_text(record.source_type),
        "url": record.url,
        "deadline": iso(record.deadline),
        "published_at": iso(record.published_at),
        "discovered_at": iso(record.discovered_at),
        "raw_text": record.raw_text,
        # Core scores
        "relevance_score": record.relevance_score,
        "novelty_score": record.novelty_score,
        "credibility_score": record.credibility_score,
        "signal_strength": record.signal_strength,
        "combined_score": record.combined_score,
        # PIOE 2.0: decision intelligence (getattr tolerates rows/models
        # that do not carry the 2.0 attributes)
        "roi_score": getattr(record, 'roi_score', None),
        "unlock_potential": getattr(record, 'unlock_potential', None),
        "risk_level": enum_text(getattr(record, 'risk_level', None), "medium"),
        "competition_level": getattr(record, 'competition_level', None),
        # PIOE 2.0: regional
        "region": enum_text(getattr(record, 'region', None), "global"),
        "region_weight": getattr(record, 'region_weight', 1.0),
        # Status and metadata
        "status": enum_text(record.status),
        "metadata": record.extra_data,
    }
    return payload
|
| 177 |
+
|
| 178 |
+
|
| 179 |
+
@app.get("/api/opportunities/{opportunity_id}/guidance")
async def get_action_guidance(opportunity_id: str, db: Session = Depends(get_db)):
    """PIOE 2.0: ask the configured LLM client for action guidance on an opportunity.

    Raises:
        HTTPException: 404 when no opportunity has ``opportunity_id``.
    """
    from .intelligence import LLMClient

    record = db.query(Opportunity).filter(Opportunity.id == opportunity_id).first()
    if not record:
        raise HTTPException(status_code=404, detail="Opportunity not found")

    region_member = getattr(record, 'region', None)

    # Condensed view of the record handed to the LLM; empty enum/text columns
    # are replaced with neutral placeholders.
    llm_input = dict(
        title=record.title,
        category=record.category.value if record.category else "other",
        domain=record.domain.value if record.domain else "mixed",
        deadline=record.deadline.isoformat() if record.deadline else None,
        raw_text=record.raw_text or "",
        roi_score=getattr(record, 'roi_score', 0.5),
        competition_level=getattr(record, 'competition_level', 0.5),
        region=region_member.value if region_member else "global",
    )

    # NOTE(review): recommend_action is called synchronously inside an async
    # handler; confirm it does not block the event loop for long.
    client = LLMClient.get_client()
    return {
        "opportunity_id": opportunity_id,
        "guidance": client.recommend_action(llm_input),
    }
|
| 208 |
+
|
| 209 |
+
|
| 210 |
+
@app.patch("/api/opportunities/{opportunity_id}/status")
async def update_opportunity_status(
    opportunity_id: str,
    status: str,
    db: Session = Depends(get_db)
):
    """Update opportunity status (save, apply, dismiss, etc.).

    Args:
        opportunity_id: ID of the opportunity to update.
        status: New status; must be one of the ``OpportunityStatus`` values.
        db: Database session.

    Returns:
        ``{"success": True, "new_status": <status value>}`` after the commit.

    Raises:
        HTTPException: 404 when the opportunity does not exist, 400 when
            ``status`` is not a valid ``OpportunityStatus`` value.
    """
    opp = db.query(Opportunity).filter(Opportunity.id == opportunity_id).first()
    if not opp:
        raise HTTPException(status_code=404, detail="Opportunity not found")

    # Only the enum conversion is guarded: a ValueError raised while
    # committing must not be misreported to the client as "Invalid status".
    try:
        new_status = OpportunityStatus(status)
    except ValueError:
        raise HTTPException(status_code=400, detail=f"Invalid status: {status}")

    opp.status = new_status
    db.commit()
    return {"success": True, "new_status": new_status.value}
|
| 227 |
+
|
| 228 |
+
|
| 229 |
+
# ---------- Digest ----------
|
| 230 |
+
|
| 231 |
+
@app.get("/api/digest/daily")
async def get_daily_digest(db: Session = Depends(get_db), limit: int = 10):
    """Return today's digest; ``limit`` is forwarded to ``generate_daily``."""
    return {"digest": DigestGenerator(db).generate_daily(limit)}
|
| 237 |
+
|
| 238 |
+
|
| 239 |
+
@app.get("/api/digest/weekly")
async def get_weekly_digest(db: Session = Depends(get_db), limit: int = 25):
    """Return the weekly digest; ``limit`` is forwarded to ``generate_weekly``."""
    return {"digest": DigestGenerator(db).generate_weekly(limit)}
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
@app.get("/api/digest/urgent")
async def get_urgent_digest(db: Session = Depends(get_db), limit: int = 10):
    """Return the digest of opportunities with approaching deadlines.

    ``limit`` is forwarded to ``DigestGenerator.generate_urgent``.
    """
    return {"digest": DigestGenerator(db).generate_urgent(limit)}
|
| 253 |
+
|
| 254 |
+
|
| 255 |
+
@app.get("/api/digest/{category}")
async def get_category_digest(
    category: str,
    db: Session = Depends(get_db),
    limit: int = 10
):
    """Return the digest restricted to one opportunity category.

    Raises:
        HTTPException: 400 when ``category`` is not an ``OpportunityCategory`` value.
    """
    try:
        selected = OpportunityCategory(category)
    except ValueError:
        raise HTTPException(status_code=400, detail=f"Invalid category: {category}")

    return {"digest": DigestGenerator(db).generate_by_category(selected, limit)}
|
| 270 |
+
|
| 271 |
+
|
| 272 |
+
# ---------- Ingestion Control ----------
|
| 273 |
+
|
| 274 |
+
@app.post("/api/ingest/run")
async def run_ingestion(background_tasks: BackgroundTasks):
    """Queue a full ingestion run as a background task and return immediately."""
    global scheduler
    # Create the shared scheduler on first use.
    scheduler = scheduler or IngestionScheduler()

    background_tasks.add_task(scheduler.run_full_ingestion)
    return {"message": "Ingestion started in background"}
|
| 283 |
+
|
| 284 |
+
|
| 285 |
+
@app.post("/api/ingest/source/{source_name}")
async def run_source_ingestion(source_name: str, background_tasks: BackgroundTasks):
    """Queue ingestion of a single named source as a background task."""
    global scheduler
    # Create the shared scheduler on first use.
    scheduler = scheduler or IngestionScheduler()

    background_tasks.add_task(scheduler.ingest_single_source, source_name)
    return {"message": f"Ingestion started for {source_name}"}
|
| 294 |
+
|
| 295 |
+
|
| 296 |
+
@app.post("/api/ingest/start")
async def start_scheduler():
    """Start automatic ingestion, creating the shared scheduler if needed."""
    global scheduler
    scheduler = scheduler or IngestionScheduler()

    scheduler.start()
    return {"message": "Scheduler started"}
|
| 305 |
+
|
| 306 |
+
|
| 307 |
+
@app.post("/api/ingest/stop")
async def stop_scheduler():
    """Stop automatic ingestion.

    The same message is returned whether or not a scheduler existed.
    """
    global scheduler
    current = scheduler
    if current:
        current.stop()
    return {"message": "Scheduler stopped"}
|
| 314 |
+
|
| 315 |
+
|
| 316 |
+
# ---------- Stats ----------
|
| 317 |
+
|
| 318 |
+
@app.get("/api/stats")
async def get_stats(db: Session = Depends(get_db)):
    """Return overview counts: total, new, and breakdowns by category and domain."""
    from sqlalchemy import func

    def grouped_counts(column):
        # One GROUP BY query per enum column; rows with an empty value are
        # reported under the "unknown" key.
        rows = db.query(column, func.count(Opportunity.id)).group_by(column).all()
        return {
            member.value if member else "unknown": count
            for member, count in rows
        }

    total = db.query(Opportunity).count()
    unseen = (
        db.query(Opportunity)
        .filter(Opportunity.status == OpportunityStatus.NEW)
        .count()
    )
    per_category = grouped_counts(Opportunity.category)
    per_domain = grouped_counts(Opportunity.domain)

    return {
        "total_opportunities": total,
        "new_opportunities": unseen,
        "by_category": per_category,
        "by_domain": per_domain,
    }
|
| 350 |
+
|
| 351 |
+
|
| 352 |
+
# ---------- AI Chat ----------
|
| 353 |
+
|
| 354 |
+
from pydantic import BaseModel
|
| 355 |
+
|
| 356 |
+
class ChatMessage(BaseModel):
    """Request body for the chat endpoint."""

    # Free-text question from the user.
    message: str
|
| 358 |
+
|
| 359 |
+
@app.post("/api/chat")
async def chat_with_opportunities(
    chat: ChatMessage,
    db: Session = Depends(get_db)
):
    """
    PIOE 2.0: AI-powered chat to search and explore opportunities.

    Ask questions like:
    - "Find me hackathons in Nigeria"
    - "What grants are available for AI projects?"
    - "Show me high ROI opportunities with low competition"

    Up to 100 opportunities with ``combined_score >= 0.3`` are loaded; the 50
    highest-scoring ones are summarised into the LLM prompt. The reply is
    scanned for a JSON object holding ``response``, ``matched_ids`` and
    ``suggested_action``. If the LLM step raises for any reason, a plain
    keyword search over titles and raw text is returned instead.

    Returns:
        A dict with ``response``, ``opportunities`` (at most 10),
        ``suggested_action`` and ``total_searched``. A blank message returns
        only ``response`` and an empty ``opportunities`` list.
    """
    import json

    from .intelligence import LLMClient

    user_message = chat.message.strip()
    if not user_message:
        return {"response": "Please ask a question about opportunities.", "opportunities": []}

    # Get all opportunities for context (limit to recent high-scoring ones)
    opportunities = db.query(Opportunity).filter(
        Opportunity.combined_score >= 0.3
    ).order_by(Opportunity.combined_score.desc()).limit(100).all()

    # Build context for LLM; only the first 50 summaries go into the prompt.
    opp_summaries = []
    for o in opportunities[:50]:
        category = o.category.value if o.category else 'other'
        domain = o.domain.value if o.domain else 'mixed'
        region = o.region.value if hasattr(o, 'region') and o.region else 'global'
        risk = o.risk_level.value if hasattr(o, 'risk_level') and o.risk_level else 'medium'
        # roi_score is nullable: formatting None with ":.0%" raises TypeError,
        # so substitute the neutral default before formatting.
        roi = getattr(o, 'roi_score', None)
        if roi is None:
            roi = 0.5
        opp_summaries.append(
            f"[{o.id}] {o.title} | Category: {category} | Domain: {domain} | Region: {region} | ROI: {roi:.0%} | Risk: {risk}"
        )

    opp_context = "\n".join(opp_summaries) if opp_summaries else "No opportunities found in database."

    # Create prompt for LLM
    prompt = f"""You are PIOE, a Personal Intelligence & Opportunity Engine assistant.
The user is from Nigeria and interested in AI, Computer Vision, Robotics, and Web3 opportunities.

AVAILABLE OPPORTUNITIES:
{opp_context}

USER QUESTION: {user_message}

Instructions:
1. Answer the user's question based on the opportunities above
2. If they're searching for specific types, list the most relevant opportunity IDs
3. Provide actionable advice
4. Be concise but helpful
5. If no matching opportunities exist, suggest what to search for

Return a JSON response:
{{
"response": "Your helpful answer here",
"matched_ids": ["id1", "id2"] or [] if none match,
"suggested_action": "What the user should do next"
}}"""

    try:
        llm = LLMClient.get_client()
        result = llm._generate(prompt) if hasattr(llm, '_generate') else '{"response": "AI not configured", "matched_ids": [], "suggested_action": "Configure Gemini API key"}'

        # Try to parse JSON response: take the outermost {...} span of the reply.
        start = result.find('{')
        end = result.rfind('}') + 1
        if start != -1 and end > start:
            parsed = json.loads(result[start:end])
            response_text = parsed.get("response", result)
            raw_ids = parsed.get("matched_ids", [])
            suggested_action = parsed.get("suggested_action", "")
        else:
            response_text = result
            raw_ids = []
            suggested_action = ""

        # The model's output is untrusted: accept only a list of IDs (a bare
        # string would turn "in" into a substring test) and compare as strings.
        matched_ids = {str(i) for i in raw_ids} if isinstance(raw_ids, list) else set()

        # Get the matched opportunities (kept in combined_score order)
        matched_opps = []
        if matched_ids:
            for opp in opportunities:
                if opp.id in matched_ids:
                    matched_opps.append({
                        "id": opp.id,
                        "title": opp.title,
                        "category": opp.category.value if opp.category else None,
                        "domain": opp.domain.value if opp.domain else None,
                        "url": opp.url,
                        "roi_score": getattr(opp, 'roi_score', None),
                        "risk_level": opp.risk_level.value if hasattr(opp, 'risk_level') and opp.risk_level else "medium",
                        "region": opp.region.value if hasattr(opp, 'region') and opp.region else "global",
                    })

        return {
            "response": response_text,
            "opportunities": matched_opps[:10],
            "suggested_action": suggested_action,
            "total_searched": len(opportunities)
        }

    except Exception as e:
        # Deliberate best-effort fallback: simple keyword search when the
        # LLM call or the parsing of its reply fails.
        keywords = user_message.lower().split()
        matched = []
        for o in opportunities:
            text = f"{o.title} {o.raw_text or ''}".lower()
            if any(kw in text for kw in keywords):
                matched.append({
                    "id": o.id,
                    "title": o.title,
                    "category": o.category.value if o.category else None,
                    "url": o.url,
                    "roi_score": getattr(o, 'roi_score', None),
                })

        return {
            "response": f"Found {len(matched)} opportunities matching your search. (AI unavailable: {str(e)[:50]})",
            "opportunities": matched[:10],
            "suggested_action": "Click on any opportunity for details",
            "total_searched": len(opportunities)
        }
|
| 475 |
+
|
| 476 |
+
|
| 477 |
+
# Mount static files (frontend assets)
|
| 478 |
+
# Serve the bundled frontend assets under /static, but only when the
# "frontend" directory next to the backend package is present.
frontend_dir = Path(__file__).parent.parent.joinpath("frontend")
if frontend_dir.exists():
    app.mount(
        "/static",
        StaticFiles(directory=str(frontend_dir)),
        name="static",
    )
|
| 481 |
+
|
backend/models.py
ADDED
|
@@ -0,0 +1,237 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
PIOE Database Models - Version 2.0
|
| 3 |
+
Personal Advantage Engine
|
| 4 |
+
"""
|
| 5 |
+
from sqlalchemy import Column, String, Float, DateTime, Text, Boolean, Integer, JSON, ForeignKey, Enum as SQLEnum
|
| 6 |
+
from sqlalchemy.orm import relationship
|
| 7 |
+
from datetime import datetime
|
| 8 |
+
import uuid
|
| 9 |
+
import enum
|
| 10 |
+
|
| 11 |
+
from .database import Base
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class OpportunityCategory(str, enum.Enum):
    """Kinds of opportunity an item can be classified as (extended in PIOE 2.0)."""

    # --- Standard opportunities ---
    SCHOLARSHIP = "scholarship"
    FELLOWSHIP = "fellowship"
    INTERNSHIP = "internship"
    JOB = "job"
    RESEARCH = "research"
    HACKATHON = "hackathon"
    COMPETITION = "competition"
    CONFERENCE = "conference"
    OPEN_SOURCE = "open_source"

    # --- Grant types (PIOE 2.0) ---
    GRANT = "grant"
    MICRO_GRANT = "micro_grant"
    ECOSYSTEM_GRANT = "ecosystem_grant"
    INNOVATION_FUND = "innovation_fund"

    # --- Partnership and collaboration (PIOE 2.0) ---
    PARTNERSHIP = "partnership"
    COLLABORATION = "collaboration"

    # --- Events and showcases (PIOE 2.0) ---
    PITCH_EVENT = "pitch_event"
    DEMO_DAY = "demo_day"
    TALENT_CALL = "talent_call"

    # --- Web3/crypto specific (PIOE 2.0) ---
    BOUNTY = "bounty"
    AMBASSADOR = "ambassador"

    # --- Silent/implicit opportunities (PIOE 2.0) ---
    PRE_GRANT_SIGNAL = "pre_grant_signal"
    PRE_HIRING_SIGNAL = "pre_hiring_signal"
    WEAK_SIGNAL = "weak_signal"

    # --- Other ---
    INVESTMENT = "investment"
    OTHER = "other"
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
class OpportunityStatus(str, enum.Enum):
    """Where an opportunity stands in the user's workflow."""

    NEW = "new"
    SAVED = "saved"
    APPLIED = "applied"
    TRACKING = "tracking"
    DISMISSED = "dismissed"
    EXPIRED = "expired"
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
class SourceType(str, enum.Enum):
    """Kinds of data source an item can come from."""

    ARXIV = "arxiv"
    GITHUB = "github"
    RSS = "rss"
    REDDIT = "reddit"
    TWITTER = "twitter"
    LINKEDIN = "linkedin"
    SUPERTEAM = "superteam"
    WEB_SCRAPE = "web_scrape"
    DISCORD = "discord"
    GOV_PORTAL = "gov_portal"
    GRANT_PLATFORM = "grant_platform"
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
class Domain(str, enum.Enum):
    """Subject-area classification of an opportunity."""

    AI = "ai"
    COMPUTER_VISION = "computer_vision"
    ROBOTICS = "robotics"
    FINANCE = "finance"
    CRYPTO = "crypto"
    ACADEMIA = "academia"
    WEB3 = "web3"
    MIXED = "mixed"
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
class Region(str, enum.Enum):
    """Regional accessibility of an opportunity (PIOE 2.0)."""

    NIGERIA = "nigeria"
    AFRICA = "africa"
    GLOBAL = "global"
    # Remote, but accessible from Africa.
    REMOTE_AFRICA = "remote_africa"
    REMOTE_GLOBAL = "remote_global"
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
class RiskLevel(str, enum.Enum):
    """How risky the time investment in an opportunity is."""

    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
class Source(Base):
    """Data source configuration.

    One row per configured source; related ``Opportunity`` rows are reachable
    through ``opportunities``.
    """
    __tablename__ = "sources"

    # Primary key: a random UUID4 string generated per row.
    id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    name = Column(String, nullable=False)
    type = Column(SQLEnum(SourceType), nullable=False)
    url = Column(String)
    # Callable default so every row gets its own fresh dict instead of all
    # rows sharing one mutable ``{}`` object.
    config = Column(JSON, default=dict)
    credibility_score = Column(Float, default=0.7)
    last_fetch = Column(DateTime)
    is_active = Column(Boolean, default=True)
    # Stored as a naive UTC timestamp.
    created_at = Column(DateTime, default=datetime.utcnow)

    opportunities = relationship("Opportunity", back_populates="source")
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
class Opportunity(Base):
    """Normalized opportunity item - PIOE 2.0 Enhanced.

    Holds the source reference, classification, timestamps, content, the
    scoring columns and the user's status for one discovered opportunity.
    """
    __tablename__ = "opportunities"

    # Primary key: a random UUID4 string generated per row.
    id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    title = Column(String, nullable=False)
    source_id = Column(String, ForeignKey("sources.id"))
    source_name = Column(String)
    source_type = Column(SQLEnum(SourceType))
    domain = Column(SQLEnum(Domain), default=Domain.MIXED)
    category = Column(SQLEnum(OpportunityCategory), default=OpportunityCategory.OTHER)

    # Regional accessibility (PIOE 2.0)
    region = Column(SQLEnum(Region), default=Region.GLOBAL)
    region_weight = Column(Float, default=1.0)  # 1.0 = perfect match for user

    # Timestamps (discovered_at is stored as naive UTC)
    discovered_at = Column(DateTime, default=datetime.utcnow)
    published_at = Column(DateTime)
    deadline = Column(DateTime)

    # Content
    raw_text = Column(Text)
    summary = Column(Text)
    url = Column(String)

    # Core Scores (0.0 to 1.0)
    relevance_score = Column(Float, default=0.0)
    novelty_score = Column(Float, default=1.0)
    credibility_score = Column(Float, default=0.5)
    signal_strength = Column(Float, default=0.5)
    combined_score = Column(Float, default=0.0)

    # PIOE 2.0: Decision Intelligence Scores
    roi_score = Column(Float, default=0.5)  # Is this worth my time?
    unlock_potential = Column(Float, default=0.0)  # Opens doors to what?
    risk_level = Column(SQLEnum(RiskLevel), default=RiskLevel.MEDIUM)
    competition_level = Column(Float, default=0.5)  # Estimated competition

    # Social engagement (from social sources)
    social_engagement = Column(Integer, default=0)

    # User status
    status = Column(SQLEnum(OpportunityStatus), default=OpportunityStatus.NEW)

    # Grant-specific metadata (PIOE 2.0)
    # Stored in extra_data:
    # - grant_size_min, grant_size_max
    # - required_output (MVP, paper, OSS)
    # - timeline_months
    # - ecosystem (ethereum, solana, government)
    # - eligibility_regions
    # - technical_depth

    # Action guidance (PIOE 2.0)
    # Stored in extra_data:
    # - recommended_action
    # - skill_to_highlight
    # - timing (early/optimal/late)
    # - success_probability
    # - preparation_steps

    # Opportunity chaining (PIOE 2.0)
    # - chain_next: list of potential next opportunity IDs
    # - chain_unlocks: what this unlocks

    # Callable default so every row gets its own fresh dict instead of all
    # rows sharing one mutable ``{}`` object.
    extra_data = Column(JSON, default=dict)

    # Embedding for novelty detection
    embedding = Column(JSON)

    source = relationship("Source", back_populates="opportunities")
    interactions = relationship("UserInteraction", back_populates="opportunity")
|
| 199 |
+
|
| 200 |
+
|
| 201 |
+
class UserInteraction(Base):
    """One recorded user action on an opportunity, kept for personalization."""

    __tablename__ = "user_interactions"

    # Primary key: a random UUID4 string generated per row.
    id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    opportunity_id = Column(String, ForeignKey("opportunities.id"))
    # One of: view, apply, save, dismiss, track
    action = Column(String)
    timestamp = Column(DateTime, default=datetime.utcnow)

    opportunity = relationship("Opportunity", back_populates="interactions")
|
| 211 |
+
|
| 212 |
+
|
| 213 |
+
class Author(Base):
    """Track authors for credibility and social graph."""
    __tablename__ = "authors"

    # Primary key: a random UUID4 string generated per row.
    id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    name = Column(String, nullable=False)
    platform = Column(String)  # reddit, twitter, github, etc.
    platform_id = Column(String)  # username or ID on platform
    credibility_score = Column(Float, default=0.5)
    opportunity_creator_score = Column(Float, default=0.0)  # Do they create opportunities?
    first_seen = Column(DateTime, default=datetime.utcnow)
    # Callable default so every row gets its own fresh dict instead of all
    # rows sharing one mutable ``{}`` object.
    extra_data = Column(JSON, default=dict)
|
| 225 |
+
|
| 226 |
+
|
| 227 |
+
class OpportunityChain(Base):
    """Track opportunity sequences/paths - PIOE 2.0."""
    __tablename__ = "opportunity_chains"

    # Primary key: a random UUID4 string generated per row.
    id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    name = Column(String)  # e.g., "Hackathon to Startup Path"
    description = Column(Text)
    steps = Column(JSON)  # Ordered list of opportunity categories/types
    success_rate = Column(Float, default=0.0)
    # Callable default so every row gets its own fresh list instead of all
    # rows sharing one mutable ``[]`` object.
    example_urls = Column(JSON, default=list)
    created_at = Column(DateTime, default=datetime.utcnow)
|
config/sources.yaml
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# PIOE Default Sources Configuration
|
| 2 |
+
|
| 3 |
+
# arXiv Categories
|
| 4 |
+
arxiv:
|
| 5 |
+
enabled: true
|
| 6 |
+
categories:
|
| 7 |
+
- cs.CV # Computer Vision
|
| 8 |
+
- cs.RO # Robotics
|
| 9 |
+
- cs.AI # Artificial Intelligence
|
| 10 |
+
- cs.LG # Machine Learning
|
| 11 |
+
- cs.CL # Natural Language Processing
|
| 12 |
+
max_results: 50
|
| 13 |
+
schedule: "daily"
|
| 14 |
+
|
| 15 |
+
# GitHub Topics/Search
|
| 16 |
+
github:
|
| 17 |
+
enabled: true
|
| 18 |
+
topics:
|
| 19 |
+
- computer-vision
|
| 20 |
+
- robotics
|
| 21 |
+
- machine-learning
|
| 22 |
+
- deep-learning
|
| 23 |
+
- ros
|
| 24 |
+
- pytorch
|
| 25 |
+
- transformers
|
| 26 |
+
- llm
|
| 27 |
+
min_stars: 50
|
| 28 |
+
schedule: "daily"
|
| 29 |
+
|
| 30 |
+
# RSS Feeds
|
| 31 |
+
rss:
|
| 32 |
+
enabled: true
|
| 33 |
+
feeds:
|
| 34 |
+
# AI Research Labs
|
| 35 |
+
- name: "Google AI Blog"
|
| 36 |
+
url: "https://blog.google/technology/ai/rss/"
|
| 37 |
+
type: blog
|
| 38 |
+
|
| 39 |
+
- name: "OpenAI Blog"
|
| 40 |
+
url: "https://openai.com/blog/rss/"
|
| 41 |
+
type: blog
|
| 42 |
+
|
| 43 |
+
- name: "DeepMind Blog"
|
| 44 |
+
url: "https://www.deepmind.com/blog/rss.xml"
|
| 45 |
+
type: blog
|
| 46 |
+
|
| 47 |
+
# Tech News
|
| 48 |
+
- name: "Hacker News - AI"
|
| 49 |
+
url: "https://hnrss.org/newest?q=ai+machine+learning"
|
| 50 |
+
type: news
|
| 51 |
+
|
| 52 |
+
- name: "Hacker News - Robotics"
|
| 53 |
+
url: "https://hnrss.org/newest?q=robotics"
|
| 54 |
+
type: news
|
| 55 |
+
|
| 56 |
+
- name: "TechCrunch AI"
|
| 57 |
+
url: "https://techcrunch.com/category/artificial-intelligence/feed/"
|
| 58 |
+
type: news
|
| 59 |
+
|
| 60 |
+
# Reddit Subreddits
|
| 61 |
+
reddit:
|
| 62 |
+
enabled: true
|
| 63 |
+
subreddits:
|
| 64 |
+
- computervision
|
| 65 |
+
- robotics
|
| 66 |
+
- MachineLearning
|
| 67 |
+
- artificial
|
| 68 |
+
- learnmachinelearning
|
| 69 |
+
- deeplearning
|
| 70 |
+
- hackathons
|
| 71 |
+
- scholarships
|
| 72 |
+
- cscareerquestions
|
| 73 |
+
min_score: 10
|
| 74 |
+
schedule: "every_6_hours"
|
| 75 |
+
|
| 76 |
+
# Superteam (Web3/Crypto Opportunities)
|
| 77 |
+
superteam:
|
| 78 |
+
enabled: true
|
| 79 |
+
focus:
|
| 80 |
+
- bounties
|
| 81 |
+
- grants
|
| 82 |
+
- hackathons
|
| 83 |
+
schedule: "daily"
|
| 84 |
+
|
| 85 |
+
# Major Tech Company Careers
|
| 86 |
+
careers:
|
| 87 |
+
enabled: true
|
| 88 |
+
companies:
|
| 89 |
+
- name: Microsoft
|
| 90 |
+
keywords: ["computer vision", "robotics", "AI", "machine learning", "intern"]
|
| 91 |
+
- name: NVIDIA
|
| 92 |
+
keywords: ["deep learning", "computer vision", "robotics", "intern"]
|
| 93 |
+
- name: Google
|
| 94 |
+
keywords: ["machine learning", "research", "robotics", "intern"]
|
| 95 |
+
- name: Meta
|
| 96 |
+
keywords: ["AI", "research", "robotics", "computer vision", "intern"]
|
| 97 |
+
- name: OpenAI
|
| 98 |
+
keywords: ["research", "engineering"]
|
| 99 |
+
- name: DeepMind
|
| 100 |
+
keywords: ["research", "robotics"]
|
| 101 |
+
- name: "Boston Dynamics"
|
| 102 |
+
keywords: ["robotics", "perception", "control"]
|
| 103 |
+
- name: "Tesla AI"
|
| 104 |
+
keywords: ["autopilot", "optimus", "robotics", "computer vision"]
|
| 105 |
+
schedule: "daily"
|
| 106 |
+
|
| 107 |
+
# Web Scraping Targets
|
| 108 |
+
scraper:
|
| 109 |
+
enabled: true
|
| 110 |
+
targets:
|
| 111 |
+
# Hackathons
|
| 112 |
+
- name: "Devpost Hackathons"
|
| 113 |
+
url: "https://devpost.com/hackathons"
|
| 114 |
+
type: hackathon
|
| 115 |
+
|
| 116 |
+
- name: "MLH Events"
|
| 117 |
+
url: "https://mlh.io/seasons/2024/events"
|
| 118 |
+
type: hackathon
|
| 119 |
+
|
| 120 |
+
# Scholarships
|
| 121 |
+
- name: "FindAPhD"
|
| 122 |
+
url: "https://www.findaphd.com/phds/?Keywords=computer+vision+robotics"
|
| 123 |
+
type: scholarship
|
| 124 |
+
schedule: "daily"
|
| 125 |
+
|
| 126 |
+
# Scheduling
|
| 127 |
+
schedule:
|
| 128 |
+
full_ingestion_hours: 6
|
| 129 |
+
priority_ingestion_hours: 2
|
| 130 |
+
|
| 131 |
+
# Scoring Thresholds
|
| 132 |
+
scoring:
|
| 133 |
+
min_relevance: 0.4
|
| 134 |
+
min_novelty: 0.3
|
| 135 |
+
min_credibility: 0.5
|
frontend/app.js
ADDED
|
@@ -0,0 +1,660 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* PIOE - Personal Intelligence & Opportunity Engine
|
| 3 |
+
* Frontend JavaScript Application
|
| 4 |
+
*/
|
| 5 |
+
|
| 6 |
+
/**
 * Front-end controller for the PIOE dashboard.
 *
 * Owns the current filter state, talks to the `/api/*` endpoints with
 * `fetch`, and renders the feed, digest, chat, detail and stats views by
 * building HTML strings. Every value that originates from the server is
 * passed through `escapeHtml` (text/attributes) or `_safeUrl` (links)
 * before it is interpolated into markup.
 */
class PIOEApp {
    /**
     * Initialise filter state and start the app (see `init`).
     */
    constructor() {
        this.currentCategory = null;
        this.currentDomain = null;
        this.minScore = 0;
        this.opportunities = [];
        // Incremented per feed request so that a slow, superseded response
        // cannot overwrite the result of a newer one.
        this.feedRequestId = 0;

        this.init();
    }

    /**
     * Wire up DOM listeners and kick off the initial stats and feed loads.
     */
    init() {
        this.bindEvents();
        this.loadStats();
        this.loadOpportunities();
    }

    /**
     * Attach listeners to navigation items, filters, action links, the
     * modal controls and the (optional) chat toggle.
     */
    bindEvents() {
        // Navigation items
        document.querySelectorAll('.nav-item[data-view]').forEach(item => {
            item.addEventListener('click', (e) => {
                e.preventDefault();
                this.setActiveNav(item);
                this.handleViewChange(item.dataset.view);
            });
        });

        // Category filters
        document.querySelectorAll('.nav-item[data-category]').forEach(item => {
            item.addEventListener('click', (e) => {
                e.preventDefault();
                this.setActiveNav(item);
                this.currentCategory = item.dataset.category;
                this.loadOpportunities();
                this.showFeedView();
            });
        });

        // Domain filter
        document.getElementById('domain-filter').addEventListener('change', (e) => {
            this.currentDomain = e.target.value || null;
            this.loadOpportunities();
        });

        // Score filter
        document.getElementById('score-filter').addEventListener('change', (e) => {
            this.minScore = parseFloat(e.target.value) || 0;
            this.loadOpportunities();
        });

        // Run ingestion
        document.getElementById('run-ingestion').addEventListener('click', (e) => {
            e.preventDefault();
            this.runIngestion();
        });

        // View stats
        document.getElementById('view-stats').addEventListener('click', (e) => {
            e.preventDefault();
            this.showStatsModal();
        });

        // Modal close
        document.querySelector('.modal-close').addEventListener('click', () => {
            this.closeModal();
        });

        document.querySelector('.modal-backdrop').addEventListener('click', () => {
            this.closeModal();
        });

        // PIOE 2.0: AI Chat (the toggle is optional, hence the `?.`)
        document.getElementById('open-chat')?.addEventListener('click', (e) => {
            e.preventDefault();
            this.toggleChat();
        });
    }

    // PIOE 2.0: Chat Methods

    /**
     * Show or hide the chat panel by toggling its `active` class.
     */
    toggleChat() {
        document.getElementById('chat-panel')?.classList.toggle('active');
    }

    /**
     * Send the text in `#chat-input` to `/api/chat` and append the user
     * message, a temporary loading bubble and the bot reply to
     * `#chat-messages`.
     */
    async sendChatMessage() {
        const input = document.getElementById('chat-input');
        const messagesContainer = document.getElementById('chat-messages');
        if (!input || !messagesContainer) return;

        const message = input.value.trim();
        if (!message) return;

        // insertAdjacentHTML appends without re-parsing the existing chat
        // log (which `innerHTML +=` would do on every message).
        const append = (html) => {
            messagesContainer.insertAdjacentHTML('beforeend', html);
            messagesContainer.scrollTop = messagesContainer.scrollHeight;
        };

        // Add user message to chat
        append(`
            <div class="chat-message user">
                <p>${this.escapeHtml(message)}</p>
            </div>
        `);
        input.value = '';

        // Add loading indicator
        const loadingId = `loading-${Date.now()}`;
        append(`
            <div class="chat-message bot" id="${loadingId}">
                <p>[...] Searching opportunities...</p>
            </div>
        `);

        try {
            const response = await fetch('/api/chat', {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify({ message })
            });
            if (!response.ok) {
                throw new Error(`Chat request failed (HTTP ${response.status})`);
            }
            const data = await response.json();

            // Remove loading indicator
            document.getElementById(loadingId)?.remove();

            // Build response HTML
            let responseHtml = `<p>${this.escapeHtml(data.response || 'No response')}</p>`;

            // Add matched opportunities if any
            const matches = Array.isArray(data.opportunities) ? data.opportunities : [];
            if (matches.length > 0) {
                responseHtml += `<div style="margin-top: 12px">`;
                for (const opp of matches) {
                    const title = String(opp.title ?? '');
                    const roiDisplay = opp.roi_score ? `${Math.round(opp.roi_score * 100)}% ROI` : '';
                    responseHtml += `
                        <a href="${this.escapeHtml(this._safeUrl(opp.url))}" target="_blank" rel="noopener noreferrer" class="opp-link">
                            ${this.getCategoryEmoji(opp.category)} ${this.escapeHtml(title.slice(0, 60))}${title.length > 60 ? '...' : ''}
                            <span style="opacity: 0.7; margin-left: 8px">${roiDisplay}</span>
                        </a>
                    `;
                }
                responseHtml += `</div>`;
            }

            // Add suggested action if any
            if (data.suggested_action) {
                responseHtml += `<p style="margin-top: 12px; font-style: italic; opacity: 0.8">[TIP] ${this.escapeHtml(data.suggested_action)}</p>`;
            }

            append(`
                <div class="chat-message bot">
                    ${responseHtml}
                </div>
            `);
        } catch (error) {
            document.getElementById(loadingId)?.remove();
            append(`
                <div class="chat-message bot">
                    <p style="color: var(--danger)">Error: ${this.escapeHtml(error.message)}</p>
                </div>
            `);
        }
    }

    /**
     * Mark `activeItem` as the only `.nav-item` carrying the `active` class.
     * @param {Element} activeItem - The navigation element to highlight.
     */
    setActiveNav(activeItem) {
        document.querySelectorAll('.nav-item').forEach(item => {
            item.classList.remove('active');
        });
        activeItem.classList.add('active');
    }

    /**
     * Switch the main panel to the requested view and update the header.
     * @param {string} view - One of `'feed'`, `'digest'` or `'urgent'`;
     *     any other value is ignored.
     */
    handleViewChange(view) {
        if (view === 'feed') {
            this.currentCategory = null;
            this.loadOpportunities();
            this.showFeedView();
            this.updateHeader('Opportunity Feed', 'High-signal opportunities detected by PIOE');
        } else if (view === 'digest') {
            this.loadDigest('daily');
            this.showDigestView();
            this.updateHeader('Daily Brief', 'Your personalized intelligence report');
        } else if (view === 'urgent') {
            this.loadDigest('urgent');
            this.showDigestView();
            this.updateHeader('Urgent Opportunities', 'Deadlines approaching soon');
        }
    }

    /**
     * Set the page title and subtitle text.
     * @param {string} title
     * @param {string} subtitle
     */
    updateHeader(title, subtitle) {
        document.getElementById('page-title').textContent = title;
        document.getElementById('page-subtitle').textContent = subtitle;
    }

    /** Show the opportunity feed and hide the digest panel. */
    showFeedView() {
        document.getElementById('opportunity-feed').style.display = 'flex';
        document.getElementById('digest-view').style.display = 'none';
    }

    /** Show the digest panel and hide the opportunity feed. */
    showDigestView() {
        document.getElementById('opportunity-feed').style.display = 'none';
        document.getElementById('digest-view').style.display = 'block';
    }

    /**
     * Fetch `/api/stats` and fill the four summary counters.
     * Failures are logged to the console and leave the counters untouched.
     */
    async loadStats() {
        try {
            const response = await fetch('/api/stats');
            if (!response.ok) throw new Error(`HTTP ${response.status}`);
            const stats = await response.json();

            document.getElementById('total-count').textContent = stats.total_opportunities || 0;
            document.getElementById('new-count').textContent = stats.new_opportunities || 0;
            document.getElementById('hackathon-count').textContent = stats.by_category?.hackathon || 0;
            document.getElementById('internship-count').textContent = stats.by_category?.internship || 0;
        } catch (error) {
            console.error('Failed to load stats:', error);
        }
    }

    /**
     * Fetch up to 50 opportunities matching the current category, domain
     * and minimum-score filters, then render them into the feed.
     */
    async loadOpportunities() {
        const feed = document.getElementById('opportunity-feed');
        feed.innerHTML = '<div class="loading">Loading opportunities...</div>';

        const requestId = this.feedRequestId = (this.feedRequestId || 0) + 1;

        try {
            const params = new URLSearchParams();
            if (this.currentCategory) params.set('category', this.currentCategory);
            if (this.currentDomain) params.set('domain', this.currentDomain);
            if (this.minScore) params.set('min_score', this.minScore);
            params.set('limit', '50');

            const response = await fetch(`/api/opportunities?${params}`);
            if (!response.ok) throw new Error(`HTTP ${response.status}`);
            const data = await response.json();

            // A newer request was started while this one was in flight.
            if (requestId !== this.feedRequestId) return;

            this.opportunities = data.opportunities || [];
            this.renderOpportunities();
        } catch (error) {
            if (requestId !== this.feedRequestId) return;
            feed.innerHTML = `<div class="loading">Error loading opportunities: ${this.escapeHtml(error.message)}</div>`;
        }
    }

    /**
     * Render `this.opportunities` as cards and bind the per-card click,
     * "Open" and "Save" handlers.
     */
    renderOpportunities() {
        const feed = document.getElementById('opportunity-feed');

        if (this.opportunities.length === 0) {
            feed.innerHTML = `
                <div class="loading">
                    No opportunities found. Try running ingestion first!
                </div>
            `;
            return;
        }

        feed.innerHTML = this.opportunities.map(opp => this.renderOpportunityCard(opp)).join('');

        // Cards are emitted in array order, so the NodeList index maps back
        // to the opportunity it was rendered from.
        feed.querySelectorAll('.opportunity-card').forEach((card, index) => {
            const opp = this.opportunities[index];

            card.addEventListener('click', () => {
                this.showOpportunityDetail(opp);
            });

            // Action buttons
            card.querySelector('.action-btn.primary')?.addEventListener('click', (e) => {
                e.stopPropagation();
                const url = this._safeUrl(opp.url);
                // '#' means the URL was missing or used a disallowed scheme.
                if (url !== '#') window.open(url, '_blank', 'noopener');
            });

            card.querySelector('.action-btn.secondary')?.addEventListener('click', (e) => {
                e.stopPropagation();
                this.updateStatus(opp.id, 'saved');
            });
        });
    }

    /**
     * Build the HTML for a single feed card.
     * @param {Object} opp - Opportunity record as returned by the API.
     * @returns {string} HTML markup for one `.opportunity-card`.
     */
    renderOpportunityCard(opp) {
        const category = opp.category || 'other';
        const categoryEmoji = this.getCategoryEmoji(category);
        const scorePercent = Math.round((opp.combined_score || 0) * 100);
        // `??` rather than `||` so that a real score of 0 is shown as 0%.
        const roiPercent = Math.round((opp.roi_score ?? 0.5) * 100);
        const riskLevel = opp.risk_level || 'medium';
        const region = opp.region || 'global';

        let deadlineBadge = '';
        if (opp.deadline) {
            const msPerDay = 1000 * 60 * 60 * 24;
            const daysLeft = Math.ceil((new Date(opp.deadline) - new Date()) / msPerDay);
            // Unparseable deadlines produce NaN; show no badge for those.
            if (!Number.isNaN(daysLeft)) {
                let urgency = 'ok';
                if (daysLeft < 7) urgency = 'urgent';
                else if (daysLeft < 14) urgency = 'soon';

                const label = daysLeft < 0 ? 'Deadline passed' : `${daysLeft} days left`;
                deadlineBadge = `
                    <span class="deadline-badge ${urgency}">
                        [!] ${label}
                    </span>
                `;
            }
        }

        // Risk level badge
        const riskColors = { low: '#10b981', medium: '#f59e0b', high: '#ef4444' };
        const riskLabels = { low: '[OK]', medium: '[!]', high: '[!!]' };
        const riskColor = riskColors[riskLevel] || riskColors.medium;
        const riskLabel = riskLabels[riskLevel] || '[?]';

        // Region badge
        const regionLabels = { nigeria: 'NG', africa: 'AFR', global: 'GLB', remote_africa: 'AFR-R', remote_global: 'GLB-R' };

        return `
            <div class="opportunity-card">
                <div class="card-header">
                    <span class="card-category ${this.escapeHtml(category)}">
                        ${categoryEmoji} ${this.escapeHtml(this._humanize(category))}
                    </span>
                    <div class="card-score">
                        <div class="score-bar">
                            <div class="score-fill" style="width: ${scorePercent}%"></div>
                        </div>
                        <span>${scorePercent}%</span>
                    </div>
                </div>

                <h3 class="card-title">${this.escapeHtml(opp.title)}</h3>

                <div class="card-meta">
                    <span>[SRC] ${this.escapeHtml(opp.source_name || 'Unknown')}</span>
                    <span>[${regionLabels[region] || 'GLB'}] ${this.escapeHtml(this._humanize(region))}</span>
                    <span style="color: ${riskColor}">${riskLabel} ${this.escapeHtml(riskLevel)} risk</span>
                </div>

                <div class="card-meta" style="margin-top: 8px">
                    <span title="ROI Score">[ROI] ${roiPercent}%</span>
                    <span>[DATE] ${this.formatDate(opp.discovered_at)}</span>
                </div>

                <p class="card-summary">${this.escapeHtml(opp.raw_text?.slice(0, 200) || '')}</p>

                <div class="card-footer">
                    ${deadlineBadge}
                    <div class="card-actions">
                        <button class="action-btn secondary">Save</button>
                        <button class="action-btn primary">Open</button>
                    </div>
                </div>
            </div>
        `;
    }

    /**
     * Map a category key to its short bracketed label.
     * @param {string} category
     * @returns {string} The label, or `'[?]'` for unknown categories.
     */
    getCategoryEmoji(category) {
        const labels = {
            scholarship: '[S]',
            fellowship: '[F]',
            internship: '[I]',
            job: '[J]',
            hackathon: '[H]',
            competition: '[C]',
            grant: '[G]',
            micro_grant: '[MG]',
            ecosystem_grant: '[EG]',
            innovation_fund: '[IF]',
            research: '[R]',
            open_source: '[OS]',
            conference: '[CF]',
            investment: '[IV]',
            partnership: '[P]',
            collaboration: '[CO]',
            pitch_event: '[PE]',
            demo_day: '[DD]',
            talent_call: '[TC]',
            bounty: '[B]',
            ambassador: '[A]',
            pre_grant_signal: '[PG]',
            pre_hiring_signal: '[PH]',
            weak_signal: '[WS]',
            other: '[?]'
        };
        // Own-property check so keys such as "constructor" do not resolve
        // to inherited Object.prototype members.
        return Object.prototype.hasOwnProperty.call(labels, category) ? labels[category] : '[?]';
    }

    /**
     * Fetch `/api/digest/<type>` and render its markdown into
     * `#digest-content`.
     * @param {string} type - Digest kind, e.g. `'daily'` or `'urgent'`.
     */
    async loadDigest(type) {
        const content = document.getElementById('digest-content');
        content.innerHTML = '<div class="loading">Generating digest...</div>';

        try {
            const response = await fetch(`/api/digest/${encodeURIComponent(type)}`);
            if (!response.ok) throw new Error(`HTTP ${response.status}`);
            const data = await response.json();

            // Convert markdown to HTML (simple conversion)
            content.innerHTML = this.markdownToHtml(data.digest || 'No digest available.');
        } catch (error) {
            content.innerHTML = `<p>Error loading digest: ${this.escapeHtml(error.message)}</p>`;
        }
    }

    /**
     * Convert a small markdown subset (headings, bold, italic, block
     * quotes, links, horizontal rules, line breaks) to HTML.
     *
     * The input is HTML-escaped first, so raw markup inside the digest is
     * displayed as text rather than interpreted.
     * @param {string} md - Markdown source.
     * @returns {string} HTML markup.
     */
    markdownToHtml(md) {
        const toLink = (match, label, href) =>
            `<a href="${this._safeUrl(href)}" target="_blank" rel="noopener noreferrer">${label}</a>`;

        return this.escapeHtml(md)
            .replace(/^### (.*$)/gim, '<h3>$1</h3>')
            .replace(/^## (.*$)/gim, '<h2>$1</h2>')
            .replace(/^# (.*$)/gim, '<h1>$1</h1>')
            .replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>')
            .replace(/\*(.*?)\*/g, '<em>$1</em>')
            // The leading '>' has already been escaped to '&gt;'.
            .replace(/^&gt; (.*$)/gim, '<blockquote>$1</blockquote>')
            .replace(/\[(.*?)\]\((.*?)\)/g, toLink)
            .replace(/^---$/gim, '<hr>')
            .replace(/\n/g, '<br>');
    }

    /**
     * Fill the detail modal with the full record for `opp` and open it.
     * @param {Object} opp - Opportunity record as returned by the API.
     */
    showOpportunityDetail(opp) {
        const modal = document.getElementById('detail-modal');
        const body = document.getElementById('modal-body');
        if (!modal || !body) return;

        const category = opp.category || 'other';
        const roiPercent = Math.round((opp.roi_score ?? 0.5) * 100);
        const riskLevel = opp.risk_level || 'medium';
        const region = opp.region || 'global';
        const riskColors = { low: '#10b981', medium: '#f59e0b', high: '#ef4444' };
        const riskColor = riskColors[riskLevel] || riskColors.medium;

        body.innerHTML = `
            <span class="card-category ${this.escapeHtml(category)}" style="margin-bottom: 16px">
                ${this.getCategoryEmoji(category)} ${this.escapeHtml(this._humanize(category))}
            </span>

            <h2 style="margin: 16px 0">${this.escapeHtml(opp.title)}</h2>

            <div class="card-meta" style="margin-bottom: 20px">
                <span>📡 ${this.escapeHtml(opp.source_name || 'Unknown')}</span>
                <span>🌐 ${this.escapeHtml(this._humanize(region))}</span>
                <span style="color: ${riskColor}">${this.escapeHtml(riskLevel)} risk</span>
            </div>

            <div style="display: grid; grid-template-columns: repeat(4, 1fr); gap: 12px; margin-bottom: 24px">
                <div class="stat-card">
                    <span class="stat-value">${Math.round((opp.relevance_score || 0) * 100)}%</span>
                    <span class="stat-label">Relevance</span>
                </div>
                <div class="stat-card">
                    <span class="stat-value">${Math.round((opp.novelty_score || 0) * 100)}%</span>
                    <span class="stat-label">Novelty</span>
                </div>
                <div class="stat-card">
                    <span class="stat-value">${Math.round((opp.credibility_score || 0) * 100)}%</span>
                    <span class="stat-label">Credibility</span>
                </div>
                <div class="stat-card highlight">
                    <span class="stat-value">${roiPercent}%</span>
                    <span class="stat-label">💎 ROI</span>
                </div>
            </div>

            ${opp.deadline ? `<p style="color: var(--warning); margin-bottom: 16px">⏰ Deadline: ${this.escapeHtml(new Date(opp.deadline).toLocaleDateString())}</p>` : ''}

            <p style="color: var(--text-secondary); line-height: 1.8; margin-bottom: 24px">
                ${this.escapeHtml(opp.raw_text || 'No description available.')}
            </p>

            <!-- Action Guidance Container -->
            <div id="guidance-container" style="margin-bottom: 24px; padding: 16px; background: rgba(99, 102, 241, 0.1); border-radius: 12px; display: none;">
                <h3 style="margin-bottom: 12px; color: var(--accent)">🎯 Action Guidance</h3>
                <div id="guidance-content"></div>
            </div>

            <div style="display: flex; flex-wrap: wrap; gap: 12px">
                <button class="action-btn primary" data-action="guidance" style="padding: 12px 24px; background: linear-gradient(135deg, #8b5cf6, #6366f1)">
                    🧠 Get Guidance
                </button>
                <a href="${this.escapeHtml(this._safeUrl(opp.url))}" target="_blank" rel="noopener noreferrer" class="action-btn primary" style="text-decoration: none; padding: 12px 24px">
                    🔗 View Original
                </a>
                <button class="action-btn secondary" data-status="saved" style="padding: 12px 24px">
                    💾 Save
                </button>
                <button class="action-btn secondary" data-status="applied" style="padding: 12px 24px">
                    ✅ Mark Applied
                </button>
            </div>
        `;

        // Bind handlers in code instead of inline `onclick` strings, so the
        // opportunity id is never spliced into executable markup.
        body.querySelector('[data-action="guidance"]')?.addEventListener('click', () => {
            this.getGuidance(opp.id);
        });
        body.querySelectorAll('[data-status]').forEach(button => {
            button.addEventListener('click', () => {
                this.updateStatus(opp.id, button.dataset.status);
            });
        });

        modal.classList.add('active');
    }

    /**
     * Fetch action guidance for an opportunity and render it inside the
     * detail modal's guidance container.
     * @param {string|number} opportunityId - Id of the opportunity.
     */
    async getGuidance(opportunityId) {
        const container = document.getElementById('guidance-container');
        const content = document.getElementById('guidance-content');
        if (!container || !content) return;

        container.style.display = 'block';
        content.innerHTML = '<p>🔄 Analyzing opportunity...</p>';

        try {
            const response = await fetch(`/api/opportunities/${encodeURIComponent(opportunityId)}/guidance`);
            if (!response.ok) throw new Error(`HTTP ${response.status}`);
            const data = await response.json();
            const g = data.guidance || {};

            const esc = (value) => this.escapeHtml(value);
            const asList = (value) => (Array.isArray(value) ? value : []);
            const pills = (items, background) => items
                .map(item => `<span style="background: var(${background}); padding: 4px 12px; border-radius: 20px; font-size: 12px">${esc(item)}</span>`)
                .join('');

            const skills = asList(g.skills_to_highlight);
            const portfolio = asList(g.portfolio_pieces);
            const steps = asList(g.preparation_steps);
            const redFlags = asList(g.red_flags);

            content.innerHTML = `
                <div style="display: grid; gap: 16px">
                    <div style="display: flex; gap: 16px; flex-wrap: wrap">
                        <div class="stat-card" style="flex: 1; min-width: 120px">
                            <span class="stat-value" style="font-size: 14px">${esc(this._humanize(g.primary_action) || 'Review')}</span>
                            <span class="stat-label">Action</span>
                        </div>
                        <div class="stat-card" style="flex: 1; min-width: 120px">
                            <span class="stat-value" style="font-size: 14px">${esc(g.urgency || 'whenever')}</span>
                            <span class="stat-label">Urgency</span>
                        </div>
                        <div class="stat-card" style="flex: 1; min-width: 120px">
                            <span class="stat-value" style="font-size: 14px">${Math.round((g.success_probability ?? 0.3) * 100)}%</span>
                            <span class="stat-label">Success Odds</span>
                        </div>
                        <div class="stat-card" style="flex: 1; min-width: 120px">
                            <span class="stat-value" style="font-size: 14px">${esc(g.time_investment_hours ?? 10)}h</span>
                            <span class="stat-label">Time Needed</span>
                        </div>
                    </div>

                    ${skills.length ? `
                        <div>
                            <strong>Skills to Highlight:</strong>
                            <div style="display: flex; gap: 8px; flex-wrap: wrap; margin-top: 8px">
                                ${pills(skills, '--accent')}
                            </div>
                        </div>
                    ` : ''}

                    ${portfolio.length ? `
                        <div>
                            <strong>Portfolio to Show:</strong>
                            <div style="display: flex; gap: 8px; flex-wrap: wrap; margin-top: 8px">
                                ${pills(portfolio, '--success')}
                            </div>
                        </div>
                    ` : ''}

                    ${steps.length ? `
                        <div>
                            <strong>Preparation Steps:</strong>
                            <ol style="margin-top: 8px; padding-left: 20px">
                                ${steps.map(s => `<li style="margin-bottom: 4px">${esc(s)}</li>`).join('')}
                            </ol>
                        </div>
                    ` : ''}

                    ${g.networking_tips ? `
                        <div>
                            <strong>💡 Networking Tip:</strong>
                            <p style="margin-top: 4px; color: var(--text-secondary)">${esc(g.networking_tips)}</p>
                        </div>
                    ` : ''}

                    ${g.differentiation_angle ? `
                        <div>
                            <strong>🎯 Your Angle:</strong>
                            <p style="margin-top: 4px; color: var(--text-secondary)">${esc(g.differentiation_angle)}</p>
                        </div>
                    ` : ''}

                    ${redFlags.length ? `
                        <div style="background: rgba(239, 68, 68, 0.1); padding: 12px; border-radius: 8px">
                            <strong style="color: #ef4444">⚠️ Red Flags:</strong>
                            <ul style="margin-top: 8px; padding-left: 20px">
                                ${redFlags.map(f => `<li style="color: #ef4444">${esc(f)}</li>`).join('')}
                            </ul>
                        </div>
                    ` : ''}

                    <p style="font-style: italic; color: var(--text-secondary); font-size: 12px">
                        ${esc(g.why || 'Personalized guidance based on your profile')}
                    </p>
                </div>
            `;
        } catch (error) {
            content.innerHTML = `<p style="color: var(--error)">Failed to get guidance: ${this.escapeHtml(error.message)}</p>`;
        }
    }

    /** Hide the detail/stats modal. */
    closeModal() {
        document.getElementById('detail-modal')?.classList.remove('active');
    }

    /**
     * PATCH the status of an opportunity and notify the user of the result.
     * @param {string|number} id - Opportunity id.
     * @param {string} status - New status, e.g. `'saved'` or `'applied'`.
     */
    async updateStatus(id, status) {
        try {
            const response = await fetch(`/api/opportunities/${encodeURIComponent(id)}/status`, {
                method: 'PATCH',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify({ status })
            });
            // fetch() only rejects on network errors; HTTP 4xx/5xx must be
            // checked explicitly or a failed update is reported as success.
            if (!response.ok) throw new Error(`HTTP ${response.status}`);

            // Visual feedback
            this.showNotification(`Status updated to ${status}`);
        } catch (error) {
            console.error('Failed to update status:', error);
            this.showNotification('Failed to update status: ' + error.message);
        }
    }

    /**
     * Ask the backend to start an ingestion run and notify the user.
     */
    async runIngestion() {
        this.showNotification('Starting ingestion... This may take a few minutes.');

        try {
            const response = await fetch('/api/ingest/run', { method: 'POST' });
            if (!response.ok) throw new Error(`HTTP ${response.status}`);
            this.showNotification('Ingestion started! Refresh in a few minutes to see new opportunities.');
        } catch (error) {
            this.showNotification('Failed to start ingestion: ' + error.message);
        }
    }

    /**
     * Fetch `/api/stats` and show totals plus per-category and per-domain
     * counts in the modal. Failures are logged to the console.
     */
    async showStatsModal() {
        try {
            const response = await fetch('/api/stats');
            if (!response.ok) throw new Error(`HTTP ${response.status}`);
            const stats = await response.json();

            const body = document.getElementById('modal-body');
            const row = (label, count) => `
                <div style="display: flex; justify-content: space-between; padding: 8px 0; border-bottom: 1px solid var(--border-color)">
                    <span>${label}</span>
                    <span style="font-weight: 600">${this.escapeHtml(count)}</span>
                </div>
            `;

            body.innerHTML = `
                <h2 style="margin-bottom: 24px">📊 System Statistics</h2>

                <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 16px; margin-bottom: 24px">
                    <div class="stat-card highlight">
                        <span class="stat-value">${this.escapeHtml(stats.total_opportunities || 0)}</span>
                        <span class="stat-label">Total Opportunities</span>
                    </div>
                    <div class="stat-card">
                        <span class="stat-value">${this.escapeHtml(stats.new_opportunities || 0)}</span>
                        <span class="stat-label">New (Unread)</span>
                    </div>
                </div>

                <h3 style="margin: 24px 0 16px">By Category</h3>
                ${Object.entries(stats.by_category || {})
                    .map(([cat, count]) => row(`${this.getCategoryEmoji(cat)} ${this.escapeHtml(this._humanize(cat))}`, count))
                    .join('')}

                <h3 style="margin: 24px 0 16px">By Domain</h3>
                ${Object.entries(stats.by_domain || {})
                    .map(([dom, count]) => row(this.escapeHtml(this._humanize(dom)), count))
                    .join('')}
            `;

            document.getElementById('detail-modal').classList.add('active');
        } catch (error) {
            console.error('Failed to load stats:', error);
        }
    }

    /**
     * Show a message to the user (console log plus a blocking `alert`).
     * @param {string} message
     */
    showNotification(message) {
        // Simple notification - could be enhanced with toast UI
        console.log('PIOE:', message);
        alert(message);
    }

    /**
     * Format a date string as e.g. "Jan 5".
     * @param {string} dateStr - Any value accepted by `new Date()`.
     * @returns {string} The formatted date, or `'Unknown'` when the value
     *     is missing or cannot be parsed.
     */
    formatDate(dateStr) {
        if (!dateStr) return 'Unknown';
        const date = new Date(dateStr);
        if (Number.isNaN(date.getTime())) return 'Unknown';
        return date.toLocaleDateString('en-US', { month: 'short', day: 'numeric' });
    }

    /**
     * Escape a value for safe interpolation into HTML text or a quoted
     * attribute value.
     * @param {*} text - Value to escape; converted with `String()`.
     * @returns {string} Escaped text, or `''` for empty/nullish input.
     */
    escapeHtml(text) {
        // Keep the numeric 0 (it is a legitimate count); drop other falsy values.
        if (!text && text !== 0) return '';
        const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
        return String(text).replace(/[&<>"']/g, ch => entities[ch]);
    }

    /**
     * Replace every underscore in a key with a space for display.
     * @param {*} text
     * @returns {string}
     */
    _humanize(text) {
        return String(text ?? '').replace(/_/g, ' ');
    }

    /**
     * Return `url` if it is relative or uses http, https or mailto;
     * otherwise return `'#'`. Prevents `javascript:` and similar schemes
     * from reaching `href` attributes or `window.open`.
     * @param {*} url
     * @returns {string}
     */
    _safeUrl(url) {
        if (!url) return '#';
        const trimmed = String(url).trim();
        // Browsers ignore control characters and whitespace inside the
        // scheme, so strip them before inspecting it.
        const probe = trimmed.replace(/[\u0000-\u0020]/g, '');
        if (/^(https?|mailto):/i.test(probe)) return trimmed;
        if (/^[a-z][a-z0-9+.-]*:/i.test(probe)) return '#';
        return trimmed;
    }
}
|
| 658 |
+
|
| 659 |
+
// Initialize app
|
| 660 |
+
// Creating the instance runs the constructor, which calls init() to bind
// DOM events and start the initial stats and opportunity loads.
const app = new PIOEApp();
|
frontend/index.html
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
<html lang="en">

<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>PIOE 2.0 - Personal Advantage Engine</title>
    <link rel="stylesheet" href="/static/styles.css">
    <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap" rel="stylesheet">
</head>

<body>
    <!--
        Page skeleton for the PIOE dashboard. Element ids, classes and data-*
        attributes are the hooks used by /static/app.js and /static/styles.css.
        The inline on* handlers below call methods on the global `app` object
        created by /static/app.js.
    -->
    <div class="app">
        <!-- Sidebar -->
        <nav class="sidebar">
            <div class="logo">
                <span class="logo-icon">[P]</span>
                <span class="logo-text">PIOE 2.0</span>
            </div>

            <div class="nav-section">
                <span class="nav-label">Dashboard</span>
                <a href="#" class="nav-item active" data-view="feed">
                    <span class="icon">[F]</span> Opportunity Feed
                </a>
                <a href="#" class="nav-item" data-view="digest">
                    <span class="icon">[D]</span> Daily Brief
                </a>
                <a href="#" class="nav-item" data-view="urgent">
                    <span class="icon">[!]</span> Urgent
                </a>
                <a href="#" class="nav-item" id="open-chat">
                    <span class="icon">[AI]</span> AI Search
                </a>
            </div>

            <div class="nav-section">
                <span class="nav-label">Categories</span>
                <a href="#" class="nav-item" data-category="hackathon">[H] Hackathons</a>
                <a href="#" class="nav-item" data-category="internship">[I] Internships</a>
                <a href="#" class="nav-item" data-category="scholarship">[S] Scholarships</a>
                <a href="#" class="nav-item" data-category="research">[R] Research</a>
                <a href="#" class="nav-item" data-category="job">[J] Jobs</a>
                <a href="#" class="nav-item" data-category="grant">[G] Grants</a>
                <a href="#" class="nav-item" data-category="ecosystem_grant">[E] Ecosystem Grants</a>
                <a href="#" class="nav-item" data-category="bounty">[B] Bounties</a>
                <a href="#" class="nav-item" data-category="open_source">[O] Open Source</a>
            </div>

            <div class="nav-section">
                <span class="nav-label">System</span>
                <a href="#" class="nav-item" id="run-ingestion">
                    <span class="icon">[>]</span> Run Ingestion
                </a>
                <a href="#" class="nav-item" id="view-stats">
                    <span class="icon">[#]</span> Statistics
                </a>
            </div>
        </nav>

        <!-- Main Content -->
        <main class="main-content">
            <header class="header">
                <div class="header-title">
                    <h1 id="page-title">Opportunity Feed</h1>
                    <p class="subtitle" id="page-subtitle">High-signal opportunities detected by PIOE</p>
                </div>
                <div class="header-actions">
                    <!-- aria-label gives the unlabeled selects an accessible name -->
                    <select id="domain-filter" class="filter-select" aria-label="Filter by domain">
                        <option value="">All Domains</option>
                        <option value="ai">AI</option>
                        <option value="computer_vision">Computer Vision</option>
                        <option value="robotics">Robotics</option>
                        <option value="finance">Finance</option>
                        <option value="crypto">Crypto</option>
                        <option value="academia">Academia</option>
                    </select>
                    <select id="score-filter" class="filter-select" aria-label="Filter by minimum score">
                        <option value="0">All Scores</option>
                        <option value="0.5">Score > 0.5</option>
                        <option value="0.7">Score > 0.7</option>
                        <option value="0.8">Score > 0.8</option>
                    </select>
                </div>
            </header>

            <div class="content-area">
                <!-- Stats Banner -->
                <div class="stats-banner" id="stats-banner">
                    <div class="stat-card">
                        <span class="stat-value" id="total-count">-</span>
                        <span class="stat-label">Total</span>
                    </div>
                    <div class="stat-card">
                        <span class="stat-value" id="new-count">-</span>
                        <span class="stat-label">New</span>
                    </div>
                    <div class="stat-card highlight">
                        <span class="stat-value" id="hackathon-count">-</span>
                        <span class="stat-label">Hackathons</span>
                    </div>
                    <div class="stat-card">
                        <span class="stat-value" id="internship-count">-</span>
                        <span class="stat-label">Internships</span>
                    </div>
                </div>

                <!-- Opportunity Feed -->
                <div class="feed" id="opportunity-feed">
                    <div class="loading">Loading opportunities...</div>
                </div>

                <!-- Digest View (Hidden by default) -->
                <div class="digest-view" id="digest-view" style="display: none;">
                    <div class="digest-content" id="digest-content"></div>
                </div>
            </div>
        </main>
    </div>

    <!-- Opportunity Detail Modal -->
    <div class="modal" id="detail-modal">
        <div class="modal-backdrop"></div>
        <div class="modal-content">
            <!-- Icon-only button: aria-label supplies the accessible name -->
            <button type="button" class="modal-close" aria-label="Close">×</button>
            <div id="modal-body"></div>
        </div>
    </div>

    <!-- AI Chat Panel -->
    <div class="chat-panel" id="chat-panel">
        <div class="chat-header">
            <span>PIOE AI Search</span>
            <button type="button" class="chat-close" aria-label="Close AI search" onclick="app.toggleChat()">×</button>
        </div>
        <div class="chat-messages" id="chat-messages">
            <div class="chat-message bot">
                <p>Hi! I'm PIOE AI. Ask me to find opportunities:</p>
                <ul style="margin: 8px 0; padding-left: 20px; font-size: 12px; opacity: 0.8">
                    <li>"Find hackathons in Nigeria"</li>
                    <li>"What grants are available for AI?"</li>
                    <li>"Show high ROI opportunities"</li>
                    <li>"Internships in robotics"</li>
                </ul>
            </div>
        </div>
        <div class="chat-input-area">
            <!--
                keydown replaces the deprecated keypress event. The isComposing
                check keeps an Enter that confirms an IME composition from
                sending the message.
            -->
            <input type="text" id="chat-input" placeholder="Ask about opportunities..."
                aria-label="Ask about opportunities"
                onkeydown="if(event.key==='Enter' && !event.isComposing) app.sendChatMessage()">
            <button type="button" onclick="app.sendChatMessage()">Send</button>
        </div>
    </div>

    <!-- Floating Chat Button -->
    <button type="button" class="chat-fab" id="chat-fab" aria-label="Open AI search" onclick="app.toggleChat()">
        AI
    </button>

    <script src="/static/app.js"></script>
</body>

</html>
|
frontend/styles.css
ADDED
|
@@ -0,0 +1,905 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* PIOE - Personal Intelligence & Opportunity Engine
|
| 2 |
+
Modern Dark Theme with Glassmorphism */
|
| 3 |
+
|
| 4 |
+
:root {
|
| 5 |
+
/* Color Palette */
|
| 6 |
+
--bg-primary: #0a0a0f;
|
| 7 |
+
--bg-secondary: #12121a;
|
| 8 |
+
--bg-tertiary: #1a1a24;
|
| 9 |
+
--bg-card: rgba(26, 26, 36, 0.8);
|
| 10 |
+
--bg-glass: rgba(255, 255, 255, 0.03);
|
| 11 |
+
|
| 12 |
+
--accent-primary: #6366f1;
|
| 13 |
+
--accent-secondary: #8b5cf6;
|
| 14 |
+
--accent-gradient: linear-gradient(135deg, #6366f1, #8b5cf6);
|
| 15 |
+
|
| 16 |
+
--text-primary: #ffffff;
|
| 17 |
+
--text-secondary: #a1a1aa;
|
| 18 |
+
--text-muted: #71717a;
|
| 19 |
+
|
| 20 |
+
--border-color: rgba(255, 255, 255, 0.08);
|
| 21 |
+
--border-hover: rgba(255, 255, 255, 0.15);
|
| 22 |
+
|
| 23 |
+
/* Status Colors */
|
| 24 |
+
--success: #22c55e;
|
| 25 |
+
--warning: #f59e0b;
|
| 26 |
+
--danger: #ef4444;
|
| 27 |
+
--info: #3b82f6;
|
| 28 |
+
|
| 29 |
+
/* Category Colors */
|
| 30 |
+
--cat-hackathon: #f43f5e;
|
| 31 |
+
--cat-internship: #3b82f6;
|
| 32 |
+
--cat-scholarship: #22c55e;
|
| 33 |
+
--cat-research: #8b5cf6;
|
| 34 |
+
--cat-job: #f59e0b;
|
| 35 |
+
--cat-grant: #14b8a6;
|
| 36 |
+
--cat-opensource: #ec4899;
|
| 37 |
+
|
| 38 |
+
/* Spacing */
|
| 39 |
+
--sidebar-width: 260px;
|
| 40 |
+
--header-height: 70px;
|
| 41 |
+
--radius-sm: 8px;
|
| 42 |
+
--radius-md: 12px;
|
| 43 |
+
--radius-lg: 16px;
|
| 44 |
+
}
|
| 45 |
+
|
| 46 |
+
* {
|
| 47 |
+
margin: 0;
|
| 48 |
+
padding: 0;
|
| 49 |
+
box-sizing: border-box;
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
+
body {
|
| 53 |
+
font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif;
|
| 54 |
+
background: var(--bg-primary);
|
| 55 |
+
color: var(--text-primary);
|
| 56 |
+
line-height: 1.6;
|
| 57 |
+
min-height: 100vh;
|
| 58 |
+
overflow-x: hidden;
|
| 59 |
+
}
|
| 60 |
+
|
| 61 |
+
/* App Layout */
|
| 62 |
+
.app {
|
| 63 |
+
display: flex;
|
| 64 |
+
min-height: 100vh;
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
/* Sidebar */
|
| 68 |
+
.sidebar {
|
| 69 |
+
width: var(--sidebar-width);
|
| 70 |
+
background: var(--bg-secondary);
|
| 71 |
+
border-right: 1px solid var(--border-color);
|
| 72 |
+
padding: 24px 16px;
|
| 73 |
+
position: fixed;
|
| 74 |
+
height: 100vh;
|
| 75 |
+
overflow-y: auto;
|
| 76 |
+
z-index: 100;
|
| 77 |
+
}
|
| 78 |
+
|
| 79 |
+
.logo {
|
| 80 |
+
display: flex;
|
| 81 |
+
align-items: center;
|
| 82 |
+
gap: 12px;
|
| 83 |
+
padding: 8px 12px;
|
| 84 |
+
margin-bottom: 32px;
|
| 85 |
+
}
|
| 86 |
+
|
| 87 |
+
.logo-icon {
|
| 88 |
+
font-size: 28px;
|
| 89 |
+
filter: drop-shadow(0 0 8px rgba(99, 102, 241, 0.5));
|
| 90 |
+
}
|
| 91 |
+
|
| 92 |
+
.logo-text {
|
| 93 |
+
font-size: 24px;
|
| 94 |
+
font-weight: 700;
|
| 95 |
+
background: var(--accent-gradient);
|
| 96 |
+
-webkit-background-clip: text;
|
| 97 |
+
-webkit-text-fill-color: transparent;
|
| 98 |
+
background-clip: text;
|
| 99 |
+
}
|
| 100 |
+
|
| 101 |
+
.nav-section {
|
| 102 |
+
margin-bottom: 24px;
|
| 103 |
+
}
|
| 104 |
+
|
| 105 |
+
.nav-label {
|
| 106 |
+
display: block;
|
| 107 |
+
font-size: 11px;
|
| 108 |
+
font-weight: 600;
|
| 109 |
+
text-transform: uppercase;
|
| 110 |
+
letter-spacing: 0.05em;
|
| 111 |
+
color: var(--text-muted);
|
| 112 |
+
padding: 0 12px;
|
| 113 |
+
margin-bottom: 8px;
|
| 114 |
+
}
|
| 115 |
+
|
| 116 |
+
.nav-item {
|
| 117 |
+
display: flex;
|
| 118 |
+
align-items: center;
|
| 119 |
+
gap: 10px;
|
| 120 |
+
padding: 10px 12px;
|
| 121 |
+
border-radius: var(--radius-sm);
|
| 122 |
+
color: var(--text-secondary);
|
| 123 |
+
text-decoration: none;
|
| 124 |
+
font-size: 14px;
|
| 125 |
+
font-weight: 500;
|
| 126 |
+
transition: all 0.2s ease;
|
| 127 |
+
cursor: pointer;
|
| 128 |
+
}
|
| 129 |
+
|
| 130 |
+
.nav-item:hover {
|
| 131 |
+
background: var(--bg-glass);
|
| 132 |
+
color: var(--text-primary);
|
| 133 |
+
}
|
| 134 |
+
|
| 135 |
+
.nav-item.active {
|
| 136 |
+
background: var(--accent-gradient);
|
| 137 |
+
color: white;
|
| 138 |
+
}
|
| 139 |
+
|
| 140 |
+
.nav-item .icon {
|
| 141 |
+
font-size: 16px;
|
| 142 |
+
}
|
| 143 |
+
|
| 144 |
+
/* Main Content */
|
| 145 |
+
.main-content {
|
| 146 |
+
flex: 1;
|
| 147 |
+
margin-left: var(--sidebar-width);
|
| 148 |
+
min-height: 100vh;
|
| 149 |
+
display: flex;
|
| 150 |
+
flex-direction: column;
|
| 151 |
+
}
|
| 152 |
+
|
| 153 |
+
/* Header */
|
| 154 |
+
.header {
|
| 155 |
+
height: var(--header-height);
|
| 156 |
+
background: var(--bg-secondary);
|
| 157 |
+
border-bottom: 1px solid var(--border-color);
|
| 158 |
+
display: flex;
|
| 159 |
+
align-items: center;
|
| 160 |
+
justify-content: space-between;
|
| 161 |
+
padding: 0 32px;
|
| 162 |
+
position: sticky;
|
| 163 |
+
top: 0;
|
| 164 |
+
z-index: 50;
|
| 165 |
+
backdrop-filter: blur(12px);
|
| 166 |
+
}
|
| 167 |
+
|
| 168 |
+
.header-title h1 {
|
| 169 |
+
font-size: 20px;
|
| 170 |
+
font-weight: 600;
|
| 171 |
+
}
|
| 172 |
+
|
| 173 |
+
.subtitle {
|
| 174 |
+
font-size: 13px;
|
| 175 |
+
color: var(--text-muted);
|
| 176 |
+
}
|
| 177 |
+
|
| 178 |
+
.header-actions {
|
| 179 |
+
display: flex;
|
| 180 |
+
gap: 12px;
|
| 181 |
+
}
|
| 182 |
+
|
| 183 |
+
.filter-select {
|
| 184 |
+
background: var(--bg-tertiary);
|
| 185 |
+
border: 1px solid var(--border-color);
|
| 186 |
+
color: var(--text-primary);
|
| 187 |
+
padding: 8px 16px;
|
| 188 |
+
border-radius: var(--radius-sm);
|
| 189 |
+
font-size: 13px;
|
| 190 |
+
cursor: pointer;
|
| 191 |
+
transition: border-color 0.2s;
|
| 192 |
+
}
|
| 193 |
+
|
| 194 |
+
.filter-select:hover {
|
| 195 |
+
border-color: var(--border-hover);
|
| 196 |
+
}
|
| 197 |
+
|
| 198 |
+
.filter-select:focus {
|
| 199 |
+
outline: none;
|
| 200 |
+
border-color: var(--accent-primary);
|
| 201 |
+
}
|
| 202 |
+
|
| 203 |
+
/* Content Area */
|
| 204 |
+
.content-area {
|
| 205 |
+
flex: 1;
|
| 206 |
+
padding: 24px 32px;
|
| 207 |
+
overflow-y: auto;
|
| 208 |
+
}
|
| 209 |
+
|
| 210 |
+
/* Stats Banner */
|
| 211 |
+
.stats-banner {
|
| 212 |
+
display: grid;
|
| 213 |
+
grid-template-columns: repeat(4, 1fr);
|
| 214 |
+
gap: 16px;
|
| 215 |
+
margin-bottom: 24px;
|
| 216 |
+
}
|
| 217 |
+
|
| 218 |
+
.stat-card {
|
| 219 |
+
background: var(--bg-card);
|
| 220 |
+
border: 1px solid var(--border-color);
|
| 221 |
+
border-radius: var(--radius-md);
|
| 222 |
+
padding: 20px;
|
| 223 |
+
display: flex;
|
| 224 |
+
flex-direction: column;
|
| 225 |
+
gap: 4px;
|
| 226 |
+
backdrop-filter: blur(8px);
|
| 227 |
+
}
|
| 228 |
+
|
| 229 |
+
.stat-card.highlight {
|
| 230 |
+
background: var(--accent-gradient);
|
| 231 |
+
border: none;
|
| 232 |
+
}
|
| 233 |
+
|
| 234 |
+
.stat-value {
|
| 235 |
+
font-size: 28px;
|
| 236 |
+
font-weight: 700;
|
| 237 |
+
}
|
| 238 |
+
|
| 239 |
+
.stat-label {
|
| 240 |
+
font-size: 12px;
|
| 241 |
+
color: var(--text-secondary);
|
| 242 |
+
text-transform: uppercase;
|
| 243 |
+
letter-spacing: 0.05em;
|
| 244 |
+
}
|
| 245 |
+
|
| 246 |
+
.stat-card.highlight .stat-label {
|
| 247 |
+
color: rgba(255, 255, 255, 0.8);
|
| 248 |
+
}
|
| 249 |
+
|
| 250 |
+
/* Opportunity Feed */
|
| 251 |
+
.feed {
|
| 252 |
+
display: flex;
|
| 253 |
+
flex-direction: column;
|
| 254 |
+
gap: 16px;
|
| 255 |
+
}
|
| 256 |
+
|
| 257 |
+
.loading {
|
| 258 |
+
text-align: center;
|
| 259 |
+
padding: 60px;
|
| 260 |
+
color: var(--text-muted);
|
| 261 |
+
}
|
| 262 |
+
|
| 263 |
+
/* Opportunity Card */
|
| 264 |
+
.opportunity-card {
|
| 265 |
+
background: var(--bg-card);
|
| 266 |
+
border: 1px solid var(--border-color);
|
| 267 |
+
border-radius: var(--radius-md);
|
| 268 |
+
padding: 20px;
|
| 269 |
+
transition: all 0.2s ease;
|
| 270 |
+
cursor: pointer;
|
| 271 |
+
backdrop-filter: blur(8px);
|
| 272 |
+
}
|
| 273 |
+
|
| 274 |
+
.opportunity-card:hover {
|
| 275 |
+
border-color: var(--border-hover);
|
| 276 |
+
transform: translateY(-2px);
|
| 277 |
+
box-shadow: 0 8px 24px rgba(0, 0, 0, 0.3);
|
| 278 |
+
}
|
| 279 |
+
|
| 280 |
+
.card-header {
|
| 281 |
+
display: flex;
|
| 282 |
+
align-items: flex-start;
|
| 283 |
+
justify-content: space-between;
|
| 284 |
+
margin-bottom: 12px;
|
| 285 |
+
}
|
| 286 |
+
|
| 287 |
+
.card-category {
|
| 288 |
+
display: inline-flex;
|
| 289 |
+
align-items: center;
|
| 290 |
+
gap: 6px;
|
| 291 |
+
padding: 4px 10px;
|
| 292 |
+
border-radius: 20px;
|
| 293 |
+
font-size: 11px;
|
| 294 |
+
font-weight: 600;
|
| 295 |
+
text-transform: uppercase;
|
| 296 |
+
letter-spacing: 0.03em;
|
| 297 |
+
}
|
| 298 |
+
|
| 299 |
+
.card-category.hackathon {
|
| 300 |
+
background: rgba(244, 63, 94, 0.2);
|
| 301 |
+
color: var(--cat-hackathon);
|
| 302 |
+
}
|
| 303 |
+
|
| 304 |
+
.card-category.internship {
|
| 305 |
+
background: rgba(59, 130, 246, 0.2);
|
| 306 |
+
color: var(--cat-internship);
|
| 307 |
+
}
|
| 308 |
+
|
| 309 |
+
.card-category.scholarship {
|
| 310 |
+
background: rgba(34, 197, 94, 0.2);
|
| 311 |
+
color: var(--cat-scholarship);
|
| 312 |
+
}
|
| 313 |
+
|
| 314 |
+
.card-category.research {
|
| 315 |
+
background: rgba(139, 92, 246, 0.2);
|
| 316 |
+
color: var(--cat-research);
|
| 317 |
+
}
|
| 318 |
+
|
| 319 |
+
.card-category.job {
|
| 320 |
+
background: rgba(245, 158, 11, 0.2);
|
| 321 |
+
color: var(--cat-job);
|
| 322 |
+
}
|
| 323 |
+
|
| 324 |
+
.card-category.grant {
|
| 325 |
+
background: rgba(20, 184, 166, 0.2);
|
| 326 |
+
color: var(--cat-grant);
|
| 327 |
+
}
|
| 328 |
+
|
| 329 |
+
.card-category.open_source {
|
| 330 |
+
background: rgba(236, 72, 153, 0.2);
|
| 331 |
+
color: var(--cat-opensource);
|
| 332 |
+
}
|
| 333 |
+
|
| 334 |
+
.card-category.other {
|
| 335 |
+
background: rgba(161, 161, 170, 0.2);
|
| 336 |
+
color: var(--text-secondary);
|
| 337 |
+
}
|
| 338 |
+
|
| 339 |
+
.card-score {
|
| 340 |
+
display: flex;
|
| 341 |
+
align-items: center;
|
| 342 |
+
gap: 4px;
|
| 343 |
+
font-size: 13px;
|
| 344 |
+
color: var(--text-secondary);
|
| 345 |
+
}
|
| 346 |
+
|
| 347 |
+
.score-bar {
|
| 348 |
+
width: 60px;
|
| 349 |
+
height: 6px;
|
| 350 |
+
background: var(--bg-tertiary);
|
| 351 |
+
border-radius: 3px;
|
| 352 |
+
overflow: hidden;
|
| 353 |
+
}
|
| 354 |
+
|
| 355 |
+
.score-fill {
|
| 356 |
+
height: 100%;
|
| 357 |
+
background: var(--accent-gradient);
|
| 358 |
+
border-radius: 3px;
|
| 359 |
+
transition: width 0.3s ease;
|
| 360 |
+
}
|
| 361 |
+
|
| 362 |
+
.card-title {
|
| 363 |
+
font-size: 16px;
|
| 364 |
+
font-weight: 600;
|
| 365 |
+
margin-bottom: 8px;
|
| 366 |
+
line-height: 1.4;
|
| 367 |
+
}
|
| 368 |
+
|
| 369 |
+
.card-meta {
|
| 370 |
+
display: flex;
|
| 371 |
+
gap: 16px;
|
| 372 |
+
font-size: 12px;
|
| 373 |
+
color: var(--text-muted);
|
| 374 |
+
margin-bottom: 12px;
|
| 375 |
+
}
|
| 376 |
+
|
| 377 |
+
.card-meta span {
|
| 378 |
+
display: flex;
|
| 379 |
+
align-items: center;
|
| 380 |
+
gap: 4px;
|
| 381 |
+
}
|
| 382 |
+
|
| 383 |
+
.card-summary {
|
| 384 |
+
font-size: 14px;
|
| 385 |
+
color: var(--text-secondary);
|
| 386 |
+
line-height: 1.6;
|
| 387 |
+
display: -webkit-box;
|
| 388 |
+
-webkit-line-clamp: 2;
|
| 389 |
+
-webkit-box-orient: vertical;
|
| 390 |
+
overflow: hidden;
|
| 391 |
+
}
|
| 392 |
+
|
| 393 |
+
.card-footer {
|
| 394 |
+
display: flex;
|
| 395 |
+
align-items: center;
|
| 396 |
+
justify-content: space-between;
|
| 397 |
+
margin-top: 16px;
|
| 398 |
+
padding-top: 16px;
|
| 399 |
+
border-top: 1px solid var(--border-color);
|
| 400 |
+
}
|
| 401 |
+
|
| 402 |
+
.deadline-badge {
|
| 403 |
+
display: inline-flex;
|
| 404 |
+
align-items: center;
|
| 405 |
+
gap: 6px;
|
| 406 |
+
padding: 4px 10px;
|
| 407 |
+
border-radius: var(--radius-sm);
|
| 408 |
+
font-size: 12px;
|
| 409 |
+
font-weight: 500;
|
| 410 |
+
}
|
| 411 |
+
|
| 412 |
+
.deadline-badge.urgent {
|
| 413 |
+
background: rgba(239, 68, 68, 0.2);
|
| 414 |
+
color: var(--danger);
|
| 415 |
+
}
|
| 416 |
+
|
| 417 |
+
.deadline-badge.soon {
|
| 418 |
+
background: rgba(245, 158, 11, 0.2);
|
| 419 |
+
color: var(--warning);
|
| 420 |
+
}
|
| 421 |
+
|
| 422 |
+
.deadline-badge.ok {
|
| 423 |
+
background: rgba(34, 197, 94, 0.2);
|
| 424 |
+
color: var(--success);
|
| 425 |
+
}
|
| 426 |
+
|
| 427 |
+
.card-actions {
|
| 428 |
+
display: flex;
|
| 429 |
+
gap: 8px;
|
| 430 |
+
}
|
| 431 |
+
|
| 432 |
+
.action-btn {
|
| 433 |
+
padding: 6px 12px;
|
| 434 |
+
border-radius: var(--radius-sm);
|
| 435 |
+
font-size: 12px;
|
| 436 |
+
font-weight: 500;
|
| 437 |
+
border: none;
|
| 438 |
+
cursor: pointer;
|
| 439 |
+
transition: all 0.2s;
|
| 440 |
+
}
|
| 441 |
+
|
| 442 |
+
.action-btn.primary {
|
| 443 |
+
background: var(--accent-gradient);
|
| 444 |
+
color: white;
|
| 445 |
+
}
|
| 446 |
+
|
| 447 |
+
.action-btn.primary:hover {
|
| 448 |
+
transform: scale(1.05);
|
| 449 |
+
}
|
| 450 |
+
|
| 451 |
+
.action-btn.secondary {
|
| 452 |
+
background: var(--bg-tertiary);
|
| 453 |
+
color: var(--text-secondary);
|
| 454 |
+
border: 1px solid var(--border-color);
|
| 455 |
+
}
|
| 456 |
+
|
| 457 |
+
.action-btn.secondary:hover {
|
| 458 |
+
border-color: var(--border-hover);
|
| 459 |
+
color: var(--text-primary);
|
| 460 |
+
}
|
| 461 |
+
|
| 462 |
+
/* Digest View */
|
| 463 |
+
.digest-view {
|
| 464 |
+
background: var(--bg-card);
|
| 465 |
+
border: 1px solid var(--border-color);
|
| 466 |
+
border-radius: var(--radius-md);
|
| 467 |
+
padding: 32px;
|
| 468 |
+
backdrop-filter: blur(8px);
|
| 469 |
+
}
|
| 470 |
+
|
| 471 |
+
.digest-content {
|
| 472 |
+
font-size: 14px;
|
| 473 |
+
line-height: 1.8;
|
| 474 |
+
}
|
| 475 |
+
|
| 476 |
+
.digest-content h1 {
|
| 477 |
+
font-size: 24px;
|
| 478 |
+
margin-bottom: 16px;
|
| 479 |
+
}
|
| 480 |
+
|
| 481 |
+
.digest-content h2 {
|
| 482 |
+
font-size: 18px;
|
| 483 |
+
margin: 24px 0 12px;
|
| 484 |
+
}
|
| 485 |
+
|
| 486 |
+
.digest-content h3 {
|
| 487 |
+
font-size: 16px;
|
| 488 |
+
margin: 20px 0 8px;
|
| 489 |
+
}
|
| 490 |
+
|
| 491 |
+
.digest-content p {
|
| 492 |
+
margin-bottom: 12px;
|
| 493 |
+
color: var(--text-secondary);
|
| 494 |
+
}
|
| 495 |
+
|
| 496 |
+
.digest-content blockquote {
|
| 497 |
+
border-left: 3px solid var(--accent-primary);
|
| 498 |
+
padding-left: 16px;
|
| 499 |
+
color: var(--text-secondary);
|
| 500 |
+
margin: 12px 0;
|
| 501 |
+
}
|
| 502 |
+
|
| 503 |
+
.digest-content a {
|
| 504 |
+
color: var(--accent-primary);
|
| 505 |
+
}
|
| 506 |
+
|
| 507 |
+
.digest-content hr {
|
| 508 |
+
border: none;
|
| 509 |
+
border-top: 1px solid var(--border-color);
|
| 510 |
+
margin: 24px 0;
|
| 511 |
+
}
|
| 512 |
+
|
| 513 |
+
.digest-content table {
|
| 514 |
+
width: 100%;
|
| 515 |
+
border-collapse: collapse;
|
| 516 |
+
margin: 16px 0;
|
| 517 |
+
}
|
| 518 |
+
|
| 519 |
+
.digest-content th,
|
| 520 |
+
.digest-content td {
|
| 521 |
+
padding: 8px 12px;
|
| 522 |
+
border: 1px solid var(--border-color);
|
| 523 |
+
text-align: left;
|
| 524 |
+
}
|
| 525 |
+
|
| 526 |
+
.digest-content th {
|
| 527 |
+
background: var(--bg-tertiary);
|
| 528 |
+
}
|
| 529 |
+
|
| 530 |
+
/* Modal */
|
| 531 |
+
.modal {
|
| 532 |
+
display: none;
|
| 533 |
+
position: fixed;
|
| 534 |
+
top: 0;
|
| 535 |
+
left: 0;
|
| 536 |
+
width: 100%;
|
| 537 |
+
height: 100%;
|
| 538 |
+
z-index: 1000;
|
| 539 |
+
}
|
| 540 |
+
|
| 541 |
+
.modal.active {
|
| 542 |
+
display: flex;
|
| 543 |
+
align-items: center;
|
| 544 |
+
justify-content: center;
|
| 545 |
+
}
|
| 546 |
+
|
| 547 |
+
.modal-backdrop {
|
| 548 |
+
position: absolute;
|
| 549 |
+
top: 0;
|
| 550 |
+
left: 0;
|
| 551 |
+
width: 100%;
|
| 552 |
+
height: 100%;
|
| 553 |
+
background: rgba(0, 0, 0, 0.7);
|
| 554 |
+
backdrop-filter: blur(4px);
|
| 555 |
+
}
|
| 556 |
+
|
| 557 |
+
.modal-content {
|
| 558 |
+
position: relative;
|
| 559 |
+
background: var(--bg-secondary);
|
| 560 |
+
border: 1px solid var(--border-color);
|
| 561 |
+
border-radius: var(--radius-lg);
|
| 562 |
+
width: 90%;
|
| 563 |
+
max-width: 700px;
|
| 564 |
+
max-height: 80vh;
|
| 565 |
+
overflow-y: auto;
|
| 566 |
+
padding: 32px;
|
| 567 |
+
z-index: 1001;
|
| 568 |
+
}
|
| 569 |
+
|
| 570 |
+
.modal-close {
|
| 571 |
+
position: absolute;
|
| 572 |
+
top: 16px;
|
| 573 |
+
right: 16px;
|
| 574 |
+
background: var(--bg-tertiary);
|
| 575 |
+
border: none;
|
| 576 |
+
color: var(--text-secondary);
|
| 577 |
+
width: 32px;
|
| 578 |
+
height: 32px;
|
| 579 |
+
border-radius: 50%;
|
| 580 |
+
font-size: 20px;
|
| 581 |
+
cursor: pointer;
|
| 582 |
+
display: flex;
|
| 583 |
+
align-items: center;
|
| 584 |
+
justify-content: center;
|
| 585 |
+
transition: all 0.2s;
|
| 586 |
+
}
|
| 587 |
+
|
| 588 |
+
.modal-close:hover {
|
| 589 |
+
background: var(--danger);
|
| 590 |
+
color: white;
|
| 591 |
+
}
|
| 592 |
+
|
| 593 |
+
/* Scrollbar */
|
| 594 |
+
::-webkit-scrollbar {
|
| 595 |
+
width: 8px;
|
| 596 |
+
height: 8px;
|
| 597 |
+
}
|
| 598 |
+
|
| 599 |
+
::-webkit-scrollbar-track {
|
| 600 |
+
background: var(--bg-primary);
|
| 601 |
+
}
|
| 602 |
+
|
| 603 |
+
::-webkit-scrollbar-thumb {
|
| 604 |
+
background: var(--bg-tertiary);
|
| 605 |
+
border-radius: 4px;
|
| 606 |
+
}
|
| 607 |
+
|
| 608 |
+
::-webkit-scrollbar-thumb:hover {
|
| 609 |
+
background: var(--accent-primary);
|
| 610 |
+
}
|
| 611 |
+
|
| 612 |
+
/* Animations */

/* Entrance effect for feed cards: fade in while sliding up 10px. */
@keyframes fadeIn {
    from {
        opacity: 0;
        transform: translateY(10px);
    }

    to {
        opacity: 1;
        transform: translateY(0);
    }
}

.opportunity-card {
    /*
     * fill-mode is `backwards`, not `forwards`:
     *  - backwards applies the `from` keyframe during animation-delay, so a
     *    delayed card stays hidden instead of flashing at full opacity and
     *    then snapping to transparent when its animation starts;
     *  - forwards would keep `transform: translateY(0)` applied after the
     *    animation ends, which overrides the .opportunity-card:hover
     *    transform. The `to` keyframe matches the card's resting style, so
     *    nothing needs to be retained once the animation finishes.
     */
    animation: fadeIn 0.3s ease backwards;
}

/* Stagger the first five cards in 50ms steps. */
.opportunity-card:nth-child(1) {
    animation-delay: 0.05s;
}

.opportunity-card:nth-child(2) {
    animation-delay: 0.1s;
}

.opportunity-card:nth-child(3) {
    animation-delay: 0.15s;
}

.opportunity-card:nth-child(4) {
    animation-delay: 0.2s;
}

.opportunity-card:nth-child(5) {
    animation-delay: 0.25s;
}
|
| 648 |
+
|
| 649 |
+
/* Responsive */
|
| 650 |
+
@media (max-width: 1024px) {
|
| 651 |
+
.sidebar {
|
| 652 |
+
width: 200px;
|
| 653 |
+
}
|
| 654 |
+
|
| 655 |
+
.main-content {
|
| 656 |
+
margin-left: 200px;
|
| 657 |
+
}
|
| 658 |
+
|
| 659 |
+
.stats-banner {
|
| 660 |
+
grid-template-columns: repeat(2, 1fr);
|
| 661 |
+
}
|
| 662 |
+
}
|
| 663 |
+
|
| 664 |
+
@media (max-width: 768px) {
|
| 665 |
+
.sidebar {
|
| 666 |
+
display: none;
|
| 667 |
+
}
|
| 668 |
+
|
| 669 |
+
.main-content {
|
| 670 |
+
margin-left: 0;
|
| 671 |
+
}
|
| 672 |
+
|
| 673 |
+
.header {
|
| 674 |
+
flex-direction: column;
|
| 675 |
+
height: auto;
|
| 676 |
+
padding: 16px;
|
| 677 |
+
gap: 12px;
|
| 678 |
+
}
|
| 679 |
+
|
| 680 |
+
.content-area {
|
| 681 |
+
padding: 16px;
|
| 682 |
+
}
|
| 683 |
+
|
| 684 |
+
.stats-banner {
|
| 685 |
+
grid-template-columns: 1fr 1fr;
|
| 686 |
+
}
|
| 687 |
+
}
|
| 688 |
+
|
| 689 |
+
/* PIOE 2.0: New Category Colors */
|
| 690 |
+
.card-category.micro_grant {
|
| 691 |
+
background: rgba(16, 185, 129, 0.2);
|
| 692 |
+
color: #10b981;
|
| 693 |
+
}
|
| 694 |
+
|
| 695 |
+
.card-category.ecosystem_grant {
|
| 696 |
+
background: rgba(245, 158, 11, 0.2);
|
| 697 |
+
color: #f59e0b;
|
| 698 |
+
}
|
| 699 |
+
|
| 700 |
+
.card-category.innovation_fund {
|
| 701 |
+
background: rgba(59, 130, 246, 0.2);
|
| 702 |
+
color: #3b82f6;
|
| 703 |
+
}
|
| 704 |
+
|
| 705 |
+
.card-category.partnership {
|
| 706 |
+
background: rgba(139, 92, 246, 0.2);
|
| 707 |
+
color: #8b5cf6;
|
| 708 |
+
}
|
| 709 |
+
|
| 710 |
+
.card-category.collaboration {
|
| 711 |
+
background: rgba(236, 72, 153, 0.2);
|
| 712 |
+
color: #ec4899;
|
| 713 |
+
}
|
| 714 |
+
|
| 715 |
+
.card-category.pitch_event {
|
| 716 |
+
background: rgba(244, 63, 94, 0.2);
|
| 717 |
+
color: #f43f5e;
|
| 718 |
+
}
|
| 719 |
+
|
| 720 |
+
.card-category.demo_day {
|
| 721 |
+
background: rgba(99, 102, 241, 0.2);
|
| 722 |
+
color: #6366f1;
|
| 723 |
+
}
|
| 724 |
+
|
| 725 |
+
.card-category.bounty {
|
| 726 |
+
background: rgba(34, 197, 94, 0.2);
|
| 727 |
+
color: #22c55e;
|
| 728 |
+
}
|
| 729 |
+
|
| 730 |
+
.card-category.ambassador {
|
| 731 |
+
background: rgba(234, 179, 8, 0.2);
|
| 732 |
+
color: #eab308;
|
| 733 |
+
}
|
| 734 |
+
|
| 735 |
+
.card-category.pre_grant_signal {
|
| 736 |
+
background: rgba(168, 85, 247, 0.2);
|
| 737 |
+
color: #a855f7;
|
| 738 |
+
}
|
| 739 |
+
|
| 740 |
+
.card-category.pre_hiring_signal {
|
| 741 |
+
background: rgba(6, 182, 212, 0.2);
|
| 742 |
+
color: #06b6d4;
|
| 743 |
+
}
|
| 744 |
+
|
| 745 |
+
/* PIOE 2.0: Chat Panel */
|
| 746 |
+
.chat-fab {
|
| 747 |
+
position: fixed;
|
| 748 |
+
bottom: 24px;
|
| 749 |
+
right: 24px;
|
| 750 |
+
width: 60px;
|
| 751 |
+
height: 60px;
|
| 752 |
+
border-radius: 50%;
|
| 753 |
+
background: var(--accent-gradient);
|
| 754 |
+
border: none;
|
| 755 |
+
box-shadow: 0 4px 20px rgba(99, 102, 241, 0.4);
|
| 756 |
+
font-size: 28px;
|
| 757 |
+
cursor: pointer;
|
| 758 |
+
z-index: 999;
|
| 759 |
+
transition: all 0.3s ease;
|
| 760 |
+
}
|
| 761 |
+
|
| 762 |
+
.chat-fab:hover {
|
| 763 |
+
transform: scale(1.1);
|
| 764 |
+
box-shadow: 0 6px 30px rgba(99, 102, 241, 0.6);
|
| 765 |
+
}
|
| 766 |
+
|
| 767 |
+
.chat-panel {
|
| 768 |
+
position: fixed;
|
| 769 |
+
bottom: 100px;
|
| 770 |
+
right: 24px;
|
| 771 |
+
width: 380px;
|
| 772 |
+
height: 500px;
|
| 773 |
+
background: var(--bg-secondary);
|
| 774 |
+
border: 1px solid var(--border-color);
|
| 775 |
+
border-radius: var(--radius-lg);
|
| 776 |
+
display: none;
|
| 777 |
+
flex-direction: column;
|
| 778 |
+
z-index: 1000;
|
| 779 |
+
box-shadow: 0 8px 40px rgba(0, 0, 0, 0.4);
|
| 780 |
+
}
|
| 781 |
+
|
| 782 |
+
.chat-panel.active {
|
| 783 |
+
display: flex;
|
| 784 |
+
}
|
| 785 |
+
|
| 786 |
+
.chat-header {
|
| 787 |
+
display: flex;
|
| 788 |
+
align-items: center;
|
| 789 |
+
justify-content: space-between;
|
| 790 |
+
padding: 16px 20px;
|
| 791 |
+
background: var(--accent-gradient);
|
| 792 |
+
border-radius: var(--radius-lg) var(--radius-lg) 0 0;
|
| 793 |
+
font-weight: 600;
|
| 794 |
+
}
|
| 795 |
+
|
| 796 |
+
.chat-close {
|
| 797 |
+
background: none;
|
| 798 |
+
border: none;
|
| 799 |
+
color: white;
|
| 800 |
+
font-size: 24px;
|
| 801 |
+
cursor: pointer;
|
| 802 |
+
opacity: 0.8;
|
| 803 |
+
transition: opacity 0.2s;
|
| 804 |
+
}
|
| 805 |
+
|
| 806 |
+
.chat-close:hover {
|
| 807 |
+
opacity: 1;
|
| 808 |
+
}
|
| 809 |
+
|
| 810 |
+
.chat-messages {
|
| 811 |
+
flex: 1;
|
| 812 |
+
overflow-y: auto;
|
| 813 |
+
padding: 16px;
|
| 814 |
+
display: flex;
|
| 815 |
+
flex-direction: column;
|
| 816 |
+
gap: 12px;
|
| 817 |
+
}
|
| 818 |
+
|
| 819 |
+
.chat-message {
|
| 820 |
+
padding: 12px 16px;
|
| 821 |
+
border-radius: var(--radius-md);
|
| 822 |
+
max-width: 90%;
|
| 823 |
+
animation: fadeIn 0.3s ease;
|
| 824 |
+
}
|
| 825 |
+
|
| 826 |
+
.chat-message.user {
|
| 827 |
+
background: var(--accent-gradient);
|
| 828 |
+
color: white;
|
| 829 |
+
align-self: flex-end;
|
| 830 |
+
}
|
| 831 |
+
|
| 832 |
+
.chat-message.bot {
|
| 833 |
+
background: var(--bg-tertiary);
|
| 834 |
+
color: var(--text-secondary);
|
| 835 |
+
align-self: flex-start;
|
| 836 |
+
}
|
| 837 |
+
|
| 838 |
+
.chat-message p {
|
| 839 |
+
margin: 0;
|
| 840 |
+
font-size: 14px;
|
| 841 |
+
line-height: 1.5;
|
| 842 |
+
}
|
| 843 |
+
|
| 844 |
+
.chat-message .opp-link {
|
| 845 |
+
display: block;
|
| 846 |
+
background: var(--bg-card);
|
| 847 |
+
padding: 8px 12px;
|
| 848 |
+
border-radius: var(--radius-sm);
|
| 849 |
+
margin-top: 8px;
|
| 850 |
+
font-size: 12px;
|
| 851 |
+
color: var(--accent-primary);
|
| 852 |
+
text-decoration: none;
|
| 853 |
+
border: 1px solid var(--border-color);
|
| 854 |
+
transition: border-color 0.2s;
|
| 855 |
+
}
|
| 856 |
+
|
| 857 |
+
.chat-message .opp-link:hover {
|
| 858 |
+
border-color: var(--accent-primary);
|
| 859 |
+
}
|
| 860 |
+
|
| 861 |
+
.chat-input-area {
|
| 862 |
+
display: flex;
|
| 863 |
+
gap: 8px;
|
| 864 |
+
padding: 16px;
|
| 865 |
+
border-top: 1px solid var(--border-color);
|
| 866 |
+
}
|
| 867 |
+
|
| 868 |
+
.chat-input-area input {
|
| 869 |
+
flex: 1;
|
| 870 |
+
background: var(--bg-tertiary);
|
| 871 |
+
border: 1px solid var(--border-color);
|
| 872 |
+
color: var(--text-primary);
|
| 873 |
+
padding: 12px 16px;
|
| 874 |
+
border-radius: var(--radius-sm);
|
| 875 |
+
font-size: 14px;
|
| 876 |
+
}
|
| 877 |
+
|
| 878 |
+
.chat-input-area input:focus {
|
| 879 |
+
outline: none;
|
| 880 |
+
border-color: var(--accent-primary);
|
| 881 |
+
}
|
| 882 |
+
|
| 883 |
+
.chat-input-area button {
|
| 884 |
+
background: var(--accent-gradient);
|
| 885 |
+
border: none;
|
| 886 |
+
color: white;
|
| 887 |
+
padding: 12px 20px;
|
| 888 |
+
border-radius: var(--radius-sm);
|
| 889 |
+
font-weight: 500;
|
| 890 |
+
cursor: pointer;
|
| 891 |
+
transition: transform 0.2s;
|
| 892 |
+
}
|
| 893 |
+
|
| 894 |
+
.chat-input-area button:hover {
|
| 895 |
+
transform: scale(1.05);
|
| 896 |
+
}
|
| 897 |
+
|
| 898 |
+
@media (max-width: 480px) {
|
| 899 |
+
.chat-panel {
|
| 900 |
+
width: calc(100% - 32px);
|
| 901 |
+
right: 16px;
|
| 902 |
+
bottom: 90px;
|
| 903 |
+
height: 60vh;
|
| 904 |
+
}
|
| 905 |
+
}
|
render.yaml
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# render.yaml - Render Blueprint for one-click deploy
|
| 2 |
+
services:
|
| 3 |
+
- type: web
|
| 4 |
+
name: pioe
|
| 5 |
+
runtime: python
|
| 6 |
+
buildCommand: pip install -r requirements.txt
|
| 7 |
+
startCommand: uvicorn backend.main:app --host 0.0.0.0 --port $PORT
|
| 8 |
+
envVars:
|
| 9 |
+
- key: GEMINI_API_KEY
|
| 10 |
+
sync: false
|
| 11 |
+
- key: ADZUNA_APP_ID
|
| 12 |
+
sync: false
|
| 13 |
+
- key: ADZUNA_API_KEY
|
| 14 |
+
sync: false
|
| 15 |
+
- key: JOOBLE_API_KEY
|
| 16 |
+
sync: false
|
| 17 |
+
- key: RAPIDAPI_KEY
|
| 18 |
+
sync: false
|
| 19 |
+
- key: GITHUB_TOKEN
|
| 20 |
+
sync: false
|
| 21 |
+
- key: DATABASE_URL
|
| 22 |
+
value: sqlite:///./pioe.db
|
| 23 |
+
- key: MIN_RELEVANCE_SCORE
|
| 24 |
+
value: "0.3"
|
| 25 |
+
healthCheckPath: /api/stats
|
requirements.txt
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# PIOE - Personal Intelligence & Opportunity Engine (versions below are unpinned; pin them for reproducible deploys)
|
| 2 |
+
fastapi
|
| 3 |
+
uvicorn[standard]
|
| 4 |
+
sqlalchemy
|
| 5 |
+
httpx
|
| 6 |
+
feedparser
|
| 7 |
+
beautifulsoup4
|
| 8 |
+
lxml
|
| 9 |
+
apscheduler
|
| 10 |
+
sentence-transformers
|
| 11 |
+
python-dotenv
|
| 12 |
+
pydantic
|
| 13 |
+
pydantic-settings
|
| 14 |
+
google-generativeai
|
| 15 |
+
praw
|
| 16 |
+
aiofiles
|
| 17 |
+
PyYAML
|
| 18 |
+
numpy
|