B1acB1rd committed on
Commit
4d92cd5
·
0 Parent(s):

PIOE 2.0 ready for deployment

Browse files
.env.example ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ===========================================
2
+ # PIOE 2.0 Environment Configuration
3
+ # ===========================================
4
+ # Copy this file to .env and fill in your values
5
+
6
+ # ===========================================
7
+ # AI Provider (Required - pick one)
8
+ # ===========================================
9
+ AI_PROVIDER=gemini
10
+
11
+ # Gemini API (Free: https://makersuite.google.com/app/apikey)
12
+ GEMINI_API_KEY=your_gemini_api_key_here
13
+
14
+ # OpenAI API (Alternative to Gemini)
15
+ OPENAI_API_KEY=
16
+
17
+ # ===========================================
18
+ # Job Board APIs (Optional - get for more jobs)
19
+ # ===========================================
20
+
21
+ # Adzuna API (Free: 250 requests/day)
22
+ # Sign up at: https://developer.adzuna.com/
23
+ ADZUNA_APP_ID=
24
+ ADZUNA_API_KEY=
25
+
26
+ # Jooble API (Free tier, aggregates LinkedIn/Indeed/Glassdoor)
27
+ # Sign up at: https://jooble.org/api/about
28
+ JOOBLE_API_KEY=
29
+
30
+ # RapidAPI for LinkedIn Jobs (Free: 100 requests/month)
31
+ # Sign up at: https://rapidapi.com/jaypat87/api/linkedin-jobs-search
32
+ RAPIDAPI_KEY=
33
+
34
+ # ===========================================
35
+ # Social APIs (Optional - for more sources)
36
+ # ===========================================
37
+
38
+ # Reddit API (get from reddit.com/prefs/apps)
39
+ REDDIT_CLIENT_ID=
40
+ REDDIT_CLIENT_SECRET=
41
+ REDDIT_USER_AGENT=PIOE/2.0
42
+
43
+ # GitHub API (for higher rate limits)
44
+ # Get at: https://github.com/settings/tokens
45
+ GITHUB_TOKEN=
46
+
47
+ # ===========================================
48
+ # Database
49
+ # ===========================================
50
+ DATABASE_URL=sqlite:///./pioe.db
51
+
52
+ # ===========================================
53
+ # Ingestion Schedule
54
+ # ===========================================
55
+ INGESTION_INTERVAL_HOURS=6
56
+
57
+ # ===========================================
58
+ # Scoring Thresholds (Lower = More Results)
59
+ # ===========================================
60
+ MIN_RELEVANCE_SCORE=0.3
61
+ MIN_NOVELTY_SCORE=0.3
62
+ MIN_CREDIBILITY_SCORE=0.5
.gitignore ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # PIOE .gitignore
2
+
3
+ # Environment files (contains secrets!)
4
+ .env
5
+ .env.local
6
+
7
+ # Database
8
+ *.db
9
+ *.sqlite
10
+ *.sqlite3
11
+
12
+ # Python
13
+ __pycache__/
14
+ *.py[cod]
15
+ *$py.class
16
+ *.so
17
+ .Python
18
+ build/
19
+ develop-eggs/
20
+ dist/
21
+ downloads/
22
+ eggs/
23
+ .eggs/
24
+ lib/
25
+ lib64/
26
+ parts/
27
+ sdist/
28
+ var/
29
+ wheels/
30
+ *.egg-info/
31
+ .installed.cfg
32
+ *.egg
33
+
34
+ # Virtual environments
35
+ venv/
36
+ ENV/
37
+ env/
38
+ .venv/
39
+
40
+ # IDE
41
+ .idea/
42
+ .vscode/
43
+ *.swp
44
+ *.swo
45
+ *~
46
+
47
+ # OS
48
+ .DS_Store
49
+ Thumbs.db
50
+
51
+ # Logs
52
+ *.log
53
+ logs/
54
+
55
+ # Testing
56
+ .pytest_cache/
57
+ .coverage
58
+ htmlcov/
59
+
60
+ # Misc
61
+ *.bak
62
+ tmp/
63
+ temp/
Dockerfile ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# PIOE Docker Image
FROM python:3.11-slim

# Set working directory
WORKDIR /app

# Install system dependencies
#   gcc  - C compiler, presumably for requirements that build native extensions
#   curl - required by the HEALTHCHECK below; the slim base image does not ship it
RUN apt-get update && apt-get install -y --no-install-recommends \
    gcc \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first for caching
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . .

# Create non-root user for security
RUN useradd -m appuser && chown -R appuser:appuser /app
USER appuser

# Expose port
EXPOSE 8000

# Health check (fails the container if the stats endpoint stops answering)
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:8000/api/stats || exit 1

# Run the application
CMD ["uvicorn", "backend.main:app", "--host", "0.0.0.0", "--port", "8000"]
Procfile ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # Procfile for Render/Heroku
2
+ web: uvicorn backend.main:app --host 0.0.0.0 --port ${PORT:-8000}
README.md ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # PIOE 2.0 - Personal Intelligence & Opportunity Engine
2
+
3
+ Signal intelligence system for detecting early opportunities in AI, Robotics, Computer Vision, Finance, Scholarships, and Hackathons.
4
+
5
+ ## Features
6
+
7
+ - **Multi-Source Ingestion**: arXiv, GitHub, RSS, Superteam, Web scraping
8
+ - **Job Board Aggregators**: Arbeitnow, TheMuse, Remotive, Adzuna, Jooble, LinkedIn
9
+ - **AI Classification**: Gemini-powered categorization and summarization
10
+ - **Smart Scoring**: Relevance, novelty, and credibility scoring with ROI analysis
11
+ - **Anti-Noise Filters**: Rejects recycled content and discussion posts
12
+ - **Modern Dashboard**: Real-time opportunity feed with filters
13
+
14
+ ## Quick Start
15
+
16
+ ### 1. Install Dependencies
17
+
18
+ ```bash
19
+ cd PIOE
20
+ pip install -r requirements.txt
21
+ ```
22
+
23
+ ### 2. Configure Environment
24
+
25
+ ```bash
26
+ cp .env.example .env
27
+ # Edit .env with your API keys
28
+ ```
29
+
30
+ **Required:**
31
+ - `GEMINI_API_KEY` - Get from [Google AI Studio](https://makersuite.google.com/app/apikey)
32
+
33
+ **Optional (More Jobs):**
34
+ - `ADZUNA_APP_ID` / `ADZUNA_API_KEY` - [Adzuna Developer](https://developer.adzuna.com/) (Free: 250 req/day)
35
+ - `JOOBLE_API_KEY` - [Jooble API](https://jooble.org/api/about) (Free, aggregates LinkedIn/Indeed/Glassdoor)
36
+ - `RAPIDAPI_KEY` - [RapidAPI LinkedIn](https://rapidapi.com/jaypat87/api/linkedin-jobs-search) (Free: 100 req/month)
37
+ - `GITHUB_TOKEN` - For higher rate limits
38
+
39
+ ### 3. Run the Server
40
+
41
+ ```bash
42
+ uvicorn backend.main:app --reload
43
+ ```
44
+
45
+ Open http://localhost:8000 in your browser.
46
+
47
+ ### 4. Trigger First Ingestion
48
+
49
+ Click "Run Ingestion" in the dashboard or:
50
+
51
+ ```bash
52
+ curl -X POST http://localhost:8000/api/ingest/run
53
+ ```
54
+
55
+ ## Data Sources
56
+
57
+ ### Free (No API Key)
58
+ | Source | Type | Coverage |
59
+ |--------|------|----------|
60
+ | Arbeitnow | Jobs | Tech jobs worldwide |
61
+ | TheMuse | Jobs | Data Science, Engineering |
62
+ | Remotive | Remote Jobs | Software, DevOps, Data |
63
+ | ProFellow | Fellowships | Scholarships & Fellowships |
64
+ | RemoteOK | Remote Jobs | AI, ML, Internships |
65
+ | arXiv | Research | CS.CV, CS.RO, CS.AI papers |
66
+ | HN Jobs | Jobs | Startup jobs |
67
+
68
+ ### With Free API Keys
69
+ | Source | Type | Coverage |
70
+ |--------|------|----------|
71
+ | Adzuna | Jobs | Indeed, Monster, CareerBuilder |
72
+ | Jooble | Jobs | LinkedIn, Indeed, Glassdoor (70+ sources) |
73
+ | RapidAPI LinkedIn | Jobs | Direct LinkedIn job listings |
74
+ | Superteam | Web3 | Bounties, grants |
75
+
76
+ ## API Endpoints
77
+
78
+ | Endpoint | Method | Description |
79
+ |----------|--------|-------------|
80
+ | `/api/opportunities` | GET | List opportunities with filters |
81
+ | `/api/opportunities/{id}` | GET | Get single opportunity |
82
+ | `/api/opportunities/{id}/status` | PATCH | Update status (save, apply, dismiss) |
83
+ | `/api/digest/daily` | GET | Get daily intelligence brief |
84
+ | `/api/digest/weekly` | GET | Get weekly report |
85
+ | `/api/digest/urgent` | GET | Get opportunities with deadlines |
86
+ | `/api/ingest/run` | POST | Trigger full ingestion |
87
+ | `/api/stats` | GET | Get system statistics |
88
+
89
+ ## Deployment
90
+
91
+ ### Local Development
92
+ ```bash
93
+ uvicorn backend.main:app --reload
94
+ ```
95
+
96
+ ### Production (with Gunicorn)
97
+ ```bash
98
+ gunicorn backend.main:app -w 4 -k uvicorn.workers.UvicornWorker -b 0.0.0.0:8000
99
+ ```
100
+
101
+ ### Docker (Optional)
102
+ ```dockerfile
103
+ FROM python:3.11-slim
104
+ WORKDIR /app
105
+ COPY requirements.txt .
106
+ RUN pip install -r requirements.txt
107
+ COPY . .
108
+ CMD ["uvicorn", "backend.main:app", "--host", "0.0.0.0", "--port", "8000"]
109
+ ```
110
+
111
+ ## Opportunity Categories
112
+
113
+ - Scholarships & Fellowships
114
+ - Internships & Jobs
115
+ - Hackathons & Competitions
116
+ - Research Opportunities
117
+ - Grants & Funding
118
+ - Open Source Programs
119
+ - Web3 Bounties
120
+
121
+ ## Anti-Noise Rules
122
+
123
+ PIOE automatically filters out:
124
+ - Discussion posts ("How do I get an internship?")
125
+ - Opinion-only content
126
+ - Reposted/recycled news
127
+ - "Top 10 tools" listicles
128
+ - Low engagement social posts
129
+
130
+ ## License
131
+
132
+ MIT
133
+
134
+ ---
135
+
136
+ **Most people search. You detect.**
backend/__init__.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PIOE Backend - Init
3
+ """
4
+ from .config import get_settings
5
+ from .database import SessionLocal, init_db, get_db
6
+ from .models import Opportunity, Source, OpportunityCategory, OpportunityStatus
7
+
8
+ __all__ = [
9
+ "get_settings",
10
+ "SessionLocal",
11
+ "init_db",
12
+ "get_db",
13
+ "Opportunity",
14
+ "Source",
15
+ "OpportunityCategory",
16
+ "OpportunityStatus"
17
+ ]
backend/config.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PIOE Configuration Management
3
+ """
4
+ from pydantic_settings import BaseSettings
5
+ from functools import lru_cache
6
+ from typing import Literal
7
+
8
+
9
class Settings(BaseSettings):
    """Application settings loaded from environment variables.

    Every field has a default, so the class can be instantiated without any
    configuration present. Values are also read from the ``.env`` file named
    in ``Config``; variables that do not correspond to a field are ignored.
    """

    # AI Configuration
    ai_provider: Literal["gemini", "openai"] = "gemini"
    gemini_api_key: str = ""
    openai_api_key: str = ""

    # Reddit API
    reddit_client_id: str = ""
    reddit_client_secret: str = ""
    # Kept in sync with the value shipped in .env.example (PIOE/2.0)
    reddit_user_agent: str = "PIOE/2.0"

    # GitHub API
    github_token: str = ""

    # ===========================================
    # JOB BOARD APIs (Optional - get free keys)
    # ===========================================

    # Adzuna API (Free: 250 req/day)
    # Get at: https://developer.adzuna.com/
    adzuna_app_id: str = ""
    adzuna_api_key: str = ""

    # Jooble API (Free tier available)
    # Get at: https://jooble.org/api/about
    jooble_api_key: str = ""

    # RapidAPI LinkedIn Jobs (Free: 100 req/month)
    # Get at: https://rapidapi.com/jaypat87/api/linkedin-jobs-search
    rapidapi_key: str = ""

    # ===========================================
    # Database
    # ===========================================
    database_url: str = "sqlite:///./pioe.db"

    # Ingestion
    ingestion_interval_hours: int = 6

    # Scoring Thresholds (lower = more results saved)
    min_relevance_score: float = 0.3  # Lowered from 0.4 for more results
    min_novelty_score: float = 0.3
    min_credibility_score: float = 0.5

    # Keywords for relevance scoring
    high_priority_keywords: list[str] = [
        "computer vision", "robotics", "ROS", "PyTorch", "TensorFlow",
        "machine learning", "deep learning", "neural network",
        "internship", "fellowship", "scholarship", "grant", "funding",
        "hackathon", "competition", "challenge", "bounty",
        "research assistant", "PhD", "postdoc", "hiring",
        "early-stage", "seed", "Series A", "startup",
        "AI", "artificial intelligence", "data science", "NLP"
    ]

    class Config:
        # Settings source file and how to treat variables with no matching field
        env_file = ".env"
        env_file_encoding = "utf-8"
        extra = "ignore"
70
+
71
+
72
@lru_cache
def get_settings() -> Settings:
    """Return the shared Settings object.

    The result is memoised by ``lru_cache``, so ``Settings()`` is constructed
    only on the first call and reused afterwards.
    """
    instance = Settings()
    return instance
76
+
backend/database.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PIOE Database Configuration
3
+ """
4
+ from sqlalchemy import create_engine
5
+ from sqlalchemy.orm import sessionmaker, declarative_base
6
+ from .config import get_settings
7
+
8
settings = get_settings()

# SQLite URLs get check_same_thread=False; every other backend gets no extra
# connection arguments.
_connect_args = {"check_same_thread": False} if "sqlite" in settings.database_url else {}

engine = create_engine(settings.database_url, connect_args=_connect_args)

# Session factory bound to the engine; commits and flushes are explicit.
SessionLocal = sessionmaker(bind=engine, autoflush=False, autocommit=False)

# Declarative base class for the ORM models.
Base = declarative_base()
18
+
19
+
20
def get_db():
    """FastAPI dependency: yield a database session and always close it.

    The session is created from ``SessionLocal`` and closed in ``finally``,
    so it is released even if the consumer raises.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
27
+
28
+
29
def init_db():
    """Create the tables declared on ``Base`` using the module-level engine."""
    # Imported only for its side effect (presumably registering the model
    # classes on Base before the tables are created).
    from . import models  # noqa: F401

    metadata = Base.metadata
    metadata.create_all(bind=engine)
backend/delivery/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ """
2
+ PIOE Delivery Layer - Init
3
+ """
4
+ from .digest import DigestGenerator
5
+
6
+ __all__ = ["DigestGenerator"]
backend/delivery/digest.py ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PIOE Delivery Layer - Daily Digest Generator
3
+ """
4
+ from datetime import datetime, timedelta
5
+ from typing import Optional
6
+ from sqlalchemy.orm import Session
7
+
8
+ from ..models import Opportunity, OpportunityCategory, OpportunityStatus
9
+
10
+
11
class DigestGenerator:
    """
    Generates daily/weekly opportunity digests.
    Outputs in markdown format for easy reading.
    """

    def __init__(self, db: Session):
        """Keep the SQLAlchemy session that every query in this class uses."""
        self.db = db

    def generate_daily(self, limit: int = 10) -> str:
        """Generate today's top opportunities digest.

        Covers NEW opportunities discovered in the last 24 hours, best
        combined score first, at most ``limit`` entries.
        """
        since = datetime.utcnow() - timedelta(days=1)
        opportunities = self._top_new_since(since, limit)
        return self._format_digest(opportunities, "Daily Intelligence Brief")

    def generate_weekly(self, limit: int = 25) -> str:
        """Generate weekly opportunities digest (last 7 days, NEW only)."""
        since = datetime.utcnow() - timedelta(days=7)
        opportunities = self._top_new_since(since, limit)
        return self._format_digest(opportunities, "Weekly Intelligence Report")

    def generate_by_category(
        self,
        category: OpportunityCategory,
        limit: int = 10
    ) -> str:
        """Generate digest for a specific category (last 7 days, NEW only)."""
        since = datetime.utcnow() - timedelta(days=7)
        opportunities = self._top_new_since(
            since, limit, Opportunity.category == category
        )
        # Humanise the enum value the same way the per-item labels do, so a
        # value containing underscores does not leak into the heading.
        return self._format_digest(
            opportunities,
            f"{self._humanize(category.value)} Opportunities"
        )

    def generate_urgent(self, limit: int = 10) -> str:
        """Generate digest for time-sensitive opportunities.

        Selects NEW opportunities whose deadline lies within the next 14 days,
        soonest deadline first.
        """
        now = datetime.utcnow()
        soon = now + timedelta(days=14)

        opportunities = self.db.query(Opportunity).filter(
            Opportunity.deadline.isnot(None),
            Opportunity.deadline > now,
            Opportunity.deadline <= soon,
            Opportunity.status == OpportunityStatus.NEW
        ).order_by(
            Opportunity.deadline.asc()
        ).limit(limit).all()

        return self._format_digest(opportunities, "⚡ Urgent - Deadlines Approaching")

    def _top_new_since(self, since: datetime, limit: int, *extra_filters) -> list:
        """Return NEW opportunities discovered at/after ``since``, best score first."""
        return self.db.query(Opportunity).filter(
            Opportunity.discovered_at >= since,
            *extra_filters,
            Opportunity.status == OpportunityStatus.NEW
        ).order_by(
            Opportunity.combined_score.desc()
        ).limit(limit).all()

    @staticmethod
    def _humanize(value: str) -> str:
        """Turn an enum value such as ``open_source`` into ``Open Source``."""
        return value.replace("_", " ").title()

    def _enum_label(self, member) -> str:
        """Human-readable label for an enum member; ``Unknown`` when it is missing."""
        if member is None:
            return "Unknown"
        return self._humanize(member.value)

    def _format_digest(self, opportunities: list[Opportunity], title: str) -> str:
        """Format opportunities into markdown digest."""
        lines = [
            f"# {title}",
            f"*Generated: {datetime.utcnow().strftime('%Y-%m-%d %H:%M UTC')}*",
            "",
            f"**{len(opportunities)} opportunities detected**",
            "",
            "---",
            ""
        ]

        if not opportunities:
            lines.append("*No new opportunities matching your criteria.*")
            return "\n".join(lines)

        for i, opp in enumerate(opportunities, 1):
            lines.extend(self._format_opportunity(opp, i))

        # Summary stats
        lines.extend([
            "",
            "---",
            "",
            "## Quick Stats",
            "",
            self._generate_stats(opportunities)
        ])

        return "\n".join(lines)

    def _format_opportunity(self, opp: Opportunity, index: int) -> list[str]:
        """Format single opportunity as a list of markdown lines.

        Missing optional values (score, category, domain, raw text) are
        rendered with neutral placeholders instead of raising or printing
        ``None``.
        """
        # Category emoji
        cat_emoji = {
            OpportunityCategory.SCHOLARSHIP: "🎓",
            OpportunityCategory.FELLOWSHIP: "🏆",
            OpportunityCategory.INTERNSHIP: "💼",
            OpportunityCategory.JOB: "👔",
            OpportunityCategory.HACKATHON: "🚀",
            OpportunityCategory.COMPETITION: "🏅",
            OpportunityCategory.GRANT: "💰",
            OpportunityCategory.RESEARCH: "🔬",
            OpportunityCategory.OPEN_SOURCE: "💻",
            OpportunityCategory.CONFERENCE: "📅",
        }.get(opp.category, "📌")

        # Score indicator: up to five stars; a missing score counts as 0
        score = opp.combined_score or 0.0
        score_stars = "⭐" * min(int(score * 5), 5)

        lines = [
            f"### {index}. {cat_emoji} {opp.title}",
            "",
            f"**Category:** {self._enum_label(opp.category)}",
            f"**Domain:** {self._enum_label(opp.domain)}",
            f"**Source:** {opp.source_name}",
            f"**Score:** {score_stars} ({score:.2f})",
        ]

        if opp.deadline:
            days_left = (opp.deadline - datetime.utcnow()).days
            urgency = "🔴" if days_left < 7 else "🟡" if days_left < 14 else "🟢"
            lines.append(f"**Deadline:** {urgency} {opp.deadline.strftime('%Y-%m-%d')} ({days_left} days)")

        # Quote at most 300 characters of the raw text
        text = opp.raw_text or ""
        excerpt = f"{text[:300]}..." if len(text) > 300 else text

        lines.extend([
            "",
            f"> {excerpt}",
            "",
            f"🔗 [View Opportunity]({opp.url})",
            "",
            "---",
            ""
        ])

        return lines

    def _generate_stats(self, opportunities: list[Opportunity]) -> str:
        """Generate summary statistics: a markdown table of the five most common categories."""
        from collections import Counter

        categories = Counter(
            o.category.value for o in opportunities if o.category is not None
        )

        stats = ["| Metric | Value |", "|--------|-------|"]

        for cat, count in categories.most_common(5):
            stats.append(f"| {self._humanize(cat)} | {count} |")

        return "\n".join(stats)
backend/ingestion/__init__.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PIOE Ingestion Layer - Version 2.0
3
+ """
4
+ from .arxiv_client import ArxivClient
5
+ from .github_client import GitHubClient
6
+ from .rss_client import RSSClient
7
+ from .reddit_client import RedditClient
8
+ from .superteam_client import SuperteamClient
9
+ from .web_scraper import WebScraper
10
+ from .careers_client import CareersClient, InternshipClient
11
+ from .grants_client import GrantsClient, NigeriaGrantsClient
12
+ from .scheduler import IngestionScheduler
13
+
14
+ __all__ = [
15
+ "ArxivClient",
16
+ "GitHubClient",
17
+ "RSSClient",
18
+ "RedditClient",
19
+ "SuperteamClient",
20
+ "WebScraper",
21
+ "CareersClient",
22
+ "InternshipClient",
23
+ "GrantsClient",
24
+ "NigeriaGrantsClient",
25
+ "IngestionScheduler"
26
+ ]
27
+
backend/ingestion/arxiv_client.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PIOE arXiv Client
3
+
4
+ Fetches papers from the arXiv API for the cs.CV, cs.RO, cs.AI, cs.LG, and cs.CL categories.
5
+ """
6
+ import httpx
7
+ from datetime import datetime
8
+ from typing import Optional
9
+ import xml.etree.ElementTree as ET
10
+
11
+
12
class ArxivClient:
    """
    Client for arXiv API to fetch recent papers.
    High credibility source for academic research.
    """

    BASE_URL = "https://export.arxiv.org/api/query"

    # Target categories for PIOE
    CATEGORIES = [
        "cs.CV",  # Computer Vision
        "cs.RO",  # Robotics
        "cs.AI",  # Artificial Intelligence
        "cs.LG",  # Machine Learning
        "cs.CL",  # Computation and Language (NLP)
    ]

    # XML namespace used by the elements of the Atom feed
    _ATOM_NS = {"atom": "http://www.w3.org/2005/Atom"}

    def __init__(self, max_results: int = 50):
        """Remember how many papers to request per query."""
        self.max_results = max_results

    async def fetch(self, categories: Optional[list[str]] = None) -> list[dict]:
        """
        Fetch recent papers from specified categories.

        Args:
            categories: arXiv category codes; ``None`` or an empty list falls
                back to ``CATEGORIES``.

        Returns list of normalized opportunity dicts.

        Raises:
            httpx.HTTPStatusError: if the API answers with an error status.
        """
        categories = categories or self.CATEGORIES

        # Build query for multiple categories
        cat_query = " OR ".join(f"cat:{cat}" for cat in categories)

        params = {
            "search_query": cat_query,
            "start": 0,
            "max_results": self.max_results,
            "sortBy": "submittedDate",
            "sortOrder": "descending"
        }

        async with httpx.AsyncClient() as client:
            response = await client.get(
                self.BASE_URL,
                params=params,
                timeout=30,
                follow_redirects=True
            )
            response.raise_for_status()

        return self._parse_response(response.text)

    @staticmethod
    def _clean_text(element) -> str:
        """Return the element's text with all whitespace runs collapsed to one space.

        A missing element or an element without text yields ``""``. Collapsing
        (rather than replacing only newlines) removes the indentation that
        follows line breaks in wrapped titles and abstracts.
        """
        if element is None or element.text is None:
            return ""
        return " ".join(element.text.split())

    def _parse_response(self, xml_content: str) -> list[dict]:
        """Parse arXiv Atom feed into normalized opportunities."""
        ns = self._ATOM_NS
        opportunities = []

        # Parse XML
        root = ET.fromstring(xml_content)

        for entry in root.findall("atom:entry", ns):
            try:
                published = entry.find("atom:published", ns)
                link = entry.find("atom:id", ns)

                # Get authors (entries without a usable <name> are skipped)
                authors = []
                for author in entry.findall("atom:author", ns):
                    name = author.find("atom:name", ns)
                    if name is not None and name.text:
                        authors.append(name.text.strip())

                # Get categories
                categories = [
                    cat.get("term") for cat in entry.findall("atom:category", ns)
                ]

                opportunities.append({
                    "title": self._clean_text(entry.find("atom:title", ns)),
                    "raw_text": self._clean_text(entry.find("atom:summary", ns)),
                    "url": (link.text or "").strip() if link is not None else "",
                    "source_type": "arxiv",
                    "source_name": "arXiv",
                    "published_at": self._parse_date(published.text) if published is not None else None,
                    "metadata": {
                        "authors": authors,
                        "categories": categories
                    }
                })

            except Exception as e:
                # Best effort: one malformed entry must not discard the whole feed
                print(f"Error parsing arXiv entry: {e}")
                continue

        return opportunities

    def _parse_date(self, date_str: str) -> Optional[datetime]:
        """Parse arXiv date format (ISO 8601, usually with a trailing ``Z``).

        Returns ``None`` when the value is missing or not a valid ISO timestamp.
        """
        try:
            return datetime.fromisoformat(date_str.replace("Z", "+00:00"))
        except (AttributeError, TypeError, ValueError):
            return None
117
+
118
+
119
+ # Sync wrapper for non-async usage
120
def fetch_arxiv_sync(max_results: int = 50) -> list[dict]:
    """Blocking convenience wrapper: run ``ArxivClient.fetch`` to completion.

    Starts its own event loop through ``asyncio.run``.
    """
    import asyncio

    return asyncio.run(ArxivClient(max_results).fetch())
backend/ingestion/careers_client.py ADDED
@@ -0,0 +1,403 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PIOE Careers Client
3
+
4
+ Tracks job/internship opportunities from major tech companies.
5
+ Microsoft, NVIDIA, Google, Meta, OpenAI, DeepMind, etc.
6
+ """
7
+ import httpx
8
+ from datetime import datetime
9
+ from typing import Optional
10
+ from bs4 import BeautifulSoup
11
+ import re
12
+
13
+
14
class CareersClient:
    """
    Scrapes career pages from major tech companies.
    Focuses on AI, robotics, and computer vision roles.
    """

    # Target companies with their career page configurations
    COMPANIES = [
        # Microsoft
        {
            "name": "Microsoft",
            "search_url": "https://careers.microsoft.com/v2/global/en/search.json",
            "type": "api",
            "keywords": ["computer vision", "robotics", "machine learning", "AI", "research"],
            "filters": {"lc": "United States", "exp": "Internship"}
        },
        # NVIDIA
        {
            "name": "NVIDIA",
            "search_url": "https://nvidia.wd5.myworkdayjobs.com/wday/cxs/nvidia/NVIDIAExternalCareerSite/jobs",
            "type": "workday",
            "keywords": ["computer vision", "robotics", "deep learning", "AI research", "intern"]
        },
        # Google
        {
            "name": "Google",
            "rss_url": "https://careers.google.com/jobs/rss",
            "type": "rss",
            "keywords": ["machine learning", "research", "robotics", "computer vision", "intern"]
        },
        # Meta
        {
            "name": "Meta",
            "search_url": "https://www.metacareers.com/jobs",
            "type": "scrape",
            "keywords": ["AI", "research", "robotics", "computer vision", "intern"]
        },
        # OpenAI
        {
            "name": "OpenAI",
            "careers_url": "https://openai.com/careers",
            "type": "scrape",
            "keywords": ["research", "engineering", "intern"]
        },
        # DeepMind
        {
            "name": "DeepMind",
            "careers_url": "https://deepmind.google/about/careers/",
            "type": "scrape",
            "keywords": ["research", "intern", "robotics"]
        },
        # Boston Dynamics
        {
            "name": "Boston Dynamics",
            "careers_url": "https://bostondynamics.wd1.myworkdayjobs.com/Boston_Dynamics",
            "type": "workday",
            "keywords": ["robotics", "perception", "control", "intern"]
        },
        # Tesla (Optimus/AI)
        {
            "name": "Tesla AI",
            "careers_url": "https://www.tesla.com/careers/search/?query=AI%20robotics",
            "type": "scrape",
            "keywords": ["autopilot", "optimus", "robotics", "computer vision", "intern"]
        },
    ]

    # Internship-specific keywords
    INTERNSHIP_KEYWORDS = [
        "intern", "internship", "co-op", "summer", "student",
        "graduate", "new grad", "entry level", "early career"
    ]

    # Keywords up to this length are treated as acronyms ("AI") and must match
    # a whole word; substring matching would hit "maintain", "chair", "retail".
    _ACRONYM_MAX_LEN = 3

    def __init__(self):
        """Set the browser-like request headers sent with every HTTP call."""
        self._headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
        }

    async def fetch_all(self, internship_only: bool = False) -> list[dict]:
        """
        Fetch opportunities from all configured companies.

        A failure for one company is printed and does not stop the others.

        Args:
            internship_only: If True, filter to only internship positions
        """
        all_opportunities = []

        for company in self.COMPANIES:
            try:
                opps = await self.fetch_company(company)

                if internship_only:
                    opps = [o for o in opps if self._is_internship(o)]

                all_opportunities.extend(opps)
            except Exception as e:
                print(f"Error fetching {company['name']}: {e}")

        return all_opportunities

    async def fetch_company(self, company: dict) -> list[dict]:
        """Fetch jobs from a specific company.

        Dispatches on ``company["type"]``; unknown or missing types (including
        the ``"api"`` type used in ``COMPANIES``) fall back to HTML scraping.
        """
        handlers = {
            "scrape": self._scrape_careers_page,
            "rss": self._fetch_rss_careers,
            "workday": self._fetch_workday,
        }
        handler = handlers.get(company.get("type"), self._scrape_careers_page)
        return await handler(company)

    async def _scrape_careers_page(self, company: dict) -> list[dict]:
        """Scrape a generic careers page.

        Uses the first CSS selector from a list of common job-card patterns
        that matches anything, inspects at most 30 elements, and keeps those
        whose title matches the company's keywords.
        """
        from urllib.parse import urljoin

        url = company.get("careers_url") or company.get("search_url")
        if not url:
            print(f"No careers URL configured for {company.get('name', 'unknown company')}")
            return []

        async with httpx.AsyncClient() as client:
            response = await client.get(
                url,
                headers=self._headers,
                timeout=30,
                follow_redirects=True
            )
            response.raise_for_status()

        soup = BeautifulSoup(response.text, "html.parser")
        opportunities = []

        # Look for job listing elements (common patterns)
        job_selectors = [
            "article", ".job-listing", ".job-card", ".position",
            "[data-job]", ".career-item", ".opening"
        ]

        jobs = []
        for selector in job_selectors:
            jobs = soup.select(selector)
            if jobs:
                break

        for job in jobs[:30]:
            try:
                title_el = job.select_one("h2, h3, h4, .title, .job-title")
                link_el = job.select_one("a[href]")
                location_el = job.select_one(".location, .job-location")

                if not title_el:
                    continue

                title = title_el.get_text(strip=True)

                # Filter by keywords
                if not self._matches_keywords(title, company.get("keywords", [])):
                    continue

                # urljoin leaves absolute hrefs untouched and resolves relative ones
                link = ""
                if link_el and link_el.get("href"):
                    link = urljoin(url, link_el["href"])

                opportunities.append({
                    "title": f"[{company['name']}] {title}",
                    "raw_text": job.get_text(strip=True)[:500],
                    "url": link or url,
                    "source_type": "web_scrape",
                    "source_name": f"{company['name']} Careers",
                    "published_at": datetime.utcnow(),
                    "metadata": {
                        "company": company["name"],
                        "location": location_el.get_text(strip=True) if location_el else None,
                        "is_internship": self._is_internship({"title": title})
                    }
                })

            except Exception as e:
                print(f"Error parsing job listing: {e}")

        return opportunities

    @staticmethod
    def _workday_job_url(base_url: str, external_path) -> str:
        """Build an absolute posting URL from a Workday ``externalPath``.

        ``externalPath`` is site-relative, so it is unusable as a link on its own.
        NOTE(review): the layout ``https://<host>/<site>/<externalPath>`` follows
        Workday's public career-site convention - confirm against a live posting.
        Falls back to ``base_url`` when no path is supplied.
        """
        from urllib.parse import urlsplit

        if not external_path:
            return base_url
        if external_path.startswith(("http://", "https://")):
            return external_path

        parts = urlsplit(base_url)
        segments = [segment for segment in parts.path.split("/") if segment]
        # API endpoint shape: /wday/cxs/<tenant>/<site>/jobs -> public root /<site>
        if len(segments) >= 4 and segments[:2] == ["wday", "cxs"]:
            site_root = f"{parts.scheme}://{parts.netloc}/{segments[3]}"
        else:
            site_root = base_url.rstrip("/")
        return f"{site_root}/{external_path.lstrip('/')}"

    async def _fetch_workday(self, company: dict) -> list[dict]:
        """Fetch from Workday-based career sites.

        Posts a search built from the company's first three keywords; on any
        failure the error is printed and the page is scraped as HTML instead.
        """
        url = company.get("search_url") or company.get("careers_url")

        # Workday API format
        payload = {
            "limit": 20,
            "offset": 0,
            "searchText": " ".join(company.get("keywords", [])[:3])
        }

        try:
            async with httpx.AsyncClient() as client:
                response = await client.post(
                    url,
                    json=payload,
                    headers={**self._headers, "Content-Type": "application/json"},
                    timeout=30
                )
                response.raise_for_status()

            data = response.json()
            jobs = data.get("jobPostings", [])

            results = []
            for job in jobs:
                bullets = job.get("bulletFields") or []
                job_title = job.get("title", "")
                results.append({
                    "title": f"[{company['name']}] {job_title}",
                    "raw_text": bullets[0] if bullets else "",
                    "url": self._workday_job_url(url, job.get("externalPath")),
                    "source_type": "web_scrape",
                    "source_name": f"{company['name']} Careers",
                    "published_at": datetime.utcnow(),
                    "metadata": {
                        "company": company["name"],
                        "location": job.get("locationsText"),
                        "is_internship": self._is_internship({"title": job_title})
                    }
                })
            return results
        except Exception as e:
            print(f"Workday fetch error: {e}")
            return await self._scrape_careers_page(company)

    async def _fetch_rss_careers(self, company: dict) -> list[dict]:
        """Fetch from RSS-based career feeds.

        Reads at most 20 feed entries and keeps those whose title matches the
        company's keywords. An HTTP error status raises instead of letting an
        error page be parsed as an (empty) feed.
        """
        import feedparser

        url = company.get("rss_url")

        async with httpx.AsyncClient() as client:
            response = await client.get(
                url,
                headers=self._headers,
                timeout=30,
                follow_redirects=True
            )
            response.raise_for_status()
            content = response.text

        feed = feedparser.parse(content)
        opportunities = []

        for entry in feed.entries[:20]:
            title = entry.get("title", "")

            if not self._matches_keywords(title, company.get("keywords", [])):
                continue

            opportunities.append({
                "title": f"[{company['name']}] {title}",
                "raw_text": entry.get("summary", "")[:500],
                "url": entry.get("link", ""),
                "source_type": "rss",
                "source_name": f"{company['name']} Careers",
                "published_at": datetime.utcnow(),
                "metadata": {
                    "company": company["name"],
                    "is_internship": self._is_internship({"title": title})
                }
            })

        return opportunities

    @staticmethod
    def _has_word(text: str, keyword: str) -> bool:
        """Case-insensitive whole-word (or phrase) match, allowing a plural ``s``."""
        pattern = rf"(?<!\w){re.escape(keyword)}s?(?!\w)"
        return re.search(pattern, text, re.IGNORECASE) is not None

    def _matches_keywords(self, text: str, keywords: list[str]) -> bool:
        """Check if text matches any keyword.

        An empty keyword list matches everything. Short acronym-like keywords
        must appear as whole words; longer keywords keep case-insensitive
        substring matching, so "research" still matches "Researcher".
        """
        if not keywords:
            return True

        text = text or ""
        text_lower = text.lower()
        for kw in keywords:
            if len(kw) <= self._ACRONYM_MAX_LEN:
                if self._has_word(text, kw):
                    return True
            elif kw.lower() in text_lower:
                return True
        return False

    def _is_internship(self, opportunity: dict) -> bool:
        """Check if opportunity is an internship.

        Looks for any ``INTERNSHIP_KEYWORDS`` entry as a whole word or phrase in
        the title and raw text, so "intern" no longer matches "international"
        or "internal". Missing or ``None`` fields are treated as empty.
        """
        title = opportunity.get("title") or ""
        text = opportunity.get("raw_text") or ""
        combined = f"{title} {text}"

        return any(self._has_word(combined, kw) for kw in self.INTERNSHIP_KEYWORDS)
290
+
291
+
292
class InternshipClient:
    """
    Dedicated client for finding internship opportunities.

    Combines the internship postings returned by ``CareersClient`` with
    listings scraped from a few internship-focused job sites.
    """

    # Internship-focused sites
    INTERNSHIP_SOURCES = [
        {
            "name": "LinkedIn Internships",
            "url": "https://www.linkedin.com/jobs/search/?keywords=AI%20robotics%20internship",
            "type": "scrape"
        },
        {
            "name": "Indeed Internships",
            "url": "https://www.indeed.com/jobs?q=machine+learning+intern",
            "type": "scrape"
        },
        {
            "name": "Glassdoor Internships",
            "url": "https://www.glassdoor.com/Job/computer-vision-intern-jobs-SRCH_KO0,22.htm",
            "type": "scrape"
        },
        {
            "name": "WayUp",
            "url": "https://www.wayup.com/s/internships/computer-science/",
            "type": "scrape"
        },
        {
            "name": "Handshake",
            "url": "https://joinhandshake.com",
            "type": "scrape"
        }
    ]

    def __init__(self):
        """Create the company-careers client and the request headers."""
        self.careers_client = CareersClient()
        self._headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
        }

    async def fetch_all(self, max_sources: int = 3) -> list[dict]:
        """Fetch internships from company career pages and internship sites.

        Args:
            max_sources: How many entries of ``INTERNSHIP_SOURCES`` to
                scrape, taken from the front of the list. Defaults to 3 to
                avoid rate limiting.

        Returns:
            All collected opportunity dicts. A failing source is reported
            with ``print`` and skipped; it never aborts the whole run.
        """
        opportunities = []

        # Get internships from major companies
        try:
            company_internships = await self.careers_client.fetch_all(internship_only=True)
            opportunities.extend(company_internships)
        except Exception as e:
            print(f"Careers client error: {e}")

        # Scrape internship-focused sites
        for source in self.INTERNSHIP_SOURCES[:max_sources]:
            try:
                opps = await self._scrape_internship_site(source)
                opportunities.extend(opps)
            except Exception as e:
                print(f"Error fetching {source['name']}: {e}")

        return opportunities

    async def _scrape_internship_site(self, source: dict) -> list[dict]:
        """Scrape one internship-focused site.

        Args:
            source: Entry of ``INTERNSHIP_SOURCES``; reads ``name`` and ``url``.

        Returns:
            Opportunity dicts for up to 15 job cards that have a recognizable
            title element. An empty list when the page cannot be fetched.
        """
        # Resolved once per call instead of once per card.
        from urllib.parse import urljoin

        try:
            async with httpx.AsyncClient() as client:
                response = await client.get(
                    source["url"],
                    headers=self._headers,
                    timeout=30,
                    follow_redirects=True
                )
                response.raise_for_status()
        except Exception as e:
            # Still best-effort, but leave a trace of why the source is empty.
            print(f"HTTP error for {source['name']}: {e}")
            return []

        soup = BeautifulSoup(response.text, "html.parser")
        opportunities = []

        # Find job cards
        cards = soup.select(".job-card, .job-listing, article, .result")[:15]

        for card in cards:
            try:
                title_el = card.select_one("h2, h3, .title, .job-title")
                if not title_el:
                    continue

                title = title_el.get_text(strip=True)
                link_el = card.select_one("a[href]")

                link = ""
                if link_el and link_el.get("href"):
                    # Relative hrefs are resolved against the listing page.
                    link = urljoin(source["url"], link_el["href"])

                opportunities.append({
                    "title": f"[Internship] {title}",
                    "raw_text": card.get_text(strip=True)[:500],
                    "url": link or source["url"],
                    "source_type": "web_scrape",
                    "source_name": source["name"],
                    "published_at": datetime.utcnow(),
                    "metadata": {
                        "is_internship": True,
                        "source_site": source["name"]
                    }
                })
            except Exception as e:
                # One malformed card must not discard the rest of the page.
                print(f"Error parsing {source['name']} card: {e}")
                continue

        return opportunities
backend/ingestion/github_client.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PIOE GitHub Client
3
+
4
+ Tracks trending repositories and star velocity for AI/Robotics/CV projects.
5
+ """
6
+ import httpx
7
+ from datetime import datetime, timedelta
8
+ from typing import Optional
9
+
10
+
11
class GitHubClient:
    """
    Client for the GitHub search API used to discover popular repositories.

    Search results are converted into normalized opportunity dicts that
    carry the star, fork and open-issue counts reported by the API.
    """

    BASE_URL = "https://api.github.com"

    # Search queries for relevant topics
    SEARCH_TOPICS = [
        "computer-vision",
        "robotics",
        "machine-learning",
        "deep-learning",
        "ros",
        "pytorch",
        "transformers",
        "llm"
    ]

    def __init__(self, token: Optional[str] = None, max_results: int = 30):
        """
        Args:
            token: Optional GitHub token; when given it is sent as a Bearer
                ``Authorization`` header.
            max_results: Upper bound on the list returned by ``fetch_trending``.
        """
        self.token = token
        self.max_results = max_results
        self._headers = {
            "Accept": "application/vnd.github+json",
            "X-GitHub-Api-Version": "2022-11-28"
        }
        if token:
            self._headers["Authorization"] = f"Bearer {token}"

    async def fetch_trending(self, topics: Optional[list[str]] = None) -> list[dict]:
        """
        Fetch recently active, well-starred repositories in the target topics.

        Args:
            topics: Topics to search; defaults to ``SEARCH_TOPICS``. Only the
                first five are queried to avoid rate limiting.

        Returns:
            Normalized opportunity dicts, de-duplicated by URL (first
            occurrence wins) and truncated to ``max_results``.
        """
        topics = topics or self.SEARCH_TOPICS
        opportunities = []

        # Repositories pushed to within the last 7 days
        week_ago = (datetime.utcnow() - timedelta(days=7)).strftime("%Y-%m-%d")

        for topic in topics[:5]:  # Limit to avoid rate limiting
            try:
                repos = await self._search_repos(topic, week_ago)
                opportunities.extend(repos)
            except Exception as e:
                print(f"GitHub search error for {topic}: {e}")

        # Deduplicate by URL; a repo can carry several of the searched topics.
        seen_urls = set()
        unique = []
        for opp in opportunities:
            if opp["url"] not in seen_urls:
                seen_urls.add(opp["url"])
                unique.append(opp)

        return unique[:self.max_results]

    async def _search_repos(self, topic: str, since_date: str) -> list[dict]:
        """Search repositories with ``topic`` pushed after ``since_date`` and >50 stars.

        Raises whatever the HTTP client raises, including for non-success
        status codes (``raise_for_status``).
        """
        query = f"topic:{topic} pushed:>{since_date} stars:>50"

        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"{self.BASE_URL}/search/repositories",
                params={
                    "q": query,
                    "sort": "stars",
                    "order": "desc",
                    "per_page": 10
                },
                headers=self._headers,
                timeout=30,
                follow_redirects=True
            )
            response.raise_for_status()

        data = response.json()
        return self._parse_repos(data.get("items", []), topic)

    def _parse_repos(self, repos: list, topic: str) -> list[dict]:
        """Parse GitHub repository objects into normalized opportunities.

        Args:
            repos: Items of a search response; each must provide
                ``full_name``, ``html_url`` and ``owner.login``.
            topic: Label used in ``source_name`` (``GitHub/<topic>``).

        Returns:
            One opportunity per parseable repo. Entries that raise while
            being parsed are reported with ``print`` and skipped.
        """
        opportunities = []

        for repo in repos:
            try:
                # The API sends ``"description": null`` for repos without one;
                # normalize once so slicing below cannot fail on None.
                description = repo.get("description") or ""

                opportunity = {
                    "title": f"[GitHub] {repo['full_name']}: {description[:100]}",
                    "raw_text": description,
                    "url": repo["html_url"],
                    "source_type": "github",
                    "source_name": f"GitHub/{topic}",
                    "published_at": self._parse_date(repo.get("created_at")),
                    "social_engagement": repo.get("stargazers_count", 0),
                    "metadata": {
                        "owner": repo["owner"]["login"],
                        "stars": repo.get("stargazers_count", 0),
                        "forks": repo.get("forks_count", 0),
                        "language": repo.get("language"),
                        "topics": repo.get("topics", []),
                        "open_issues": repo.get("open_issues_count", 0),
                        "updated_at": repo.get("updated_at")
                    }
                }
                opportunities.append(opportunity)
            except Exception as e:
                print(f"Error parsing repo: {e}")

        return opportunities

    async def fetch_gsoc_repos(self) -> list[dict]:
        """Fetch Google Summer of Code related repositories.

        Returns:
            Opportunities whose title prefix is ``[GSoC]`` instead of
            ``[GitHub]``. HTTP errors propagate to the caller.
        """
        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"{self.BASE_URL}/search/repositories",
                params={
                    "q": "topic:gsoc OR topic:google-summer-of-code",
                    "sort": "updated",
                    "per_page": 20
                },
                headers=self._headers,
                timeout=30,
                follow_redirects=True
            )
            response.raise_for_status()

        data = response.json()
        repos = self._parse_repos(data.get("items", []), "gsoc")

        # Mark as open source opportunity
        for repo in repos:
            repo["title"] = f"[GSoC] {repo['title'].replace('[GitHub] ', '')}"

        return repos

    def _parse_date(self, date_str: Optional[str]) -> Optional[datetime]:
        """Parse an ISO-8601 timestamp (trailing ``Z`` accepted); None if absent or invalid."""
        if not date_str:
            return None
        try:
            return datetime.fromisoformat(date_str.replace("Z", "+00:00"))
        except Exception:
            return None
backend/ingestion/grants_client.py ADDED
@@ -0,0 +1,385 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PIOE Grants Client - Version 2.0
3
+
4
+ Fetches grant opportunities from crypto ecosystems and funding platforms.
5
+ High-leverage opportunities with money + credibility + access.
6
+ """
7
+ import httpx
8
+ from datetime import datetime
9
+ from typing import Optional
10
+ from bs4 import BeautifulSoup
11
+
12
+
13
class GrantsClient:
    """
    Collects grant-program opportunities from crypto ecosystem sites.

    Every configured program page is downloaded and summarized as a single
    opportunity; Gitcoin rounds can be requested separately through
    ``fetch_active_rounds``.
    """

    # Program landing pages. ``typical_size`` is a (min, max) pair that is
    # copied into the opportunity metadata as grant_size_min / grant_size_max.
    GRANT_SOURCES = [
        # Ethereum Ecosystem
        {
            "name": "Ethereum Foundation Grants",
            "url": "https://esp.ethereum.foundation/",
            "ecosystem": "ethereum",
            "type": "ecosystem_grant",
            "typical_size": (5000, 100000),
        },
        # Solana Ecosystem
        {
            "name": "Solana Foundation Grants",
            "url": "https://solana.org/grants",
            "ecosystem": "solana",
            "type": "ecosystem_grant",
            "typical_size": (5000, 50000),
        },
        # Base (Coinbase L2)
        {
            "name": "Base Builder Grants",
            "url": "https://base.org/builders",
            "ecosystem": "base",
            "type": "ecosystem_grant",
            "typical_size": (5000, 25000),
        },
        # Starknet
        {
            "name": "Starknet Grants",
            "url": "https://www.starknet.io/ecosystem/grants/",
            "ecosystem": "starknet",
            "type": "ecosystem_grant",
            "typical_size": (5000, 50000),
        },
        # Gitcoin
        {
            "name": "Gitcoin Grants",
            "url": "https://gitcoin.co/grants",
            "ecosystem": "gitcoin",
            "type": "micro_grant",
            "typical_size": (500, 10000),
        },
        # Protocol-specific
        {
            "name": "Uniswap Grants",
            "url": "https://www.uniswapfoundation.org/grants",
            "ecosystem": "ethereum",
            "type": "ecosystem_grant",
            "typical_size": (10000, 100000),
        },
        {
            "name": "Aave Grants DAO",
            "url": "https://aavegrants.org/",
            "ecosystem": "ethereum",
            "type": "ecosystem_grant",
            "typical_size": (5000, 100000),
        },
        {
            "name": "Polygon Grants",
            "url": "https://polygon.technology/village/grants",
            "ecosystem": "polygon",
            "type": "ecosystem_grant",
            "typical_size": (5000, 50000),
        },
    ]

    # RSS/API sources for grants (not referenced by any method of this class).
    GRANT_RSS_FEEDS = [
        {
            "name": "Ethereum Blog - Grants",
            "url": "https://blog.ethereum.org/feed.xml",
            "filter_keywords": ["grant", "funding", "ecosystem"],
        },
    ]

    def __init__(self):
        """Prepare the browser-like User-Agent header sent with every request."""
        self._headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
        }

    async def fetch_all(self) -> list[dict]:
        """Scrape every entry of ``GRANT_SOURCES`` and return the combined results.

        A source that raises is reported with ``print`` and skipped.
        """
        collected = []

        for program in self.GRANT_SOURCES:
            try:
                collected.extend(await self._scrape_grant_page(program))
            except Exception as e:
                print(f"Error fetching {program['name']}: {e}")

        return collected

    async def _scrape_grant_page(self, source: dict) -> list[dict]:
        """Download one grant-program page and describe it as a single opportunity.

        Returns an empty list when the request fails or the status is not 200;
        otherwise a one-element list.
        """
        try:
            async with httpx.AsyncClient() as http:
                page = await http.get(
                    source["url"],
                    headers=self._headers,
                    timeout=30,
                    follow_redirects=True
                )

            if page.status_code != 200:
                return []

            markup = page.text
        except Exception as e:
            print(f"HTTP error for {source['name']}: {e}")
            return []

        parsed_page = BeautifulSoup(markup, "html.parser")

        # The page describes the program as a whole rather than individual
        # grants, so exactly one opportunity is produced per source.
        return [{
            "title": f"[{source['ecosystem'].upper()}] {source['name']}",
            "raw_text": self._extract_page_text(parsed_page)[:2000],
            "url": source["url"],
            "source_type": "grant_platform",
            "source_name": source["name"],
            "published_at": datetime.utcnow(),
            "metadata": {
                "ecosystem": source["ecosystem"],
                "grant_type": source["type"],
                "grant_size_min": source["typical_size"][0],
                "grant_size_max": source["typical_size"][1],
                "region": "global",
                "technical_depth": "intermediate",
            }
        }]

    def _extract_page_text(self, soup: BeautifulSoup) -> str:
        """Return the page text with whitespace collapsed, capped at 2000 characters.

        Script, style, nav, footer and header elements are removed from
        ``soup`` in place before the text is read.
        """
        for unwanted in soup(["script", "style", "nav", "footer", "header"]):
            unwanted.decompose()

        words = soup.get_text(separator=" ", strip=True).split()
        return " ".join(words)[:2000]

    async def fetch_active_rounds(self) -> list[dict]:
        """Request the active Gitcoin rounds and convert them to opportunities.

        Any failure is printed; failures and non-200 answers both produce an
        empty list.
        """
        try:
            async with httpx.AsyncClient() as http:
                # This is a simplified version - actual API may differ
                reply = await http.get(
                    "https://api.gitcoin.co/grants/rounds/active",
                    headers=self._headers,
                    timeout=30,
                    follow_redirects=True
                )

            if reply.status_code == 200:
                return self._parse_gitcoin_rounds(reply.json())
        except Exception as e:
            print(f"Error fetching Gitcoin rounds: {e}")

        return []

    def _parse_gitcoin_rounds(self, data: dict) -> list[dict]:
        """Map each entry of ``data["rounds"]`` to an opportunity dict."""
        return [
            {
                "title": f"[GITCOIN] {entry.get('name', 'Gitcoin Round')}",
                "raw_text": entry.get("description", ""),
                "url": f"https://gitcoin.co/grants/{entry.get('id', '')}",
                "source_type": "grant_platform",
                "source_name": "Gitcoin",
                "published_at": datetime.utcnow(),
                "deadline": self._parse_date(entry.get("end_date")),
                "metadata": {
                    "ecosystem": "gitcoin",
                    "grant_type": "micro_grant",
                    "matching_pool": entry.get("matching_pool", 0),
                    "grant_size_min": 100,
                    "grant_size_max": 10000,
                    "region": "global",
                }
            }
            for entry in data.get("rounds", [])
        ]

    def _parse_date(self, date_str: Optional[str]) -> Optional[datetime]:
        """Convert an ISO-8601 string (trailing ``Z`` allowed) to a datetime, else None."""
        if date_str:
            try:
                return datetime.fromisoformat(date_str.replace("Z", "+00:00"))
            except Exception:
                pass
        return None
220
+
221
+
222
class NigeriaGrantsClient:
    """
    Collects funding opportunities aimed at Nigeria and the wider African
    tech scene: one opportunity per configured program site, plus news-feed
    entries that mention funding-related terms.
    """

    # Program sites that are summarized as one opportunity each.
    NIGERIA_SOURCES = [
        {
            "name": "NITDA Programs",
            "url": "https://nitda.gov.ng/",
            "type": "innovation_fund",
            "region": "nigeria",
        },
        {
            "name": "CcHub Accelerator",
            "url": "https://cchubnigeria.com/",
            "type": "grant",
            "region": "nigeria",
        },
        {
            "name": "Tony Elumelu Foundation",
            "url": "https://www.tonyelumelufoundation.org/",
            "type": "grant",
            "region": "africa",
        },
        {
            "name": "Ventures Platform",
            "url": "https://www.venturesplatform.com/",
            "type": "investment",
            "region": "africa",
        },
        {
            "name": "BoI Youth Entrepreneurship",
            "url": "https://www.boi.ng/",
            "type": "innovation_fund",
            "region": "nigeria",
        },
    ]

    # News feeds that are filtered for opportunity-related entries.
    NIGERIA_RSS = [
        {"name": "TechCabal", "url": "https://techcabal.com/feed/"},
        {"name": "Disrupt Africa", "url": "https://disrupt-africa.com/feed/"},
    ]

    def __init__(self):
        """Prepare the browser-like User-Agent header sent with every request."""
        self._headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
        }

    async def fetch_all(self) -> list[dict]:
        """Gather opportunities from the program sites first, then from the feeds.

        A source or feed that raises is reported with ``print`` and skipped.
        """
        gathered = []

        for site in self.NIGERIA_SOURCES:
            try:
                gathered.extend(await self._fetch_source(site))
            except Exception as e:
                print(f"Error fetching {site['name']}: {e}")

        for feed in self.NIGERIA_RSS:
            try:
                gathered.extend(await self._fetch_rss(feed))
            except Exception as e:
                print(f"Error fetching {feed['name']}: {e}")

        return gathered

    async def _fetch_source(self, source: dict) -> list[dict]:
        """Download a program site and describe it as a single opportunity.

        Returns an empty list when the request fails or the status is not 200.
        """
        try:
            async with httpx.AsyncClient() as http:
                page = await http.get(
                    source["url"],
                    headers=self._headers,
                    timeout=30,
                    follow_redirects=True
                )

            if page.status_code != 200:
                return []

            markup = page.text
        except Exception as e:
            print(f"HTTP error for {source['name']}: {e}")
            return []

        parsed_page = BeautifulSoup(markup, "html.parser")

        return [{
            "title": f"[NIGERIA] {source['name']}",
            "raw_text": self._extract_text(parsed_page)[:2000],
            "url": source["url"],
            "source_type": "gov_portal",
            "source_name": source["name"],
            "published_at": datetime.utcnow(),
            "metadata": {
                "region": source["region"],
                "grant_type": source["type"],
                "nigeria_specific": True,
            }
        }]

    async def _fetch_rss(self, feed: dict) -> list[dict]:
        """Download a news feed and keep entries that mention an opportunity term.

        Only the first 20 entries are inspected; the match is a
        case-insensitive substring test on title and summary.
        """
        import feedparser

        try:
            async with httpx.AsyncClient() as http:
                reply = await http.get(
                    feed["url"],
                    headers=self._headers,
                    timeout=30,
                    follow_redirects=True
                )
            body = reply.text
        except Exception as e:
            print(f"Error fetching {feed['name']}: {e}")
            return []

        # Terms whose presence marks an entry as a potential opportunity.
        opportunity_keywords = [
            "grant", "funding", "accelerator", "apply", "opportunity",
            "fellowship", "program", "investment", "startup", "launch"
        ]

        matches = []
        for entry in feedparser.parse(body).entries[:20]:
            headline = entry.get("title", "").lower()
            blurb = entry.get("summary", "").lower()

            relevant = any(kw in headline or kw in blurb for kw in opportunity_keywords)
            if not relevant:
                continue

            matches.append({
                "title": f"[AFRICA] {entry.get('title', '')}",
                "raw_text": entry.get("summary", "")[:2000],
                "url": entry.get("link", ""),
                "source_type": "rss",
                "source_name": feed["name"],
                "published_at": datetime.utcnow(),
                "metadata": {
                    "region": "africa",
                    "africa_focus": True,
                }
            })

        return matches

    def _extract_text(self, soup: BeautifulSoup) -> str:
        """Return the page text with whitespace collapsed.

        Script, style, nav and footer elements are removed from ``soup`` in
        place before the text is read.
        """
        for unwanted in soup(["script", "style", "nav", "footer"]):
            unwanted.decompose()
        words = soup.get_text(separator=" ", strip=True).split()
        return " ".join(words)
backend/ingestion/jobboard_client.py ADDED
@@ -0,0 +1,472 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PIOE Job Board Client
3
+
4
+ Fetches REAL job opportunities from structured job board APIs.
5
+ These return actual job listings, not discussions.
6
+
7
+ Supports:
8
+ - Arbeitnow (free, no key needed)
9
+ - TheMuse (free, no key needed)
10
+ - Remotive (free, no key needed)
11
+ - Adzuna (free key, 250 req/day)
12
+ - Jooble (free key, aggregates LinkedIn/Indeed/Glassdoor)
13
+ - RapidAPI LinkedIn (free key, 100 req/month)
14
+ """
15
+ import httpx
16
+ from datetime import datetime
17
+ from typing import Optional
18
+ import re
19
+
20
+
21
+ class JobBoardClient:
22
+ """
23
+ Client for structured job board APIs.
24
+ Returns actual job listings you can apply to.
25
+
26
+ Usage:
27
+ client = JobBoardClient(
28
+ adzuna_app_id="xxx",
29
+ adzuna_api_key="xxx",
30
+ jooble_api_key="xxx",
31
+ rapidapi_key="xxx"
32
+ )
33
+ jobs = await client.fetch_all()
34
+ """
35
+
36
+ def __init__(
37
+ self,
38
+ adzuna_app_id: str = "",
39
+ adzuna_api_key: str = "",
40
+ jooble_api_key: str = "",
41
+ rapidapi_key: str = ""
42
+ ):
43
+ self.adzuna_app_id = adzuna_app_id
44
+ self.adzuna_api_key = adzuna_api_key
45
+ self.jooble_api_key = jooble_api_key
46
+ self.rapidapi_key = rapidapi_key
47
+
48
async def fetch_all(self) -> list[dict]:
    """Query every usable job-board source and return the combined listings.

    The three key-less sources (Arbeitnow, TheMuse, Remotive) are always
    queried. Adzuna, Jooble and the RapidAPI LinkedIn source are queried only
    when their credentials are non-empty. Each source prints either its
    result count or its error, and a failing source never stops the others.
    """
    adzuna_ready = bool(self.adzuna_app_id and self.adzuna_api_key)

    # (enabled, deferred call, success template, failure template), in query
    # order. The calls are wrapped in lambdas so a method is only looked up
    # when its source is actually queried.
    plan = [
        (True, lambda: self.fetch_arbeitnow(),
         " Arbeitnow: {} jobs", " Arbeitnow error: {}"),
        (True, lambda: self.fetch_themuse(),
         " TheMuse: {} jobs", " TheMuse error: {}"),
        (True, lambda: self.fetch_remotive(),
         " Remotive: {} remote jobs", " Remotive error: {}"),
        (adzuna_ready, lambda: self.fetch_adzuna(),
         " Adzuna: {} jobs", " Adzuna error: {}"),
        (bool(self.jooble_api_key), lambda: self.fetch_jooble(),
         " Jooble: {} jobs (LinkedIn/Indeed/Glassdoor)", " Jooble error: {}"),
        (bool(self.rapidapi_key), lambda: self.fetch_linkedin_rapidapi(),
         " LinkedIn (via RapidAPI): {} jobs", " LinkedIn error: {}"),
    ]

    combined = []
    for enabled, start, ok_line, fail_line in plan:
        if not enabled:
            continue
        try:
            batch = await start()
            combined.extend(batch)
            print(ok_line.format(len(batch)))
        except Exception as e:
            print(fail_line.format(e))

    return combined
108
+
109
+ # ===========================================
110
+ # FREE APIs (No registration needed)
111
+ # ===========================================
112
+
113
async def fetch_arbeitnow(self) -> list[dict]:
    """Collect tech-related listings from the public Arbeitnow job-board API.

    Only the first 30 entries of the response's ``data`` array are examined.
    An entry is kept when its lower-cased title or tags contain one of the
    relevance terms below (substring match). A non-200 answer yields an empty
    list; any exception is printed and whatever was collected so far is
    returned.
    """
    # Substrings that mark a posting as a relevant tech job.
    relevance_terms = ["machine learning", "ai", "data", "engineer", "developer",
                       "software", "python", "intern", "research", "robotics",
                       "backend", "frontend", "fullstack", "devops"]
    found = []

    try:
        endpoint = "https://www.arbeitnow.com/api/job-board-api"

        async with httpx.AsyncClient() as session:
            reply = await session.get(
                endpoint,
                headers={"User-Agent": "PIOE/2.0"},
                timeout=30
            )

        if reply.status_code != 200:
            return []

        for posting in reply.json().get("data", [])[:30]:
            searchable = "{} {}".format(
                (posting.get("title") or "").lower(),
                " ".join(posting.get("tags") or []).lower(),
            )

            if all(term not in searchable for term in relevance_terms):
                continue

            found.append({
                "title": f"[Arbeitnow] {posting.get('title', '')}",
                "raw_text": self._strip_html(posting.get("description", ""))[:2000],
                "url": posting.get("url", ""),
                "source_type": "job",
                "source_name": f"Arbeitnow ({posting.get('company_name', 'Unknown')})",
                "published_at": self._parse_date(posting.get("created_at")),
                "metadata": {
                    "company": posting.get("company_name"),
                    "location": posting.get("location"),
                    "remote": posting.get("remote", False),
                    "tags": posting.get("tags", []),
                    "region": "remote_global" if posting.get("remote") else "global"
                }
            })

    except Exception as e:
        print(f" Arbeitnow fetch error: {e}")

    return found
165
+
166
async def fetch_themuse(self) -> list[dict]:
    """Collect listings from The Muse public jobs API.

    Requests page 1 for each of three categories and keeps at most the first
    10 results per category. A category that answers with a non-200 status is
    skipped; a category that raises is reported with ``print`` and the loop
    moves on to the next one.
    """
    endpoint = "https://www.themuse.com/api/public/jobs"
    found = []

    for category in ["Data Science", "Engineering", "Software Engineering"]:
        try:
            async with httpx.AsyncClient() as session:
                reply = await session.get(
                    endpoint,
                    params={"category": category, "page": 1},
                    headers={"User-Agent": "PIOE/2.0"},
                    timeout=30
                )

            if reply.status_code != 200:
                continue

            for posting in reply.json().get("results", [])[:10]:
                employer = posting.get("company", {})
                levels = posting.get("levels")

                found.append({
                    "title": f"[TheMuse] {posting.get('name', '')}",
                    "raw_text": self._strip_html(posting.get("contents", ""))[:2000],
                    "url": posting.get("refs", {}).get("landing_page", ""),
                    "source_type": "job",
                    "source_name": f"TheMuse ({employer.get('name', 'Unknown')})",
                    "published_at": self._parse_date(posting.get("publication_date")),
                    "metadata": {
                        "company": employer.get("name"),
                        "locations": [place.get("name") for place in posting.get("locations", [])],
                        # Only the first listed level is recorded.
                        "level": levels[0].get("name") if levels else None,
                        "region": "global"
                    }
                })

        except Exception as e:
            print(f" TheMuse '{category}' error: {e}")

    return found
210
+
211
async def fetch_remotive(self) -> list[dict]:
    """Collect remote listings from the Remotive public API.

    Requests up to 15 jobs for each of three categories and drops postings
    whose lower-cased title contains one of the non-tech markers below. A
    category that answers with a non-200 status is skipped; a category that
    raises is reported with ``print`` and the loop moves on.
    """
    # Title substrings that mark a posting as a non-tech role.
    non_tech_markers = ["sales", "marketing", "recruiter", "hr ", "customer support"]
    endpoint = "https://remotive.com/api/remote-jobs"
    found = []

    for category in ["software-dev", "data", "devops-sysadmin"]:
        try:
            query = {"category": category, "limit": 15}

            async with httpx.AsyncClient() as session:
                reply = await session.get(endpoint, params=query, timeout=30)

            if reply.status_code != 200:
                continue

            for posting in reply.json().get("jobs", []):
                lowered_title = (posting.get("title") or "").lower()

                is_non_tech = False
                for marker in non_tech_markers:
                    if marker in lowered_title:
                        is_non_tech = True
                        break
                if is_non_tech:
                    continue

                found.append({
                    "title": f"[Remote] {posting.get('title', '')}",
                    "raw_text": self._strip_html(posting.get("description", ""))[:2000],
                    "url": posting.get("url", ""),
                    "source_type": "job",
                    "source_name": f"Remotive ({posting.get('company_name', 'Unknown')})",
                    "published_at": self._parse_date(posting.get("publication_date")),
                    "metadata": {
                        "company": posting.get("company_name"),
                        "location": posting.get("candidate_required_location"),
                        "job_type": posting.get("job_type"),
                        "salary": posting.get("salary"),
                        "tags": posting.get("tags", []),
                        "region": "remote_global"
                    }
                })

        except Exception as e:
            print(f" Remotive '{category}' error: {e}")

    return found
259
+
260
+ # ===========================================
261
+ # APIs WITH FREE API KEYS
262
+ # ===========================================
263
+
264
async def fetch_adzuna(self) -> list[dict]:
    """
    Collect listings from the Adzuna search API (US endpoint, page 1).

    Free tier: 250 requests/day
    Get key at: https://developer.adzuna.com/

    Only the first two search terms are queried, 10 results each. A term that
    answers with a non-200 status is skipped; a term that raises is reported
    with ``print`` and the loop moves on.
    """
    search_terms = ["machine learning", "AI engineer", "data scientist", "robotics"]
    endpoint = "https://api.adzuna.com/v1/api/jobs/us/search/1"
    found = []

    # Limit to conserve quota
    for keyword in search_terms[:2]:
        try:
            query = {
                "app_id": self.adzuna_app_id,
                "app_key": self.adzuna_api_key,
                "what": keyword,
                "results_per_page": 10,
                "content-type": "application/json"
            }

            async with httpx.AsyncClient() as session:
                reply = await session.get(endpoint, params=query, timeout=30)

            if reply.status_code != 200:
                continue

            for posting in reply.json().get("results", []):
                employer = posting.get("company", {})
                place = posting.get("location", {})

                found.append({
                    "title": f"[Adzuna] {posting.get('title', '')}",
                    "raw_text": posting.get("description", "")[:2000],
                    "url": posting.get("redirect_url", ""),
                    "source_type": "job",
                    "source_name": f"Adzuna ({employer.get('display_name', 'Unknown')})",
                    "published_at": self._parse_date(posting.get("created")),
                    "metadata": {
                        "company": employer.get("display_name"),
                        "location": place.get("display_name"),
                        "salary_min": posting.get("salary_min"),
                        "salary_max": posting.get("salary_max"),
                        "contract_type": posting.get("contract_type"),
                        "region": "global"
                    }
                })

        except Exception as e:
            print(f" Adzuna '{keyword}' error: {e}")

    return found
318
+
319
async def fetch_jooble(self) -> list[dict]:
    """
    Fetch from Jooble API - aggregates 70+ sources including:
    - LinkedIn
    - Indeed
    - Glassdoor
    - Monster
    - CareerBuilder

    Free tier available.
    Get key at: https://jooble.org/api/about

    Only the first five search queries are sent (to conserve quota) and at
    most 10 jobs are kept per query.

    Returns:
        A list of normalized opportunity dicts; empty when no API key is
        configured or when every request fails.
    """
    # The key is part of the URL path; without it the URL would end in "None".
    if not self.jooble_api_key:
        return []

    opportunities = []

    search_queries = [
        "machine learning engineer",
        "AI internship",
        "data scientist",
        "robotics engineer",
        "computer vision",
        "scholarship",
        "fellowship"
    ]

    url = f"https://jooble.org/api/{self.jooble_api_key}"

    # One client for all queries so the connection can be reused.
    async with httpx.AsyncClient() as client:
        for query in search_queries[:5]:  # Limit to conserve quota
            try:
                payload = {
                    "keywords": query,
                    "location": "",  # Worldwide
                }
                response = await client.post(
                    url,
                    json=payload,
                    headers={"Content-Type": "application/json"},
                    timeout=30
                )

                if response.status_code != 200:
                    continue

                for job in (response.json().get("jobs") or [])[:10]:
                    # `or` also covers empty/null values, not just missing keys.
                    company = job.get("company") or "Unknown"

                    opportunities.append({
                        "title": f"[Jooble] {job.get('title') or ''}",
                        "raw_text": self._strip_html(job.get("snippet") or "")[:2000],
                        "url": job.get("link") or "",
                        "source_type": "job",
                        "source_name": f"Jooble ({company})",
                        "published_at": self._parse_date(job.get("updated")),
                        "metadata": {
                            "company": job.get("company"),
                            "location": job.get("location"),
                            "salary": job.get("salary"),
                            "source": job.get("source"),  # Original source (LinkedIn, Indeed, etc.)
                            "region": "global"
                        }
                    })

            except Exception as e:
                # Best effort: a failure for one query must not stop the others.
                print(f" Jooble '{query}' error: {e}")

    return opportunities
386
+
387
async def fetch_linkedin_rapidapi(self) -> list[dict]:
    """
    Fetch LinkedIn jobs via RapidAPI.
    Free tier: 100 requests/month
    Get key at: https://rapidapi.com/jaypat87/api/linkedin-jobs-search

    Only the first two search queries are sent (to conserve quota) and at
    most 10 jobs are kept per query. A response body that is not a JSON
    list is ignored.

    Returns:
        A list of normalized opportunity dicts; empty when no RapidAPI key
        is configured or when every request fails.
    """
    # Every request needs the key header; skip the calls when it is unset.
    if not self.rapidapi_key:
        return []

    opportunities = []

    search_queries = [
        "machine learning",
        "AI engineer",
        "computer vision intern",
        "robotics"
    ]

    url = "https://linkedin-jobs-search.p.rapidapi.com/"
    headers = {
        "content-type": "application/json",
        "X-RapidAPI-Key": self.rapidapi_key,
        "X-RapidAPI-Host": "linkedin-jobs-search.p.rapidapi.com"
    }

    # One client for all queries so the connection can be reused.
    async with httpx.AsyncClient() as client:
        for query in search_queries[:2]:  # Limit to conserve quota
            try:
                payload = {
                    "search_terms": query,
                    "location": "United States",
                    "page": "1"
                }
                response = await client.post(
                    url,
                    json=payload,
                    headers=headers,
                    timeout=30
                )

                if response.status_code != 200:
                    continue

                data = response.json()
                jobs = data[:10] if isinstance(data, list) else []

                for job in jobs:
                    if not isinstance(job, dict):
                        continue

                    cleaned_url = job.get("linkedin_job_url_cleaned")

                    opportunities.append({
                        "title": f"[LinkedIn] {job.get('job_title') or ''}",
                        "raw_text": (job.get("job_description") or "")[:2000],
                        # Fall back when the cleaned URL is missing OR null.
                        "url": cleaned_url or job.get("job_url") or "",
                        "source_type": "job",
                        "source_name": f"LinkedIn ({job.get('company_name') or 'Unknown'})",
                        "published_at": self._parse_date(job.get("posted_date")),
                        "metadata": {
                            "company": job.get("company_name"),
                            "location": job.get("job_location"),
                            "linkedin_url": cleaned_url,
                            "region": "global"
                        }
                    })

            except Exception as e:
                # Best effort: a failure for one query must not stop the others.
                print(f" LinkedIn '{query}' error: {e}")

    return opportunities
451
+
452
+ # ===========================================
453
+ # HELPER METHODS
454
+ # ===========================================
455
+
456
+ def _parse_date(self, date_str: Optional[str]) -> Optional[datetime]:
457
+ """Parse various date formats."""
458
+ if not date_str:
459
+ return None
460
+ try:
461
+ if "T" in str(date_str):
462
+ return datetime.fromisoformat(str(date_str).replace("Z", "+00:00"))
463
+ return datetime.strptime(str(date_str)[:10], "%Y-%m-%d")
464
+ except Exception:
465
+ return None
466
+
467
+ def _strip_html(self, text: str) -> str:
468
+ """Remove HTML tags from text."""
469
+ if not text:
470
+ return ""
471
+ clean = re.sub(r'<[^>]+>', '', text)
472
+ return " ".join(clean.split())
backend/ingestion/reddit_client.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PIOE Reddit Client
3
+
4
+ Monitors curated subreddits for opportunities with strict filtering.
5
+ """
6
+ from datetime import datetime
7
+ from typing import Optional
8
+ import httpx
9
+
10
+
11
class RedditClient:
    """
    Client for Reddit using public JSON API.

    Note: For production, consider using PRAW with OAuth for better rate limits.
    This implementation uses public endpoints which are rate-limited.
    """

    BASE_URL = "https://www.reddit.com"

    # Curated subreddits for high-signal content
    TARGET_SUBREDDITS = [
        "computervision",
        "robotics",
        "MachineLearning",
        "artificial",
        "learnmachinelearning",
        "deeplearning",
        "hackathons",
        "scholarships",
        "cscareerquestions",
        "roboticsengineering",
    ]

    # Keywords that indicate opportunities (matched as substrings so that
    # plurals such as "internships" or "jobs" still count)
    OPPORTUNITY_KEYWORDS = [
        "internship", "intern", "hiring", "job",
        "hackathon", "competition", "challenge",
        "scholarship", "fellowship", "grant", "funding",
        "research assistant", "ra position", "phd",
        "call for papers", "cfp", "workshop",
        "applications open", "apply now", "deadline"
    ]

    # Keywords to filter out (noise); matched as whole words/phrases
    NOISE_KEYWORDS = [
        "meme", "funny", "eli5", "rant",
        "top 10", "best tools", "what are",
        "vs", "versus", "comparison"
    ]

    def __init__(self, user_agent: str = "PIOE/1.0"):
        """Store the User-Agent that is sent with every request."""
        self.user_agent = user_agent
        self._headers = {"User-Agent": user_agent}

    async def fetch_all(self, subreddits: Optional[list[str]] = None) -> list[dict]:
        """
        Fetch from all target subreddits with filtering.

        Args:
            subreddits: Subreddit names to query; defaults to
                ``TARGET_SUBREDDITS`` when omitted or empty.

        Returns:
            The combined list of opportunity dicts. A failure for one
            subreddit is printed and does not stop the others.
        """
        subreddits = subreddits or self.TARGET_SUBREDDITS
        all_opportunities = []

        for subreddit in subreddits:
            try:
                posts = await self.fetch_subreddit(subreddit)
                all_opportunities.extend(posts)
            except Exception as e:
                print(f"Error fetching r/{subreddit}: {e}")

        return all_opportunities

    async def fetch_subreddit(
        self,
        subreddit: str,
        sort: str = "new",
        limit: int = 25
    ) -> list[dict]:
        """
        Fetch posts from a subreddit with opportunity filtering.

        Only returns posts that match opportunity keywords
        and don't match noise keywords.

        Raises:
            httpx.HTTPStatusError: when Reddit answers with an error status.
        """
        url = f"{self.BASE_URL}/r/{subreddit}/{sort}.json"

        async with httpx.AsyncClient() as client:
            response = await client.get(
                url,
                params={"limit": limit},
                headers=self._headers,
                timeout=30
            )
            response.raise_for_status()

            data = response.json()
            posts = data.get("data", {}).get("children", [])

        return self._filter_and_parse(posts, subreddit)

    def _filter_and_parse(self, posts: list, subreddit: str) -> list[dict]:
        """
        Filter posts for opportunities and parse to normalized format.

        A post is kept when it is not removed, contains no noise keyword,
        and either contains an opportunity keyword or has a score above 50.

        Args:
            posts: The ``children`` list of a Reddit listing; each item
                wraps the post fields under its ``data`` key.
            subreddit: Label used when a post does not carry its own
                ``subreddit`` field.

        Returns:
            A list of normalized opportunity dicts.
        """
        import re

        # Noise keywords are matched on word boundaries: as plain substrings
        # "rant" would hit "grant" and "vs" would hit "devs", which throws
        # away exactly the posts this client is looking for.
        noise_re = None
        if self.NOISE_KEYWORDS:
            noise_re = re.compile(
                r"\b(?:" + "|".join(re.escape(kw) for kw in self.NOISE_KEYWORDS) + r")\b"
            )

        opportunities = []

        for post_wrapper in posts:
            post = post_wrapper.get("data", {})

            # Fields can be present but null, so normalise them once.
            raw_title = post.get("title") or ""
            raw_body = post.get("selftext") or ""

            # Skip removed/deleted posts
            if post.get("removed_by_category") or raw_body == "[removed]":
                continue

            combined = f"{raw_title.lower()} {raw_body.lower()}"

            # Filter out noise
            if noise_re is not None and noise_re.search(combined):
                continue

            score = post.get("score") or 0
            num_comments = post.get("num_comments") or 0

            # Keep posts with an opportunity keyword, or with a high score
            # (community validated).
            has_opportunity = any(kw in combined for kw in self.OPPORTUNITY_KEYWORDS)
            if not has_opportunity and score <= 50:
                continue

            # Search results span many subreddits; prefer the post's own.
            post_subreddit = post.get("subreddit") or subreddit

            opportunities.append({
                "title": f"[Reddit] {raw_title}",
                "raw_text": raw_body[:2000] or raw_title,
                "url": f"https://reddit.com{post.get('permalink', '')}",
                "source_type": "reddit",
                "source_name": f"r/{post_subreddit}",
                "published_at": self._parse_timestamp(post.get("created_utc")),
                "social_engagement": score + num_comments,
                "metadata": {
                    "subreddit": post_subreddit,
                    "author": post.get("author"),
                    "score": score,
                    "num_comments": num_comments,
                    "flair": post.get("link_flair_text"),
                    "is_self": post.get("is_self", True),
                    "external_url": post.get("url") if not post.get("is_self") else None
                }
            })

        return opportunities

    def _parse_timestamp(self, timestamp: Optional[float]) -> Optional[datetime]:
        """
        Convert Unix timestamp to a naive UTC datetime.

        Returns ``None`` for a missing/zero timestamp or one that cannot be
        converted.
        """
        from datetime import timezone

        if not timestamp:
            return None
        try:
            # Same result as the deprecated datetime.utcfromtimestamp().
            return datetime.fromtimestamp(timestamp, tz=timezone.utc).replace(tzinfo=None)
        except Exception:
            return None

    async def search(self, query: str, subreddit: Optional[str] = None) -> list[dict]:
        """
        Search Reddit for specific opportunities.

        Args:
            query: Search terms.
            subreddit: Restrict the search to this subreddit; search all of
                Reddit when omitted.

        Returns:
            Matching posts after the same filtering as ``fetch_subreddit``.

        Raises:
            httpx.HTTPStatusError: when Reddit answers with an error status.
        """
        if subreddit:
            url = f"{self.BASE_URL}/r/{subreddit}/search.json"
        else:
            url = f"{self.BASE_URL}/search.json"

        async with httpx.AsyncClient() as client:
            response = await client.get(
                url,
                params={
                    "q": query,
                    "sort": "new",
                    "limit": 25,
                    "restrict_sr": "on" if subreddit else "off"
                },
                headers=self._headers,
                timeout=30
            )
            response.raise_for_status()

            data = response.json()
            posts = data.get("data", {}).get("children", [])

        return self._filter_and_parse(posts, subreddit or "search")
backend/ingestion/rss_client.py ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PIOE RSS Client
3
+
4
+ Parses RSS/Atom feeds from blogs, news sites, and announcement pages.
5
+ """
6
+ import feedparser
7
+ from datetime import datetime
8
+ from typing import Optional
9
+ import httpx
10
+ import re
11
+
12
+
13
class RSSClient:
    """
    Client for RSS/Atom feeds.
    Supports multiple feeds with configurable filtering.
    """

    # Patterns that indicate non-actionable content (discussions, not opportunities)
    FILTER_OUT_PATTERNS = [
        r'^Ask HN:',           # Hacker News discussions
        r'^Show HN:',          # Show HN posts (usually not opportunities)
        r'^Tell HN:',          # Tell HN posts
        r'my internship',      # Personal stories about internships
        r'my experience',      # Personal experiences
        r'I (got|landed|received|missed)',  # Personal stories
        r'How (do|did|can|should) I',       # Questions, not opportunities
        r'\?$',                # Questions
        r'AMA$',               # AMAs
        r'white british',      # News articles, not opportunities
        r'is (this|it) (real|fake|legit)',  # Verification questions
    ]

    # Patterns that indicate REAL opportunities
    OPPORTUNITY_PATTERNS = [
        r'hiring',
        r'apply now',
        r'deadline',
        r'applications? open',
        r'we are looking',
        r'join (our|the) team',
        r'open position',
        r'fellowship program',
        r'grant program',
        r'scholarship',
        r'bounty',
        r'\$\d+',  # Money amounts
        r'remote (ok|friendly|position)',
    ]

    # Default feeds - ONLY actionable opportunity sources
    DEFAULT_FEEDS = [
        # HN Jobs - ACTUAL job postings, not discussions
        {"name": "Hacker News Jobs", "url": "https://hnrss.org/jobs", "type": "job"},

        # ArXiv RSS (research papers - always relevant)
        {"name": "ArXiv CS.CV", "url": "https://rss.arxiv.org/rss/cs.CV", "type": "research"},
        {"name": "ArXiv CS.RO", "url": "https://rss.arxiv.org/rss/cs.RO", "type": "research"},
        {"name": "ArXiv CS.AI", "url": "https://rss.arxiv.org/rss/cs.AI", "type": "research"},

        # Fellowships & Scholarships (working feeds only)
        {"name": "ProFellow", "url": "https://www.profellow.com/feed/", "type": "fellowship"},
        {"name": "Scholars4Dev", "url": "https://www.scholars4dev.com/feed/", "type": "scholarship"},
        # NOTE: OpportunityDesk, AfterSchoolAfrica, WayUp removed - broken/invalid XML

        # Remote Jobs
        {"name": "RemoteOK AI", "url": "https://remoteok.com/remote-ai-jobs.rss", "type": "job"},
        {"name": "RemoteOK Intern", "url": "https://remoteok.com/remote-intern-jobs.rss", "type": "internship"},
        {"name": "RemoteOK ML", "url": "https://remoteok.com/remote-machine-learning-jobs.rss", "type": "job"},
    ]

    def __init__(self, custom_feeds: Optional[list[dict]] = None):
        """
        Args:
            custom_feeds: Feed configs (dicts with ``name``, ``url`` and
                optional ``type``); ``DEFAULT_FEEDS`` is used when omitted
                or empty.
        """
        # Copy the list: add_feed() appends to self.feeds and must not
        # modify the shared class-level default or the caller's list.
        self.feeds = list(custom_feeds or self.DEFAULT_FEEDS)

    async def fetch_all(self) -> list[dict]:
        """
        Fetch from all configured feeds.

        Returns:
            The combined list of opportunity dicts. A failure for one feed
            is printed and does not stop the others.
        """
        all_opportunities = []

        for feed_config in self.feeds:
            try:
                opportunities = await self.fetch_feed(
                    feed_config["url"],
                    feed_config["name"],
                    feed_config.get("type", "rss")
                )
                all_opportunities.extend(opportunities)
            except Exception as e:
                print(f"Error fetching {feed_config['name']}: {e}")

        return all_opportunities

    async def fetch_feed(self, url: str, source_name: str, feed_type: str = "rss") -> list[dict]:
        """
        Fetch and parse a single RSS feed.

        Returns list of normalized opportunity dicts; an empty list when
        the download fails, the server answers with an error status, or the
        feed cannot be parsed at all.
        """
        try:
            async with httpx.AsyncClient() as client:
                response = await client.get(url, timeout=30, follow_redirects=True)
                # Don't hand an HTML error page to the feed parser.
                response.raise_for_status()
                content = response.text
        except Exception as e:
            print(f"HTTP error for {url}: {e}")
            return []

        # Parse feed
        feed = feedparser.parse(content)

        # bozo only marks the feed as malformed; give up only if nothing
        # could be extracted from it.
        if feed.bozo and not feed.entries:
            print(f"Feed parse error for {url}: {feed.bozo_exception}")
            return []

        return self._parse_entries(feed.entries, source_name, feed_type)

    def _is_discussion_not_opportunity(self, title: str, description: str) -> bool:
        """
        Check if content is a discussion post rather than an actionable opportunity.

        Only the title is matched against ``FILTER_OUT_PATTERNS``
        (case-insensitively); ``description`` is accepted for symmetry with
        ``_is_likely_opportunity`` but is not inspected.
        """
        return any(
            re.search(pattern, title, re.IGNORECASE)
            for pattern in self.FILTER_OUT_PATTERNS
        )

    def _is_likely_opportunity(self, title: str, description: str, feed_type: str) -> bool:
        """
        Check if content is likely a real opportunity.

        Entries from curated feed types are always accepted; for any other
        type the title and description must match one of
        ``OPPORTUNITY_PATTERNS``.
        """
        # Curated feeds: research papers, fellowship/scholarship listings and
        # job boards. "internship" is included because the default
        # internship feed is a job-board feed like the "job" ones.
        if feed_type in ("research", "fellowship", "scholarship", "job", "internship"):
            return True

        text = f"{title} {description}".lower()
        return any(
            re.search(pattern, text, re.IGNORECASE)
            for pattern in self.OPPORTUNITY_PATTERNS
        )

    def _parse_entries(self, entries: list, source_name: str, feed_type: str) -> list[dict]:
        """
        Parse feed entries into normalized opportunities.

        Only the first 20 entries are considered. Entries that look like
        discussions, or that are not likely opportunities (unless the feed
        type is "news" or "blog"), are skipped. An entry that cannot be
        parsed is reported and skipped.
        """
        opportunities = []

        for entry in entries[:20]:  # Limit per feed
            try:
                title = (entry.get("title") or "").strip()

                # Get description/summary
                description = ""
                if "summary" in entry:
                    description = entry.summary
                elif "description" in entry:
                    description = entry.description
                elif "content" in entry and entry.content:
                    description = entry.content[0].get("value", "")

                # Clean HTML tags (basic)
                description = self._strip_html(description)

                # QUALITY FILTER: Skip discussions and non-opportunities
                if self._is_discussion_not_opportunity(title, description):
                    continue

                # QUALITY FILTER: Only keep likely opportunities; "news" and
                # "blog" feeds are let through regardless.
                if (
                    not self._is_likely_opportunity(title, description, feed_type)
                    and feed_type not in ["news", "blog"]
                ):
                    continue

                # Get published date
                published = None
                if "published_parsed" in entry and entry.published_parsed:
                    published = datetime(*entry.published_parsed[:6])
                elif "updated_parsed" in entry and entry.updated_parsed:
                    published = datetime(*entry.updated_parsed[:6])

                # A tag without a term must not discard the whole entry.
                tags = [
                    tag.get("term")
                    for tag in (entry.get("tags") or [])
                    if tag.get("term")
                ]

                opportunities.append({
                    "title": title,
                    "raw_text": description[:2000],
                    "url": entry.get("link", ""),
                    "source_type": "rss",
                    "source_name": source_name,
                    "published_at": published,
                    "metadata": {
                        "feed_type": feed_type,
                        "author": entry.get("author"),
                        "tags": tags
                    }
                })

            except Exception as e:
                print(f"Error parsing entry: {e}")

        return opportunities

    def _strip_html(self, text: str) -> str:
        """
        Convert an HTML fragment to plain, single-spaced text.

        Tags are replaced by a space so words in adjacent elements stay
        separate, character references such as ``&amp;`` are decoded, and
        whitespace is collapsed. Empty or ``None`` input yields ``""``.
        """
        import html

        if not text:
            return ""
        without_tags = re.sub(r'<[^>]+>', ' ', text)
        return " ".join(html.unescape(without_tags).split())

    def add_feed(self, name: str, url: str, feed_type: str = "rss"):
        """Add a new feed to monitor (affects this client instance only)."""
        self.feeds.append({
            "name": name,
            "url": url,
            "type": feed_type
        })
220
+
backend/ingestion/scheduler.py ADDED
@@ -0,0 +1,371 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PIOE Ingestion Scheduler - Version 2.0
3
+
4
+ Orchestrates periodic data collection from all sources.
5
+ Now includes Grant Intelligence and ROI scoring.
6
+ """
7
+ from apscheduler.schedulers.asyncio import AsyncIOScheduler
8
+ from datetime import datetime
9
+ from sqlalchemy.orm import Session
10
+
11
+ from ..config import get_settings
12
+ from ..database import SessionLocal
13
+ from ..models import Opportunity, Source, SourceType, OpportunityCategory, Domain, Region, RiskLevel
14
+ from ..intelligence import RelevanceScorer, NoveltyDetector, CredibilityScorer, OpportunityClassifier
15
+ from ..intelligence import ROIScorer, SilentOpportunityDetector
16
+
17
+ from .arxiv_client import ArxivClient
18
+ from .github_client import GitHubClient
19
+ from .rss_client import RSSClient
20
+ from .reddit_client import RedditClient
21
+ from .superteam_client import SuperteamClient
22
+ from .web_scraper import WebScraper
23
+ from .careers_client import CareersClient, InternshipClient
24
+ from .grants_client import GrantsClient, NigeriaGrantsClient
25
+ from .jobboard_client import JobBoardClient
26
+
27
+
28
class IngestionScheduler:
    """
    Coordinates all data ingestion and processing.
    PIOE 2.0: Now includes grant intelligence and ROI scoring.
    """

    def __init__(self, user_region: str = "nigeria"):
        """
        Build the scheduler, one client per source, and the scoring components.

        Args:
            user_region: Region name compared (case-insensitively) with each
                opportunity's region to pick its ``region_weight``; also
                passed to the ROI scorer.
        """
        self.settings = get_settings()
        self.scheduler = AsyncIOScheduler()
        self.user_region = user_region

        # Initialize clients
        self.arxiv = ArxivClient(max_results=30)
        self.github = GitHubClient(token=self.settings.github_token)
        self.rss = RSSClient()
        self.reddit = RedditClient()
        self.superteam = SuperteamClient()
        self.scraper = WebScraper()
        self.careers = CareersClient()
        self.internships = InternshipClient()

        # PIOE 2.0: Job boards (REAL opportunities, not discussions)
        self.jobboards = JobBoardClient(
            adzuna_app_id=self.settings.adzuna_app_id,
            adzuna_api_key=self.settings.adzuna_api_key,
            jooble_api_key=self.settings.jooble_api_key,
            rapidapi_key=self.settings.rapidapi_key
        )

        # PIOE 2.0: Grant clients
        self.grants = GrantsClient()
        self.nigeria_grants = NigeriaGrantsClient()

        # Initialize intelligence
        self.scorer = RelevanceScorer()
        self.novelty = NoveltyDetector()
        self.credibility = CredibilityScorer()
        self.classifier = OpportunityClassifier()

        # PIOE 2.0: Decision intelligence
        self.roi_scorer = ROIScorer(user_region=user_region)
        self.silent_detector = SilentOpportunityDetector()

    def start(self):
        """Register the full and priority ingestion jobs and start the scheduler."""
        # Run ingestion every N hours
        self.scheduler.add_job(
            self.run_full_ingestion,
            'interval',
            hours=self.settings.ingestion_interval_hours,
            id='full_ingestion'
        )

        # Run high-priority sources more frequently (every 2 hours)
        self.scheduler.add_job(
            self.run_priority_ingestion,
            'interval',
            hours=2,
            id='priority_ingestion'
        )

        self.scheduler.start()
        print(f"Scheduler started - full ingestion every {self.settings.ingestion_interval_hours}h")

    def stop(self):
        """Stop the scheduler if it is running; never raises."""
        try:
            if self.scheduler.running:
                self.scheduler.shutdown()
        except Exception as e:
            # Shutdown is best effort, but report the problem instead of
            # hiding it completely.
            print(f"Scheduler shutdown error (ignored): {e}")

    async def run_full_ingestion(self):
        """
        Run ingestion from all enabled sources, one after another.

        Returns:
            A dict with ``total_fetched``, ``total_saved`` and a ``sources``
            mapping of source name to either ``{"fetched", "saved"}`` or
            ``{"error"}``.
        """
        print(f"[{datetime.utcnow()}] Starting full ingestion...")

        results = {
            "total_fetched": 0,
            "total_saved": 0,
            "sources": {}
        }

        db = SessionLocal()

        try:
            # Fetch from all sources (PIOE 2.0 includes grant + job board sources).
            # Fetch methods are stored uncalled so each coroutine is created
            # only at the moment it is awaited.
            sources = [
                ("arXiv", self.arxiv.fetch, SourceType.ARXIV),
                ("GitHub", self.github.fetch_trending, SourceType.GITHUB),
                ("RSS", self.rss.fetch_all, SourceType.RSS),
                # DISABLED: Reddit returns too many discussions, not opportunities
                # ("Reddit", self.reddit.fetch_all, SourceType.REDDIT),
                ("Superteam", self.superteam.fetch_all, SourceType.SUPERTEAM),
                # ("Web Scraper", self.scraper.fetch_all, SourceType.WEB_SCRAPE),  # Often blocked
                # ("Careers", self.careers.fetch_all, SourceType.WEB_SCRAPE),  # Often blocked
                # ("Internships", self.internships.fetch_all, SourceType.WEB_SCRAPE),  # Often blocked
                # PIOE 2.0: Job boards (REAL opportunities)
                ("Job Boards", self.jobboards.fetch_all, SourceType.WEB_SCRAPE),
                # PIOE 2.0: Grant sources
                ("Ecosystem Grants", self.grants.fetch_all, SourceType.GRANT_PLATFORM),
                ("Nigeria Grants", self.nigeria_grants.fetch_all, SourceType.GOV_PORTAL),
            ]

            for source_name, fetch, source_type in sources:
                try:
                    opportunities = await fetch()
                    saved = self._process_and_save(db, opportunities, source_type)

                    results["sources"][source_name] = {
                        "fetched": len(opportunities),
                        "saved": saved
                    }
                    results["total_fetched"] += len(opportunities)
                    results["total_saved"] += saved

                    print(f" {source_name}: {len(opportunities)} fetched, {saved} saved")

                except Exception as e:
                    print(f" {source_name}: ERROR - {e}")
                    results["sources"][source_name] = {"error": str(e)}

        finally:
            db.close()

        print(f"[{datetime.utcnow()}] Ingestion complete: {results['total_saved']}/{results['total_fetched']} saved")
        return results

    async def run_priority_ingestion(self):
        """Run ingestion for high-priority sources only."""
        print(f"[{datetime.utcnow()}] Starting priority ingestion...")

        db = SessionLocal()

        try:
            # Only run arXiv, GitHub, and Superteam (highest signal sources)
            sources = [
                ("arXiv", self.arxiv.fetch, SourceType.ARXIV),
                ("GitHub", self.github.fetch_trending, SourceType.GITHUB),
                ("Superteam", self.superteam.fetch_all, SourceType.SUPERTEAM),
            ]

            for source_name, fetch, source_type in sources:
                try:
                    opportunities = await fetch()
                    saved = self._process_and_save(db, opportunities, source_type)
                    print(f" {source_name}: {saved} new")
                except Exception as e:
                    print(f" {source_name}: ERROR - {e}")

        finally:
            db.close()

    def _process_and_save(
        self,
        db: Session,
        raw_opportunities: list[dict],
        source_type: SourceType
    ) -> int:
        """
        Process raw opportunities through intelligence layer and save.

        Each item is skipped when its URL already exists (in the database or
        earlier in this batch), when its relevance or credibility is below
        the configured minimum, or when it is a duplicate or recycled
        content. Remaining items are classified, ROI-scored and added to the
        session; the whole batch is committed once at the end.

        Returns:
            Count of saved opportunities; 0 if the commit fails (the batch is
            rolled back).
        """
        saved_count = 0

        # URLs added during this batch. The existence query below only sees
        # them if the session autoflushes, so track them explicitly.
        seen_urls = set()

        # Loop-invariant lookup tables
        domain_values = [d.value for d in Domain]
        category_values = [c.value for c in OpportunityCategory]
        region_map = {
            "nigeria": Region.NIGERIA,
            "africa": Region.AFRICA,
            "global": Region.GLOBAL,
            "remote_africa": Region.REMOTE_AFRICA,
            "remote_global": Region.REMOTE_GLOBAL,
        }
        risk_map = {"low": RiskLevel.LOW, "medium": RiskLevel.MEDIUM, "high": RiskLevel.HIGH}
        user_region = (self.user_region or "").lower()

        for raw in raw_opportunities:
            try:
                url = raw.get("url")

                # Skip if already seen in this batch
                if url and url in seen_urls:
                    continue

                # Skip if already exists (by URL)
                existing = db.query(Opportunity).filter(
                    Opportunity.url == url
                ).first()

                if existing:
                    continue

                # Combine title and text for analysis
                full_text = f"{raw.get('title', '')} {raw.get('raw_text', '')}"

                # Score relevance
                scores = self.scorer.score(raw.get("raw_text", ""), raw.get("title", ""))

                # Skip low relevance
                if scores["relevance_score"] < self.settings.min_relevance_score:
                    continue

                # Get embedding for novelty detection
                embedding = self.scorer.get_embedding(full_text[:1000])

                # Check novelty
                novelty_result = self.novelty.calculate_novelty(embedding, db)

                # Skip duplicates
                if novelty_result["is_duplicate"]:
                    continue

                # Skip recycled content
                if self.novelty.is_recycled_content(full_text):
                    continue

                # Calculate credibility
                cred_result = self.credibility.score(
                    source_type,
                    raw.get("raw_text", ""),
                    raw.get("metadata", {}),
                    social_engagement=raw.get("social_engagement", 0)
                )

                # Skip low credibility
                if cred_result["credibility_score"] < self.settings.min_credibility_score:
                    continue

                # Classify
                classification = self.classifier.classify(
                    raw.get("raw_text", ""),
                    raw.get("title", ""),
                    source_type=raw.get("source_type", ""),
                    source_name=raw.get("source_name", "")
                )

                # PIOE 2.0: Check for silent opportunities
                silent_result = self.silent_detector.detect(
                    raw.get("raw_text", ""),
                    raw.get("title", "")
                )

                # Override category if silent opportunity detected
                final_category = classification["category"]
                if silent_result["is_silent_opportunity"]:
                    final_category = silent_result["recommended_category"]

                # PIOE 2.0: Calculate ROI score
                metadata = raw.get("metadata", {})
                roi_result = self.roi_scorer.calculate_roi(
                    category=final_category,
                    deadline=raw.get("deadline"),
                    grant_size=metadata.get("grant_size_max"),
                    region=metadata.get("region", "global"),
                    extra_data=metadata
                )

                # Calculate combined score (now includes ROI)
                combined_score = (
                    0.3 * scores["relevance_score"] +
                    0.2 * novelty_result["novelty_score"] +
                    0.2 * cred_result["credibility_score"] +
                    0.3 * roi_result["roi_score"]  # PIOE 2.0: Weight ROI heavily
                )

                # Prepare enhanced metadata
                enhanced_metadata = {
                    **metadata,
                    "silent_opportunity": silent_result["is_silent_opportunity"],
                    "silent_type": silent_result.get("opportunity_type"),
                    "roi_reasoning": roi_result["reasoning"],
                }

                # Determine region (unknown names fall back to GLOBAL)
                region_str = (metadata.get("region") or "global").lower()
                region = region_map.get(region_str, Region.GLOBAL)

                # Map risk level (unknown names fall back to MEDIUM)
                risk_level = risk_map.get(roi_result["risk_level"], RiskLevel.MEDIUM)

                # Create opportunity record
                opportunity = Opportunity(
                    title=raw.get("title", "")[:500],
                    source_type=source_type,
                    source_name=raw.get("source_name", ""),
                    domain=Domain(classification["domain"]) if classification["domain"] in domain_values else Domain.MIXED,
                    category=OpportunityCategory(final_category) if final_category in category_values else OpportunityCategory.OTHER,
                    region=region,
                    region_weight=1.0 if region_str == user_region else 0.7,
                    published_at=raw.get("published_at"),
                    deadline=raw.get("deadline"),
                    raw_text=raw.get("raw_text", "")[:5000],
                    url=raw.get("url", ""),
                    relevance_score=scores["relevance_score"],
                    novelty_score=novelty_result["novelty_score"],
                    credibility_score=cred_result["credibility_score"],
                    signal_strength=cred_result["signal_strength"],
                    combined_score=combined_score,
                    roi_score=roi_result["roi_score"],
                    unlock_potential=roi_result["unlock_potential"],
                    risk_level=risk_level,
                    competition_level=roi_result["competition_level"],
                    social_engagement=raw.get("social_engagement", 0),
                    extra_data=enhanced_metadata,
                    embedding=embedding
                )

                db.add(opportunity)
                if url:
                    seen_urls.add(url)
                saved_count += 1

            except Exception as e:
                print(f"Error processing opportunity: {e}")
                continue

        # Commit batch
        try:
            db.commit()
        except Exception as e:
            print(f"Database commit error: {e}")
            db.rollback()
            saved_count = 0

        return saved_count

    async def ingest_single_source(self, source_name: str) -> dict:
        """
        Manually trigger ingestion for a single source.

        Args:
            source_name: Case-insensitive source key, e.g. "arxiv", "github",
                "rss", "reddit", "superteam", "scraper", "careers",
                "internships", "jobboards", "grants" or "nigeria_grants".

        Returns:
            ``{"source", "fetched", "saved"}`` on success, or ``{"error"}``
            for an unknown source name. Errors raised while fetching
            propagate to the caller.
        """
        # Fetch methods are stored uncalled: calling them all up front would
        # create a coroutine per source and leave all but one never awaited.
        source_map = {
            "arxiv": (self.arxiv.fetch, SourceType.ARXIV),
            "github": (self.github.fetch_trending, SourceType.GITHUB),
            "rss": (self.rss.fetch_all, SourceType.RSS),
            "reddit": (self.reddit.fetch_all, SourceType.REDDIT),
            "superteam": (self.superteam.fetch_all, SourceType.SUPERTEAM),
            "scraper": (self.scraper.fetch_all, SourceType.WEB_SCRAPE),
            "careers": (self.careers.fetch_all, SourceType.WEB_SCRAPE),
            "internships": (self.internships.fetch_all, SourceType.WEB_SCRAPE),
            "jobboards": (self.jobboards.fetch_all, SourceType.WEB_SCRAPE),
            "grants": (self.grants.fetch_all, SourceType.GRANT_PLATFORM),
            "nigeria_grants": (self.nigeria_grants.fetch_all, SourceType.GOV_PORTAL),
        }

        key = source_name.lower()
        if key not in source_map:
            return {"error": f"Unknown source: {source_name}"}

        # Open the session only after validation so the early return above
        # cannot leak it.
        db = SessionLocal()
        try:
            fetch, source_type = source_map[key]
            opportunities = await fetch()
            saved = self._process_and_save(db, opportunities, source_type)

            return {
                "source": source_name,
                "fetched": len(opportunities),
                "saved": saved
            }
        finally:
            db.close()
backend/ingestion/superteam_client.py ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PIOE Superteam Client
3
+
4
+ Fetches bounties, grants, and hackathons from Superteam ecosystem.
5
+ High-value source for crypto/web3 opportunities.
6
+ """
7
+ import httpx
8
+ from datetime import datetime
9
+ from typing import Optional
10
+ from bs4 import BeautifulSoup
11
+
12
+
13
class SuperteamClient:
    """
    Client for Superteam ecosystem opportunities.

    Superteam aggregates bounties, grants, hackathons, and jobs
    across the Solana ecosystem and beyond.

    Every public fetch method returns a list of normalized opportunity
    dicts with the keys ``title``, ``raw_text``, ``url``, ``source_type``,
    ``source_name``, ``published_at`` and ``metadata`` (API results also
    carry ``deadline``).
    """

    # Known Superteam endpoints
    EARN_URL = "https://earn.superteam.fun"
    BOUNTIES_API = "https://earn.superteam.fun/api/listings"

    def __init__(self):
        self._headers = {
            "User-Agent": "PIOE/1.0",
            "Accept": "application/json"
        }

    async def fetch_all(self) -> list[dict]:
        """Fetch all opportunity types from Superteam.

        The JSON API is tried first; HTML scraping is used only when the
        API call raises. Errors from both paths are printed, not raised, so
        the worst case is an empty list.
        """
        opportunities = []

        # Try API first
        try:
            api_opps = await self.fetch_from_api()
            opportunities.extend(api_opps)
        except Exception as e:
            print(f"Superteam API error: {e}")
            # Fall back to scraping
            try:
                scraped = await self.fetch_by_scraping()
                opportunities.extend(scraped)
            except Exception as e2:
                print(f"Superteam scrape error: {e2}")

        return opportunities

    async def fetch_from_api(self) -> list[dict]:
        """Fetch listings from Superteam API.

        Raises whatever ``httpx`` raises on transport or HTTP-status errors;
        ``fetch_all`` relies on that to trigger the scraping fallback.
        """
        async with httpx.AsyncClient() as client:
            response = await client.get(
                self.BOUNTIES_API,
                params={"type": "all"},
                headers=self._headers,
                timeout=30
            )
            response.raise_for_status()

            data = response.json()

        # The payload is either a bare list or an object wrapping one.
        # Anything else (null, string, ...) is treated as "no listings".
        if isinstance(data, list):
            listings = data
        elif isinstance(data, dict):
            listings = data.get("listings") or []
        else:
            listings = []

        return self._parse_listings(listings)

    async def fetch_by_scraping(self) -> list[dict]:
        """Fallback: scrape Superteam Earn page."""
        async with httpx.AsyncClient() as client:
            response = await client.get(
                self.EARN_URL,
                headers={"User-Agent": "PIOE/1.0"},
                timeout=30,
                follow_redirects=True
            )
            response.raise_for_status()

        soup = BeautifulSoup(response.text, "html.parser")
        opportunities = []

        # Look for listing cards (structure may vary)
        for card in soup.select("[data-testid='listing-card'], .listing-card, article"):
            try:
                title_el = card.select_one("h3, h2, .title")
                link_el = card.select_one("a[href]")
                reward_el = card.select_one(".reward, .prize, [data-testid='reward']")
                deadline_el = card.select_one(".deadline, .due-date")

                if not title_el:
                    continue

                opportunity = {
                    "title": f"[Superteam] {title_el.get_text(strip=True)}",
                    "raw_text": card.get_text(strip=True)[:500],
                    "url": self._absolute_url(link_el.get("href")) if link_el else self.EARN_URL,
                    "source_type": "superteam",
                    "source_name": "Superteam Earn",
                    # The page exposes no publish date, so scrape time is used.
                    "published_at": datetime.utcnow(),
                    "metadata": {
                        "reward": reward_el.get_text(strip=True) if reward_el else None,
                        "deadline": deadline_el.get_text(strip=True) if deadline_el else None,
                    }
                }
                opportunities.append(opportunity)

            except Exception as e:
                print(f"Error parsing Superteam card: {e}")

        return opportunities

    def _absolute_url(self, href: Optional[str]) -> str:
        """Resolve a scraped href against EARN_URL.

        Plain concatenation produced broken links for hrefs that are already
        absolute or that lack a leading slash; ``urljoin`` handles both.
        """
        from urllib.parse import urljoin

        if not href:
            return self.EARN_URL
        return urljoin(self.EARN_URL, href)

    def _parse_listings(self, listings: list) -> list[dict]:
        """Parse API listings to normalized format.

        Fields that arrive as JSON ``null`` are treated the same as missing
        fields, so one incomplete record no longer causes the whole listing
        to be dropped. Listings that still fail to parse are reported and
        skipped.
        """
        opportunities = []

        for listing in listings or []:
            try:
                # Determine opportunity type
                listing_type = (listing.get("type") or "bounty").lower()
                type_prefix = {
                    "bounty": "Bounty",
                    "grant": "Grant",
                    "hackathon": "Hackathon",
                    "job": "Job"
                }.get(listing_type, "Opportunity")

                # Parse reward
                reward = None
                if listing.get("rewardAmount"):
                    token = listing.get("token") or "USDC"
                    reward = f"{listing['rewardAmount']} {token}"

                # Parse deadline (None when absent or unparseable)
                deadline = self._parse_date(listing.get("deadline"))

                # Extract skills/requirements
                skills = listing.get("skills") or []
                if isinstance(skills, str):
                    skills = [s.strip() for s in skills.split(",")]

                sponsor = listing.get("sponsor")
                sponsor_name = sponsor.get("name") if isinstance(sponsor, dict) else None

                opportunity = {
                    "title": f"[Superteam {type_prefix}] {listing.get('title', '')}",
                    "raw_text": (listing.get("description") or "")[:2000],
                    "url": listing.get("link") or f"{self.EARN_URL}/listing/{listing.get('slug', '')}",
                    "source_type": "superteam",
                    "source_name": "Superteam Earn",
                    "published_at": self._parse_date(listing.get("publishedAt")),
                    "deadline": deadline,
                    "metadata": {
                        "listing_type": listing_type,
                        "reward": reward,
                        "skills": skills,
                        "sponsor": sponsor_name,
                        "region": listing.get("region"),
                        "is_active": listing.get("isPublished", True)
                    }
                }

                opportunities.append(opportunity)

            except Exception as e:
                print(f"Error parsing Superteam listing: {e}")

        return opportunities

    def _parse_date(self, date_str: Optional[str]) -> Optional[datetime]:
        """Parse an ISO-8601 date string to datetime.

        A trailing ``Z`` is rewritten to ``+00:00`` before parsing. Returns
        None for empty, non-string or unparseable input.
        """
        if not date_str:
            return None
        try:
            return datetime.fromisoformat(date_str.replace("Z", "+00:00"))
        except (AttributeError, TypeError, ValueError):
            return None
backend/ingestion/web_scraper.py ADDED
@@ -0,0 +1,227 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PIOE Web Scraper
3
+
4
+ Generic web scraper for scholarship sites, hackathon platforms, and university pages.
5
+ Uses BeautifulSoup for static pages, Playwright for dynamic content.
6
+ """
7
+ import httpx
8
+ from datetime import datetime
9
+ from typing import Optional
10
+ from bs4 import BeautifulSoup
11
+
12
+
13
class WebScraper:
    """
    Generic web scraper for pages without APIs.
    Supports static and dynamic (JavaScript) pages.

    Each target is a dict with ``name``, ``url``, ``type`` and a
    ``selectors`` dict of CSS selectors (``items``, ``title``, ``link``,
    ``deadline``). Results are normalized opportunity dicts.
    """

    # Preconfigured scrape targets
    TARGETS = [
        # Hackathon Platforms
        {
            "name": "Devpost Hackathons",
            "url": "https://devpost.com/hackathons",
            "type": "hackathon",
            "selectors": {
                "items": ".hackathon-tile, .challenge-listing",
                "title": "h2, h3, .title",
                "link": "a",
                "deadline": ".submission-period, .dates"
            }
        },
        {
            "name": "Devfolio Hackathons",
            "url": "https://devfolio.co/hackathons",
            "type": "hackathon",
            "selectors": {
                "items": "[class*='HackathonCard'], article",
                "title": "h3, h2, [class*='Name']",
                "link": "a",
                "deadline": "[class*='Date']"
            }
        },
        {
            "name": "HackerEarth Challenges",
            "url": "https://www.hackerearth.com/challenges/",
            "type": "hackathon",
            "selectors": {
                "items": ".challenge-card, .event-card",
                "title": ".challenge-name, h3",
                "link": "a",
                "deadline": ".date, .timing"
            }
        },
        # Scholarship/Fellowship Sites
        {
            "name": "FindAPhD AI",
            "url": "https://www.findaphd.com/phds/?Keywords=artificial+intelligence+machine+learning",
            "type": "scholarship",
            "selectors": {
                "items": ".phd-result",
                "title": "h4 a, .title a",
                "link": "a",
                "deadline": ".close-date"
            }
        },
        {
            "name": "FindAPhD Robotics",
            "url": "https://www.findaphd.com/phds/?Keywords=robotics+computer+vision",
            "type": "scholarship",
            "selectors": {
                "items": ".phd-result",
                "title": "h4 a, .title a",
                "link": "a",
                "deadline": ".close-date"
            }
        },
        # Grant/Fellowship
        # NOTE(review): the display name below does not match the URL's
        # domain (opportunitiescircle.com). Left unchanged because the name
        # is emitted as source_name and may already be persisted.
        {
            "name": "Opportunities.com",
            "url": "https://www.opportunitiescircle.com/category/fellowships/",
            "type": "fellowship",
            "selectors": {
                "items": "article, .post",
                "title": "h2, h3, .entry-title",
                "link": "a",
                "deadline": ".deadline"
            }
        },
    ]

    def __init__(self, use_playwright: bool = False):
        self.use_playwright = use_playwright
        self._headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
        }

    async def fetch_all(self, targets: Optional[list[dict]] = None) -> list[dict]:
        """Fetch from all configured targets.

        Args:
            targets: Target configurations to scrape. ``None`` means the
                built-in ``TARGETS``; an explicitly empty list means
                "scrape nothing" and returns ``[]`` without network access.

        A failure on one target is printed and does not stop the others.
        """
        if targets is None:
            targets = self.TARGETS
        all_opportunities = []

        for target in targets:
            try:
                opps = await self.scrape_target(target)
                all_opportunities.extend(opps)
            except Exception as e:
                # .get() so a malformed target cannot raise from inside the
                # error handler itself.
                print(f"Scrape error for {target.get('name', '<unnamed>')}: {e}")

        return all_opportunities

    async def scrape_target(self, target: dict) -> list[dict]:
        """Scrape a single target configuration.

        Returns at most 20 opportunities (the first 20 matched items).
        Returns ``[]`` when the page could not be fetched. Items without a
        title are skipped; items that raise while parsing are reported and
        skipped.
        """
        from urllib.parse import urljoin

        html = await self._fetch_html(target["url"])
        if not html:
            return []

        soup = BeautifulSoup(html, "html.parser")
        selectors = target.get("selectors", {})

        opportunities = []
        items = soup.select(selectors.get("items", "article"))[:20]

        for item in items:
            try:
                # Extract title
                title_el = item.select_one(selectors.get("title", "h2, h3, .title"))
                title = title_el.get_text(strip=True) if title_el else ""

                if not title:
                    continue

                # Extract link. urljoin returns absolute hrefs unchanged and
                # resolves relative ones against the page URL.
                link_el = item.select_one(selectors.get("link", "a"))
                link = ""
                if link_el and link_el.get("href"):
                    link = urljoin(target["url"], link_el.get("href"))

                # Extract deadline if available
                deadline_el = item.select_one(selectors.get("deadline", ".deadline"))
                deadline_text = deadline_el.get_text(strip=True) if deadline_el else None

                # Get full text content
                raw_text = item.get_text(separator=" ", strip=True)[:1000]

                opportunity = {
                    "title": f"[{target['type'].title()}] {title}",
                    "raw_text": raw_text,
                    "url": link or target["url"],
                    "source_type": "web_scrape",
                    "source_name": target["name"],
                    # No publish date is scraped; scrape time is recorded.
                    "published_at": datetime.utcnow(),
                    "metadata": {
                        "scrape_type": target["type"],
                        "deadline_text": deadline_text
                    }
                }

                opportunities.append(opportunity)

            except Exception as e:
                print(f"Error parsing item: {e}")

        return opportunities

    async def _fetch_html(self, url: str) -> Optional[str]:
        """Fetch HTML content from URL.

        Uses Playwright when ``use_playwright`` is set, otherwise a plain
        HTTP GET. Returns None (after printing the error) on any failure.
        """
        if self.use_playwright:
            return await self._fetch_with_playwright(url)

        try:
            async with httpx.AsyncClient() as client:
                response = await client.get(
                    url,
                    headers=self._headers,
                    timeout=30,
                    follow_redirects=True
                )
                response.raise_for_status()
                return response.text
        except Exception as e:
            print(f"HTTP fetch error: {e}")
            return None

    async def _fetch_with_playwright(self, url: str) -> Optional[str]:
        """Fetch dynamic content using Playwright.

        Playwright is imported lazily so it stays an optional dependency.
        Returns None (after printing the error) on any failure, including
        Playwright not being installed.
        """
        try:
            from playwright.async_api import async_playwright

            async with async_playwright() as p:
                browser = await p.chromium.launch(headless=True)
                try:
                    page = await browser.new_page()
                    await page.goto(url, wait_until="networkidle", timeout=30000)
                    return await page.content()
                finally:
                    # Close the browser even when navigation times out.
                    await browser.close()
        except Exception as e:
            print(f"Playwright error: {e}")
            return None

    async def scrape_custom(
        self,
        url: str,
        name: str,
        item_selector: str,
        title_selector: str = "h2, h3",
        link_selector: str = "a",
        scrape_type: str = "custom"
    ) -> list[dict]:
        """Scrape a custom URL with provided selectors.

        Builds an ad-hoc target and delegates to ``scrape_target``. No
        deadline selector is supplied, so ``scrape_target`` falls back to
        its default ``.deadline`` selector.
        """
        target = {
            "name": name,
            "url": url,
            "type": scrape_type,
            "selectors": {
                "items": item_selector,
                "title": title_selector,
                "link": link_selector
            }
        }
        return await self.scrape_target(target)
backend/intelligence/__init__.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PIOE Intelligence Layer - Version 2.0
3
+ """
4
+ from .llm_client import LLMClient
5
+ from .scorer import RelevanceScorer
6
+ from .novelty import NoveltyDetector
7
+ from .classifier import OpportunityClassifier
8
+ from .credibility import CredibilityScorer
9
+ from .roi_scorer import ROIScorer
10
+ from .silent_detector import SilentOpportunityDetector, OpportunityLanguageDetector
11
+
12
+ __all__ = [
13
+ "LLMClient",
14
+ "RelevanceScorer",
15
+ "NoveltyDetector",
16
+ "OpportunityClassifier",
17
+ "CredibilityScorer",
18
+ "ROIScorer",
19
+ "SilentOpportunityDetector",
20
+ "OpportunityLanguageDetector",
21
+ ]
22
+
backend/intelligence/classifier.py ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PIOE Opportunity Classifier
3
+
4
+ Classifies opportunities into categories using rules and LLM.
5
+ """
6
+ from ..models import OpportunityCategory, Domain
7
+
8
+
9
class OpportunityClassifier:
    """
    Classifies opportunities into categories and domains.
    Uses rule-based classification first, LLM for ambiguous cases.
    """

    # Source type to category mapping (high priority)
    SOURCE_CATEGORY_MAP = {
        "arxiv": OpportunityCategory.RESEARCH,
        "github": OpportunityCategory.OPEN_SOURCE,
        "superteam": OpportunityCategory.BOUNTY,
        "grant_platform": OpportunityCategory.GRANT,
        "gov_portal": OpportunityCategory.GRANT,
    }

    # Keyword patterns for each category.
    # Matching is plain substring search on lower-cased text.
    CATEGORY_PATTERNS = {
        OpportunityCategory.SCHOLARSHIP: [
            "scholarship", "tuition", "financial aid", "merit award"
        ],
        OpportunityCategory.FELLOWSHIP: [
            "fellowship", "fellow program", "research fellow"
        ],
        OpportunityCategory.INTERNSHIP: [
            "internship", "intern ", "summer program", "co-op"
        ],
        OpportunityCategory.JOB: [
            "hiring", "job opening", "position available", "career opportunity",
            "we're looking for", "full-time", "remote job"
        ],
        OpportunityCategory.RESEARCH: [
            "research assistant", "ra position", "research opportunity", "arxiv",
            "abstract:", "we present", "we propose", "our method"
        ],
        OpportunityCategory.HACKATHON: [
            "hackathon", "buildathon", "hackers wanted", "hack day"
        ],
        OpportunityCategory.COMPETITION: [
            "competition", "challenge", "contest", "prize pool"
        ],
        OpportunityCategory.GRANT: [
            "grant program", "grant application", "grant funding", "grant deadline"
        ],
        OpportunityCategory.CONFERENCE: [
            "conference", "call for papers", "summit", "symposium"
        ],
        OpportunityCategory.OPEN_SOURCE: [
            "open source", "gsoc", "outreachy", "contributor wanted"
        ],
        OpportunityCategory.INVESTMENT: [
            "funding round", "series a", "series b", "vc funding", "raised $"
        ],
        OpportunityCategory.BOUNTY: [
            "bounty", "bug bounty", "earn reward", "usdc reward"
        ],
    }

    # Domain patterns.
    # NOTE(review): because matching is substring-based, short patterns such
    # as "ai" and "ros" also hit inside unrelated words ("available",
    # "across"). Not changed here; worth revisiting with word boundaries.
    DOMAIN_PATTERNS = {
        Domain.COMPUTER_VISION: [
            "computer vision", "image", "visual", "object detection", "segmentation", "opencv"
        ],
        Domain.ROBOTICS: [
            "robot", "ros", "autonomous", "manipulation", "navigation"
        ],
        Domain.AI: [
            "ai", "artificial intelligence", "machine learning", "deep learning",
            "neural network", "llm", "transformer", "gpt"
        ],
        Domain.FINANCE: [
            "finance", "fintech", "trading", "investment", "stock", "quantitative"
        ],
        Domain.CRYPTO: [
            "crypto", "blockchain", "web3", "defi", "solana", "ethereum", "nft"
        ],
        Domain.ACADEMIA: [
            "research", "phd", "postdoc", "university", "academic", "professor"
        ],
    }

    def classify_by_source(self, source_type: str, source_name: str = "") -> OpportunityCategory | None:
        """
        Classify primarily by source type.
        Returns category or None if source doesn't determine category.

        The exact ``source_type`` mapping is checked first, then substring
        patterns in ``source_name``. Both inputs may be None or empty.
        """
        source_lower = (source_type or "").lower()
        source_name_lower = (source_name or "").lower()

        # Check direct source mapping
        if source_lower in self.SOURCE_CATEGORY_MAP:
            return self.SOURCE_CATEGORY_MAP[source_lower]

        # Check source name patterns
        if "arxiv" in source_name_lower:
            return OpportunityCategory.RESEARCH
        if "github" in source_name_lower:
            return OpportunityCategory.OPEN_SOURCE
        if "profellow" in source_name_lower:
            return OpportunityCategory.FELLOWSHIP
        if "remoteok" in source_name_lower:
            return OpportunityCategory.JOB
        # NOTE(review): the three checks below are assumed to be nested under
        # the "hacker news" test (the reviewed copy had lost its
        # indentation) - confirm against the repository.
        if "hacker news" in source_name_lower:
            if "internship" in source_name_lower:
                return OpportunityCategory.INTERNSHIP
            if "robotics" in source_name_lower:
                return OpportunityCategory.RESEARCH
            if "jobs" in source_name_lower:
                return OpportunityCategory.JOB
        if "devfolio" in source_name_lower:
            return OpportunityCategory.HACKATHON

        return None

    def classify_by_rules(self, text: str) -> tuple[OpportunityCategory, Domain, float]:
        """
        Classify using keyword matching.
        Returns (category, domain, confidence)

        The category is the one with the most matching patterns (the first
        one wins a tie); confidence is ``min(0.3 * matches, 0.9)``. The
        domain is the single best-matching domain, or MIXED when no domain
        matches or several tie for the lead.
        """
        if not text:
            return OpportunityCategory.OTHER, Domain.MIXED, 0.0

        text_lower = text.lower()

        # Find matching category. The raw match count is tracked separately
        # from the scaled confidence: comparing counts against the
        # confidence let any later category with a single hit displace an
        # earlier one with more hits.
        category = OpportunityCategory.OTHER
        best_matches = 0

        for cat, patterns in self.CATEGORY_PATTERNS.items():
            matches = sum(1 for p in patterns if p in text_lower)
            if matches > best_matches:
                category = cat
                best_matches = matches

        cat_confidence = min(best_matches * 0.3, 0.9)

        # Find matching domain (counts computed once)
        domain_counts = {
            dom: sum(1 for p in patterns if p in text_lower)
            for dom, patterns in self.DOMAIN_PATTERNS.items()
        }
        top_count = max(domain_counts.values(), default=0)
        leaders = [d for d, c in domain_counts.items() if c == top_count and c > 0]

        # Exactly one leader -> that domain; none or a tie -> mixed
        domain = leaders[0] if len(leaders) == 1 else Domain.MIXED

        return category, domain, cat_confidence

    def classify(
        self,
        text: str,
        title: str = "",
        source_type: str = "",
        source_name: str = "",
        use_llm: bool = False,
        llm_client = None
    ) -> dict:
        """
        Classify opportunity with optional LLM enhancement.

        Returns dict with category, domain, confidence, method

        ``method`` is "source", "rules" or "llm". The LLM is consulted only
        when requested, when no source-based category was found, and when
        rule confidence is below 0.5; its answer is used only if it reports
        a higher confidence. LLM failures are printed and ignored.
        """
        full_text = f"{title} {text}".strip()

        # PRIORITY 1: Source-based classification (most reliable)
        source_category = self.classify_by_source(source_type, source_name)

        # PRIORITY 2: Rule-based keyword matching
        rule_category, domain, confidence = self.classify_by_rules(full_text)

        # Use source category if available (overrides keyword matching)
        if source_category:
            category = source_category
            confidence = 0.85  # High confidence for source-based
            method = "source"
        else:
            category = rule_category
            method = "rules"

        # Use LLM for low-confidence or ambiguous cases (only if no source match)
        if use_llm and llm_client and confidence < 0.5 and not source_category:
            try:
                llm_result = llm_client.classify(full_text)
                if llm_result.get("confidence", 0) > confidence:
                    return {
                        "category": llm_result.get("category", category.value),
                        "domain": llm_result.get("domain", domain.value),
                        "confidence": llm_result.get("confidence", confidence),
                        "method": "llm"
                    }
            except Exception as e:
                print(f"LLM classification failed: {e}")

        return {
            "category": category.value,
            "domain": domain.value,
            "confidence": confidence,
            "method": method
        }
214
+
backend/intelligence/credibility.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PIOE Credibility Scorer
3
+
4
+ Evaluates trustworthiness of sources and authors.
5
+ """
6
+ from ..models import SourceType
7
+
8
+
9
class CredibilityScorer:
    """
    Rates how trustworthy an opportunity is, combining the source it came
    from, signals found in its content, author credibility and (for social
    sources) engagement.
    """

    # Baseline trust per source type; unknown types fall back to 0.5.
    SOURCE_CREDIBILITY = {
        SourceType.ARXIV: 0.95,       # Academic papers - highest trust
        SourceType.GITHUB: 0.8,       # Open source - high trust
        SourceType.RSS: 0.7,          # Varies by feed
        SourceType.SUPERTEAM: 0.85,   # Official platform
        SourceType.REDDIT: 0.5,       # Community - variable
        SourceType.TWITTER: 0.4,      # Social - requires filtering
        SourceType.LINKEDIN: 0.6,     # Professional but noisy
        SourceType.WEB_SCRAPE: 0.5,   # Unknown quality
    }

    def __init__(self):
        pass

    def score_source(self, source_type: SourceType) -> float:
        """Return the baseline credibility for ``source_type`` (0.5 if unlisted)."""
        return self.SOURCE_CREDIBILITY.get(source_type, 0.5)

    def score_content_signals(self, text: str, metadata: dict = None) -> dict:
        """
        Derive per-signal scores from the text and its metadata.

        Each signal is 1.0 when present and 0.0 when absent, except
        ``has_organization``, which is 0.5 when no organization is known.
        """
        meta = metadata or {}
        haystack = text.lower() if text else ""

        def mentions(*phrases: str) -> bool:
            # True when any phrase occurs as a substring of the text.
            return any(phrase in haystack for phrase in phrases)

        # Official announcements usually carry a deadline.
        deadline_found = bool(meta.get("deadline")) or mentions(
            "deadline", "due date", "apply by", "closes"
        )
        # An explicit link or call to action.
        action_found = bool(meta.get("url")) or mentions(
            "apply here", "register at", "sign up"
        )
        # Wording that marks the item as a repost rather than an original.
        looks_reposted = mentions(
            "repost", "sharing", "fyi", "icymi", "in case you missed"
        )
        # Detailed requirements suggest a more credible posting.
        requirements_found = bool(meta.get("requirements")) or mentions(
            "requirements", "qualifications", "must have"
        )

        return {
            "has_deadline": 1.0 if deadline_found else 0.0,
            "has_organization": 1.0 if meta.get("organization") else 0.5,
            "has_action_url": 1.0 if action_found else 0.0,
            "is_original": 0.0 if looks_reposted else 1.0,
            "has_requirements": 1.0 if requirements_found else 0.0,
        }

    def calculate_signal_strength(self, signals: dict) -> float:
        """
        Collapse the content signals into one weighted score, rounded to
        three decimals. Signals missing from ``signals`` count as 0.
        """
        weighting = (
            ("has_deadline", 0.3),
            ("has_organization", 0.2),
            ("has_action_url", 0.2),
            ("is_original", 0.2),
            ("has_requirements", 0.1),
        )

        strength = 0
        for signal_name, weight in weighting:
            strength += signals.get(signal_name, 0) * weight
        return round(strength, 3)

    def score(
        self,
        source_type: SourceType,
        text: str = "",
        metadata: dict = None,
        author_credibility: float = 0.5,
        social_engagement: int = 0
    ) -> dict:
        """
        Compute the overall credibility of an opportunity.

        Returns a dict containing:
        - source_score: baseline credibility of the source
        - signal_strength: weighted content-signal score
        - signals: the individual content signals
        - credibility_score: weighted combination of all parts
        """
        base = self.score_source(source_type)
        signals = self.score_content_signals(text, metadata)
        strength = self.calculate_signal_strength(signals)

        # Only social sources earn an engagement bonus; the first tier whose
        # threshold is exceeded applies.
        boost = 0.0
        if source_type in (SourceType.REDDIT, SourceType.TWITTER):
            for threshold, bonus in ((100, 0.15), (50, 0.1), (20, 0.05)):
                if social_engagement > threshold:
                    boost = bonus
                    break

        # Weights: 50% source, 30% signals, 10% author, 10% engagement.
        # The engagement term starts from a neutral 0.5 and is capped at 1.0.
        combined = (
            0.5 * base +
            0.3 * strength +
            0.1 * author_credibility +
            0.1 * min(boost + 0.5, 1.0)
        )

        return {
            "source_score": round(base, 3),
            "signal_strength": strength,
            "signals": signals,
            "credibility_score": round(combined, 3)
        }
backend/intelligence/llm_client.py ADDED
@@ -0,0 +1,352 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PIOE LLM Client Abstraction Layer
3
+
4
+ Supports Gemini (default) and OpenAI as providers.
5
+ """
6
+ from abc import ABC, abstractmethod
7
+ from typing import Optional
8
+ import json
9
+
10
+ from ..config import get_settings
11
+
12
+
13
class BaseLLMClient(ABC):
    """Provider-independent interface that every LLM backend implements.

    All four methods are abstract, so a subclass can only be instantiated
    once it defines each of them.
    """

    @abstractmethod
    def classify(self, text: str) -> dict:
        """Assign a category and a domain to the given opportunity text."""

    @abstractmethod
    def summarize(self, text: str, max_length: int = 150) -> str:
        """Produce a short summary of the opportunity, bounded by ``max_length``."""

    @abstractmethod
    def recommend_action(self, opportunity: dict) -> dict:
        """Suggest what to do about ``opportunity`` given its context."""

    @abstractmethod
    def extract_metadata(self, text: str) -> dict:
        """Pull structured fields (deadline, location, reward, etc.) out of the text."""
35
+
36
+
37
+ class GeminiClient(BaseLLMClient):
38
+ """Google Gemini implementation."""
39
+
40
+ def __init__(self, api_key: str):
41
+ import google.generativeai as genai
42
+ genai.configure(api_key=api_key)
43
+ self.model = genai.GenerativeModel('gemini-1.5-flash')
44
+
45
+ def _generate(self, prompt: str, as_json: bool = False) -> str:
46
+ """Generate response from Gemini."""
47
+ response = self.model.generate_content(prompt)
48
+ return response.text
49
+
50
+ def classify(self, text: str) -> dict:
51
+ """Classify opportunity into category and domain."""
52
+ prompt = f"""Analyze this opportunity and classify it. Return JSON only.
53
+
54
+ TEXT: {text[:2000]}
55
+
56
+ Return this exact JSON structure:
57
+ {{
58
+ "category": "one of: scholarship, fellowship, internship, job, research, hackathon, competition, grant, conference, open_source, investment, weak_signal, other",
59
+ "domain": "one of: ai, computer_vision, robotics, finance, crypto, academia, mixed",
60
+ "confidence": 0.0 to 1.0
61
+ }}"""
62
+
63
+ try:
64
+ result = self._generate(prompt)
65
+ # Extract JSON from response
66
+ start = result.find('{')
67
+ end = result.rfind('}') + 1
68
+ if start != -1 and end > start:
69
+ return json.loads(result[start:end])
70
+ except Exception as e:
71
+ print(f"Classification error: {e}")
72
+
73
+ return {"category": "other", "domain": "mixed", "confidence": 0.0}
74
+
75
+ def summarize(self, text: str, max_length: int = 150) -> str:
76
+ """Generate concise summary."""
77
+ prompt = f"""Summarize this opportunity in {max_length} characters or less.
78
+ Focus on: what it is, who it's for, and deadline if any.
79
+
80
+ TEXT: {text[:2000]}
81
+
82
+ Return only the summary, no quotes or labels."""
83
+
84
+ try:
85
+ return self._generate(prompt).strip()[:max_length]
86
+ except Exception as e:
87
+ print(f"Summary error: {e}")
88
+ return text[:max_length]
89
+
90
+ def recommend_action(self, opportunity: dict) -> dict:
91
+ """
92
+ PIOE 2.0 Enhanced Action Guidance.
93
+ Returns comprehensive recommendations for how to approach the opportunity.
94
+ """
95
+ prompt = f"""You are an expert career and opportunity advisor. Analyze this opportunity and provide detailed action guidance.
96
+
97
+ OPPORTUNITY DETAILS:
98
+ - Title: {opportunity.get('title', '')}
99
+ - Category: {opportunity.get('category', '')}
100
+ - Domain: {opportunity.get('domain', '')}
101
+ - Deadline: {opportunity.get('deadline', 'No deadline specified')}
102
+ - Description: {opportunity.get('raw_text', '')[:1500]}
103
+ - ROI Score: {opportunity.get('roi_score', 'N/A')}
104
+ - Competition Level: {opportunity.get('competition_level', 'N/A')}
105
+ - Region: {opportunity.get('region', 'global')}
106
+
107
+ USER CONTEXT:
108
+ - Location: Nigeria, Africa
109
+ - Interests: AI, Computer Vision, Robotics, Web3
110
+ - Status: Student/Early Career
111
+
112
+ Provide strategic action guidance. Return JSON only:
113
+ {{
114
+ "primary_action": "one of: apply_now, apply_prepared, track, save_for_later, deep_research, network_first, skip",
115
+ "urgency": "one of: immediate, this_week, this_month, whenever, expired",
116
+ "timing_status": "one of: early, optimal, late, unknown",
117
+
118
+ "skills_to_highlight": ["skill1", "skill2", "skill3"],
119
+ "portfolio_pieces": ["project type 1", "project type 2"],
120
+
121
+ "preparation_steps": [
122
+ "step 1",
123
+ "step 2",
124
+ "step 3"
125
+ ],
126
+
127
+ "networking_tips": "who to contact or how to stand out (1 sentence)",
128
+ "differentiation_angle": "what unique angle to take (1 sentence)",
129
+
130
+ "success_probability": 0.0 to 1.0,
131
+ "time_investment_hours": estimated hours to apply well,
132
+ "risk_level": "low, medium, or high",
133
+
134
+ "why": "brief strategic reasoning (max 100 chars)",
135
+ "red_flags": ["any concerns"] or []
136
+ }}"""
137
+
138
+ try:
139
+ result = self._generate(prompt)
140
+ start = result.find('{')
141
+ end = result.rfind('}') + 1
142
+ if start != -1 and end > start:
143
+ parsed = json.loads(result[start:end])
144
+ # Ensure required fields exist
145
+ return {
146
+ "primary_action": parsed.get("primary_action", "save_for_later"),
147
+ "urgency": parsed.get("urgency", "whenever"),
148
+ "timing_status": parsed.get("timing_status", "unknown"),
149
+ "skills_to_highlight": parsed.get("skills_to_highlight", []),
150
+ "portfolio_pieces": parsed.get("portfolio_pieces", []),
151
+ "preparation_steps": parsed.get("preparation_steps", []),
152
+ "networking_tips": parsed.get("networking_tips", ""),
153
+ "differentiation_angle": parsed.get("differentiation_angle", ""),
154
+ "success_probability": parsed.get("success_probability", 0.3),
155
+ "time_investment_hours": parsed.get("time_investment_hours", 10),
156
+ "risk_level": parsed.get("risk_level", "medium"),
157
+ "why": parsed.get("why", "Review and decide"),
158
+ "red_flags": parsed.get("red_flags", []),
159
+ }
160
+ except Exception as e:
161
+ print(f"Action guidance error: {e}")
162
+
163
+ # Fallback response
164
+ return {
165
+ "primary_action": "save_for_later",
166
+ "urgency": "whenever",
167
+ "timing_status": "unknown",
168
+ "skills_to_highlight": [],
169
+ "portfolio_pieces": [],
170
+ "preparation_steps": ["Review the opportunity details", "Assess fit with your goals"],
171
+ "networking_tips": "",
172
+ "differentiation_angle": "",
173
+ "success_probability": 0.3,
174
+ "time_investment_hours": 10,
175
+ "risk_level": "medium",
176
+ "why": "Needs manual review",
177
+ "red_flags": [],
178
+ }
179
+
180
def extract_metadata(self, text: str) -> dict:
    """Ask the model for structured metadata describing an opportunity.

    Only the first 2000 characters of ``text`` are placed in the prompt.
    The reply is scanned for its outermost ``{...}`` span, which is then
    parsed as JSON.

    Returns:
        The parsed JSON object, or an empty dict when the reply contains
        no brace-delimited span or when generation/parsing raises (the
        error is printed, not propagated).
    """
    snippet = text[:2000]
    prompt = f"""Extract metadata from this opportunity text. Return JSON only.

TEXT: {snippet}

Return this structure (use null for missing info):
{{
"deadline": "YYYY-MM-DD or null",
"location": "location or 'remote' or null",
"reward": "amount or null",
"organization": "org name or null",
"requirements": ["skill1", "skill2"] or [],
"url": "application url or null"
}}"""

    metadata = {}
    try:
        reply = self._generate(prompt)
        opening = reply.find('{')
        closing = reply.rfind('}') + 1
        # Models often wrap JSON in prose or code fences; keep only the
        # outermost brace-delimited span.
        has_json_span = opening != -1 and closing > opening
        if has_json_span:
            metadata = json.loads(reply[opening:closing])
    except Exception as e:
        print(f"Metadata extraction error: {e}")

    return metadata
206
+
207
+
208
class OpenAIClient(BaseLLMClient):
    """OpenAI implementation (fallback).

    ``classify`` and ``summarize`` call the chat completions API;
    ``recommend_action`` and ``extract_metadata`` are static placeholders
    that never contact the API.
    """

    def __init__(self, api_key: str):
        # Imported lazily so the openai package is only required when this
        # client is actually selected.
        from openai import OpenAI
        self.client = OpenAI(api_key=api_key)
        self.model = "gpt-3.5-turbo"

    def _generate(self, prompt: str) -> str:
        """Send ``prompt`` as a single user message and return the reply text.

        Returns an empty string when the API reports no text content, so the
        declared ``str`` return type always holds.
        """
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.3
        )
        # message.content is optional in the SDK (e.g. refusals/tool calls).
        return response.choices[0].message.content or ""

    def classify(self, text: str) -> dict:
        """Classify an opportunity into category/domain/confidence.

        Returns:
            The JSON object parsed from the model reply, or the neutral
            fallback ``{"category": "other", "domain": "mixed",
            "confidence": 0.0}`` when the reply has no JSON span or any
            error occurs.
        """
        prompt = f"""Classify this opportunity. Return JSON only with keys: category, domain, confidence.
Categories: scholarship, fellowship, internship, job, research, hackathon, competition, grant, conference, open_source, investment, weak_signal, other
Domains: ai, computer_vision, robotics, finance, crypto, academia, mixed

TEXT: {text[:2000]}"""

        try:
            result = self._generate(prompt)
            start = result.find('{')
            end = result.rfind('}') + 1
            if start != -1 and end > start:
                return json.loads(result[start:end])
        except Exception as e:
            # Best-effort: report the failure instead of hiding it, then
            # fall through to the neutral classification.
            print(f"Classification error: {e}")
        return {"category": "other", "domain": "mixed", "confidence": 0.0}

    def summarize(self, text: str, max_length: int = 150) -> str:
        """Summarize ``text`` in at most ``max_length`` characters.

        Falls back to the truncated input when the API call fails or yields
        an empty reply.
        """
        prompt = f"Summarize in {max_length} chars: {text[:2000]}"
        try:
            summary = self._generate(prompt).strip()
        except Exception as e:
            print(f"Summarization error: {e}")
            summary = ""
        return (summary or text)[:max_length]

    def recommend_action(self, opportunity: dict) -> dict:
        """Return static "review later" guidance (no API call is made).

        The original three keys (``action``, ``reason``, ``urgency``) are
        kept unchanged for existing callers. The PIOE 2.0 guidance keys used
        by the other clients in this file are added with neutral defaults so
        consumers can read one schema regardless of provider.
        """
        return {
            # Legacy keys, values unchanged.
            "action": "save",
            "reason": "Review later",
            # NOTE(review): "low" is outside the 2.0 urgency vocabulary
            # ("whenever" would be the equivalent) - kept for compatibility.
            "urgency": "low",
            # PIOE 2.0 guidance keys with neutral defaults.
            "primary_action": "save_for_later",
            "timing_status": "unknown",
            "skills_to_highlight": [],
            "portfolio_pieces": [],
            "preparation_steps": ["Review the opportunity details", "Assess fit with your goals"],
            "networking_tips": "",
            "differentiation_angle": "",
            "success_probability": 0.3,
            "time_investment_hours": 10,
            "risk_level": "medium",
            "why": "Needs manual review",
            "red_flags": [],
        }

    def extract_metadata(self, text: str) -> dict:
        """Placeholder: metadata extraction is not implemented for OpenAI."""
        return {}
255
+
256
+
257
class LLMClient:
    """
    Factory exposing one shared LLM client for the whole process.

    Selection order: Gemini when it is the configured provider and has a key,
    otherwise OpenAI when an OpenAI key exists, otherwise the mock client.
    """

    # Cached client; populated on the first get_client() call.
    _instance: Optional[BaseLLMClient] = None

    @classmethod
    def get_client(cls) -> BaseLLMClient:
        """Return the cached client, building it from settings on first use."""
        if cls._instance is not None:
            return cls._instance

        config = get_settings()

        if config.ai_provider == "gemini" and config.gemini_api_key:
            client = GeminiClient(config.gemini_api_key)
        elif config.openai_api_key:
            client = OpenAIClient(config.openai_api_key)
        else:
            # No API keys configured: use the rule-based mock.
            client = MockLLMClient()

        cls._instance = client
        return client
280
+
281
+
282
class MockLLMClient(BaseLLMClient):
    """Mock client for development without API keys. PIOE 2.0 compatible."""

    # Ordered (category, domain, confidence, regex) rules; first match wins.
    # Most alternatives are plain substrings. "intern" is word-bounded so
    # that "international" / "internal" no longer count as internships.
    _CLASSIFY_RULES = (
        ("scholarship", "academia", 0.7, r"scholarship|fellowship|grant"),
        ("hackathon", "ai", 0.7, r"hackathon|competition|challenge"),
        ("internship", "mixed", 0.7, r"internship|\binterns?\b"),
        ("job", "mixed", 0.7, r"job|hiring|position"),
        ("bounty", "crypto", 0.7, r"bounty|ecosystem|solana|ethereum"),
        ("pitch_event", "mixed", 0.7, r"pitch|demo day|accelerator"),
        ("collaboration", "mixed", 0.6, r"collaborat|partner|looking for"),
    )

    def classify(self, text: str) -> dict:
        """Rule-based classification from keywords in ``text``.

        Returns:
            Dict with ``category``, ``domain`` and ``confidence``. Empty or
            ``None`` text, or text matching no rule, yields category
            ``"other"`` with confidence 0.3.
        """
        import re  # local import: only this method needs it

        text_lower = (text or "").lower()

        for category, domain, confidence, pattern in self._CLASSIFY_RULES:
            if re.search(pattern, text_lower):
                return {"category": category, "domain": domain, "confidence": confidence}

        return {"category": "other", "domain": "mixed", "confidence": 0.3}

    def summarize(self, text: str, max_length: int = 150) -> str:
        """Return the first ``max_length`` characters of ``text``."""
        return text[:max_length]

    def recommend_action(self, opportunity: dict) -> dict:
        """PIOE 2.0 action guidance - rule-based fallback.

        Looks up a canned (action, urgency, skills, portfolio) tuple by the
        opportunity's ``category``; unknown or missing categories get
        ``save_for_later`` / ``whenever`` with empty lists.
        """
        # A present-but-None category is treated like a missing one.
        category = opportunity.get("category") or "other"

        # Category-based action mapping
        action_map = {
            "hackathon": ("apply_now", "this_week", ["Python", "ML/AI"], ["Previous hackathon project"]),
            "grant": ("apply_prepared", "this_month", ["Technical writing", "Project planning"], ["Open source contributions"]),
            "ecosystem_grant": ("apply_prepared", "this_month", ["Solidity/Rust", "Web3"], ["DApp or smart contract"]),
            "internship": ("apply_now", "this_week", ["Relevant coursework", "Projects"], ["GitHub portfolio"]),
            "scholarship": ("apply_prepared", "this_month", ["Academic excellence", "Leadership"], ["Research paper or thesis"]),
            "bounty": ("apply_now", "immediate", ["Specific tech stack"], ["Related code samples"]),
            "pitch_event": ("apply_prepared", "this_month", ["Presentation", "Business model"], ["Pitch deck", "Demo video"]),
            "collaboration": ("network_first", "whenever", ["Domain expertise"], ["Relevant projects"]),
        }

        action, urgency, skills, portfolio = action_map.get(
            category,
            ("save_for_later", "whenever", [], [])
        )

        return {
            "primary_action": action,
            "urgency": urgency,
            "timing_status": "unknown",
            "skills_to_highlight": skills,
            "portfolio_pieces": portfolio,
            "preparation_steps": [
                "Review the opportunity requirements",
                "Prepare relevant materials",
                "Submit before deadline"
            ],
            "networking_tips": "Research the organization and connect with past participants",
            "differentiation_angle": "Highlight unique projects and Africa/Nigeria perspective",
            "success_probability": 0.3,
            "time_investment_hours": 10,
            "risk_level": "medium",
            "why": f"Standard approach for {category}",
            "red_flags": [],
        }

    def extract_metadata(self, text: str) -> dict:
        """The mock performs no extraction; always returns an empty dict."""
        return {}
352
+
backend/intelligence/novelty.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PIOE Novelty Detector
3
+
4
+ Detects if an opportunity is novel or a repeat of existing content.
5
+ Uses embedding similarity against historical database.
6
+ """
7
+ from typing import Optional
8
+ import numpy as np
9
+ from sqlalchemy.orm import Session
10
+
11
+ from ..models import Opportunity
12
+
13
+
14
class NoveltyDetector:
    """
    Detects novelty by comparing against historical opportunity embeddings.
    High novelty = new and unseen topics/opportunities.
    """

    def __init__(self, similarity_threshold: float = 0.85):
        """
        Args:
            similarity_threshold: If similarity > threshold, item is considered duplicate.
        """
        self.similarity_threshold = similarity_threshold

    def cosine_similarity(self, vec1: list[float], vec2: list[float]) -> float:
        """Calculate cosine similarity between two vectors.

        Returns 0.0 when either vector is empty or all-zero, or when the two
        vectors differ in shape (for example, embeddings of different
        dimensionality), instead of raising. The result is clipped to
        [-1, 1] to absorb floating-point overshoot.
        """
        a = np.asarray(vec1, dtype=float)
        b = np.asarray(vec2, dtype=float)

        if a.shape != b.shape or a.size == 0:
            return 0.0

        norm_a = np.linalg.norm(a)
        norm_b = np.linalg.norm(b)

        if norm_a == 0 or norm_b == 0:
            return 0.0

        similarity = float(np.dot(a, b) / (norm_a * norm_b))
        return max(-1.0, min(1.0, similarity))

    @staticmethod
    def _fully_novel() -> dict:
        """Result used when there is nothing to compare against."""
        return {
            "novelty_score": 1.0,
            "is_duplicate": False,
            "most_similar_id": None,
            "max_similarity": 0.0,
        }

    def calculate_novelty(
        self,
        embedding: list[float],
        db: "Session",
        limit: int = 100
    ) -> dict:
        """
        Calculate novelty score by comparing against recent opportunities.

        Args:
            embedding: Embedding of the candidate item; empty means "unknown".
            db: Database session used to load stored opportunities.
            limit: Maximum number of most recently discovered opportunities
                to compare against.

        Returns:
            dict with novelty_score, is_duplicate, most_similar_id and
            max_similarity. All four keys are present on every return path.
            most_similar_id is only set when the item is a duplicate.
        """
        if not embedding:
            return self._fully_novel()

        # Get recent opportunities with embeddings
        recent = db.query(Opportunity).filter(
            Opportunity.embedding.isnot(None)
        ).order_by(
            Opportunity.discovered_at.desc()
        ).limit(limit).all()

        if not recent:
            return self._fully_novel()

        max_similarity = 0.0
        most_similar_id = None

        for opp in recent:
            if opp.embedding:
                similarity = self.cosine_similarity(embedding, opp.embedding)
                if similarity > max_similarity:
                    max_similarity = similarity
                    most_similar_id = opp.id

        # Novelty is inverse of maximum similarity
        novelty_score = 1.0 - max_similarity
        is_duplicate = max_similarity > self.similarity_threshold

        return {
            "novelty_score": round(novelty_score, 3),
            "is_duplicate": is_duplicate,
            "most_similar_id": most_similar_id if is_duplicate else None,
            "max_similarity": round(max_similarity, 3)
        }

    def is_recycled_content(self, text: str) -> bool:
        """
        Rule-based check for recycled/aggregated content.
        Returns True if content appears to be recycled, i.e. the lowercased
        text contains one of the listicle/digest phrases below.
        """
        if not text:
            return False

        text_lower = text.lower()

        # Patterns indicating recycled content
        recycled_patterns = [
            "top 10",
            "top 5",
            "best tools",
            "complete guide",
            "everything you need to know",
            "roundup",
            "weekly digest",
            "news summary",
            "in case you missed",
            "trending this week"
        ]

        return any(pattern in text_lower for pattern in recycled_patterns)
backend/intelligence/roi_scorer.py ADDED
@@ -0,0 +1,340 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PIOE ROI Scorer - Version 2.0
3
+
4
+ Calculates "Is this worth my time?" score.
5
+ Key decision intelligence for prioritizing opportunities.
6
+ """
7
+ from datetime import datetime, timedelta
8
+ from typing import Optional
9
+
10
+
11
class ROIScorer:
    """
    Calculates ROI (Return on Investment) score for opportunities.

    Considers:
    - Time required
    - Probability of success
    - Financial/career upside
    - Opportunity chain unlocks
    - Competition level
    - Regional accessibility
    """

    # Weights for ROI calculation
    WEIGHTS = {
        "time_efficiency": 0.15,
        "success_probability": 0.25,
        "upside_potential": 0.25,
        "unlock_potential": 0.15,
        "competition": 0.10,
        "accessibility": 0.10,
    }

    # Category time requirements (hours)
    CATEGORY_TIME = {
        "hackathon": 40,
        "grant": 20,
        "micro_grant": 8,
        "ecosystem_grant": 25,
        "scholarship": 15,
        "fellowship": 20,
        "internship": 10,
        "job": 5,
        "research": 30,
        "bounty": 15,
        "pitch_event": 20,
        "ambassador": 10,
        "partnership": 5,
    }

    # Category upside potential (0-1)
    CATEGORY_UPSIDE = {
        "ecosystem_grant": 0.9,
        "grant": 0.85,
        "fellowship": 0.85,
        "scholarship": 0.8,
        "hackathon": 0.8,
        "micro_grant": 0.6,
        "pitch_event": 0.75,
        "internship": 0.7,
        "bounty": 0.5,
        "job": 0.6,
        "research": 0.65,
        "ambassador": 0.4,
        "partnership": 0.7,
    }

    # Category competition levels (0-1, higher = more competitive)
    CATEGORY_COMPETITION = {
        "scholarship": 0.9,
        "fellowship": 0.85,
        "job": 0.7,
        "internship": 0.75,
        "hackathon": 0.6,
        "grant": 0.5,
        "ecosystem_grant": 0.4,
        "micro_grant": 0.3,
        "bounty": 0.3,
        "pitch_event": 0.5,
        "ambassador": 0.35,
        "partnership": 0.4,
    }

    # Chain unlock values (which categories open doors)
    UNLOCK_VALUES = {
        "hackathon": 0.8,  # Opens: grants, accelerators, jobs
        "fellowship": 0.9,  # Opens: PhD, research, network
        "ecosystem_grant": 0.85,  # Opens: ecosystem jobs, more grants
        "internship": 0.7,  # Opens: full-time, network
        "research": 0.75,  # Opens: PhD, conference, collaboration
        "pitch_event": 0.7,  # Opens: investment, visibility
        "bounty": 0.4,  # Opens: ecosystem roles
        "ambassador": 0.5,  # Opens: community, ecosystem
    }

    def __init__(self, user_region: str = "nigeria"):
        self.user_region = user_region.lower()

    def calculate_roi(
        self,
        category: str,
        deadline: Optional[datetime] = None,
        grant_size: Optional[int] = None,
        region: str = "global",
        extra_data: Optional[dict] = None
    ) -> dict:
        """
        Calculate ROI score for an opportunity.

        Args:
            category: Opportunity category; lowercased, ``None``/empty
                becomes ``"other"``.
            deadline: Optional deadline. Naive values are compared against
                the current UTC time; aware values are converted to UTC.
            grant_size: Optional award amount used to raise the upside.
            region: Region label of the opportunity.
            extra_data: Optional hints (``technical_depth``, ``africa_focus``,
                ``nigeria_specific``) that adjust success probability.

        Returns dict with:
        - roi_score: 0.0 to 1.0
        - risk_level: low/medium/high
        - unlock_potential: 0.0 to 1.0
        - competition_level: 0.0 to 1.0
        - time_hours: estimated hours for the category
        - reasoning: explanation
        """
        extra_data = extra_data or {}
        category = category.lower() if category else "other"

        # Calculate component scores
        time_efficiency = self._calculate_time_efficiency(category, deadline)
        success_prob = self._calculate_success_probability(category, extra_data)
        upside = self._calculate_upside(category, grant_size)
        unlock = self._calculate_unlock_potential(category)
        competition = self._calculate_competition(category)
        accessibility = self._calculate_accessibility(region)

        # Weighted ROI score
        roi_score = (
            self.WEIGHTS["time_efficiency"] * time_efficiency +
            self.WEIGHTS["success_probability"] * success_prob +
            self.WEIGHTS["upside_potential"] * upside +
            self.WEIGHTS["unlock_potential"] * unlock +
            self.WEIGHTS["competition"] * (1 - competition) +  # Invert competition
            self.WEIGHTS["accessibility"] * accessibility
        )

        # Determine risk level
        risk_level = self._determine_risk(category, competition, deadline)

        # Generate reasoning
        reasoning = self._generate_reasoning(
            category, roi_score, risk_level,
            time_efficiency, success_prob, upside, accessibility
        )

        return {
            "roi_score": round(roi_score, 3),
            "risk_level": risk_level,
            "unlock_potential": round(unlock, 3),
            "competition_level": round(competition, 3),
            "time_hours": self.CATEGORY_TIME.get(category, 15),
            "reasoning": reasoning,
        }

    @staticmethod
    def _days_until(deadline) -> int:
        """Whole days from now (UTC) until ``deadline``.

        Timezone-aware deadlines are converted to UTC before comparison
        (dropping tzinfo without converting would shift the result by the
        UTC offset). Naive deadlines are taken as already being in UTC.
        Returns 30 when the value cannot be compared, e.g. it is not a
        datetime.
        """
        from datetime import timezone  # local import: keeps the block self-contained

        try:
            if deadline.tzinfo is not None:
                deadline = deadline.astimezone(timezone.utc).replace(tzinfo=None)
            now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
            return (deadline - now_utc).days
        except (AttributeError, TypeError, ValueError, OverflowError):
            return 30  # Default if comparison fails

    def _calculate_time_efficiency(
        self,
        category: str,
        deadline: Optional[datetime]
    ) -> float:
        """Score based on time required and deadline pressure."""
        base_hours = self.CATEGORY_TIME.get(category, 15)

        # Lower hours = higher efficiency
        efficiency = 1.0 - (min(base_hours, 60) / 60)

        # Deadline factor: only tight deadlines change the score.
        if deadline:
            days_left = self._days_until(deadline)
            if days_left < 3:
                efficiency *= 0.5  # Too rushed
            elif days_left < 7:
                efficiency *= 0.8  # Tight

        return min(efficiency, 1.0)

    def _calculate_success_probability(
        self,
        category: str,
        extra_data: dict
    ) -> float:
        """Estimate probability of success."""
        base_prob = {
            "bounty": 0.7,
            "micro_grant": 0.5,
            "ambassador": 0.5,
            "hackathon": 0.3,
            "ecosystem_grant": 0.25,
            "grant": 0.2,
            "internship": 0.2,
            "job": 0.15,
            "fellowship": 0.1,
            "scholarship": 0.1,
        }.get(category, 0.2)

        # Adjust based on extra data
        if extra_data.get("technical_depth") == "beginner":
            base_prob += 0.1
        if extra_data.get("africa_focus") or extra_data.get("nigeria_specific"):
            base_prob += 0.15  # Regional programs often less competitive

        return min(base_prob, 1.0)

    def _calculate_upside(
        self,
        category: str,
        grant_size: Optional[int]
    ) -> float:
        """Calculate potential upside."""
        base_upside = self.CATEGORY_UPSIDE.get(category, 0.5)

        # Adjust for grant size
        if grant_size:
            if grant_size > 50000:
                base_upside = min(base_upside + 0.2, 1.0)
            elif grant_size > 10000:
                base_upside = min(base_upside + 0.1, 1.0)

        return base_upside

    def _calculate_unlock_potential(self, category: str) -> float:
        """Calculate what doors this opens."""
        return self.UNLOCK_VALUES.get(category, 0.3)

    def _calculate_competition(self, category: str) -> float:
        """Estimate competition level."""
        return self.CATEGORY_COMPETITION.get(category, 0.5)

    def _calculate_accessibility(self, region: str) -> float:
        """Calculate accessibility based on user region."""
        region = (region or "global").lower()

        # Perfect match
        if region == self.user_region:
            return 1.0

        # Regional matches
        if self.user_region == "nigeria":
            if region in ["africa", "remote_africa"]:
                return 0.9
            elif region in ["global", "remote_global"]:
                return 0.7
            else:
                return 0.3

        # Global is accessible
        if region in ["global", "remote_global"]:
            return 0.8

        return 0.5

    def _determine_risk(
        self,
        category: str,
        competition: float,
        deadline: Optional[datetime]
    ) -> str:
        """Determine risk level (time sink risk)."""
        risk_score = 0

        # High time = high risk
        time_hours = self.CATEGORY_TIME.get(category, 15)
        if time_hours > 30:
            risk_score += 2
        elif time_hours > 15:
            risk_score += 1

        # High competition = high risk
        if competition > 0.7:
            risk_score += 2
        elif competition > 0.5:
            risk_score += 1

        # Tight deadline = high risk
        if deadline and self._days_until(deadline) < 5:
            risk_score += 2

        if risk_score >= 4:
            return "high"
        elif risk_score >= 2:
            return "medium"
        else:
            return "low"

    def _generate_reasoning(
        self,
        category: str,
        roi_score: float,
        risk_level: str,
        time_eff: float,
        success_prob: float,
        upside: float,
        accessibility: float
    ) -> str:
        """Generate human-readable reasoning.

        ``category`` and ``upside`` are accepted for signature stability but
        do not currently influence the text.
        """
        reasons = []

        if roi_score > 0.7:
            reasons.append("High-value opportunity")
        elif roi_score > 0.5:
            reasons.append("Moderate value")
        else:
            reasons.append("Consider carefully")

        if time_eff > 0.7:
            reasons.append("time-efficient")
        elif time_eff < 0.4:
            reasons.append("requires significant time")

        if success_prob > 0.4:
            reasons.append("good success odds")
        elif success_prob < 0.15:
            reasons.append("highly competitive")

        if accessibility > 0.8:
            reasons.append("region-accessible")
        elif accessibility < 0.5:
            reasons.append("may have access barriers")

        if risk_level == "low":
            reasons.append("low time-sink risk")
        elif risk_level == "high":
            reasons.append("high time investment")

        return ". ".join(reasons) + "."
backend/intelligence/scorer.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PIOE Relevance Scorer
3
+
4
+ Calculates relevance score based on keyword matching and semantic similarity.
5
+ """
6
+ from typing import Optional
7
+ import numpy as np
8
+ from sentence_transformers import SentenceTransformer
9
+
10
+ from ..config import get_settings
11
+
12
+
13
class RelevanceScorer:
    """
    Scores opportunities based on relevance to user interests.
    Uses both keyword matching and semantic similarity.
    """

    def __init__(self):
        self.settings = get_settings()
        # Both are created lazily on first access via the properties below.
        self._model: Optional[SentenceTransformer] = None
        self._interest_embedding: Optional[np.ndarray] = None

        # Build interest text from keywords
        self.interest_text = " ".join(self.settings.high_priority_keywords)

    @property
    def model(self) -> SentenceTransformer:
        """Lazy load the embedding model."""
        if self._model is None:
            self._model = SentenceTransformer('all-MiniLM-L6-v2')
        return self._model

    @property
    def interest_embedding(self) -> np.ndarray:
        """Get cached interest vector embedding."""
        if self._interest_embedding is None:
            self._interest_embedding = self.model.encode(self.interest_text)
        return self._interest_embedding

    def get_embedding(self, text: str) -> list[float]:
        """Generate embedding for text."""
        embedding = self.model.encode(text)
        return embedding.tolist()

    def score_keywords(self, text: str) -> float:
        """
        Score based on keyword presence.
        Returns 0.0 to 1.0
        """
        if not text:
            return 0.0

        text_lower = text.lower()
        matches = sum(
            1 for keyword in self.settings.high_priority_keywords
            if keyword.lower() in text_lower
        )

        # Normalize: more matches = higher score, capped at 1.0
        max_expected = 5  # Expect 5+ matches for full score
        return min(matches / max_expected, 1.0)

    def score_semantic(self, text: str) -> float:
        """
        Score based on semantic similarity to interest vector.
        Returns 0.0 to 1.0

        Returns the neutral value 0.5 when the similarity is undefined
        (a zero-norm or non-finite embedding) or when encoding fails, so the
        result is never NaN.
        """
        if not text:
            return 0.0

        try:
            text_embedding = self.model.encode(text)
            interest = self.interest_embedding

            denominator = float(
                np.linalg.norm(text_embedding) * np.linalg.norm(interest)
            )
            # Dividing by a zero norm would yield NaN rather than raise.
            if denominator == 0 or not np.isfinite(denominator):
                return 0.5

            # Cosine similarity
            similarity = float(np.dot(text_embedding, interest) / denominator)
            if not np.isfinite(similarity):
                return 0.5

            # Normalize from [-1, 1] to [0, 1]; clamp float overshoot.
            return min(max((similarity + 1) / 2, 0.0), 1.0)
        except Exception as e:
            print(f"Semantic scoring error: {e}")
            return 0.5

    def score(self, text: str, title: str = "") -> dict:
        """
        Calculate combined relevance score.
        Returns dict with individual and combined scores.
        """
        full_text = f"{title} {text}".strip()

        keyword_score = self.score_keywords(full_text)
        semantic_score = self.score_semantic(full_text)

        # Weighted average: keywords 40%, semantic 60%
        combined = 0.4 * keyword_score + 0.6 * semantic_score

        return {
            "keyword_score": round(keyword_score, 3),
            "semantic_score": round(semantic_score, 3),
            "relevance_score": round(combined, 3)
        }
backend/intelligence/silent_detector.py ADDED
@@ -0,0 +1,313 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PIOE Silent Opportunities Detector - Version 2.0
3
+
4
+ Detects implicit/hidden opportunities that are never announced clearly.
5
+ These appear in blog posts, tweets, Discord messages, research updates.
6
+
7
+ Examples:
8
+ - "We're exploring ideas around..."
9
+ - "We're looking for collaborators..."
10
+ - "If anyone is interested..."
11
+ - "We're building something new..."
12
+ """
13
+ import re
14
+ from typing import Optional
15
+
16
+
17
class SilentOpportunityDetector:
    """
    Detects implicit opportunities from content that doesn't
    explicitly announce them as opportunities.
    """

    # Patterns for implicit opportunities
    SIGNAL_PATTERNS = {
        # Pre-hiring signals
        "pre_hiring": [
            r"we(?:'re| are) (?:actively )?(?:looking|searching) for",
            r"we need (?:a |someone|people)",
            r"hiring (?:soon|next|this)",
            r"building (?:a |our |the )?team",
            r"if you(?:'re| are) interested in joining",
            r"open roles? (?:coming|soon)",
            r"dm (?:me|us) if (?:you(?:'re| are)|interested)",
            r"reach out if",
        ],

        # Pre-grant signals
        "pre_grant": [
            r"(?:we(?:'re| are)|we will be) (?:funding|supporting|backing)",
            r"grants? (?:coming|opening|soon|next)",
            r"ecosystem fund",
            r"builder(?:s)? program",
            r"retroactive (?:funding|rewards)",
            r"announcing.{0,30}funding",
            r"accepting applications",
        ],

        # Collaboration signals
        "collaboration": [
            r"looking for (?:collaborators?|partners?|co-founder)",
            r"seeking (?:collaborat|partner)",
            r"open to (?:collaborat|partner|work)",
            r"anyone (?:want|interested).{0,30}(?:build|work|collaborat)",
            r"let(?:'s| us) (?:build|work|create) together",
            r"who wants to",
            r"exploring.{0,30}partnership",
        ],

        # Project/research signals
        "research": [
            r"we(?:'re| are) (?:exploring|researching|investigating)",
            r"new (?:research|project|initiative)",
            r"call for (?:papers?|proposals?|abstracts?)",
            r"(?:research|academic) (?:collaboration|partnership)",
            r"phd (?:position|opportunity|student)",
            r"postdoc",
            r"looking for (?:interns?|students?)",
        ],

        # Community/ambassador signals
        "ambassador": [
            r"ambassador program",
            r"community (?:lead|manager|role)",
            r"help (?:us )?(?:grow|build|spread)",
            r"join (?:our|the) (?:community|team|movement)",
            r"early (?:adopter|supporter)",
        ],

        # Investment/demo signals
        "investment": [
            r"demo day",
            r"pitch (?:competition|event|day)",
            r"investor (?:meeting|demo|call)",
            r"raising (?:a |our )?(?:seed|round|series)",
            r"open to (?:investment|investors)",
        ],
    }

    # Strength indicators (modifiers); matched as whole words/phrases.
    STRENGTH_BOOSTERS = [
        r"immediately",
        r"urgently",
        r"actively",
        r"now",
        r"today",
        r"this week",
        r"asap",
        r"serious",
        r"exciting",
    ]

    # Negative patterns (reduce signal); matched as whole words/phrases.
    NOISE_PATTERNS = [
        r"not (?:looking|hiring|seeking)",
        r"no longer",
        r"was (?:looking|hiring)",
        r"used to",
        r"back in",
        r"years? ago",
        r"hypothetically",
        r"if only",
    ]

    @staticmethod
    def _whole_phrase(pattern: str) -> str:
        """Wrap ``pattern`` so it only matches on word boundaries.

        Prevents hits inside longer words, e.g. "now" in "know" or
        "back in" in "feedback in".
        """
        return rf"\b(?:{pattern})\b"

    @staticmethod
    def _no_signal() -> dict:
        """Result returned when no silent opportunity is detected."""
        return {
            "is_silent_opportunity": False,
            "opportunity_type": None,
            "signal_strength": 0.0,
            "detected_patterns": [],
            "recommended_category": None,
        }

    def detect(self, text: str, title: str = "") -> dict:
        """
        Analyze text for silent opportunity signals.

        ``None`` for ``text`` or ``title`` is treated as an empty string.
        Any noise pattern anywhere in the combined text vetoes detection.

        Returns:
        - is_silent_opportunity: bool
        - opportunity_type: str (pre_hiring, pre_grant, etc.)
        - signal_strength: float (0.0 to 1.0)
        - detected_patterns: list of matched snippets with surrounding context
        - recommended_category: str
        """
        full_text = f"{title or ''} {text or ''}".lower()

        # Check for noise patterns first
        if self._has_noise(full_text):
            return self._no_signal()

        # Detect patterns
        detected = {}
        for opp_type, patterns in self.SIGNAL_PATTERNS.items():
            matches = self._find_matches(full_text, patterns)
            if matches:
                detected[opp_type] = matches

        if not detected:
            return self._no_signal()

        # Primary type = the one with the most matching patterns
        # (ties resolve to the earliest entry in SIGNAL_PATTERNS).
        primary_type = max(detected, key=lambda k: len(detected[k]))

        # Calculate signal strength
        signal_strength = self._calculate_strength(
            full_text, detected, primary_type
        )

        # Map to category
        category_map = {
            "pre_hiring": "pre_hiring_signal",
            "pre_grant": "pre_grant_signal",
            "collaboration": "collaboration",
            "research": "research",
            "ambassador": "ambassador",
            "investment": "pitch_event",
        }

        return {
            "is_silent_opportunity": True,
            "opportunity_type": primary_type,
            "signal_strength": round(signal_strength, 3),
            "detected_patterns": detected[primary_type],
            "recommended_category": category_map.get(primary_type, "weak_signal"),
        }

    def _find_matches(self, text: str, patterns: list) -> list:
        """Return one context snippet per pattern that matches ``text``.

        Each snippet is the first match plus up to 20 characters on either
        side, stripped of surrounding whitespace.
        """
        matches = []
        for pattern in patterns:
            match = re.search(pattern, text, re.IGNORECASE)
            if match:
                # Get surrounding context
                start = max(0, match.start() - 20)
                end = min(len(text), match.end() + 20)
                matches.append(text[start:end].strip())
        return matches

    def _has_noise(self, text: str) -> bool:
        """Check if text contains noise patterns."""
        return any(
            re.search(self._whole_phrase(pattern), text, re.IGNORECASE)
            for pattern in self.NOISE_PATTERNS
        )

    def _calculate_strength(
        self,
        text: str,
        detected: dict,
        primary_type: str
    ) -> float:
        """Calculate signal strength.

        Starts at 0.5, adds 0.1 per matched pattern of the primary type
        (max +0.3), 0.05 per distinct booster present, and 0.1 when more
        than one signal type was detected. Capped at 1.0.
        """
        base_strength = 0.5

        # More patterns = stronger signal
        pattern_count = len(detected[primary_type])
        base_strength += min(pattern_count * 0.1, 0.3)

        # Check for strength boosters
        for booster in self.STRENGTH_BOOSTERS:
            if re.search(self._whole_phrase(booster), text, re.IGNORECASE):
                base_strength += 0.05

        # Multiple types of signals = stronger
        if len(detected) > 1:
            base_strength += 0.1

        # Cap at 1.0
        return min(base_strength, 1.0)

    def reclassify_opportunity(
        self,
        opportunity: dict
    ) -> tuple[Optional[str], float]:
        """
        Re-evaluate an existing opportunity for silent signals.

        Reads the ``title`` and ``raw_text`` keys of ``opportunity``.

        Returns (new_category, confidence), or ``(None, 0.0)`` when no
        silent signal is found.
        """
        title = opportunity.get("title", "")
        text = opportunity.get("raw_text", "")

        result = self.detect(text, title)

        if result["is_silent_opportunity"]:
            return (
                result["recommended_category"],
                result["signal_strength"]
            )

        return (None, 0.0)
247
+
248
+
249
class OpportunityLanguageDetector:
    """
    Detects the urgency, timing, and action language in opportunities.
    """

    # Checked in insertion order (early, optimal, late); first hit wins.
    TIMING_PATTERNS = {
        "early": [
            r"early (?:bird|access|application)",
            r"just (?:launched|announced|opened)",
            r"applications? (?:now )?open",
            r"first (?:round|batch|cohort)",
            r"founding",
            r"new program",
        ],
        "optimal": [
            r"applications? (?:open|accepted)",
            r"deadline (?:is )?(?:soon|approaching)",
            r"apply (?:now|today)",
            r"last call",
            r"extended deadline",
        ],
        "late": [
            # Optional count so "deadline in 3 days" matches, not only
            # "deadline in days".
            r"deadline (?:in )?(?:\d+ )?(?:days?|hours?)",
            r"closes? (?:soon|tomorrow|today)",
            r"final (?:day|hour|chance)",
            r"last (?:day|chance)",
        ],
    }

    def detect_timing(self, text: str) -> str:
        """Detect application timing.

        Returns:
            "early", "optimal" or "late" for the first timing group with a
            matching pattern, or "unknown" when nothing matches or ``text``
            is empty/``None``.
        """
        text = (text or "").lower()

        for timing, patterns in self.TIMING_PATTERNS.items():
            for pattern in patterns:
                if re.search(pattern, text, re.IGNORECASE):
                    return timing

        return "unknown"

    def extract_action_items(self, text: str) -> list:
        """Extract actionable items from text.

        For each action pattern that matches, returns the first match with
        up to 10 characters before and 30 after it. At most 5 snippets are
        returned; empty/``None`` text yields an empty list.
        """
        text = text or ""
        actions = []

        # Common action patterns
        action_patterns = [
            r"apply (?:at|via|through|here)",
            r"visit (?:our|the) (?:website|page|link)",
            r"(?:fill|submit).{0,20}(?:form|application)",
            r"send.{0,20}(?:email|resume|cv|portfolio)",
            r"register (?:at|on|here)",
            r"sign up",
            r"join.{0,20}(?:discord|telegram|slack)",
            r"dm (?:me|us)",
            r"follow.{0,10}on",
        ]

        for pattern in action_patterns:
            match = re.search(pattern, text, re.IGNORECASE)
            if match:
                start = max(0, match.start() - 10)
                end = min(len(text), match.end() + 30)
                actions.append(text[start:end].strip())

        return actions[:5]  # Limit to 5 actions
backend/main.py ADDED
@@ -0,0 +1,481 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PIOE - Personal Intelligence & Opportunity Engine
3
+
4
+ FastAPI Backend Application
5
+ """
6
+ from fastapi import FastAPI, Depends, HTTPException, Query, BackgroundTasks
7
+ from fastapi.staticfiles import StaticFiles
8
+ from fastapi.responses import HTMLResponse, JSONResponse
9
+ from fastapi.middleware.cors import CORSMiddleware
10
+ from sqlalchemy.orm import Session
11
+ from datetime import datetime
12
+ from typing import Optional
13
+ from pathlib import Path
14
+
15
+ from .database import get_db, init_db
16
+ from .models import Opportunity, OpportunityCategory, OpportunityStatus, Domain
17
+ from .delivery import DigestGenerator
18
+ from .ingestion import IngestionScheduler
19
+
20
# Initialize app
app = FastAPI(
    title="PIOE - Personal Intelligence & Opportunity Engine",
    description="Signal intelligence system for opportunities in AI, Robotics, and more",
    version="1.0.0"
)

# CORS middleware
# Wildcard origins must not be combined with credentialed requests: the CORS
# spec forbids "Access-Control-Allow-Origin: *" together with credentials, and
# FastAPI documents that allow_origins cannot be ["*"] when allow_credentials
# is True. No route visible in this module relies on cookies or auth headers,
# so credentials are disabled rather than restricting the origin list.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Global scheduler instance (created on startup or lazily by the ingest routes)
scheduler: Optional[IngestionScheduler] = None
38
+
39
+
40
@app.on_event("startup")
async def startup():
    """Run init_db() and construct the module-level ingestion scheduler."""
    global scheduler
    init_db()
    # The scheduler is only constructed here, never started: ingestion begins
    # when the user triggers it through the ingest endpoints.
    scheduler = IngestionScheduler()
    print("PIOE Backend started. Run /api/ingest/start to begin ingestion.")
48
+
49
+
50
@app.on_event("shutdown")
async def shutdown():
    """Stop the ingestion scheduler on application shutdown, if one exists."""
    global scheduler
    if not scheduler:
        return
    scheduler.stop()
56
+
57
+
58
+ # ============== API Routes ==============
59
+
60
@app.get("/", response_class=HTMLResponse)
async def serve_dashboard():
    """Serve the frontend dashboard.

    Returns the contents of ``<project>/frontend/index.html`` when that file
    exists, otherwise a small placeholder page. Both responses use status 200.
    """
    frontend_path = Path(__file__).parent.parent / "frontend" / "index.html"
    if frontend_path.exists():
        # Decode explicitly as UTF-8: the platform default encoding (e.g.
        # cp1252 on Windows) would corrupt or reject non-ASCII characters.
        return HTMLResponse(
            content=frontend_path.read_text(encoding="utf-8"),
            status_code=200,
        )
    return HTMLResponse(content="<h1>PIOE Dashboard - Frontend not found</h1>", status_code=200)
67
+
68
+
69
+ # ---------- Opportunities ----------
70
+
71
@app.get("/api/opportunities")
async def get_opportunities(
    db: Session = Depends(get_db),
    category: Optional[str] = None,
    domain: Optional[str] = None,
    status: Optional[str] = None,
    min_score: float = 0.0,
    limit: int = Query(default=50, ge=1, le=200),
    offset: int = Query(default=0, ge=0)
):
    """Get filtered list of opportunities.

    Args:
        db: Database session.
        category, domain, status: Optional enum values to filter on. Values
            that are not valid enum members are ignored (no filter applied).
        min_score: Lower bound on ``combined_score``.
        limit: Page size, 1-200. A lower bound is enforced because a zero or
            negative value would otherwise be passed straight to the database
            (NOTE(review): SQLite treats a negative LIMIT as "no limit").
        offset: Number of rows to skip; must not be negative.

    Returns:
        Dict with ``total`` (matches before pagination), ``limit``, ``offset``
        and the serialized ``opportunities`` page, best score first.
    """
    query = db.query(Opportunity).filter(
        Opportunity.combined_score >= min_score
    )

    enum_filters = (
        (category, OpportunityCategory, Opportunity.category),
        (domain, Domain, Opportunity.domain),
        (status, OpportunityStatus, Opportunity.status),
    )
    for raw_value, enum_cls, column in enum_filters:
        if not raw_value:
            continue
        try:
            query = query.filter(column == enum_cls(raw_value))
        except ValueError:
            # Deliberately lenient: an unknown value leaves the query unfiltered.
            continue

    total = query.count()

    # Secondary sort on the primary key keeps pages stable when several rows
    # share the same combined_score.
    opportunities = query.order_by(
        Opportunity.combined_score.desc(),
        Opportunity.id
    ).offset(offset).limit(limit).all()

    def serialize(o):
        """Convert one Opportunity row into the list-view JSON shape."""
        risk = getattr(o, 'risk_level', None)
        region = getattr(o, 'region', None)
        return {
            "id": o.id,
            "title": o.title,
            "category": o.category.value if o.category else None,
            "domain": o.domain.value if o.domain else None,
            "source_name": o.source_name,
            "url": o.url,
            "deadline": o.deadline.isoformat() if o.deadline else None,
            "relevance_score": o.relevance_score,
            "novelty_score": o.novelty_score,
            "credibility_score": o.credibility_score,
            "combined_score": o.combined_score,
            # PIOE 2.0 fields
            "roi_score": getattr(o, 'roi_score', None),
            "risk_level": risk.value if risk else "medium",
            "region": region.value if region else "global",
            "status": o.status.value if o.status else None,
            "discovered_at": o.discovered_at.isoformat() if o.discovered_at else None,
            # The list view only carries a 500-character preview of the text.
            "raw_text": o.raw_text[:500] if o.raw_text else None
        }

    return {
        "total": total,
        "limit": limit,
        "offset": offset,
        "opportunities": [serialize(o) for o in opportunities]
    }
138
+
139
+
140
@app.get("/api/opportunities/{opportunity_id}")
async def get_opportunity(opportunity_id: str, db: Session = Depends(get_db)):
    """Get single opportunity by ID with full PIOE 2.0 details.

    Raises:
        HTTPException: 404 when no opportunity has the given ID.
    """
    opp = db.query(Opportunity).filter(Opportunity.id == opportunity_id).first()
    if not opp:
        raise HTTPException(status_code=404, detail="Opportunity not found")

    def iso(moment):
        """ISO-8601 string for a datetime, or None when unset."""
        return moment.isoformat() if moment else None

    def enum_value(member, fallback=None):
        """The enum member's value, or ``fallback`` when unset."""
        return member.value if member else fallback

    payload = {
        "id": opp.id,
        "title": opp.title,
        "category": enum_value(opp.category),
        "domain": enum_value(opp.domain),
        "source_name": opp.source_name,
        "source_type": enum_value(opp.source_type),
        "url": opp.url,
        "deadline": iso(opp.deadline),
        "published_at": iso(opp.published_at),
        "discovered_at": iso(opp.discovered_at),
        "raw_text": opp.raw_text,
    }
    # Core scores
    payload.update({
        "relevance_score": opp.relevance_score,
        "novelty_score": opp.novelty_score,
        "credibility_score": opp.credibility_score,
        "signal_strength": opp.signal_strength,
        "combined_score": opp.combined_score,
    })
    # PIOE 2.0: decision intelligence and regional fields
    payload.update({
        "roi_score": getattr(opp, 'roi_score', None),
        "unlock_potential": getattr(opp, 'unlock_potential', None),
        "risk_level": enum_value(getattr(opp, 'risk_level', None), "medium"),
        "competition_level": getattr(opp, 'competition_level', None),
        "region": enum_value(getattr(opp, 'region', None), "global"),
        "region_weight": getattr(opp, 'region_weight', 1.0),
    })
    # Status and metadata
    payload["status"] = enum_value(opp.status)
    payload["metadata"] = opp.extra_data
    return payload
177
+
178
+
179
@app.get("/api/opportunities/{opportunity_id}/guidance")
async def get_action_guidance(opportunity_id: str, db: Session = Depends(get_db)):
    """PIOE 2.0: Get AI-powered action guidance for an opportunity.

    Raises:
        HTTPException: 404 when the opportunity does not exist; 503 when the
            LLM client cannot be created or fails to produce guidance.
    """
    from .intelligence import LLMClient

    opp = db.query(Opportunity).filter(Opportunity.id == opportunity_id).first()
    if not opp:
        raise HTTPException(status_code=404, detail="Opportunity not found")

    def score(name, fallback=0.5):
        """Read a numeric score, treating a missing or NULL value as ``fallback``."""
        value = getattr(opp, name, None)
        return fallback if value is None else value

    region = getattr(opp, 'region', None)

    # Build opportunity dict for LLM
    opp_dict = {
        "title": opp.title,
        "category": opp.category.value if opp.category else "other",
        "domain": opp.domain.value if opp.domain else "mixed",
        "deadline": opp.deadline.isoformat() if opp.deadline else None,
        "raw_text": opp.raw_text or "",
        "roi_score": score('roi_score'),
        "competition_level": score('competition_level'),
        "region": region.value if region else "global",
    }

    # Get action guidance from LLM. Any failure here (missing API key, network
    # error, provider error) is reported as 503 instead of an opaque 500.
    try:
        llm = LLMClient.get_client()
        guidance = llm.recommend_action(opp_dict)
    except HTTPException:
        raise
    except Exception as exc:
        raise HTTPException(
            status_code=503,
            detail="AI guidance is currently unavailable"
        ) from exc

    return {
        "opportunity_id": opportunity_id,
        "guidance": guidance
    }
208
+
209
+
210
@app.patch("/api/opportunities/{opportunity_id}/status")
async def update_opportunity_status(
    opportunity_id: str,
    status: str,
    db: Session = Depends(get_db)
):
    """Update opportunity status (save, apply, dismiss, etc.).

    Raises:
        HTTPException: 404 when the opportunity does not exist; 400 when
            ``status`` is not a valid OpportunityStatus value.
    """
    opp = db.query(Opportunity).filter(Opportunity.id == opportunity_id).first()
    if not opp:
        raise HTTPException(status_code=404, detail="Opportunity not found")

    # Only the enum conversion is guarded: a ValueError raised while
    # committing must not be reported to the client as "Invalid status".
    try:
        new_status = OpportunityStatus(status)
    except ValueError:
        raise HTTPException(status_code=400, detail=f"Invalid status: {status}")

    opp.status = new_status
    db.commit()
    return {"success": True, "new_status": status}
227
+
228
+
229
+ # ---------- Digest ----------
230
+
231
@app.get("/api/digest/daily")
async def get_daily_digest(
    db: Session = Depends(get_db),
    limit: int = Query(default=10, ge=1, le=200)
):
    """Get today's opportunity digest (at most ``limit`` items, 1-200)."""
    generator = DigestGenerator(db)
    digest = generator.generate_daily(limit)
    return {"digest": digest}


@app.get("/api/digest/weekly")
async def get_weekly_digest(
    db: Session = Depends(get_db),
    limit: int = Query(default=25, ge=1, le=200)
):
    """Get weekly opportunity digest (at most ``limit`` items, 1-200)."""
    generator = DigestGenerator(db)
    digest = generator.generate_weekly(limit)
    return {"digest": digest}


@app.get("/api/digest/urgent")
async def get_urgent_digest(
    db: Session = Depends(get_db),
    limit: int = Query(default=10, ge=1, le=200)
):
    """Get urgent opportunities with approaching deadlines (1-200 items)."""
    generator = DigestGenerator(db)
    digest = generator.generate_urgent(limit)
    return {"digest": digest}


# Declared after the fixed /api/digest/* routes so that "daily", "weekly" and
# "urgent" are not captured by the {category} path parameter.
@app.get("/api/digest/{category}")
async def get_category_digest(
    category: str,
    db: Session = Depends(get_db),
    limit: int = Query(default=10, ge=1, le=200)
):
    """Get digest for specific category.

    Raises:
        HTTPException: 400 when ``category`` is not a valid
            OpportunityCategory value.
    """
    try:
        cat = OpportunityCategory(category)
    except ValueError:
        raise HTTPException(status_code=400, detail=f"Invalid category: {category}")

    generator = DigestGenerator(db)
    digest = generator.generate_by_category(cat, limit)
    return {"digest": digest}
270
+
271
+
272
+ # ---------- Ingestion Control ----------
273
+
274
def _ensure_scheduler():
    """Return the module-level scheduler, constructing it on first use."""
    global scheduler
    if not scheduler:
        scheduler = IngestionScheduler()
    return scheduler


@app.post("/api/ingest/run")
async def run_ingestion(background_tasks: BackgroundTasks):
    """Queue a full ingestion run as a background task and return at once."""
    active = _ensure_scheduler()
    background_tasks.add_task(active.run_full_ingestion)
    return {"message": "Ingestion started in background"}


@app.post("/api/ingest/source/{source_name}")
async def run_source_ingestion(source_name: str, background_tasks: BackgroundTasks):
    """Queue ingestion of one named source as a background task."""
    active = _ensure_scheduler()
    background_tasks.add_task(active.ingest_single_source, source_name)
    return {"message": f"Ingestion started for {source_name}"}


@app.post("/api/ingest/start")
async def start_scheduler():
    """Start the automatic ingestion scheduler, creating it if needed."""
    _ensure_scheduler().start()
    return {"message": "Scheduler started"}


@app.post("/api/ingest/stop")
async def stop_scheduler():
    """Stop the automatic ingestion scheduler.

    The same message is returned whether or not a scheduler existed.
    """
    global scheduler
    if scheduler:
        scheduler.stop()
    return {"message": "Scheduler stopped"}
314
+
315
+
316
+ # ---------- Stats ----------
317
+
318
@app.get("/api/stats")
async def get_stats(db: Session = Depends(get_db)):
    """Get overview statistics.

    Returns:
        Dict with the total number of opportunities, the number still in
        status NEW, and per-category / per-domain counts. Rows whose category
        or domain is unset are counted under the key "unknown".
    """
    from sqlalchemy import func

    def counts_by(column):
        """Map each distinct value of ``column`` to its row count."""
        rows = db.query(column, func.count(Opportunity.id)).group_by(column).all()
        return {
            (key.value if key else "unknown"): count
            for key, count in rows
        }

    total = db.query(Opportunity).count()
    unseen = db.query(Opportunity).filter(
        Opportunity.status == OpportunityStatus.NEW
    ).count()
    by_category = counts_by(Opportunity.category)
    by_domain = counts_by(Opportunity.domain)

    return {
        "total_opportunities": total,
        "new_opportunities": unseen,
        "by_category": by_category,
        "by_domain": by_domain
    }
350
+
351
+
352
+ # ---------- AI Chat ----------
353
+
354
+ from pydantic import BaseModel
355
+
356
class ChatMessage(BaseModel):
    """Request body for /api/chat: one free-text user message."""
    message: str


# How many opportunities are loaded per chat request, and how many of those
# are summarised into the LLM prompt.
_CHAT_CANDIDATE_LIMIT = 100
_CHAT_CONTEXT_LIMIT = 50

# Words ignored by the keyword fallback: they match almost every record when
# used as substrings and carry no search intent.
_CHAT_STOPWORDS = frozenset({
    "a", "an", "and", "any", "are", "at", "available", "for", "find", "from",
    "get", "give", "i", "in", "is", "me", "of", "on", "or", "show", "some",
    "that", "the", "there", "to", "what", "which", "with",
})


def _chat_summary_line(o) -> str:
    """Render one opportunity as a single pipe-separated prompt line."""
    roi = getattr(o, 'roi_score', None)
    if roi is None:
        # A NULL score cannot be formatted with ':.0%'; use the column default.
        roi = 0.5
    region = getattr(o, 'region', None)
    risk = getattr(o, 'risk_level', None)
    return (
        f"[{o.id}] {o.title}"
        f" | Category: {o.category.value if o.category else 'other'}"
        f" | Domain: {o.domain.value if o.domain else 'mixed'}"
        f" | Region: {region.value if region else 'global'}"
        f" | ROI: {roi:.0%}"
        f" | Risk: {risk.value if risk else 'medium'}"
    )


def _chat_matched_id_set(raw_ids) -> set:
    """Normalise the LLM's ``matched_ids`` value into a set of strings."""
    if not raw_ids:
        return set()
    if isinstance(raw_ids, (list, tuple, set)):
        return {str(item) for item in raw_ids}
    # A lone ID (string or number) instead of a list.
    return {str(raw_ids)}


def _chat_keywords(message: str) -> list:
    """Split a user message into lowercase search keywords.

    Surrounding punctuation is stripped and stopwords are removed; if that
    leaves nothing, the unfiltered tokens are used so a search still happens.
    """
    tokens = [tok.strip("?.,!:;\"'()[]") for tok in message.lower().split()]
    tokens = [tok for tok in tokens if tok]
    meaningful = [tok for tok in tokens if tok not in _CHAT_STOPWORDS]
    return meaningful or tokens


@app.post("/api/chat")
async def chat_with_opportunities(
    chat: ChatMessage,
    db: Session = Depends(get_db)
):
    """
    PIOE 2.0: AI-powered chat to search and explore opportunities.
    Ask questions like:
    - "Find me hackathons in Nigeria"
    - "What grants are available for AI projects?"
    - "Show me high ROI opportunities with low competition"

    Returns a dict with ``response``, ``opportunities`` (at most 10),
    ``suggested_action`` and ``total_searched``. If the LLM call or the
    parsing of its reply fails, a plain keyword search over the same
    candidates is returned instead.
    """
    from .intelligence import LLMClient

    user_message = chat.message.strip()
    if not user_message:
        return {"response": "Please ask a question about opportunities.", "opportunities": []}

    # Candidate pool: highest-scoring opportunities above a minimum score.
    opportunities = db.query(Opportunity).filter(
        Opportunity.combined_score >= 0.3
    ).order_by(Opportunity.combined_score.desc()).limit(_CHAT_CANDIDATE_LIMIT).all()

    # Build context for LLM (only the top slice is put into the prompt).
    opp_summaries = [_chat_summary_line(o) for o in opportunities[:_CHAT_CONTEXT_LIMIT]]
    opp_context = "\n".join(opp_summaries) if opp_summaries else "No opportunities found in database."

    # Create prompt for LLM
    prompt = f"""You are PIOE, a Personal Intelligence & Opportunity Engine assistant.
The user is from Nigeria and interested in AI, Computer Vision, Robotics, and Web3 opportunities.

AVAILABLE OPPORTUNITIES:
{opp_context}

USER QUESTION: {user_message}

Instructions:
1. Answer the user's question based on the opportunities above
2. If they're searching for specific types, list the most relevant opportunity IDs
3. Provide actionable advice
4. Be concise but helpful
5. If no matching opportunities exist, suggest what to search for

Return a JSON response:
{{
    "response": "Your helpful answer here",
    "matched_ids": ["id1", "id2"] or [] if none match,
    "suggested_action": "What the user should do next"
}}"""

    try:
        llm = LLMClient.get_client()
        result = llm._generate(prompt) if hasattr(llm, '_generate') else '{"response": "AI not configured", "matched_ids": [], "suggested_action": "Configure Gemini API key"}'

        import json
        # Try to parse the outermost {...} span of the reply as JSON; a reply
        # without braces is passed through verbatim.
        start = result.find('{')
        end = result.rfind('}') + 1
        if start != -1 and end > start:
            parsed = json.loads(result[start:end])
            response_text = parsed.get("response", result)
            matched_ids = _chat_matched_id_set(parsed.get("matched_ids"))
            suggested_action = parsed.get("suggested_action") or ""
        else:
            response_text = result
            matched_ids = set()
            suggested_action = ""

        # Get the matched opportunities, preserving score order.
        matched_opps = []
        for opp in opportunities:
            if str(opp.id) not in matched_ids:
                continue
            risk = getattr(opp, 'risk_level', None)
            region = getattr(opp, 'region', None)
            matched_opps.append({
                "id": opp.id,
                "title": opp.title,
                "category": opp.category.value if opp.category else None,
                "domain": opp.domain.value if opp.domain else None,
                "url": opp.url,
                "roi_score": getattr(opp, 'roi_score', None),
                "risk_level": risk.value if risk else "medium",
                "region": region.value if region else "global",
            })

        return {
            "response": response_text,
            "opportunities": matched_opps[:10],
            "suggested_action": suggested_action,
            "total_searched": len(opportunities)
        }

    except Exception as e:
        # Fallback: Simple keyword search
        keywords = _chat_keywords(user_message)
        matched = []
        for o in opportunities:
            text = f"{o.title} {o.raw_text or ''}".lower()
            if any(kw in text for kw in keywords):
                matched.append({
                    "id": o.id,
                    "title": o.title,
                    "category": o.category.value if o.category else None,
                    "url": o.url,
                    "roi_score": getattr(o, 'roi_score', None),
                })

        return {
            "response": f"Found {len(matched)} opportunities matching your search. (AI unavailable: {str(e)[:50]})",
            "opportunities": matched[:10],
            "suggested_action": "Click on any opportunity for details",
            "total_searched": len(opportunities)
        }
475
+
476
+
477
# Mount static files (frontend assets)
frontend_dir = Path(__file__).parent.parent / "frontend"
# is_dir() rather than exists(): a regular file named "frontend" would pass
# exists() but cannot be served as a static directory.
if frontend_dir.is_dir():
    app.mount("/static", StaticFiles(directory=str(frontend_dir)), name="static")
481
+
backend/models.py ADDED
@@ -0,0 +1,237 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PIOE Database Models - Version 2.0
3
+ Personal Advantage Engine
4
+ """
5
+ from sqlalchemy import Column, String, Float, DateTime, Text, Boolean, Integer, JSON, ForeignKey, Enum as SQLEnum
6
+ from sqlalchemy.orm import relationship
7
+ from datetime import datetime
8
+ import uuid
9
+ import enum
10
+
11
+ from .database import Base
12
+
13
+
14
class OpportunityCategory(str, enum.Enum):
    """Kinds of opportunity an item can be classified as (PIOE 2.0 extended set).

    Members are ``str`` subclasses, so each compares equal to its lowercase
    value and can be constructed from it, e.g. ``OpportunityCategory("grant")``.
    """

    # --- Standard opportunities ---
    SCHOLARSHIP = "scholarship"
    FELLOWSHIP = "fellowship"
    INTERNSHIP = "internship"
    JOB = "job"
    RESEARCH = "research"
    HACKATHON = "hackathon"
    COMPETITION = "competition"
    CONFERENCE = "conference"
    OPEN_SOURCE = "open_source"

    # --- Grant types (PIOE 2.0) ---
    GRANT = "grant"
    MICRO_GRANT = "micro_grant"
    ECOSYSTEM_GRANT = "ecosystem_grant"
    INNOVATION_FUND = "innovation_fund"

    # --- Partnership & collaboration (PIOE 2.0) ---
    PARTNERSHIP = "partnership"
    COLLABORATION = "collaboration"

    # --- Events & showcases (PIOE 2.0) ---
    PITCH_EVENT = "pitch_event"
    DEMO_DAY = "demo_day"
    TALENT_CALL = "talent_call"

    # --- Web3/crypto specific (PIOE 2.0) ---
    BOUNTY = "bounty"
    AMBASSADOR = "ambassador"

    # --- Silent/implicit opportunities (PIOE 2.0) ---
    PRE_GRANT_SIGNAL = "pre_grant_signal"
    PRE_HIRING_SIGNAL = "pre_hiring_signal"
    WEAK_SIGNAL = "weak_signal"

    # --- Other ---
    INVESTMENT = "investment"
    OTHER = "other"
54
+
55
+
56
class OpportunityStatus(str, enum.Enum):
    """Where an opportunity stands from the user's point of view.

    Members are ``str`` subclasses constructed from their lowercase value.
    """

    NEW = "new"
    SAVED = "saved"
    APPLIED = "applied"
    TRACKING = "tracking"
    DISMISSED = "dismissed"
    EXPIRED = "expired"
64
+
65
+
66
class SourceType(str, enum.Enum):
    """Kind of upstream data source an item was ingested from."""

    ARXIV = "arxiv"
    GITHUB = "github"
    RSS = "rss"
    REDDIT = "reddit"
    TWITTER = "twitter"
    LINKEDIN = "linkedin"
    SUPERTEAM = "superteam"
    WEB_SCRAPE = "web_scrape"
    DISCORD = "discord"
    GOV_PORTAL = "gov_portal"
    GRANT_PLATFORM = "grant_platform"
79
+
80
+
81
class Domain(str, enum.Enum):
    """Subject-area classification of an opportunity."""

    AI = "ai"
    COMPUTER_VISION = "computer_vision"
    ROBOTICS = "robotics"
    FINANCE = "finance"
    CRYPTO = "crypto"
    ACADEMIA = "academia"
    WEB3 = "web3"
    MIXED = "mixed"
91
+
92
+
93
class Region(str, enum.Enum):
    """Regional accessibility of an opportunity (PIOE 2.0)."""

    NIGERIA = "nigeria"
    AFRICA = "africa"
    GLOBAL = "global"
    # Remote, but accessible from Africa
    REMOTE_AFRICA = "remote_africa"
    REMOTE_GLOBAL = "remote_global"
100
+
101
+
102
class RiskLevel(str, enum.Enum):
    """How risky the time investment in an opportunity is."""

    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
107
+
108
+
109
class Source(Base):
    """Data source configuration (one row per configured upstream source)."""
    __tablename__ = "sources"

    id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    name = Column(String, nullable=False)
    type = Column(SQLEnum(SourceType), nullable=False)
    url = Column(String)
    # Callable default: every row gets its own dict instead of all rows
    # sharing (and potentially mutating) one module-level ``{}`` object.
    config = Column(JSON, default=dict)
    credibility_score = Column(Float, default=0.7)
    last_fetch = Column(DateTime)
    is_active = Column(Boolean, default=True)
    created_at = Column(DateTime, default=datetime.utcnow)

    opportunities = relationship("Opportunity", back_populates="source")
124
+
125
+
126
class Opportunity(Base):
    """Normalized opportunity item - PIOE 2.0 Enhanced."""
    __tablename__ = "opportunities"

    id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    title = Column(String, nullable=False)
    source_id = Column(String, ForeignKey("sources.id"))
    source_name = Column(String)
    source_type = Column(SQLEnum(SourceType))
    domain = Column(SQLEnum(Domain), default=Domain.MIXED)
    category = Column(SQLEnum(OpportunityCategory), default=OpportunityCategory.OTHER)

    # Regional accessibility (PIOE 2.0)
    region = Column(SQLEnum(Region), default=Region.GLOBAL)
    region_weight = Column(Float, default=1.0)  # 1.0 = perfect match for user

    # Timestamps
    discovered_at = Column(DateTime, default=datetime.utcnow)
    published_at = Column(DateTime)
    deadline = Column(DateTime)

    # Content
    raw_text = Column(Text)
    summary = Column(Text)
    url = Column(String)

    # Core Scores (0.0 to 1.0)
    relevance_score = Column(Float, default=0.0)
    novelty_score = Column(Float, default=1.0)
    credibility_score = Column(Float, default=0.5)
    signal_strength = Column(Float, default=0.5)
    combined_score = Column(Float, default=0.0)

    # PIOE 2.0: Decision Intelligence Scores
    roi_score = Column(Float, default=0.5)  # Is this worth my time?
    unlock_potential = Column(Float, default=0.0)  # Opens doors to what?
    risk_level = Column(SQLEnum(RiskLevel), default=RiskLevel.MEDIUM)
    competition_level = Column(Float, default=0.5)  # Estimated competition

    # Social engagement (from social sources)
    social_engagement = Column(Integer, default=0)

    # User status
    status = Column(SQLEnum(OpportunityStatus), default=OpportunityStatus.NEW)

    # Grant-specific metadata (PIOE 2.0)
    # Stored in extra_data:
    # - grant_size_min, grant_size_max
    # - required_output (MVP, paper, OSS)
    # - timeline_months
    # - ecosystem (ethereum, solana, government)
    # - eligibility_regions
    # - technical_depth

    # Action guidance (PIOE 2.0)
    # Stored in extra_data:
    # - recommended_action
    # - skill_to_highlight
    # - timing (early/optimal/late)
    # - success_probability
    # - preparation_steps

    # Opportunity chaining (PIOE 2.0)
    # - chain_next: list of potential next opportunity IDs
    # - chain_unlocks: what this unlocks

    # Callable default: every row gets its own dict instead of all rows
    # sharing (and potentially mutating) one module-level ``{}`` object.
    extra_data = Column(JSON, default=dict)

    # Embedding for novelty detection
    embedding = Column(JSON)

    source = relationship("Source", back_populates="opportunities")
    interactions = relationship("UserInteraction", back_populates="opportunity")
199
+
200
+
201
class UserInteraction(Base):
    """One recorded user action on an opportunity, kept for personalization."""
    __tablename__ = "user_interactions"

    id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    opportunity_id = Column(String, ForeignKey("opportunities.id"))
    # One of: view, apply, save, dismiss, track
    action = Column(String)
    timestamp = Column(DateTime, default=datetime.utcnow)

    opportunity = relationship("Opportunity", back_populates="interactions")
211
+
212
+
213
class Author(Base):
    """Track authors for credibility and social graph."""
    __tablename__ = "authors"

    id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    name = Column(String, nullable=False)
    platform = Column(String)  # reddit, twitter, github, etc.
    platform_id = Column(String)  # username or ID on platform
    credibility_score = Column(Float, default=0.5)
    opportunity_creator_score = Column(Float, default=0.0)  # Do they create opportunities?
    first_seen = Column(DateTime, default=datetime.utcnow)
    # Callable default: every row gets its own dict instead of all rows
    # sharing (and potentially mutating) one module-level ``{}`` object.
    extra_data = Column(JSON, default=dict)
225
+
226
+
227
class OpportunityChain(Base):
    """Track opportunity sequences/paths - PIOE 2.0."""
    __tablename__ = "opportunity_chains"

    id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
    name = Column(String)  # e.g., "Hackathon to Startup Path"
    description = Column(Text)
    steps = Column(JSON)  # Ordered list of opportunity categories/types
    success_rate = Column(Float, default=0.0)
    # Callable default: every row gets its own list instead of all rows
    # sharing (and potentially mutating) one module-level ``[]`` object.
    example_urls = Column(JSON, default=list)
    created_at = Column(DateTime, default=datetime.utcnow)
config/sources.yaml ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # PIOE Default Sources Configuration
2
+
3
+ # arXiv Categories
4
+ arxiv:
5
+ enabled: true
6
+ categories:
7
+ - cs.CV # Computer Vision
8
+ - cs.RO # Robotics
9
+ - cs.AI # Artificial Intelligence
10
+ - cs.LG # Machine Learning
11
+ - cs.CL # Natural Language Processing
12
+ max_results: 50
13
+ schedule: "daily"
14
+
15
+ # GitHub Topics/Search
16
+ github:
17
+ enabled: true
18
+ topics:
19
+ - computer-vision
20
+ - robotics
21
+ - machine-learning
22
+ - deep-learning
23
+ - ros
24
+ - pytorch
25
+ - transformers
26
+ - llm
27
+ min_stars: 50
28
+ schedule: "daily"
29
+
30
+ # RSS Feeds
31
+ rss:
32
+ enabled: true
33
+ feeds:
34
+ # AI Research Labs
35
+ - name: "Google AI Blog"
36
+ url: "https://blog.google/technology/ai/rss/"
37
+ type: blog
38
+
39
+ - name: "OpenAI Blog"
40
+ url: "https://openai.com/blog/rss/"
41
+ type: blog
42
+
43
+ - name: "DeepMind Blog"
44
+ url: "https://www.deepmind.com/blog/rss.xml"
45
+ type: blog
46
+
47
+ # Tech News
48
+ - name: "Hacker News - AI"
49
+ url: "https://hnrss.org/newest?q=ai+machine+learning"
50
+ type: news
51
+
52
+ - name: "Hacker News - Robotics"
53
+ url: "https://hnrss.org/newest?q=robotics"
54
+ type: news
55
+
56
+ - name: "TechCrunch AI"
57
+ url: "https://techcrunch.com/category/artificial-intelligence/feed/"
58
+ type: news
59
+
60
+ # Reddit Subreddits
61
+ reddit:
62
+ enabled: true
63
+ subreddits:
64
+ - computervision
65
+ - robotics
66
+ - MachineLearning
67
+ - artificial
68
+ - learnmachinelearning
69
+ - deeplearning
70
+ - hackathons
71
+ - scholarships
72
+ - cscareerquestions
73
+ min_score: 10
74
+ schedule: "every_6_hours"
75
+
76
+ # Superteam (Web3/Crypto Opportunities)
77
+ superteam:
78
+ enabled: true
79
+ focus:
80
+ - bounties
81
+ - grants
82
+ - hackathons
83
+ schedule: "daily"
84
+
85
+ # Major Tech Company Careers
86
+ careers:
87
+ enabled: true
88
+ companies:
89
+ - name: Microsoft
90
+ keywords: ["computer vision", "robotics", "AI", "machine learning", "intern"]
91
+ - name: NVIDIA
92
+ keywords: ["deep learning", "computer vision", "robotics", "intern"]
93
+ - name: Google
94
+ keywords: ["machine learning", "research", "robotics", "intern"]
95
+ - name: Meta
96
+ keywords: ["AI", "research", "robotics", "computer vision", "intern"]
97
+ - name: OpenAI
98
+ keywords: ["research", "engineering"]
99
+ - name: DeepMind
100
+ keywords: ["research", "robotics"]
101
+ - name: "Boston Dynamics"
102
+ keywords: ["robotics", "perception", "control"]
103
+ - name: "Tesla AI"
104
+ keywords: ["autopilot", "optimus", "robotics", "computer vision"]
105
+ schedule: "daily"
106
+
107
+ # Web Scraping Targets
108
+ scraper:
109
+ enabled: true
110
+ targets:
111
+ # Hackathons
112
+ - name: "Devpost Hackathons"
113
+ url: "https://devpost.com/hackathons"
114
+ type: hackathon
115
+
116
+ - name: "MLH Events"
117
+ url: "https://mlh.io/seasons/2024/events"
118
+ type: hackathon
119
+
120
+ # Scholarships
121
+ - name: "FindAPhD"
122
+ url: "https://www.findaphd.com/phds/?Keywords=computer+vision+robotics"
123
+ type: scholarship
124
+ schedule: "daily"
125
+
126
+ # Scheduling
127
+ schedule:
128
+ full_ingestion_hours: 6
129
+ priority_ingestion_hours: 2
130
+
131
+ # Scoring Thresholds
132
+ scoring:
133
+ min_relevance: 0.4
134
+ min_novelty: 0.3
135
+ min_credibility: 0.5
frontend/app.js ADDED
@@ -0,0 +1,660 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * PIOE - Personal Intelligence & Opportunity Engine
3
+ * Frontend JavaScript Application
4
+ */
5
+
6
/**
 * Browser-side controller for the PIOE dashboard.
 *
 * Owns the current filter state (category, domain, minimum score), fetches
 * data from the `/api/*` endpoints and renders it into the elements declared
 * in index.html. Every value that originates from the API is HTML-escaped
 * before being interpolated into markup, and outbound links are limited to
 * http(s) URLs.
 */
class PIOEApp {
    constructor() {
        this.currentCategory = null;
        this.currentDomain = null;
        this.minScore = 0;
        this.opportunities = [];
        // Incremented on every feed request so that a slow, older response
        // cannot overwrite the result of a newer one.
        this._feedRequestId = 0;

        this.init();
    }

    /** Wire up the UI and kick off the initial data loads. */
    init() {
        this.bindEvents();
        this.loadStats();
        this.loadOpportunities();
    }

    /**
     * Attach all static event listeners.
     *
     * Missing elements are skipped instead of throwing, so that one absent
     * node cannot abort `init()` before the data loads start.
     */
    bindEvents() {
        const on = (element, type, handler) => {
            if (element) element.addEventListener(type, handler);
        };
        const byId = (id) => document.getElementById(id);

        // Navigation items
        document.querySelectorAll('.nav-item[data-view]').forEach(item => {
            on(item, 'click', (e) => {
                e.preventDefault();
                this.setActiveNav(item);
                this.handleViewChange(item.dataset.view);
            });
        });

        // Category filters
        document.querySelectorAll('.nav-item[data-category]').forEach(item => {
            on(item, 'click', (e) => {
                e.preventDefault();
                this.setActiveNav(item);
                this.currentCategory = item.dataset.category;
                this.loadOpportunities();
                this.showFeedView();
                // The previous view may have been a digest; restore the feed header.
                this.updateHeader('Opportunity Feed', 'High-signal opportunities detected by PIOE');
            });
        });

        // Domain filter
        on(byId('domain-filter'), 'change', (e) => {
            this.currentDomain = e.target.value || null;
            this.loadOpportunities();
        });

        // Score filter
        on(byId('score-filter'), 'change', (e) => {
            this.minScore = parseFloat(e.target.value) || 0;
            this.loadOpportunities();
        });

        // Run ingestion
        on(byId('run-ingestion'), 'click', (e) => {
            e.preventDefault();
            this.runIngestion();
        });

        // View stats
        on(byId('view-stats'), 'click', (e) => {
            e.preventDefault();
            this.showStatsModal();
        });

        // Modal close
        on(document.querySelector('.modal-close'), 'click', () => this.closeModal());
        on(document.querySelector('.modal-backdrop'), 'click', () => this.closeModal());

        // PIOE 2.0: AI Chat
        on(byId('open-chat'), 'click', (e) => {
            e.preventDefault();
            this.toggleChat();
        });
    }

    // PIOE 2.0: Chat Methods

    /** Show or hide the chat panel. */
    toggleChat() {
        document.getElementById('chat-panel')?.classList.toggle('active');
    }

    /**
     * Send the chat input to `/api/chat` and append the reply, any matched
     * opportunities and the suggested action to the transcript.
     */
    async sendChatMessage() {
        const input = document.getElementById('chat-input');
        const messagesContainer = document.getElementById('chat-messages');
        if (!input || !messagesContainer) return;

        const message = input.value.trim();
        if (!message) return;

        // insertAdjacentHTML appends without re-parsing the existing transcript
        // (which `innerHTML +=` would do on every message).
        const append = (html) => {
            messagesContainer.insertAdjacentHTML('beforeend', html);
            messagesContainer.scrollTop = messagesContainer.scrollHeight;
        };

        // Add user message to chat
        append(`
            <div class="chat-message user">
                <p>${this.escapeHtml(message)}</p>
            </div>
        `);
        input.value = '';

        // Add loading indicator
        const loadingId = `loading-${Date.now()}`;
        append(`
            <div class="chat-message bot" id="${loadingId}">
                <p>[...] Searching opportunities...</p>
            </div>
        `);

        try {
            const data = (await this._fetchJson('/api/chat', {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify({ message })
            })) || {};

            // Remove loading indicator
            document.getElementById(loadingId)?.remove();

            // Build response HTML
            let responseHtml = `<p>${this.escapeHtml(data.response || 'No response')}</p>`;

            // Add matched opportunities if any
            const matches = Array.isArray(data.opportunities) ? data.opportunities : [];
            if (matches.length > 0) {
                responseHtml += `<div style="margin-top: 12px">`;
                for (const opp of matches) {
                    const roiDisplay = opp.roi_score ? `${this._percent(opp.roi_score)}% ROI` : '';
                    const title = String(opp.title || 'Untitled');
                    const shortTitle = title.length > 60 ? `${title.slice(0, 60)}...` : title;
                    const url = this._safeUrl(opp.url);
                    const inner = `
                        ${this.getCategoryEmoji(opp.category)} ${this.escapeHtml(shortTitle)}
                        <span style="opacity: 0.7; margin-left: 8px">${this.escapeHtml(roiDisplay)}</span>
                    `;
                    // Only emit a real link when the target is a plain http(s) URL.
                    responseHtml += url
                        ? `<a href="${this.escapeHtml(url)}" target="_blank" rel="noopener noreferrer" class="opp-link">${inner}</a>`
                        : `<span class="opp-link">${inner}</span>`;
                }
                responseHtml += `</div>`;
            }

            // Add suggested action if any
            if (data.suggested_action) {
                responseHtml += `<p style="margin-top: 12px; font-style: italic; opacity: 0.8">[TIP] ${this.escapeHtml(data.suggested_action)}</p>`;
            }

            append(`
                <div class="chat-message bot">
                    ${responseHtml}
                </div>
            `);
        } catch (error) {
            document.getElementById(loadingId)?.remove();
            append(`
                <div class="chat-message bot">
                    <p style="color: var(--danger)">Error: ${this.escapeHtml(error.message)}</p>
                </div>
            `);
        }
    }

    /** Mark `activeItem` as the only active entry in the sidebar. */
    setActiveNav(activeItem) {
        document.querySelectorAll('.nav-item').forEach(item => {
            item.classList.remove('active');
        });
        activeItem.classList.add('active');
    }

    /**
     * Switch the main area to one of the named views.
     * @param {string} view 'feed', 'digest' or 'urgent'; other values are ignored.
     */
    handleViewChange(view) {
        if (view === 'feed') {
            this.currentCategory = null;
            this.loadOpportunities();
            this.showFeedView();
            this.updateHeader('Opportunity Feed', 'High-signal opportunities detected by PIOE');
        } else if (view === 'digest') {
            this.loadDigest('daily');
            this.showDigestView();
            this.updateHeader('Daily Brief', 'Your personalized intelligence report');
        } else if (view === 'urgent') {
            this.loadDigest('urgent');
            this.showDigestView();
            this.updateHeader('Urgent Opportunities', 'Deadlines approaching soon');
        }
    }

    /** Set the page title and subtitle in the header. */
    updateHeader(title, subtitle) {
        this._setText('page-title', title);
        this._setText('page-subtitle', subtitle);
    }

    /** Show the opportunity feed and hide the digest view. */
    showFeedView() {
        this._setDisplay('opportunity-feed', 'flex');
        this._setDisplay('digest-view', 'none');
    }

    /** Show the digest view and hide the opportunity feed. */
    showDigestView() {
        this._setDisplay('opportunity-feed', 'none');
        this._setDisplay('digest-view', 'block');
    }

    /** Fetch `/api/stats` and fill the counters in the stats banner. */
    async loadStats() {
        try {
            const stats = (await this._fetchJson('/api/stats')) || {};

            this._setText('total-count', stats.total_opportunities || 0);
            this._setText('new-count', stats.new_opportunities || 0);
            this._setText('hackathon-count', stats.by_category?.hackathon || 0);
            this._setText('internship-count', stats.by_category?.internship || 0);
        } catch (error) {
            console.error('Failed to load stats:', error);
        }
    }

    /**
     * Fetch opportunities matching the current filters and render them.
     * Responses that arrive after a newer request was issued are discarded.
     */
    async loadOpportunities() {
        const feed = document.getElementById('opportunity-feed');
        if (!feed) return;
        feed.innerHTML = '<div class="loading">Loading opportunities...</div>';

        const requestId = (this._feedRequestId || 0) + 1;
        this._feedRequestId = requestId;

        try {
            const params = new URLSearchParams();
            if (this.currentCategory) params.set('category', this.currentCategory);
            if (this.currentDomain) params.set('domain', this.currentDomain);
            if (this.minScore) params.set('min_score', String(this.minScore));
            params.set('limit', '50');

            const data = await this._fetchJson(`/api/opportunities?${params}`);
            if (requestId !== this._feedRequestId) return; // superseded by a newer request

            this.opportunities = Array.isArray(data?.opportunities) ? data.opportunities : [];
            this.renderOpportunities();
        } catch (error) {
            if (requestId !== this._feedRequestId) return;
            feed.innerHTML = `<div class="loading">Error loading opportunities: ${this.escapeHtml(error.message)}</div>`;
        }
    }

    /** Render `this.opportunities` into the feed and bind the per-card actions. */
    renderOpportunities() {
        const feed = document.getElementById('opportunity-feed');
        if (!feed) return;

        if (this.opportunities.length === 0) {
            feed.innerHTML = `
                <div class="loading">
                    No opportunities found. Try running ingestion first!
                </div>
            `;
            return;
        }

        feed.innerHTML = this.opportunities.map(opp => this.renderOpportunityCard(opp)).join('');

        // Cards are rendered in array order, so the DOM index maps back to the record.
        feed.querySelectorAll('.opportunity-card').forEach((card, index) => {
            const opp = this.opportunities[index];

            card.addEventListener('click', () => {
                this.showOpportunityDetail(opp);
            });

            // Action buttons
            card.querySelector('.action-btn.primary')?.addEventListener('click', (e) => {
                e.stopPropagation();
                const url = this._safeUrl(opp.url);
                if (url) {
                    window.open(url, '_blank', 'noopener');
                } else {
                    this.showNotification('This opportunity has no valid link.');
                }
            });

            card.querySelector('.action-btn.secondary')?.addEventListener('click', (e) => {
                e.stopPropagation();
                this.updateStatus(opp.id, 'saved');
            });
        });
    }

    /**
     * Build the HTML for one opportunity card.
     * @param {object} opp Opportunity record as returned by the API.
     * @returns {string} Markup with all record fields HTML-escaped.
     */
    renderOpportunityCard(opp) {
        const esc = (value) => this.escapeHtml(value);
        const category = String(opp.category || 'other');
        const categoryEmoji = this.getCategoryEmoji(category);
        const scorePercent = this._percent(opp.combined_score);
        // Keep the bar inside its track even if the score is out of range.
        const barWidth = Math.min(100, Math.max(0, scorePercent));
        const roiPercent = this._percent(opp.roi_score, 0.5);
        const riskLevel = String(opp.risk_level || 'medium');
        const region = String(opp.region || 'global');

        // Risk level badge
        const riskLabels = { low: '[OK]', medium: '[!]', high: '[!!]' };
        const riskLabel = Object.prototype.hasOwnProperty.call(riskLabels, riskLevel)
            ? riskLabels[riskLevel]
            : riskLabels.medium;

        // Region badge
        const regionLabels = { nigeria: 'NG', africa: 'AFR', global: 'GLB', remote_africa: 'AFR-R', remote_global: 'GLB-R' };
        const regionLabel = Object.prototype.hasOwnProperty.call(regionLabels, region)
            ? regionLabels[region]
            : 'GLB';

        return `
            <div class="opportunity-card">
                <div class="card-header">
                    <span class="card-category ${this._cssToken(category)}">
                        ${categoryEmoji} ${esc(this._humanize(category))}
                    </span>
                    <div class="card-score">
                        <div class="score-bar">
                            <div class="score-fill" style="width: ${barWidth}%"></div>
                        </div>
                        <span>${scorePercent}%</span>
                    </div>
                </div>

                <h3 class="card-title">${esc(opp.title)}</h3>

                <div class="card-meta">
                    <span>[SRC] ${esc(opp.source_name || 'Unknown')}</span>
                    <span>[${regionLabel}] ${esc(this._humanize(region))}</span>
                    <span style="color: ${this._riskColor(riskLevel)}">${riskLabel} ${esc(riskLevel)} risk</span>
                </div>

                <div class="card-meta" style="margin-top: 8px">
                    <span title="ROI Score">[ROI] ${roiPercent}%</span>
                    <span>[DATE] ${esc(this.formatDate(opp.discovered_at))}</span>
                </div>

                <p class="card-summary">${esc(String(opp.raw_text ?? '').slice(0, 200))}</p>

                <div class="card-footer">
                    ${this._deadlineBadge(opp.deadline)}
                    <div class="card-actions">
                        <button class="action-btn secondary">Save</button>
                        <button class="action-btn primary">Open</button>
                    </div>
                </div>
            </div>
        `;
    }

    /**
     * Map a category key to its short text badge.
     * @param {string} category Category key such as 'hackathon'.
     * @returns {string} Badge text, or '[?]' for unknown keys.
     */
    getCategoryEmoji(category) {
        const labels = {
            scholarship: '[S]',
            fellowship: '[F]',
            internship: '[I]',
            job: '[J]',
            hackathon: '[H]',
            competition: '[C]',
            grant: '[G]',
            micro_grant: '[MG]',
            ecosystem_grant: '[EG]',
            innovation_fund: '[IF]',
            research: '[R]',
            open_source: '[OS]',
            conference: '[CF]',
            investment: '[IV]',
            partnership: '[P]',
            collaboration: '[CO]',
            pitch_event: '[PE]',
            demo_day: '[DD]',
            talent_call: '[TC]',
            bounty: '[B]',
            ambassador: '[A]',
            pre_grant_signal: '[PG]',
            pre_hiring_signal: '[PH]',
            weak_signal: '[WS]',
            other: '[?]'
        };
        // Own-property check so keys like 'constructor' do not hit Object.prototype.
        return Object.prototype.hasOwnProperty.call(labels, category) ? labels[category] : '[?]';
    }

    /**
     * Fetch `/api/digest/<type>` and render its markdown into the digest view.
     * @param {string} type Digest name, e.g. 'daily' or 'urgent'.
     */
    async loadDigest(type) {
        const content = document.getElementById('digest-content');
        if (!content) return;
        content.innerHTML = '<div class="loading">Generating digest...</div>';

        try {
            const data = await this._fetchJson(`/api/digest/${encodeURIComponent(type)}`);

            // Convert markdown to HTML (simple conversion)
            content.innerHTML = this.markdownToHtml(data?.digest || 'No digest available.');
        } catch (error) {
            content.innerHTML = `<p>Error loading digest: ${this.escapeHtml(error.message)}</p>`;
        }
    }

    /**
     * Convert a small markdown subset (headings, bold, italics, blockquotes,
     * links, horizontal rules, line breaks) to HTML.
     *
     * The input is HTML-escaped first, so only the tags produced here can
     * appear in the output. Links whose target is not http(s) are rendered as
     * plain text.
     *
     * @param {string} md Markdown source.
     * @returns {string} HTML fragment.
     */
    markdownToHtml(md) {
        return this.escapeHtml(md)
            .replace(/^### (.*$)/gim, '<h3>$1</h3>')
            .replace(/^## (.*$)/gim, '<h2>$1</h2>')
            .replace(/^# (.*$)/gim, '<h1>$1</h1>')
            .replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>')
            .replace(/\*(.*?)\*/g, '<em>$1</em>')
            // '>' was escaped above, so blockquote markers now read '&gt;'.
            .replace(/^&gt; (.*$)/gim, '<blockquote>$1</blockquote>')
            .replace(/\[(.*?)\]\((.*?)\)/g, (match, label, url) => {
                // `url` is already escaped, so it cannot break out of the attribute.
                return this._safeUrl(url)
                    ? `<a href="${url.trim()}" target="_blank" rel="noopener noreferrer">${label}</a>`
                    : label;
            })
            .replace(/^---$/gim, '<hr>')
            .replace(/\n/g, '<br>');
    }

    /**
     * Fill the detail modal with one opportunity and open it.
     * @param {object} opp Opportunity record as returned by the API.
     */
    showOpportunityDetail(opp) {
        const modal = document.getElementById('detail-modal');
        const body = document.getElementById('modal-body');
        if (!modal || !body) return;

        const esc = (value) => this.escapeHtml(value);
        const category = String(opp.category || 'other');
        const roiPercent = this._percent(opp.roi_score, 0.5);
        const riskLevel = String(opp.risk_level || 'medium');
        const region = String(opp.region || 'global');
        const url = this._safeUrl(opp.url);

        const deadline = opp.deadline ? new Date(opp.deadline) : null;
        const deadlineHtml = deadline && !Number.isNaN(deadline.getTime())
            ? `<p style="color: var(--warning); margin-bottom: 16px">⏰ Deadline: ${esc(deadline.toLocaleDateString())}</p>`
            : '';

        const scoreCard = (value, label, extraClass = '') => `
            <div class="stat-card${extraClass}">
                <span class="stat-value">${value}%</span>
                <span class="stat-label">${label}</span>
            </div>
        `;

        body.innerHTML = `
            <span class="card-category ${this._cssToken(category)}" style="margin-bottom: 16px">
                ${this.getCategoryEmoji(category)} ${esc(this._humanize(category))}
            </span>

            <h2 style="margin: 16px 0">${esc(opp.title)}</h2>

            <div class="card-meta" style="margin-bottom: 20px">
                <span>📡 ${esc(opp.source_name || 'Unknown')}</span>
                <span>🌐 ${esc(this._humanize(region))}</span>
                <span style="color: ${this._riskColor(riskLevel)}">${esc(riskLevel)} risk</span>
            </div>

            <div style="display: grid; grid-template-columns: repeat(4, 1fr); gap: 12px; margin-bottom: 24px">
                ${scoreCard(this._percent(opp.relevance_score), 'Relevance')}
                ${scoreCard(this._percent(opp.novelty_score), 'Novelty')}
                ${scoreCard(this._percent(opp.credibility_score), 'Credibility')}
                ${scoreCard(roiPercent, '💎 ROI', ' highlight')}
            </div>

            ${deadlineHtml}

            <p style="color: var(--text-secondary); line-height: 1.8; margin-bottom: 24px">
                ${esc(opp.raw_text || 'No description available.')}
            </p>

            <!-- Action Guidance Container -->
            <div id="guidance-container" style="margin-bottom: 24px; padding: 16px; background: rgba(99, 102, 241, 0.1); border-radius: 12px; display: none;">
                <h3 style="margin-bottom: 12px; color: var(--accent-primary)">🎯 Action Guidance</h3>
                <div id="guidance-content"></div>
            </div>

            <div style="display: flex; flex-wrap: wrap; gap: 12px">
                <button type="button" class="action-btn primary" data-action="guidance" style="padding: 12px 24px; background: linear-gradient(135deg, #8b5cf6, #6366f1)">
                    🧠 Get Guidance
                </button>
                ${url ? `
                <a href="${esc(url)}" target="_blank" rel="noopener noreferrer" class="action-btn primary" style="text-decoration: none; padding: 12px 24px">
                    🔗 View Original
                </a>
                ` : ''}
                <button type="button" class="action-btn secondary" data-status="saved" style="padding: 12px 24px">
                    💾 Save
                </button>
                <button type="button" class="action-btn secondary" data-status="applied" style="padding: 12px 24px">
                    ✅ Mark Applied
                </button>
            </div>
        `;

        // Handlers are bound in code rather than via inline onclick attributes,
        // so the opportunity id is never interpolated into executable markup.
        body.querySelector('[data-action="guidance"]')?.addEventListener('click', () => {
            this.getGuidance(opp.id);
        });
        body.querySelectorAll('[data-status]').forEach(button => {
            button.addEventListener('click', () => {
                this.updateStatus(opp.id, button.dataset.status);
            });
        });

        modal.classList.add('active');
    }

    /**
     * Fetch action guidance for an opportunity and render it in the detail modal.
     * @param {string|number} opportunityId Identifier of the opportunity.
     */
    async getGuidance(opportunityId) {
        const container = document.getElementById('guidance-container');
        const content = document.getElementById('guidance-content');
        if (!container || !content) return;

        container.style.display = 'block';
        content.innerHTML = '<p>🔄 Analyzing opportunity...</p>';

        try {
            const data = await this._fetchJson(`/api/opportunities/${encodeURIComponent(opportunityId)}/guidance`);
            const g = data?.guidance;
            if (!g) throw new Error('No guidance available');

            const esc = (value) => this.escapeHtml(value);
            const items = (value) => (Array.isArray(value) ? value : []);

            const statCard = (value, label) => `
                <div class="stat-card" style="flex: 1; min-width: 120px">
                    <span class="stat-value" style="font-size: 14px">${value}</span>
                    <span class="stat-label">${label}</span>
                </div>
            `;
            const chips = (title, values, background) => values.length ? `
                <div>
                    <strong>${title}</strong>
                    <div style="display: flex; gap: 8px; flex-wrap: wrap; margin-top: 8px">
                        ${values.map(v => `<span style="background: ${background}; padding: 4px 12px; border-radius: 20px; font-size: 12px">${esc(v)}</span>`).join('')}
                    </div>
                </div>
            ` : '';
            const paragraph = (title, text) => text ? `
                <div>
                    <strong>${title}</strong>
                    <p style="margin-top: 4px; color: var(--text-secondary)">${esc(text)}</p>
                </div>
            ` : '';

            const steps = items(g.preparation_steps);
            const redFlags = items(g.red_flags);
            const action = g.primary_action ? this._humanize(g.primary_action) : 'Review';

            content.innerHTML = `
                <div style="display: grid; gap: 16px">
                    <div style="display: flex; gap: 16px; flex-wrap: wrap">
                        ${statCard(esc(action), 'Action')}
                        ${statCard(esc(g.urgency || 'whenever'), 'Urgency')}
                        ${statCard(`${this._percent(g.success_probability, 0.3)}%`, 'Success Odds')}
                        ${statCard(`${esc(g.time_investment_hours || 10)}h`, 'Time Needed')}
                    </div>

                    ${chips('Skills to Highlight:', items(g.skills_to_highlight), 'var(--accent-primary)')}
                    ${chips('Portfolio to Show:', items(g.portfolio_pieces), 'var(--success)')}

                    ${steps.length ? `
                    <div>
                        <strong>Preparation Steps:</strong>
                        <ol style="margin-top: 8px; padding-left: 20px">
                            ${steps.map(s => `<li style="margin-bottom: 4px">${esc(s)}</li>`).join('')}
                        </ol>
                    </div>
                    ` : ''}

                    ${paragraph('💡 Networking Tip:', g.networking_tips)}
                    ${paragraph('🎯 Your Angle:', g.differentiation_angle)}

                    ${redFlags.length ? `
                    <div style="background: rgba(239, 68, 68, 0.1); padding: 12px; border-radius: 8px">
                        <strong style="color: #ef4444">⚠️ Red Flags:</strong>
                        <ul style="margin-top: 8px; padding-left: 20px">
                            ${redFlags.map(f => `<li style="color: #ef4444">${esc(f)}</li>`).join('')}
                        </ul>
                    </div>
                    ` : ''}

                    <p style="font-style: italic; color: var(--text-secondary); font-size: 12px">
                        ${esc(g.why || 'Personalized guidance based on your profile')}
                    </p>
                </div>
            `;
        } catch (error) {
            // The stylesheet defines --danger; there is no --error variable.
            content.innerHTML = `<p style="color: var(--danger)">Failed to get guidance: ${this.escapeHtml(error.message)}</p>`;
        }
    }

    /** Hide the detail modal. */
    closeModal() {
        document.getElementById('detail-modal')?.classList.remove('active');
    }

    /**
     * Persist a new status for an opportunity.
     * @param {string|number} id Opportunity identifier.
     * @param {string} status New status, e.g. 'saved' or 'applied'.
     */
    async updateStatus(id, status) {
        try {
            await this._request(`/api/opportunities/${encodeURIComponent(id)}/status`, {
                method: 'PATCH',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify({ status })
            });

            // Visual feedback
            this.showNotification(`Status updated to ${status}`);
        } catch (error) {
            console.error('Failed to update status:', error);
            this.showNotification('Failed to update status: ' + error.message);
        }
    }

    /** Ask the backend to start an ingestion run. */
    async runIngestion() {
        this.showNotification('Starting ingestion... This may take a few minutes.');

        try {
            await this._request('/api/ingest/run', { method: 'POST' });
            this.showNotification('Ingestion started! Refresh in a few minutes to see new opportunities.');
        } catch (error) {
            this.showNotification('Failed to start ingestion: ' + error.message);
        }
    }

    /** Fetch `/api/stats` and show the totals and per-category/domain counts in the modal. */
    async showStatsModal() {
        try {
            const stats = (await this._fetchJson('/api/stats')) || {};
            const body = document.getElementById('modal-body');
            if (!body) return;

            const esc = (value) => this.escapeHtml(value);
            const row = (label, count) => `
                <div style="display: flex; justify-content: space-between; padding: 8px 0; border-bottom: 1px solid var(--border-color)">
                    <span>${label}</span>
                    <span style="font-weight: 600">${esc(count)}</span>
                </div>
            `;

            body.innerHTML = `
                <h2 style="margin-bottom: 24px">📊 System Statistics</h2>

                <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 16px; margin-bottom: 24px">
                    <div class="stat-card highlight">
                        <span class="stat-value">${esc(stats.total_opportunities || 0)}</span>
                        <span class="stat-label">Total Opportunities</span>
                    </div>
                    <div class="stat-card">
                        <span class="stat-value">${esc(stats.new_opportunities || 0)}</span>
                        <span class="stat-label">New (Unread)</span>
                    </div>
                </div>

                <h3 style="margin: 24px 0 16px">By Category</h3>
                ${Object.entries(stats.by_category || {})
                    .map(([cat, count]) => row(`${this.getCategoryEmoji(cat)} ${esc(this._humanize(cat))}`, count))
                    .join('')}

                <h3 style="margin: 24px 0 16px">By Domain</h3>
                ${Object.entries(stats.by_domain || {})
                    .map(([dom, count]) => row(esc(this._humanize(dom)), count))
                    .join('')}
            `;

            document.getElementById('detail-modal')?.classList.add('active');
        } catch (error) {
            console.error('Failed to load stats:', error);
        }
    }

    /** Show a message to the user (blocking alert) and mirror it to the console. */
    showNotification(message) {
        // Simple notification - could be enhanced with toast UI
        console.log('PIOE:', message);
        alert(message);
    }

    /**
     * Format a date string as e.g. "Jan 5".
     * @param {string} dateStr Value parseable by `Date`.
     * @returns {string} Short date, or 'Unknown' when missing or unparseable.
     */
    formatDate(dateStr) {
        if (!dateStr) return 'Unknown';
        const date = new Date(dateStr);
        if (Number.isNaN(date.getTime())) return 'Unknown';
        return date.toLocaleDateString('en-US', { month: 'short', day: 'numeric' });
    }

    /**
     * Escape text for safe use in HTML element content and quoted attributes.
     * @param {*} text Value to escape; null and undefined become ''.
     * @returns {string} Text with &, <, >, " and ' replaced by entities.
     */
    escapeHtml(text) {
        if (text === null || text === undefined) return '';
        const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
        return String(text).replace(/[&<>"']/g, (ch) => entities[ch]);
    }

    // ---- Private helpers -------------------------------------------------

    /** Perform a fetch and reject on non-2xx responses (fetch itself does not). */
    async _request(url, options) {
        const response = await fetch(url, options);
        if (!response.ok) {
            throw new Error(`Request failed with status ${response.status}`);
        }
        return response;
    }

    /** Fetch `url` and return the parsed JSON body; rejects on HTTP errors. */
    async _fetchJson(url, options) {
        const response = await this._request(url, options);
        return response.json();
    }

    /** Return `url` trimmed if it is an absolute http(s) URL, otherwise ''. */
    _safeUrl(url) {
        const candidate = String(url ?? '').trim();
        return /^https?:\/\//i.test(candidate) ? candidate : '';
    }

    /** Turn a snake_case key into words by replacing every underscore. */
    _humanize(value) {
        return String(value ?? '').replace(/_/g, ' ');
    }

    /** Reduce a value to characters that are safe inside a class attribute. */
    _cssToken(value) {
        return String(value ?? '').replace(/[^a-zA-Z0-9_-]/g, '');
    }

    /** Convert a 0..1 score to a rounded percentage, using `fallback` when falsy or non-numeric. */
    _percent(score, fallback = 0) {
        return Math.round((Number(score) || fallback) * 100);
    }

    /** Colour for a risk level; unknown levels use the 'medium' colour. */
    _riskColor(level) {
        const riskColors = { low: '#10b981', medium: '#f59e0b', high: '#ef4444' };
        return Object.prototype.hasOwnProperty.call(riskColors, level) ? riskColors[level] : riskColors.medium;
    }

    /** Deadline badge markup, or '' when the deadline is missing or unparseable. */
    _deadlineBadge(deadline) {
        if (!deadline) return '';
        const due = new Date(deadline).getTime();
        if (Number.isNaN(due)) return '';

        const daysLeft = Math.ceil((due - Date.now()) / (1000 * 60 * 60 * 24));
        let urgency = 'ok';
        if (daysLeft < 7) urgency = 'urgent';
        else if (daysLeft < 14) urgency = 'soon';

        let label;
        if (daysLeft < 0) label = 'Deadline passed';
        else if (daysLeft === 0) label = 'Due today';
        else label = `${daysLeft} day${daysLeft === 1 ? '' : 's'} left`;

        return `
            <span class="deadline-badge ${urgency}">
                [!] ${label}
            </span>
        `;
    }

    /** Set `textContent` on the element with the given id, if it exists. */
    _setText(id, value) {
        const element = document.getElementById(id);
        if (element) element.textContent = value;
    }

    /** Set `style.display` on the element with the given id, if it exists. */
    _setDisplay(id, value) {
        const element = document.getElementById(id);
        if (element) element.style.display = value;
    }
}
658
+
659
+ // Initialize app
660
+ const app = new PIOEApp();
frontend/index.html ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<!DOCTYPE html>
<!--
    PIOE 2.0 dashboard shell.
    The element ids and class names below are looked up by /static/app.js,
    and the inline handlers call methods on its global `app` instance.
-->
<html lang="en">

<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>PIOE 2.0 - Personal Advantage Engine</title>
    <link rel="stylesheet" href="/static/styles.css">
    <!-- Open the font connections early so the stylesheet request does not wait on them. -->
    <link rel="preconnect" href="https://fonts.googleapis.com">
    <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
    <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap" rel="stylesheet">
</head>

<body>
    <div class="app">
        <!-- Sidebar -->
        <nav class="sidebar" aria-label="Main navigation">
            <div class="logo">
                <span class="logo-icon">[P]</span>
                <span class="logo-text">PIOE 2.0</span>
            </div>

            <div class="nav-section">
                <span class="nav-label">Dashboard</span>
                <a href="#" class="nav-item active" data-view="feed">
                    <span class="icon">[F]</span> Opportunity Feed
                </a>
                <a href="#" class="nav-item" data-view="digest">
                    <span class="icon">[D]</span> Daily Brief
                </a>
                <a href="#" class="nav-item" data-view="urgent">
                    <span class="icon">[!]</span> Urgent
                </a>
                <a href="#" class="nav-item" id="open-chat">
                    <span class="icon">[AI]</span> AI Search
                </a>
            </div>

            <div class="nav-section">
                <span class="nav-label">Categories</span>
                <a href="#" class="nav-item" data-category="hackathon">[H] Hackathons</a>
                <a href="#" class="nav-item" data-category="internship">[I] Internships</a>
                <a href="#" class="nav-item" data-category="scholarship">[S] Scholarships</a>
                <a href="#" class="nav-item" data-category="research">[R] Research</a>
                <a href="#" class="nav-item" data-category="job">[J] Jobs</a>
                <a href="#" class="nav-item" data-category="grant">[G] Grants</a>
                <a href="#" class="nav-item" data-category="ecosystem_grant">[E] Ecosystem Grants</a>
                <a href="#" class="nav-item" data-category="bounty">[B] Bounties</a>
                <a href="#" class="nav-item" data-category="open_source">[O] Open Source</a>
            </div>

            <div class="nav-section">
                <span class="nav-label">System</span>
                <a href="#" class="nav-item" id="run-ingestion">
                    <span class="icon">[&gt;]</span> Run Ingestion
                </a>
                <a href="#" class="nav-item" id="view-stats">
                    <span class="icon">[#]</span> Statistics
                </a>
            </div>
        </nav>

        <!-- Main Content -->
        <main class="main-content">
            <header class="header">
                <div class="header-title">
                    <h1 id="page-title">Opportunity Feed</h1>
                    <p class="subtitle" id="page-subtitle">High-signal opportunities detected by PIOE</p>
                </div>
                <div class="header-actions">
                    <select id="domain-filter" class="filter-select" aria-label="Filter by domain">
                        <option value="">All Domains</option>
                        <option value="ai">AI</option>
                        <option value="computer_vision">Computer Vision</option>
                        <option value="robotics">Robotics</option>
                        <option value="finance">Finance</option>
                        <option value="crypto">Crypto</option>
                        <option value="academia">Academia</option>
                    </select>
                    <select id="score-filter" class="filter-select" aria-label="Filter by minimum score">
                        <option value="0">All Scores</option>
                        <option value="0.5">Score &gt; 0.5</option>
                        <option value="0.7">Score &gt; 0.7</option>
                        <option value="0.8">Score &gt; 0.8</option>
                    </select>
                </div>
            </header>

            <div class="content-area">
                <!-- Stats Banner -->
                <div class="stats-banner" id="stats-banner">
                    <div class="stat-card">
                        <span class="stat-value" id="total-count">-</span>
                        <span class="stat-label">Total</span>
                    </div>
                    <div class="stat-card">
                        <span class="stat-value" id="new-count">-</span>
                        <span class="stat-label">New</span>
                    </div>
                    <div class="stat-card highlight">
                        <span class="stat-value" id="hackathon-count">-</span>
                        <span class="stat-label">Hackathons</span>
                    </div>
                    <div class="stat-card">
                        <span class="stat-value" id="internship-count">-</span>
                        <span class="stat-label">Internships</span>
                    </div>
                </div>

                <!-- Opportunity Feed -->
                <div class="feed" id="opportunity-feed">
                    <div class="loading">Loading opportunities...</div>
                </div>

                <!-- Digest View (Hidden by default) -->
                <div class="digest-view" id="digest-view" style="display: none;">
                    <div class="digest-content" id="digest-content"></div>
                </div>
            </div>
        </main>
    </div>

    <!-- Opportunity Detail Modal -->
    <div class="modal" id="detail-modal" role="dialog" aria-modal="true" aria-label="Details">
        <div class="modal-backdrop"></div>
        <div class="modal-content">
            <button type="button" class="modal-close" aria-label="Close details">&times;</button>
            <div id="modal-body"></div>
        </div>
    </div>

    <!-- AI Chat Panel -->
    <div class="chat-panel" id="chat-panel">
        <div class="chat-header">
            <span>PIOE AI Search</span>
            <button type="button" class="chat-close" aria-label="Close chat" onclick="app.toggleChat()">&times;</button>
        </div>
        <div class="chat-messages" id="chat-messages" aria-live="polite">
            <div class="chat-message bot">
                <p>Hi! I'm PIOE AI. Ask me to find opportunities:</p>
                <ul style="margin: 8px 0; padding-left: 20px; font-size: 12px; opacity: 0.8">
                    <li>"Find hackathons in Nigeria"</li>
                    <li>"What grants are available for AI?"</li>
                    <li>"Show high ROI opportunities"</li>
                    <li>"Internships in robotics"</li>
                </ul>
            </div>
        </div>
        <div class="chat-input-area">
            <!-- keydown replaces the deprecated keypress event; isComposing keeps
                 IME confirmation with Enter from sending a half-typed message. -->
            <input type="text" id="chat-input" placeholder="Ask about opportunities..."
                aria-label="Ask about opportunities"
                onkeydown="if(event.key==='Enter' && !event.isComposing) app.sendChatMessage()">
            <button type="button" onclick="app.sendChatMessage()">Send</button>
        </div>
    </div>

    <!-- Floating Chat Button -->
    <button type="button" class="chat-fab" id="chat-fab" aria-label="Open AI search" onclick="app.toggleChat()">
        AI
    </button>

    <script src="/static/app.js"></script>
</body>

</html>
frontend/styles.css ADDED
@@ -0,0 +1,905 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/* PIOE - Personal Intelligence & Opportunity Engine
   Modern Dark Theme with Glassmorphism */

:root {
    /* Color Palette */
    --bg-primary: #0a0a0f;
    --bg-secondary: #12121a;
    --bg-tertiary: #1a1a24;
    --bg-card: rgba(26, 26, 36, 0.8);
    --bg-glass: rgba(255, 255, 255, 0.03);

    --accent-primary: #6366f1;
    --accent-secondary: #8b5cf6;
    --accent-gradient: linear-gradient(135deg, #6366f1, #8b5cf6);
    /* Alias: app.js refers to var(--accent) in inline styles. */
    --accent: var(--accent-primary);

    --text-primary: #ffffff;
    --text-secondary: #a1a1aa;
    --text-muted: #71717a;

    --border-color: rgba(255, 255, 255, 0.08);
    --border-hover: rgba(255, 255, 255, 0.15);

    /* Status Colors */
    --success: #22c55e;
    --warning: #f59e0b;
    --danger: #ef4444;
    --info: #3b82f6;
    /* Alias: app.js refers to var(--error) for failure messages. */
    --error: var(--danger);

    /* Category Colors */
    --cat-hackathon: #f43f5e;
    --cat-internship: #3b82f6;
    --cat-scholarship: #22c55e;
    --cat-research: #8b5cf6;
    --cat-job: #f59e0b;
    --cat-grant: #14b8a6;
    --cat-opensource: #ec4899;

    /* Spacing */
    --sidebar-width: 260px;
    --header-height: 70px;
    --radius-sm: 8px;
    --radius-md: 12px;
    --radius-lg: 16px;
}

/* Global reset: border-box sizing and no default margins or padding. */
* {
    margin: 0;
    padding: 0;
    box-sizing: border-box;
}

body {
    font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif;
    background: var(--bg-primary);
    color: var(--text-primary);
    line-height: 1.6;
    min-height: 100vh;
    overflow-x: hidden;
}

/* App Layout */
.app {
    display: flex;
    min-height: 100vh;
}

/* Sidebar */
.sidebar {
    width: var(--sidebar-width);
    background: var(--bg-secondary);
    border-right: 1px solid var(--border-color);
    padding: 24px 16px;
    position: fixed;
    height: 100vh;
    overflow-y: auto;
    z-index: 100;
}

.logo {
    display: flex;
    align-items: center;
    gap: 12px;
    padding: 8px 12px;
    margin-bottom: 32px;
}

.logo-icon {
    font-size: 28px;
    filter: drop-shadow(0 0 8px rgba(99, 102, 241, 0.5));
}

/* Gradient-filled text: the background is clipped to the glyphs. */
.logo-text {
    font-size: 24px;
    font-weight: 700;
    background: var(--accent-gradient);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    background-clip: text;
}

.nav-section {
    margin-bottom: 24px;
}

.nav-label {
    display: block;
    font-size: 11px;
    font-weight: 600;
    text-transform: uppercase;
    letter-spacing: 0.05em;
    color: var(--text-muted);
    padding: 0 12px;
    margin-bottom: 8px;
}

.nav-item {
    display: flex;
    align-items: center;
    gap: 10px;
    padding: 10px 12px;
    border-radius: var(--radius-sm);
    color: var(--text-secondary);
    text-decoration: none;
    font-size: 14px;
    font-weight: 500;
    transition: all 0.2s ease;
    cursor: pointer;
}
129
+
130
+ .nav-item:hover {
131
+ background: var(--bg-glass);
132
+ color: var(--text-primary);
133
+ }
134
+
135
+ .nav-item.active {
136
+ background: var(--accent-gradient);
137
+ color: white;
138
+ }
139
+
140
+ .nav-item .icon {
141
+ font-size: 16px;
142
+ }
143
+
144
+ /* Main Content */
145
+ .main-content {
146
+ flex: 1;
147
+ margin-left: var(--sidebar-width);
148
+ min-height: 100vh;
149
+ display: flex;
150
+ flex-direction: column;
151
+ }
152
+
153
+ /* Header */
154
+ .header {
155
+ height: var(--header-height);
156
+ background: var(--bg-secondary);
157
+ border-bottom: 1px solid var(--border-color);
158
+ display: flex;
159
+ align-items: center;
160
+ justify-content: space-between;
161
+ padding: 0 32px;
162
+ position: sticky;
163
+ top: 0;
164
+ z-index: 50;
165
+ backdrop-filter: blur(12px);
166
+ }
167
+
168
+ .header-title h1 {
169
+ font-size: 20px;
170
+ font-weight: 600;
171
+ }
172
+
173
+ .subtitle {
174
+ font-size: 13px;
175
+ color: var(--text-muted);
176
+ }
177
+
178
+ .header-actions {
179
+ display: flex;
180
+ gap: 12px;
181
+ }
182
+
183
+ .filter-select {
184
+ background: var(--bg-tertiary);
185
+ border: 1px solid var(--border-color);
186
+ color: var(--text-primary);
187
+ padding: 8px 16px;
188
+ border-radius: var(--radius-sm);
189
+ font-size: 13px;
190
+ cursor: pointer;
191
+ transition: border-color 0.2s;
192
+ }
193
+
194
+ .filter-select:hover {
195
+ border-color: var(--border-hover);
196
+ }
197
+
198
+ .filter-select:focus {
199
+ outline: none;
200
+ border-color: var(--accent-primary);
201
+ }
202
+
203
+ /* Content Area */
204
+ .content-area {
205
+ flex: 1;
206
+ padding: 24px 32px;
207
+ overflow-y: auto;
208
+ }
209
+
210
+ /* Stats Banner */
211
+ .stats-banner {
212
+ display: grid;
213
+ grid-template-columns: repeat(4, 1fr);
214
+ gap: 16px;
215
+ margin-bottom: 24px;
216
+ }
217
+
218
+ .stat-card {
219
+ background: var(--bg-card);
220
+ border: 1px solid var(--border-color);
221
+ border-radius: var(--radius-md);
222
+ padding: 20px;
223
+ display: flex;
224
+ flex-direction: column;
225
+ gap: 4px;
226
+ backdrop-filter: blur(8px);
227
+ }
228
+
229
+ .stat-card.highlight {
230
+ background: var(--accent-gradient);
231
+ border: none;
232
+ }
233
+
234
+ .stat-value {
235
+ font-size: 28px;
236
+ font-weight: 700;
237
+ }
238
+
239
+ .stat-label {
240
+ font-size: 12px;
241
+ color: var(--text-secondary);
242
+ text-transform: uppercase;
243
+ letter-spacing: 0.05em;
244
+ }
245
+
246
+ .stat-card.highlight .stat-label {
247
+ color: rgba(255, 255, 255, 0.8);
248
+ }
249
+
250
+ /* Opportunity Feed */
251
+ .feed {
252
+ display: flex;
253
+ flex-direction: column;
254
+ gap: 16px;
255
+ }
256
+
257
+ .loading {
258
+ text-align: center;
259
+ padding: 60px;
260
+ color: var(--text-muted);
261
+ }
262
+
263
+ /* Opportunity Card */
264
+ .opportunity-card {
265
+ background: var(--bg-card);
266
+ border: 1px solid var(--border-color);
267
+ border-radius: var(--radius-md);
268
+ padding: 20px;
269
+ transition: all 0.2s ease;
270
+ cursor: pointer;
271
+ backdrop-filter: blur(8px);
272
+ }
273
+
274
+ .opportunity-card:hover {
275
+ border-color: var(--border-hover);
276
+ transform: translateY(-2px);
277
+ box-shadow: 0 8px 24px rgba(0, 0, 0, 0.3);
278
+ }
279
+
280
+ .card-header {
281
+ display: flex;
282
+ align-items: flex-start;
283
+ justify-content: space-between;
284
+ margin-bottom: 12px;
285
+ }
286
+
287
+ .card-category {
288
+ display: inline-flex;
289
+ align-items: center;
290
+ gap: 6px;
291
+ padding: 4px 10px;
292
+ border-radius: 20px;
293
+ font-size: 11px;
294
+ font-weight: 600;
295
+ text-transform: uppercase;
296
+ letter-spacing: 0.03em;
297
+ }
298
+
299
+ .card-category.hackathon {
300
+ background: rgba(244, 63, 94, 0.2);
301
+ color: var(--cat-hackathon);
302
+ }
303
+
304
+ .card-category.internship {
305
+ background: rgba(59, 130, 246, 0.2);
306
+ color: var(--cat-internship);
307
+ }
308
+
309
+ .card-category.scholarship {
310
+ background: rgba(34, 197, 94, 0.2);
311
+ color: var(--cat-scholarship);
312
+ }
313
+
314
+ .card-category.research {
315
+ background: rgba(139, 92, 246, 0.2);
316
+ color: var(--cat-research);
317
+ }
318
+
319
+ .card-category.job {
320
+ background: rgba(245, 158, 11, 0.2);
321
+ color: var(--cat-job);
322
+ }
323
+
324
+ .card-category.grant {
325
+ background: rgba(20, 184, 166, 0.2);
326
+ color: var(--cat-grant);
327
+ }
328
+
329
+ .card-category.open_source {
330
+ background: rgba(236, 72, 153, 0.2);
331
+ color: var(--cat-opensource);
332
+ }
333
+
334
+ .card-category.other {
335
+ background: rgba(161, 161, 170, 0.2);
336
+ color: var(--text-secondary);
337
+ }
338
+
339
+ .card-score {
340
+ display: flex;
341
+ align-items: center;
342
+ gap: 4px;
343
+ font-size: 13px;
344
+ color: var(--text-secondary);
345
+ }
346
+
347
+ .score-bar {
348
+ width: 60px;
349
+ height: 6px;
350
+ background: var(--bg-tertiary);
351
+ border-radius: 3px;
352
+ overflow: hidden;
353
+ }
354
+
355
+ .score-fill {
356
+ height: 100%;
357
+ background: var(--accent-gradient);
358
+ border-radius: 3px;
359
+ transition: width 0.3s ease;
360
+ }
361
+
362
+ .card-title {
363
+ font-size: 16px;
364
+ font-weight: 600;
365
+ margin-bottom: 8px;
366
+ line-height: 1.4;
367
+ }
368
+
369
+ .card-meta {
370
+ display: flex;
371
+ gap: 16px;
372
+ font-size: 12px;
373
+ color: var(--text-muted);
374
+ margin-bottom: 12px;
375
+ }
376
+
377
+ .card-meta span {
378
+ display: flex;
379
+ align-items: center;
380
+ gap: 4px;
381
+ }
382
+
383
+ .card-summary {
384
+ font-size: 14px;
385
+ color: var(--text-secondary);
386
+ line-height: 1.6;
387
+ display: -webkit-box;
388
+ -webkit-line-clamp: 2;
389
+ -webkit-box-orient: vertical;
390
+ overflow: hidden;
391
+ }
392
+
393
+ .card-footer {
394
+ display: flex;
395
+ align-items: center;
396
+ justify-content: space-between;
397
+ margin-top: 16px;
398
+ padding-top: 16px;
399
+ border-top: 1px solid var(--border-color);
400
+ }
401
+
402
+ .deadline-badge {
403
+ display: inline-flex;
404
+ align-items: center;
405
+ gap: 6px;
406
+ padding: 4px 10px;
407
+ border-radius: var(--radius-sm);
408
+ font-size: 12px;
409
+ font-weight: 500;
410
+ }
411
+
412
+ .deadline-badge.urgent {
413
+ background: rgba(239, 68, 68, 0.2);
414
+ color: var(--danger);
415
+ }
416
+
417
+ .deadline-badge.soon {
418
+ background: rgba(245, 158, 11, 0.2);
419
+ color: var(--warning);
420
+ }
421
+
422
+ .deadline-badge.ok {
423
+ background: rgba(34, 197, 94, 0.2);
424
+ color: var(--success);
425
+ }
426
+
427
+ .card-actions {
428
+ display: flex;
429
+ gap: 8px;
430
+ }
431
+
432
+ .action-btn {
433
+ padding: 6px 12px;
434
+ border-radius: var(--radius-sm);
435
+ font-size: 12px;
436
+ font-weight: 500;
437
+ border: none;
438
+ cursor: pointer;
439
+ transition: all 0.2s;
440
+ }
441
+
442
+ .action-btn.primary {
443
+ background: var(--accent-gradient);
444
+ color: white;
445
+ }
446
+
447
+ .action-btn.primary:hover {
448
+ transform: scale(1.05);
449
+ }
450
+
451
+ .action-btn.secondary {
452
+ background: var(--bg-tertiary);
453
+ color: var(--text-secondary);
454
+ border: 1px solid var(--border-color);
455
+ }
456
+
457
+ .action-btn.secondary:hover {
458
+ border-color: var(--border-hover);
459
+ color: var(--text-primary);
460
+ }
461
+
462
+ /* Digest View */
463
+ .digest-view {
464
+ background: var(--bg-card);
465
+ border: 1px solid var(--border-color);
466
+ border-radius: var(--radius-md);
467
+ padding: 32px;
468
+ backdrop-filter: blur(8px);
469
+ }
470
+
471
+ .digest-content {
472
+ font-size: 14px;
473
+ line-height: 1.8;
474
+ }
475
+
476
+ .digest-content h1 {
477
+ font-size: 24px;
478
+ margin-bottom: 16px;
479
+ }
480
+
481
+ .digest-content h2 {
482
+ font-size: 18px;
483
+ margin: 24px 0 12px;
484
+ }
485
+
486
+ .digest-content h3 {
487
+ font-size: 16px;
488
+ margin: 20px 0 8px;
489
+ }
490
+
491
+ .digest-content p {
492
+ margin-bottom: 12px;
493
+ color: var(--text-secondary);
494
+ }
495
+
496
+ .digest-content blockquote {
497
+ border-left: 3px solid var(--accent-primary);
498
+ padding-left: 16px;
499
+ color: var(--text-secondary);
500
+ margin: 12px 0;
501
+ }
502
+
503
+ .digest-content a {
504
+ color: var(--accent-primary);
505
+ }
506
+
507
+ .digest-content hr {
508
+ border: none;
509
+ border-top: 1px solid var(--border-color);
510
+ margin: 24px 0;
511
+ }
512
+
513
+ .digest-content table {
514
+ width: 100%;
515
+ border-collapse: collapse;
516
+ margin: 16px 0;
517
+ }
518
+
519
+ .digest-content th,
520
+ .digest-content td {
521
+ padding: 8px 12px;
522
+ border: 1px solid var(--border-color);
523
+ text-align: left;
524
+ }
525
+
526
+ .digest-content th {
527
+ background: var(--bg-tertiary);
528
+ }
529
+
530
+ /* Modal */
531
+ .modal {
532
+ display: none;
533
+ position: fixed;
534
+ top: 0;
535
+ left: 0;
536
+ width: 100%;
537
+ height: 100%;
538
+ z-index: 1000;
539
+ }
540
+
541
+ .modal.active {
542
+ display: flex;
543
+ align-items: center;
544
+ justify-content: center;
545
+ }
546
+
547
+ .modal-backdrop {
548
+ position: absolute;
549
+ top: 0;
550
+ left: 0;
551
+ width: 100%;
552
+ height: 100%;
553
+ background: rgba(0, 0, 0, 0.7);
554
+ backdrop-filter: blur(4px);
555
+ }
556
+
557
+ .modal-content {
558
+ position: relative;
559
+ background: var(--bg-secondary);
560
+ border: 1px solid var(--border-color);
561
+ border-radius: var(--radius-lg);
562
+ width: 90%;
563
+ max-width: 700px;
564
+ max-height: 80vh;
565
+ overflow-y: auto;
566
+ padding: 32px;
567
+ z-index: 1001;
568
+ }
569
+
570
+ .modal-close {
571
+ position: absolute;
572
+ top: 16px;
573
+ right: 16px;
574
+ background: var(--bg-tertiary);
575
+ border: none;
576
+ color: var(--text-secondary);
577
+ width: 32px;
578
+ height: 32px;
579
+ border-radius: 50%;
580
+ font-size: 20px;
581
+ cursor: pointer;
582
+ display: flex;
583
+ align-items: center;
584
+ justify-content: center;
585
+ transition: all 0.2s;
586
+ }
587
+
588
+ .modal-close:hover {
589
+ background: var(--danger);
590
+ color: white;
591
+ }
592
+
593
+ /* Scrollbar */
594
+ ::-webkit-scrollbar {
595
+ width: 8px;
596
+ height: 8px;
597
+ }
598
+
599
+ ::-webkit-scrollbar-track {
600
+ background: var(--bg-primary);
601
+ }
602
+
603
+ ::-webkit-scrollbar-thumb {
604
+ background: var(--bg-tertiary);
605
+ border-radius: 4px;
606
+ }
607
+
608
+ ::-webkit-scrollbar-thumb:hover {
609
+ background: var(--accent-primary);
610
+ }
611
+
612
+ /* Animations */
613
+ @keyframes fadeIn {
614
+ from {
615
+ opacity: 0;
616
+ transform: translateY(10px);
617
+ }
618
+
619
+ to {
620
+ opacity: 1;
621
+ transform: translateY(0);
622
+ }
623
+ }
624
+
625
+ .opportunity-card {
626
+ animation: fadeIn 0.3s ease backwards; /* Entrance fade-in. `backwards` holds the 0% keyframe during the staggered animation-delay (no flash of a fully visible card) and releases the element once finished, so the :hover transform is not overridden by a retained final keyframe. */
627
+ }
628
+
629
+ .opportunity-card:nth-child(1) {
630
+ animation-delay: 0.05s;
631
+ }
632
+
633
+ .opportunity-card:nth-child(2) {
634
+ animation-delay: 0.1s;
635
+ }
636
+
637
+ .opportunity-card:nth-child(3) {
638
+ animation-delay: 0.15s;
639
+ }
640
+
641
+ .opportunity-card:nth-child(4) {
642
+ animation-delay: 0.2s;
643
+ }
644
+
645
+ .opportunity-card:nth-child(5) {
646
+ animation-delay: 0.25s;
647
+ }
648
+
649
+ /* Responsive */
650
+ @media (max-width: 1024px) {
651
+ .sidebar {
652
+ width: 200px;
653
+ }
654
+
655
+ .main-content {
656
+ margin-left: 200px;
657
+ }
658
+
659
+ .stats-banner {
660
+ grid-template-columns: repeat(2, 1fr);
661
+ }
662
+ }
663
+
664
+ @media (max-width: 768px) {
665
+ .sidebar {
666
+ display: none;
667
+ }
668
+
669
+ .main-content {
670
+ margin-left: 0;
671
+ }
672
+
673
+ .header {
674
+ flex-direction: column;
675
+ height: auto;
676
+ padding: 16px;
677
+ gap: 12px;
678
+ }
679
+
680
+ .content-area {
681
+ padding: 16px;
682
+ }
683
+
684
+ .stats-banner {
685
+ grid-template-columns: 1fr 1fr;
686
+ }
687
+ }
688
+
689
+ /* PIOE 2.0: New Category Colors */
690
+ .card-category.micro_grant {
691
+ background: rgba(16, 185, 129, 0.2);
692
+ color: #10b981;
693
+ }
694
+
695
+ .card-category.ecosystem_grant {
696
+ background: rgba(245, 158, 11, 0.2);
697
+ color: #f59e0b;
698
+ }
699
+
700
+ .card-category.innovation_fund {
701
+ background: rgba(59, 130, 246, 0.2);
702
+ color: #3b82f6;
703
+ }
704
+
705
+ .card-category.partnership {
706
+ background: rgba(139, 92, 246, 0.2);
707
+ color: #8b5cf6;
708
+ }
709
+
710
+ .card-category.collaboration {
711
+ background: rgba(236, 72, 153, 0.2);
712
+ color: #ec4899;
713
+ }
714
+
715
+ .card-category.pitch_event {
716
+ background: rgba(244, 63, 94, 0.2);
717
+ color: #f43f5e;
718
+ }
719
+
720
+ .card-category.demo_day {
721
+ background: rgba(99, 102, 241, 0.2);
722
+ color: #6366f1;
723
+ }
724
+
725
+ .card-category.bounty {
726
+ background: rgba(34, 197, 94, 0.2);
727
+ color: #22c55e;
728
+ }
729
+
730
+ .card-category.ambassador {
731
+ background: rgba(234, 179, 8, 0.2);
732
+ color: #eab308;
733
+ }
734
+
735
+ .card-category.pre_grant_signal {
736
+ background: rgba(168, 85, 247, 0.2);
737
+ color: #a855f7;
738
+ }
739
+
740
+ .card-category.pre_hiring_signal {
741
+ background: rgba(6, 182, 212, 0.2);
742
+ color: #06b6d4;
743
+ }
744
+
745
+ /* PIOE 2.0: Chat Panel */
746
+ .chat-fab {
747
+ position: fixed;
748
+ bottom: 24px;
749
+ right: 24px;
750
+ width: 60px;
751
+ height: 60px;
752
+ border-radius: 50%;
753
+ background: var(--accent-gradient);
754
+ border: none;
755
+ box-shadow: 0 4px 20px rgba(99, 102, 241, 0.4);
756
+ font-size: 28px;
757
+ cursor: pointer;
758
+ z-index: 999;
759
+ transition: all 0.3s ease;
760
+ }
761
+
762
+ .chat-fab:hover {
763
+ transform: scale(1.1);
764
+ box-shadow: 0 6px 30px rgba(99, 102, 241, 0.6);
765
+ }
766
+
767
+ .chat-panel {
768
+ position: fixed;
769
+ bottom: 100px;
770
+ right: 24px;
771
+ width: 380px;
772
+ height: 500px;
773
+ background: var(--bg-secondary);
774
+ border: 1px solid var(--border-color);
775
+ border-radius: var(--radius-lg);
776
+ display: none;
777
+ flex-direction: column;
778
+ z-index: 1000;
779
+ box-shadow: 0 8px 40px rgba(0, 0, 0, 0.4);
780
+ }
781
+
782
+ .chat-panel.active {
783
+ display: flex;
784
+ }
785
+
786
+ .chat-header {
787
+ display: flex;
788
+ align-items: center;
789
+ justify-content: space-between;
790
+ padding: 16px 20px;
791
+ background: var(--accent-gradient);
792
+ border-radius: var(--radius-lg) var(--radius-lg) 0 0;
793
+ font-weight: 600;
794
+ }
795
+
796
+ .chat-close {
797
+ background: none;
798
+ border: none;
799
+ color: white;
800
+ font-size: 24px;
801
+ cursor: pointer;
802
+ opacity: 0.8;
803
+ transition: opacity 0.2s;
804
+ }
805
+
806
+ .chat-close:hover {
807
+ opacity: 1;
808
+ }
809
+
810
+ .chat-messages {
811
+ flex: 1;
812
+ overflow-y: auto;
813
+ padding: 16px;
814
+ display: flex;
815
+ flex-direction: column;
816
+ gap: 12px;
817
+ }
818
+
819
+ .chat-message {
820
+ padding: 12px 16px;
821
+ border-radius: var(--radius-md);
822
+ max-width: 90%;
823
+ animation: fadeIn 0.3s ease;
824
+ }
825
+
826
+ .chat-message.user {
827
+ background: var(--accent-gradient);
828
+ color: white;
829
+ align-self: flex-end;
830
+ }
831
+
832
+ .chat-message.bot {
833
+ background: var(--bg-tertiary);
834
+ color: var(--text-secondary);
835
+ align-self: flex-start;
836
+ }
837
+
838
+ .chat-message p {
839
+ margin: 0;
840
+ font-size: 14px;
841
+ line-height: 1.5;
842
+ }
843
+
844
+ .chat-message .opp-link {
845
+ display: block;
846
+ background: var(--bg-card);
847
+ padding: 8px 12px;
848
+ border-radius: var(--radius-sm);
849
+ margin-top: 8px;
850
+ font-size: 12px;
851
+ color: var(--accent-primary);
852
+ text-decoration: none;
853
+ border: 1px solid var(--border-color);
854
+ transition: border-color 0.2s;
855
+ }
856
+
857
+ .chat-message .opp-link:hover {
858
+ border-color: var(--accent-primary);
859
+ }
860
+
861
+ .chat-input-area {
862
+ display: flex;
863
+ gap: 8px;
864
+ padding: 16px;
865
+ border-top: 1px solid var(--border-color);
866
+ }
867
+
868
+ .chat-input-area input {
869
+ flex: 1;
870
+ background: var(--bg-tertiary);
871
+ border: 1px solid var(--border-color);
872
+ color: var(--text-primary);
873
+ padding: 12px 16px;
874
+ border-radius: var(--radius-sm);
875
+ font-size: 14px;
876
+ }
877
+
878
+ .chat-input-area input:focus {
879
+ outline: none;
880
+ border-color: var(--accent-primary);
881
+ }
882
+
883
+ .chat-input-area button {
884
+ background: var(--accent-gradient);
885
+ border: none;
886
+ color: white;
887
+ padding: 12px 20px;
888
+ border-radius: var(--radius-sm);
889
+ font-weight: 500;
890
+ cursor: pointer;
891
+ transition: transform 0.2s;
892
+ }
893
+
894
+ .chat-input-area button:hover {
895
+ transform: scale(1.05);
896
+ }
897
+
898
+ @media (max-width: 480px) {
899
+ .chat-panel {
900
+ width: calc(100% - 32px);
901
+ right: 16px;
902
+ bottom: 90px;
903
+ height: 60vh;
904
+ }
905
+ }
render.yaml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
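+ # render.yaml - Render Blueprint for one-click deploy. NOTE: DATABASE_URL below points at a SQLite file on the service's local filesystem, which Render does not preserve across deploys/restarts unless a persistent disk is attached; use a disk mount path or a managed database for durable data.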
2
+ services:
3
+ - type: web
4
+ name: pioe
5
+ runtime: python
6
+ buildCommand: pip install -r requirements.txt
7
+ startCommand: uvicorn backend.main:app --host 0.0.0.0 --port $PORT
8
+ envVars:
9
+ - key: GEMINI_API_KEY
10
+ sync: false
11
+ - key: ADZUNA_APP_ID
12
+ sync: false
13
+ - key: ADZUNA_API_KEY
14
+ sync: false
15
+ - key: JOOBLE_API_KEY
16
+ sync: false
17
+ - key: RAPIDAPI_KEY
18
+ sync: false
19
+ - key: GITHUB_TOKEN
20
+ sync: false
21
+ - key: DATABASE_URL
22
+ value: sqlite:///./pioe.db
23
+ - key: MIN_RELEVANCE_SCORE
24
+ value: "0.3"
25
+ healthCheckPath: /api/stats
requirements.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # PIOE - Personal Intelligence & Opportunity Engine
2
+ fastapi
3
+ uvicorn[standard]
4
+ sqlalchemy
5
+ httpx
6
+ feedparser
7
+ beautifulsoup4
8
+ lxml
9
+ apscheduler
10
+ sentence-transformers
11
+ python-dotenv
12
+ pydantic
13
+ pydantic-settings
14
+ google-generativeai
15
+ praw
16
+ aiofiles
17
+ PyYAML
18
+ numpy