update app/hunter/service.py
app/hunter/service.py (ADDED) +141 -0
@@ -0,0 +1,141 @@
+import asyncio
+import logging
+from datetime import datetime
+from typing import Any, Dict, List
+from urllib.parse import urlparse
+
+import aiohttp
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.database import get_db
+from app.db_models import CandidateSource, ProxySource
+from app.hunter.extractor import UniversalExtractor
+from app.hunter.strategies.ai import AIStrategy
+from app.hunter.strategies.github import GitHubStrategy
+from app.hunter.strategies.search import SearchStrategy
+
+logger = logging.getLogger(__name__)
+
+
+class HunterService:
+    def __init__(self):
+        self.strategies = [GitHubStrategy(), AIStrategy(), SearchStrategy()]
+
+    async def run_hunt(self):
+        """Execute all discovery strategies and process the results."""
+        logger.info("Starting Hunter Protocol...")
+
+        # Map each URL to the first strategy that discovered it, so a URL
+        # reported by several strategies is processed only once.
+        discovered: Dict[str, str] = {}
+
+        # 1. Run all strategies concurrently; return_exceptions keeps one
+        #    failing strategy from aborting the others.
+        tasks = [strategy.discover() for strategy in self.strategies]
+        results = await asyncio.gather(*tasks, return_exceptions=True)
+
+        for strategy, result in zip(self.strategies, results):
+            if isinstance(result, Exception):
+                logger.error(f"Strategy {strategy.name} failed: {result}")
+                continue
+
+            if result:
+                logger.info(f"Strategy {strategy.name} found {len(result)} URLs")
+                for url in result:
+                    discovered.setdefault(url, strategy.name)
+
+        # 2. Process unique URLs
+        logger.info(f"Total unique candidates found: {len(discovered)}")
+
+        async for session in get_db():
+            for url, method in discovered.items():
+                await self.process_candidate(session, url, method)
+            await session.commit()
+
+        logger.info("Hunter Protocol complete.")
+
+    async def process_candidate(self, session: AsyncSession, url: str, method: str):
+        """Check whether the URL is new, then fetch, score, and save it."""
+        # Skip URLs already tracked as candidates
+        stmt = select(CandidateSource).where(CandidateSource.url == url)
+        result = await session.execute(stmt)
+        if result.scalar_one_or_none():
+            logger.debug(f"Candidate already exists: {url}")
+            return
+
+        # Skip URLs already promoted to active sources
+        stmt = select(ProxySource).where(ProxySource.url == url)
+        result = await session.execute(stmt)
+        if result.scalar_one_or_none():
+            logger.debug(f"Source already active: {url}")
+            return
+
+        # Fetch and analyze
+        try:
+            content = await self._fetch_content(url)
+            proxies = UniversalExtractor.extract_proxies(content, source_url=url)
+
+            confidence = self._calculate_confidence(url, proxies)
+
+            # Save
+            candidate = CandidateSource(
+                url=url,
+                domain=self._extract_domain(url),
+                discovery_method=method,
+                status="pending",
+                confidence_score=confidence,
+                proxies_found_count=len(proxies),
+                last_checked_at=datetime.utcnow(),
+            )
+            session.add(candidate)
+            logger.info(
+                f"Added candidate: {url} (Score: {confidence}, Proxies: {len(proxies)})"
+            )
+
+        except Exception as e:
+            logger.warning(f"Failed to process candidate {url}: {e}")
+            # We might still save it as 'failed' or for a 'pending' retry;
+            # for now, skip invalid URLs to keep the DB clean.
+
+    async def _fetch_content(self, url: str) -> str:
+        """Download the raw text of a candidate page, failing on non-200 responses."""
+        timeout = aiohttp.ClientTimeout(total=10)
+        async with aiohttp.ClientSession(timeout=timeout) as session:
+            async with session.get(url) as resp:
+                if resp.status != 200:
+                    raise RuntimeError(f"HTTP {resp.status}")
+                return await resp.text()
+
+    def _calculate_confidence(self, url: str, proxies: List[Any]) -> int:
+        """Heuristic 0-100 score from domain trust, content volume, and protocol mix."""
+        score = 0
+
+        # Domain trust
+        if "github.com" in url or "raw.githubusercontent.com" in url:
+            score += 20
+        elif "pastebin.com" in url:
+            score += 10
+
+        # Content volume
+        count = len(proxies)
+        if count > 0:
+            score += 10
+        if count > 50:
+            score += 20
+        if count > 500:
+            score += 20
+
+        # Protocol diversity
+        protocols = {p.protocol for p in proxies}
+        if len(protocols) > 1:
+            score += 10
+        if "vmess" in protocols or "vless" in protocols:
+            score += 10
+
+        return min(score, 100)
+
+    def _extract_domain(self, url: str) -> str:
+        try:
+            return urlparse(url).netloc
+        except ValueError:
+            return ""