paijo77 committed on
Commit
c756a01
·
verified ·
1 Parent(s): d22bb02

update app/grabber/github_grabber.py

Browse files
Files changed (1) hide show
  1. app/grabber/github_grabber.py +43 -0
app/grabber/github_grabber.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import aiohttp
2
+ import asyncio
3
+ from app.grabber.base import BaseGrabber
4
+ from app.models.source import SourceConfig
5
+
6
+
7
class GitHubGrabber(BaseGrabber):
    """Grabber that downloads the text body of a (GitHub-hosted) source URL.

    The retry policy is read from ``self.max_retries``, ``self.retry_delay``
    and ``self.timeout``; these are not defined in this class and are
    presumably provided by ``BaseGrabber`` -- confirm against the base class.
    """

    @staticmethod
    def _to_raw_url(url: str) -> str:
        """Map ``github.com/<owner>/<repo>/raw/<ref>/...`` to the raw-content host.

        Any URL that is not on ``github.com`` / ``www.github.com``, or whose
        third path segment is not ``raw``, is returned unchanged.
        """
        # Local import keeps this block self-contained without touching the
        # module-level import section.
        from urllib.parse import urlsplit, urlunsplit

        parts = urlsplit(url)
        # Compare the parsed hostname instead of substring-matching the whole
        # URL, so hosts such as "gist.github.com" are not rewritten by accident.
        if (parts.hostname or "").lower() not in ("github.com", "www.github.com"):
            return url

        # Expected layout: ['', owner, repo, 'raw', ref, *file_path]
        segments = parts.path.split("/")
        if len(segments) < 5 or segments[3] != "raw":
            return url
        # Drop only the marker segment; a later "raw" directory inside the
        # file path must survive.
        del segments[3]

        # Preserve any userinfo / port that surrounded the original host.
        userinfo, at, hostport = parts.netloc.rpartition("@")
        _, colon, port = hostport.partition(":")
        netloc = f"{userinfo}{at}raw.githubusercontent.com{colon}{port}"

        return urlunsplit(parts._replace(netloc=netloc, path="/".join(segments)))

    async def fetch_content(self, source: SourceConfig) -> str:
        """Fetch ``source.url`` and return the response body as text.

        Up to ``self.max_retries`` attempts are made, sleeping
        ``self.retry_delay`` seconds between attempts. A 404 is never retried.

        Args:
            source: Source configuration; only ``source.url`` is read.

        Returns:
            The decoded body of the first HTTP 200 response.

        Raises:
            FileNotFoundError: The server answered 404.
            asyncio.TimeoutError: The final attempt timed out.
            aiohttp.ClientError: The final attempt failed at the client level,
                including an error status surfaced by ``raise_for_status()``.
            RuntimeError: No attempt produced a 200 response and nothing else
                was raised (e.g. a non-error, non-200 status on the last try).
        """
        url = self._to_raw_url(str(source.url))
        request_timeout = aiohttp.ClientTimeout(total=self.timeout)

        # One session is shared by all attempts rather than rebuilt per retry.
        async with aiohttp.ClientSession() as session:
            for attempt in range(self.max_retries):
                is_last_attempt = attempt >= self.max_retries - 1
                try:
                    async with session.get(url, timeout=request_timeout) as response:
                        if response.status == 200:
                            return await response.text()
                        if response.status == 404:
                            raise FileNotFoundError(f"URL not found: {url}")
                        if is_last_attempt:
                            # Surface the HTTP error once retries are exhausted.
                            response.raise_for_status()
                except (asyncio.TimeoutError, aiohttp.ClientError):
                    if is_last_attempt:
                        raise

                # Back off only after the response has been released, so the
                # connection is not held open while sleeping.
                if not is_last_attempt:
                    await asyncio.sleep(self.retry_delay)

        raise RuntimeError(f"Failed to fetch after {self.max_retries} attempts")