googlesprojectzero committed on
Commit
8813c03
·
verified ·
1 Parent(s): 7c67c8f

Delete src/archive/archive_analyzer.py

Browse files
Files changed (1) hide show
  1. src/archive/archive_analyzer.py +0 -65
src/archive/archive_analyzer.py DELETED
@@ -1,65 +0,0 @@
1
- import asyncio
2
- import aiohttp
3
- from bs4 import BeautifulSoup
4
- from urllib.parse import urljoin
5
-
6
async def fetch_url(session, url, semaphore, timeout=10):
    """Fetch the body of *url* as decoded text, bounded by *semaphore*.

    Args:
        session: shared ``aiohttp.ClientSession`` to issue the GET with.
        url: absolute URL to fetch.
        semaphore: ``asyncio.Semaphore`` limiting concurrent in-flight requests.
        timeout: per-request timeout in seconds (default 10).

    Returns:
        The response body as ``str``.

    Raises:
        asyncio.TimeoutError: if the request exceeds *timeout*.
        aiohttp.ClientError: on any transport/protocol failure.

    NOTE(review): the previous version swallowed these exceptions and
    returned an ``{"error": ...}`` dict instead, which was then fed to
    BeautifulSoup by the caller and crashed with an unrelated TypeError.
    Raising lets ``analyze_source`` attach the source name to the real error.
    """
    async with semaphore:
        async with session.get(url, timeout=timeout) as response:
            return await response.text()
15
-
16
async def analyze_sources(sources, concurrency=10):
    """Analyze every ``name -> url`` entry of *sources* concurrently.

    At most *concurrency* HTTP requests are in flight at any moment; the
    shared semaphore is passed down to each per-source task.

    Returns:
        A list of per-source result dicts, in *sources* iteration order.
    """
    gate = asyncio.Semaphore(concurrency)
    pending = []
    for source_name, source_url in sources.items():
        pending.append(analyze_source(source_name, source_url, gate))
    return await asyncio.gather(*pending)
20
-
21
async def analyze_source(source_name, source_url, semaphore):
    """Fetch one page and extract its title and absolute link targets.

    Args:
        source_name: human-readable label reported back in the result.
        source_url: page URL to fetch; also the base for resolving links.
        semaphore: concurrency limiter forwarded to ``fetch_url``.

    Returns:
        On success: ``{"source", "title", "links"}`` where links are made
        absolute with ``urljoin``.  On any failure: ``{"source", "error"}``.
    """
    async with aiohttp.ClientSession() as session:
        try:
            html = await fetch_url(session, source_url, semaphore)
            # Defensive: some versions of fetch_url report failure by
            # returning an {"error": ...} dict rather than raising.  Surface
            # that as this source's error instead of letting BeautifulSoup
            # choke on a non-string and mask the real cause.
            if isinstance(html, dict):
                return {"source": source_name, **html}
            soup = BeautifulSoup(html, 'html.parser')
            # <title> may exist but be empty, in which case .string is None;
            # fall back to the placeholder in both cases.
            if soup.title and soup.title.string:
                title = soup.title.string
            else:
                title = "No Title Found"
            return {
                "source": source_name,
                "title": title,
                # Resolve relative hrefs against the page URL so callers
                # always receive absolute, directly usable links.
                "links": [urljoin(source_url, a['href'])
                          for a in soup.find_all('a', href=True)],
            }
        except Exception as e:
            # One bad source must not abort the whole gather(); report it
            # in-band alongside the successful results.
            return {"source": source_name, "error": str(e)}
34
-
35
if __name__ == "__main__":
    # Demo entry point.  The previous version discarded the gathered
    # results, so running the module produced no visible output; print each
    # per-source result dict instead.
    sources = {
        "Source A": "http://example.com",
        "Source B": "http://example.org"
    }
    results = asyncio.run(analyze_sources(sources))
    for result in results:
        print(result)
41
-
42
- # import asyncio
43
- # import aiohttp
44
- # from bs4 import BeautifulSoup
45
-
46
- # async def fetch_url(session, url):
47
- # async with session.get(url) as response:
48
- # return await response.text()
49
-
50
- # async def analyze_source(source_name, source_url):
51
- # async with aiohttp.ClientSession() as session:
52
- # try:
53
- # html = await fetch_url(session, source_url)
54
- # soup = BeautifulSoup(html, 'html.parser')
55
- # return {
56
- # "source": source_name,
57
- # "title": soup.title.string if soup.title else "No Title Found",
58
- # "links": [a['href'] for a in soup.find_all('a', href=True)]
59
- # }
60
- # except Exception as e:
61
- # return {"source": source_name, "error": str(e)}
62
-
63
- # async def analyze_sources(sources):
64
- # tasks = [analyze_source(name, url) for name, url in sources.items()]
65
- # return await asyncio.gather(*tasks)