rsm-roguchi committed on
Commit
7ebaa26
·
1 Parent(s): 3242f2a

update broken files

Browse files
Files changed (2)
  1. app.py +4 -4
  2. server/general_blog.py +20 -22
app.py CHANGED
@@ -5,7 +5,7 @@ import os
5
 
6
  from ui import (
7
  blog,
8
- general_blog,
9
  meta,
10
  twitter,
11
  price_matching
@@ -13,7 +13,7 @@ from ui import (
13
 
14
  from server import (
15
  blog as blog_srv,
16
- general_blog as general_blog_srv,
17
  meta as meta_srv,
18
  twitter as twitter_srv,
19
  price_matching as price_matching_srv
@@ -23,7 +23,7 @@ from server import (
23
  ui = ui.page_fluid(
24
  ui.page_navbar(
25
  blog.ui,
26
- general_blog.ui,
27
  meta.ui,
28
  twitter.ui,
29
  price_matching.ui,
@@ -36,7 +36,7 @@ ui = ui.page_fluid(
36
 
37
  def server(input, output, session):
38
  blog_srv.server(input, output, session)
39
- general_blog_srv.server(input, output, session)
40
  meta_srv.server(input, output, session)
41
  twitter_srv.server(input, output, session)
42
  price_matching_srv.server(input, output, session)
 
5
 
6
  from ui import (
7
  blog,
8
+ #general_blog,
9
  meta,
10
  twitter,
11
  price_matching
 
13
 
14
  from server import (
15
  blog as blog_srv,
16
+ #general_blog as general_blog_srv,
17
  meta as meta_srv,
18
  twitter as twitter_srv,
19
  price_matching as price_matching_srv
 
23
  ui = ui.page_fluid(
24
  ui.page_navbar(
25
  blog.ui,
26
+ #general_blog.ui,
27
  meta.ui,
28
  twitter.ui,
29
  price_matching.ui,
 
36
 
37
  def server(input, output, session):
38
  blog_srv.server(input, output, session)
39
+ #general_blog_srv.server(input, output, session)
40
  meta_srv.server(input, output, session)
41
  twitter_srv.server(input, output, session)
42
  price_matching_srv.server(input, output, session)
server/general_blog.py CHANGED
@@ -17,29 +17,27 @@ SHOPIFY_API_VERSION = "2024-04"
17
  BLOG_ID = "73667707064"
18
 
19
  # === Static scraper for pokemon.com ===
20
- def scrape_section_content_from_url(url: str) -> str:
21
  try:
22
- resp = requests.get(url, timeout=10, headers={
23
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
24
- "Accept-Language": "en-US,en;q=0.9",
25
- "Referer": "https://www.google.com/"
26
- })
27
- if not resp.ok:
28
- print(f"[ERROR] Request failed: {resp.status_code}")
29
- return ""
30
-
31
- soup = BeautifulSoup(resp.text, "html.parser")
32
-
33
- # Match all divs that contain the full class string
34
- content_blocks = soup.find_all("div")
35
-
36
- if not content_blocks:
37
- print("[WARN] No content blocks matched.")
38
- return ""
39
-
40
- texts = [div.get_text(separator=" ", strip=True) for div in content_blocks]
41
- print(f"[INFO] Extracted {len(texts)} content blocks.")
42
- return "\n\n".join(texts)
43
 
44
  except Exception as e:
45
  print(f"[ERROR] Scraping failed: {e}")
 
17
  BLOG_ID = "73667707064"
18
 
19
  # === Static scraper for pokemon.com ===
20
+ async def scrape_section_content_from_url(url: str) -> str:
21
  try:
22
+ async with async_playwright() as p:
23
+ browser = await p.chromium.launch(headless=True)
24
+ page = await browser.new_page()
25
+ await page.goto(url, timeout=30000)
26
+ await page.wait_for_load_state("networkidle")
27
+ html = await page.content()
28
+ await browser.close()
29
+
30
+ soup = BeautifulSoup(html, "html.parser")
31
+
32
+ # Match all divs and extract text
33
+ content_blocks = soup.find_all("div")
34
+ if not content_blocks:
35
+ print("[WARN] No <div> elements found.")
36
+ return ""
37
+
38
+ texts = [div.get_text(separator=" ", strip=True) for div in content_blocks if div.get_text(strip=True)]
39
+ print(f"[INFO] Extracted {len(texts)} content blocks.")
40
+ return "\n\n".join(texts)
 
 
41
 
42
  except Exception as e:
43
  print(f"[ERROR] Scraping failed: {e}")