| import gradio as gr |
| import requests |
| from bs4 import BeautifulSoup |
| from urllib.parse import quote_plus |
| import random |
| import time |
| import re |
|
|
# Desktop browser User-Agent strings (Chrome on Windows, Safari on macOS,
# Firefox on Windows) available for outgoing requests.
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Safari/605.1.15",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0",
]
|
|
def human_like_delay(min_delay=1, max_delay=3):
    """Block the caller for a random duration between the two bounds.

    Args:
        min_delay: One bound of the pause, in seconds.
        max_delay: The other bound of the pause, in seconds.

    The duration is drawn uniformly between the bounds. A negative draw is
    treated as "no delay" instead of propagating the ``ValueError`` that
    ``time.sleep`` raises for negative arguments.
    """
    pause = random.uniform(min_delay, max_delay)
    time.sleep(max(0.0, pause))
|
|
def random_query_string(length=10):
    """Return a random string of lowercase ASCII letters.

    Args:
        length: Number of characters to generate. Zero or a negative value
            yields an empty string.

    Returns:
        A string of ``length`` characters drawn from ``a``-``z``.
    """
    letters = 'abcdefghijklmnopqrstuvwxyz'
    return ''.join(random.choice(letters) for _ in range(length))
|
|
| def natural_search_and_extract(keyword): |
| session = requests.Session() |
| user_agent = random.choice(USER_AGENTS) |
| session.headers.update({ |
| "User-Agent": user_agent, |
| "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8", |
| "Accept-Language": "ja,en-US;q=0.7,en;q=0.3", |
| "Accept-Encoding": "gzip, deflate, br", |
| "DNT": "1", |
| "Connection": "keep-alive", |
| "Upgrade-Insecure-Requests": "1" |
| }) |
| |
| search_url = f"https://www.google.com/search?q={quote_plus(keyword)}&hl=ja&{random_query_string()}" |
| |
| try: |
| human_like_delay() |
| response = session.get(search_url) |
| |
| if response.status_code != 200: |
| return f"Error: Unable to fetch search results. Status code: {response.status_code}" |
|
|
| soup = BeautifulSoup(response.text, 'html.parser') |
| queries = [] |
|
|
| |
| related_section = soup.find('span', string=re.compile('他の人はこちらも検索')) |
| if related_section: |
| parent_div = related_section.find_parent('div') |
| if parent_div: |
| for b_tag in parent_div.find_all('b'): |
| query_text = b_tag.get_text(strip=True) |
| if query_text and query_text not in queries: |
| queries.append(query_text) |
| human_like_delay(0.5, 1) |
|
|
| |
| if queries: |
| return "\n".join(queries) |
| else: |
| return f"No related queries found. HTML structure might have changed.\nKeyword searched: {keyword}\nURL: {search_url}\nResponse length: {len(response.text)}" |
|
|
| except Exception as e: |
| return f"An error occurred: {str(e)}" |
|
|
| |
# Gradio UI: a single-line text box for the search keyword and a text box
# that shows whatever string natural_search_and_extract returns.
iface = gr.Interface(
    fn=natural_search_and_extract,
    inputs=gr.Textbox(lines=1, label="検索キーワード"),
    outputs=gr.Textbox(label="関連検索キーワード"),
    title="Google検索関連キーワード抽出ツール",
    description="キーワードを入力して、Googleの「他の人はこちらも検索」セクションから関連キーワードを抽出します。",
)

# Launch the interface when this module is executed.
iface.launch()