mykeyword / app.py
Yasu777's picture
Update app.py
b419da2 verified
import gradio as gr
import requests
from bs4 import BeautifulSoup
from urllib.parse import quote_plus
import random
import time
import re
# Pool of desktop browser User-Agent strings; one is picked at random per search.
USER_AGENTS = [
    # Chrome 91 on Windows 10
    (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/91.0.4472.124 Safari/537.36"
    ),
    # Safari 14 on macOS 10.15
    (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/605.1.15 (KHTML, like Gecko) "
        "Version/14.0 Safari/605.1.15"
    ),
    # Firefox 89 on Windows 10
    (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) "
        "Gecko/20100101 Firefox/89.0"
    ),
]
def human_like_delay(min_delay=1, max_delay=3):
    """Sleep for a random duration drawn uniformly between the two bounds.

    Args:
        min_delay: Lower bound of the pause, in seconds.
        max_delay: Upper bound of the pause, in seconds.
    """
    pause_seconds = random.uniform(min_delay, max_delay)
    time.sleep(pause_seconds)
def random_query_string(length=10):
    """Return a string of ``length`` random lowercase ASCII letters.

    Args:
        length: Number of characters to generate; values <= 0 yield ``''``.

    Returns:
        The generated string.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    picked = []
    for _ in range(length):
        picked.append(random.choice(alphabet))
    return ''.join(picked)
# Upper bound (seconds) for connecting/reading so a stalled request cannot
# block the UI callback forever.
_REQUEST_TIMEOUT_SECONDS = 10


def natural_search_and_extract(keyword):
    """Fetch a Google results page for *keyword* and return its related queries.

    The page is requested with a randomly chosen User-Agent. The span whose
    text matches "他の人はこちらも検索" ("People also search for") is located,
    and the text of every ``<b>`` tag inside its nearest enclosing ``<div>``
    is collected, de-duplicated in page order.

    Args:
        keyword: Search phrase entered by the user.

    Returns:
        A newline-separated string of related queries, or a human-readable
        error/diagnostic message. Failures are reported through the return
        value; this function does not raise.
    """
    # Reject empty input up front instead of sending a meaningless search.
    if not keyword or not keyword.strip():
        return "Error: Please enter a search keyword."

    # The trailing random token is a bare extra query parameter that makes
    # every request URL unique.
    search_url = f"https://www.google.com/search?q={quote_plus(keyword)}&hl=ja&{random_query_string()}"

    try:
        # The context manager releases pooled connections even on early return.
        with requests.Session() as session:
            session.headers.update({
                "User-Agent": random.choice(USER_AGENTS),
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
                "Accept-Language": "ja,en-US;q=0.7,en;q=0.3",
                # Accept-Encoding is intentionally left at the requests default:
                # advertising "br" without a Brotli decoder installed yields a
                # body that cannot be decoded, and therefore cannot be parsed.
                "DNT": "1",
                "Connection": "keep-alive",
                "Upgrade-Insecure-Requests": "1",
            })

            human_like_delay()
            response = session.get(search_url, timeout=_REQUEST_TIMEOUT_SECONDS)

        if response.status_code != 200:
            return f"Error: Unable to fetch search results. Status code: {response.status_code}"

        soup = BeautifulSoup(response.text, 'html.parser')

        # Locate the "People also search for" section.
        queries = []
        related_section = soup.find('span', string=re.compile('他の人はこちらも検索'))
        parent_div = related_section.find_parent('div') if related_section else None
        if parent_div is not None:
            texts = (b_tag.get_text(strip=True) for b_tag in parent_div.find_all('b'))
            # dict.fromkeys drops duplicates while keeping first-seen order.
            queries = list(dict.fromkeys(text for text in texts if text))

        # Brief randomized pause before returning the result.
        human_like_delay(0.5, 1)

        # Format the result.
        if queries:
            return "\n".join(queries)
        return f"No related queries found. HTML structure might have changed.\nKeyword searched: {keyword}\nURL: {search_url}\nResponse length: {len(response.text)}"
    except Exception as e:
        # Deliberate catch-all at the UI boundary: the error (including a
        # request timeout) is shown to the user as text instead of crashing
        # the Gradio callback.
        return f"An error occurred: {str(e)}"
# Gradio interface setup: one single-line text input, one text output.
_keyword_input = gr.Textbox(lines=1, label="検索キーワード")
_related_output = gr.Textbox(label="関連検索キーワード")

iface = gr.Interface(
    fn=natural_search_and_extract,
    inputs=_keyword_input,
    outputs=_related_output,
    title="Google検索関連キーワード抽出ツール",
    description="キーワードを入力して、Googleの「他の人はこちらも検索」セクションから関連キーワードを抽出します。",
)

# Start the web UI.
iface.launch()