|
|
import requests |
|
|
import time |
|
|
import hashlib |
|
|
import json |
|
|
from dotenv import load_dotenv |
|
|
import os |
|
|
|
|
|
def parse_cookies(cookie_str):
    """Parse a ``Cookie``-header style string into a dictionary.

    Args:
        cookie_str: Text such as ``"a=1; b=2"``. ``None`` or an empty
            string yields an empty dictionary.

    Returns:
        dict: Cookie name -> cookie value. Only the first ``=`` of each
        fragment separates name from value, so values may themselves
        contain ``=``. Whitespace around names and values is removed.
        Fragments without ``=`` or with an empty name are skipped, and a
        later duplicate name overrides an earlier one.
    """
    text = cookie_str or ''
    # Cookie values are credentials: log sizes and names, never the values.
    print(f"解析cookie字符串, 长度: {len(text)}")

    cookies = {}
    for fragment in text.split(';'):
        name, separator, value = fragment.partition('=')
        name = name.strip()
        if not separator or not name:
            continue
        cookies[name] = value.strip()

    print(f"解析后的cookie名称: {json.dumps(list(cookies), ensure_ascii=False)}")
    return cookies
|
|
|
|
|
class GoofishAPI:
    """Small client for the Goofish H5 search endpoint.

    Requests are signed with an MD5 digest derived from the ``_m_h5_tk``
    cookie. The client keeps that cookie (and ``_m_h5_tk_enc``) up to date
    from server responses, refreshes it periodically, and retries a search
    when the server reports an expired token.
    """

    # Seconds to wait on any HTTP request before giving up. Without a
    # timeout ``requests`` waits indefinitely on a stalled connection.
    request_timeout = 20

    # Cookies that carry the signing token and must follow server updates.
    _TOKEN_COOKIE_NAMES = ('_m_h5_tk', '_m_h5_tk_enc')

    def __init__(self):
        """Load configuration and cookies.

        Cookies are read from the ``GOOFISH_COOKIES`` environment variable
        (after loading a ``.env`` file); a built-in default cookie string
        is used when the variable is missing or empty.
        """
        print("初始化GoofishAPI...")
        self.base_url = "https://h5api.m.goofish.com"
        self.app_key = "34839810"
        self.headers = {
            'accept': 'application/json',
            'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
            'content-type': 'application/x-www-form-urlencoded',
            'origin': 'https://www.goofish.com',
            'referer': 'https://www.goofish.com/',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36 Edg/134.0.0.0',
        }

        load_dotenv()
        print("环境变量列表:")
        for key, value in os.environ.items():
            if 'COOKIE' in key.upper():
                # Only a short prefix is shown so the full secret is not logged.
                print(f"{key}: {value[:20]}...")

        cookie_str = os.getenv('GOOFISH_COOKIES', '')
        print(f"从环境变量读取的GOOFISH_COOKIES长度: {len(cookie_str)}")

        if not cookie_str:
            print("警告:未找到GOOFISH_COOKIES环境变量,使用内置默认cookie")
            cookie_str = "_m_h5_tk=6e9d46fed73aae0bf6be61ee132e9a06_1742723039105; _m_h5_tk_enc=6eb4c709b4fbcad1a927c771c7beef21"

        self.cookies = parse_cookies(cookie_str)
        print(f"_m_h5_tk token是否存在: {'_m_h5_tk' in self.cookies}")

        # Periodic refresh bookkeeping (interval is in seconds).
        self.last_refresh_time = time.time()
        self.refresh_interval = 1800
        self.auto_refresh = True
        self.retry_count = 3

    @staticmethod
    def _first_ret_message(result):
        """Return the first ``ret`` message of a response as a string.

        Returns ``''`` when ``result`` is not a dict, has no ``ret``, or the
        message is not a string, so callers can use ``in`` safely.
        """
        if not isinstance(result, dict):
            return ''
        ret = result.get('ret')
        if isinstance(ret, (list, tuple)):
            ret = ret[0] if ret else ''
        return ret if isinstance(ret, str) else ''

    def _absorb_token_cookies(self, response_cookies, log_prefix):
        """Copy token cookies from a response cookie jar into ``self.cookies``.

        Returns ``True`` when at least one token cookie was stored.
        """
        updated = False
        for cookie in response_cookies:
            if cookie.name not in self._TOKEN_COOKIE_NAMES:
                continue
            old_value = self.cookies.get(cookie.name, '')
            new_value = cookie.value or ''
            self.cookies[cookie.name] = new_value
            print(f"{log_prefix} {cookie.name} token: {old_value[:10]}... -> {new_value[:10]}...")
            updated = True
        return updated

    def _absorb_token_header(self, cookie_header):
        """Extract token cookies from a raw ``Set-Cookie`` header string.

        Fallback for token cookies that did not reach the response cookie
        jar. Returns ``True`` when at least one token cookie was stored.
        """
        updated = False
        for cookie_part in cookie_header.split(','):
            for name in self._TOKEN_COOKIE_NAMES:
                # '_m_h5_tk=' never matches inside '_m_h5_tk_enc=', so the
                # two names cannot be confused by this substring test.
                marker = f"{name}="
                if marker not in cookie_part:
                    continue
                value = cookie_part.split(marker)[1].split(';')[0]
                self.cookies[name] = value
                print(f"从头部提取{name}: {value[:10]}...")
                updated = True
        return updated

    def _get_sign(self, t, data):
        """Build the request signature.

        Args:
            t: Millisecond timestamp string sent as the ``t`` parameter.
            data: The exact request payload string being signed.

        Returns:
            str: Hex MD5 of ``"<token>&<t>&<app_key>&<data>"`` where
            ``<token>`` is the part of the ``_m_h5_tk`` cookie before the
            first underscore (empty when the cookie is missing).
        """
        token = self.cookies.get('_m_h5_tk', '').split('_')[0]
        # The token is the signing secret; log only a short prefix.
        print(f"签名使用的token: {token[:10]}...")
        sign_str = f"{token}&{t}&{self.app_key}&{data}"
        return hashlib.md5(sign_str.encode('utf-8')).hexdigest()

    def refresh_token_via_api_call(self):
        """Refresh the token cookies by issuing an ordinary API call.

        Returns:
            bool: ``True`` when the response delivered at least one token
            cookie (``last_refresh_time`` is then reset); ``False`` when no
            token cookie arrived or the request failed. Never raises.
        """
        print("开始尝试刷新token...")

        url = f"{self.base_url}/h5/mtop.taobao.wireless.home.load/1.0/"

        t = str(int(time.time() * 1000))
        params = {
            'jsv': '2.7.2',
            'appKey': self.app_key,
            't': t,
            'sign': self._get_sign(t, "{}"),
            'type': 'json',
            'dataType': 'json',
            'v': '1.0',
            'api': 'mtop.taobao.wireless.home.load',
            'data': '{}',
        }

        try:
            print(f"发送刷新token请求到: {url}")
            response = requests.get(
                url,
                params=params,
                headers=self.headers,
                cookies=self.cookies,
                timeout=self.request_timeout,
            )

            print(f"刷新token响应状态码: {response.status_code}")

            cookie_updated = self._absorb_token_cookies(response.cookies, "更新了")

            if 'set-cookie' in response.headers:
                print("从响应头中发现Set-Cookie字段")
                cookie_header = response.headers.get('set-cookie', '')
                if '_m_h5_tk=' in cookie_header or '_m_h5_tk_enc=' in cookie_header:
                    print(f"响应头中包含token cookies: {cookie_header[:50]}...")
                    if self._absorb_token_header(cookie_header):
                        cookie_updated = True

            # The body is informational only. A non-JSON body must not
            # discard token cookies that were already stored above.
            try:
                result = response.json()
            except ValueError as e:
                print(f"刷新token响应不是有效的JSON: {str(e)}")
                result = None

            if isinstance(result, dict):
                if 'SUCCESS' in self._first_ret_message(result):
                    print("Token刷新API调用成功")
                else:
                    print(f"Token刷新API响应: {result.get('ret')}")

            if cookie_updated:
                self.last_refresh_time = time.time()
                print(f"Token已刷新,下次刷新将在 {self.refresh_interval} 秒后")
                return True

            print("API调用未返回新的token cookies")
            return False

        except Exception as e:
            # Best-effort boundary: a failed refresh is reported, not raised.
            print(f"刷新token失败: {str(e)}")
            return False

    def maybe_refresh_token(self):
        """Refresh the token if the refresh interval has elapsed.

        Returns:
            bool: ``True`` when no refresh was needed (or auto-refresh is
            disabled); otherwise the result of
            ``refresh_token_via_api_call``.
        """
        if not self.auto_refresh:
            return True

        current_time = time.time()
        if current_time - self.last_refresh_time > self.refresh_interval:
            print(f"已经 {int(current_time - self.last_refresh_time)} 秒未刷新token,执行定期刷新...")
            return self.refresh_token_via_api_call()
        return True

    def is_token_expired(self, result):
        """Tell whether a response reports an expired token.

        Args:
            result: Parsed response (any type; non-dicts yield ``False``).

        Returns:
            bool: ``True`` when the first ``ret`` message contains
            ``TOKEN_EXOIRED`` or ``TOKEN_EXPIRED``.
        """
        ret_msg = self._first_ret_message(result)
        # Both spellings are checked on purpose. NOTE(review): presumably
        # the server emits the misspelled one - confirm before removing it.
        return 'TOKEN_EXOIRED' in ret_msg or 'TOKEN_EXPIRED' in ret_msg

    def search_with_retry(self, keyword, page_number=1, rows_per_page=30, min_price=None, max_price=None, publish_days=None):
        """Search with periodic token refresh and retry on token expiry.

        Takes the same arguments as ``search``.

        Returns:
            The first ``search`` result that does not report an expired
            token (this may be ``None`` if the request itself failed), or
            ``None`` after ``retry_count`` expired-token responses.
        """
        self.maybe_refresh_token()

        for attempt in range(self.retry_count):
            result = self.search(keyword, page_number, rows_per_page, min_price, max_price, publish_days)

            if not self.is_token_expired(result):
                return result

            print(f"Token已过期,尝试刷新并重试 (尝试 {attempt+1}/{self.retry_count})")
            self.refresh_token_via_api_call()

        print("多次尝试后仍无法获取有效结果")
        return None

    def search(self, keyword, page_number=1, rows_per_page=30, min_price=None, max_price=None, publish_days=None):
        """Search for items.

        Args:
            keyword (str): Search keyword.
            page_number (int): Page number, starting at 1.
            rows_per_page (int): Number of results per page.
            min_price (float, optional): Lowest price; defaults to 0 when
                only ``max_price`` is given.
            max_price (float, optional): Highest price; left open when only
                ``min_price`` is given.
            publish_days (int, optional): Publication time range in days.

        Returns:
            The parsed JSON response, or ``None`` when the request or the
            JSON decoding failed. Never raises.
        """
        print(f"开始搜索: 关键词={keyword}, 页码={page_number}, 每页={rows_per_page}")
        t = str(int(time.time() * 1000))

        search_filters = []

        if min_price is not None or max_price is not None:
            min_price = min_price if min_price is not None else 0
            max_price = max_price if max_price is not None else ''
            search_filters.append(f"priceRange:{min_price},{max_price}")

        if publish_days is not None:
            search_filters.append(f"publishDays:{publish_days}")

        prop_value_str = {}
        if search_filters:
            prop_value_str["searchFilter"] = ";".join(search_filters)

        data = {
            "pageNumber": page_number,
            "keyword": keyword,
            "fromFilter": bool(search_filters),
            "rowsPerPage": rows_per_page,
            "sortValue": "",
            "sortField": "",
            "customDistance": "",
            "gps": "",
            "propValueStr": prop_value_str,
            "customGps": "",
            "searchReqFromPage": "pcSearch",
            "extraFilterValue": "{}",
            "userPositionJson": "{}",
        }

        # The signature covers this exact string, so the same string must
        # be the one that is posted below.
        data_str = json.dumps(data)
        sign = self._get_sign(t, data_str)

        params = {
            'jsv': '2.7.2',
            'appKey': self.app_key,
            't': t,
            'sign': sign,
            'v': '1.0',
            'type': 'originaljson',
            'accountSite': 'xianyu',
            'dataType': 'json',
            'timeout': '20000',
            'api': 'mtop.taobao.idlemtopsearch.pc.search',
            'sessionOption': 'AutoLoginOnly',
            'spm_cnt': 'a21ybx.search.0.0',
            'spm_pre': 'a21ybx.home.searchInput.0',
        }

        url = f"{self.base_url}/h5/mtop.taobao.idlemtopsearch.pc.search/1.0/"

        try:
            print(f"发送请求到URL: {url}")
            print(f"请求参数: {json.dumps(params, ensure_ascii=False)}")
            print(f"请求数据: {data_str}")
            print(f"Cookie长度: {len(str(self.cookies))}")

            response = requests.post(
                url,
                params=params,
                data={'data': data_str},
                headers=self.headers,
                cookies=self.cookies,
                timeout=self.request_timeout,
            )

            print(f"响应状态码: {response.status_code}")
            # Set-Cookie carries fresh tokens; keep it out of the log.
            safe_headers = {
                name: value
                for name, value in response.headers.items()
                if name.lower() != 'set-cookie'
            }
            print(f"响应头: {safe_headers}")

            self._absorb_token_cookies(response.cookies, "从响应中更新了")

            result = response.json()
            print(f"响应数据键: {list(result.keys()) if isinstance(result, dict) else '不是字典'}")

            if 'FAIL' in self._first_ret_message(result):
                print(f"API错误: {result.get('ret')}")

            return result
        except Exception as e:
            # Best-effort boundary: failures are reported as ``None``.
            print(f"请求失败: {str(e)}")
            return None
|
|
|
|
|
def main():
    """Run a demo search for the keyword "手机" and pretty-print the response."""
    client = GoofishAPI()
    search_result = client.search_with_retry("手机")
    rendered = json.dumps(search_result, ensure_ascii=False, indent=2)
    print(rendered)


# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()