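# Source: goofish-api-huggingface / goofish_api.py
# Page header residue: "luoluoluo22's picture"
# Commit message: 实现token自动刷新功能并添加Web界面 (implement automatic token refresh and add a web UI)
# Commit: f99684d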
import requests
import time
import hashlib
import json
from dotenv import load_dotenv
import os
def parse_cookies(cookie_str):
    """Parse a ``Cookie``-header style string into a ``{name: value}`` dict.

    Args:
        cookie_str: Semicolon-separated ``name=value`` pairs, for example
            ``"a=1; b=2"``. ``None`` or an empty string yields ``{}``.

    Returns:
        dict: Cookie names mapped to their values. Only the first ``=`` of
        each pair separates name from value, so values may themselves
        contain ``=``. Fragments without ``=`` or with an empty name are
        skipped; when a name repeats, the last occurrence wins.
    """
    if not cookie_str:
        print("解析cookie字符串: (空)")
        return {}

    # Log only the size and the cookie names: the values are session
    # credentials and must not end up in stdout or log files.
    print(f"解析cookie字符串: 长度={len(cookie_str)}")

    cookies = {}
    for item in cookie_str.split(';'):
        name, separator, value = item.strip().partition('=')
        name = name.strip()
        if separator and name:
            cookies[name] = value

    print(f"解析后的cookies: {json.dumps(sorted(cookies), ensure_ascii=False)}")
    return cookies
class GoofishAPI:
    """Small HTTP client for the Goofish H5 search endpoint.

    Every request is signed with an MD5 digest built from the token stored in
    the ``_m_h5_tk`` cookie (see ``_get_sign``). The client keeps its cookies
    in ``self.cookies`` and replaces the token cookies whenever a response
    carries new ones, either on a fixed interval (``maybe_refresh_token``) or
    after a response that reports an expired token (``search_with_retry``).

    Attributes:
        base_url: Scheme and host of the API.
        app_key: Application key sent as ``appKey`` and mixed into the sign.
        headers: Browser-like HTTP headers sent with every request.
        cookies: Cookie dict sent with every request.
        last_refresh_time: ``time.time()`` of the last successful refresh
            (initialised to the construction time).
        refresh_interval: Seconds between periodic refreshes.
        auto_refresh: When False, ``maybe_refresh_token`` does nothing.
        retry_count: Maximum number of search attempts in
            ``search_with_retry``.
        request_timeout: Seconds to wait for an HTTP response.
    """

    # Class-level default so that a stalled connection cannot block forever.
    # 20 s mirrors the 'timeout': '20000' (milliseconds) query parameter that
    # search() already sends. Override per instance if needed.
    request_timeout = 20

    # Cookies that carry the signing token and its companion value.
    _TOKEN_COOKIE_NAMES = ('_m_h5_tk', '_m_h5_tk_enc')

    def __init__(self):
        """Build the client, loading cookies from ``GOOFISH_COOKIES``.

        ``load_dotenv()`` is called first so the variable may also come from
        a ``.env`` file. When the variable is missing or empty, a built-in
        placeholder cookie is used, which may not work against the live API.
        """
        print("初始化GoofishAPI...")
        self.base_url = "https://h5api.m.goofish.com"
        self.app_key = "34839810"
        self.headers = {
            'accept': 'application/json',
            'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
            'content-type': 'application/x-www-form-urlencoded',
            'origin': 'https://www.goofish.com',
            'referer': 'https://www.goofish.com/',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36 Edg/134.0.0.0'
        }

        # Load the cookie string from the environment (optionally via .env).
        load_dotenv()
        print("环境变量列表:")
        for key, value in os.environ.items():
            if 'COOKIE' in key.upper():
                # Print only the first 20 characters to protect privacy.
                print(f"{key}: {value[:20]}...")

        cookie_str = os.getenv('GOOFISH_COOKIES', '')
        print(f"从环境变量读取的GOOFISH_COOKIES长度: {len(cookie_str)}")

        if not cookie_str:
            print("警告:未找到GOOFISH_COOKIES环境变量,使用内置默认cookie")
            # Simple placeholder cookie; it may not work.
            cookie_str = "_m_h5_tk=6e9d46fed73aae0bf6be61ee132e9a06_1742723039105; _m_h5_tk_enc=6eb4c709b4fbcad1a927c771c7beef21"

        self.cookies = parse_cookies(cookie_str)
        print(f"_m_h5_tk token是否存在: {'_m_h5_tk' in self.cookies}")

        # Bookkeeping for the periodic token refresh.
        self.last_refresh_time = time.time()
        self.refresh_interval = 1800  # refresh every 30 minutes
        self.auto_refresh = True  # whether to refresh the token automatically
        self.retry_count = 3  # search attempts when the token has expired

    def _get_sign(self, t, data):
        """Return the request signature for timestamp ``t`` and payload ``data``.

        The signature is the hex MD5 of ``"{token}&{t}&{app_key}&{data}"``,
        where ``token`` is the part of the ``_m_h5_tk`` cookie before its
        first underscore (an empty string when the cookie is missing).

        Args:
            t: Millisecond timestamp as a string; must equal the ``t`` query
                parameter of the request being signed.
            data: The exact payload string being sent.

        Returns:
            str: 32-character lowercase hex digest.
        """
        token = self.cookies.get('_m_h5_tk', '').split('_')[0]
        # Only a short prefix is logged; the full token is a credential.
        print(f"签名使用的token: {token[:6]}...")
        sign_str = f"{token}&{t}&{self.app_key}&{data}"
        return hashlib.md5(sign_str.encode('utf-8')).hexdigest()

    @staticmethod
    def _first_ret(result):
        """Return the first ``ret`` message of a response as a str ('' if none)."""
        if not isinstance(result, dict):
            return ''
        ret = result.get('ret')
        if isinstance(ret, (list, tuple)):
            ret = ret[0] if ret else ''
        return ret if isinstance(ret, str) else ''

    def _store_token_cookies(self, response, log_prefix):
        """Copy token cookies from ``response.cookies``; return True if any were stored."""
        updated = False
        for cookie in response.cookies:
            if cookie.name in self._TOKEN_COOKIE_NAMES and cookie.value:
                old_value = self.cookies.get(cookie.name, '')
                self.cookies[cookie.name] = cookie.value
                print(f"{log_prefix} {cookie.name} token: {old_value[:10]}... -> {cookie.value[:10]}...")
                updated = True
        return updated

    def _store_token_cookies_from_header(self, cookie_header):
        """Extract token cookies from a raw Set-Cookie header; return True if any were stored."""
        updated = False
        # Splitting on ',' also cuts through "Expires=Wed, ..." dates, but the
        # fragment holding "name=value" stays intact, which is all that is read.
        for cookie_part in cookie_header.split(','):
            for name in self._TOKEN_COOKIE_NAMES:
                marker = f"{name}="
                if marker in cookie_part:
                    value = cookie_part.split(marker)[1].split(';')[0]
                    self.cookies[name] = value
                    print(f"从头部提取{name}: {value[:10]}...")
                    updated = True
        return updated

    def refresh_token_via_api_call(self):
        """Refresh the token cookies by issuing a signed GET to a light endpoint.

        New ``_m_h5_tk`` / ``_m_h5_tk_enc`` values are taken from the parsed
        response cookies and, additionally, from the raw ``Set-Cookie``
        header. The response body is only logged; it does not influence the
        return value.

        Returns:
            bool: True when at least one token cookie was stored (in which
            case ``last_refresh_time`` is reset), False when the request
            failed or no token cookie came back.
        """
        print("开始尝试刷新token...")

        # A simple, low-overhead endpoint is enough to obtain fresh cookies.
        url = f"{self.base_url}/h5/mtop.taobao.wireless.home.load/1.0/"
        t = str(int(time.time() * 1000))
        params = {
            'jsv': '2.7.2',
            'appKey': self.app_key,
            't': t,
            'sign': self._get_sign(t, "{}"),  # signature over the empty payload
            'type': 'json',
            'dataType': 'json',
            'v': '1.0',
            'api': 'mtop.taobao.wireless.home.load',
            'data': '{}'
        }

        try:
            print(f"发送刷新token请求到: {url}")
            response = requests.get(
                url,
                params=params,
                headers=self.headers,
                cookies=self.cookies,
                timeout=self.request_timeout
            )
        except requests.RequestException as e:
            print(f"刷新token失败: {str(e)}")
            return False

        print(f"刷新token响应状态码: {response.status_code}")

        cookie_updated = self._store_token_cookies(response, "更新了")

        # Also look at the raw Set-Cookie header as a second source.
        cookie_header = response.headers.get('set-cookie', '')
        if cookie_header:
            print("从响应头中发现Set-Cookie字段")
            if '_m_h5_tk=' in cookie_header or '_m_h5_tk_enc=' in cookie_header:
                print(f"响应头中包含token cookies: {cookie_header[:50]}...")
                if self._store_token_cookies_from_header(cookie_header):
                    cookie_updated = True

        # The body is informational only, so a non-JSON body must not discard
        # cookies that were already stored above.
        try:
            result = response.json()
        except ValueError as e:
            print(f"刷新token响应不是有效的JSON: {str(e)}")
            result = None

        if isinstance(result, dict):
            if 'SUCCESS' in self._first_ret(result):
                print("Token刷新API调用成功")
            else:
                print(f"Token刷新API响应: {result.get('ret')}")

        if cookie_updated:
            self.last_refresh_time = time.time()
            print(f"Token已刷新,下次刷新将在 {self.refresh_interval} 秒后")
            return True

        print("API调用未返回新的token cookies")
        return False

    def maybe_refresh_token(self):
        """Refresh the token if more than ``refresh_interval`` seconds have passed.

        Returns:
            bool: True when no refresh was needed (or auto refresh is off);
            otherwise the result of ``refresh_token_via_api_call``.
        """
        if not self.auto_refresh:
            return True

        current_time = time.time()
        elapsed = current_time - self.last_refresh_time
        if elapsed > self.refresh_interval:
            print(f"已经 {int(elapsed)} 秒未刷新token,执行定期刷新...")
            return self.refresh_token_via_api_call()

        return True

    def is_token_expired(self, result):
        """Report whether an API response says the token has expired.

        Args:
            result: Parsed response; anything that is not a dict with a
                string (or list-of-string) ``ret`` field yields False.

        Returns:
            bool: True when the first ``ret`` message contains
            ``TOKEN_EXOIRED`` or ``TOKEN_EXPIRED``.
        """
        ret_msg = self._first_ret(result)
        # Both spellings are checked on purpose. NOTE(review): 'TOKEN_EXOIRED'
        # looks like a typo but may be the spelling the server really sends;
        # confirm against a live response before removing it.
        return 'TOKEN_EXOIRED' in ret_msg or 'TOKEN_EXPIRED' in ret_msg

    def search_with_retry(self, keyword, page_number=1, rows_per_page=30, min_price=None, max_price=None, publish_days=None):
        """Search with periodic refresh and retry on an expired token.

        Runs the periodic refresh check, then calls ``search`` up to
        ``retry_count`` times. After each response that reports an expired
        token the token is refreshed and the search is repeated.

        Args:
            keyword, page_number, rows_per_page, min_price, max_price,
            publish_days: Forwarded unchanged to ``search``.

        Returns:
            The first ``search`` result that does not report an expired
            token (this may be ``None`` if the request itself failed), or
            ``None`` when every attempt reported an expired token.
        """
        self.maybe_refresh_token()

        for attempt in range(self.retry_count):
            result = self.search(keyword, page_number, rows_per_page, min_price, max_price, publish_days)

            if self.is_token_expired(result):
                print(f"Token已过期,尝试刷新并重试 (尝试 {attempt+1}/{self.retry_count})")
                self.refresh_token_via_api_call()
                continue

            return result

        print("多次尝试后仍无法获取有效结果")
        return None

    def search(self, keyword, page_number=1, rows_per_page=30, min_price=None, max_price=None, publish_days=None):
        """Search for items with a single signed POST request.

        Args:
            keyword (str): Search keyword.
            page_number (int): Page number, starting at 1.
            rows_per_page (int): Number of results per page.
            min_price (float, optional): Lowest price; 0 is sent when only
                ``max_price`` is given.
            max_price (float, optional): Highest price; left empty when only
                ``min_price`` is given.
            publish_days (int, optional): Publication time range in days.

        Returns:
            The parsed JSON response (normally a dict), or ``None`` when the
            request failed or the body was not valid JSON. Token cookies in
            the response are stored on ``self.cookies`` as a side effect.
        """
        print(f"开始搜索: 关键词={keyword}, 页码={page_number}, 每页={rows_per_page}")

        t = str(int(time.time() * 1000))

        # Build the optional search filters.
        search_filters = []
        if min_price is not None or max_price is not None:
            low = min_price if min_price is not None else 0
            high = max_price if max_price is not None else ''
            search_filters.append(f"priceRange:{low},{high}")
        if publish_days is not None:
            search_filters.append(f"publishDays:{publish_days}")

        prop_value_str = {}
        if search_filters:
            prop_value_str["searchFilter"] = ";".join(search_filters)

        data = {
            "pageNumber": page_number,
            "keyword": keyword,
            "fromFilter": bool(search_filters),  # True when any filter is set
            "rowsPerPage": rows_per_page,
            "sortValue": "",
            "sortField": "",
            "customDistance": "",
            "gps": "",
            "propValueStr": prop_value_str,
            "customGps": "",
            "searchReqFromPage": "pcSearch",
            "extraFilterValue": "{}",
            "userPositionJson": "{}"
        }

        # The signature covers this exact string, so the same string is sent.
        data_str = json.dumps(data)
        sign = self._get_sign(t, data_str)

        params = {
            'jsv': '2.7.2',
            'appKey': self.app_key,
            't': t,
            'sign': sign,
            'v': '1.0',
            'type': 'originaljson',
            'accountSite': 'xianyu',
            'dataType': 'json',
            'timeout': '20000',
            'api': 'mtop.taobao.idlemtopsearch.pc.search',
            'sessionOption': 'AutoLoginOnly',
            'spm_cnt': 'a21ybx.search.0.0',
            'spm_pre': 'a21ybx.home.searchInput.0'
        }

        url = f"{self.base_url}/h5/mtop.taobao.idlemtopsearch.pc.search/1.0/"

        try:
            print(f"发送请求到URL: {url}")
            print(f"请求参数: {json.dumps(params, ensure_ascii=False)}")
            print(f"请求数据: {data_str}")
            print(f"Cookie长度: {len(str(self.cookies))}")

            response = requests.post(
                url,
                params=params,
                data={'data': data_str},
                headers=self.headers,
                cookies=self.cookies,
                timeout=self.request_timeout
            )

            print(f"响应状态码: {response.status_code}")
            # Set-Cookie is left out of the dump because it carries tokens.
            safe_headers = {
                name: value
                for name, value in response.headers.items()
                if name.lower() != 'set-cookie'
            }
            print(f"响应头: {safe_headers}")

            # Pick up new token cookies, if the response carries any.
            self._store_token_cookies(response, "从响应中更新了")

            result = response.json()
        except (requests.RequestException, ValueError) as e:
            print(f"请求失败: {str(e)}")
            return None

        print(f"响应数据键: {list(result.keys()) if isinstance(result, dict) else '不是字典'}")

        if 'FAIL' in self._first_ret(result):
            print(f"API错误: {result.get('ret')}")

        return result
def main():
    """Run a demo search for "手机" and print the raw response as JSON."""
    client = GoofishAPI()
    # Go through the retrying wrapper so the demo exercises the automatic
    # token refresh as well as the search itself.
    response = client.search_with_retry("手机")
    rendered = json.dumps(response, ensure_ascii=False, indent=2)
    print(rendered)


if __name__ == "__main__":
    main()