Commit ·
77d5d95
1
Parent(s): e776c08
添加调试信息并更新文档,强调必须设置GOOFISH_COOKIES环境变量
Browse files- README-HF.md +24 -0
- app.py +14 -0
- data_parser.py +30 -0
- goofish_api.py +29 -1
README-HF.md
CHANGED
|
@@ -63,6 +63,30 @@
|
|
| 63 |
|
| 64 |
本API服务基于FastAPI构建,使用Docker部署在Hugging Face Spaces上。
|
| 65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
如需本地运行,请确保:
|
| 67 |
|
| 68 |
1. 安装所需依赖:`pip install -r requirements.txt`
|
|
|
|
| 63 |
|
| 64 |
本API服务基于FastAPI构建,使用Docker部署在Hugging Face Spaces上。
|
| 65 |
|
| 66 |
+
### 在Hugging Face Spaces上部署
|
| 67 |
+
|
| 68 |
+
**重要:必须设置GOOFISH_COOKIES环境变量,否则API将无法正常工作!**
|
| 69 |
+
|
| 70 |
+
1. 登录Hugging Face账户
|
| 71 |
+
2. 进入您的Space页面(例如:`https://huggingface.co/spaces/<您的用户名>/<您的Space名称>`)
|
| 72 |
+
3. 点击"Settings"选项卡
|
| 73 |
+
4. 找到"Variables and secrets"部分(旧版界面中名为"Repository secrets",通常位于页面下方)
|
| 74 |
+
5. 点击"New secret"按钮
|
| 75 |
+
6. 名称填写:`GOOFISH_COOKIES`
|
| 76 |
+
7. 值填写:从闲鱼网页版登录后获取的cookies字符串(包含`_m_h5_tk`等关键cookie)
|
| 77 |
+
8. 点击"Add secret"保存
|
| 78 |
+
9. 重启你的Space使环境变量生效(在Space页面顶部找到"Factory reset"或"Restart Space"按钮)
|
| 79 |
+
|
| 80 |
+
> **提示**: 如何获取闲鱼cookies?
|
| 81 |
+
> 1. 使用Chrome浏览器访问并登录闲鱼网页版(https://www.goofish.com)
|
| 82 |
+
> 2. 登录成功后,按F12打开开发者工具
|
| 83 |
+
> 3. 在开发者工具中,选择"Application"(应用程序)选项卡
|
| 84 |
+
> 4. 在左侧找到"Storage"(存储)>"Cookies">"https://www.goofish.com"
|
| 85 |
+
> 5. 查看列表中的所有cookie项,其中`_m_h5_tk`和`_m_h5_tk_enc`这两项最为重要,必须包含
|
| 86 |
+
> 6. 将所有cookie项按`名称=值`的形式、以分号加空格分隔,拼接成一个字符串:`cookie1=value1; cookie2=value2; ...`(cookies相当于登录凭证,请勿提交到代码仓库或公开分享)
|
| 87 |
+
|
| 88 |
+
### 本地运行
|
| 89 |
+
|
| 90 |
如需本地运行,请确保:
|
| 91 |
|
| 92 |
1. 安装所需依赖:`pip install -r requirements.txt`
|
app.py
CHANGED
|
@@ -2,7 +2,21 @@ import os
|
|
| 2 |
from api_server import app
|
| 3 |
import uvicorn
|
| 4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
# Hugging Face Spaces uses port 7860 by default; the PORT environment
# variable, when set, takes precedence.
if __name__ == "__main__":
    port = int(os.getenv("PORT", 7860))
    uvicorn.run(app, host="0.0.0.0", port=port)
|
|
|
|
| 2 |
from api_server import app
|
| 3 |
import uvicorn
|
| 4 |
|
| 5 |
+
# Report which cookie-related environment variables are present.
# Only the variable name and the value length are printed: even a short
# prefix of a cookie header can contain session identifiers, and stdout is
# typically captured in the deployment logs.
print("应用启动,检查环境变量...")
for key, value in os.environ.items():
    if 'COOKIE' in key.upper():
        print(f"环境变量: {key} (长度: {len(value)})")

# Warn at start-up when GOOFISH_COOKIES is absent.
if 'GOOFISH_COOKIES' in os.environ:
    print("找到GOOFISH_COOKIES环境变量,长度:", len(os.environ['GOOFISH_COOKIES']))
else:
    print("警告: 未找到GOOFISH_COOKIES环境变量")

# Hugging Face Spaces uses port 7860 by default; PORT overrides it.
if __name__ == "__main__":
    port = int(os.environ.get("PORT", 7860))
    print(f"服务启动在端口: {port}")
    uvicorn.run(app, host="0.0.0.0", port=port)
|
data_parser.py
CHANGED
|
@@ -41,13 +41,43 @@ def parse_search_result(raw_data: Dict) -> List[ItemDetail]:
|
|
| 41 |
"""解析搜索结果数据"""
|
| 42 |
print("开始解析数据...")
|
| 43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
if not raw_data or 'data' not in raw_data:
|
| 45 |
print("无效的数据格式:缺少 'data' 字段")
|
| 46 |
return []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
|
| 48 |
if 'resultList' not in raw_data['data']:
|
| 49 |
print("无效的数据格式:缺少 'resultList' 字段")
|
| 50 |
print(f"可用的字段: {list(raw_data['data'].keys())}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
return []
|
| 52 |
|
| 53 |
items = []
|
|
|
|
| 41 |
"""解析搜索结果数据"""
|
| 42 |
print("开始解析数据...")
|
| 43 |
|
| 44 |
+
# 打印完整的原始数据结构(仅限顶级键)
|
| 45 |
+
print(f"原始数据类型: {type(raw_data)}")
|
| 46 |
+
if isinstance(raw_data, dict):
|
| 47 |
+
print(f"原始数据顶级键: {list(raw_data.keys())}")
|
| 48 |
+
|
| 49 |
+
# 检查API返回的状态码
|
| 50 |
+
if 'ret' in raw_data:
|
| 51 |
+
print(f"API返回状态: {raw_data['ret']}")
|
| 52 |
+
|
| 53 |
+
# 检查API错误信息
|
| 54 |
+
if 'data' in raw_data and isinstance(raw_data['data'], dict) and 'msg' in raw_data['data']:
|
| 55 |
+
print(f"API返回消息: {raw_data['data']['msg']}")
|
| 56 |
+
else:
|
| 57 |
+
print(f"原始数据不是字典: {raw_data}")
|
| 58 |
+
return []
|
| 59 |
+
|
| 60 |
if not raw_data or 'data' not in raw_data:
|
| 61 |
print("无效的数据格式:缺少 'data' 字段")
|
| 62 |
return []
|
| 63 |
+
|
| 64 |
+
# 打印data字段的类型和结构
|
| 65 |
+
print(f"data字段类型: {type(raw_data['data'])}")
|
| 66 |
+
if isinstance(raw_data['data'], dict):
|
| 67 |
+
print(f"data字段键: {list(raw_data['data'].keys())}")
|
| 68 |
+
else:
|
| 69 |
+
print(f"data字段不是字典: {raw_data['data']}")
|
| 70 |
+
return []
|
| 71 |
|
| 72 |
if 'resultList' not in raw_data['data']:
|
| 73 |
print("无效的数据格式:缺少 'resultList' 字段")
|
| 74 |
print(f"可用的字段: {list(raw_data['data'].keys())}")
|
| 75 |
+
|
| 76 |
+
# 如果返回的是重定向URL,打印出来
|
| 77 |
+
if 'url' in raw_data['data']:
|
| 78 |
+
print(f"发现重定向URL: {raw_data['data']['url']}")
|
| 79 |
+
print("这表明cookie已过期或无效,需要重新登录获取新cookie")
|
| 80 |
+
|
| 81 |
return []
|
| 82 |
|
| 83 |
items = []
|
goofish_api.py
CHANGED
|
@@ -7,15 +7,18 @@ import os
|
|
| 7 |
|
| 8 |
def parse_cookies(cookie_str):
    """Parse a ``name=value; name2=value2`` cookie string into a dict.

    Segments are separated by ``;``. Segments without ``=`` are ignored and
    each remaining segment is split on its first ``=`` only. A later
    occurrence of a name overwrites an earlier one.
    """
    pairs = (
        segment.strip().split('=', 1)
        for segment in cookie_str.split(';')
        if '=' in segment
    )
    return {name: value for name, value in pairs}
|
| 16 |
|
| 17 |
class GoofishAPI:
|
| 18 |
def __init__(self):
|
|
|
|
| 19 |
self.base_url = "https://h5api.m.goofish.com"
|
| 20 |
self.app_key = "34839810"
|
| 21 |
self.headers = {
|
|
@@ -28,7 +31,13 @@ class GoofishAPI:
|
|
| 28 |
}
|
| 29 |
# 从环境变量加载cookie
|
| 30 |
load_dotenv()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
cookie_str = os.getenv('GOOFISH_COOKIES', '')
|
|
|
|
| 32 |
|
| 33 |
# 如果没有找到环境变量,尝试使用默认cookie
|
| 34 |
if not cookie_str:
|
|
@@ -37,10 +46,12 @@ class GoofishAPI:
|
|
| 37 |
cookie_str = "_m_h5_tk=6e9d46fed73aae0bf6be61ee132e9a06_1742723039105; _m_h5_tk_enc=6eb4c709b4fbcad1a927c771c7beef21"
|
| 38 |
|
| 39 |
self.cookies = parse_cookies(cookie_str)
|
|
|
|
| 40 |
|
| 41 |
def _get_sign(self, t, data):
|
| 42 |
"""生成签名"""
|
| 43 |
token = self.cookies.get('_m_h5_tk', '').split('_')[0]
|
|
|
|
| 44 |
sign_str = f"{token}&{t}&{self.app_key}&{data}"
|
| 45 |
return hashlib.md5(sign_str.encode('utf-8')).hexdigest()
|
| 46 |
|
|
@@ -56,6 +67,7 @@ class GoofishAPI:
|
|
| 56 |
max_price (float, optional): 最高价格
|
| 57 |
publish_days (int, optional): 发布时间范围(天)
|
| 58 |
"""
|
|
|
|
| 59 |
t = str(int(time.time() * 1000))
|
| 60 |
|
| 61 |
# 构建搜索过滤条件
|
|
@@ -116,6 +128,11 @@ class GoofishAPI:
|
|
| 116 |
url = f"{self.base_url}/h5/mtop.taobao.idlemtopsearch.pc.search/1.0/"
|
| 117 |
|
| 118 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
response = requests.post(
|
| 120 |
url,
|
| 121 |
params=params,
|
|
@@ -123,7 +140,18 @@ class GoofishAPI:
|
|
| 123 |
headers=self.headers,
|
| 124 |
cookies=self.cookies
|
| 125 |
)
|
| 126 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
except Exception as e:
|
| 128 |
print(f"请求失败: {str(e)}")
|
| 129 |
return None
|
|
|
|
| 7 |
|
| 8 |
def parse_cookies(cookie_str):
    """Parse a ``name=value; name2=value2`` cookie string into a dict.

    Segments are separated by ``;``. A segment without ``=`` is ignored; a
    segment is split on its first ``=`` only, so values may themselves
    contain ``=``. When a name occurs more than once, the last one wins.

    Only the input length and the cookie names are logged. Cookie values are
    session credentials and must not be printed to stdout, which is
    typically captured in deployment logs.

    Args:
        cookie_str: Raw cookie string.

    Returns:
        dict mapping cookie name to cookie value.
    """
    print(f"解析cookie字符串, 长度: {len(cookie_str)}")
    cookies = {}
    for item in cookie_str.split(';'):
        if '=' in item:
            name, value = item.strip().split('=', 1)
            cookies[name] = value
    # Names are enough to diagnose a missing cookie; values stay out of the log.
    print(f"解析后的cookie名称: {list(cookies)}")
    return cookies
|
| 18 |
|
| 19 |
class GoofishAPI:
|
| 20 |
def __init__(self):
|
| 21 |
+
print("初始化GoofishAPI...")
|
| 22 |
self.base_url = "https://h5api.m.goofish.com"
|
| 23 |
self.app_key = "34839810"
|
| 24 |
self.headers = {
|
|
|
|
| 31 |
}
|
| 32 |
# 从环境变量加载cookie
|
| 33 |
load_dotenv()
|
| 34 |
+
print("环境变量列表:")
|
| 35 |
+
for key, value in os.environ.items():
|
| 36 |
+
if 'COOKIE' in key.upper():
|
| 37 |
+
print(f"{key}: {value[:20]}...") # 只打印前20个字符,保护隐私
|
| 38 |
+
|
| 39 |
cookie_str = os.getenv('GOOFISH_COOKIES', '')
|
| 40 |
+
print(f"从环境变量读取的GOOFISH_COOKIES长度: {len(cookie_str)}")
|
| 41 |
|
| 42 |
# 如果没有找到环境变量,尝试使用默认cookie
|
| 43 |
if not cookie_str:
|
|
|
|
| 46 |
cookie_str = "_m_h5_tk=6e9d46fed73aae0bf6be61ee132e9a06_1742723039105; _m_h5_tk_enc=6eb4c709b4fbcad1a927c771c7beef21"
|
| 47 |
|
| 48 |
self.cookies = parse_cookies(cookie_str)
|
| 49 |
+
print(f"_m_h5_tk token是否存在: {'_m_h5_tk' in self.cookies}")
|
| 50 |
|
| 51 |
def _get_sign(self, t, data):
|
| 52 |
"""生成签名"""
|
| 53 |
token = self.cookies.get('_m_h5_tk', '').split('_')[0]
|
| 54 |
+
print(f"签名使用的token: {token}")
|
| 55 |
sign_str = f"{token}&{t}&{self.app_key}&{data}"
|
| 56 |
return hashlib.md5(sign_str.encode('utf-8')).hexdigest()
|
| 57 |
|
|
|
|
| 67 |
max_price (float, optional): 最高价格
|
| 68 |
publish_days (int, optional): 发布时间范围(天)
|
| 69 |
"""
|
| 70 |
+
print(f"开始搜索: 关键词={keyword}, 页码={page_number}, 每页={rows_per_page}")
|
| 71 |
t = str(int(time.time() * 1000))
|
| 72 |
|
| 73 |
# 构建搜索过滤条件
|
|
|
|
| 128 |
url = f"{self.base_url}/h5/mtop.taobao.idlemtopsearch.pc.search/1.0/"
|
| 129 |
|
| 130 |
try:
|
| 131 |
+
print(f"发送请求到URL: {url}")
|
| 132 |
+
print(f"请求参数: {json.dumps(params, ensure_ascii=False)}")
|
| 133 |
+
print(f"请求数据: {data_str}")
|
| 134 |
+
print(f"Cookie长度: {len(str(self.cookies))}")
|
| 135 |
+
|
| 136 |
response = requests.post(
|
| 137 |
url,
|
| 138 |
params=params,
|
|
|
|
| 140 |
headers=self.headers,
|
| 141 |
cookies=self.cookies
|
| 142 |
)
|
| 143 |
+
|
| 144 |
+
print(f"响应状态码: {response.status_code}")
|
| 145 |
+
print(f"响应头: {dict(response.headers)}")
|
| 146 |
+
|
| 147 |
+
result = response.json()
|
| 148 |
+
print(f"响应数据键: {list(result.keys()) if isinstance(result, dict) else '不是字典'}")
|
| 149 |
+
|
| 150 |
+
# 检查是否有错误信息
|
| 151 |
+
if isinstance(result, dict) and result.get('ret') and 'FAIL' in result.get('ret')[0]:
|
| 152 |
+
print(f"API错误: {result.get('ret')}")
|
| 153 |
+
|
| 154 |
+
return result
|
| 155 |
except Exception as e:
|
| 156 |
print(f"请求失败: {str(e)}")
|
| 157 |
return None
|