File size: 4,822 Bytes
d0f36e8 77d5d95 d0f36e8 77d5d95 d0f36e8 77d5d95 d0f36e8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
from typing import List, Dict, Optional
from pydantic import BaseModel, Field
import json
class ItemLocation(BaseModel):
    """Pydantic model with a single required string field, ``area``."""

    # Required; no format is enforced beyond the value being a string.
    area: str
class ItemPrice(BaseModel):
    """Pydantic model with a single required float field, ``price``."""

    # Required; the model declares no currency or range constraint.
    price: float
class ItemDetail(BaseModel):
    """Flat record describing one item extracted from a search response.

    ``title``, ``price``, ``item_id``, ``area``, ``seller_nick`` and ``pics``
    are required. ``publish_time``, ``want_count`` and ``detail_url`` have
    defaults and may be omitted.
    """

    title: str
    price: float
    item_id: str
    area: str
    seller_nick: str
    # Explicit default: pydantic v1 implied ``None`` for a bare Optional field,
    # while pydantic v2 treats it as required. Spelling out the default keeps
    # the field optional under both major versions.
    publish_time: Optional[str] = None
    pics: List[str]
    want_count: int = 0
    detail_url: str = ""  # URL of the item's detail page
def safe_int(value: str, default: int = 0) -> int:
    """Convert *value* to ``int``, falling back to *default* on failure.

    Only ``None`` and the empty string are treated as "missing". A genuine
    zero (``0``, ``0.0``, ``"0"``) is converted and returned as ``0`` instead
    of being replaced by *default*.

    Args:
        value: Usually a string, but any object accepted by ``int()`` works.
        default: Returned when *value* is missing or cannot be converted.

    Returns:
        The converted integer, or *default*.
    """
    if value is None or value == "":
        return default
    try:
        return int(value)
    except (ValueError, TypeError, OverflowError):
        # OverflowError covers int(float("inf")); ValueError covers
        # non-numeric text and NaN; TypeError covers unsupported types.
        return default
def safe_float(value: str, default: float = 0.0) -> float:
    """Convert *value* to ``float``, falling back to *default* on failure.

    Only ``None`` and the empty string are treated as "missing". A genuine
    zero (``0``, ``0.0``, ``"0"``) is converted and returned as ``0.0``
    instead of being replaced by *default*.

    Args:
        value: Usually a string, but any object accepted by ``float()`` works.
        default: Returned when *value* is missing or cannot be converted.

    Returns:
        The converted float, or *default*.
    """
    if value is None or value == "":
        return default
    try:
        return float(value)
    except (ValueError, TypeError, OverflowError):
        # OverflowError is raised for integers too large to fit in a float.
        return default
def _parse_result_entry(item_data: Dict, idx: int) -> Optional[ItemDetail]:
    """Build an ``ItemDetail`` from one ``resultList`` entry.

    Returns ``None`` (after printing which key is absent) when any level of
    ``item_data['data']['item']['main']['exContent']`` is missing. Any other
    problem is raised to the caller.
    """
    # Descend one key at a time so the first missing level can be reported.
    node = item_data
    for key in ('data', 'item', 'main', 'exContent'):
        if key not in node:
            print(f"商品 {idx} 缺少 '{key}' 字段")
            return None
        node = node[key]
    ex_content = node

    # A JSON null for detailParams is handled like a missing key, so the item
    # is kept with default price/publish time instead of being discarded.
    detail_params = ex_content.get('detailParams') or {}

    # Extract the price.
    price = safe_float(detail_params.get('soldPrice', 0))

    # Extract the item ID. It is coerced to text because ItemDetail.item_id is
    # a ``str`` field and pydantic v2 does not convert numbers to strings.
    raw_id = ex_content.get('itemId')
    item_id = '' if raw_id is None else str(raw_id)

    # Build the detail-page URL only when an ID is available.
    detail_url = f"https://www.goofish.com/item?id={item_id}" if item_id else ""

    pic_url = ex_content.get('picUrl')

    return ItemDetail(
        title=ex_content.get('title', ''),
        price=price,
        item_id=item_id,
        area=ex_content.get('area', ''),
        seller_nick=ex_content.get('userNickName', ''),
        publish_time=str(detail_params.get('publishTime', '')),
        pics=[pic_url] if pic_url else [],
        want_count=safe_int(ex_content.get('want', '0')),
        detail_url=detail_url,
    )


def parse_search_result(raw_data: Dict) -> List[ItemDetail]:
    """Parse a raw search response into a list of ``ItemDetail`` objects.

    The response is expected to be a dict whose ``data`` entry is a dict
    holding a ``resultList`` list. Progress and diagnostics are printed to
    stdout. Every structural problem results in an empty list rather than an
    exception, and an entry that cannot be parsed is skipped.

    Args:
        raw_data: Decoded response body.

    Returns:
        The successfully parsed items, in response order. Empty when the
        response is malformed or contains no usable entries.
    """
    print("开始解析数据...")

    # Dump the top-level shape of the response to aid debugging.
    print(f"原始数据类型: {type(raw_data)}")
    if not isinstance(raw_data, dict):
        print(f"原始数据不是字典: {raw_data}")
        return []

    print(f"原始数据顶级键: {list(raw_data.keys())}")
    # Report the status field, if the response has one.
    if 'ret' in raw_data:
        print(f"API返回状态: {raw_data['ret']}")
    # Report the message field, if the response has one.
    if 'data' in raw_data and isinstance(raw_data['data'], dict) and 'msg' in raw_data['data']:
        print(f"API返回消息: {raw_data['data']['msg']}")

    # An empty dict also lands here because it has no 'data' key.
    if 'data' not in raw_data:
        print("无效的数据格式:缺少 'data' 字段")
        return []

    data = raw_data['data']
    # Dump the type and keys of the 'data' payload.
    print(f"data字段类型: {type(data)}")
    if not isinstance(data, dict):
        print(f"data字段不是字典: {data}")
        return []
    print(f"data字段键: {list(data.keys())}")

    if 'resultList' not in data:
        print("无效的数据格式:缺少 'resultList' 字段")
        print(f"可用的字段: {list(data.keys())}")
        # A 'url' entry here is reported as a redirect, i.e. the cookie has
        # expired or is invalid and a new login is needed.
        if 'url' in data:
            print(f"发现重定向URL: {data['url']}")
            print("这表明cookie已过期或无效,需要重新登录获取新cookie")
        return []

    items_array = data['resultList']
    # A null or non-list value would make len()/iteration below raise outside
    # the per-item try block, so reject it explicitly.
    if not isinstance(items_array, list):
        print(f"无效的数据格式:'resultList' 不是列表: {type(items_array)}")
        return []

    print(f"找到 {len(items_array)} 个商品")

    items = []
    for idx, item_data in enumerate(items_array):
        # Deliberately best-effort: one malformed entry must not abort the
        # whole parse, so any failure is reported and the entry skipped.
        try:
            item_detail = _parse_result_entry(item_data, idx)
        except Exception as e:
            print(f"解析商品 {idx} 时出错: {str(e)}")
            continue
        if item_detail is None:
            continue
        items.append(item_detail)
        print(f"成功解析商品 {idx}: {item_detail.title[:30]}...")

    print(f"成功解析 {len(items)} 个商品")
    return items