|
|
from typing import List, Dict, Optional |
|
|
from pydantic import BaseModel, Field |
|
|
import json |
|
|
|
|
|
class ItemLocation(BaseModel):
    """Pydantic model carrying a single required ``area`` string."""

    # Free-form area text; no validation beyond pydantic's ``str`` check.
    area: str
|
|
|
|
|
class ItemPrice(BaseModel):
    """Pydantic model carrying a single required ``price`` float."""

    # Numeric price; currency/unit is not specified anywhere in this module.
    price: float
|
|
|
|
|
class ItemDetail(BaseModel):
    """Flat record describing one item extracted from a search response.

    ``parse_search_result`` builds instances of this model from the
    ``exContent`` / ``detailParams`` sections of each result entry.
    """

    title: str
    price: float
    item_id: str
    area: str
    seller_nick: str
    # Explicit ``None`` default: a bare ``Optional[str]`` is implicitly
    # optional under pydantic v1 but *required* under pydantic v2, so spell
    # the default out to get the same behavior on both major versions.
    publish_time: Optional[str] = None
    # Image URLs; the parser supplies zero or one entry.
    pics: List[str]
    want_count: int = 0
    # Link to the item page; empty string when the item id is unknown.
    detail_url: str = ""
|
|
|
|
|
def safe_int(value: str, default: int = 0) -> int:
    """Convert *value* to an ``int``, returning *default* if that fails.

    Although annotated as ``str``, callers in this module also pass values
    taken straight from decoded JSON, so numbers and ``None`` are tolerated.

    Args:
        value: The value to convert (typically a numeric string).
        default: Returned when *value* is ``None``, empty, or not
            convertible to an integer.

    Returns:
        ``int(value)`` on success, otherwise *default*.
    """
    # No truthiness pre-check: ``if not value`` would turn a legitimate
    # numeric 0 into *default*. ``None`` and ``""`` are rejected by ``int()``
    # itself (TypeError / ValueError) and therefore still yield *default*.
    try:
        return int(value)
    except (ValueError, TypeError, OverflowError):
        # OverflowError: int(float("inf")), e.g. JSON ``Infinity``.
        return default
|
|
|
|
|
def safe_float(value: str, default: float = 0.0) -> float:
    """Convert *value* to a ``float``, returning *default* if that fails.

    Although annotated as ``str``, callers in this module also pass values
    taken straight from decoded JSON, so numbers and ``None`` are tolerated.

    Args:
        value: The value to convert (typically a numeric string).
        default: Returned when *value* is ``None``, empty, or not
            convertible to a float.

    Returns:
        ``float(value)`` on success, otherwise *default*.
    """
    # No truthiness pre-check: ``if not value`` would turn a legitimate
    # numeric 0 into *default*. ``None`` and ``""`` are rejected by
    # ``float()`` itself (TypeError / ValueError) and still yield *default*.
    try:
        return float(value)
    except (ValueError, TypeError, OverflowError):
        # OverflowError: an int too large to be represented as a float.
        return default
|
|
|
|
|
def _parse_result_item(idx: int, item_data: Dict) -> Optional[ItemDetail]:
    """Build an ``ItemDetail`` from one ``resultList`` entry.

    Args:
        idx: Position of the entry in ``resultList``; used only in messages.
        item_data: The raw entry; expected to contain
            ``data -> item -> main -> exContent``.

    Returns:
        The parsed item, or ``None`` (after printing which key is missing)
        when the expected nesting is incomplete.
    """
    if 'data' not in item_data:
        print(f"商品 {idx} 缺少 'data' 字段")
        return None

    if 'item' not in item_data['data']:
        print(f"商品 {idx} 缺少 'item' 字段")
        return None

    item = item_data['data']['item']
    if 'main' not in item:
        print(f"商品 {idx} 缺少 'main' 字段")
        return None

    if 'exContent' not in item['main']:
        print(f"商品 {idx} 缺少 'exContent' 字段")
        return None

    ex_content = item['main']['exContent']
    # ``or {}``: an explicit null ``detailParams`` must not discard the item.
    detail_params = ex_content.get('detailParams') or {}

    price = safe_float(detail_params.get('soldPrice', 0))
    item_id = ex_content.get('itemId', '')
    detail_url = f"https://www.goofish.com/item?id={item_id}" if item_id else ""

    # Avoid str(None) == "None" leaking into the model for a null timestamp.
    raw_publish_time = detail_params.get('publishTime', '')
    publish_time = '' if raw_publish_time is None else str(raw_publish_time)

    pic_url = ex_content.get('picUrl')

    return ItemDetail(
        title=ex_content.get('title', ''),
        price=price,
        item_id=item_id,
        area=ex_content.get('area', ''),
        seller_nick=ex_content.get('userNickName', ''),
        publish_time=publish_time,
        pics=[pic_url] if pic_url else [],
        want_count=safe_int(ex_content.get('want', '0')),
        detail_url=detail_url,
    )


def parse_search_result(raw_data: Dict) -> List[ItemDetail]:
    """Parse a raw search response into a list of ``ItemDetail`` records.

    The function is best-effort: structural problems in the response are
    reported with ``print`` and produce an empty list, and a failure while
    parsing one entry skips only that entry.

    Args:
        raw_data: Decoded response; expected shape is
            ``{"data": {"resultList": [...]}}``.

    Returns:
        The successfully parsed items, in response order. Empty when the
        response is malformed or contains no usable entries.
    """
    print("开始解析数据...")

    # Diagnostic dump of the top-level response structure.
    print(f"原始数据类型: {type(raw_data)}")
    if not isinstance(raw_data, dict):
        print(f"原始数据不是字典: {raw_data}")
        return []

    print(f"原始数据顶级键: {list(raw_data.keys())}")
    if 'ret' in raw_data:
        print(f"API返回状态: {raw_data['ret']}")
    if 'data' in raw_data and isinstance(raw_data['data'], dict) and 'msg' in raw_data['data']:
        print(f"API返回消息: {raw_data['data']['msg']}")

    # An empty dict has no 'data' key, so this also covers the empty case.
    if 'data' not in raw_data:
        print("无效的数据格式:缺少 'data' 字段")
        return []

    data = raw_data['data']
    print(f"data字段类型: {type(data)}")
    if not isinstance(data, dict):
        print(f"data字段不是字典: {data}")
        return []
    print(f"data字段键: {list(data.keys())}")

    if 'resultList' not in data:
        print("无效的数据格式:缺少 'resultList' 字段")
        print(f"可用的字段: {list(data.keys())}")

        # The messages below treat a 'url' key in place of results as a
        # login redirect caused by an expired or invalid cookie.
        if 'url' in data:
            print(f"发现重定向URL: {data['url']}")
            print("这表明cookie已过期或无效,需要重新登录获取新cookie")

        return []

    items_array = data['resultList']
    # A null / non-list ``resultList`` would otherwise crash in ``len()``
    # below, outside the per-item error handling.
    if not isinstance(items_array, list):
        print(f"无效的数据格式:'resultList' 不是列表: {type(items_array)}")
        return []

    items = []
    print(f"找到 {len(items_array)} 个商品")

    for idx, item_data in enumerate(items_array):
        try:
            item_detail = _parse_result_item(idx, item_data)
            if item_detail is None:
                continue

            items.append(item_detail)
            print(f"成功解析商品 {idx}: {item_detail.title[:30]}...")
        except Exception as e:
            # Deliberately broad: one malformed entry (wrong types, model
            # validation failure, ...) must not abort the whole batch.
            print(f"解析商品 {idx} 时出错: {str(e)}")
            continue

    print(f"成功解析 {len(items)} 个商品")
    return items