| from xhs_utils.common_util import init |
| from xhs_utils.spider import Data_Spider |
|
|
if __name__ == '__main__':
    # Entry point of the crawler; this file can be run directly.
    #   - apis/xhs_pc_apis.py is the crawler's API file; it contains all of
    #     the Xiaohongshu data endpoints and can be wrapped further.
    #   - apis/xhs_creator_apis.py is the API file for the Xiaohongshu
    #     creator center.
    # Thanks for the star and follow.

    # init() supplies the cookie string and the base output path(s) that are
    # handed to every crawl call below.
    cookies_str, base_path = init()
    spider = Data_Spider()

    # save_choice controls what gets persisted:
    #   'all'         - save everything
    #   'media'       - download both videos and images
    #   'media-video' - download videos only
    #   'media-image' - download images only
    #   'excel'       - save to an Excel file
    # When save_choice is 'excel' or 'all', excel_name must not be empty.
    save_choice = 'all'

    # --- Crawl an explicit list of note URLs -------------------------------
    note_urls = [
        r'https://www.xiaohongshu.com/explore/683fe17f0000000023017c6a?xsec_token=ABBr_cMzallQeLyKSRdPk9fwzA0torkbT_ubuQP1ayvKA=&xsec_source=pc_user',
    ]
    # The trailing 'test' is presumably the excel_name -- confirm against
    # Data_Spider.spider_some_note.
    spider.spider_some_note(note_urls, cookies_str, base_path, save_choice, 'test')

    # --- Crawl every note of a single user ---------------------------------
    profile_url = 'https://www.xiaohongshu.com/user/profile/64c3f392000000002b009e45?xsec_token=AB-GhAToFu07JwNk_AMICHnp7bSTjVz2beVIDBwSyPwvM=&xsec_source=pc_feed'
    # NOTE(review): no excel_name is passed here even though save_choice is
    # 'all'; presumably the spider derives one itself -- confirm.
    spider.spider_user_all_note(profile_url, cookies_str, base_path, save_choice)

    # --- Crawl notes returned by a keyword search --------------------------
    query = "榴莲"
    query_num = 10
    # Search filters, all left at 0. The meaning of each value is defined by
    # Data_Spider.spider_some_search_note (0 is presumably "default / no
    # restriction" -- TODO confirm).
    sort_type_choice = 0
    note_type = 0
    note_time = 0
    note_range = 0
    pos_distance = 0
    spider.spider_some_search_note(
        query,
        query_num,
        cookies_str,
        base_path,
        save_choice,
        sort_type_choice,
        note_type,
        note_time,
        note_range,
        pos_distance,
        geo=None,
    )
|
|