import os
import urllib.parse

from loguru import logger

from apis.xhs_pc_apis import XHS_Apis
from xhs_utils.data_util import handle_note_info, download_note, save_to_xlsx
from xhs_utils.response_guard import get_dict, get_list


class Data_Spider():
    """Crawl notes through ``XHS_Apis`` and optionally save media / Excel output.

    The public entry points either crawl an explicit list of note URLs
    (``spider_some_note``) or first build that list from a user page or a
    search query and then delegate to ``spider_some_note``.
    """

    def __init__(self):
        self.xhs_apis = XHS_Apis()

    @staticmethod
    def _wants_excel(save_choice: str) -> bool:
        """Return True when ``save_choice`` asks for an Excel export."""
        return save_choice in ('all', 'excel')

    def spider_note(self, note_url: str, cookies_str: str, proxies=None):
        """Fetch one note and normalise it with ``handle_note_info``.

        Args:
            note_url: URL of the note to fetch.
            cookies_str: Cookie string forwarded to the API client.
            proxies: Optional proxies value forwarded to the API client.

        Returns:
            A ``(success, msg, note_info)`` tuple. Any exception raised while
            fetching or processing is caught: ``success`` becomes ``False``
            and ``msg`` is the exception object.
        """
        note_info = None
        try:
            success, msg, note_info = self.xhs_apis.get_note_info(note_url, cookies_str, proxies)
            if success:
                data = get_dict(note_info, "data", context="get_note_info")
                items = get_list(data, "items", context="get_note_info")
                if not items:
                    raise ValueError("empty_items")
                note_info = items[0]
                note_info['url'] = note_url
                note_info = handle_note_info(note_info)
        except Exception as e:
            success = False
            msg = e
        logger.info(f'爬取笔记信息 {note_url}: {success}, msg: {msg}')
        return success, msg, note_info

    def _note_id_from_url(self, note_url: str):
        """Return the last path segment of ``note_url`` as the note id.

        Trailing slashes are ignored so ``.../explore/<id>/`` still yields
        ``<id>``. When no usable segment exists (or parsing fails) the URL
        itself is returned, so distinct URLs never share an empty id.
        """
        try:
            path = urllib.parse.urlparse(note_url).path
            note_id = path.rstrip("/").split("/")[-1]
        except Exception:
            return note_url
        return note_id or note_url

    def spider_some_note(self, notes: list, cookies_str: str, base_path: dict, save_choice: str, excel_name: str = '', proxies=None, state_store=None, state_key: str = "done_note_ids"):
        """Crawl every URL in ``notes`` and save the results per ``save_choice``.

        Args:
            notes: Note URLs to crawl.
            cookies_str: Cookie string forwarded to the API client.
            base_path: Output directories; ``base_path['media']`` and
                ``base_path['excel']`` are read when the matching output is
                requested.
            save_choice: ``'all'``, ``'excel'``, or a value containing
                ``'media'``.
            excel_name: Workbook name without extension; required when an
                Excel export is requested.
            proxies: Optional proxies value forwarded to the API client.
            state_store: Optional object exposing ``get_set(key)`` and
                ``add_to_set(key, value)`` used to skip already-crawled notes.
            state_key: Key under which done note ids are stored.

        Returns:
            A dict with ``total``, ``skipped``, ``ok``, ``failed`` counters
            and ``fail_reasons`` (message -> count).

        Raises:
            ValueError: If an Excel export is requested with an empty
                ``excel_name``.
        """
        if self._wants_excel(save_choice) and excel_name == '':
            raise ValueError('excel_name 不能为空')
        note_list = []
        # Work on a private copy so ids crawled in this run are also skipped
        # when the same note appears more than once in ``notes``.
        done = set()
        if state_store is not None:
            done = set(state_store.get_set(state_key) or ())
        total = 0
        skipped = 0
        ok = 0
        failed = 0
        fail_reasons: dict[str, int] = {}
        for note_url in notes:
            total += 1
            note_id = self._note_id_from_url(note_url)
            if note_id in done:
                skipped += 1
                continue
            success, msg, note_info = self.spider_note(note_url, cookies_str, proxies)
            if note_info is not None and success:
                note_list.append(note_info)
                ok += 1
                done.add(note_id)
                # NOTE(review): the id is recorded before media/Excel are
                # written below; a failure while saving leaves it marked done.
                if state_store is not None:
                    state_store.add_to_set(state_key, note_id)
            else:
                failed += 1
                reason = str(msg)
                fail_reasons[reason] = fail_reasons.get(reason, 0) + 1
        # The media decision does not depend on the note, so test it once.
        if save_choice == 'all' or 'media' in save_choice:
            for note_info in note_list:
                download_note(note_info, base_path['media'], save_choice)
        if self._wants_excel(save_choice):
            file_path = os.path.abspath(os.path.join(base_path['excel'], f'{excel_name}.xlsx'))
            save_to_xlsx(note_list, file_path)
        return {
            "total": total,
            "skipped": skipped,
            "ok": ok,
            "failed": failed,
            "fail_reasons": fail_reasons,
        }

    def spider_user_all_note(self, user_url: str, cookies_str: str, base_path: dict, save_choice: str, excel_name: str = '', proxies=None, state_store=None):
        """Crawl every note listed for ``user_url``.

        The note URLs are built from the API listing and handed to
        ``spider_some_note``; the Excel name and the resume key are derived
        from the last path segment of ``user_url``.

        Returns:
            The stats dict from ``spider_some_note`` on the normal path.
            NOTE(review): if an exception is caught, a
            ``(note_list, success, msg)`` tuple is returned instead; callers
            must handle both shapes.
        """
        note_list = []
        try:
            success, msg, all_note_info = self.xhs_apis.get_user_all_notes(user_url, cookies_str, proxies)
            if success:
                logger.info(f'用户 {user_url} 作品数量: {len(all_note_info)}')
                for simple_note_info in all_note_info:
                    note_url = f"https://www.xiaohongshu.com/explore/{simple_note_info['note_id']}?xsec_token={simple_note_info['xsec_token']}"
                    note_list.append(note_url)
            else:
                # Surface the API failure; otherwise it is lost because the
                # method returns before the final log line.
                logger.warning(f'爬取用户所有视频 {user_url}: {success}, msg: {msg}')
            user_id = user_url.split('/')[-1].split('?')[0]
            if self._wants_excel(save_choice):
                excel_name = user_id
            state_key = f"done_note_ids:user:{user_id}"
            return self.spider_some_note(note_list, cookies_str, base_path, save_choice, excel_name, proxies, state_store=state_store, state_key=state_key)
        except Exception as e:
            success = False
            msg = e
        logger.info(f'爬取用户所有视频 {user_url}: {success}, msg: {msg}')
        return note_list, success, msg

    def spider_some_search_note(self, query: str, require_num: int, cookies_str: str, base_path: dict, save_choice: str, sort_type_choice=0, note_type=0, note_time=0, note_range=0, pos_distance=0, geo: dict = None,  excel_name: str = '', proxies=None, state_store=None):
        """Search for ``query`` and crawl the notes that come back.

        Only results whose ``model_type`` is ``"note"`` are kept. The Excel
        name is the query itself, and the resume key combines the query with
        the sort/type/time/range/distance filters.

        Returns:
            The stats dict from ``spider_some_note`` on the normal path.
            NOTE(review): if an exception is caught, a
            ``(note_list, success, msg)`` tuple is returned instead; callers
            must handle both shapes.
        """
        note_list = []
        try:
            success, msg, notes = self.xhs_apis.search_some_note(query, require_num, cookies_str, sort_type_choice, note_type, note_time, note_range, pos_distance, geo, proxies)
            if success:
                notes = [entry for entry in notes if entry['model_type'] == "note"]
                logger.info(f'搜索关键词 {query} 笔记数量: {len(notes)}')
                for note in notes:
                    note_url = f"https://www.xiaohongshu.com/explore/{note['id']}?xsec_token={note['xsec_token']}"
                    note_list.append(note_url)
            else:
                # Surface the API failure; otherwise it is lost because the
                # method returns before the final log line.
                logger.warning(f'搜索关键词 {query} 笔记: {success}, msg: {msg}')
            if self._wants_excel(save_choice):
                excel_name = query
            state_key = f"done_note_ids:search:{query}:{sort_type_choice}:{note_type}:{note_time}:{note_range}:{pos_distance}"
            return self.spider_some_note(note_list, cookies_str, base_path, save_choice, excel_name, proxies, state_store=state_store, state_key=state_key)
        except Exception as e:
            success = False
            msg = e
        logger.info(f'搜索关键词 {query} 笔记: {success}, msg: {msg}')
        return note_list, success, msg