#!/usr/bin/env python3
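"""Fetch and record the latest video published by a Bilibili user.

Scrapes the user's list page, pulls the embedded
``window.__INITIAL_STATE__`` JSON, prints the newest video's title,
publish time, and play count, and appends unseen videos to a CSV file.
Run with ``--help`` for options; by default the script polls every
five minutes.
"""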
from __future__ import annotations

import argparse
import csv
import gzip
import json
import re
import sys
import time
import zlib
from dataclasses import dataclass
from datetime import datetime
from email.message import Message
from pathlib import Path
from typing import Any
from urllib.error import HTTPError, URLError
from urllib.request import Request, urlopen
from zoneinfo import ZoneInfo


DEFAULT_USER_ID = 259655230
DEFAULT_TIMEZONE = "Asia/Shanghai"
LIST_URL_TEMPLATE = "https://www.bilibili.com/list/{user_id}"
SPACE_URL_TEMPLATE = "https://space.bilibili.com/{user_id}/upload/video"
USER_AGENT = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/135.0.0.0 Safari/537.36"
)
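# Bilibili inlines the page state as ``window.__INITIAL_STATE__=<json>;(function()...``;
# the non-greedy group captures only the JSON payload.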
INITIAL_STATE_PATTERN = re.compile(
    r"window\.__INITIAL_STATE__=(.*?);\(function\(\)", re.DOTALL
)
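# Matches a number with an optional Chinese magnitude suffix inside a
# display string such as "3.5万" (万 = 10_000, 亿 = 100_000_000).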
PLAY_COUNT_PATTERN = re.compile(r"(\d+(?:\.\d+)?)(万|亿)?")
CSV_FIELDNAMES = ("title", "publish_time", "video_url")


class BilibiliCrawlerError(RuntimeError):
    """Raised when the crawler cannot extract the expected data."""


@dataclass(slots=True)
class VideoInfo:
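    """Metadata extracted for a single published video."""
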
    user_id: int
    title: str
    bvid: str
    play_count: int
    publish_timestamp: int

    @property
    def video_url(self) -> str:
        return f"https://www.bilibili.com/video/{self.bvid}"

    def format_publish_time(self, timezone_name: str) -> str:
        tz = ZoneInfo(timezone_name)
        return datetime.fromtimestamp(self.publish_timestamp, tz=tz).strftime(
            "%Y-%m-%d %H:%M:%S %Z"
        )

    def to_dict(self, timezone_name: str) -> dict[str, Any]:
        return {
            "user_id": self.user_id,
            "title": self.title,
            "bvid": self.bvid,
            "play_count": self.play_count,
            "publish_timestamp": self.publish_timestamp,
            "publish_time": self.format_publish_time(timezone_name),
            "video_url": self.video_url,
        }


def build_headers(user_id: int) -> dict[str, str]:
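    """Browser-like request headers; the Referer mimics the user's space page."""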
    return {
        "User-Agent": USER_AGENT,
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
        "Referer": SPACE_URL_TEMPLATE.format(user_id=user_id),
        "Cache-Control": "no-cache",
        "Pragma": "no-cache",
    }


def decode_response(body: bytes, headers: Message) -> str:
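    """Decompress gzip/deflate bodies and decode using the declared charset."""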
    content_encoding = headers.get("Content-Encoding", "").lower()
    if content_encoding == "gzip":
        body = gzip.decompress(body)
    elif content_encoding == "deflate":
        try:
            body = zlib.decompress(body)
        except zlib.error:
            body = zlib.decompress(body, -zlib.MAX_WBITS)

    charset = headers.get_content_charset() or "utf-8"
    return body.decode(charset, errors="replace")


def fetch_list_page(user_id: int, timeout: float) -> str:
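    """Download the user's list page, mapping HTTP and network failures to BilibiliCrawlerError."""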
    request = Request(
        LIST_URL_TEMPLATE.format(user_id=user_id),
        headers=build_headers(user_id),
    )
    try:
        with urlopen(request, timeout=timeout) as response:
            body = response.read()
            return decode_response(body, response.headers)
    except HTTPError as exc:
        message = f"请求失败，HTTP {exc.code}"
        if exc.code == 412:
            message += "（B站风控触发，请稍后重试或更换网络环境）"
        raise BilibiliCrawlerError(message) from exc
    except URLError as exc:
        raise BilibiliCrawlerError(f"网络请求失败：{exc.reason}") from exc


def extract_initial_state(html: str) -> dict[str, Any]:
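    """Find and parse the JSON that Bilibili embeds as window.__INITIAL_STATE__."""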
    match = INITIAL_STATE_PATTERN.search(html)
    if not match:
        raise BilibiliCrawlerError("未在页面中找到 __INITIAL_STATE__ 数据")

    try:
        return json.loads(match.group(1))
    except json.JSONDecodeError as exc:
        raise BilibiliCrawlerError("页面状态 JSON 解析失败") from exc


def parse_play_count(play_text: str) -> int:
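    """Convert a display count like "3.5万" into an absolute integer."""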
    match = PLAY_COUNT_PATTERN.search(play_text)
    if not match:
        return 0

    value = float(match.group(1))
    unit = match.group(2)
    multiplier = 1
    if unit == "万":
        multiplier = 10_000
    elif unit == "亿":
        multiplier = 100_000_000
    return int(value * multiplier)


def iter_episode_records(state: dict[str, Any]) -> list[dict[str, Any]]:
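    """Flatten videoData and its sections' episodes into uniform records."""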
    records: list[dict[str, Any]] = []
    video_data = state.get("videoData") or {}
    if video_data:
        records.append(
            {
                "bvid": video_data.get("bvid", ""),
                "title": video_data.get("title", ""),
                "pubdate": video_data.get("pubdate") or video_data.get("ctime") or 0,
                "play_count": (
                    (video_data.get("stat") or {}).get("view")
                    or (video_data.get("stat") or {}).get("vv")
                    or 0
                ),
            }
        )

    for section in video_data.get("sections") or []:
        for episode in section.get("episodes") or []:
            arc = episode.get("arc") or {}
            records.append(
                {
                    "bvid": episode.get("bvid", ""),
                    "title": episode.get("title") or arc.get("title", ""),
                    "pubdate": arc.get("pubdate") or arc.get("ctime") or 0,
                    "play_count": (
                        (arc.get("stat") or {}).get("view")
                        or (arc.get("stat") or {}).get("vv")
                        or 0
                    ),
                }
            )

    return records


def extract_latest_video(state: dict[str, Any], user_id: int) -> VideoInfo:
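    """Build a VideoInfo for the first resourceList entry, preferring episode
    metadata and falling back to the top-level videoData."""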
    resource_list = state.get("resourceList") or []
    if not resource_list:
        raise BilibiliCrawlerError("页面里没有找到视频列表")

    latest_item = resource_list[0]
    latest_bvid = latest_item.get("bvid", "")
    latest_title = latest_item.get("title", "")
    fallback_play_count = parse_play_count(latest_item.get("views", ""))

    episode_map = {
        item.get("bvid", ""): item
        for item in iter_episode_records(state)
        if item.get("bvid")
    }
    episode = episode_map.get(latest_bvid)

    if episode:
        publish_timestamp = int(episode.get("pubdate") or 0)
        play_count = int(episode.get("play_count") or fallback_play_count)
        if publish_timestamp > 0:
            return VideoInfo(
                user_id=user_id,
                title=latest_title or str(episode.get("title") or ""),
                bvid=latest_bvid,
                play_count=play_count,
                publish_timestamp=publish_timestamp,
            )

    video_data = state.get("videoData") or {}
    publish_timestamp = int(video_data.get("pubdate") or video_data.get("ctime") or 0)
    play_count = int(
        ((video_data.get("stat") or {}).get("view"))
        or ((video_data.get("stat") or {}).get("vv"))
        or fallback_play_count
        or 0
    )
    if latest_bvid and publish_timestamp > 0:
        return VideoInfo(
            user_id=user_id,
            title=latest_title or str(video_data.get("title") or ""),
            bvid=latest_bvid,
            play_count=play_count,
            publish_timestamp=publish_timestamp,
        )

    raise BilibiliCrawlerError("找到了视频列表，但没能提取出发布时间")


def get_latest_video(user_id: int, timeout: float) -> VideoInfo:
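    """Fetch the list page, parse its state, and return the newest video."""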
    html = fetch_list_page(user_id, timeout)
    state = extract_initial_state(html)
    return extract_latest_video(state, user_id)


def default_csv_path(user_id: int) -> Path:
    return Path(f"bilibili_latest_videos_{user_id}.csv")


def latest_video_exists(csv_path: Path, video_url: str) -> bool:
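    """Return True if the CSV already contains a row with this video URL."""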
    if not csv_path.exists():
        return False

    with csv_path.open("r", encoding="utf-8-sig", newline="") as file:
        reader = csv.DictReader(file)
        return any(row.get("video_url") == video_url for row in reader)


def save_latest_video_to_csv(
    video: VideoInfo,
    csv_path: Path,
    timezone_name: str,
) -> bool:
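    """Append the video to the CSV unless its URL is already present; return
    True if a new row was written."""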
    csv_path.parent.mkdir(parents=True, exist_ok=True)
    if latest_video_exists(csv_path, video.video_url):
        return False

    should_write_header = not csv_path.exists() or csv_path.stat().st_size == 0
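    # "utf-8-sig" writes a BOM only when the file starts out empty, so Excel
    # can detect UTF-8 without BOMs piling up on later appends.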
    with csv_path.open("a", encoding="utf-8-sig", newline="") as file:
        writer = csv.DictWriter(file, fieldnames=CSV_FIELDNAMES)
        if should_write_header:
            writer.writeheader()
        writer.writerow(
            {
                "title": video.title,
                "publish_time": video.format_publish_time(timezone_name),
                "video_url": video.video_url,
            }
        )
    return True


def build_parser() -> argparse.ArgumentParser:
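    """Define the command-line interface."""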
    parser = argparse.ArgumentParser(
        description="查询 B 站用户最新发布的视频标题、发布时间和播放量"
    )
    parser.add_argument(
        "--user-id",
        type=int,
        default=DEFAULT_USER_ID,
        help=f"目标 B 站用户 ID，默认 {DEFAULT_USER_ID}",
    )
    parser.add_argument(
        "--timezone",
        default=DEFAULT_TIMEZONE,
        help=f"发布时间输出时区，默认 {DEFAULT_TIMEZONE}",
    )
    parser.add_argument(
        "--timeout",
        type=float,
        default=15.0,
        help="网络请求超时时间（秒），默认 15",
    )
    parser.add_argument(
        "--json",
        action="store_true",
        help="以 JSON 格式输出结果",
    )
    parser.add_argument(
        "--csv",
        type=Path,
        default=None,
        help="保存最新视频信息的 CSV 文件路径，默认按用户 ID 自动命名",
    )
    return parser


def main() -> int:
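    """Run one fetch-and-save cycle and return a process exit code."""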
    parser = build_parser()
    args = parser.parse_args()

    try:
        video = get_latest_video(user_id=args.user_id, timeout=args.timeout)
    except BilibiliCrawlerError as exc:
        print(f"错误：{exc}", file=sys.stderr)
        return 1
    except Exception as exc:  # pragma: no cover - unexpected safety net
        print(f"未预期错误：{exc}", file=sys.stderr)
        return 1

    csv_path = args.csv or default_csv_path(args.user_id)
    try:
        saved = save_latest_video_to_csv(video, csv_path, args.timezone)
    except OSError as exc:
        print(f"CSV 写入失败：{exc}", file=sys.stderr)
        return 1

    if args.json:
        print(
            json.dumps(
                video.to_dict(args.timezone),
                ensure_ascii=False,
                indent=2,
            )
        )
        return 0

    print(f"用户ID: {video.user_id}")
    print(f"最新视频标题: {video.title}")
    print(f"发布时间: {video.format_publish_time(args.timezone)}")
    print(f"播放量: {video.play_count}")
    print(f"视频链接: {video.video_url}")
    print(f"CSV文件: {csv_path}")
    print(f"CSV状态: {'已写入新视频' if saved else '视频已存在，未重复写入'}")
    return 0


if __name__ == "__main__":
    # Poll every five minutes so newly published videos get appended to the
    # CSV; exit cleanly on Ctrl-C instead of dumping a traceback.
    try:
        while True:
            main()
            time.sleep(5 * 60)
    except KeyboardInterrupt:
        sys.exit(0)
