from __future__ import annotations

import argparse
import hashlib
import html
import os
import re
import sys
import threading
import time
from pathlib import Path
from urllib.parse import quote_plus

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry


# Defaults shared by the CLI argument parser and the GUI form fields.
DEFAULT_KEYWORD = "天外来物封面图"
DEFAULT_COUNT = 10
# Default save location: a "baidu_img" folder next to this script.
DEFAULT_OUTPUT_DIR = Path(__file__).resolve().parent / "baidu_img"

# Request headers installed on every session built by create_session().
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/125.0.0.0 Safari/537.36"
    ),
    "Referer": "https://image.baidu.com/",
}

# Content-Type media type -> file suffix. detect_image_extension() consults
# this table only when none of its magic-byte checks match.
IMAGE_EXTENSIONS = {
    "image/jpeg": ".jpg",
    "image/jpg": ".jpg",
    "image/png": ".png",
    "image/gif": ".gif",
    "image/webp": ".webp",
    "image/bmp": ".bmp",
}


def build_proxy_config(proxy_mode="auto", proxy_url=""):
    """Translate a proxy mode into settings for a requests Session.

    Args:
        proxy_mode: One of "auto", "none", "system" or "custom"
            (case-insensitive). A falsy value is treated as "auto".
        proxy_url: Proxy address; only consulted in "custom" mode. Leading
            and trailing whitespace is ignored.

    Returns:
        A ``(trust_env, proxies, auto_retry)`` tuple: the value for
        ``Session.trust_env``, an explicit proxies mapping (or ``None``), and
        whether a failed proxy request may be retried over a direct connection.

    Raises:
        ValueError: If the mode is unknown, or "custom" is given without a
            proxy address.
    """
    mode = (proxy_mode or "auto").lower()
    address = (proxy_url or "").strip()

    # "custom" is the only mode whose result depends on the proxy address.
    if mode == "custom":
        if not address:
            raise ValueError("proxy-mode 为 custom 时必须填写 --proxy")
        return True, dict.fromkeys(("http", "https"), address), False

    fixed_modes = {
        "none": (False, None, False),
        "system": (True, None, False),
        "auto": (True, None, True),
    }
    if mode not in fixed_modes:
        raise ValueError("proxy-mode 只能是 auto、none、system、custom")
    return fixed_modes[mode]


def create_session(proxy_mode="auto", proxy_url="") -> requests.Session:
    """Create a requests Session configured for the chosen proxy mode.

    The session gets the module's default headers and an adapter, mounted for
    both http:// and https://, that retries GET requests up to three times on
    connect/read errors and on 429/500/502/503/504 responses.

    Raises:
        ValueError: Propagated from build_proxy_config() for an invalid proxy
            configuration.
    """
    trust_env, proxies, _auto_retry = build_proxy_config(proxy_mode, proxy_url)

    retry_policy = Retry(
        total=3,
        connect=3,
        read=3,
        backoff_factor=0.8,
        status_forcelist=(429, 500, 502, 503, 504),
        allowed_methods=("GET",),
    )
    retrying_adapter = HTTPAdapter(max_retries=retry_policy)

    session = requests.Session()
    session.trust_env = trust_env
    if proxies:
        session.proxies.update(proxies)
    for scheme in ("http://", "https://"):
        session.mount(scheme, retrying_adapter)
    session.headers.update(HEADERS)
    return session


def request_with_auto_proxy(session, method, url, auto_retry=True, **kwargs):
    """Send a request, falling back to a direct connection if the proxy fails.

    On a ProxyError, when ``auto_retry`` is true and the session still trusts
    the environment, the session is switched to direct mode (``trust_env``
    turned off, explicit proxies cleared) and the request is sent once more.
    The switch is permanent for that session. In every other case the
    ProxyError is re-raised.

    Extra keyword arguments are forwarded to ``session.request``.
    """
    try:
        response = session.request(method, url, **kwargs)
    except requests.exceptions.ProxyError as error:
        can_fall_back = auto_retry and session.trust_env
        if not can_fall_back:
            raise
        print(f"代理请求失败，自动切换为直连重试：{error}")
        session.trust_env = False
        session.proxies.clear()
        return session.request(method, url, **kwargs)
    return response


def clean_filename(text: str, max_length: int = 80) -> str:
    """Make *text* usable as a file-name stem.

    The characters ``< > : " / \\ | ? *`` and ASCII control characters are
    replaced with underscores, surrounding spaces and dots are stripped, and
    the result is cut to *max_length* characters. An empty result becomes
    ``"image"``.
    """
    sanitized = re.sub(r'[<>:"/\\|?*\x00-\x1f]', "_", text)
    shortened = sanitized.strip(" .")[:max_length]
    if shortened:
        return shortened
    return "image"


def decode_baidu_url(raw_url: str) -> str:
    """Turn a URL captured from the search page's embedded JSON into plain text.

    HTML entities are unescaped, ``\\/`` becomes ``/``, and backslash escapes
    such as ``\\u0026`` are decoded. Characters that are already non-ASCII
    (for example Chinese path segments) are preserved unchanged.

    Args:
        raw_url: The raw string captured between the JSON quotes.

    Returns:
        The decoded URL. If the text contains a malformed escape sequence, the
        result is the text with only entities and ``\\/`` handled.
    """
    decoded = html.unescape(raw_url).replace("\\/", "/")
    try:
        # The unicode_escape codec reads its input bytes as Latin-1, so going
        # through UTF-8 would turn every non-ASCII character into mojibake.
        # Latin-1 with backslashreplace keeps code points <= 0xFF as single
        # bytes and rewrites the rest as \uXXXX escapes, which the decode step
        # then restores to the original characters.
        decoded = decoded.encode("latin-1", "backslashreplace").decode("unicode_escape")
    except UnicodeDecodeError:
        # Malformed escape (e.g. a trailing backslash): keep the text as-is.
        pass
    return decoded


def extract_image_urls(page_html: str) -> list[str]:
    """Collect candidate image URLs from a search result page.

    The page text is scanned for the JSON fields ``objURL``, ``middleURL`` and
    ``thumbURL``, in that order, so every ``objURL`` match precedes any
    ``middleURL`` match, and so on. Each match is decoded; values that do not
    start with http:// or https:// are dropped, and duplicates are removed
    while keeping first-seen order.
    """
    field_patterns = (
        r'"objURL"\s*:\s*"([^"]+)"',
        r'"middleURL"\s*:\s*"([^"]+)"',
        r'"thumbURL"\s*:\s*"([^"]+)"',
    )

    decoded_matches = (
        decode_baidu_url(match)
        for field_pattern in field_patterns
        for match in re.findall(field_pattern, page_html)
    )
    # dict keys keep insertion order, which gives an ordered de-duplication.
    unique_urls = dict.fromkeys(
        candidate
        for candidate in decoded_matches
        if candidate.startswith(("http://", "https://"))
    )
    return list(unique_urls)


def detect_image_extension(content: bytes, content_type: str | None) -> str | None:
    if content.startswith(b"\xff\xd8\xff"):
        return ".jpg"
    if content.startswith(b"\x89PNG\r\n\x1a\n"):
        return ".png"
    if content.startswith((b"GIF87a", b"GIF89a")):
        return ".gif"
    if content.startswith(b"RIFF") and content[8:12] == b"WEBP":
        return ".webp"
    if content.startswith(b"BM"):
        return ".bmp"

    if content_type:
        media_type = content_type.split(";", 1)[0].strip().lower()
        return IMAGE_EXTENSIONS.get(media_type)

    return None


def fetch_image_urls(session: requests.Session, keyword: str, auto_proxy_retry=True) -> list[str]:
    """Fetch the search result page for *keyword* and return its image URLs.

    A single GET with a 15-second timeout is sent to the "flip" search
    endpoint; the response text is passed to extract_image_urls().

    Raises:
        requests.HTTPError: If the response status indicates an error
            (via ``raise_for_status``).
    """
    endpoint = "https://image.baidu.com/search/flip"
    query_string = f"?tn=baiduimage&ie=utf-8&word={quote_plus(keyword)}"

    page = request_with_auto_proxy(
        session,
        "GET",
        endpoint + query_string,
        auto_retry=auto_proxy_retry,
        timeout=15,
    )
    page.raise_for_status()
    return extract_image_urls(page.text)


def download_one_image(
    session: requests.Session,
    image_url: str,
    output_dir: Path,
    base_name: str,
    index: int,
    content_hashes: set[str],
    auto_proxy_retry=True,
) -> Path | None:
    """Download one image and write it to ``<base_name>_<index>.<ext>``.

    Args:
        session: Session used for the GET request (15-second timeout).
        image_url: Address of the image.
        output_dir: Directory the file is written into.
        base_name: File-name stem.
        index: Number appended to the stem, zero-padded to two digits.
        content_hashes: SHA-256 hex digests of images saved so far; the new
            digest is added once the download has passed every check.
        auto_proxy_retry: Forwarded to request_with_auto_proxy().

    Returns:
        The path of the written file.

    Raises:
        ValueError: If the body is shorter than 512 bytes, duplicates an
            already-saved image, or is not a recognised image format.
        requests.HTTPError: If the response status indicates an error.
    """
    reply = request_with_auto_proxy(
        session,
        "GET",
        image_url,
        auto_retry=auto_proxy_retry,
        timeout=15,
    )
    reply.raise_for_status()
    payload = reply.content

    if len(payload) < 512:
        raise ValueError("文件太小，疑似不是有效图片")

    digest = hashlib.sha256(payload).hexdigest()
    if digest in content_hashes:
        raise ValueError("重复图片，已跳过")

    suffix = detect_image_extension(payload, reply.headers.get("Content-Type"))
    if suffix is None:
        raise ValueError("响应内容不是常见图片格式")

    # Record the digest only after every check has passed.
    content_hashes.add(digest)
    target = output_dir / f"{base_name}_{index:02d}{suffix}"
    target.write_bytes(payload)
    return target


def download_images(
    keyword: str,
    count: int,
    output_dir: Path,
    delay: float,
    proxy_mode: str = "auto",
    proxy_url: str = "",
    log=print,
    progress=None,
    stop_requested=None,
) -> int:
    """Search for *keyword* and save up to *count* distinct images.

    Args:
        keyword: Search term; also used (sanitised) as the file-name stem.
        count: Maximum number of images to save.
        output_dir: Destination directory; created if missing.
        delay: Seconds to sleep after each download attempt (0 disables).
        proxy_mode: Proxy mode passed to build_proxy_config()/create_session().
        proxy_url: Proxy address for "custom" mode.
        log: Callable receiving each status message, or ``None`` to silence.
        progress: Optional callable invoked as ``progress(saved, total)``.
        stop_requested: Optional zero-argument callable; when it returns true
            the loop stops before the next download.

    Returns:
        The number of images saved.

    Raises:
        ValueError: For an invalid proxy configuration.
        Exception: Any error raised while fetching the search page propagates.
            Failures of individual image downloads are logged and skipped.
    """
    def emit(message: str) -> None:
        if log is not None:
            log(message)

    output_dir.mkdir(parents=True, exist_ok=True)
    _, _, auto_proxy_retry = build_proxy_config(proxy_mode, proxy_url)
    base_name = clean_filename(keyword)

    saved_count = 0
    failed_count = 0
    content_hashes: set[str] = set()

    # The session owns pooled connections; the with-block closes it on every
    # exit path, including the early return and any exception from the search.
    with create_session(proxy_mode, proxy_url) as session:
        emit(f"搜索关键词：{keyword}")
        emit(f"保存目录：{output_dir}")
        emit(f"计划下载：{count} 张")
        if progress is not None:
            progress(0, count)

        image_urls = fetch_image_urls(session, keyword, auto_proxy_retry=auto_proxy_retry)
        if not image_urls:
            emit("没有提取到图片链接，可能是页面结构变化或网络被拦截。")
            return 0

        emit(f"已提取到 {len(image_urls)} 个候选链接，开始下载...\n")

        for image_url in image_urls:
            if saved_count >= count:
                break
            if stop_requested is not None and stop_requested():
                emit("已收到停止请求，下载已中断。")
                break

            next_index = saved_count + 1
            try:
                file_path = download_one_image(
                    session=session,
                    image_url=image_url,
                    output_dir=output_dir,
                    base_name=base_name,
                    index=next_index,
                    content_hashes=content_hashes,
                    auto_proxy_retry=auto_proxy_retry,
                )
                saved_count += 1
                emit(f"[{saved_count:02d}/{count:02d}] 下载成功：{file_path}")
                if progress is not None:
                    progress(saved_count, count)
            except Exception as exc:
                # Best effort: one bad link must not abort the whole batch.
                failed_count += 1
                emit(f"[跳过] {image_url} -> {exc}")

            # No need to wait once the requested number has been saved; the
            # loop would exit immediately on the next iteration anyway.
            if delay > 0 and saved_count < count:
                time.sleep(delay)

    emit("\n下载完成")
    emit(f"成功：{saved_count} 张")
    emit(f"失败/跳过：{failed_count} 个链接")
    emit(f"位置：{output_dir}")
    return saved_count


def run_gui() -> None:
    """Build and run the Tkinter front end for the downloader.

    The window collects keyword, count, output directory and delay, then runs
    download_images() on a daemon worker thread with the default proxy
    settings. The worker never touches widgets: it posts log, progress and
    completion tuples to a queue that the Tk event loop drains every 120 ms.
    This call blocks until the window is closed.
    """
    import queue
    import subprocess
    import tkinter as tk
    from tkinter import filedialog, messagebox, ttk
    from tkinter.scrolledtext import ScrolledText

    root = tk.Tk()
    root.title("百度图片下载器")
    root.geometry("860x600")
    root.minsize(760, 520)

    # Prefer the "vista" theme when available, otherwise fall back to "clam".
    style = ttk.Style(root)
    if "vista" in style.theme_names():
        style.theme_use("vista")
    elif "clam" in style.theme_names():
        style.theme_use("clam")

    keyword_var = tk.StringVar(value=DEFAULT_KEYWORD)
    count_var = tk.StringVar(value=str(DEFAULT_COUNT))
    output_var = tk.StringVar(value=str(DEFAULT_OUTPUT_DIR))
    delay_var = tk.StringVar(value="0.2")
    status_var = tk.StringVar(value="就绪")
    progress_var = tk.IntVar(value=0)

    # Worker -> UI channel, the stop flag, and the current worker thread.
    messages: queue.Queue[tuple] = queue.Queue()
    stop_event = threading.Event()
    worker: threading.Thread | None = None

    root.columnconfigure(0, weight=1)
    root.rowconfigure(1, weight=1)

    # --- Top form: keyword, count, output directory, delay, action buttons.
    top = ttk.Frame(root, padding=(18, 16, 18, 10))
    top.grid(row=0, column=0, sticky="ew")
    top.columnconfigure(1, weight=1)

    ttk.Label(top, text="关键词").grid(row=0, column=0, sticky="w", padx=(0, 10), pady=6)
    keyword_entry = ttk.Entry(top, textvariable=keyword_var)
    keyword_entry.grid(row=0, column=1, sticky="ew", pady=6)

    ttk.Label(top, text="数量").grid(row=0, column=2, sticky="w", padx=(18, 10), pady=6)
    count_spin = ttk.Spinbox(top, from_=1, to=200, textvariable=count_var, width=8)
    count_spin.grid(row=0, column=3, sticky="w", pady=6)

    ttk.Label(top, text="保存目录").grid(row=1, column=0, sticky="w", padx=(0, 10), pady=6)
    output_entry = ttk.Entry(top, textvariable=output_var)
    output_entry.grid(row=1, column=1, columnspan=2, sticky="ew", pady=6)

    def browse_output() -> None:
        """Let the user pick the output directory with a folder dialog."""
        current = Path(output_var.get()).expanduser()
        initial_dir = current if current.exists() else DEFAULT_OUTPUT_DIR.parent
        selected = filedialog.askdirectory(initialdir=str(initial_dir))
        if selected:
            output_var.set(selected)

    browse_button = ttk.Button(top, text="浏览", command=browse_output)
    browse_button.grid(row=1, column=3, sticky="ew", pady=6)

    ttk.Label(top, text="间隔秒").grid(row=2, column=0, sticky="w", padx=(0, 10), pady=6)
    delay_spin = ttk.Spinbox(top, from_=0, to=10, increment=0.1, textvariable=delay_var, width=8)
    delay_spin.grid(row=2, column=1, sticky="w", pady=6)

    actions = ttk.Frame(top)
    actions.grid(row=2, column=2, columnspan=2, sticky="e", pady=6)

    # Commands are attached at the end, once the handlers are defined.
    start_button = ttk.Button(actions, text="开始下载")
    stop_button = ttk.Button(actions, text="停止", state="disabled")
    open_button = ttk.Button(actions, text="打开目录")
    start_button.grid(row=0, column=0, padx=(0, 8))
    stop_button.grid(row=0, column=1, padx=(0, 8))
    open_button.grid(row=0, column=2)

    # --- Body: progress bar, log view, status line.
    body = ttk.Frame(root, padding=(18, 0, 18, 14))
    body.grid(row=1, column=0, sticky="nsew")
    body.columnconfigure(0, weight=1)
    body.rowconfigure(1, weight=1)

    progress_bar = ttk.Progressbar(body, variable=progress_var, maximum=DEFAULT_COUNT)
    progress_bar.grid(row=0, column=0, sticky="ew", pady=(0, 8))

    log_text = ScrolledText(body, height=16, wrap="word", state="disabled", font=("Consolas", 10))
    log_text.grid(row=1, column=0, sticky="nsew")

    status_line = ttk.Label(body, textvariable=status_var, anchor="w")
    status_line.grid(row=2, column=0, sticky="ew", pady=(8, 0))

    def write_log(message: str) -> None:
        """Append one line to the read-only log view and scroll to the end."""
        log_text.configure(state="normal")
        log_text.insert("end", message + "\n")
        log_text.see("end")
        log_text.configure(state="disabled")

    def set_running(is_running: bool) -> None:
        """Lock the form while a download runs; the stop button is inverse."""
        state = "disabled" if is_running else "normal"
        start_button.configure(state=state)
        keyword_entry.configure(state=state)
        count_spin.configure(state=state)
        output_entry.configure(state=state)
        delay_spin.configure(state=state)
        browse_button.configure(state=state)
        stop_button.configure(state="normal" if is_running else "disabled")

    def open_output_dir() -> None:
        """Open the output directory (created if missing) in a file manager."""
        try:
            path = Path(output_var.get()).expanduser().resolve()
            path.mkdir(parents=True, exist_ok=True)
            # os.startfile exists only on Windows; use the platform's opener
            # command elsewhere so the button is not Windows-only.
            if hasattr(os, "startfile"):
                os.startfile(path)
            elif sys.platform == "darwin":
                subprocess.Popen(["open", str(path)])
            else:
                subprocess.Popen(["xdg-open", str(path)])
        except Exception as exc:
            messagebox.showerror("打开失败", str(exc))

    def validate_inputs() -> tuple[str, int, Path, float] | None:
        """Read the form; return (keyword, count, output_dir, delay) or None.

        A warning dialog is shown for the first invalid field found.
        """
        keyword = keyword_var.get().strip()
        if not keyword:
            messagebox.showwarning("参数错误", "请输入关键词。")
            return None

        try:
            count = int(count_var.get())
        except ValueError:
            messagebox.showwarning("参数错误", "下载数量必须是整数。")
            return None
        if count <= 0:
            messagebox.showwarning("参数错误", "下载数量必须大于 0。")
            return None

        try:
            delay = float(delay_var.get())
        except ValueError:
            messagebox.showwarning("参数错误", "间隔秒数必须是数字。")
            return None
        if delay < 0:
            messagebox.showwarning("参数错误", "间隔秒数不能小于 0。")
            return None

        output_dir = Path(output_var.get()).expanduser().resolve()
        return keyword, count, output_dir, delay

    def worker_run(keyword: str, count: int, output_dir: Path, delay: float) -> None:
        """Thread target: run the download and report through the queue.

        Always ends by posting ("done", saved, error_text_or_None, stopped).
        """
        try:
            saved = download_images(
                keyword=keyword,
                count=count,
                output_dir=output_dir,
                delay=delay,
                log=lambda message: messages.put(("log", message)),
                progress=lambda saved_count, total: messages.put(
                    ("progress", saved_count, total)
                ),
                stop_requested=stop_event.is_set,
            )
            messages.put(("done", saved, None, stop_event.is_set()))
        except Exception as exc:
            messages.put(("done", 0, str(exc), False))

    def start_download() -> None:
        """Validate the form, reset the UI and launch the worker thread."""
        nonlocal worker
        if worker is not None and worker.is_alive():
            return

        values = validate_inputs()
        if values is None:
            return

        keyword, count, output_dir, delay = values
        stop_event.clear()
        progress_var.set(0)
        progress_bar.configure(maximum=count)
        status_var.set("正在下载...")
        set_running(True)

        # Start each run with an empty log.
        log_text.configure(state="normal")
        log_text.delete("1.0", "end")
        log_text.configure(state="disabled")

        worker = threading.Thread(
            target=worker_run,
            args=(keyword, count, output_dir, delay),
            daemon=True,
        )
        worker.start()

    def stop_download() -> None:
        """Ask the worker to stop before its next download."""
        if worker is not None and worker.is_alive():
            stop_event.set()
            status_var.set("正在停止...")
            stop_button.configure(state="disabled")

    def poll_messages() -> None:
        """Drain the worker queue on the Tk thread, then reschedule itself."""
        while True:
            try:
                message = messages.get_nowait()
            except queue.Empty:
                break

            kind = message[0]
            if kind == "log":
                write_log(message[1])
            elif kind == "progress":
                progress_var.set(message[1])
            elif kind == "done":
                saved, error, stopped = message[1], message[2], message[3]
                set_running(False)
                if error:
                    status_var.set("运行失败")
                    messagebox.showerror("下载失败", error)
                elif stopped:
                    status_var.set(f"已停止，已保存 {saved} 张")
                else:
                    status_var.set(f"完成，已保存 {saved} 张")

        root.after(120, poll_messages)

    def on_close() -> None:
        """Confirm before closing while a download is still running."""
        if worker is not None and worker.is_alive():
            if not messagebox.askyesno("确认退出", "下载仍在进行，确定退出？"):
                return
            stop_event.set()
        root.destroy()

    start_button.configure(command=start_download)
    stop_button.configure(command=stop_download)
    open_button.configure(command=open_output_dir)
    root.protocol("WM_DELETE_WINDOW", on_close)
    root.after(120, poll_messages)
    keyword_entry.focus_set()
    root.mainloop()


def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the downloader.

    Options: -k/--keyword, -n/--count, -o/--output, --delay, --gui,
    --proxy-mode and --proxy. Defaults come from the module-level DEFAULT_*
    constants where applicable.
    """
    # (flags, add_argument keyword options), in the order shown by --help.
    option_specs = (
        (
            ("-k", "--keyword"),
            dict(default=DEFAULT_KEYWORD, help=f"搜索关键词，默认：{DEFAULT_KEYWORD}"),
        ),
        (
            ("-n", "--count"),
            dict(type=int, default=DEFAULT_COUNT, help=f"下载数量，默认：{DEFAULT_COUNT}"),
        ),
        (
            ("-o", "--output"),
            dict(default=str(DEFAULT_OUTPUT_DIR), help=f"保存目录，默认：{DEFAULT_OUTPUT_DIR}"),
        ),
        (
            ("--delay",),
            dict(type=float, default=0.2, help="每次请求后的等待秒数，默认：0.2"),
        ),
        (
            ("--gui",),
            dict(action="store_true", help="打开图形界面"),
        ),
        (
            ("--proxy-mode",),
            dict(
                choices=["auto", "none", "system", "custom"],
                default="auto",
                help="代理模式：auto 自动兼容，none 强制直连，system 使用系统代理，custom 使用 --proxy",
            ),
        ),
        (
            ("--proxy",),
            dict(
                default="",
                help="自定义代理地址，例如 http://127.0.0.1:7890，仅 proxy-mode=custom 时使用",
            ),
        ),
    )

    parser = argparse.ArgumentParser(description="百度图片批量下载工具；无参数运行时默认打开 GUI")
    for flags, options in option_specs:
        parser.add_argument(*flags, **options)
    return parser.parse_args()


def main() -> None:
    """Entry point: open the GUI or run a command-line download.

    The GUI is started when --gui is given or when the script is launched
    without any arguments. Otherwise count and delay are validated (exiting
    with a message if invalid) and download_images() is run with the parsed
    options.
    """
    args = parse_args()

    launched_without_arguments = len(sys.argv) == 1
    if args.gui or launched_without_arguments:
        run_gui()
        return

    if args.count <= 0:
        sys.exit("下载数量必须大于 0")
    if args.delay < 0:
        sys.exit("delay 不能小于 0")

    download_images(
        keyword=args.keyword,
        count=args.count,
        output_dir=Path(args.output).expanduser().resolve(),
        delay=args.delay,
        proxy_mode=args.proxy_mode,
        proxy_url=args.proxy,
    )


# Run the entry point only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
