# -*- coding: utf-8 -*-
"""
Python 小说爬虫单文件版

本文件由原项目的 config.py、utils.py、text_parser.py、spider.py、main.py、gui.py
合并而成。命令行和图形界面功能都保留，不再依赖同目录下的其他项目模块。

命令行示例：
    python novel_spider_single.py
    python novel_spider_single.py --url "https://www.biquge365.net/book/39884/" --max-chapters 100

图形界面：
    python novel_spider_single.py --gui
"""

from __future__ import annotations

import argparse
import os
import re
import subprocess
import sys
import threading
import time
import tkinter as tk
from collections import Counter
from datetime import datetime
from tkinter import filedialog, messagebox, scrolledtext
from urllib.parse import urljoin, urlparse


# Directory containing this script; relative output paths are resolved against it.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# Placeholders for the third-party dependencies. ensure_dependencies() rebinds
# them to the real modules/classes once every import has succeeded.
plt = None
pd = None
requests = None
BeautifulSoup = None


def ensure_dependencies():
    """Import the third-party packages and publish them as module globals.

    Every package is attempted independently so that a single error message
    can list all missing packages at once. The globals ``requests``,
    ``BeautifulSoup``, ``pd`` and ``plt`` are only rebound when all four
    imports succeed.

    Raises:
        SystemExit: If at least one package cannot be imported. The message
            names the missing packages and the pip command to install them.
    """
    global plt, pd, requests, BeautifulSoup

    loaded = {}
    not_installed = []

    try:
        import requests as _requests
    except ImportError:
        not_installed.append("requests")
    else:
        loaded["requests"] = _requests

    try:
        from bs4 import BeautifulSoup as _beautiful_soup
    except ImportError:
        not_installed.append("beautifulsoup4")
    else:
        loaded["bs4"] = _beautiful_soup

    try:
        import pandas as _pandas
    except ImportError:
        not_installed.append("pandas")
    else:
        loaded["pandas"] = _pandas

    try:
        import matplotlib.pyplot as _pyplot
    except ImportError:
        not_installed.append("matplotlib")
    else:
        loaded["pyplot"] = _pyplot

    if not_installed:
        listed = "、".join(not_installed)
        pip_targets = " ".join(not_installed)
        raise SystemExit(f"缺少依赖：{listed}\n请先执行：python -m pip install {pip_targets}")

    # Nothing is missing, so every key below is guaranteed to be present.
    requests = loaded["requests"]
    BeautifulSoup = loaded["bs4"]
    pd = loaded["pandas"]
    plt = loaded["pyplot"]


"""
项目默认配置

初学者可以直接修改这里的 URL、章节数和请求间隔，不用每次输入很长的命令。
"""

# Default URL of the novel's catalog page or detail page.
DEFAULT_URL = "https://www.biquge365.net/book/39884/"

# Default maximum number of chapters to crawl; the spider still caps this at 100.
DEFAULT_MAX_CHAPTERS = 10

# Default pause between requests, in seconds.
DEFAULT_DELAY = 1.0

# Default root directory for output files.
DEFAULT_OUTPUT = "output"


def build_proxy_config(proxy_mode="auto", proxy_url=""):
    """Translate a proxy mode into the settings used for HTTP requests.

    Args:
        proxy_mode: One of "auto", "none", "system" or "custom"
            (case-insensitive). A falsy value is treated as "auto".
        proxy_url: Proxy address; required only when the mode is "custom".
            Surrounding whitespace is ignored.

    Returns:
        A ``(trust_env, proxies, auto_retry)`` tuple: whether environment
        proxy settings should be honoured, an explicit proxies mapping (or
        None), and whether a failed proxy request may be retried directly.

    Raises:
        ValueError: If the mode is unknown, or "custom" is given without a URL.
    """
    mode = (proxy_mode or "auto").lower()
    address = (proxy_url or "").strip()

    if mode == "custom":
        if not address:
            raise ValueError("proxy-mode 为 custom 时必须填写 --proxy")
        return True, {"http": address, "https": address}, False

    # Modes whose result does not depend on the proxy URL.
    fixed_results = {
        "none": (False, None, False),
        "system": (True, None, False),
        "auto": (True, None, True),
    }
    if mode in fixed_results:
        return fixed_results[mode]

    raise ValueError("proxy-mode 只能是 auto、none、system、custom")



def log_message(message):
    """Print ``message`` prefixed with the current local time and flush stdout."""
    timestamp = f"{datetime.now():%Y-%m-%d %H:%M:%S}"
    # Flush immediately so a parent process reading the pipe sees progress live.
    print(f"[{timestamp}] {message}", flush=True)


def ensure_dir(path):
    """Create ``path`` (including missing parents); do nothing if it already exists."""
    os.makedirs(name=path, exist_ok=True)


def safe_dir_name(name):
    """Turn a novel name or URL fragment into a safe directory name.

    Characters that are illegal in Windows paths and runs of whitespace are
    replaced by underscores, leading/trailing underscores are dropped, and the
    result is limited to 60 characters. An empty result becomes "novel".
    """
    without_illegal = re.sub(r'[\\/:*?"<>|]', "_", name)
    underscored = re.sub(r"\s+", "_", without_illegal)
    trimmed = underscored.strip("_")[:60]
    return trimmed if trimmed else "novel"


def guess_name_from_url(url):
    """Derive an output directory name from ``url``.

    The last path segment that is not a generic word ("book", "newbook",
    "list.html", "index.html") is used, with ".html" removed. When no such
    segment exists the host name is used, and "novel" as a last resort.
    """
    generic_segments = {"book", "newbook", "list.html", "index.html"}
    parsed_url = urlparse(url)
    segments = [segment for segment in parsed_url.path.split("/") if segment]

    # Walk from the end: the most specific segment is usually the last one.
    for segment in reversed(segments):
        if segment.lower() in generic_segments:
            continue
        return safe_dir_name(segment.replace(".html", ""))

    return safe_dir_name(parsed_url.netloc or "novel")


def create_run_output_dir(base_output_dir, url):
    """Create and return a fresh ``runs/<name>_<timestamp>`` directory.

    The name part is guessed from ``url`` and the timestamp has one-second
    resolution, so each run normally gets its own directory under
    ``base_output_dir``.
    """
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    folder_name = "{}_{}".format(guess_name_from_url(url), stamp)
    target_dir = os.path.join(base_output_dir, "runs", folder_name)
    ensure_dir(target_dir)
    return target_dir


def write_text_file(filepath, content):
    """Write ``content`` to ``filepath`` as UTF-8, creating the parent directory if needed."""
    folder = os.path.dirname(filepath)
    # A bare file name has no directory part; there is nothing to create then.
    if folder:
        ensure_dir(folder)

    with open(filepath, mode="w", encoding="utf-8") as handle:
        handle.write(content)


def safe_filename(name):
    """Turn a chapter title into a file name that is legal on Windows.

    Illegal characters become underscores, whitespace runs collapse to a
    single space, and the result is limited to 80 characters. An empty
    result becomes "未命名章节".
    """
    without_illegal = re.sub(r'[\\/:*?"<>|]', "_", name)
    single_spaced = re.sub(r"\s+", " ", without_illegal).strip()
    shortened = single_spaced[:80]
    return shortened if shortened else "未命名章节"


def clean_text(text):
    """Clean raw chapter text.

    Advertisement fragments are removed with regular expressions, whitespace
    inside each line is collapsed, blank lines are dropped, and everything
    from the first line containing a footer marker (navigation, error-report
    widgets, ...) onwards is discarded.

    Args:
        text: Raw text extracted from a chapter page; may be empty or None.

    Returns:
        The remaining lines joined by blank lines, or "" for empty input.
    """
    if not text:
        return ""

    ad_patterns = (
        r"一秒记住【.*",
        r"请收藏.*?。",
        r"手机用户请浏览.*",
        r"最新网址.*",
        r"本章未完.*",
        r"喜欢.*?请大家收藏",
        r"笔趣阁.*",
        r"www\..*?\.(com|net|org)",
        r"https?://\S+",
    )
    footer_markers = (
        "章节报错",
        "请选择错误类型",
        "猜你喜欢",
        "本站所有小说都是转载而来",
        "上一章",
        "目录",
        "存书签",
        "下一章",
    )

    body = text.replace("\r", "\n")
    # Patterns are applied one after another, in the listed order.
    for ad_pattern in ad_patterns:
        body = re.sub(ad_pattern, "", body, flags=re.IGNORECASE)

    kept_lines = []
    for raw_line in body.split("\n"):
        normalized = re.sub(r"\s+", " ", raw_line).strip()
        if not normalized:
            continue
        # The first footer marker ends the chapter body.
        if any(marker in normalized for marker in footer_markers):
            break
        kept_lines.append(normalized)

    return "\n\n".join(kept_lines)


def is_likely_chapter_link(text, href):
    """Return True when a link's text looks like a chapter title.

    Args:
        text: Visible link text.
        href: Link target; only checked for being non-empty.

    Returns:
        True if both arguments are non-empty and the stripped text matches one
        of the known chapter title shapes (prologue/epilogue words,
        "第…章/节/卷/回", or "chapter N"), otherwise False.
    """
    if not (text and href):
        return False

    title_patterns = (
        r"^序章$",
        r"^楔子$",
        r"^尾声$",
        r"^完本感言$",
        r"^第[零〇一二三四五六七八九十百千万两\d]+[章节卷回].*",
        r"^chapter\s*\d+.*",
    )
    candidate = text.strip()
    return any(
        re.search(title_pattern, candidate, flags=re.IGNORECASE)
        for title_pattern in title_patterns
    )


def extract_chapter_number(title):
    """Extract the chapter number from a title such as "第12章" or "第十二章".

    Arabic digits are preferred. Otherwise Chinese numerals are parsed in
    either of two notations:

    * positional digits without unit characters, e.g. "一〇二" -> 102;
    * unit notation, e.g. "一百零五" -> 105, where "万" multiplies the whole
      section in front of it, e.g. "十万" -> 100000.

    Args:
        title: Chapter title text.

    Returns:
        The chapter number, or 0 when the title carries no recognisable number.
    """
    arabic_match = re.search(r"第0*(\d+)[章节卷回]", title)
    if arabic_match:
        return int(arabic_match.group(1))

    chinese_match = re.search(r"第([零〇一二三四五六七八九十百千万两]+)[章节卷回]", title)
    if not chinese_match:
        return 0

    digits = {
        "零": 0,
        "〇": 0,
        "一": 1,
        "二": 2,
        "两": 2,
        "三": 3,
        "四": 4,
        "五": 5,
        "六": 6,
        "七": 7,
        "八": 8,
        "九": 9,
    }
    small_units = {"十": 10, "百": 100, "千": 1000}
    numeral = chinese_match.group(1)

    # No unit characters at all: the numerals are positional digits
    # ("一〇二" means 102), not a sum.
    if all(char in digits for char in numeral):
        return int("".join(str(digits[char]) for char in numeral))

    total = 0      # value of completed "万" sections
    section = 0    # value accumulated since the last "万"
    current = 0    # digit waiting for its unit
    for char in numeral:
        if char in digits:
            current = digits[char]
        elif char == "万":
            # "万" scales everything accumulated in this section; a bare "万"
            # counts as 一万.
            section += current
            total += (section or 1) * 10000
            section = 0
            current = 0
        else:
            # A unit without a leading digit ("十二") implies a leading one.
            section += (current or 1) * small_units[char]
            current = 0

    return total + section + current


def collect_chapter_links(links, base_url):
    """Collect chapter entries from link elements, keeping the first hit per URL.

    Args:
        links: Iterable of link elements offering ``get_text`` and ``get``.
        base_url: URL against which relative hrefs are resolved.

    Returns:
        A list of dicts with "title", "url" and "number" keys, in the order
        the URLs were first seen.
    """
    # Keyed by absolute URL; dict insertion order preserves first-seen order.
    by_url = {}

    for anchor in links:
        label = anchor.get_text(" ", strip=True)
        target = anchor.get("href", "").strip()
        if not is_likely_chapter_link(label, target):
            continue

        absolute_url = urljoin(base_url, target)
        if absolute_url not in by_url:
            by_url[absolute_url] = {
                "title": label,
                "url": absolute_url,
                "number": extract_chapter_number(label),
            }

    return list(by_url.values())


def _catalog_sort_key(chapter):
    """Return the sort position of a chapter entry in the catalog."""
    if chapter["number"] > 0:
        return chapter["number"]
    # Closing pieces belong after the last numbered chapter, not before the first.
    if chapter["title"].strip() in ("尾声", "完本感言"):
        return float("inf")
    # Other unnumbered entries (prologue etc.) stay in front.
    return -1


def parse_catalog(html, base_url):
    """Parse chapter titles and URLs from a catalog page.

    Three strategies are tried and the one yielding the most chapters wins:
    known catalog container selectors, the surroundings of headings such as
    "章节目录", and finally every link on the page. When at least two chapters
    carry a number, the result is sorted ascending by number (the sort is
    stable, so equal keys keep page order); unnumbered entries go first,
    except "尾声" and "完本感言", which go last.

    Args:
        html: HTML source of the catalog page.
        base_url: URL used to resolve relative chapter links.

    Returns:
        A list of dicts with "title" and "url" keys.
    """
    soup = BeautifulSoup(html, "html.parser")
    best_chapters = []

    catalog_selectors = [
        "#list",
        ".listmain",
        ".chapter-list",
        ".chapters",
        ".book-mulu",
        ".mulu",
        ".chapter",
        ".list",
    ]

    for selector in catalog_selectors:
        for container in soup.select(selector):
            chapters = collect_chapter_links(container.find_all("a", href=True), base_url)
            if len(chapters) > len(best_chapters):
                best_chapters = chapters

    for heading in soup.find_all(string=re.compile(r"章节目录|全部章节目录|最新章节")):
        parent = heading.parent
        # Inspect up to four ancestor levels, plus each ancestor's next sibling.
        for _ in range(4):
            if not parent:
                break

            chapters = collect_chapter_links(parent.find_all("a", href=True), base_url)
            if len(chapters) > len(best_chapters):
                best_chapters = chapters

            next_node = parent.find_next_sibling()
            if next_node:
                sibling_chapters = collect_chapter_links(next_node.find_all("a", href=True), base_url)
                if len(sibling_chapters) > len(best_chapters):
                    best_chapters = sibling_chapters

            parent = parent.parent

    # Last resort: consider every link on the page.
    if not best_chapters:
        best_chapters = collect_chapter_links(soup.find_all("a", href=True), base_url)

    numbered_chapters = [chapter for chapter in best_chapters if chapter["number"] > 0]
    if len(numbered_chapters) >= 2:
        best_chapters = sorted(best_chapters, key=_catalog_sort_key)

    # "number" is only a sorting aid and is not part of the returned records.
    for chapter in best_chapters:
        chapter.pop("number", None)

    return best_chapters


def parse_catalog_page_link(html, base_url):
    """Find the link on a book detail page that leads to the full catalog.

    Returns:
        The absolute URL of the first link whose text is exactly one of the
        catalog labels, or "" when there is none.
    """
    entry_labels = ("全部章节目录", "章节目录", "目录")
    soup = BeautifulSoup(html, "html.parser")

    for anchor in soup.find_all("a", href=True):
        if anchor.get_text(" ", strip=True) not in entry_labels:
            continue
        return urljoin(base_url, anchor.get("href", "").strip())

    return ""


def remove_unwanted_tags(soup):
    """Remove script, style and page-chrome elements from ``soup`` in place."""
    unwanted_names = ["script", "style", "iframe", "ins", "button", "form", "nav", "footer", "header"]
    for element in soup(unwanted_names):
        element.decompose()


def parse_chapter_content(html):
    """Extract the title and raw body text from a chapter page.

    The title is the text of the first h1/h2 element. The body comes from the
    first matching content selector; when none matches, the text of the whole
    document body is used.

    Returns:
        A ``(title, content)`` tuple of strings; the title may be "".
    """
    soup = BeautifulSoup(html, "html.parser")
    remove_unwanted_tags(soup)

    heading = soup.find(["h1", "h2"])
    title = heading.get_text(" ", strip=True) if heading else ""

    content_selectors = (
        "#txt",
        ".txt",
        "#content",
        ".content",
        ".chapter-content",
        ".read-content",
        ".reader-main",
        ".article-content",
        "#chaptercontent",
        "#BookText",
    )

    # Selectors are tried in order; the first one that matches wins.
    text_container = None
    for selector in content_selectors:
        match = soup.select_one(selector)
        if match:
            text_container = match
            break

    if not text_container:
        text_container = soup.body or soup

    return title, text_container.get_text("\n", strip=True)


def same_domain(url_a, url_b):
    """Return True when both URLs have the same network location (host[:port])."""
    location_a = urlparse(url_a).netloc
    location_b = urlparse(url_b).netloc
    return location_a == location_b


def parse_next_page(html, current_url):
    """Find the "next page" link of a chapter that is split over several pages.

    Links to the next *chapter* ("下一章"), javascript pseudo-links and links
    to another host are ignored.

    Returns:
        The absolute URL of the next page, or "" when there is none.
    """
    exact_labels = ["下一页", "下页", "next", "Next", ">"]
    soup = BeautifulSoup(html, "html.parser")

    for anchor in soup.find_all("a", href=True):
        label = anchor.get_text(" ", strip=True)
        target = anchor.get("href", "").strip()

        unusable = (not target) or target.startswith("javascript:")
        if unusable or "下一章" in label:
            continue

        looks_like_next = label in exact_labels or "下一页" in label or "下页" in label
        if not looks_like_next:
            continue

        candidate = urljoin(current_url, target)
        # Pagination must stay on the same host.
        if same_domain(current_url, candidate):
            return candidate

    return ""


def simple_word_frequency(text, top_n=30):
    """Count frequent tokens in ``text`` with a simple regex tokenizer.

    Tokens are runs of 2-4 Chinese characters or 2+ ASCII letters. Tokens in
    the stop-word list are dropped and the rest are lower-cased before
    counting.

    Args:
        text: Text to analyse.
        top_n: Number of most frequent tokens to return.

    Returns:
        A list of ``(token, count)`` tuples, most frequent first; [] for
        empty input.
    """
    if not text:
        return []

    stop_words = {
        "一个", "什么", "没有", "自己", "这个",
        "那个", "他们", "我们", "你们", "只是",
        "已经", "不是", "还是", "然后", "突然",
        "可以", "因为", "所以", "但是", "这样",
    }

    tally = Counter()
    for token in re.findall(r"[\u4e00-\u9fa5]{2,4}|[A-Za-z]{2,}", text):
        # The stop-word check happens before lower-casing.
        if token in stop_words:
            continue
        tally[token.lower()] += 1

    return tally.most_common(top_n)


class NovelSpider:
    """Novel crawler covering the whole flow from catalog page to reports.

    ``run`` creates the output directories, resolves the chapter links,
    downloads every chapter (following in-chapter pagination), writes
    per-chapter and merged text files, and finally produces CSV statistics
    and charts.
    """

    def __init__(
        self,
        catalog_url,
        output_dir,
        project_dir=None,
        max_chapters=100,
        delay=1.0,
        proxy_mode="auto",
        proxy_url="",
    ):
        """Store the crawl configuration.

        Args:
            catalog_url: URL of the catalog page or book detail page.
            output_dir: Directory that receives all generated files.
            project_dir: Project directory; defaults to this file's directory.
            max_chapters: Upper bound on chapters to crawl, clamped to 0..100.
            delay: Pause after each request in seconds; negatives become 0.
            proxy_mode: Proxy mode understood by build_proxy_config.
            proxy_url: Proxy address for the "custom" proxy mode.

        Raises:
            ValueError: Propagated from build_proxy_config for bad proxy settings.
        """
        self.catalog_url = catalog_url
        self.output_dir = output_dir
        self.project_dir = project_dir or os.path.dirname(os.path.abspath(__file__))
        # Clamp to [0, 100]: a negative value would turn the later slice
        # chapters[:max_chapters] into "drop the last N chapters".
        self.max_chapters = max(0, min(max_chapters, 100))
        # time.sleep() raises ValueError for negative values.
        self.delay = max(0.0, delay)
        self.timeout = 10
        self.headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/124.0.0.0 Safari/537.36"
            )
        }
        self.chapters_dir = os.path.join(self.output_dir, "chapters")
        self.charts_dir = os.path.join(self.output_dir, "charts")
        self.data_dir = os.path.join(self.output_dir, "data")
        self.novel_file = os.path.join(self.output_dir, "novel.txt")
        self.chapter_records = []
        self.trust_env, self.proxies, self.proxy_auto_retry = build_proxy_config(proxy_mode, proxy_url)
        # Created lazily so the object can be built before requests is loaded.
        self._session = None

    def prepare_dirs(self):
        """Create the output, chapters, charts and data directories."""
        ensure_dir(self.output_dir)
        ensure_dir(self.chapters_dir)
        ensure_dir(self.charts_dir)
        ensure_dir(self.data_dir)
        log_message("保存目录检查完成")

    def _get_session(self):
        """Return the shared requests session, creating it on first use."""
        if self._session is None:
            self._session = requests.Session()
        return self._session

    def fetch(self, url):
        """Download ``url`` and return its text, or "" on any failure.

        Failures are logged rather than raised. In "auto" proxy mode a proxy
        error triggers one retry without environment proxies. The configured
        delay is slept after every attempt.
        """
        try:
            log_message(f"正在请求：{url}")
            session = self._get_session()
            # trust_env is a Session attribute; requests.get() does not accept
            # it as a keyword and would raise TypeError on every call.
            session.trust_env = self.trust_env
            response = session.get(
                url,
                headers=self.headers,
                timeout=self.timeout,
                proxies=self.proxies,
            )
            response.raise_for_status()

            # A missing or ISO-8859-1 encoding usually means the server sent no
            # charset; fall back to the encoding detected from the content.
            if not response.encoding or response.encoding.lower() == "iso-8859-1":
                response.encoding = response.apparent_encoding

            time.sleep(self.delay)
            return response.text
        except requests.exceptions.ProxyError as error:
            if self.proxy_auto_retry and self.trust_env:
                log_message(f"代理请求失败，自动切换为直连重试：{error}")
                # Clearing trust_env also guarantees the retry happens only once.
                self.trust_env = False
                self.proxies = None
                return self.fetch(url)
            log_message(f"代理请求失败：{url}，原因：{error}")
        except requests.exceptions.Timeout:
            log_message(f"请求超时：{url}")
        except requests.exceptions.HTTPError as error:
            log_message(f"HTTP 状态错误：{url}，原因：{error}")
        except requests.exceptions.RequestException as error:
            log_message(f"请求失败：{url}，原因：{error}")
        except Exception as error:
            # Best-effort crawler: one bad page must not abort the whole run.
            log_message(f"未知请求错误：{url}，原因：{error}")

        time.sleep(self.delay)
        return ""

    def get_chapter_links(self):
        """Return up to ``max_chapters`` chapter entries for the configured URL.

        If the page links to a separate full catalog page, that page is
        fetched as well and used when it yields more chapters.

        Returns:
            A list of dicts with "title" and "url"; [] if the page cannot be
            fetched.
        """
        html = self.fetch(self.catalog_url)
        if not html:
            log_message("目录页获取失败，无法继续爬取")
            return []

        chapters = parse_catalog(html, self.catalog_url)
        catalog_page_url = parse_catalog_page_link(html, self.catalog_url)

        if catalog_page_url and catalog_page_url != self.catalog_url:
            log_message(f"检测到完整目录页，正在切换：{catalog_page_url}")
            catalog_html = self.fetch(catalog_page_url)
            if catalog_html:
                catalog_chapters = parse_catalog(catalog_html, catalog_page_url)
                if len(catalog_chapters) > len(chapters):
                    chapters = catalog_chapters

        chapters = chapters[: self.max_chapters]
        log_message(f"目录解析完成，共获取 {len(chapters)} 个章节链接")
        return chapters

    def crawl_chapter(self, chapter_title, chapter_url):
        """Download one chapter, following its "next page" links.

        Args:
            chapter_title: Title taken from the catalog; may be empty.
            chapter_url: URL of the chapter's first page.

        Returns:
            A ``(title, content)`` tuple. The title falls back to the first
            title parsed from the pages, then to "未命名章节". The content is
            "" when nothing could be downloaded.
        """
        all_text_parts = []
        visited_urls = set()
        current_url = chapter_url
        page_index = 1
        # Initialised up front so a failed first request cannot leave the
        # fallback title undefined.
        page_title = ""

        while current_url and current_url not in visited_urls:
            visited_urls.add(current_url)
            html = self.fetch(current_url)
            if not html:
                break

            title, content = parse_chapter_content(html)
            if title and not page_title:
                page_title = title

            text = clean_text(content)
            if text:
                all_text_parts.append(text)
                log_message(f"章节分页读取成功：{chapter_title} 第 {page_index} 页")
            else:
                log_message(f"章节分页正文为空：{chapter_title} 第 {page_index} 页")

            next_page = parse_next_page(html, current_url)
            if not next_page:
                break

            next_page_url = urljoin(current_url, next_page)
            # Guard against pagination loops.
            if next_page_url == current_url or next_page_url in visited_urls:
                break

            current_url = next_page_url
            page_index += 1

        final_title = chapter_title or page_title or "未命名章节"
        final_content = "\n\n".join(all_text_parts).strip()
        return final_title, final_content

    def save_chapter(self, index, title, content):
        """Write one chapter to ``<index>_<title>.txt`` in the chapters directory."""
        filename = f"{index:03d}_{safe_filename(title)}.txt"
        filepath = os.path.join(self.chapters_dir, filename)
        text = f"{title}\n{'=' * 40}\n\n{content}\n"
        write_text_file(filepath, text)
        log_message(f"章节已保存：{filepath}")

    def save_all_novel(self):
        """Write all recorded chapters into the single merged text file."""
        parts = []
        for record in self.chapter_records:
            parts.append(record["title"])
            parts.append("=" * 40)
            parts.append("")
            parts.append(record["content"])
            parts.append("\n\n")

        write_text_file(self.novel_file, "\n".join(parts))
        log_message(f"总 txt 文件已保存：{self.novel_file}")

    def save_statistics(self):
        """Save per-chapter word counts as CSV and log the totals.

        Returns:
            The pandas DataFrame built from the chapter records, or None when
            no chapter was recorded.
        """
        if not self.chapter_records:
            log_message("没有章节数据，跳过统计")
            return None

        df = pd.DataFrame(self.chapter_records)
        total_chapters = len(df)
        total_words = int(df["word_count"].sum())

        stats_path = self.get_available_file_path(os.path.join(self.data_dir, "chapter_statistics.csv"))
        df[["index", "title", "url", "word_count"]].to_csv(stats_path, index=False, encoding="utf-8-sig")

        log_message(f"总章节数：{total_chapters}")
        log_message(f"总字数：{total_words}")
        log_message(f"每章字数统计 CSV 已保存：{stats_path}")
        return df

    def _configure_chart_fonts(self):
        """Select fonts that can render Chinese labels in matplotlib charts."""
        plt.rcParams["font.sans-serif"] = ["SimHei", "Microsoft YaHei", "Arial Unicode MS"]
        plt.rcParams["axes.unicode_minus"] = False

    def draw_word_count_chart(self, df):
        """Draw the per-chapter word count line chart and save it as PNG.

        Args:
            df: DataFrame returned by save_statistics; None or empty skips drawing.
        """
        if df is None or df.empty:
            log_message("没有统计数据，跳过绘图")
            return

        self._configure_chart_fonts()

        plt.figure(figsize=(12, 6))
        plt.plot(df["index"], df["word_count"], marker="o", linewidth=1.5)
        plt.title("每章字数折线图")
        plt.xlabel("章节序号")
        plt.ylabel("字数")
        plt.grid(True, linestyle="--", alpha=0.5)
        plt.tight_layout()

        chart_path = os.path.join(self.charts_dir, "chapter_word_count.png")
        plt.savefig(chart_path, dpi=150)
        plt.close()
        log_message(f"每章字数折线图已保存：{chart_path}")

    def save_word_frequency(self):
        """Compute the top-30 word frequencies and save them as CSV and bar chart."""
        all_text = "\n".join(record["content"] for record in self.chapter_records)
        frequency = simple_word_frequency(all_text, top_n=30)

        if not frequency:
            log_message("没有词频数据，跳过词频统计")
            return

        freq_df = pd.DataFrame(frequency, columns=["word", "count"])
        freq_path = self.get_available_file_path(os.path.join(self.data_dir, "word_frequency.csv"))
        freq_df.to_csv(freq_path, index=False, encoding="utf-8-sig")
        log_message(f"词频 CSV 已保存：{freq_path}")

        self._configure_chart_fonts()

        plt.figure(figsize=(12, 6))
        plt.bar(freq_df["word"], freq_df["count"])
        plt.title("词频统计 Top 30")
        plt.xlabel("词语")
        plt.ylabel("出现次数")
        plt.xticks(rotation=45)
        plt.tight_layout()

        chart_path = os.path.join(self.charts_dir, "word_frequency.png")
        plt.savefig(chart_path, dpi=150)
        plt.close()
        log_message(f"词频统计图已保存：{chart_path}")

    def get_available_file_path(self, filepath):
        """Return a writable variant of ``filepath``.

        The original path is tried first, then ``<base>_1<ext>`` up to
        ``<base>_99<ext>``. Each candidate is probed by opening it in append
        mode, which creates the file if it does not exist yet.

        Raises:
            PermissionError: If none of the 100 candidates can be opened.
        """
        base, ext = os.path.splitext(filepath)

        for index in range(0, 100):
            candidate = filepath if index == 0 else f"{base}_{index}{ext}"
            try:
                with open(candidate, "a", encoding="utf-8"):
                    pass
                return candidate
            except PermissionError:
                log_message(f"文件被占用，尝试新文件名：{candidate}")

        raise PermissionError(f"无法找到可写入文件：{filepath}")

    def run(self):
        """Run the full crawl: catalog, chapters, saving, statistics and charts."""
        self.prepare_dirs()
        chapters = self.get_chapter_links()

        if not chapters:
            log_message("没有可爬取章节，程序退出")
            return

        for index, chapter in enumerate(chapters, start=1):
            title = chapter["title"]
            url = chapter["url"]
            log_message(f"开始爬取第 {index}/{len(chapters)} 章：{title}")

            final_title, content = self.crawl_chapter(title, url)
            if not content:
                log_message(f"章节正文为空，跳过保存：{final_title}")
                continue

            # Word count ignores line breaks and spaces.
            word_count = len(content.replace("\n", "").replace(" ", ""))
            self.chapter_records.append(
                {
                    "index": index,
                    "title": final_title,
                    "url": url,
                    "content": content,
                    "word_count": word_count,
                }
            )
            self.save_chapter(index, final_title, content)
            log_message(f"第 {index} 章完成，字数：{word_count}")

        self.save_all_novel()
        df = self.save_statistics()
        self.draw_word_count_chart(df)
        self.save_word_frequency()
        log_message("全部任务完成")


def parse_args(argv=None):
    """Parse the command line options.

    Args:
        argv: Argument list to parse; None lets argparse read ``sys.argv``.

    Returns:
        The argparse namespace with gui, url, max_chapters, delay, output,
        no_auto_output, proxy_mode and proxy attributes.
    """
    # (flag, add_argument keyword arguments), in the order shown by --help.
    option_table = (
        ("--gui", {"action": "store_true", "help": "启动图形界面"}),
        ("--url", {"default": DEFAULT_URL, "help": "小说目录页 URL，不填写时使用默认 URL"}),
        (
            "--max-chapters",
            {"type": int, "default": DEFAULT_MAX_CHAPTERS, "help": "最多爬取章节数，不填写时使用默认值"},
        ),
        (
            "--delay",
            {"type": float, "default": DEFAULT_DELAY, "help": "每次请求后的等待秒数，不填写时使用默认值"},
        ),
        ("--output", {"default": DEFAULT_OUTPUT, "help": "保存根目录，不填写时使用默认 output 目录"}),
        (
            "--no-auto-output",
            {"action": "store_true", "help": "关闭自动新建运行目录，直接使用 --output 指定的目录"},
        ),
        (
            "--proxy-mode",
            {
                "choices": ["auto", "none", "system", "custom"],
                "default": "auto",
                "help": "代理模式：auto 自动兼容，none 强制直连，system 使用系统代理，custom 使用 --proxy",
            },
        ),
        (
            "--proxy",
            {"default": "", "help": "自定义代理地址，例如 http://127.0.0.1:7890，仅 proxy-mode=custom 时使用"},
        ),
    )

    parser = argparse.ArgumentParser(description="Python 小说爬虫课程作业（单文件版）")
    for flag, settings in option_table:
        parser.add_argument(flag, **settings)
    return parser.parse_args(argv)


def run_cli(args):
    """Run the crawl described by the parsed command line ``args``.

    Relative output paths are resolved against the script directory. Unless
    ``--no-auto-output`` was given, a fresh per-run directory is created below
    the output root.
    """
    ensure_dependencies()

    if os.path.isabs(args.output):
        output_root = args.output
    else:
        output_root = os.path.join(BASE_DIR, args.output)
    target_dir = os.path.abspath(output_root)

    if not args.no_auto_output:
        target_dir = create_run_output_dir(target_dir, args.url)

    startup_lines = (
        "程序启动",
        f"目录页 URL：{args.url}",
        f"最多爬取章节数：{args.max_chapters}",
        f"请求间隔：{args.delay} 秒",
        f"输出目录：{target_dir}",
    )
    for line in startup_lines:
        log_message(line)

    NovelSpider(
        catalog_url=args.url,
        output_dir=target_dir,
        project_dir=BASE_DIR,
        max_chapters=args.max_chapters,
        delay=args.delay,
        proxy_mode=args.proxy_mode,
        proxy_url=args.proxy,
    ).run()

    log_message("程序结束")


class SpiderGUI:
    """Tkinter window that collects parameters, runs the crawler and shows its log.

    The crawler runs as a child process of this same script; its output is
    read on a background thread and appended to the log widget.
    """

    def __init__(self, root):
        """Set up the window, the input variables and all widgets.

        Args:
            root: The Tk root window hosting the interface.
        """
        self.root = root
        self.root.title("Python 小说爬虫")
        self.root.geometry("900x620")
        self.root.minsize(760, 520)
        # Handle of the running crawler subprocess, if any.
        self.process = None

        self.url_var = tk.StringVar(value=DEFAULT_URL)
        self.max_chapters_var = tk.StringVar(value=str(DEFAULT_MAX_CHAPTERS))
        self.delay_var = tk.StringVar(value=str(DEFAULT_DELAY))
        self.output_var = tk.StringVar(value=DEFAULT_OUTPUT)
        self.auto_output_var = tk.BooleanVar(value=True)

        self.create_widgets()

    def create_widgets(self):
        """Create the input fields, buttons and the log area."""
        container = tk.Frame(self.root, padx=14, pady=12)
        container.pack(fill=tk.BOTH, expand=True)

        title = tk.Label(container, text="小说爬虫控制面板", font=("Microsoft YaHei UI", 16, "bold"))
        title.pack(anchor="w", pady=(0, 12))

        form = tk.Frame(container)
        form.pack(fill=tk.X)

        self.add_labeled_entry(form, "小说 URL：", self.url_var, 0, width=78)
        self.add_labeled_entry(form, "章节数量：", self.max_chapters_var, 1, width=16)
        self.add_labeled_entry(form, "请求间隔：", self.delay_var, 2, width=16)

        output_row = tk.Frame(form)
        output_row.grid(row=3, column=0, columnspan=3, sticky="ew", pady=6)
        tk.Label(output_row, text="输出目录：", width=10, anchor="e").pack(side=tk.LEFT)
        tk.Entry(output_row, textvariable=self.output_var).pack(side=tk.LEFT, fill=tk.X, expand=True, padx=(0, 8))
        tk.Button(output_row, text="选择目录", command=self.choose_output_dir).pack(side=tk.LEFT)

        option_row = tk.Frame(form)
        option_row.grid(row=4, column=0, columnspan=3, sticky="w", pady=6)
        tk.Checkbutton(
            option_row,
            text="每次运行自动创建新目录",
            variable=self.auto_output_var,
        ).pack(side=tk.LEFT)

        button_row = tk.Frame(container)
        button_row.pack(fill=tk.X, pady=(10, 8))
        self.start_button = tk.Button(button_row, text="开始爬取", width=14, command=self.start_spider)
        self.start_button.pack(side=tk.LEFT)
        self.stop_button = tk.Button(button_row, text="停止爬取", width=14, state=tk.DISABLED, command=self.stop_spider)
        self.stop_button.pack(side=tk.LEFT, padx=8)
        tk.Button(button_row, text="清空日志", width=14, command=self.clear_log).pack(side=tk.LEFT)
        tk.Button(button_row, text="打开输出目录", width=14, command=self.open_output_dir).pack(side=tk.LEFT, padx=8)

        self.log_text = scrolledtext.ScrolledText(container, wrap=tk.WORD, height=20)
        self.log_text.pack(fill=tk.BOTH, expand=True)
        self.log("窗口已启动，可以修改参数后点击“开始爬取”。")

    def add_labeled_entry(self, parent, label_text, variable, row, width):
        """Add one grid row consisting of a label and an entry bound to ``variable``."""
        tk.Label(parent, text=label_text, width=10, anchor="e").grid(row=row, column=0, sticky="e", pady=6)
        entry = tk.Entry(parent, textvariable=variable, width=width)
        entry.grid(row=row, column=1, sticky="w", pady=6)
        parent.grid_columnconfigure(1, weight=1)

    def choose_output_dir(self):
        """Let the user pick the output directory in a dialog."""
        directory = filedialog.askdirectory(initialdir=BASE_DIR)
        if directory:
            self.output_var.set(directory)

    def build_command(self):
        """Validate the form values and build the crawler command line.

        Returns:
            The argument list for subprocess: interpreter, this script and
            its options.

        Raises:
            ValueError: If the URL is empty, the chapter count is not an
                integer in 1..100, or the delay is not a finite number >= 0.
        """
        url = self.url_var.get().strip()
        max_chapters = self.max_chapters_var.get().strip()
        delay = self.delay_var.get().strip()
        output = self.output_var.get().strip() or DEFAULT_OUTPUT

        if not url:
            raise ValueError("小说 URL 不能为空")

        try:
            max_chapters_value = int(max_chapters)
        except ValueError as error:
            raise ValueError("章节数量必须是整数") from error

        if max_chapters_value < 1 or max_chapters_value > 100:
            raise ValueError("章节数量必须在 1 到 100 之间")

        try:
            delay_value = float(delay)
        except ValueError as error:
            raise ValueError("请求间隔必须是数字") from error

        # float() also accepts "nan" and "inf", which are useless as a delay;
        # NaN is the only value that is not equal to itself.
        if delay_value != delay_value or delay_value == float("inf"):
            raise ValueError("请求间隔必须是数字")

        if delay_value < 0:
            raise ValueError("请求间隔不能小于 0")

        command = [
            sys.executable,
            os.path.abspath(__file__),
            "--url",
            url,
            "--max-chapters",
            str(max_chapters_value),
            "--delay",
            str(delay_value),
            "--output",
            output,
        ]

        if not self.auto_output_var.get():
            command.append("--no-auto-output")

        return command

    def start_spider(self):
        """Start the crawler subprocess and read its output on a background thread."""
        if self.process and self.process.poll() is None:
            messagebox.showwarning("提示", "爬虫正在运行，请先停止或等待完成。")
            return

        try:
            command = self.build_command()
        except ValueError as error:
            messagebox.showerror("参数错误", str(error))
            return

        self.start_button.config(state=tk.DISABLED)
        self.stop_button.config(state=tk.NORMAL)
        self.log("开始运行命令：")
        self.log(" ".join(command))

        thread = threading.Thread(target=self.run_process, args=(command,), daemon=True)
        thread.start()

    def run_process(self, command):
        """Run ``command`` and forward every output line to the log widget.

        Executed on a worker thread; widget updates are scheduled through
        ``root.after`` rather than performed directly.
        """
        try:
            env = os.environ.copy()
            # Force UTF-8 in the child so the log decodes the same on every platform.
            env["PYTHONIOENCODING"] = "utf-8"
            env["PYTHONUTF8"] = "1"

            self.process = subprocess.Popen(
                command,
                cwd=BASE_DIR,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
                encoding="utf-8",
                errors="replace",
                bufsize=1,
                env=env,
            )

            # Close the pipe once the child has finished writing.
            with self.process.stdout as stream:
                for line in stream:
                    self.root.after(0, self.log, line.rstrip())

            return_code = self.process.wait()
            self.root.after(0, self.on_process_finished, return_code)
        except Exception as error:
            self.root.after(0, self.log, f"运行失败：{error}")
            self.root.after(0, self.on_process_finished, -1)

    def stop_spider(self):
        """Ask the running crawler process, if any, to terminate."""
        if self.process and self.process.poll() is None:
            self.process.terminate()
            self.log("已发送停止信号，请稍等...")

    def on_process_finished(self, return_code):
        """Restore the button states and log how the crawler ended."""
        self.start_button.config(state=tk.NORMAL)
        self.stop_button.config(state=tk.DISABLED)
        if return_code == 0:
            self.log("爬取完成。")
        else:
            self.log(f"爬虫已结束，退出码：{return_code}")

    def clear_log(self):
        """Remove all text from the log area."""
        self.log_text.delete("1.0", tk.END)

    def open_output_dir(self):
        """Open the output directory in the platform's file manager.

        The directory is created first if it does not exist. Failures are
        shown in a message box instead of being raised.
        """
        output = self.output_var.get().strip() or DEFAULT_OUTPUT
        if not os.path.isabs(output):
            output = os.path.join(BASE_DIR, output)

        try:
            os.makedirs(output, exist_ok=True)
            # os.startfile only exists on Windows; other platforms need their
            # own opener command.
            if sys.platform.startswith("win"):
                os.startfile(output)
            elif sys.platform == "darwin":
                subprocess.Popen(["open", output])
            else:
                subprocess.Popen(["xdg-open", output])
        except OSError as error:
            messagebox.showerror("打开失败", f"无法打开输出目录：{error}")

    def log(self, message):
        """Append one line to the log area and scroll to the end."""
        self.log_text.insert(tk.END, message + "\n")
        self.log_text.see(tk.END)


def launch_gui():
    """Create the main window and run the Tk event loop until it is closed."""
    window = tk.Tk()
    app = SpiderGUI(window)  # noqa: F841 - constructed for its widgets
    window.mainloop()


def main(argv=None):
    """Entry point: run the command line crawl, or the GUI when ``--gui`` is given.

    Args:
        argv: Argument list to parse; None means the process arguments.
    """
    options = parse_args(argv)
    if not options.gui:
        run_cli(options)
        return
    launch_gui()


# Run the entry point only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
