#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import os
import re
import sys
import time
import json
import base64
import hashlib
import logging
import requests
from datetime import datetime
from bs4 import BeautifulSoup
from Cryptodome.Cipher import AES
from Cryptodome.Util.Padding import unpad
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

# ================= Configuration =================
# NOTE(review): the API key, UUID and password below are hard-coded in the
# source file; consider loading them from the environment — confirm with owner.
# 1. KaraKeep settings (base URL and bearer token used for the bookmark API)
KARAKEEP_URL = "http://3.9.3.8:3030"
KARAKEEP_API_KEY = "ak2_d80fab02dcf9bb166"

# 2. CookieCloud settings (endpoint, account UUID and password)
CC_URL = "http://6.6.7.6:3000/cookiecloud"
CC_UUID = "fbUpmXrhd673VhcFG"
CC_PASSWORD = "sqjrxKw8DjHgF"

# 3. Forwarding target and history-file settings
TARGET_EMAIL = "8785@qq.com"
HISTORY_FILE = "forwarded_karakeep.txt"
MAX_HISTORY_RECORDS = 100

# 4. Fetch-count setting [added]
MAX_FETCH_COUNT = 30  # Tunable: maximum number of newest valid bookmarks handled per run

# ================= Global constants =================
BASE_URL = "https://m.newsmth.net"
HEADERS = {
    "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) AppleWebKit/605.1.15",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
}

# Precompiled regular expressions
# RE_FORWARD_LINK matches hrefs of the form /article/<segment>/forward/<digits>.
RE_FORWARD_LINK = re.compile(r'/article/[^/]+/forward/\d+')
# RE_TIME_FORMAT matches a "YYYY-MM-DD HH:MM:SS" shaped timestamp.
RE_TIME_FORMAT = re.compile(r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}')

# ================= Logging setup =================
# Configures the root logger at import time: INFO level, timestamped lines.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)

# ================= 核心功能 =================

def create_robust_session() -> requests.Session:
    """Build a ``requests.Session`` whose HTTP and HTTPS adapters retry automatically.

    The retry policy allows up to 5 retries with a backoff factor of 2 and is
    triggered by status codes 429, 500, 502, 503 and 504. POST is explicitly
    included in the retryable methods alongside the idempotent verbs.

    Returns:
        A session with the same retrying adapter mounted for both schemes.
    """
    retry_policy = Retry(
        total=5,
        backoff_factor=2,
        status_forcelist=[429, 500, 502, 503, 504],
        allowed_methods=["HEAD", "GET", "PUT", "DELETE", "OPTIONS", "TRACE", "POST"],
    )
    shared_adapter = HTTPAdapter(max_retries=retry_policy)

    robust_session = requests.Session()
    # One adapter instance serves both schemes, exactly as a pair of mount calls would.
    for scheme in ('http://', 'https://'):
        robust_session.mount(scheme, shared_adapter)
    return robust_session

def fetch_and_decrypt_cookies(session: requests.Session) -> dict:
    """Fetch the newsmth.net cookies stored in CookieCloud.

    POSTs the configured password to ``{CC_URL}/get/{CC_UUID}`` and reads the
    ``cookie_data`` field of the JSON reply. If that field is already a dict it
    is used directly; if it is a string it is treated as base64-encoded
    AES-CBC ciphertext and decrypted with a key derived from the MD5 hex digest
    of the password (the IV is the first 16 bytes of that key).

    Args:
        session: Session used to talk to the CookieCloud server.

    Returns:
        A ``{cookie_name: cookie_value}`` dict built from every domain entry
        whose name contains ``newsmth.net``.

    Raises:
        SystemExit: With status 1 on any network, decoding, decryption or
            format failure, or when no matching cookie is found.
    """
    logger.info("正在连接 CookieCloud 获取验证信息...")
    req_url = f"{CC_URL}/get/{CC_UUID}"
    try:
        response = session.post(req_url, json={"password": CC_PASSWORD}, timeout=10)
        response.raise_for_status()
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose
        # JSON decode error is not a RequestException subclass.
        logger.error(f"连接 CookieCloud 失败: {e}")
        sys.exit(1)

    # A JSON reply that is not an object (list, string, null) has no cookie_data.
    cookie_data = data.get("cookie_data") if isinstance(data, dict) else None
    if not cookie_data:
        logger.error("CookieCloud 返回的数据为空，请检查 UUID 或服务端同步状态。")
        sys.exit(1)

    if isinstance(cookie_data, dict):
        all_cookies = cookie_data
    elif isinstance(cookie_data, str):
        try:
            key = hashlib.md5(CC_PASSWORD.encode('utf-8')).hexdigest().encode('utf-8')
            iv = key[:16]
            cipher = AES.new(key, AES.MODE_CBC, iv)
            encrypted_bytes = base64.b64decode(cookie_data)
            decrypted_bytes = unpad(cipher.decrypt(encrypted_bytes), AES.block_size)
            all_cookies = json.loads(decrypted_bytes.decode('utf-8'))
        except Exception as e:
            logger.error(f"Cookie 数据解密失败: {e}")
            sys.exit(1)
    else:
        logger.error("获取到的 Cookie 数据格式异常。")
        sys.exit(1)

    # The decrypted payload must be a domain -> cookie-list mapping.
    if not isinstance(all_cookies, dict):
        logger.error("获取到的 Cookie 数据格式异常。")
        sys.exit(1)

    pw_cookies = {}
    for domain, cookies_list in all_cookies.items():
        if 'newsmth.net' not in domain:
            continue
        if not isinstance(cookies_list, list):
            logger.warning(f"跳过格式异常的 Cookie 条目 (域名: {domain})")
            continue
        for c in cookies_list:
            # Skip malformed entries instead of crashing with KeyError/TypeError.
            if not isinstance(c, dict) or c.get("name") is None or c.get("value") is None:
                logger.warning(f"跳过格式异常的 Cookie 条目 (域名: {domain})")
                continue
            pw_cookies[c["name"]] = c["value"]

    if not pw_cookies:
        logger.error("数据提取完毕，但未找到 newsmth.net 的关联 Cookie。")
        sys.exit(1)

    return pw_cookies

def verify_smth_connection_and_cookie(session: requests.Session) -> bool:
    """Check up front that the site is reachable and the session looks logged in.

    Fetches ``BASE_URL`` and scans the returned HTML for markers that this
    script treats as signs of a logged-out page (a Tencent captcha element or
    the login heading text).

    Args:
        session: Session already carrying the site cookies.

    Returns:
        True when the page loads and no logged-out marker is present; False on
        any request failure or when a marker is found.
    """
    logger.info("正在验证水木社区服务器连通性及 Cookie 状态...")
    logged_out_markers = ('id="TencentCaptcha"', "用户登录")

    try:
        homepage = session.get(BASE_URL, headers=HEADERS, timeout=10)
        homepage.raise_for_status()

        body = homepage.text
        if any(marker in body for marker in logged_out_markers):
            logger.error("水木社区连通正常，但 Cookie 可能已失效（检测到未登录特征），请更新 CookieCloud！")
            return False
    except requests.exceptions.RequestException as err:
        logger.error(f"水木社区服务器当前无法访问或不稳定，停止本次任务。详情: {err}")
        return False

    logger.info("连通性测试通过，Cookie 状态有效。")
    return True

def load_history(path=None) -> dict:
    """Load the forwarding history as a ``{url: last_reply_time}`` dict.

    Each line of the file has the form ``<url>,<timestamp>``. The line is split
    on its LAST comma, so a URL that itself contains commas is read back
    intact (the timestamp written by this script never contains one). Lines
    without a comma are ignored.

    Args:
        path: History file to read. Defaults to the module-level
            ``HISTORY_FILE`` when omitted or None.

    Returns:
        The parsed history in file order; an empty dict if the file does not
        exist. If reading fails part-way, whatever was parsed so far is
        returned and a warning is logged.
    """
    target = HISTORY_FILE if path is None else path
    history = {}
    if not os.path.exists(target):
        return history
    try:
        with open(target, "r", encoding="utf-8") as f:
            for line in f:
                url, sep, last_time = line.strip().rpartition(',')
                if sep:
                    history[url] = last_time
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError is a ValueError, not an OSError, so it needs
        # to be listed explicitly to keep a corrupt file from crashing the run.
        logger.warning(f"读取历史记录文件失败: {e}")
    return history

def save_history(history_dict: dict, path=None, max_records=None):
    """Persist the most recent history entries, one ``<url>,<timestamp>`` per line.

    Only the last ``max_records`` items (in dict insertion order) are kept.
    The data is written to a sibling ``<path>.tmp`` file first and then moved
    over the real file with ``os.replace``, so an interrupted write does not
    leave a truncated history file behind.

    Args:
        history_dict: Mapping of thread URL to last forwarded reply time.
        path: Destination file. Defaults to the module-level ``HISTORY_FILE``.
        max_records: Maximum number of trailing entries to keep. Defaults to
            the module-level ``MAX_HISTORY_RECORDS``.

    Returns:
        None. I/O failures are logged, not raised.
    """
    target = HISTORY_FILE if path is None else path
    limit = MAX_HISTORY_RECORDS if max_records is None else max_records
    history_items = list(history_dict.items())[-limit:]
    tmp_path = f"{target}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            f.writelines(f"{url},{last_time}\n" for url, last_time in history_items)
        # Swap the finished file into place in a single step.
        os.replace(tmp_path, target)
    except OSError as e:
        logger.error(f"写入历史记录文件失败: {e}")

def extract_best_title(bookmark_data: dict) -> str:
    """Return the first non-blank title found in a bookmark record.

    Candidates are tried in this order: the bookmark's own ``title``, then
    ``content["title"]``, then ``content["html"]["title"]``. Nested levels are
    only consulted when they are dicts. A candidate counts only if it is
    truthy and its string form is non-empty after stripping whitespace.

    Args:
        bookmark_data: A bookmark record as a dict.

    Returns:
        The stripped title string, or the placeholder ``"无标题"`` when no
        candidate qualifies.
    """
    candidates = [bookmark_data.get("title")]

    content = bookmark_data.get("content", {})
    if isinstance(content, dict):
        candidates.append(content.get("title"))
        html_data = content.get("html", {})
        if isinstance(html_data, dict):
            candidates.append(html_data.get("title"))

    for raw in candidates:
        if not raw:
            continue
        cleaned = str(raw).strip()
        if cleaned:
            return cleaned

    return "无标题"

def _normalize_smth_url(raw_url: str) -> str:
    """Rewrite a newsmth.net link to its HTTPS ``m.newsmth.net/article`` form."""
    url = raw_url
    if url.startswith("http://"):
        url = url.replace("http://", "https://", 1)
    url = url.replace("https://www.newsmth.net/nForum/#!article", "https://m.newsmth.net/article")
    url = url.replace("https://www.newsmth.net/article", "https://m.newsmth.net/article")
    return url


def get_karakeep_bookmarks(session: requests.Session) -> list:
    """Search KaraKeep for newsmth.net bookmarks and return cleaned thread links.

    Queries ``/api/v1/bookmarks/search`` page by page (at most 20 requests,
    20 items each), sending ``page``, ``offset`` and, when the previous reply
    provided one, ``cursor``. Each bookmark URL is de-duplicated, normalised to
    the HTTPS mobile-site form, and kept only if it then starts with
    ``https://m.newsmth.net/article``. Bookmarks whose ``content`` or ``url``
    is missing or null are treated as having an empty URL rather than
    crashing the run.

    Paging stops when a page is empty, when a page contains no URL that has
    not been seen before, or as soon as ``MAX_FETCH_COUNT`` valid items have
    been collected.

    Args:
        session: Session used for the API requests.

    Returns:
        A list of ``{"title": str, "url": str}`` dicts in API order.

    Raises:
        SystemExit: With status 1 when a request fails or the response body
            is not valid JSON.
    """
    endpoint = f"{KARAKEEP_URL}/api/v1/bookmarks/search"
    api_headers = {"Authorization": f"Bearer {KARAKEEP_API_KEY}", "Content-Type": "application/json"}

    parsed_bookmarks = []
    seen_urls = set()

    page = 1
    max_pages = 20
    offset = 0
    cursor = None

    try:
        while page <= max_pages:
            params = {
                "q": "newsmth.net",
                "limit": 20,
                "sortOrder": "desc",
                "page": page,
                "offset": offset
            }
            if cursor:
                params["cursor"] = cursor

            response = session.get(endpoint, headers=api_headers, params=params, timeout=10)
            response.raise_for_status()
            data = response.json()
            if not isinstance(data, dict):
                logger.warning("KaraKeep API 返回的数据格式异常，停止分页拉取。")
                break

            bookmarks = data.get("bookmarks") or []
            if not bookmarks:
                break

            new_items_count = 0
            for bm in bookmarks:
                if not isinstance(bm, dict):
                    continue
                title = extract_best_title(bm)

                # "content" or "url" may be absent or null; fall back to "".
                content = bm.get("content")
                original_url = content.get("url") if isinstance(content, dict) else ""
                if not isinstance(original_url, str):
                    original_url = ""

                if original_url in seen_urls:
                    continue

                seen_urls.add(original_url)
                new_items_count += 1

                # --- Link clean-up ---
                url = _normalize_smth_url(original_url)

                if url.startswith("https://m.newsmth.net/article"):
                    parsed_bookmarks.append({"title": title, "url": url})

                    # Circuit breaker: stop fetching once the configured cap is hit.
                    if len(parsed_bookmarks) >= MAX_FETCH_COUNT:
                        logger.info(f"已达到设定的最大获取数量上限 ({MAX_FETCH_COUNT})，停止后续 API 拉取。")
                        return parsed_bookmarks
                else:
                    logger.debug(f"  [-] 忽略非帖子/不合规链接: [{title[:15]}...] -> {original_url}")

            next_cursor = data.get("nextCursor") or data.get("cursor") or data.get("next_cursor")

            # A page with nothing new means the server is repeating itself.
            if new_items_count == 0:
                break

            if next_cursor:
                cursor = next_cursor
            offset += len(bookmarks)
            page += 1

        logger.info(f"KaraKeep API 历经 {page - 1} 次分页拉取，清洗后共得到 {len(parsed_bookmarks)} 个有效的水木帖子。")

    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose
        # JSON decode error is not a RequestException subclass.
        logger.error(f"请求 KaraKeep API 失败: {e}")
        sys.exit(1)

    return parsed_bookmarks

def process_and_forward(session: requests.Session, item: dict, history: dict) -> bool:
    """Forward one thread by email if it has a reply newer than the recorded one.

    Loads the thread at ``?p=100``, takes the last link on the page matching
    ``RE_FORWARD_LINK``, and reads a timestamp from the text of that link's
    enclosing ``<li>`` (falling back to ``1970-01-01 00:00:00``). If the
    history already holds an equal or later timestamp for the thread, nothing
    is sent. Otherwise the forward form is POSTed and, on success, the history
    entry is re-inserted at the end of the dict with the new timestamp.

    Args:
        session: Session carrying the site cookies.
        item: Dict with ``"url"`` and ``"title"`` keys.
        history: Mutable ``{thread_url: last_reply_time}`` mapping; updated in
            place on a successful forward.

    Returns:
        True if a forward was sent and recorded, otherwise False. Request and
        parsing errors are logged and reported as False.
    """
    source_url, title = item["url"], item["title"]

    # Drop any query string; the bare thread URL is also the history key.
    thread_url = source_url.split('?')[0]

    try:
        page = session.get(f"{thread_url}?p=100", headers=HEADERS, timeout=15)
        page.raise_for_status()

        anchors = BeautifulSoup(page.text, 'html.parser').find_all('a', href=RE_FORWARD_LINK)
        if not anchors:
            return False

        newest_anchor = anchors[-1]
        forward_url = BASE_URL + newest_anchor['href']

        newest_time = "1970-01-01 00:00:00"
        enclosing_li = newest_anchor.find_parent('li')
        if enclosing_li:
            stamp = RE_TIME_FORMAT.search(enclosing_li.get_text())
            if stamp:
                newest_time = stamp.group(0)

        # Timestamps share a fixed-width format, so string comparison orders them.
        previous_time = history.get(thread_url, "")
        if previous_time and newest_time <= previous_time:
            return False

        logger.info(f"触发转寄 -> {title} (时间 {previous_time or '无记录'} => {newest_time})")

        time.sleep(1.5)

        form = {"target": TARGET_EMAIL, "threads": "on", "submit": "转寄"}
        reply = session.post(forward_url, data=form, headers=HEADERS, timeout=10)

        # NOTE(review): any HTTP 200 counts as success here even without the
        # success text in the body — confirm this leniency is intended.
        if "操作成功" not in reply.text and reply.status_code != 200:
            logger.warning(f"转寄请求失败，返回状态码: {reply.status_code}")
            return False

        logger.info(f"推送成功 -> {TARGET_EMAIL}")
        # Pop then re-insert so the entry moves to the end of the dict.
        history.pop(thread_url, None)
        history[thread_url] = newest_time
        return True

    except requests.exceptions.RequestException as err:
        logger.error(f"网络请求异常 [{title[:15]}...]: {err}")
        return False
    except Exception as err:
        logger.error(f"解析异常 [{title[:15]}...]: {err}")
        return False

def main():
    """Run one full forwarding pass.

    Steps: build the retrying session, load the site cookies from CookieCloud
    into it, verify the site is reachable and logged in, load the history,
    fetch the KaraKeep bookmarks, log the work list, then process each
    bookmark with a 2-second pause after each one.

    The history is written back in a ``finally`` clause, so forwards that
    already succeeded are recorded even if the loop is interrupted (for
    example by Ctrl-C) and are not sent again on the next run.

    Raises:
        SystemExit: With status 1 when the connectivity/cookie check fails
            (the helper functions may also exit on their own failures).
    """
    logger.info("=" * 60)
    logger.info("启动 KaraKeep 水木合集自动转寄任务")

    session = create_robust_session()

    smth_cookies = fetch_and_decrypt_cookies(session)
    requests.utils.add_dict_to_cookiejar(session.cookies, smth_cookies)

    if not verify_smth_connection_and_cookie(session):
        logger.info("=" * 60)
        sys.exit(1)

    history = load_history()
    bookmarks = get_karakeep_bookmarks(session)

    logger.info("=" * 60)
    logger.info(f"=== 即将处理以下 {len(bookmarks)} 个有效书签 ===")
    for idx, item in enumerate(bookmarks, 1):
        logger.info(f"[{idx:02d}] {item['title']}")
        logger.info(f"     -> {item['url']}")
    logger.info("=" * 60)

    forward_count = 0
    try:
        for item in bookmarks:
            if process_and_forward(session, item, history):
                forward_count += 1

            time.sleep(2)
    finally:
        # Persist whatever was forwarded, even on an interrupted run.
        save_history(history)

    logger.info("-" * 60)
    logger.info(f"任务结束：本次共新增或更新了 {forward_count} 份合集。")
    logger.info("=" * 60)

if __name__ == "__main__":
    # Run from the script's own directory so the relative history file path
    # resolves next to this file regardless of the caller's working directory.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    os.chdir(script_dir or '.')
    main()
