#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import logging
import os
import sys
import time
from datetime import datetime, timedelta, timezone

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

# ================= Logging configuration =================
# Configure the root logger via basicConfig: INFO threshold, and each record
# rendered as "<timestamp> - <level> - <message>".
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# ================= Configuration =================
# Every setting can be overridden through an environment variable of the same
# purpose; when the variable is unset the original hard-coded value is used,
# so existing deployments behave exactly as before.
#
# NOTE(review): the literal API key below is kept only as a backward-compatible
# fallback. A credential committed to source should be rotated and the default
# removed once KARAKEEP_API_KEY is supplied by the environment.

# Base URL of the KaraKeep instance; a trailing "/" is stripped because the
# endpoint paths appended to it start with "/".
KARAKEEP_URL = os.environ.get("KARAKEEP_URL", "http://4.9.1.8:303").rstrip("/")
# Bearer token sent in the Authorization header.
KARAKEEP_API_KEY = os.environ.get("KARAKEEP_API_KEY", "ak2_d3a838cf9bb166")

# Bookmarks created more than this many days ago are deleted.
DAYS_TO_KEEP = int(os.environ.get("KARAKEEP_DAYS_TO_KEEP", "15"))
# Only bookmarks whose URL contains this substring are candidates for deletion.
SEARCH_KEYWORD = os.environ.get("KARAKEEP_SEARCH_KEYWORD", "newsmth.net")
# Page size requested from the list endpoint; larger values mean fewer requests.
BATCH_SIZE = int(os.environ.get("KARAKEEP_BATCH_SIZE", "100"))

def create_robust_session() -> requests.Session:
    """Return a ``requests.Session`` preconfigured for the KaraKeep API.

    The session carries the bearer token from ``KARAKEEP_API_KEY`` and a JSON
    ``Content-Type`` header. Both ``http://`` and ``https://`` are served by
    one adapter whose retry policy allows up to 3 retries, with a backoff
    factor of 1, for GET and DELETE requests answered with status
    429, 500, 502, 503 or 504.
    """
    retrying_adapter = HTTPAdapter(
        max_retries=Retry(
            total=3,
            backoff_factor=1,
            status_forcelist=[429, 500, 502, 503, 504],
            allowed_methods=["GET", "DELETE"],
        )
    )

    http = requests.Session()
    for scheme in ("http://", "https://"):
        http.mount(scheme, retrying_adapter)

    http.headers["Authorization"] = f"Bearer {KARAKEEP_API_KEY}"
    http.headers["Content-Type"] = "application/json"
    return http

def _parse_created_at(raw):
    """Parse an ISO-8601 creation timestamp into a timezone-aware datetime.

    A trailing ``Z`` suffix is rewritten to ``+00:00`` before parsing, and a
    value that carries no UTC offset is treated as UTC.

    Raises:
        TypeError: if *raw* is not a string.
        ValueError: if *raw* cannot be parsed by ``datetime.fromisoformat``.
    """
    if not isinstance(raw, str):
        raise TypeError(f"expected str, got {type(raw).__name__}")
    if raw.endswith("Z"):
        # Replace only the suffix; str.replace would touch every "Z".
        raw = raw[:-1] + "+00:00"
    parsed = datetime.fromisoformat(raw)
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed


def clean_expired_bookmarks():
    """Delete bookmarks matching ``SEARCH_KEYWORD`` older than ``DAYS_TO_KEEP`` days.

    Phase 1 pages through ``GET {KARAKEEP_URL}/api/v1/bookmarks`` using the
    cursor returned by each response, keeps the bookmarks whose URL contains
    ``SEARCH_KEYWORD``, and collects those created before the cut-off.
    Phase 2 sends one ``DELETE`` per collected bookmark and logs the outcome.

    A failed delete is logged and the loop continues with the next bookmark.

    Raises:
        SystemExit: with status 1 when a list request fails, or when its
            response body is not a JSON object.
    """
    now = datetime.now(timezone.utc)
    threshold_date = now - timedelta(days=DAYS_TO_KEEP)
    logger.info(f"🚀 任务启动: 将进行全库遍历，清理早于 {threshold_date.strftime('%Y-%m-%d %H:%M:%S UTC')} 的 '{SEARCH_KEYWORD}' 书签")

    # The plain list endpoint is used instead of the search endpoint so that
    # results are not cut off by the search endpoint's result limit.
    list_endpoint = f"{KARAKEEP_URL}/api/v1/bookmarks"

    expired_bookmarks_to_delete = []
    seen_ids = set()
    total_scanned = 0
    pull_count = 0
    cursor = None

    with create_robust_session() as session:
        # ================= Phase 1: scan every bookmark =================
        logger.info("🔍 [阶段 1/2] 正在绕过搜索限制，执行全库数据拉取...")

        while True:
            pull_count += 1
            # Pagination is driven by the cursor alone; no search query is sent.
            params = {"limit": BATCH_SIZE}
            if cursor:
                params["cursor"] = cursor

            try:
                response = session.get(list_endpoint, params=params, timeout=15)
                response.raise_for_status()
                data = response.json()
            except (requests.exceptions.RequestException, ValueError) as e:
                # ValueError covers JSON decode failures on requests versions
                # whose decode error is not a RequestException subclass.
                logger.error(f"❌ 请求 KaraKeep API 失败: {e}")
                sys.exit(1)

            if not isinstance(data, dict):
                logger.error(f"❌ 请求 KaraKeep API 失败: 响应不是 JSON 对象 (实际类型: {type(data).__name__})")
                sys.exit(1)

            # "bookmarks" may be absent or null; both mean an empty page.
            bookmarks = data.get("bookmarks") or []
            if not bookmarks:
                break

            for bm in bookmarks:
                if not isinstance(bm, dict):
                    continue

                bm_id = bm.get("id")
                if not bm_id or bm_id in seen_ids:
                    continue

                seen_ids.add(bm_id)
                total_scanned += 1

                # "content" and "url" can be present but null (or not the
                # expected type); normalise them so the substring test below
                # cannot raise.
                content = bm.get("content")
                if not isinstance(content, dict):
                    content = {}
                url = content.get("url")
                if not isinstance(url, str):
                    url = ""

                # Keyword filtering is done locally, by URL substring.
                if SEARCH_KEYWORD not in url:
                    continue

                title = bm.get("title") or content.get("title") or "未命名书签"
                created_str = bm.get("createdAt") or bm.get("created_at")
                if not created_str:
                    continue

                try:
                    created_date = _parse_created_at(created_str)
                except (TypeError, ValueError) as e:
                    logger.error(f"解析书签时间异常 (ID: {bm_id}): {e}")
                    continue

                formatted_time = created_date.strftime('%Y-%m-%d %H:%M:%S')
                logger.info(f"📄 提取到匹配书签: [{formatted_time}] {title}")

                # Expired when created strictly before the cut-off.
                if created_date < threshold_date:
                    expired_bookmarks_to_delete.append((bm_id, title, created_date))

            # Cursor for the next page; several key spellings are accepted.
            next_cursor = data.get("nextCursor") or data.get("cursor") or data.get("next_cursor")
            if not next_cursor:
                break

            if next_cursor == cursor:
                # Re-sending the same cursor would fetch the same page again
                # and never terminate, so stop paging here.
                logger.warning("⚠️ 服务端返回的游标未发生变化，为避免死循环已停止翻页。")
                break

            cursor = next_cursor
            time.sleep(0.3)

        logger.info(f"✅ 全库扫描完毕。历经 {pull_count} 次拉取，共排查了 {total_scanned} 条底层数据。")
        logger.info(f"📌 锁定 {len(expired_bookmarks_to_delete)} 个 '{SEARCH_KEYWORD}' 过期书签准备清理。")

        # ================= Phase 2: delete the collected bookmarks =================
        if not expired_bookmarks_to_delete:
            logger.info("🎉 当前没有需要清理的过期书签，任务完成。")
            return

        logger.info(f"💥 [阶段 2/2] 开始执行删除操作（共 {len(expired_bookmarks_to_delete)} 条）...")
        total_deleted = 0

        for bm_id, title, created_date in expired_bookmarks_to_delete:
            delete_endpoint = f"{KARAKEEP_URL}/api/v1/bookmarks/{bm_id}"
            try:
                del_response = session.delete(delete_endpoint, timeout=10)
            except Exception as e:
                # Deliberately broad: deletion is best-effort per bookmark, so
                # one failing request must not abort the remaining deletions.
                logger.error(f"删除请求异常 (ID: {bm_id}): {e}")
                continue

            if del_response.status_code in (200, 204):
                logger.info(f"🗑️ 成功删除书签: [{created_date.strftime('%Y-%m-%d')}] {title} (ID: {bm_id})")
                total_deleted += 1
            else:
                logger.error(f"⚠️ 删除失败，ID: {bm_id}, 状态码: {del_response.status_code}")

            # Short pause between deletes to avoid hammering the server.
            time.sleep(0.2)

    logger.info(f"🎉 清理任务彻底完成！本次共清除了 {total_deleted} 条过期收藏。")

if __name__ == "__main__":
    # Script entry point: run the clean-up once. A Ctrl-C is logged and then
    # turned into a normal exit with status 0.
    try:
        clean_expired_bookmarks()
    except KeyboardInterrupt:
        logger.info("\n🛑 用户手动中断了程序。")
        raise SystemExit(0)
