import email
import re
from email.header import decode_header
from email.utils import parsedate_to_datetime
from datetime import datetime
from imapclient import IMAPClient
from rich.console import Console
from rich.table import Table
from rich.text import Text
from rich.prompt import Confirm

# ================= Configuration =================
# IMAP host and account used by the script entry point to log in.
IMAP_SERVER = 'imap.qq.com'
EMAIL_ADDRESS = '8785@qq.com'
# NOTE(review): the credential is hard-coded in source; consider loading it
# from the environment or a secrets store instead of committing it.
AUTH_CODE = 'nnfiiimdejf'
# How many of the most recent search results are inspected (used as a tail slice).
FETCH_COUNT = 200  
# Only messages whose decoded subject contains this text are considered;
# an empty string disables the subject filter.
SUBJECT_KEYWORD = '合集转寄'  
AUTO_DELETE = True  # <--- whether deletion may run unattended (True: delete automatically, False: ask for confirmation first)
# ============================================

# Shared Rich console used for all terminal output in this script.
console = Console()

# ================= Precompiled regular expressions =================
# Colour/control sequences: an optional ESC, '[', digits/semicolons, one letter.
# NOTE(review): because ESC is optional this also matches ordinary text such
# as "[a" at the start of a bracketed word - confirm that is intended.
RE_COLOR = re.compile(r'\x1b?\[[0-9;]*[A-Za-z]')
# Divider between posts: a run of '─' enclosed by '☆' on both sides.
RE_DIVIDER = re.compile(r'☆[─]+☆')
# Any run of whitespace (callers collapse it to a single space).
RE_WHITESPACE = re.compile(r'\s+')
# Header-style lines at the very start of a line (forwarder, title, posting
# site, source, Date/From/To), matched case-insensitively.
RE_BBS_HEADER = re.compile(r'^(转寄人|标\s*题|发信站|来\s*源|Date|From|To):', re.IGNORECASE)

# ================= 核心工具函数 =================

def _decode_header_chunk(raw, charset):
    """Decode one byte chunk returned by ``decode_header``.

    Args:
        raw: The undecoded bytes of the chunk.
        charset: The charset label attached to the chunk, or ``None``.

    Returns:
        The decoded text. Undecodable bytes are dropped rather than raising.
    """
    codec = (charset or 'utf-8').lower()
    # gb18030 is a superset of GBK/GB2312. Mail is routinely labelled
    # "gb2312" while containing GBK-extension characters, which the narrow
    # codec combined with errors='ignore' would silently delete.
    if codec in ('gb2312', 'gbk'):
        codec = 'gb18030'
    try:
        return raw.decode(codec, errors='ignore')
    except (LookupError, ValueError):
        # Unknown or unusable codec name (e.g. "unknown-8bit").
        return raw.decode('gb18030', errors='ignore')


def decode_str(s):
    """Decode an RFC 2047 encoded header value into plain text.

    Args:
        s: The raw header value; may be empty or ``None``.

    Returns:
        The decoded text with CR/LF removed, or the placeholder "无标题"
        when ``s`` is falsy.
    """
    if not s:
        return "无标题"

    pieces = []
    for value, charset in decode_header(s):
        if isinstance(value, bytes):
            pieces.append(_decode_header_chunk(value, charset))
        else:
            pieces.append(str(value))
    # Folded headers carry line breaks that are not part of the subject.
    return "".join(pieces).replace('\n', '').replace('\r', '')

def _decode_body_bytes(payload, charset):
    """Decode a transfer-decoded body payload into text.

    Args:
        payload: The bytes returned by ``get_payload(decode=True)``.
        charset: The declared content charset, or ``None``.

    Returns:
        The decoded text. Undecodable bytes are dropped rather than raising.
    """
    codec = (charset or 'utf-8').lower()
    # gb18030 is a superset of GBK/GB2312; decoding "gb2312"-labelled text
    # with the narrow codec would silently drop GBK-extension characters.
    if codec in ('gb2312', 'gbk'):
        codec = 'gb18030'
    try:
        return payload.decode(codec, errors='ignore')
    except (LookupError, ValueError):
        # Unknown charset label: fall back instead of discarding the part.
        return payload.decode('gb18030', errors='ignore')


def get_email_body(msg):
    """Extract the plain-text body of a parsed email message.

    For multipart messages every ``text/plain`` part found by ``walk()`` is
    decoded and concatenated in order. For a non-multipart message the single
    payload is decoded whatever its content type is.

    Args:
        msg: An ``email.message.Message`` instance.

    Returns:
        The decoded body text, or an empty string when nothing is available.
    """
    if msg.is_multipart():
        candidates = [
            part for part in msg.walk()
            if part.get_content_type() == 'text/plain'
        ]
    else:
        candidates = [msg]

    chunks = []
    for part in candidates:
        charset = part.get_content_charset()
        try:
            payload = part.get_payload(decode=True)
        except Exception:
            # Best effort: a part whose transfer encoding cannot be undone
            # is skipped so the remaining parts are still returned.
            continue
        if payload:
            chunks.append(_decode_body_bytes(payload, charset))
    return "".join(chunks)

def extract_core_content(text):
    """Reduce a forwarded message body to the text used for comparison.

    Colour sequences are removed first. If the quote marker
    "的大作中提到:" occurs, the result is the text after its first occurrence
    up to the first divider. Otherwise every line that does not start with a
    recognised header label is kept. Either way, whitespace runs are
    collapsed to single spaces and the result is stripped.

    Args:
        text: The decoded plain-text body; may be empty.

    Returns:
        The normalised core text, or an empty string for falsy input.
    """
    if not text:
        return ""

    stripped = RE_COLOR.sub('', text)
    marker = "的大作中提到:"

    _, found, tail = stripped.partition(marker)
    if found:
        # Only the first post is kept; later replies follow the divider.
        leading_post = RE_DIVIDER.split(tail, maxsplit=1)[0]
        return RE_WHITESPACE.sub(' ', leading_post).strip()

    # Fallback: drop header-style lines and keep everything else.
    kept = []
    for raw_line in stripped.split('\n'):
        if RE_BBS_HEADER.match(raw_line):
            continue
        kept.append(raw_line.strip())
    return RE_WHITESPACE.sub(' ', '\n'.join(kept)).strip()

def safe_get_timestamp(date_str, internal_date):
    """Return a POSIX timestamp for a message, so all dates compare as numbers.

    The ``Date`` header is preferred. When it is missing, unparseable, or
    evaluates to zero, ``internal_date`` is used instead.

    Args:
        date_str: The raw ``Date`` header value, or ``None``.
        internal_date: Fallback object exposing ``.timestamp()`` (presumably
            the server-side INTERNALDATE as a datetime - verify against the
            caller), or ``None``.

    Returns:
        The timestamp as a float, or ``0`` when neither source is usable.
    """
    from datetime import timezone

    ts = 0
    if date_str:
        try:
            parsed = parsedate_to_datetime(date_str)
            # A "-0000" zone yields a naive datetime that represents UTC;
            # calling .timestamp() on it directly would read it as local
            # time and make the result depend on the machine's timezone.
            if parsed.tzinfo is None:
                parsed = parsed.replace(tzinfo=timezone.utc)
            ts = parsed.timestamp()
        except Exception:
            # Header content is untrusted; any parse failure just means we
            # fall back to the server-side date below.
            ts = 0
    if not ts and internal_date:
        ts = internal_date.timestamp()
    return ts

# ================= 业务逻辑函数 =================

def _newest_first_group(subject, mails):
    """Build one result group: keep the newest mail, mark the rest for deletion."""
    ordered = sorted(mails, key=lambda item: item[1], reverse=True)
    keep_uid, keep_ts = ordered[0]
    return {
        'subject': subject,
        'keep_uid': keep_uid,
        'keep_ts': keep_ts,
        'deletes': ordered[1:],
    }


def process_emails(client):
    """Find duplicate messages among the most recent ones and hand them to the UI.

    Steps:
      1. Fetch headers of the last ``FETCH_COUNT`` search results and group
         the messages whose subject contains ``SUBJECT_KEYWORD`` by subject.
      2. For subjects shared by several messages, fetch the full messages and
         reduce each body with ``extract_core_content``.
      3. Within a subject, messages with identical non-empty core content are
         duplicates: the newest is kept and the others are marked for deletion.
      4. Sort the groups newest-first and call ``render_and_execute``.

    Args:
        client: A logged-in client with a selected folder, providing
            ``search`` and ``fetch``.
    """
    all_ids = client.search(['ALL'])
    recent_ids = all_ids[-FETCH_COUNT:]
    actual_check_count = len(recent_ids)

    if actual_check_count == 0:
        console.print("[bold yellow]收件箱为空，没有找到任何邮件。[/bold yellow]")
        return

    console.print(f"[info] [步骤 1/2] 正在拉取最近 {actual_check_count} 封邮件的标题进行初步筛查...")
    response = client.fetch(recent_ids, ['BODY.PEEK[HEADER]', 'INTERNALDATE'])

    subject_groups = {}
    matched_count = 0

    # 1. Subject pre-filter: only headers are downloaded at this stage.
    for uid, data in response.items():
        msg = email.message_from_bytes(data.get(b'BODY[HEADER]', b''))
        subject = decode_str(msg.get("Subject", ""))

        if SUBJECT_KEYWORD and SUBJECT_KEYWORD not in subject:
            continue

        matched_count += 1
        ts = safe_get_timestamp(msg.get("Date"), data.get(b'INTERNALDATE'))
        subject_groups.setdefault(subject, []).append((uid, ts))

    if matched_count == 0:
        console.print(f"[bold yellow]未找到标题包含 '{SUBJECT_KEYWORD}' 的邮件。[/bold yellow]")
        return

    # 2. Bodies are only needed where a subject occurs more than once.
    uids_to_fetch_body = [
        uid
        for mails in subject_groups.values() if len(mails) > 1
        for uid, _ in mails
    ]
    body_dict = {}

    if uids_to_fetch_body:
        console.print(f"[info] [步骤 2/2] 发现 {len(uids_to_fetch_body)} 封同名标题，正在拉取正文深度对比...")
        body_response = client.fetch(uids_to_fetch_body, ['BODY.PEEK[]'])
        for uid, data in body_response.items():
            msg = email.message_from_bytes(data.get(b'BODY[]', b''))
            body_dict[uid] = extract_core_content(get_email_body(msg))

    # 3. Group by content and decide what is a duplicate.
    processed_groups = []
    for subject, mails in subject_groups.items():
        if len(mails) == 1:
            processed_groups.append(_newest_first_group(subject, mails))
            continue

        content_subgroups = {}
        for uid, ts in mails:
            content = body_dict.get(uid, "")
            if not content:
                # No comparable text (HTML-only mail, undecodable body, or
                # missing from the fetch response). An empty key would make
                # unrelated mails look identical and get them deleted, so
                # such a mail is always kept as its own group.
                processed_groups.append(_newest_first_group(subject, [(uid, ts)]))
                continue
            content_subgroups.setdefault(content, []).append((uid, ts))

        for sub_mails in content_subgroups.values():
            processed_groups.append(_newest_first_group(subject, sub_mails))

    # 4. Newest kept message first, then render and (optionally) delete.
    processed_groups.sort(key=lambda group: group['keep_ts'], reverse=True)
    render_and_execute(client, processed_groups, actual_check_count, matched_count)

def _format_timestamp(ts):
    """Format a POSIX timestamp as local 'YYYY-MM-DD HH:MM', or a placeholder."""
    if not ts:
        return "未知时间"
    try:
        return datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M')
    except (OverflowError, OSError, ValueError):
        # A bogus Date header can produce a timestamp outside the platform's
        # supported range; show the placeholder instead of aborting the run.
        return "未知时间"


def render_and_execute(client, processed_groups, check_count, matched_count):
    """Print the keep/delete table and delete the duplicates from the server.

    Args:
        client: Client providing ``delete_messages`` and ``expunge``.
        processed_groups: Dicts with the keys ``subject``, ``keep_uid``,
            ``keep_ts`` and ``deletes`` (a list of ``(uid, timestamp)``).
        check_count: Number of messages that were inspected (shown in title).
        matched_count: Number of messages that matched the subject filter.

    Deletion runs immediately when ``AUTO_DELETE`` is true; otherwise the user
    is asked to confirm first.
    """
    table_title = f"邮件处理清单 (追溯范围: {check_count} 封 | 匹配到: {matched_count} 封)"
    if SUBJECT_KEYWORD:
        table_title += f" - 过滤词: '{SUBJECT_KEYWORD}'"

    table = Table(title=table_title, show_lines=True)
    table.add_column("操作", justify="center")
    table.add_column("发送时间", style="cyan")
    table.add_column("标题", style="white")
    table.add_column("UID", style="dim")

    uids_to_delete = []

    for group in processed_groups:
        subject = group['subject']
        # Subjects come from untrusted mail. A plain str cell is parsed as
        # Rich markup (tags get swallowed or raise MarkupError), so the
        # subject is wrapped in Text to be rendered literally.
        table.add_row(
            Text("保留", style="bold green"),
            _format_timestamp(group['keep_ts']),
            Text(subject),
            str(group['keep_uid']),
        )

        for del_uid, del_ts in group['deletes']:
            uids_to_delete.append(del_uid)
            table.add_row(
                Text("删除", style="bold red"),
                _format_timestamp(del_ts),
                Text(subject, style="dim"),
                str(del_uid),
            )

    console.print(table)

    # 5. Clean-up; the strategy depends on AUTO_DELETE.
    if not uids_to_delete:
        console.print("[bold green]检查完毕，没有发现完全重复的邮件。[/bold green]")
        return

    console.print(f"\n[bold yellow]在比对标题与正文首帖后，发现了 {len(uids_to_delete)} 封完全重复的邮件。[/bold yellow]")

    if AUTO_DELETE:
        console.print("[bold cyan]➜ 已开启自动删除模式，正在清理服务器邮件...[/bold cyan]")
        done_message = "[bold green]✔ 重复邮件已成功自动彻底删除！[/bold green]"
    elif Confirm.ask("确定要从服务器上彻底删除这些重复邮件吗？(不可逆)"):
        done_message = "[bold green]✔ 重复邮件已成功彻底删除！[/bold green]"
    else:
        console.print("[info] 操作取消。")
        return

    client.delete_messages(uids_to_delete)
    client.expunge()
    console.print(done_message)

# ================= 主程序入口 =================

def main():
    """Log in to the mailbox, run the de-duplication, and report failures.

    Ctrl+C and any other exception are caught here and reported on the
    console instead of producing a traceback.
    """
    try:
        with IMAPClient(IMAP_SERVER, use_uid=True) as client:
            with console.status("[bold green]正在连接并登录QQ邮箱..."):
                client.login(EMAIL_ADDRESS, AUTH_CODE)
                # Opened read-write because duplicates are deleted later.
                client.select_folder('INBOX', readonly=False)

            process_emails(client)

    except KeyboardInterrupt:
        console.print("\n[bold red]检测到用户中止操作 (Ctrl+C)，程序已安全退出。[/bold red]")
    except Exception as e:
        # The message is passed as literal Text: server responses may contain
        # square brackets, which Rich would otherwise parse as markup
        # (swallowing them or raising inside this handler).
        console.print("\n[bold red]执行过程中发生错误:[/bold red]", Text(str(e)))


if __name__ == "__main__":
    main()
