#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
migra-drive.py — Migração por PAR (dest,orig), com marcadores de log para UI.

Entradas:
  --sa-json   : Service Account JSON (DWD habilitado)
  --folders   : CSV "email_destino,email_origem,pasta_id_correta"
  --manifest  : CSV "email_origem,email_destino,orig_file_id,name,mime,md5,mtime,path"
  --update-changed : "", "skip", "replace", "keep-both"
                     "" => modo FULL (não substituir os já migrados)
"""

import argparse, csv, json, logging, os, random, time
from collections import defaultdict
from typing import Dict, List, Tuple

from google.oauth2 import service_account
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

SCOPES = ["https://www.googleapis.com/auth/drive"]
FOLDER_MIME = "application/vnd.google-apps.folder"

# ---------------- logging ----------------
def setup_logging(log_dir:str, level:str)->str:
    run_id=time.strftime("%Y%m%d-%H%M%S")
    run_dir=os.path.join(log_dir, run_id); os.makedirs(run_dir, exist_ok=True)
    logging.getLogger().handlers.clear()
    lvl=getattr(logging, level.upper(), logging.INFO)
    fmt=logging.Formatter("%(asctime)s [%(levelname)s] %(message)s")
    sh=logging.StreamHandler(); sh.setLevel(lvl); sh.setFormatter(fmt)
    fh=logging.FileHandler(os.path.join(run_dir,"run.log"), encoding="utf-8"); fh.setLevel(lvl); fh.setFormatter(fmt)
    root=logging.getLogger(); root.addHandler(sh); root.addHandler(fh); root.setLevel(lvl)
    return run_dir

def attach_pair_logger(run_dir:str, ed:str, eo:str):
    safe=lambda s: "".join(c if c.isalnum() or c in "._-@" else "_" for c in s)
    fh=logging.FileHandler(os.path.join(run_dir, f"{safe(ed)}__{safe(eo)}.log"), encoding="utf-8")
    fh.setLevel(logging.getLogger().level)
    fh.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] %(message)s"))
    logging.getLogger().addHandler(fh); return fh

def detach_logger(h):
    try:
        logging.getLogger().removeHandler(h); h.close()
    except Exception:
        pass

# ---------------- auth/backoff ----------------
def get_drive(user:str, sa_json:str):
    creds = service_account.Credentials.from_service_account_file(sa_json, scopes=SCOPES, subject=user)
    return build("drive","v3",credentials=creds, cache_discovery=False)
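# Domain-wide delegation: the service account impersonates `user` via the
# `subject` claim; its client ID must be authorized for the drive scope in the
# Workspace Admin console.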

RETRYABLE_STATUS={429,500,503}
RETRYABLE_403={"rateLimitExceeded","userRateLimitExceeded","sharingRateLimitExceeded",
               "domainRateLimitExceeded","backendError","internalError","dailyLimitExceeded"}
NONRETRY_403={"cannotCopyFile","fileNeverWritable","copyRequiresWriterPermission",
             "appNotAuthorizedToFile","insufficientFilePermissions","forbidden",
             "fileNotFound","teamDriveFileNotFound","domainPolicy","invalidSharingRequest"}

def reason(e:HttpError)->str:
    try:
        data=json.loads(e.content.decode("utf-8")); err=data.get("error",{})
        arr=err.get("errors",[]) or []
        if arr: return arr[0].get("reason") or ""
        return err.get("message") or ""
    except Exception:
        return ""

def should_retry(ex:Exception):
    if isinstance(ex, HttpError):
        st=getattr(getattr(ex,"resp",None),"status",None)
        try: st=int(st) if st is not None else None
        except Exception: st=None
        r=reason(ex)
        if st==403:
            if r in NONRETRY_403: return False, st
            if r in RETRYABLE_403: return True, st
            return False, st
        if st in RETRYABLE_STATUS: return True, st
        return False, st
    return True, None

def api(cb, desc, attempts):
    delay=0.8
    for i in range(1, attempts+1):
        try: return cb()
        except Exception as e:
            retry,st=should_retry(e)
            if not retry or i==attempts:
                if isinstance(e, HttpError):
                    logging.warning(f"[no-retry] {desc} status={st} reason={reason(e)}")
                raise
            sl=min(delay*1.7+random.uniform(0,0.5),20)
            logging.warning(f"[backoff] {desc} tentativa {i}/{attempts} aguardando {sl:.2f}s")
            time.sleep(sl); delay=sl
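# Ignoring jitter, the sleeps grow geometrically by a factor of 1.7, capped at
# 20s: ~1.36s, ~2.31s, ~3.93s, ~6.68s, ... (each step's jitter of up to 0.5s is
# folded into the next base delay).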

# ---------------- drive wrappers ----------------
# "attempts" is popped out of kw first so it is never forwarded to the API call.
def files_list(drv, **kw):
    a=kw.pop("attempts",5); return api(lambda: drv.files().list(**kw).execute(), "files.list", a)
def files_get(drv, **kw):
    a=kw.pop("attempts",5); return api(lambda: drv.files().get(**kw).execute(), "files.get", a)
def files_create(drv, **kw):
    a=kw.pop("attempts",5); return api(lambda: drv.files().create(**kw).execute(), "files.create", a)
def files_update(drv, **kw):
    a=kw.pop("attempts",5); return api(lambda: drv.files().update(**kw).execute(), "files.update", a)
def files_copy(drv, **kw):
    a=kw.pop("attempts",5); return api(lambda: drv.files().copy(**kw).execute(), "files.copy", a)

def q(s:str)->str:
    return (s or "").replace("\\","\\\\").replace("'","\\'").replace("\r"," ").replace("\n"," ")
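# e.g. q("O'Hara") == "O\'Hara"; backslashes are doubled first, and CR/LF are
# flattened to spaces, so the result is safe inside single-quoted query literals.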

# ---------------- helpers ----------------
def depth_of_path(p:str)->int:
    if not p: return 0
    return len([x for x in p.split("/") if x])
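# e.g. depth_of_path("a/b/c") == 3; depth_of_path("") == 0 (root level).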

def find_folder_in_parent(drv, parent_id:str, name:str, attempts:int):
    try:
        resp=files_list(drv, q=f"'{parent_id}' in parents and trashed=false and mimeType='{FOLDER_MIME}' and name='{q(name)}'",
                        spaces="drive", corpora="user", pageSize=100,
                        fields="files(id,name)", includeItemsFromAllDrives=True, supportsAllDrives=True, attempts=attempts)
    except HttpError as e:
        if getattr(getattr(e,"resp",None),"status",None)==400:
            resp=files_list(drv, q=f"'{parent_id}' in parents and trashed=false and mimeType='{FOLDER_MIME}' and name contains '{q(name[:100])}'",
                            spaces="drive", corpora="user", pageSize=100,
                            fields="files(id,name)", includeItemsFromAllDrives=True, supportsAllDrives=True, attempts=attempts)
        else:
            raise
    arr=resp.get("files",[])
    for f in arr:
        if f.get("name")==name: return f["id"]
    return arr[0]["id"] if arr else None

def ensure_folder(drv, parent_id:str, name:str, attempts:int, dry:bool)->str:
    fid=find_folder_in_parent(drv, parent_id, name, attempts)
    if fid: return fid
    if dry: return f"DRY_{hash((parent_id,name))}"
    meta=files_create(drv, body={"name":name,"parents":[parent_id],"mimeType":FOLDER_MIME},
                      fields="id", supportsAllDrives=True, attempts=attempts)
    return meta["id"]

def ensure_path(drv, root_id:str, rel_path:str, attempts:int, dry:bool)->str:
    parent=root_id
    if rel_path:
        for part in [x for x in rel_path.split("/") if x]:
            parent=ensure_folder(drv, parent, part, attempts, dry)
    return parent
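# e.g. ensure_path(drv, ROOT_ID, "2023/reports") walks/creates ROOT/2023/reports
# and returns the id of the deepest folder ("reports"); path parts are illustrative.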

def find_existing_migrated_in_parent(drv, parent_id:str, src_id:str, attempts:int):
    try:
        resp=files_list(drv,
            q=(f"'{parent_id}' in parents and trashed=false "
               f"and appProperties has {{ key='src_id' and value='{q(src_id)}' }}"),
            spaces="drive", corpora="user", pageSize=50,
            fields="files(id,name,appProperties,md5Checksum,modifiedTime)",
            includeItemsFromAllDrives=True, supportsAllDrives=True, attempts=attempts)
        arr=resp.get("files",[])
        return arr[0] if arr else None
    except HttpError:
        # On query failure, fall back to "not found": worst case we re-copy.
        return None
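# Rendered query example (ids illustrative):
#   '1AbC' in parents and trashed=false and appProperties has { key='src_id' and value='0XyZ' }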

# ---------------- worker per DEST/PAIR ----------------
def process_pair(drv_dest, ed:str, eo:str, root_id:str, rows:List[dict],
                 sleep_s:float, attempts:int, update_changed:str, dry:bool):
    # normalize mode: FULL = "" => treat already-migrated items as 'skip'
    eff_update = update_changed.strip().lower() or "skip"

    # 1) folders (empty ones are preserved too)
    folders=[r for r in rows if (r.get("mime") or "")==FOLDER_MIME]
    for r in sorted(folders, key=lambda x: depth_of_path(x.get("path",""))):
        parent=ensure_path(drv_dest, root_id, r.get("path",""), attempts, dry)
        ensure_folder(drv_dest, parent, r.get("name",""), attempts, dry)
        if sleep_s: time.sleep(sleep_s)

    # 2) files
    files=[r for r in rows if (r.get("mime") or "")!=FOLDER_MIME]
    for r in files:
        # manifest sanity check
        if (r.get("email_destino","").strip().lower()!=ed.lower() or
            r.get("email_origem","").strip().lower()!=eo.lower()):
            raise SystemExit(f"Manifest mixes pairs: {r.get('email_origem')}->{r.get('email_destino')} in run {eo}->{ed}")

        name=r.get("name","")
        src=r.get("orig_file_id","")
        md5=r.get("md5","") or ""
        mtime=r.get("mtime","") or ""
        parent=ensure_path(drv_dest, root_id, r.get("path",""), attempts, dry)

        existing = find_existing_migrated_in_parent(drv_dest, parent, src, attempts)

        if existing:
            ex_md5 = (existing.get("appProperties") or {}).get("src_md5","") or existing.get("md5Checksum","") or ""
            ex_mt  = (existing.get("appProperties") or {}).get("src_mtime","") or existing.get("modifiedTime","") or ""
            changed = False
            if md5 and ex_md5 and md5 != ex_md5:
                changed = True
            elif (not md5) and mtime and ex_mt and (mtime > ex_mt):
                # RFC 3339 timestamps compare correctly as plain strings when
                # both sides use the same format (e.g. Drive's modifiedTime).
                changed = True

            if not changed or eff_update=="skip":
                logging.info(f"[SKIP] {name} (src_id já migrado{' e sem mudança' if not changed else ''})")
                if sleep_s: time.sleep(sleep_s)
                continue

            if eff_update=="replace" and not dry:
                try:
                    meta=files_copy(drv_dest, fileId=src,
                        body={"name":name,"parents":[parent],
                              "appProperties":{"src_id":src,"src_md5":md5,"src_mtime":mtime,
                                               "dest_email":ed,"src_email":eo,"migrator":"MDRV2"}},
                        fields="id,parents,name", supportsAllDrives=True, attempts=attempts)
                    files_update(drv_dest, fileId=existing["id"], body={"trashed": True},
                                 fields="id,trashed", supportsAllDrives=True, attempts=attempts)
                    logging.info(f"[REPLACE] {name}")
                except HttpError as e:
                    logging.warning(f"[FAIL replace] {name}: {reason(e)}")
                if sleep_s: time.sleep(sleep_s)
                continue

            if eff_update=="keep-both":
                if not dry:
                    try:
                        files_copy(drv_dest, fileId=src,
                            body={"name":name,"parents":[parent],
                                  "appProperties":{"src_id":src,"src_md5":md5,"src_mtime":mtime,
                                                   "dest_email":ed,"src_email":eo,"migrator":"MDRV2"}},
                            fields="id,parents,name", supportsAllDrives=True, attempts=attempts)
                        logging.info(f"[KEEP-BOTH] {name}")
                    except HttpError as e:
                        logging.warning(f"[FAIL keep-both] {name}: {reason(e)}")
                if sleep_s: time.sleep(sleep_s)
                continue

        # no prior copy (first migration of this src_id into this parent) -> copy
        if not dry:
            try:
                files_copy(drv_dest, fileId=src,
                    body={"name":name,"parents":[parent],
                          "appProperties":{"src_id":src,"src_md5":md5,"src_mtime":mtime,
                                           "dest_email":ed,"src_email":eo,"migrator":"MDRV2"}},
                    fields="id,parents,name", supportsAllDrives=True, attempts=attempts)
                logging.info(f"[COPY] {name}")
            except HttpError as e:
                logging.warning(f"[FAIL copy] {name}: {reason(e)}")
        else:
            logging.info(f"[DRY COPY] {name}")
        if sleep_s: time.sleep(sleep_s)
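# Per-file decision summary ("existing" = a file in the same parent whose
# appProperties.src_id matches the source id):
#   no existing                       -> COPY
#   existing, unchanged (any mode)    -> SKIP
#   existing, changed, mode skip      -> SKIP
#   existing, changed, mode replace   -> copy new version, trash the old one
#   existing, changed, mode keep-both -> copy new version alongside the old one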

# ---------------- main ----------------
def main():
    ap=argparse.ArgumentParser(description="Per-PAIR migration (no global dedupe, no shortcuts).")
    ap.add_argument("--sa-json", required=True)
    ap.add_argument("--folders", required=True, help="CSV: email_destino,email_origem,pasta_id_correta")
    ap.add_argument("--manifest", required=True)
    ap.add_argument("--only-dest", default="")
    ap.add_argument("--only-src", default="")
    ap.add_argument("--sleep-ms", type=int, default=25)
    ap.add_argument("--max-attempts", type=int, default=5)
    ap.add_argument("--update-changed", default="", help='""(full), skip, replace, keep-both')
    ap.add_argument("--dry-run", default="false", choices=["true","false"])
    ap.add_argument("--log-dir", default="logs")
    ap.add_argument("--log-level", default="INFO", choices=["DEBUG","INFO","WARNING","ERROR"])
    args=ap.parse_args()

    run_dir=setup_logging(args.log_dir, args.log_level)
    dry=args.dry_run.lower()=="true"
    sleep_s=max(args.sleep_ms,0)/1000.0
    attempts=max(1,args.max_attempts)

    # load the folders CSV
    pair2root: Dict[Tuple[str,str],str]={}
    with open(args.folders, newline="", encoding="utf-8") as fh:
        r=csv.DictReader(fh)
        need={"email_destino","email_origem","pasta_id_correta"}
        fns=[(fn or "").strip().lower() for fn in (r.fieldnames or [])]
        if not need.issubset(set(fns)):
            raise SystemExit("out_folders.csv must contain the columns: email_destino,email_origem,pasta_id_correta")
        for raw in r:
            row={(k or "").strip().lower(): v for k,v in raw.items()}  # normalize header case
            ed=(row.get("email_destino") or "").strip().lower()
            eo=(row.get("email_origem") or "").strip().lower()
            pid=(row.get("pasta_id_correta") or "").strip()
            if not ed or not eo or not pid: continue
            if args.only_dest and ed != args.only_dest.strip().lower(): continue
            if args.only_src and eo != args.only_src.strip().lower(): continue
            key=(ed,eo)
            if key in pair2root and pair2root[key]!=pid:
                raise SystemExit(f"Conflict: {key} is mapped to two folder IDs ({pair2root[key]} vs {pid})")
            pair2root[key]=pid
    if not pair2root:
        raise SystemExit("Nenhum PAR para processar (ver filtros --only-dest/--only-src).")

    # group the manifest by DEST->SRC
    rows_by_dest_then_src: Dict[str, Dict[str, List[dict]]] = defaultdict(lambda: defaultdict(list))
    with open(args.manifest, newline="", encoding="utf-8") as fh:
        r=csv.DictReader(fh)
        for raw in r:
            row={(k or "").strip().lower(): v for k,v in raw.items()}  # normalize header case
            ed=(row.get("email_destino") or "").strip().lower()
            eo=(row.get("email_origem") or "").strip().lower()
            if args.only_dest and ed != args.only_dest.strip().lower(): continue
            if args.only_src and eo != args.only_src.strip().lower(): continue
            rows_by_dest_then_src[ed][eo].append(row)

    # process one DEST at a time
    for ed, pairs_rows in rows_by_dest_then_src.items():
        h=attach_pair_logger(run_dir, ed, "ALL-SRCS")
        try:
            total=sum(len(v) for v in pairs_rows.values())
            logging.info("="*80)
            logging.info(f"DEST={ed} | PARs={len(pairs_rows)} | ITENS={total} | dry={dry} | update={args.update_changed or '(full/skip)'}")
            d=get_drive(ed, args.sa_json)
            for eo, rows in pairs_rows.items():
                pid = pair2root.get((ed,eo))
                if not pid:
                    logging.warning(f"[SKIP PAR] sem pasta_id_correta para {eo}->{ed}")
                    continue
                logging.info(f"[PAR] {eo} -> {ed} | root={pid} | itens={len(rows)}")
                process_pair(d, ed, eo, pid, rows, sleep_s, attempts, args.update_changed, dry)
        finally:
            detach_logger(h)

    logging.info("Concluído.")

if __name__=="__main__":
    main()