#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
pre-migracao.py — PREPARE: mapeia pastas por PAR e gera manifest.
Saídas:
  - out_folders.csv: email_destino,email_origem,pasta_id_correta
  - manifest.csv:    email_origem,email_destino,orig_file_id,name,mime,md5,mtime,path
  - share_failures.csv (opcional)
"""

import argparse
import csv
import functools
import json
import logging
import random
import time
from collections import deque
from typing import Dict, Optional

from google.oauth2 import service_account
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

# Root logging setup: timestamp, level and message, at INFO and above.
LOG_FORMAT = "%(asctime)s [%(levelname)s] %(message)s"
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
logger = logging.getLogger(__name__)

# OAuth scope requested for every impersonated Drive client.
SCOPES = [
    "https://www.googleapis.com/auth/drive",
]
# MIME type Drive uses to represent folders.
FOLDER_MIME = "application/vnd.google-apps.folder"

def get_drive(user, sa_json):
    """Build a Drive v3 client that acts on behalf of *user*.

    Args:
        user: e-mail passed as the ``subject`` of the service-account credentials.
        sa_json: path to the service-account key file.

    Returns:
        The service object returned by ``build("drive", "v3", ...)``.
    """
    credentials = service_account.Credentials.from_service_account_file(
        sa_json,
        scopes=SCOPES,
        subject=user,
    )
    service = build("drive", "v3", credentials=credentials, cache_discovery=False)
    return service

def reason(e:HttpError)->str:
    """Extract a short failure reason from the JSON body of an ``HttpError``.

    Returns the ``reason`` of the first entry in ``error.errors`` when that list
    is non-empty, otherwise ``error.message``. Any problem while decoding or
    walking the payload yields an empty string instead of raising.
    """
    try:
        payload = json.loads(e.content.decode("utf-8"))
        error = payload.get("error", {})
        details = error.get("errors", []) or []
        if not details:
            return error.get("message") or ""
        return details[0].get("reason") or ""
    except Exception:
        # Best effort only: an unparsable body must never mask the original error.
        return ""

def with_backoff(fn):
    """Decorate *fn* so that retriable ``HttpError`` failures are retried.

    A call is attempted up to 10 times. An ``HttpError`` whose response status
    is 403, 429, 500 or 503 triggers a sleep (growing by a factor of 1.7 plus
    jitter, capped at 20 seconds) followed by another attempt. Any other
    ``HttpError`` is re-raised immediately.

    When the last attempt also fails, the ``HttpError`` is re-raised so callers
    see the failure instead of silently receiving ``None``.

    Args:
        fn: callable to wrap.

    Returns:
        A wrapper with the same signature and metadata as *fn*.
    """
    max_attempts = 10
    retriable = ("403", "429", "500", "503")

    @functools.wraps(fn)
    def wrap(*a, **k):
        delay = 0.8
        for i in range(max_attempts):
            try:
                return fn(*a, **k)
            except HttpError as e:
                st = getattr(getattr(e, "resp", None), "status", None)
                # Give up on non-retriable statuses and once attempts are exhausted;
                # sleeping after the final attempt would only delay the error.
                if str(st) not in retriable or i == max_attempts - 1:
                    raise
                sl = min(delay * 1.7 + random.uniform(0, 0.5), 20)
                logging.warning(f"[backoff] tentativa {i+1}/{max_attempts} status={st} aguardando {sl:.2f}s")
                time.sleep(sl)
                delay = sl
    return wrap

@with_backoff
def files_list(drv, **kw):
    """Execute ``drv.files().list(**kw)`` under the ``with_backoff`` retry wrapper."""
    request = drv.files().list(**kw)
    return request.execute()
@with_backoff
def files_get(drv, **kw):
    """Execute ``drv.files().get(**kw)`` under the ``with_backoff`` retry wrapper."""
    request = drv.files().get(**kw)
    return request.execute()
@with_backoff
def files_create(drv, **kw):
    """Execute ``drv.files().create(**kw)`` under the ``with_backoff`` retry wrapper."""
    request = drv.files().create(**kw)
    return request.execute()
@with_backoff
def files_update(drv, **kw):
    """Execute ``drv.files().update(**kw)`` under the ``with_backoff`` retry wrapper."""
    request = drv.files().update(**kw)
    return request.execute()
@with_backoff
def perms_create(drv, **kw):
    """Execute ``drv.permissions().create(**kw)`` under the ``with_backoff`` retry wrapper."""
    request = drv.permissions().create(**kw)
    return request.execute()

def q(s:str)->str:
    """Escape *s* for use inside a single-quoted Drive query literal.

    Backslashes and single quotes are backslash-escaped, carriage returns and
    line feeds become spaces, and a falsy input yields an empty string.
    """
    # One pass over the text; each character is mapped independently, so the
    # backslashes introduced for quotes are never escaped a second time.
    table = str.maketrans({
        "\\": "\\\\",
        "'": "\\'",
        "\r": " ",
        "\n": " ",
    })
    text = s or ""
    return text.translate(table)

def find_root_by_mark(drv, email_origem:str)->Optional[dict]:
    """Find a non-trashed folder under 'root' whose ``src_email`` appProperty equals *email_origem*.

    Returns the first match (with ``id``, ``name`` and ``appProperties``), or
    ``None`` when the listing comes back empty.
    """
    clauses = [
        "'root' in parents",
        "trashed=false",
        f"mimeType='{FOLDER_MIME}'",
        f"appProperties has {{ key='src_email' and value='{q(email_origem)}' }}",
    ]
    listing = files_list(
        drv,
        q=" and ".join(clauses),
        spaces="drive",
        corpora="user",
        pageSize=50,
        fields="files(id,name,appProperties)",
        includeItemsFromAllDrives=True,
        supportsAllDrives=True,
    )
    matches = listing.get("files", [])
    if not matches:
        return None
    return matches[0]

def find_root_by_name(drv, name:str)->Optional[dict]:
    """Find a non-trashed folder under 'root' whose name is exactly *name*.

    Returns the first match (with ``id``, ``name`` and ``appProperties``), or
    ``None`` when the listing comes back empty.
    """
    clauses = [
        "'root' in parents",
        "trashed=false",
        f"mimeType='{FOLDER_MIME}'",
        f"name='{q(name)}'",
    ]
    listing = files_list(
        drv,
        q=" and ".join(clauses),
        spaces="drive",
        corpora="user",
        pageSize=50,
        fields="files(id,name,appProperties)",
        includeItemsFromAllDrives=True,
        supportsAllDrives=True,
    )
    matches = listing.get("files", [])
    if not matches:
        return None
    return matches[0]

def adopt_mark(drv, folder_id:str, eo:str, ed:str):
    """Stamp *folder_id* with the migrator appProperties for the pair (*eo*, *ed*).

    Writes ``src_email``, ``dest_email`` and ``migrator="MDRV2"`` through
    ``files_update``; nothing is returned.
    """
    marks = {
        "src_email": eo,
        "dest_email": ed,
        "migrator": "MDRV2",
    }
    files_update(
        drv,
        fileId=folder_id,
        body={"appProperties": marks},
        fields="id,appProperties",
        supportsAllDrives=True,
    )

def get_or_create_root(dest_drv, eo:str, ed:str, reuse_map:Dict[tuple,str], mode:str, dry:bool)->str:
    """Resolve the destination folder id for the pair (*eo* -> *ed*).

    Strategies, tried in order:
      1. A folder id from *reuse_map*: first the pair-specific key
         ``(ed, eo)``, then the destination-only key ``(ed, "")`` produced by
         2-column reuse CSVs. A destination-only folder already marked for a
         different source is skipped so that pair's mark is not overwritten.
      2. A folder under 'root' already marked with ``src_email == eo``.
      3. A folder under 'root' named exactly *eo*.
      4. Creating a new folder named *eo* (unless *mode* forbids it).

    Args:
        dest_drv: Drive client acting as the destination user.
        eo: source e-mail.
        ed: destination e-mail.
        reuse_map: ``{(dest_email_lower, src_email_lower_or_""): folder_id}``.
        mode: ``"must_exist"`` raises instead of creating a folder.
        dry: when true nothing is written; a would-be creation returns ``"DRY_ROOT"``.

    Returns:
        The folder id, or the placeholder ``"DRY_ROOT"`` in dry-run mode.

    Raises:
        RuntimeError: no folder was found and *mode* is ``"must_exist"``.
    """
    ed_key = ed.lower()
    eo_key = eo.lower()

    fid = reuse_map.get((ed_key, eo_key))
    pair_specific = bool(fid)
    if not fid:
        # 2-column reuse CSVs carry no source e-mail; they are keyed by destination only.
        fid = reuse_map.get((ed_key, ""))

    if fid:
        try:
            meta = files_get(dest_drv, fileId=fid, fields="id,mimeType,trashed,appProperties")
            usable = meta.get("mimeType") == FOLDER_MIME and not meta.get("trashed")
            if usable and not pair_specific:
                # Do not steal a folder that another source has already claimed.
                marked = ((meta.get("appProperties") or {}).get("src_email") or "").lower()
                usable = marked in ("", eo_key)
            if usable:
                if not dry:
                    adopt_mark(dest_drv, fid, eo, ed)
                logging.info(f"[ROOT] reuse {eo}->{ed} root={fid}")
                return fid
            logging.warning(f"[reuse] id {fid} não utilizável para {eo}->{ed}; tentando outras estratégias.")
        except HttpError:
            logging.warning(f"[reuse] id {fid} inválido; tentando outras estratégias.")

    ex = find_root_by_mark(dest_drv, eo)
    if ex:
        if not dry:
            adopt_mark(dest_drv, ex["id"], eo, ed)
        logging.info(f"[ROOT] adopt-by-mark {eo}->{ed} root={ex['id']}")
        return ex["id"]

    ex = find_root_by_name(dest_drv, eo)
    if ex:
        if not dry:
            adopt_mark(dest_drv, ex["id"], eo, ed)
        logging.info(f"[ROOT] adopt-by-name {eo}->{ed} root={ex['id']}")
        return ex["id"]

    if mode == "must_exist":
        raise RuntimeError(f"Pasta '{eo}' não existe no destino {ed} (folder-mode=must_exist).")

    if dry:
        logging.info(f"[ROOT] dry-create {eo}->{ed}")
        return "DRY_ROOT"

    meta = files_create(dest_drv, body={
        "name": eo, "mimeType": FOLDER_MIME,
        "appProperties": {"src_email": eo, "dest_email": ed, "migrator": "MDRV2"},
    }, fields="id,name", supportsAllDrives=True)
    logging.info(f"[ROOT] created {eo}->{ed} root={meta['id']}")
    return meta["id"]

def share(drv, fid:str, dest_email:str, role:str, notify:bool, ctx:dict, fail_writer):
    """Grant *dest_email* the given *role* on item *fid*, recording failures.

    An ``HttpError`` is not propagated: it is logged and, when *fail_writer*
    is truthy, one row describing the failure is written to it.

    Args:
        drv: Drive client used to create the permission.
        fid: id of the item being shared.
        dest_email: user receiving access.
        role: permission role sent in the request body.
        notify: forwarded as ``sendNotificationEmail``.
        ctx: context for logs and the failure row (``email_origem``,
            ``email_destino``, ``item_name``, ``is_folder``, ``where``).
        fail_writer: object with ``writerow`` or a falsy value to disable recording.
    """
    permission = {"type": "user", "role": role, "emailAddress": dest_email}
    try:
        perms_create(
            drv,
            fileId=fid,
            body=permission,
            sendNotificationEmail=notify,
            supportsAllDrives=True,
            fields="id",
        )
        logging.info(f"[share] {ctx.get('where')} -> {dest_email} ok")
    except HttpError as e:
        r = reason(e)
        st = getattr(getattr(e, "resp", None), "status", None)
        logging.info(f"[share] {ctx.get('where')} -> {dest_email} fail status={st} reason={r}")
        if not fail_writer:
            return
        failure_row = [
            ctx.get("email_origem", ""),
            ctx.get("email_destino", ""),
            fid,
            ctx.get("item_name", ""),
            ctx.get("is_folder", False),
            ctx.get("where", ""),
            st or "",
            r or "",
        ]
        fail_writer.writerow(failure_row)

def list_children(drv, parent_id:str):
    """Yield every non-trashed child of *parent_id*, following pagination.

    Each yielded dict carries the requested fields ``id``, ``name`` and ``mimeType``.
    """
    query = f"'{parent_id}' in parents and trashed=false"
    page_token = None
    while True:
        page = files_list(
            drv,
            q=query,
            spaces="drive",
            corpora="user",
            pageSize=1000,
            pageToken=page_token,
            fields="nextPageToken,files(id,name,mimeType)",
            includeItemsFromAllDrives=True,
            supportsAllDrives=True,
        )
        yield from page.get("files", [])
        page_token = page.get("nextPageToken")
        if not page_token:
            return

def list_root_children(drv):
    """Yield the non-trashed direct children of the 'root' alias for *drv*."""
    return list_children(drv, "root")

def list_computers_roots(drv):
    """Yield owned, non-trashed folders that report no parents at all.

    The query excludes folders directly under 'root'; results that still list
    any parent are filtered out client-side. Each yielded dict carries ``id``,
    ``name`` and (possibly absent) ``parents``.
    NOTE(review): per the function name these are expected to be the
    "Computers" backup roots; confirm against real accounts.
    """
    query = f"trashed=false and mimeType='{FOLDER_MIME}' and 'me' in owners and not 'root' in parents"
    page_token = None
    while True:
        page = files_list(
            drv,
            q=query,
            spaces="drive",
            corpora="user",
            pageSize=1000,
            pageToken=page_token,
            fields="nextPageToken,files(id,name,parents)",
            includeItemsFromAllDrives=True,
            supportsAllDrives=True,
        )
        for folder in page.get("files", []):
            if folder.get("parents") or []:
                continue
            yield folder
        page_token = page.get("nextPageToken")
        if not page_token:
            return

def bfs_manifest(o_drv, root_id:str, parent_path:str, mw, eo:str, ed:str, sleep_s:float):
    """Write manifest rows for *root_id* and everything below it, breadth-first.

    Folders get a row with empty ``md5``/``mtime``; their non-folder children
    get a row each, with md5/mtime fetched individually. Sub-folders are queued
    and visited later. ``path`` holds the '/'-joined names of the ancestors.

    Metadata lookups are best effort: an item whose own lookup fails is skipped,
    and a child file whose lookup fails is written with empty md5/mtime. Both
    cases are logged so gaps in the manifest can be traced.

    Args:
        o_drv: Drive client acting as the source user.
        root_id: id of the item where the walk starts.
        parent_path: path prefix recorded for *root_id* itself.
        mw: ``csv.DictWriter``-like object receiving the manifest rows.
        eo: source e-mail written to every row.
        ed: destination e-mail written to every row.
        sleep_s: seconds to sleep after each visited item (0 disables).
    """
    def emit(file_id, name, mime, md5, mtime, path):
        mw.writerow({"email_origem": eo, "email_destino": ed, "orig_file_id": file_id, "name": name,
                     "mime": mime, "md5": md5, "mtime": mtime, "path": path})

    def status_of(err):
        return getattr(getattr(err, "resp", None), "status", None)

    pending = deque([(root_id, parent_path)])
    while pending:
        fid, ppath = pending.popleft()
        try:
            meta = files_get(o_drv, fileId=fid, fields="id,name,mimeType,md5Checksum,modifiedTime")
        except HttpError as e:
            logging.warning(f"[manifest] {eo} item={fid} skipped status={status_of(e)}")
            continue

        name = meta.get("name", "")
        mime = meta.get("mimeType", "")
        next_parent = f"{ppath}/{name}" if ppath else name

        if mime == FOLDER_MIME:
            emit(fid, name, mime, "", "", ppath)
            for ch in list_children(o_drv, fid):
                if ch.get("mimeType") == FOLDER_MIME:
                    pending.append((ch["id"], next_parent))
                    continue
                try:
                    cmeta = files_get(o_drv, fileId=ch["id"], fields="md5Checksum,modifiedTime")
                except HttpError as e:
                    logging.warning(f"[manifest] {eo} item={ch['id']} metadata unavailable status={status_of(e)}")
                    cmeta = {"md5Checksum": "", "modifiedTime": ""}
                emit(ch["id"], ch.get("name", ""), ch.get("mimeType", ""),
                     cmeta.get("md5Checksum", "") or "", cmeta.get("modifiedTime", "") or "", next_parent)
        else:
            emit(fid, name, mime, meta.get("md5Checksum", "") or "", meta.get("modifiedTime", "") or "", ppath)

        if sleep_s:
            time.sleep(sleep_s)

def _load_reuse_map(path:str)->Dict[tuple,str]:
    """Read the optional reuse CSV into ``{(dest_email, src_email_or_""): folder_id}``."""
    reuse_map: Dict[tuple,str] = {}
    if not path:
        return reuse_map
    with open(path, newline="", encoding="utf-8") as fh:
        reader = csv.DictReader(fh)
        # Old 2-column files have no source column; their rows are keyed by destination only.
        has_src = "email_origem" in [(x or "").lower() for x in (reader.fieldnames or [])]
        for row in reader:
            ed = (row.get("email_destino") or "").strip().lower()
            pid = (row.get("pasta_id_correta") or "").strip()
            if not ed or not pid:
                continue
            eo = (row.get("email_origem") or "").strip().lower() if has_src else ""
            reuse_map[(ed, eo)] = pid
    return reuse_map


def _share_items(o, items, eo, ed, where, role, notify, dry, sleep_s, fail_writer):
    """Share every item in *items* with *ed* (skipped in dry-run), sleeping between items."""
    for it in items:
        # Computers roots are listed without mimeType; the query only returns folders.
        is_folder = True if where == "computers_root" else it.get("mimeType") == FOLDER_MIME
        ctx = {"email_origem": eo, "email_destino": ed, "item_name": it.get("name", ""),
               "is_folder": is_folder, "where": where}
        if not dry:
            share(o, it["id"], ed, role, notify, ctx, fail_writer)
        if sleep_s:
            time.sleep(sleep_s)


def _write_pair_manifest(o, root_children, computers, mw, eo, ed, sleep_s):
    """Write manifest rows for root folders, loose root files and Computers roots."""
    for it in root_children:
        if it.get("mimeType") == FOLDER_MIME:
            bfs_manifest(o, it["id"], "", mw, eo, ed, sleep_s)
        else:
            try:
                meta = files_get(o, fileId=it["id"], fields="md5Checksum,modifiedTime")
                md5 = meta.get("md5Checksum", "") or ""
                mt = meta.get("modifiedTime", "") or ""
            except HttpError:
                md5 = ""
                mt = ""
            mw.writerow({"email_origem": eo, "email_destino": ed, "orig_file_id": it["id"],
                         "name": it.get("name", ""), "mime": it.get("mimeType", ""),
                         "md5": md5, "mtime": mt, "path": ""})
        if sleep_s:
            time.sleep(sleep_s)
    for comp in computers:
        bfs_manifest(o, comp["id"], "", mw, eo, ed, sleep_s)


def main():
    """Run the PREPARE step for every (source, destination) pair in the input CSV.

    For each pair: resolve or create the destination root folder, write it to
    the folders CSV, share the source's root children and Computers roots with
    the destination user, and write the manifest rows. In dry-run mode no
    folder is created or marked and nothing is shared.
    """
    ap = argparse.ArgumentParser(description="PREPARE (mapa por PAR).")
    ap.add_argument("--csv", required=True)
    ap.add_argument("--sa-json", required=True)
    ap.add_argument("--out-folders", default="out_folders.csv")
    ap.add_argument("--out-manifest", default="manifest.csv")
    ap.add_argument("--share-failures", default="")
    ap.add_argument("--reuse-folders", default="", help="CSV antigo/novo para reuso (2-3 colunas)")
    ap.add_argument("--folder-mode", default="create_or_reuse", choices=["create_or_reuse","must_exist"])
    ap.add_argument("--role", default="writer", choices=["reader","commenter","writer"])
    ap.add_argument("--notify", default="false", choices=["true","false"])
    ap.add_argument("--sleep-ms", type=int, default=5)
    ap.add_argument("--dry-run", default="false", choices=["true","false"])
    args = ap.parse_args()

    send_email = args.notify.lower() == "true"
    dry = args.dry_run.lower() == "true"
    sleep_s = max(args.sleep_ms, 0) / 1000.0

    logging.info("[SYS] PREPARE START")

    reuse_map = _load_reuse_map(args.reuse_folders)
    used_folder_ids = set()

    with open(args.out_folders, "w", newline="", encoding="utf-8") as fh_out, \
         open(args.out_manifest, "w", newline="", encoding="utf-8") as fh_mani, \
         open(args.csv, newline="", encoding="utf-8") as fh_in:

        fw = csv.DictWriter(fh_out, fieldnames=["email_destino","email_origem","pasta_id_correta"])
        fw.writeheader()
        mw = csv.DictWriter(fh_mani, fieldnames=["email_origem","email_destino","orig_file_id","name","mime","md5","mtime","path"])
        mw.writeheader()

        sfh = None
        sfw = None
        try:
            if args.share_failures:
                sfh = open(args.share_failures, "w", newline="", encoding="utf-8")
                sfw = csv.writer(sfh)
                sfw.writerow(["email_origem","email_destino","item_id","item_name","is_folder","where","status","reason"])

            for row in csv.DictReader(fh_in):
                eo = (row.get("email_origem") or "").strip()
                ed = (row.get("email_destino") or "").strip()
                if not eo or not ed:
                    logging.warning(f"linha inválida: {row}")
                    continue

                o = get_drive(eo, args.sa_json)
                d = get_drive(ed, args.sa_json)

                # One root folder per pair.
                root_id = get_or_create_root(d, eo, ed, reuse_map, args.folder_mode, dry)
                if root_id in used_folder_ids:
                    # Id already claimed by an earlier pair: give this pair its own folder.
                    if not dry:
                        meta = files_create(d, body={"name": f"{eo} (MDRV2-{int(time.time())})", "mimeType": FOLDER_MIME,
                                                     "appProperties": {"src_email": eo, "dest_email": ed, "migrator": "MDRV2"}},
                                            fields="id", supportsAllDrives=True)
                        root_id = meta["id"]
                    else:
                        root_id = f"DRY_COLLISION_{eo}_{ed}"
                used_folder_ids.add(root_id)

                fw.writerow({"email_destino": ed, "email_origem": eo, "pasta_id_correta": root_id})

                # Each listing is fetched once and reused for both sharing and the manifest.
                root_children = list(list_root_children(o))
                _share_items(o, root_children, eo, ed, "mydrive_root_child", args.role, send_email, dry, sleep_s, sfw)
                computers = list(list_computers_roots(o))
                _share_items(o, computers, eo, ed, "computers_root", args.role, send_email, dry, sleep_s, sfw)

                _write_pair_manifest(o, root_children, computers, mw, eo, ed, sleep_s)
        finally:
            # Close the optional failures file even when a pair aborts the run.
            if sfh:
                sfh.close()

    logging.info("[SYS] PREPARE DONE")
    logging.info(f"Pronto. Gerados {args.out_folders} e {args.out_manifest}")

# Run the PREPARE step only when executed as a script, not on import.
if __name__ == "__main__":
    main()

