import argparse
import gzip
import json
import logging
import mmap
import os
import shutil
import sys
from datetime import datetime
from pathlib import Path


CRAB_BLOCK_SIZE = 4 * (2 ** 20)  # Backup block granularity: 4 MiB.


class PatchFileError(Exception):
    """Raised when block data cannot be applied to the target file."""


def read_block_data(part_path):
    """Return the raw bytes of one backup block file.

    :param part_path: path to a ``*.part`` (raw) or ``*.part.gz``
        (gzip-compressed) block file.
    :type part_path: Path
    :raises ValueError: for any other file name suffix.
    """
    name = part_path.name
    # Check the longer suffix first: '.part.gz' also ends with... no, it does
    # not end with '.part', but keeping the original order is the safe choice.
    if name.endswith('.part.gz'):
        with gzip.open(part_path, 'rb') as stream:
            return stream.read()
    if name.endswith('.part'):
        return part_path.read_bytes()
    raise ValueError('Unsupported block data file type', part_path)


def patch_crab_file(read_path, write_path, target_file_size, diff_bmap, do_copy=None, dry_run=None):
    """Rebuild one file from its current content plus diff blocks.

    :param read_path: source file (the current version in the archive).
    :type read_path: Path
    :param write_path: file to (re)build; may be the same as read_path.
    :type write_path: Path
    :param target_file_size: size in bytes the restored file must have.
    :type target_file_size: int
    :param diff_bmap: block number -> path of the .part/.part.gz file that
        holds the restored content of that block.
    :type diff_bmap: dict
    :param do_copy: when true, blocks absent from diff_bmap are copied from
        read_path (restore into a separate destination); when false those
        blocks are left untouched (in-place restore).
    :param dry_run: when true, only log what would be done.
    :raises PatchFileError: if block data does not fit the source or target
        file sizes (e.g. a part file is missing).
    """
    action_log_level = logging.INFO if dry_run else logging.DEBUG

    if read_path.is_symlink():
        logging.error('TODO: restore symlink unsupported: %s', read_path)  # todo
        return

    read_file_stat = read_path.lstat()
    fd = None  # type: int
    dst_mmap = None  # type: mmap.mmap
    read_fd = None  # type: int
    read_data = None  # type: mmap.mmap

    try:
        if do_copy:
            read_fd = os.open(read_path, os.O_RDONLY)
            if read_file_stat.st_size:
                # An empty file cannot be mmapped, hence the size check.
                read_data = mmap.mmap(read_fd, read_file_stat.st_size, prot=mmap.PROT_READ)
        if not dry_run:
            fd = os.open(write_path, os.O_RDWR | os.O_CREAT, read_file_stat.st_mode)
        logging.log(action_log_level, 'Set size %s to %s', write_path, target_file_size)
        if not dry_run:
            os.ftruncate(fd, target_file_size)
        if target_file_size == 0:
            logging.debug('Finish restore %s due its empty', write_path)
            return  # An empty range cannot be mmapped, so we are done right away.
        if not dry_run:
            dst_mmap = mmap.mmap(fd, target_file_size, prot=mmap.PROT_WRITE)
        # logging.debug('diff_bmap %s', diff_bmap)
        # Add CRAB_BLOCK_SIZE - 1 to the length so the final partial block is included.
        for block_nn in range((target_file_size+CRAB_BLOCK_SIZE-1)//CRAB_BLOCK_SIZE):
            data_offset = block_nn * CRAB_BLOCK_SIZE
            data_len = CRAB_BLOCK_SIZE
            if block_nn in diff_bmap:
                # Take the block's data from the diff
                logging.debug('Read %s', diff_bmap[block_nn])
                block_data = read_block_data(diff_bmap[block_nn])
                data_len = len(block_data)
            elif do_copy:
                if data_offset + data_len > read_file_stat.st_size:
                    # Final partial block of the source file
                    data_len = read_file_stat.st_size - data_offset
                    if data_len < 0:
                        raise PatchFileError(
                            'No data',
                            f'Restore from {read_path} (file size {read_file_stat.st_size}),'
                            + f' requested offset {data_offset}',
                            read_path, read_file_stat.st_size, data_offset)
                    if data_len == 0:
                        # The source has no bytes for this block (e.g. an empty
                        # source file, where read_data is None); nothing to copy.
                        continue
                block_data = read_data[data_offset:data_offset+data_len]
            else:
                continue  # Nothing to change
            if data_offset + data_len > target_file_size:
                raise PatchFileError(
                    'Extra data',
                    f'Restore from {read_path} (file size {read_file_stat.st_size}),'
                    + f' to {write_path} (file size {target_file_size}),'
                    + f' requested offset {data_offset + data_len}'
                    + f' Maybe part file lost?',
                    read_path, read_file_stat.st_size, write_path, target_file_size, data_offset, data_len)
            if not dry_run:
                try:
                    dst_mmap[data_offset:data_offset+data_len] = block_data
                except IndexError:
                    logging.error('offset %s, data_len %s, len(data) %s', data_offset, data_len, len(block_data))
                    raise
    finally:
        # Release resources in reverse acquisition order: each mmap before its fd.
        if read_data is not None:
            read_data.close()
        if read_fd is not None:
            os.close(read_fd)
        if dst_mmap is not None:
            dst_mmap.close()
        if fd is not None:
            os.close(fd)


class CrabVersion:
    """One backup version, identified by a unix-timestamp string."""

    def __init__(self, version, crab_dir):
        """
        :type version: str
        :type crab_dir: CrabDir
        """
        self.crab_dir = crab_dir  # type: CrabDir
        self.version = version
        self.datetime = datetime.fromtimestamp(int(version))

    @property
    def path(self):
        """Metadata directory of this version under the archive's .INFO."""
        return self.crab_dir.path / '.INFO' / self.version

    @property
    def files_list(self):
        """File names recorded for this version, or None when absent."""
        list_path = self.path / 'file.list'
        return list_path.read_text().strip().splitlines() if list_path.exists() else None

    @property
    def info(self):
        """Free-form description stored in info.txt, or None when absent."""
        info_path = self.path / 'info.txt'
        return info_path.read_text().strip() if info_path.exists() else None

    def __str__(self):
        return self.version


class CrabFile:
    """One backed-up regular file inside a crab_sync directory."""

    def __init__(self, file_path, crab_dir):
        """
        :type file_path: Path
        :type crab_dir: CrabDir
        """
        self.crab_dir = crab_dir  # type: CrabDir
        self.path = file_path

    def restore(self, version=None, dst_path=None, with_blog=None, dry_run=None):
        """Restore this file to *version*.

        :param version: version to restore to.
        :type version: CrabVersion
        :param dst_path: when set, write the restored file there and leave the
            archive untouched (copy mode); otherwise patch the file in place.
        :type dst_path: Path|None
        :param with_blog: in copy mode, also copy block logs of older versions
            next to the destination.
        :param dry_run: log the planned actions without changing anything.
        """
        action_log_level = logging.INFO if dry_run else logging.DEBUG
        logging.log(action_log_level, 'restore version %s of %s to %s', version, self.path, dst_path or self.path)
        read_path = self.path
        target_file_size = None

        if dst_path:
            write_path = dst_path
            do_copy = True
        else:
            write_path = read_path
            do_copy = False

        diff_bmap = {}  # block_no -> block_data_path
        blog_paths_to_store = []
        blog_paths_to_merge = []

        for v in self.crab_dir.get_versions():
            blog_path = self.crab_dir.path/'.BLOG'/v.version/self.path.name
            if not blog_path.exists():
                logging.debug('No version in %s', blog_path)
                continue
            if v.datetime <= version.datetime:
                # Older versions: these must be preserved.
                blog_paths_to_store.append(blog_path)
            else:
                # Newer versions: these must be rolled back.
                blog_paths_to_merge.append(blog_path)
                if target_file_size is None:
                    # The first version after the requested one holds our target file size.
                    target_file_size = int((blog_path / 'FILESIZE').read_text().strip())
                for block_path in blog_path.iterdir():
                    for suffix in '.part', '.part.gz':
                        if block_path.name.endswith(suffix) and block_path.name[:-len(suffix)].isdigit():
                            block_no = int(block_path.name[:-len(suffix)])
                            if block_no not in diff_bmap:
                                # The first version after the requested one holds the data we need.
                                # logging.debug('Found diff for %s: %s', block_no, blog_path)
                                diff_bmap[block_no] = block_path
        if target_file_size is None:
            # By default work with the current version.
            target_file_size = self.path.lstat().st_size

        patch_crab_file(read_path, write_path, target_file_size, diff_bmap, do_copy=do_copy, dry_run=dry_run)

        if dst_path and with_blog:
            for blog_path in blog_paths_to_store:
                dst_blog_path = dst_path.parent/blog_path.relative_to(self.crab_dir.path)
                logging.log(action_log_level, 'Copy blog %s to %s', blog_path, dst_blog_path)
                if not dry_run:
                    dst_blog_path.parent.mkdir(0o750, parents=True, exist_ok=True)
                    shutil.copytree(blog_path, dst_blog_path, symlinks=True)

        if not dst_path:
            for blog_path in blog_paths_to_merge:
                logging.log(action_log_level, 'Remove merged blog %s', blog_path)
                if not dry_run:
                    shutil.rmtree(blog_path)


class CrabDir:
    """A directory managed by crab_sync (holds .BMAP, .BLOG, .INFO, ...)."""

    # Service entries that are never treated as backed-up content.
    SYS_FILES = {'.BLOG', '.BMAP', '.INFO', '.crab_sync', '.hashbin'}

    def __init__(self, path, root=None):
        """
        :type path: Path
        :type root: CrabDir
        """
        self.root_dir = root or self
        self.path = Path(path)

    def get_versions(self):
        """Return all versions of the root archive, oldest first."""
        return sorted(
            (
                CrabVersion(ver_path.name, self.root_dir)
                for ver_path in (self.root_dir.path/'.INFO').iterdir()
                if ver_path.name.isdigit()),
            key=lambda x: x.datetime)

    def iter_children(self):
        """Yield CrabDir/CrabFile children, skipping service entries and symlinks."""
        for file_path in sorted(self.path.iterdir()):
            if file_path.name in self.SYS_FILES:
                continue
            if file_path.is_symlink():
                logging.warning('Symlink is not supported %s', file_path)
                continue
            if file_path.is_dir():
                if (file_path/'.BMAP').exists():
                    yield CrabDir(file_path, self.root_dir)
                else:
                    logging.warning(f'Not crab dir found {file_path}')
            else:
                yield CrabFile(file_path, self)

    def restore(self, version=None, dst_path=None, with_blog=None, dry_run=None):
        """Recursively restore this directory tree to *version*.

        :type version: CrabVersion
        :type dst_path: Path|None
        :type with_blog: bool
        :type dry_run: bool
        """
        # TODO keep the rolled-back versions, with the ability to roll forward again.
        action_log_level = logging.INFO if dry_run else logging.DEBUG
        if dst_path:
            logging.log(action_log_level, 'make dst dir %s', dst_path)
            if not dry_run:
                # 0o750, not 0o640: a directory needs the execute bit so it can
                # be entered to create the restored files inside it; this also
                # matches the mode used for the blog/info copies below.
                dst_path.mkdir(0o750, parents=True, exist_ok=True)
        for crab_item in self.iter_children():
            # TODO Check deleted files???
            # TODO Skip new files
            crab_item.restore(
                version=version,
                dst_path=dst_path/crab_item.path.name if dst_path else None,
                with_blog=with_blog,
                dry_run=dry_run)

        dst_info_path = None  # for linter

        if dst_path and with_blog:
            if self is self.root_dir:
                dst_info_path = dst_path/'.INFO'
                logging.log(action_log_level, 'Make info dir %s', dst_info_path)
                if not dry_run:
                    dst_info_path.mkdir(0o750)
            logging.log(action_log_level, 'Copy bmap dir %s to %s', self.path/'.BMAP', dst_path/'.BMAP')
            if not dry_run:
                shutil.copytree(self.path/'.BMAP', dst_path/'.BMAP')

        for v in self.get_versions():
            if v.datetime <= version.datetime:
                # Older versions: preserve them.
                if dst_path and with_blog and self is self.root_dir:
                    logging.log(action_log_level, 'Copy info dir %s to %s', v.path, dst_info_path)
                    if not dry_run:
                        shutil.copytree(v.path, dst_info_path/v.path.name, symlinks=True)
            else:
                # Newer versions were merged away during an in-place restore:
                # remove their metadata (only at the archive root).
                if not dst_path and self is self.root_dir:
                    logging.log(action_log_level, 'Remove version %s in %s', v, v.path)
                    if not dry_run:
                        shutil.rmtree(v.path)


def parse_version(version, crab_dir=None):
    """Resolve a version string to a CrabVersion object.

    Falls back to the newest available version of *crab_dir* when *version*
    is empty; returns None when neither is given.
    """
    # todo version=ts(exact)|last_bk|approximate(2020-12-12)|relative(3d)|array_index(0, 1, -1, -2)
    if not version:
        if crab_dir:
            # Default to the latest (current) version. Maybe the one before last would be better?
            return crab_dir.get_versions()[-1]
        return None
    return CrabVersion(version, crab_dir)


def do_list(crab_dir=None, json_=None, **_kwargs):
    """Print every backup version, one per line, as text or JSON.

    :type crab_dir: CrabDir
    :param json_: when true, emit one JSON object per version instead of text.
    """
    for ver in crab_dir.get_versions():
        stamp = ver.datetime.strftime('%Y-%m-%d %H:%M:%S')
        if json_:
            record = {
                'version': ver.version,
                'datetime': stamp,
                'info': ver.info,
                'files_list': ver.files_list,
            }
            print(json.dumps(record))
        else:
            print(f'{ver.version} {stamp} files:{len(ver.files_list)} {ver.info}')


def do_remove_version(crab_dir=None, **_kwargs):
    """Handle the 'remove-version' command (not implemented yet)."""
    logging.error('TODO: remove version!')


def do_restore(crab_dir=None, dst=None, with_blog=None, version=None, dry_run=None, **_kwargs):
    """Handle the 'restore' command: roll the archive back to *version*.

    :type crab_dir: CrabDir
    :param dst: restore into this directory instead of in place.
    :type dst: Path|None
    :type with_blog: bool
    :param version: timestamp string of the version to restore.
    :type version: str
    :type dry_run: bool
    """
    target = parse_version(version, crab_dir)
    crab_dir.restore(version=target, dst_path=dst, with_blog=with_blog, dry_run=dry_run)


def do_gc(crab_dir=None, **_kwargs):
    """Handle the 'gc' (garbage collection) command (not implemented yet)."""
    logging.error('TODO: gc!')


def main(args=None):
    """Command-line entry point: parse arguments and dispatch to a handler.

    :param args: full argv including the program name; defaults to sys.argv.
    :return: 0 on success.
    """
    if args is None:
        args = sys.argv
    logging.info('Start %s', args)
    arg_parser = argparse.ArgumentParser(
        prog=args and args[0][:-3] or 'crab_sync_utils',
        description='Утилита для управления резервными копиями crab_sync')
    # Default to listing the current directory's versions when no subcommand is
    # given.  Without the crab_dir/json_ defaults, `params.action(**vars(params))`
    # would call do_list with crab_dir=None and crash on get_versions().
    arg_parser.set_defaults(action=do_list, crab_dir=CrabDir('.'), json_=False)
    arg_parser.add_argument('--debug', action='store_true', help='Показывать отладку')
    arg_parser.add_argument('--dry-run', '-n', action='store_true', help='Ничего не менять')
    sub_parsers = arg_parser.add_subparsers(dest='command')

    def add_crab_dir_argument(parser):
        # Every subcommand shares the same archive-directory option.
        parser.add_argument(
            '--crab-sync-dir', '--dir', '-d', default='.', metavar='.', type=CrabDir, dest='crab_dir',
            help='Путь к резервной копии')

    list_parser = sub_parsers.add_parser('list', help='Список версий резервной копии')
    add_crab_dir_argument(list_parser)
    list_parser.add_argument('--json', action='store_true', help='Вывод в JSON', dest='json_')
    list_parser.set_defaults(action=do_list)

    gc_parser = sub_parsers.add_parser('gc', help='Сборка мусора')
    add_crab_dir_argument(gc_parser)
    gc_parser.set_defaults(action=do_gc)

    restore_parser = sub_parsers.add_parser(
        'restore', help='Восстановить определённую версию резервной копии. Все более новые версии теряются.')
    add_crab_dir_argument(restore_parser)
    restore_parser.add_argument(
        '--dst', '-t', type=Path, help='Восстановить версию в указанной папке. Исходный архив не изменяется.')
    restore_parser.add_argument('--version', '-v', help='Версия timestamp, которую нужно восстановить.')
    restore_parser.add_argument(
        '--with-blog', '-l', action='store_true', help='Так же в dst копировать предыдущие версии.')
    restore_parser.set_defaults(action=do_restore)

    remove_version_parser = sub_parsers.add_parser('remove-version', help='Удаление/слияние версий')
    add_crab_dir_argument(remove_version_parser)
    remove_version_parser.add_argument(
        '--version',
        help='Версия или диапазон from:to, from - включительно, to - нет.'
             ' --version=2:4 оставит 1, 2, 4, 5.'
             ' Диапазон может быть открытый: --version=from: или --version=:to или --version=:')
    remove_version_parser.set_defaults(action=do_remove_version)

    params = arg_parser.parse_args(args[1:])
    if params.debug:
        logging.root.setLevel(logging.DEBUG)
    # Every handler accepts **_kwargs, so the whole namespace can be passed.
    params.action(**vars(params))
    logging.info('Finish %s', args)
    return 0


if __name__ == '__main__':
    # Configure the root logger before running the CLI (the whole module logs
    # through the root logger), then run with the process's real argv.
    logging.basicConfig(level=logging.INFO, format='%(asctime)s %(process)d %(name)s %(levelname)s %(message)s')
    main()
