from hashlib import md5

from bs4 import BeautifulSoup
from tqdm import tqdm

from patch.include import *
import re


def find_file(img_name, doc_path):
    """Locate a file called *img_name* anywhere below a document's upload folder.

    The search root is ``config.UPLOAD_DIR`` joined with *doc_path*; the tree
    is walked with ``os.walk`` and the first directory whose file list
    contains *img_name* wins.

    Returns the joined path of the first match, or ``None`` when no directory
    in the tree contains such a file.
    """
    search_root = os.path.join(config.UPLOAD_DIR, doc_path)
    # Lazy generator: the walk stops as soon as the first match is produced.
    matches = (
        os.path.join(folder, img_name)
        for folder, _subdirs, filenames in os.walk(search_root)
        if img_name in filenames
    )
    return next(matches, None)


@app_context(commit=False)
def patch():
    """Replace remote ``<img>`` sources in document texts with attachment URLs.

    Every ``CmfDocument`` whose ``cmf_import_id`` is not ``None`` is processed
    in pages of ``limit`` documents.  For each ``<img>`` tag whose source
    (``src_old`` if present, otherwise ``src``) starts with ``http``:

    * the md5 hex digest of the source URL is used as the file name to look
      up under ``files/<document code>`` via ``find_file``;
    * if the file is found, a ``CmfAttachment`` named ``<md5>.<ext>`` is
      fetched or created for the document and the file content is uploaded;
    * the tag's ``src`` is pointed at the attachment URL, the previous value
      is kept in ``src_old`` and the tag is marked with ``cmf_converted``.

    Sources whose file cannot be found are collected per document code and
    written to ``bad.json`` in the current working directory.  The data
    provider is committed after every page.
    """
    cnt = models.CmfDocument.count(filter=['cmf_import_id', '!=', None])
    g.debug(f'Всего: {cnt}')
    limit = 100
    bad_links = dict()
    # range() stops exactly at the last non-empty page; the previous
    # ``start > cnt`` check ran only after a page was processed and therefore
    # always issued extra empty queries and commits past the end.
    for start in range(0, cnt, limit):
        for document in tqdm(models.CmfDocument.list(filter=['cmf_import_id', '!=', None], fields=['text'],
                             slice=[start, start + limit])):
            code = document.code.value
            g.debug(f'=============================={code}')
            soup = BeautifulSoup(document.text.value, 'html.parser')
            for tag in soup.find_all('img'):
                if tag.attrs.get('cmf_converted'):
                    continue
                # An <img> without a ``src`` attribute must not abort the
                # whole run with KeyError, so read both attributes with .get().
                current_src = tag.get('src')
                src = tag.get('src_old') or current_src
                if not src or not src.startswith('http'):
                    continue
                g.debug(src)
                src_md5sum = md5(src.encode('utf-8')).hexdigest()
                file_path = find_file(src_md5sum, os.path.join('files', code))
                if not file_path:
                    g.debug(f'ERROR: не нашли картинку {src} документа {document.code} file_path={file_path}')
                    bad_links.setdefault(code, []).append(src)
                    continue
                g.debug(file_path)
                # The extension comes from the tag's current ``src`` (last path
                # segment, query string dropped), capped at 6 characters; fall
                # back to the original source when ``src`` itself is absent.
                format_tag = (current_src or src).split('/')[-1].split('?')[0].split('.')[-1]
                img_name = f'{src_md5sum}.{format_tag[:6]}'
                g.debug(img_name)
                attach = models.CmfAttachment.get(filter=[['parent', '==', document], ['name', '==', img_name]],
                                                  fields=['*'])
                if not attach:
                    g.debug('Создадим вложение')
                    attach = models.CmfAttachment(parent=document, name=img_name)
                    attach.save()
                with open(file_path, 'rb') as f:
                    attach.upload_file(f.read(), replace=False)
                # Keep the very first original URL; never overwrite an
                # existing ``src_old``.
                if not tag.attrs.get('src_old'):
                    tag.attrs['src_old'] = tag.attrs['src']
                tag.attrs['src'] = attach.url.value
                tag.attrs['cmf_converted'] = True
                g.debug(f'Заменили {tag}')
            document.text = str(soup)
            document.save(only_data=True)
        g.debug(f'{start} из {cnt}')
        # NOTE(review): the decorator is used with commit=False, so each page
        # is presumably meant to be committed explicitly here - confirm.
        models.CmfDocument.dp.commit()

    with open('bad.json', 'w', encoding='utf-8') as f:
        json.dump(bad_links, f)


# Run the patch only when this file is executed as a script, not on import.
if __name__ == "__main__":
    patch()
