from bs4 import BeautifulSoup
from tqdm import tqdm

from patch.include import *
import re


def find_file(img_name):
    """Locate ``img_name`` under the ``Firefox`` folder of the upload directory.

    Recursively walks ``{config.UPLOAD_DIR}/Firefox`` and returns the joined
    path for the first visited directory whose file list contains
    ``img_name``. Returns ``None`` when no visited directory contains it.
    """
    search_root = f'{config.UPLOAD_DIR}/Firefox'
    for current_dir, _subdirs, file_names in os.walk(search_root):
        if img_name not in file_names:
            continue
        return os.path.join(current_dir, img_name)
    return None


@app_context(commit=False)
def patch():
    """Report image file names referenced more than once in imported documents.

    Pages through every ``CmfDocument`` whose ``cmf_import_id`` is set, parses
    each document's text as HTML and groups the documents by the file name of
    every ``<img>`` ``src`` (last path segment, query string stripped).
    For each file name that occurs more than once, the number of occurrences
    is logged via ``g.debug`` and the ``parent_id`` of every referencing
    document is printed via ``g.print``.
    """
    cnt = models.CmfDocument.count(filter=['cmf_import_id', '!=', None])
    g.debug(f'Всего: {cnt}')
    start = 0
    limit = 100
    # Image file name -> documents referencing it (one entry per <img> tag).
    # Only the documents are kept: storing the parsed tags would keep every
    # parse tree alive for the whole run, and the tags are never read back.
    links = {}
    while True:
        page = models.CmfDocument.list(
            filter=['cmf_import_id', '!=', None],
            fields=['text', 'parent_id'],
            slice=[start, start + limit],
        )
        for document in tqdm(page):
            # Guard against a missing/empty text value so parsing cannot fail.
            soup = BeautifulSoup(document.text.value or '', 'html.parser')
            for tag in soup.find_all('img'):
                src = tag.get('src')
                if src is None:
                    # <img> without a src attribute: nothing to group by.
                    continue
                img_name = src.split('/')[-1].split('?')[0]
                links.setdefault(img_name, []).append(document)

        start += limit
        g.debug(f'{start} из {cnt}')
        if start > cnt:
            break
    for img_name, documents in links.items():
        if len(documents) > 1:
            g.debug(f'{img_name} встречается {len(documents)}')
            for document in documents:
                g.print(document.parent_id)


# Run the patch only when this file is executed as a script, not on import.
if __name__ == '__main__':
    patch()
