U
    ;i1                    @   s  d dl T d dlmZmZmZmZ d dlZd dlZd dlZd dl	m
Z
mZmZ d dl	Z	d dlZd dlZd dlZd dlmZ d dlZd dlZd dlZd dlZd dlZd dlmZ e Zddd	d
dddddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdPdQdRdSdTdUdVdWdXdYdZd[d\d]d^d_d`dadbdcdddedfdgdhdidjdkdldmdndodpdqdrdsdtdudvdwdxdydzd{d|d}d~dddddddddddddddddddddddddddddddgZddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd ddddddddd	d
ddddddddddddddddddgZddd
dddddddddddddddddddddd d!d"ddd8d&d'ddd*d+d,dd.d/d0d1d2d3d4d5d6d7d8d8dd;dd=dd>d?ddAdBdCdDdEdd dHdIdJdKdLddNdOddHdRd8dTdUdVdWddYdZdDd\d8d^d_d`daddcd!dedfdHdhd!djdkdldmdnddpd8drdsd"dudvdwdxd+d{d|d}d~dddadOdd"dHdeddadldd#daddddddHdddd"dgZed$dbddddPdFdSdd]dd$d-dgd:d)dzdddd[dtdd@ddGd<d#dddidqd(ddMddd%d	dQd=dddXd9ddog.7 Zed%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;g7 Zeee e Zdd<d=d>d?d?d@d@dAd@ddBdCdDdEdFddGddHdIdJddKdLdMdDd:dzgZdNdOdPdQdRdSdTdUdVdWdXdYdZd[d\d]gZ ed^d_d`dadbdcdddedfdgdhdidjdkdldmg7 Zeee Z!dndodpdqdrdsdtgZ"e
G dudv dvZ#G dwdx dxZ$G dydz dze$Z%G d{d| d|e$Z&G d}d~ d~Z'dS (      )*)ListSetDictOptionalN)	dataclassfieldfields)
namedtuple)BeautifulSoup   иu   вu   воu   неu   чтоu   онu   наu   яu   сu   соu   какu   аu   тоu   всеu   онаu   такu   егоu   ноu   даu   тыu   кu   уu   жеu   выu   заu   быu   поu   толькоu   ееu   мнеu   былоu   вотu   отu   меняu   ещеu   нетu   оu   изu   емуu   теперьu
   когдаu   дажеu   нуu
   вдругu   лиu   еслиu   уже   илиu   ниu   бытьu   былu   негоu   доu   васu   нибудьu
   опятьu   ужu   вамu   ведьu   тамu
   потомu   себяu   ничегоu   ейu
   можетu   ониu   тутu   гдеu   естьu   надоu   нейu   дляu   мыu   тебяu   ихu   чемu   былаu   самu   чтобu   безu
   будтоu   чегоu   разu   тожеu   себеu   подu
   будетu   жu
   тогдаu   ктоu   этотu   тогоu   потомуu
   этогоu
   какойu   совсемu   нимu
   здесьu   этомu   одинu
   почтиu   мойu   темu
   чтобыu   нееu   сейчасu   былиu   кудаu
   зачемu   всехu   никогдаu
   можноu   приu   наконецu   дваu   обu   другойu   хотьu
   послеu   надu   большеu   тотu
   черезu   этиu   насu   проu
   всегоu   нихu
   какаяu
   многоu
   развеu   триu   этуu   мояu   впрочемu   хорошоu   своюu   этойu
   передu   иногдаu
   лучшеu   чутьu   томu   нельзяu
   такойu   имu
   болееu   всегдаu   конечноu   всюu
   междуimeZmyZmyselfZweZourZoursZ	ourselvesZyouZyourZyoursZyourselfZ
yourselvesheZhimZhisZhimselfZsheZherZhersZherselfitZitsZitselfZtheyZthemZtheirZtheirsZ
themselvesZwhatwhichZwhoZwhomthisthatZtheseZthoseamisZareZwasZwerebeZbeenZbeingZhaveZhasZhadZhavingZdoZdoesZdidZdoingaZanZtheandZbutiforZbecauseasZuntilwhileZofZatZbyforwithZaboutZagainstZbetweenZintoZthroughZduringbeforeZafterZaboveZbelowtofromZupZdowninoutZonZoffZoverZunderZagainZfurtherZthenoncehereZthereZwhenwherewhyZhowallanyZbothZeachZfewZmoreZmostotherZsomeZsuchnoZnornotZonlyZownsameZsoZthanZtooZverystZcanZwillZjustZdonZshouldnowu   всёu   ещёu   нибытьu   мочьu   этоu   весьu   свойu
   такоеu
   нужноu   хотяu   затоu   именноu   вообщеu   тудаu   такиu   делатьu   помнитьu
   менееu   оноu
   кромеu   твойu   откудаu   считатьu   внеu   тойu   каждыйu
   никтоu   которыйu
   затемu   вопросu   обаu   ещu   нибu   мочu   этu   веu   своu   нужнu   хотu   затu
   вообщu   тудu   менu   кромu   твоu   & внеu   каждu   никтu
   которu
   авторu   бизнес-процессu   владелецu   датu   исполнителu   контрагентu   логическu
   отделu   постановщикu   приоритетu   проектu   процессu
   спискu   стандартнu   статусu   типZapprovbaseclosedefaultZdocumentopenZtasku   бизнесu   документu
   задачu   задачаu   закрытu
   обычнu   системu   созданu   черновикtextml_textnametagscommentsZaddon_fieldsZkey_phrasesc                   @   s  e Zd ZU dZeed< dZeed< dZeed< dZeed< dZ	eed< dZ
eed< dZeed< dZeed	< dZeed
< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZ	eed< dZeed< dZeed< dZ
eed< dZeed< dZeed< dZeed< dZ eed< dZ!eed< dZ"eed< dZ#eed < dZeed< dZ$eed!< dZeed	< dZ%eed"< dZ&eed#< dZ'eed$< dZ(eed%< dZ)eed&< dZ*eed'< dZ+eed(< dZ,eed)< dZ-eed*< dZ.eed+< dZ/eed,< dZ0eed-< dZ1eed.< dZ2eed/< dZ3eed0< dZ4eed1< dZ5eed2< dS )3DriverSearchObjectN	obj_modelobj_idobj_codeobj_nameobj_modified_atobj_project_idobj_related_person_loginsobj_ml_textobj_textobj_tagsobj_logic_type_codeobj_activity_codeobj_status_typeobj_result_textobj_commentsobj_owner_nameobj_responsible_namesobj_hrefobj_parent_idobj_tree_parent_idobj_root_parent_idobj_created_atobj_deletedobj_archivedobj_author_nameobj_modified_by_nameobj_addon_fieldsobj_user_ratingobj_key_phrasesobj_company_idobj_breadcrumbs"obj_related_person_logins_tsvectorname_tsvectortext_tsvectortags_tsvectorresult_text_tsvectorcomments_tsvectoraddon_fields_tsvectorkey_phrases_tsvectorml_text_tsvectorheadlineheadline_rawtitlebreadcrumbslabelrankage_daysdebug_labelmark)6__name__
__module____qualname__r<   str__annotations__r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   rL   rM   rN   rO   rP   rQ   rR   rS   rT   rU   rV   rW   rX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   rb   rc   rd   re   rf   rg   rh   ri   rj   rk   rl    rr   rr   !./cmf/models/cmf_search_engine.pyr;   J   sj   
r;   c                   @   s4   e Zd Zdd Zdd Zdd Zdd Zd	d
 ZdS )SearchDriverBasec                 C   s   t d S NNotImplementedselfsearchrr   rr   rs   select_empty_query_top   s    z'SearchDriverBase.select_empty_query_topc                 C   s   t d S ru   rv   rx   rr   rr   rs   select_empty_query   s    z#SearchDriverBase.select_empty_queryc                 C   s   t d S ru   rv   rx   rr   rr   rs   select_bm25   s    zSearchDriverBase.select_bm25c                 C   s   t d S ru   rv   rx   rr   rr   rs   select_default   s    zSearchDriverBase.select_defaultc                 C   s   t d S ru   rv   rx   rr   rr   rs   select_attachment   s    z"SearchDriverBase.select_attachmentN)rm   rn   ro   r{   r|   r}   r~   r   rr   rr   rr   rs   rt      s
   rt   c                   @   s,   e Zd Zdd Zdd Zdd Zdd Zd	S )
SearchDriverEvaFullSearchc                 C   s.   t j|}g }|D ]}|tf | q|S ru   )modelsCmfFullSearchZfilter_empty_topappendr;   )ry   rz   Z
found_objsres	found_objrr   rr   rs   r{      s    z0SearchDriverEvaFullSearch.select_empty_query_topc           	      C   s  g }g }g }g }g }|j dks(|j dkr^tjj|jttjjdd|d}tjj|jdd|d}|j dksr|j dkrtjj|j	ttjjdd|d}tjj|j	dd|d}|j dks|j dkrtjj|j
d	dd|d
}g }|| | | | D ]}|tf | q|S )NANYCmfTaskZEMPTY   )Zforce_related_person_loginrh   rj   rz   )rh   rj   rz   CmfDocumentr   r   r6   )force_field_namerh   rj   rz   )
model_namer   r   Zfilter_onceAKA_TASK_MODELSrp   gcurrent_userloginAKA_DOC_MODELSAKA_DICT_MODELSr   r;   )	ry   rz   task_empty_query_related_usertask_empty_querydoc_empty_query_related_userdoc_empty_queryZother_empty_queryr   r   rr   rr   rs   r|      sZ    

z,SearchDriverEvaFullSearch.select_empty_queryc              	   C   s  |j dks|j dkr8tjj|j|jdddgdd|d}ng }|j dksP|j d	krttjj|j|jdddgdd|d}ng }d
}|j dkrd}|j dkrtjj|j|jdd|gd|d}n2|j dkrtjj|j g|jdd|gd|d}ng }g }|| | D ]}|	t
f | q|S )Nr   r   r8   r   i,  ZTOPT)r   force_slicerh   Zinclude_attachmentrz   r         )r   r   rh   rz   r   )r   r   r   Zsearch_once_top_bm25r   tsquery_without_synr   search_oncer   r   r;   )ry   rz   result_tasksresult_docsZother_sliceresult_dictr   r   rr   rr   rs   r}     sP        

   
   z%SearchDriverEvaFullSearch.select_bm25c           
      C   sT  g }g }g }g }g }g }|j dkr<tjj|j|jd|d}n$|j dkr`tjj|j g|jd|d}|j dkst|j dkrtjj|j|jdd|d}tjj|j|jd|d}|j dks|j d	krtjj|j|jdd|d}tjj|j|jd|d}|j dks|j d
krtjj|j	|jd|d}g }|| | | | | D ]}	|
tf |	 q8|S )Nr   ZMAIN)rh   rz   r   r   ZSYNsyn)rh   rl   rz   r   CmfAttachment)r   r   r   r   r   r   r   tsquery_with_synr   AKA_ATTACHMENT_MODELSr   r;   )
ry   rz   r   result_tasks_synr   result_docs_synr   result_attachsr   r   rr   rr   rs   r~   6  sf    

      z(SearchDriverEvaFullSearch.select_defaultN)rm   rn   ro   r{   r|   r}   r~   rr   rr   rr   rs   r      s   
2$r   c                   @   s   e Zd ZdS )SearchDriverElasticSearchN)rm   rn   ro   rr   rr   rr   rs   r     s   r   c                   @   sP  e Zd ZdZdgZdgZdgZdd ejj	
 D Ze ZdZdZed>d	d
Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zedd Zdd Zdd Zedd  Zed!d"d#d$d%Zed&d' Zed(d) Z ed*d+ Z!ed,d- Z"ed.d/ Z#d0d1 Z$ee%e%e&d2d3d4Z'ed5d6d7d8Z(ed?d:d;Z)ed@d<d=Z*dS )ACmfSearchEngineu  
    Главная задача: выполнение поиска (без учета дата-драйвера PG или Elastic).

    Алгоритм:
    - Подготовка запроса
    - Запрос делаем через дата-драйвер, получаем сырые предварительные данные (х10 объема)
    - Сортируем по классам
    - Сортируем по bm25eva
    - Проверяем права
    - Смешиваем потоки (оптимизированно с проверкой прав)
    - Формируем вывод

    Драйвер:
    - Уметь делать запросы
    - Не обязательно, но желательно: давать статистику bm25 (даже если используется Elastic,
          мы храним данные в нашей таблице все равно и можем посчитать bm25 статистику из нее)
    r   r   r   c                 C   s"   g | ]}|j r|jd kr|jqS ))r   r   r   )Zfull_search
class_name).0mrr   rr   rs   
<listcomp>  s     
zCmfSearchEngine.<listcomp>NFc                 K   s  t |dkrtddd |d kr$d}|s0ddg}t }||_||_||_||_||_||_||_	||_
||_|	|_|
|_||_d|_||_||_||_tjjdd}|d	krtd
|  ttds|   |  |  t||_d|jd< |jdr|jd d |_|jdkr d|_|jdkr8d|_d|_|rJt||_ nt |_ |j!dkrp|jrp|"  n0|j!dkr|#  n|jr|$  n|%  |&  |j'S )Ni   uY   Превышена максимально допустимая длина запроса!T)abort r   d   )Zis_dirty   u   Идет процесс индексации, могут быть доступны не все результаты поиска. Осталось объектов: FTS_STAT_WORDSZModelr   r7   Z
CmfCommentr:   )(len	cmf_alertr   r   orig_field_name
field_nameorig_search_queryonly_idsslicer	   no_analitycscheck_access_custom_fntoparchiveddeletedFSTorig_tag_nameorig_tree_parent_idZaddon_filterr   r   countZcmf_notehasattrAPPbm25_stat_words_loadprepare_search_queryprepare_additional_filterlistZfullsearch_sliceendswithsetZrecent_projectlike_search_querysearch_empty_top_recentsearch_empty
search_topsearch_main_prepare_final_resultfinal_result)clsr   r   search_queryr   r   r	   r   Zcheck_accessr   r   r   tag_nametree_parent_idZrecent_projectskwargsrz   Zdirty_countrr   rr   rs   
fts_search  sf    

	




zCmfSearchEngine.fts_searchc                 C   sx   g }g }g }g }|D ]V}|j | jkr0|| q|j | jkrH|| q|j | jkr`|| q|| q||||fS ru   )r<   r   r   r   r   )ry   search_objectsZ	aka_tasksZaka_docsZaka_dictZ
aka_attachsearch_objectrr   rr   rs   _split_search_objects_to_4_aka'  s    z.CmfSearchEngine._split_search_objects_to_4_akac                 C   sr   g }g }t jr&t jjjr&t jjjdkr.g |fS tt jj}|D ]*}|jr^||jkr^|| q>|| q>||fS )Nr   )r   r   r   valuerp   rB   r   )ry   r   ZrelatedZnot_related
user_loginr   rr   rr   rs   %_split_search_objects_to_related_user7  s    z5CmfSearchEngine._split_search_objects_to_related_userc                 C   s:   g }g }|D ]$}|j dkr&|| q|| q||fS )Nr   )rl   r   )ry   r   r   Znosynr   rr   rr   rs   "_split_search_objects_to_nosyn_synF  s    
z2CmfSearchEngine._split_search_objects_to_nosyn_sync                 C   s   g }t  }t }g }| j| }| |\}}}}dd }	ddd}
t|}|	||
| |	||
| |	||
| |	||
| |	||
| |	||
| t|}|dks||krFqqF|d d | _t | dkrtdt |   d S )	Nc                 S   sJ   |d |d krd S |sd S | d}d|_| | |d  d7  < d S )Nr   total_limitr   Z	empty_top   poprk   r   r   countersobj_listobjrr   rr   rs   append_if_exists_empty_top[  s    

zKCmfSearchEngine.search_empty_top_recent.<locals>.append_if_exists_empty_topr      r   r      皙?z3PROF fulltext_search empty_queries_top selects got )	r   timedriverr{   r   r   sorted_resultr   debug)ry   r   skip_idsprof_str   Ztop_results_tasksZtop_results_docsZtop_results_dictsZtop_results_attachr   Zcounters_empty_topres_count_beforeres_count_afterrr   rr   rs   r   Q  s*    
z'CmfSearchEngine.search_empty_top_recentc                 C   s   g }t  }t }| j| }| |\}}}}| |\}	}
| |\}}d}t|	|
|||gr|	df|
df|df|df|df|dff}| ||| |d7 }|dkrPt	d qqPt | dkrt	dt |   || _
d S )Nr   r     W   Баг в поиске, много данных, либо не идет вычитка.r   z&PROF fulltext_search search_empty got )r   r   r   r|   r   r   r*   _add_if_existsr   r   r   )ry   r   r   r   r   Zresults_tasksZresults_docsZresults_dictsresults_attachr   r   r   r   
iter_countprocessing_grouprr   rr   rs   r   |  s8     	
zCmfSearchEngine.search_emptyc                 C   s  t   }g }t }| jdkr g S | jdkr.g S | j| }| |\}}}}| || j}| || j}t   | dkrt	
dt   |   t   }t }g }	dd }
dd }d	d	d	d	d	d
d}d	dd}t|	}|
|	|| |
|	|| |
|	|| |
|	|| ||	|| |
|	|| |
|	|| |
|	|| |
|	|| t|	}|dksb||krΐqbq|	d d | _t   | dkrt	
dt   |   d S )Nr:   r   r   z'PROF fulltext_search TOP25 selects got c                 S   s@   t tjj}|sd S |d}d}| | ||  d7  < d S )Nr   cr   )rp   r   r   r   r   r   )r   r   r   r   r   Z	cur_classrr   rr   rs   append_if_exists  s    

z4CmfSearchEngine.search_top.<locals>.append_if_existsc                 S   sJ   |d |d krd S |sd S | d}d|_| | |d  d7  < d S )Nr   r   r   r+   r   r   r   rr   rr   rs   append_if_exists_other  s    

z:CmfSearchEngine.search_top.<locals>.append_if_exists_otherr      )r   br   Za_limitZb_limitZc_limitr   r   r   z2PROF fulltext_search TOP25 mixing and prepare got )r   r   r   r   r   r}   r   	bm25_sortr   r   r   r   r   )ry   r   r   r   r   r   r   result_dictsr   r   r   r   r   Zcounters_otherr   r   rr   rr   rs   r     sP    

  
zCmfSearchEngine.search_topc                 C   sX  g }t  }t }| j| }| |\}}}}| |\}}	| |\}}
t | dkrttdt |   t }d}t|||||	|
gr|df|df|df|df|	df|df|	df|df|df|
df|df|
df|df|dfg}| 	||| |d7 }|dkrtd qq|d d | _
t | dkrTtd	t |   d S )
Nr   z&PROF fulltext_search main selects got r      r   r   r   r   z1PROF fulltext_search main mixing and prepare got )r   r   r   r~   r   r   r   r   r*   r   r   )ry   r   r   r   r   r   r   r   r   r   r   r   r   rr   rr   rs   r     sF    

zCmfSearchEngine.search_mainc                 C   sN   |S ]:}|sq|d dkr4t|dks|d dkr4q|| qd|}|S )u   
        Подчистка оригинального квери, который ввел пользователь:
        - удаление стоп-слов
         r   -r   )splitr   r   join)r   r   Zclean_search_query_listwZclean_search_queryrr   rr   rs   _clean_search_queryR  s     
z#CmfSearchEngine._clean_search_queryc                 C   s   | j }| }t|}|| _| j  | _d | _| jr^td| jsRtd| jr^| j	 | _| j
| jddd| _| j
| jdd| _| j
| jdd| _d S )Nz^[a-zA-Z0-9]+-[0-9]+$z^[0-9]+$FT)synonyms
stop_wordsr  )r   lowercmfutilZninjar   r   Zsearch_obj_coderematchupperparse_search_queryr  r   r   )ry   r   rr   rr   rs   r   f  s    
z$CmfSearchEngine.prepare_search_queryc                 C   s$   |  | j| j| _| | j| _d S ru   )	calc_tagsr   r   r   _get_all_branchesr   Ztree_parent_filter)ry   rr   rr   rs   r   }  s    z)CmfSearchEngine.prepare_additional_filterc                 C   sP   t  }|r4t|tr*|D ]}|| qn
|| | |\}}t||S ru   )r   
isinstancer   add_extract_tagsunion)r   r   r   r9   Z_tagZextracted_tagsrr   rr   rs   r    s    

zCmfSearchEngine.calc_tagsrp   ztuple[str, set[str]])r   returnc                 C   s   t  }| d}t|dkr$| |fS d} |dd D ]Z}|dkrBq4td|d}|d dkrj||d  t|dkr4|  |d  |d  } q4|  } | |fS )	zExtrats tags from the given search_query and returns its reminder and a set of extracted tags

        Args:
            search_query (str)

        Returns:
            tuple[str, list[str]]: search_query reminder and a set of extractd tags
        #r   r   Nz(\W)r   r      )r   r  r   r  r  strip)r   r9   Zsharp_splittedtokenZ
sub_tokensrr   rr   rs   r    s    

zCmfSearchEngine._extract_tagsc                 C   s   t ||d  S )Nr   )mathlog)r   NZdfrr   rr   rs   bm25_idf  s    zCmfSearchEngine.bm25_idfc                 C   s&  i }i }t jjdd|d< t jjdd|d< t jjj d }t jjj d }|t	|d d  d }|t	|d	 d  d }t	|d
kr|d d }|d d }|t	|d d  d }|t	|d	 d  d }t	|d
kr|d d }|d d }d}	|D ]:}
i ||
j
< | |d |
j||
j
 d< ||
j
 d }q|D ]F}
|
j
|krhi ||
j
< | |d |
j||
j
 d< ||
j
 d }qNi |d< ||d d< ||d d< i |d< i |d< || d |d d< || d |d d< || d |d d< || d |d d< |t_|t_d S )Nr   )r<   r   z SELECT word, ndoc, nentry  FROM ts_stat('SELECT ml_text_tsvector from cmf_full_search where obj_model=''CmfDocument''  ') where  ndoc > 10  order by ndoc desc limit 10000 z SELECT word, ndoc, nentry  FROM ts_stat('SELECT ml_text_tsvector from cmf_full_search where obj_model=''CmfTask''  ') where  ndoc > 10  order by ndoc desc limit 10000 r   r   r   
   i  i  i  ditiFST_STAT_WORDS_DEFAULTZFST_FRQ_MIDZFST_FRQ_LOW)r   r   r   ZCmfGlobalSettingsdpdata_driverSessionexecuteZfetchallr   wordr  Zndocr   r   
FTS_COUNTS)r   r   r(  Zcmfdoc_statZcmftask_statZdoc_mid_wordZdoc_low_wordZtask_mid_wordZtask_low_wordlastrowZlast_docZ	last_taskrr   rr   rs   r     sJ    

z$CmfSearchEngine.bm25_stat_words_loadc                 C   s6  g }|D ]"}|  ||\|_|_|| qt|dd dd}t|dkrd}|d j|d j dkrld}n:|d	 j|d j dkrd}n|d
 j|d j dkrd	}|dkrt|d | dd d||d   }|d d D ]R}|jd d|jdd|j d|  |_t|j|jdd|j	d|j|j
 q|S )Nc                 S   s   | j S ru   )	rank_bm25r   rr   rr   rs   <lambda>      z+CmfSearchEngine.bm25_sort.<locals>.<lambda>T)keyreverser   r   r   g333333?r  r   c                 S   s   | j S ru   )rj   r,  rr   rr   rs   r-  
  r.  )r/  r   r  zrbm25=.2fz | tsq=z.4f)	bm25_rankr+  
rank_debugr   sortedr   rh   printr?   ri   r>   )r   r   tsqueryZnewresr*  r.   rrr   rr   rs   r     s&    $((zCmfSearchEngine.bm25_sortc                    s   fdd}| j }i } jd k	r8 jdkr8| j}tdd| }d}d}d}	d}
d}d}g }|D ]X}||krzqj|| |d7 }||krqjd} jd	krd
}|tjkr|tj| krtj| | }ntjd | }d}d}t	|| d}|dkrd}|||d   ||  }|}d}||kr^ j
r^| j
 kr^|
d7 }
d}|d }n|	d7 }	|dkr|dkr|d }|| }|d| d|dd|dd| d|dd| 7 }||7 }qj|}tjtjj j  d d }ddd d!|    }|| }d"|d#d$|dd| }d}tjjjr`tjjjdkr` jr`tjjj jkr`d%}d}d&tkr jtjkrd%}d'|	|
  d(| d)|dd| }|| | }||fS )*Nc                    s:    j dd} tdd| } | d d } td|  d S )N'"z":([0-9,]*)z":"\1", z{ z })rc   replacer  subjsonloads)Ztmpr   rr   rs   tsvector2dict  s    z0CmfSearchEngine.bm25_rank.<locals>.tsvector2dictr   z[()!|&']r  r   r   r   r   r!  r"  g      ?g      ?,r   FTr   r   g?z w=z
 bm25Orig=r1  z idf=z.1fz fqd=z ord=z	 in_name=i     r  r     z age=.0fz ageF=g?FTS_RECENT_PROJECTSz wc=z rur=z rpr=)rc   r\   r  r<  r  r   r<   r   r   r   r?   r
  datetimer1   timezoneZutcr@   Ztotal_secondsr   r   r   r   rB   rA   rE  )r   r   r6  r@  Zdoc_vector_dictZname_vector_dictqueryri   r3  Z	w_doc_cntZ
w_name_cntZw_cntZorder_factorZ
done_wordsr  Z	model_keyZidfkr   ZfqdZbm25Z	bm25_origZin_namerj   Z
age_factorZrelated_user_rankZrecent_project_rankrr   r?  rs   r2    sx    	





"
2
"$"zCmfSearchEngine.bm25_rankc                 C   s   d| kr| S |  dd S )Nz@#@#@#r   )r  )rD   rr   rr   rs   _remove_suffix_from_headlinem  s    z,CmfSearchEngine._remove_suffix_from_headlinec                 C   s<  | j | jd | jd  }| jri }g }|D ]}||j |jrN| |jnd}d}|jdk	rj|jd}|j}|j	r|d |j	 }|j|j
|j| d| d	| d	|jd
dtj 	t|j||j|jd||j< q,|t_td |S i }| jri }	|D ]}|	|jg |j q|	 D ]}
tt|
 jr\| jdg }dd|	|
 gdddgg}n| j}dd|	|
 g}|
dkr|dddg }|
dkr|dg }t|
j||| j| jd}|D ]8}tt|j jr|jr|||j j!< n
|||j!< qq"nt"d td g }|D ]}|#|j}|r|jrB| |jnd}d}|jdk	r`|jd}|j}|j	r||d |j	 }|j$dkr|j%sq|j&|j%j'|j%j(j)| d| d	| d	|jd
dtj 	||j|j|j!j)|j
|j*|j+|j,t|jd}n^t-.d|j
d }|j!j)||j(j)| d| d	| d	|jd
dtj 	||j|jt|jd}|j$dkrr|j/j)|d< zl| j0s|j1ddd  n| 0|st2| jD ].}|.d!d }t3||j |d}|||< q|| W n t2k
r   Y nX qtd | j4s2t5tj6j7| j8t9|dd" d#d$ || _:dS )%u5   
        Итоговая обработка
        r   r   r   NoneNz.6fr  z ||| r  rD  z words=)idr8   coderd   rg   rh   ri   rj   zfulltext_search ENDcmf_ver_headINZcmf_ver_curz==TrL  r   urlurl_previewurl_preview_imgr   fullsearch_answer)r	   filterZinclude_archivedZinclude_deleteduY   DEV: FATAL. Укажите в запросе поиска список полей fields=z"fulltext_search Start check access)rL  r8   rM  rd   rh   ri   rj   Z	attach_idZattach_nameZ
attach_urlZattach_url_previewZattach_url_preview_imgrg   z DZQTN )rL  r8   rM  rd   rh   ri   rj   rg   )ZTEXKOM_skip_failread_auditZTEXKOM_ppp_project_simplecheck.r   )r   obj_dict)r   );r   r   r   r   r=   rd   rJ  ri   rh   rk   rf   r>   rj   r   r   r=  r>  rg   fulltext_search_headlinesr   r	   
setdefaultr<   keysvarsr   Zcmf_verr  Zget_model_by_namer   r   r   rN  rL  r   getr   parentZ	parent_idr8   rM  r   rP  rQ  rR  r  r  rS  r   Z_acl_check_readZCmfPermissionErrorgetattrr   Zschedule_deferred_jobr   Z_do_calc_statisticsr   Z
dumps_dictr   )ry   r   rW  Z	result_idr7  rd   Zformated_rankrh   ZobjectsZids_by_modelr   _fieldsZ_filterr   r   r   rV  rf   r   attrrr   rr   rs   r   s  s    

$








$
$



z%CmfSearchEngine._prepare_final_result)r   r   r   c                 C   sj   |D ]`\}}t |D ]N}|s  q|d j|kr:|d q||d  ||d j |d qqd S )Nr   )ranger=   r   r   r  )r   r   r   r   Zlstr   _rr   rr   rs   r     s    
zCmfSearchEngine._add_if_existsz	list[str])r  c                 C   s0   | sg S t jjj dd| i}dd |D S )Na  
                WITH tree_parents AS (
                    WITH RECURSIVE r AS (
                        SELECT obj_id, obj_code, obj_tree_parent_id
                        FROM cmf_full_search
                        WHERE obj_tree_parent_id = :tree_parent_id

                        UNION

                        SELECT cfs.obj_id, cfs.obj_code, cfs.obj_tree_parent_id
                        FROM cmf_full_search AS cfs
                        JOIN r ON cfs.obj_tree_parent_id = r.obj_id
                    )
                    SELECT obj_id FROM r
                    WHERE r.obj_id IN (SELECT obj_tree_parent_id FROM r)

                    UNION

                    SELECT :tree_parent_id
                )
                SELECT * FROM tree_parents;
            r   c                 S   s   g | ]}|d  qS )r   rr   )r   r7  rr   rr   rs   r   =  s     z5CmfSearchEngine._get_all_branches.<locals>.<listcomp>)r   r   r#  r$  r%  r&  )r   Zrecordsrr   rr   rs   r    s    z!CmfSearchEngine._get_all_branchesTc              
   C   sr  dt _|dd}tdd|}td|}d}d}d}	|D ]}
t|
dkrPq<t|
dkr|
dkrfq<|
d	kr~|	d|
 7 }	q<|
dkrq<|
d
kr|	d7 }	q<|
dkr|	d7 }	q<t|
dkrq<|
d dkrt|
dkrq<|	d|
dd   7 }	|d|
dd   7 }q<|rq<|
dd}
|d7 }|dkr.d}d}|	rF|	d dkrJd}t|
dkr| j|
|d}tt	|t	t
 }t|dkr|	| d|d  7 }	n,t|dkr|	| dd| d 7 }	n |r|dkr q|dkr< qq<q<|r&t|dkr"|d dkr"|dd  }|S |	dddddd d!d  }	|	rn|	d dkrn|	dd  }	|	r|	d dkr|	dd  }	|	r|	d dkr|	dd  }	|	r|	dd  dkr|	d d }	z.tjjj d"d#|	i}t|d d }	W n` tjjk
r` } z<tjjj  tjjj d$d#|i}t|d d }	W 5 d }~X Y nX t|d%|	 |	S )&Nr   zwww.u   [^-A-Za-zА-Яа-я0-9()|&!' ]r  z(,| |&|\||\(|\))r   r   )r  !z()&|)r   r   |z |)r   r   &z &r  z& !rb  r  r   F)rc  rd  rd  r	  z ( z | z )r  )rd  rc  z OR z or z AND z & z and z!select to_tsquery('russian', :q);qz+select websearch_to_tsquery('russian', :q);z->)r   r   r;  r  r<  r  r   prepare_wordr   r   EVA_OR_QUERY_STOP_WORDS_NORMr  r  r   
CmfSynonymr#  r$  r%  r&  
sqlalchemyexcZProgrammingErrorZrollbackr5  )r   r   r  r  Z
first_wordZsearch_query_allowed_symbtokensZ
word_countZstopsrf  r0   ZoperZ	sug_wordsr6  err   rr   rs   r  @  s    

("z"CmfSearchEngine.parse_search_queryc                 C   s  t  jd| d7  _|d tjkr0d}tj}n
d}tj}g }t|s||}g }t	|}|rt  jd| d7  _|
| t  jd7  _d}	|D ]}
|	d	kr qHt|
d	krq|
d |d kr|
d
 |d
 krqd|
kr|
dd}
t  jd|
 d7  _|
|
 |	d
7 }	qt  jd|
 d7  _|
|
 |	d
7 }	qg }tjjj dd|i}d}	|D ]\}}|	d
kr qt|d	krqn|d |d ks|d
 |d
 krn|dd}t  jd| d7  _|
| |	d
7 }	qnt|t|B |hB }n|h}t }|D ]D}
t|
d d	 D ]*}||j t  jd|j d7  _q2q||B }t }|rtjjddt||hB gddgdgdd
gd}|D ]\}|jr|jjdd d D ]6}| dd}t  jd| d7  _|| qʐq||B t|B }t|S )Nz|w:z: r   enruzaddNinjaRevers z, zspellError, r  r   r  r   z	addSpell z
            SELECT
                name, similarity(:word, name) as sim
            FROM cmf_synonym
            WHERE
                :word % name
            ORDER BY "sim" desc
            LIMIT 5;
             r'  z<->zaddSpellTrgm z
normalize r8   rO  r6   Zorderno)rT  r	   Zorder_byr   rA     zsynAdd )r   r   stringascii_lettersr  Zdictionary_enZdictionary_ruZdictionary_checkZsuggestZninja_reversr   r   r;  r   ri  r#  r$  r%  r&  r   morphparser  Znormal_formr   r6   r   r  r  )r   r'  r  langZ
dictionaryZfiltered_suggestions3ZsuggestionsZfiltered_suggestionsZnwr   r  Zfiltered_suggestions2Zsuggestions2_listZsuggra  Zall_suggestionsZnormalized_wordsZsynonym_wordsZsynonym_listZsynonymr/   rr   rr   rs   rg    s    


 



	 
$
 zCmfSearchEngine.prepare_word)FNNFNNFFNNN)TFF)T)+rm   rn   ro   __doc__r   r   r   Zcmfr   Z	CmfEntityZiter_subclassesr   r   r   r   r   classmethodr   r   r   r   r   r   r   r   r  r   r   r  staticmethodr  r  r   r   r2  rJ  r   r   r   r   r  r  rg  rr   rr   rr   rs   r     sp   #                 j+,k?

-

3

Z
 #mr   )(Zcmf.includetypingr   r   r   r   r   r  rF  Zdataclassesr   r   r	   	tracebacksys	itertoolscollectionsr
   Zenchantrq  Z	pymorphy3r  rj  Zbs4r   ZMorphAnalyzerrs  ZRUSSIAN_STOP_WORDSZENGLISH_STOP_WORDSrh  r   ZEVA_OR_STOP_TSQUERYZEVA_ARTIFACT_STOP_WORDSZALL_STOP_WORDSZALLOWED_FIELDSr;   rt   r   r   r   rr   rr   rr   rs   <module>
   sL    3 = 9fdlDH y 0