
    i                       d dl T d dlmc mZ d dlmZmZmZmZ d dl	Z	d dl
Z
d dlZd dlmZmZmZ d dlZd dlZd dlZd dlZd dlmZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlmZ d dlmZ  ej                     Z!g dZ"g d	Z#g d
Z$g dZ%g dZ&g dZ' e(e"e#z   e%z             Z)g dZ*e G d d                      Z+ G d d          Z, G d de,          Z- G d de,          Z. G d d          Z/dS )    )*N)ListSetDictOptional)	dataclassfieldfields)
namedtuple)make_prof_point)BeautifulSoup)   иu   вu   воu   неu   чтоu   онu   наu   яu   сu   со   какu   аu   тоu   всеu   онаu   такu   егоu   ноu   даu   тыu   кu   уu   жеu   выu   заu   быu   поu   толькоu   ееu   мнеu   былоu   вотu   отu   меняu   ещеu   нетu   оu   изu   емуu   теперь
   когдаu   дажеu   нуu
   вдругu   лиu   еслиu   уже   илиu   ниu   бытьu   былu   негоu   доu   васu   нибудьu
   опятьu   ужu   вамu   ведьu   тамu
   потомu   себяu   ничегоu   ейu
   можетu   ониu   тут   гдеu   естьu   надоu   нейu   дляu   мыu   тебяu   их   чемu   былаu   самu   чтобu   безu
   будтоu   чегоu   разu   тожеu   себеu   подu
   будетu   жu
   тогда   ктоu   этотu   тогоu   потомуu
   этого
   какойu   совсемu   нимu
   здесьu   этомu   одинu
   почтиu   мойu   темu
   чтобыu   нееu   сейчасu   были   кудаu
   зачемu   всехu   никогдаu
   можноu   приu   наконецu   дваu   обu   другойu   хотьu
   послеu   надu   большеu   тотu
   черезu   этиu   насu   проu
   всегоu   нихu
   какаяu
   многоu
   развеu   триu   этуu   мояu   впрочемu   хорошоu   своюu   этойu
   передu   иногдаu
   лучшеu   чутьu   томu   нельзяu
   такойu   имu
   болееu   всегдаu   конечноu   всюu
   между)imemymyselfweourours	ourselvesyouyouryoursyourself
yourselveshehimhishimselfsheherhersherselfititsitselftheythemtheirtheirs
themselveswhatwhichwhowhomthisthatthesethoseamisarewaswerebebeenbeinghavehashadhavingdodoesdiddoingaantheandbutiforbecauseasuntilwhileofatbyforwithaboutagainstbetweenintothroughduringbeforeafterabovebelowtofromupdowninoutonoffoverunderagainfurtherthenonceheretherewhenwherewhyhowallanybotheachfewmoremostothersomesuchnonornotonlyownsamesothantooverystcanwilljustdonshouldnow)	r   r   u   почемуr   r   r   r   u   можешьr   )docwwwhttphttpsmailto)u
   авторu   бизнес-процессu   владелецu   датu   исполнителu   контрагентu   логическu
   отделu   постановщикu   приоритетu   проектu   процессu
   спискu   стандартнu   статусu   тип)approvbaseclosedefaultdocumentopentasku   бизнесu   документu
   задачu   задачаu   закрытu
   обычнu   системu   созданu   черновик)textml_textnametagscommentsaddon_fieldskey_phrasesc                   .   e Zd ZU dZeed<   dZeed<   dZeed<   dZeed<   dZ	eed<   dZ
eed<   dZeed<   dZeed	<   dZeed
<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZ	eed<   dZeed<   dZeed<   dZ
eed<   dZeed<   dZeed<   dZeed<   dZ eed<   dZ!eed<   dZ"eed<   dZ#eed <   dZeed<   dZ$eed!<   dZeed	<   dZ%e&ed"<   dZ'e&ed#<   dZ(eed$<   dZ)eed%<   dZ*eed&<   dZ+eed'<   dZ,eed(<   dZ-eed)<   dZ.eed*<   dZ/eed+<   dZ0eed,<   dZ1eed-<   dZ2eed.<   dZ3eed/<   dZ4eed0<   dZ5eed1<   dZ6eed2<   dZ7eed3<   dZ8eed4<   dZ9eed5<   dZ:eed6<   dS )7DriverSearchObjectN	obj_modelobj_idobj_codeobj_nameobj_modified_atobj_project_idobj_related_person_loginsobj_ml_textobj_textobj_tagsobj_logic_type_codeobj_activity_codeobj_status_typeobj_result_textobj_commentsobj_owner_nameobj_responsible_namesobj_hrefobj_parent_idobj_tree_parent_idobj_root_parent_idobj_created_atobj_deletedobj_archivedobj_author_nameobj_modified_by_nameobj_addon_fieldsobj_user_ratingobj_key_phrasesobj_company_idobj_breadcrumbs"obj_related_person_logins_tsvectorobj_user_portal_topobj_client_portal_topname_tsvectortext_tsvectortags_tsvectorresult_text_tsvectorcomments_tsvectoraddon_fields_tsvectorkey_phrases_tsvectorml_text_tsvectorresult_tsvectorresult_textheadlineheadline_rawtitlebreadcrumbslabelrankage_daysdebugmark);__name__
__module____qualname__r   str__annotations__r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   boolr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r        !./cmf/models/cmf_search_engine.pyr   r   \   s        IcF3HSHSOCN3$(c(((KHSHS"""" c   OCOCLN3 $#$$$HSM#!s!!!!s!!!N3OCKLN3OC####SOCOCN3OC$(c(((-1&s111K####!%$%%%M#M#M##### c    $#$$$####SOCKHSLE#KE#DOOOHSE#DOOOOOr   r   c                   ,    e Zd Zd Zd Zd Zd Zd ZdS )SearchDriverBasec                     t           NNotImplementedselfsearchs     r   select_empty_query_topz'SearchDriverBase.select_empty_query_top       r   c                     t           r   r   r   s     r   select_empty_queryz#SearchDriverBase.select_empty_query   r   r   c                     t           r   r   r   r   querys      r   select_bm25zSearchDriverBase.select_bm25   r   r   c                     t           r   r   r   s     r   
select_synzSearchDriverBase.select_syn   r   r   c                     t           r   r   r   s     r   select_attachmentz"SearchDriverBase.select_attachment   r   r   N)r   r   r   r   r   r   r   r   r   r   r   r   r      s_                    r   r   c                   .    e Zd Zd Zd ZddZd Zd ZdS )	SearchDriverEvaFullSearchc                     t           j                            |          }g }|D ]!}|                    t	          di |           "|S )Nr   )modelsCmfFullSearchfilter_empty_topappendr   )r   r   
found_objsres	found_objs        r   r   z0SearchDriverEvaFullSearch.select_empty_query_top   sh    )::6BB
# 	 	IJJ)        
r   c                    g }g }g }g }g }|j         dk    s|j         dk    rmt          j                            |j        t          t          j        j                  dd|          }t          j                            |j        dd|          }|j         dk    s|j         dk    rmt          j                            |j	        t          t          j        j                  dd|          }t          j                            |j	        dd|          }|j         dk    s	|j         dvr)t          j                            |j
        d	dd|
          }g }||z   |z   |z   |z   D ]!}|                    t          di |           "|S )NANYCmfTaskEMPTY   )force_related_person_loginr   r   r   )r   r   r   CmfDocumentr
  r  r   )force_field_namer   r   r   r   )
model_namer  r  filter_onceAKA_TASK_MODELSr   gcurrent_userloginAKA_DOC_MODELSAKA_DICT_MODELSr  r   )	r   r   task_empty_query_related_usertask_empty_querydoc_empty_query_related_userdoc_empty_queryother_empty_queryr  r  s	            r   r   z,SearchDriverEvaFullSearch.select_empty_query   s   (*%')$%%):i)G)G,2,@,L,L&+.q~/C+D+D -M - -)  &3??&	  @     %%):m)K)K+1+?+K+K%+.q~/C+D+D ,L , ,( %2>>%	 ?  O %%):B\)\)\ & 4 @ @&!' !A ! ! 69IILhhkzz  ~O  O 	8 	8IJJ)66I667777
r    c                    g }g }g }t          d|j        d                   }|j        dk    s|j        dk    r/t          j                            |j        |d|g| dd|          }|j        dk    s|j        d	k    r/t          j                            |j        |d|g| d
d|          }d}|j        dk    s|j        r|}|j        dk    r/t          j                            |j	        |d|g| d|          }n8|j        dvr/t          j                            |j        g|d|g| d|          }g }	||z   |z   D ]!}
|	
                    t          di |
           "|	S )Nd      r	  r
  r   TSK25F)force_slicer   include_attachmentr   r  DOC25   DICT25)r#  r   r   r  MDL25r   )maxslicer  r  r  search_once_top_bm25r  r  extendedr  r  r   )r   r   r   pfxresult_tasksresult_docsresult_dictslice_toother_slicer  r  s              r   r   z%SearchDriverEvaFullSearch.select_bm25)  s   sFLO,,%%):i)G)G!/DDVE[]b9:8sMMM?DV E U UL %%):m)K)K .CCFDY[`9:8sMMM?DV D U UK
 %%%"K%% .CCFDZ\a9:;RU~~~fl D n nKK&@@@ .CCVEVDWY^9:;RU}}}ek D m mK %3kA 	8 	8IJJ)66I667777
r   c                 0    |                      ||d          S )Nsyn)r   r   s      r   r   z$SearchDriverEvaFullSearch.select_synH  s    u555r   c                    g }g }g }g }g }g }|j         dk    r.t          j                            |j        |j        d|          }n7|j         dvr.t          j                            |j         g|j        d|          }|j         dk    s|j         dk    r[t          j                            |j        |j        dd|	          }t          j                            |j        |j        d
|          }|j         dk    s|j         dk    r[t          j                            |j        |j        dd|	          }t          j                            |j        |j        d|          }|j         dk    s|j         dk    r-t          j                            |j	        |j        d|          }g }||z   |z   |z   |z   |z   D ]!}	|
                    t          di |	           "|S )Nr	  DICM2)r   r   r  MDLM2r
  TSKSYNM2r4  )r   r   r   TSKM2r  DOCSYNM2DOCM2CmfAttachmentATCHM2r   )r  r  r  search_oncer  tsquery_without_synr  tsquery_with_synr  AKA_ATTACHMENT_MODELSr  r   )
r   r   r0  result_tasks_synr.  result_docs_synr/  result_attachsr  r  s
             r   select_default_OLDz,SearchDriverEvaFullSearch.select_default_OLDK  sJ    %% .::&*	 ;  KK &@@@ .::"#*	 ;  K %%):i)G)G%3??@VX^Xo6@uU[  @  ]  ]!/;;F<RTZTn6=f < N NL %%):m)K)K$2>>v?TV\Vm6@uU[ ? ] ]O .::6;PRXRl6=f ; N NK %%):o)M)M#1==,*	 >  N$ %(88;FX[iilww 	8 	8IJJ)66I667777
r   N)r  )r   r   r   r   r   r   r   rE  r   r   r   r   r      sj          0 0 0d   >6 6 6J J J J Jr   r   c                       e Zd ZdS )SearchDriverElasticSearchN)r   r   r   r   r   r   rG  rG    s        Dr   rG  c                   2   e Zd ZdZdgZdgZdgZdZ e            Z	d Z
e	 	 	 	 	 	 d1d            Zd	 Zd
 Zd Zed             Zed2d            Zd Zd Zd Zd Zd Zd Zd Zd Zed             Zed             Zed             Zed             Zed             Zed             Z ed              Z!ed!             Z"d3d#Z#d$ Z$ed%             Z%d& Z&ed'e'd(e'd)e(fd*            Z)ed4d,            Z*ed5d.            Z+ed/             Z,ed6d0            Z-dS )7CmfSearchEngineu  
    Главная задача: выполнение поиска (без учета дата-драйвера PG или Elastic).

    Алгоритм:
    - Подготовка запроса
    - Запрос делаем через дата-драйвер, получаем сырые предварительные данные (х10 объема)
    - Сортируем по классам
    - Сортируем по bm25eva
    - Проверяем права
    - Смешиваем потоки (оптимизированно с проверкой прав)
    - Формируем вывод

    Драйвер:
    - Уметь делать запросы
    - Не обязательно, но желательно: давать статистику bm25 (даже если используется Elastic,
          мы храним данные в нашей таблице все равно и можем посчитать bm25 статистику из нее)
    r  r
  r<  Nc                    d| _         d| _        d | _        d | _        d| _        ddg| _        d | _        d| _        d | _        d| _	        d| _
        d| _        d| _        d| _        d | _        d | _        i | _        t#                      | _        d | _        d | _        d | _        d | _        t/                      | _        | j        j        9d t6          j        j                                        D             | j        _        d S d S )Nr	  Fr   r   r  c                 <    g | ]}|j         r|j        d v|j        S ))r  r
  r<  )full_search
class_name).0ms     r   
<listcomp>z,CmfSearchEngine.__init__.<locals>.<listcomp>  s?     .f .f .fq=.f%&\9d%d%d /0l%d%d%dr   )r  orig_field_name
field_nameorig_search_queryonly_idsr*  r
   no_analitycscheck_access_custom_fntopr,  archiveddeletedFSTorig_tag_nameorig_tree_parent_idaddon_filtersetrecent_projecttree_parent_filterquery_qstop_wordssorted_resultfinal_resultlistsearch_query_partspeech	__class__r  cmfr  	CmfEntityiter_subclassesr   s    r   __init__zCmfSearchEngine.__init__  s   $!%#Z
!&*#!#' !ee"&!% " '+vv$>)1.f .fCJDXDhDhDjDj .f .f .fDN*** 21r   Fc                    t          dd          }t          |          dk    r
|d d         }|d}|r|d         dk    rddg}|sdd	g}t                      }||_        ||_        ||_        ||_        ||_        ||_        ||_	        ||_
        ||_        |	|_        ||_        |
|_        ||_        ||_        ||_        ||_        t          d
d          }t&          j                            d          }|dk    rt-          d|             |             |                                 |                                 t3          |          |_        d|j        d<   |j                            d          r|j        d d         |_        |j        dk    rd|_        |j        dk    rd|_        d|_        |rt9          |          |_        nt9                      |_         |             |j        dk    r,|j        s%t          dd          }|                                 n|j        dk    r%t          dd          }|                                 nP|j        r%t          dd          }|                                  n$t          dd          }|!                                  |             t          dd          }|"                                  |             |j#        S )Nz#CmfSearchEngine.fts_search__prepare   i   r  r!     r   2   r   z'CmfSearchEngine.fts_search__dirty_count
   T)is_dirtyu   Идет процесс индексации, могут быть доступны не все результаты поиска. Осталось объектов: Modelr	  r   
CmfCommentr   z/CmfSearchEngine.fts_search__do_empty_top_recent   z$CmfSearchEngine.fts_search__do_emptyz#CmfSearchEngine.fts_search__do_mainz"CmfSearchEngine.fts_search__do_topz0CmfSearchEngine.fts_search__prepare_final_result  )$r   lenrI  r  rQ  rR  rS  rT  r*  r
   rU  rV  rW  r,  rX  rY  r[  r\  r]  r  r  countcmf_noteprepare_search_queryprepare_additional_filterrd  fullsearch_sliceendswithr^  r_  search_empty_top_recentsearch_emptysearch_main
search_top_prepare_final_resultrc  )clsr  rR  search_queryrT  r*  r
   rU  check_accessrW  rX  rY  tag_nametree_parent_idrecent_projectsr,  kwargs
prof_pointr   prof_point_dcdirty_counts                        r   
fts_searchzCmfSearchEngine.fts_search  sG    %%JCPP
 |S  '-L L 	U1X^^GE 	HE ""
 '!+&#/ "*(4%
"" '%3"$
 ((QSUVV*00$0??  K  ~I  K  K  L  L  L 	##%%%((***"&u++%&" %%g.. 	7 & 1#2# 6F%% )F ,, %F *F  	*$'$8$8F!!$'EEF!& 	
#r))&/)()Z\_``J**,,,,%++()OQTUUJ!!!!_ 	 ()NPSTTJ    ()MsSSJ
$%WY\]]
$$&&&
""r   c                 n   t          j        dd| j                                                  | _        t          j        dd| j                  | _        t          j        dd| j                                                  | _        d | _        d | _        t          | j        
                                          dk    rxt          j        d| j                  st          j        d| j                  r| j                                        | _        t          j        d	| j                  r| j        | _        |                     | j        d
d          | _        |                     | j                  | _        | j        | j        g| j        | _        t          j        dd| j                  | _        | j        | _        | j        | _        d | _        | j                            d           d S |                     | j                  | _        | j        
                                D ]0}t3          j        |          }| j                            |           1t6                              | j                  | _        |                     | j                  | _        d | _        | j        | _        |                     | j                  | _        | j        | _        | j        s| j        | _        d S |                      | j                  }|                     |          }|                     |          }|| _        d S )Nz -[^ ]*r  z[|&()] u;   ([^A-Za-zА-Яа-я0-9](\s|$)|(\s|^)[^A-Za-zА-Яа-я0-9])r!  z'^[a-zA-Z0-9]+(?:-[a-zA-Z0-9]+)*-[0-9]+$z^[0-9]+$z@^[a-zA-Z0-9]*:[0-9a-f]*-[0-9a-f]*-[0-9a-f]*-[0-9a-f]*-[0-9a-f]*$FT)synonyms
stop_wordsu   [^А-Яа-яA-Za-z0-9.-]-CODE)!resubrS  lowersearch_query_wo_qstopr  stripquery_obj_codequery_obj_idrw  splitmatchupperparse_search_queryra  text_stemmingsearch_query_ninjasearch_query_normalizer?  r@  
query_likere  r  query_remove_stop_wordssearch_query_wo_stop_wordscmfnlp
partspeechcmfutilninjaquery_normalizesearch_query_stemmr,  query_add_syns)r   wr  ninja_with_syns search_query_normalize_with_synssearch_query_stemm_with_synss         r   rz  z$CmfSearchEngine.prepare_search_queryv  s	    &(VJr4;Q%R%R%X%X%Z%Z"F9S$2LMMF#abegkgxyy  B  B # t%++--..!33xBDDZ[[ E_a_ghsuy  vL  `M  `M E&*&<&B&B&D&D#x[]a]stt ;$($:!!%!8!89OZ_lp!8!q!q!%!3!3D4J!K!K *d.?.K&*&7D#*,&1LSQUQh*i*iD''+'BD$ %)$<D!"DO(//777F*.*F*FtGX*Y*Y' 06688 	< 	<A*1--J(//
;;;;")--0O"P"P '+&:&:4;R&S&S#  4"&"4"4T5P"Q"Q#'#:  } 	A$($<D!!! #11$2IJJO/3/C/CO/T/T,+/+=+=>^+_+_($@D!!!r   c           	      .   g }|                                 D ]i}|                     d          }|d         }|g}|dd          D ]*}t          |          dk    r|                    |           +t          j        |          }t          |          }	t          j                            ddt          ||hz            gddgdgddg	          }
|
D ]}|j	        r~|j	        j
                             d
          d d         D ]V}d|                                v r|                                                    dd          }|	                    |           W|                    d                    |	                     kd                    |          S )N|r   r!     r   INr   ordernofilterr
   order_byr*  ,   r  r  )r  rw  r  r  tokens_enrich_morphr^  r  
CmfSynonymrd  r   valuer  replaceaddjoin)r   r   r  tokentokensfirst_tokensearch_tokensr  normalized_wordssynonym_wordssynonym_listsynonymr   s                r   r  zCmfSearchEngine.query_add_syns  s    [[]] 	0 	0E[[%%F )K(MMABBZ , , q66Q;;!((+++  &9-HHKKM!,11&$M]al`mMmHnHn9o:@&9I<E;9:1 2 ? ?L ( - -< 
-$\/55c::2A2> 	- 	- !''))++$GGII--c266%))!,,,,JJsxx..////xx}}r   c                     |                      | j        | j                  | _        |                     | j                  | _        d S r   )	calc_tagsr[  rS  r  _get_all_branchesr\  r`  rj  s    r   r{  z)CmfSearchEngine.prepare_additional_filter  s<    t'94;QRR"&"8"89Q"R"Rr   c                     t                      }|rEt          |t                    r|D ]}|                    |           n|                    |           |                     |          \  }}t          |                    |                    S r   )r^  
isinstancerd  r  _extract_tagsunion)r  r  r  r   _tagextracted_tagss         r   r  zCmfSearchEngine.calc_tags  s    uu 	#(D)) #$ # #DHHTNNNN# """'*'8'8'F'F$nDJJ~..///r   r  r   returntuple[str, set[str]]c                    t                      }|                     d          }t          |          dk    r| |fS d} |dd         D ]m}|dk    r	t          j        d|d          }|d         dk    r|                    |d                    t          |          dk    r|  |d          |d          } n|                                 } | |fS )	zExtrats tags from the given search_query and returns its reminder and a set of extracted tags

        Args:
            search_query (str)

        Returns:
            tuple[str, list[str]]: search_query reminder and a set of extractd tags
        #r!  r  Nz(\W)r   r&     )r^  r  rw  r  r  r  )r  r   sharp_splittedr  
sub_tokenss        r   r  zCmfSearchEngine._extract_tags  s     uu%++C00~!##%%#ABB' 
	O 
	OE{{'5!44J!}""A''' :!##".N
1Nz!}NN#))++T!!r   c                 8   g }g }g }|D ]}|j         }|| j        v r(|j        s|j                            d          d         }|| j        v r|                    |           Y|| j        v r|                    |           x|                    |           |||fS )N:r   )r   rA  r   r  r  r  r  )r   search_objects	aka_tasksaka_docsaka_dictsearch_objectr   s          r   _split_search_objects_to_3_akaz.CmfSearchEngine._split_search_objects_to_3_aka  s    	+ 	/ 	/M%/ID666$2 )7==cBB1E	D000  ////d111........(H,,r   c                 N   g }g }t           j        r0t           j        j        j        rt           j        j        j        dk    rg |fS t	          t           j        j                  }|D ]=}|j        r||j        v r|                    |           (|                    |           >||fS )Nr  )r  r  r  r  r   r   r  )r   r  relatednot_related
user_loginr  s         r   %_split_search_objects_to_related_userz5CmfSearchEngine._split_search_objects_to_related_user0  s    ~ 	&Q^%9%? 	&1>CWC]acCcCc~%%-..
+ 	2 	2M6 2:Ip;p;p}----""=1111##r   c                     g }g }|D ]8}|j         |k    r|                    |           #|                    |           9||fS r   )r   r  )r   r  keyr   nomarkr  s         r   _split_search_objects_by_markz-CmfSearchEngine._split_search_objects_by_mark?  s]    + 	- 	-M!S((M****m,,,,t|r   c                 Z   g }t                      }t          j                    }g }| j                            |           }|                     |          \  }}}d }ddd}		 t          |          }
 |||	|            |||	|            |||	|            |||	|            |||	|           t          |          }|dk    s||
k    rnm|d d         | _        t          j                    |z
  dk    r3t                              dt          j                    |z
              d S d S )	Nc                     |d         |d         k    rd S |sd S |                     d          }d|_        |                     |           |dxx         dz  cc<   d S )Nrx  total_limitr   	empty_topr!  )popr   r  r  countersobj_listobjs       r   append_if_exists_empty_topzKCmfSearchEngine.search_empty_top_recent.<locals>.append_if_exists_empty_topT  sp     H]$;;; ,,q//C#CIJJsOOOW"r   r      rx  r  Trn  皙?z3PROF fulltext_search empty_queries_top selects got )	r^  timedriverr   r  rw  rb  r  r   )r   rc  skip_idsprof_str  top_results_taskstop_results_docstop_results_dictsr  counters_empty_topres_count_beforeres_count_afters               r   r~  z'CmfSearchEngine.search_empty_top_recentJ  s   55)++;;DAAAEAdAdesAtAt>+->		# 		# 		# ()<<	"<00&&|5GIZ[[[&&|5GIZ[[[&&|5GIZ[[[&&|5GIYZZZ&&|5GIYZZZ ",//O"$$;K(K(K	 *#2#.9;; 3&&GGa$)++X_J_aabbbbb '&r   c                    g }t                      }t          j                    }| j                            |           }|                     |          \  }}}|                     |          \  }}	|                     |          \  }
}d}t          ||	|
||g          rb|df|	df|
df|df|dff}|                     |||           |dz  }|dk    rt          	                    d           nt          ||	|
||g          bt          j                    |z
  dk    r1t          	                    dt          j                    |z
              || j
        d         | j
        d                  | _        d S )Nr   r!    W   Баг в поиске, много данных, либо не идет вычитка.r  z&PROF fulltext_search search_empty got )r^  r  r  r   r  r  r{   _add_if_existsr  r   r*  rb  )r   rb  r  r  r  results_tasksresults_docsresults_dictsr  r  r  r  
iter_countprocessing_groups                 r   r  zCmfSearchEngine.search_emptyu  s    55 )++77==595X5XYg5h5h2|]9=9c9cdq9r9r7%'77;7a7abn7o7o5$o
02B.! " " 	 /2!1%-q1 !$"   0-JJJ!OJD  vwww# 02B.! " " 	& 9;; 3&&GGTTY[[7=RTTUUU*4:a=A+FGr   c                     t          j                     }g }t                      }i  fd}d }ddddddd}ddd}t          d	d
          } j                              j                  }	 |                                  |	          \  }
}}                     |d          \  }}                     |
d          \  }
}                     |d          \  }}                     |
d          \  }
}                     |d          \  }}                     |
d          \  }
}|}|
}~~
                     | j                  }                     | j                  }                     | j                  }                     | j                  }|d d         }|d d         }                     | j                  }                     | j                  }                     | j                  }                     | j                  }                     | j                  }t          j                     |z
  dk    r1t          
                    dt          j                     |z
              t          j                     }t                      }g }t          |          dk    r|d         j        dk    r ||||d            t          |          dk    r|d         j        dk    r ||||d            t          |          dk    r|d         j        dk    r ||||d            t          |          dk    r|d         j        dk    r ||||d            t          |          dk    r ||||d            t          |          dk    r ||||d            d}d}d}d}t          |          dk    r|d         j        }t          |          dk    r|d         j        }t          |          dk    r|d         j        }t          |          dk    r|d         j        }                     |          }                     |          }                     |          }                     |          }d}|dz  |k    s	|dz  |k    rd}t           j                                                  dk    r ||||           	 t          |          }|rt          |          dk    rLt          |          dk    r9|d         j        |d         j        k    r |||||            |||||           n |||||            |||||            |||||            |||||            ||||            ||||d             ||||d             |||||            |||||            ||||d             |||||            |||||            ||||d            nt          |          dk    rLt          |          dk    r9|d         j        |d         j        k    r |||||            |||||           n |||||            |||||            |||||            |||||            ||||            ||||d             ||||d             |||||            |||||            ||||d             |||||            |||||            ||||d            t          |          }|dk    s||k    rnZ|d d          _        t          j                     |z
  dk    r3t          
                    dt          j                     |z
              d S d S )Nc                    t          t          j        j                  }|sd S |                    d          }|j        v s|j        dk    r?|j        v r6|sd S |                    d          }|j        v "|j        dk    r	|j        v 6d|j        <   |j        dk    r
d|j        <   |r|j        |z  dk     rd S d}|j        j	        vrd}||         || d         k    rd S | 
                    |           ||xx         dz  cc<   d S )	Nr   r<  Tg?r   r   _limitr!  )r   r  r  r  r  r   r   r   real_rank_bm25r  r  )	r  r  r  max_rankr  r  	cur_classexists_dictr   s	          r   append_if_existsz4CmfSearchEngine.search_top.<locals>.append_if_exists  s_   Q^122J ,,q//C *++/1Q1QVYVgkvVvVv Fll1oo *++/1Q1QVYVgkvVvVv
 '+K
#}//15C-.  &1T99F I}D$777"	 	"h)/C/C/C&DDD JJsOOOY1$r   c                     |d         |d         k    rd S |sd S |                     d          }|                     |           |dxx         dz  cc<   d S )Nrx  r  r   r!  )r  r  r  s       r   append_if_exists_otherz:CmfSearchEngine.search_top.<locals>.append_if_exists_other  si     H]$;;; ,,q//CJJsOOOW"r   r   r     )r   r   c	doc_limit
task_limitc_limitr&  r  z'CmfSearchEngine.search_top__select_bm25rv  flow_idcodelike	flow_nameflow_attach   r  z'PROF fulltext_search TOP25 selects got   T      ?Fr!     z2PROF fulltext_search TOP25 mixing and prepare got )r  r^  r   r  r   r?  r  r  	bm25_sortr  r   rw  	rank_bm25r  bm25_sort_date4r  r  rb  )r   r  rc  r  r  r  r  counters_otherr  r  r.  r/  result_dictsresult_docs_idcodelikeresult_tasks_idcodelikeresult_docs_nameresult_tasks_nameresult_docs_attachresult_tasks_attachresult_docs_mainresult_tasks_mainrb  max_doc_name_rankmax_task_name_rankmax_doc_main_rankmax_task_main_rankdoc_prior  r  r  s   `                            @r   r  zCmfSearchEngine.search_top  s	   )++55*	% *	% *	% *	% *	% *	%X	# 	# 	# aa!#2"F F $%Q77* %%NPSTT
00t7OPP
262U2UVd2e2e/k< /3.P.PQ\^o.p.p++040R0RS_ar0s0s--(,(J(J;Xc(d(d%%*.*L*L\[f*g*g''*.*L*L[Zg*h*h'',0,N,N|]j,k,k))&(>>*:D<TUU NN+<d>VWW!^^,>@XYY"nn-@$BZ[[/31"1"5!%0FH`!a!a"&..1H$Jb"c"c>>*:D<TUU NN+<d>VWW~~lD4LMM& 9;; 3&&GGUdikkG>SUUVVV )++55   1$$)9!)<)F)M)M]H6FMMM  1$$)9!)<)F)M)M]H6FMMM  1$$)9!)<)F)M)M]H6FMMM  1$$)9!)<)F)M)M]H6FMMM %&&**]H6LdSSS&''!++]H6MtTTT    "" 0 3 B !!!##!21!5!D  "" 0 3 B !!!##!21!5!D  //0@AA 001BCC//0@AA 001BCC s"%666:LS:PSd:d:dHt &&(())Q..""=.,OOO7	"=11  ,T)**Q..37G3H3H13L3L)!,;>Nq>Q>```$$]H>NPabbb$$]H>NPabbbb$$]H>NPabbb$$]H>NPabbb  :KM_```  :KM_```&&}nlSSS  :PRVWWW  :QSWXXX  :JL]^^^  :JL]^^^  :LdSSS  :KM_```  :KM_```  :MtTTTT *++a//C8I4J4JQ4N4N)!,;>OPQ>R>aaa$$]H>OQcddd$$]H>OQcdddd$$]H>OQcddd$$]H>OQcddd  :JL]^^^  :JL]^^^&&}nlSSS  :QSWXXX  :PRVWWW  :KM_```  :KM_```  :MtTTT  :JL]^^^  :JL]^^^  :LdSSS
 "-00O"$$;K(K(Ko7	r +3B3/9;; 3&&GG`W^I^``aaaaa '&r   c                 p   t          j                     }g }t                      }t                      fd}t          dd          }| j                            | | j                  } |             |                     |          \  }}}	|                     || j        d          }|                     || j        d          }|                     |	| j        d          }	t          j                     |z
  dk    r1t          	                    dt          j                     |z
              t          j                     }t                      }g }
	 t          |
          } ||
|            ||
|            ||
|            ||
|            ||
|	           t          |
          }|| j        d	         k    s||k    rns|
| j        d
         | j        d	                  | _        t          j                     |z
  dk    r3t          	                    dt          j                     |z
              d S d S )Nc                     |sd S |                     d          }|j        v r"|sd S |                     d          }|j        v "                    |j                   |                     |           d S Nr   )r  r   r  r  )r  r  r  r  s      r   r  z5CmfSearchEngine.search_main.<locals>.append_if_exists  s     ,,q//C*++ Fll1oo *++ OOCJ'''JJsOOOOOr   z'CmfSearchEngine.search_main__select_synrv     )limitr  &PROF fulltext_search main selects got Tr!  r   1PROF fulltext_search main mixing and prepare got )r  r^  r   r  r   r@  r  r  r  r   rw  r*  rb  )r   r  rc  r  r  r  r  r.  r/  r#  rb  r  r  r  s                @r   r  zCmfSearchEngine.search_main  sH   )++55ee
	 
	 
	 
	 
	 %%NPSTT
//d6KLL
262U2UVd2e2e/k< ~~lD4IQT~UUnn[$2GsnSS~~lD4IQT~UU9;; 3&&GGTTY[[7=RTTUUU )++55	"=11]K888]L999]K888]L999]L999!-00OA../EU2U2U	 +4:a=A+FG9;; 3&&GG_	V]H]__````` '&r   c                    g }t                      }t          j                    }| j                            |           }|                     |          \  }}}|                     |d          \  }}|                     |d          \  }}	t          j                    |z
  dk    r1t                              dt          j                    |z
              t          j                    }d}
t          |||||	g          rt|df|df|df|df|df|df|df|	df|df|	df|dfg}| 	                    |||           |
dz  }
|
dk    rt                              d           nt          |||||	g          t|d d	         | _
        t          j                    |z
  dk    r3t                              d
t          j                    |z
              d S d S )Nr4  r  r6  r   r  r!  r  r  rn  r7  )r^  r  r  select_defaultr  r  r  r   r{   r  rb  )r   rb  r  r  r  r.  r/  r#  rB  rC  r  r  s               r   search_main_oldzCmfSearchEngine.search_main_old  s   55 )++33D99262U2UVd2e2e/k<)-)K)KLZ_)`)`&&'+'I'I+W\']']$_9;; 3&&GGTTY[[7=RTTUUU )++
<l<Lo^__ 	q!a q!!1%q!!1% q! !$a  !$ q!% 0  0-JJJ!OJD  vwww= <l<Lo^__ 	@ +3B3/9;; 3&&GG_	V]H]__````` '&r   c                     |S )u   
        Подчистка оригинального квери, который ввел пользователь:
        - удаление стоп-слов
        )r  rw  r  r  )r  r  clean_search_query_listr  clean_search_querys        r   _clean_search_queryz#CmfSearchEngine._clean_search_query.  s
     r   c                 \    d}|                                 D ]}|t          v r|d|z   z  }|S )Nr  r  )r  ALL_STOP_WORDS)r  r   r  r  s       r   r  z'CmfSearchEngine.query_remove_stop_wordsB  sB     	 	AN"" 37NCC
r   c                    d}|                                 D ]}|t          v rd|v r	|d|z   z  }t          j        |          }|dk    r4|                    dd                              dd          }||k    rd}d}nd|z   }||k    r|d|z   |z   z  }t
          j                                        t          j        ddd          k     r|dz   |z   dz   |z   }|dd                    |                                           z   z  }d                    |	                                                                           }d                    d	 |                     d          D                       }t          j        d
d|          }|S )Nr  r  r  @.i  r  r!  c                     g | ]}||S r   r   )rN  r  s     r   rP  z3CmfSearchEngine.query_normalize.<locals>.<listcomp>o  s    777aQ7777r   u   [^ A-Za-zА-Яа-я0-9|-]r  )r  r@  r  text_normalize_enrichr  datetimedatetodayr  r  r  r  )r  r   r  r  lemmalegacy_wlegacy_w_strs          r   r  zCmfSearchEngine.query_normalizeN  s    	9 	9A N""axxsQw4Q77B;;99S--55c#>>q==#%L!HH#&>L::37\11CC  }**,,x}T1a/H/HHH !C( 2S 85 @3%++--!8!888CChhsyy{{((**++hh77399S>>77788 f13<<
r   c                 6    t          j        ||dz   z            S )Nr!  )mathlog)r  Ndfs      r   bm25_idfzCmfSearchEngine.bm25_idf  s     x2a4!!!r   c                 8    t          j        | j                   d S r   )geventspawn'_gevent_bm25_stat_words_load_with_delayr  s    r   &gevent_bm25_stat_words_load_with_delayz6CmfSearchEngine.gevent_bm25_stat_words_load_with_delay  s    S@AAAAAr   c                     t          t          dd           rd S t          j        d           t          t          dd           rd S |                                  d S )NFTS_STAT_WORDS<   )getattrAPPrS  sleepbm25_stat_words_loadrV  s    r   rU  z7CmfSearchEngine._gevent_bm25_stat_words_load_with_delay  s`     3($// 	FR3($// 	F  """""r   c                 6   i }i }t           j                            d          |d<   t           j                            d          |d<   t           j        j        j                                                            d                                          }t           j        j        j                                                            d                                          }t          |          dk    rr|t          |          dz  dz           d         }|t          |          d	z  dz           d         }t          |          d
k    r|d         d         }|d         d         }nd}d}t          |          dk    rr|t          |          dz  dz           d         }|t          |          d	z  dz           d         }t          |          d
k    r|d         d         }|d         d         }nd}d}d}	|D ]N}
i ||
j
        <   |                     |d         |
j                  ||
j
                 d<   ||
j
                 d         }O|D ]W}
|
j
        |vr
i ||
j
        <   |                     |d         |
j                  ||
j
                 d<   ||
j
                 d         }X|sd}|sd}i |d<   ||d         d<   ||d         d<   i |d<   i |d<   t          |          dk    r/||         d         |d         d<   ||         d         |d         d<   nd|d         d<   d|d         d<   t          |          dk    r/||         d         |d         d<   ||         d         |d         d<   nd|d         d<   d|d         d<   |t          _        |t          _        d S )Nr  )r   r
  z SELECT word, ndoc, nentry  FROM ts_stat('SELECT ml_text_tsvector from cmf_full_search where obj_model=''CmfDocument''  ') where  ndoc > 10  order by ndoc desc limit 10000 z SELECT word, ndoc, nentry  FROM ts_stat('SELECT ml_text_tsvector from cmf_full_search where obj_model=''CmfTask''  ') where  ndoc > 10  order by ndoc desc limit 10000 r   r  r   rp    rv  i  ditir!  FST_STAT_WORDS_DEFAULTFST_FRQ_MIDFST_FRQ_LOW)r  r  rx  CmfGlobalSettingsdpdata_driverSessionexecutefetchallrw  wordrQ  ndocr\  rY  
FTS_COUNTS)r  rY  rn  cmfdoc_statcmftask_statdoc_mid_worddoc_low_wordtask_mid_wordtask_low_wordlastrowlast_doc	last_tasks                r   r^  z$CmfSearchEngine.bm25_stat_words_load  s   
$*$8$>$>$>$W$W
=! & 4 : :Y : O O
9.1=EEGGOO  Q  @  @  I  I  K  K/2>FFHHPP  R|  }  }  F  F  H  H{C&s;'7'7'93'>?BL&s;'7'7':C'?@CL;$&&*3/2*403LL|S  (\):):1)<c)AB1EM(\):):2)=s)BCAFM<  4'' ,S 1! 4 ,T 21 5MM 	6 	6C')N38$-0\\*]:SUXU]-^-^N38$T*%ch/5HH   	7 	7Cx~--+-sx(-0\\*Y:OQTQY-Z-ZN38$T*&sx06II 	H 	I35/09A/069B/06(*}%(*}%{C2@2Nt2TN=)$/2@2Nt2TN=)$//23N=)$/23N=)$/|S  2@2OPT2UN=)$/2@2OPT2UN=)$//23N=)$/23N=)$/+#r   c                 T   |}t          |          dk    rd}|d         j        |d         j        z  dk    rd}nC|d         j        |d         j        z  dk    rd}n!|d         j        |d         j        z  dk    rd}|dk    r%t          |d |         d           ||d          z   }|S )	Nr  r   r&  g333333?r  r!  c                     | j         S r   )r   r   s    r   <lambda>z1CmfSearchEngine.bm25_sort_date4.<locals>.<lambda>  s    !* r   )r  )rw  r  sorted)r  r  newresr   s       r   r!  zCmfSearchEngine.bm25_sort_date4  s     s88a<<D1v$SV%::TAAQ&s1v'<<tCCQ&s1v'<<tCC axxETE
0D0DEEEDEE
Rr   r   c                 N   g }d}t          |          }|D ]F}|dz  }|                     ||          \  |_        |_        ||_        |                    |           Gt          |d d          }|d d         D ]1}|j        r(|xj        dz  c_        d|j        d	d
|j         |_        2t          |d d          }d}|d |         D ]Q}|dz  }||_	        d|j         d|j	         d| d
|j        z   |_        |j
                            dd          |_
        R|d |         S )Nr   r!  c                     | j         S r   r   r{  s    r   r|  z+CmfSearchEngine.bm25_sort.<locals>.<lambda>      AK r   T)r  reverserp  r  u    ТОП.2fr  c                     | j         S r   r  r{  s    r   r|  z+CmfSearchEngine.bm25_sort.<locals>.<lambda>  r  r   Rz Bz LDZQTNr  )rw  	bm25_rankr   r  debug_sql_posr  r}  r   r   debug_bm25_posr   r  )	r   r  r   r5  r~  r   llrv  rs	            r   r  zCmfSearchEngine.bm25_sort  sw    XX 	 	CFA04sE0J0J-CM3- !CMM#f"7"7FFF 	@ 	@A $ @t#?AK???ag??f"7"7FFF 	8 	8AFA AF!/FFQ-=FFFFFPAG++GB77AJJ fuf~r   c                    fd} |j                   }i }j        j        dk    r |j                  }d}d}d}d}	d}
d}d}t                      }d}d}d}j        | j        v rd}d	}i }|                                D ]}||v r|                    |           |
dz  }
d}d d t                      g d dt                      t                      d
}|                    d          D ]}||vr|d                             |           |d                             d ||                             d          D                        |dz  }t          t          d          rt          j
        sd}d}nS|t          j
        v r-|t          j
        |         v rt          j
        |         |         }nt          j
        d         |         }d}d}t          t          ||                             d                    d          }|||dz   z  z  ||z   z  }|}d}j        dk    r3||v r/j        r(|j                                        v r|	dz  }	d}|dz  }n|dz  }|dxx         d|dz    d| d| d|dd|dd|dd| d | z  cc<   |d!                             |           t          |d!                   dk    r?|dz  }t!          |d!                   }| j        |
dz
           }d}|} |d"v r|dz  } d#}|dxx         d$| d%| z  cc<   t          |d!                   dk    r|dxx         d&|dd'| dz  cc<   ||vr|||<   | |d(<   ||d)<   d}!d}"d}#d}$d}%|                                D ]\  }}d}&d}'d}(d})d}*|                                D ]\  }+},d}-d}.||+k    rd}-|d         D ]}/|,d         D ]}0t'          |0|/z
            d*k    r'|&dz  }&d}-|)dz  })|d+                             |+           t!          dd*t'          |0|/z
            z   z  d,z  |.          }.|*dd*t'          |0|/z
            z   z  d,z  z  }*|'|-z  }'|(|.z  }(|%|)z  }%|(dk    rd-|(z  }1|d(         |1z  |d.<   |!dz  }!|'|#k    r|'}#|(|$k    r|(}$|"|&z  }"t)          |d+                                       d/d                              d0d                              d1d          }2d2| d3|2 d4|$d5d6|(d5}3nd7}3|d(         |d.<   |d         d8| d9|' d%t          |d+                    d:|d.         dd|d(         dd;|3 z   |d<   |d         d<|d.         dd=|% z   |d<   t-          d> |                                D                       }dd?dd@j        z   z  z  z   }4|t3          |4          z  }|dA|! dB|" dCj        dDdE|4d5dFj        d5
z  }d}5t6          j        j        j        r@t6          j        j        j        dk    r&j        rt6          j        j        j        j        v rdG}5d}6dHt6          v rj         t6          j!        v rdG}6||5z  |6z  }dI|d5dJ| dK|5 dL|6d5d%	|z   }|dk    rj        }|dk    rdM}|}7j        dNv r|j        z  }dO|d5d%| }| d"                    dP |                                D                       z   }8|8_#        |rtI          dQ           ||7fS )RNc                     j                             dd          } t          j        dd|           } | d d         } t	          j        d| z   dz             S )N'"z":([0-9,]*)z":"\1", z{ z })r   r  r  r  orjsonloads)tmpr  s    r   tsvector2dictz0CmfSearchEngine.bm25_rank.<locals>.tsvector2dict  sX     %--c377C&c::Ccrc(C
 <s
T 1222r   r  r   r!  Fra  il  rb  ru  )r  
smart_rank
found_synsvector_positionstoken_found_idxr   near_tokensrank_setr  r  r  c                 ,    g | ]}t          |          S r   )intrN  r   s     r   rP  z-CmfSearchEngine.bm25_rank.<locals>.<listcomp>f  s    1`1`1`Q#a&&1`1`1`r   r  rY  Trc  r  g      ?r  r  r  r   z<br>wrC  (z) bm25nm(bm25)=z.1fz) idf=z fqd=z	 in_name=r  )VERBINFNz
verbdiv/2 z<BR>&nbsp;&nbsp; r  z	 synRank=z psRank=r  r  r  r  (   gffffff?r  r  {}z<BR>&nbsp;&nbsp;#near(z)=[z] MFNCr  z FNC0z	 maxNear[z]=z bm25near(bm25)=z) z<br>#BM25final=z SSSS=c                     g | ]
}|d          S )r  r   r  s     r   rP  z-CmfSearchEngine.bm25_rank.<locals>.<listcomp>  s    EEEAlOEEEr   rp     z nearC=z
 sumNearC=z age=z.0fz ageF=z	 sqlRank=g?FTS_RECENT_PROJECTSzrbm25=z wc=z rur=z rpr=g-C6?)r  r  r`  z codec                     g | ]
}|d          S )r   r   )rN  r   s     r   rP  z-CmfSearchEngine.bm25_rank.<locals>.<listcomp>  s    4a4a4aAQwZ4a4a4ar   u   Система еще загружает кеш для алгоритма ранжирования. Если поисковая выдача некорректна, повторите запрос через минуту)%r   r   r^  r   r  r  r  extendhasattrr\  rY  minrw  r   r   r  r)  re  itemsabsr   r  sumvaluesr   floatr   r  r  r  r  r   r   r  r  r   	cmf_alert)9r   r  r   r  doc_vector_dictname_vector_dictr   
rank_debug	w_doc_cnt
w_name_cntsynw_cntorder_factorfound_tokens_cnt
done_wordsalert_fts_stat_not_load	model_keyAVGDmatched_tokenssynww_cntr  r  idfkbfqdbm25	bm25_origin_namesyn_rankr  verbdivps_ranknear_tokens_cntsum_near_tokens_cntmax_of_max_near_contextmax_of_factor_near_contextsum_tmp_near_allhave_near_token_cntmax_near_contextfactor_near_contexttmp_near_alltmp_factor_allsynw2token2tmp_max_near_contexttmp_factor_near_contextpospos2mltr  d_nears
age_factorrelated_user_rankrecent_project_rank	real_rankhtml_rank_debugs9    `                                                       r   r  zCmfSearchEngine.bm25_rank  s
   
	3 	3 	3 	3 	3 (-(;<<(S->"-D-D,}S->??
	
UU
"'	 =D000ID
 KKMM W	8 W	8Dz!!NN4   MH
 E ""!ee$&#'"uuEE	 	E ZZ__ +, +,O++l#''***()001`1`/RSBTBZBZ[^B_B_1`1`1`aaa
s$455 RS=O RC.2++#,,,c>PQR>S1S1S,Q/	:CC,-EFyQC#oa066s;;<<a@@ cQqSk*CE2 	8{**q4D/D/D/DZ[_b_k_q_q_s_sZsZs!OJ"G!8DDNI g  #_*:1*<  #_  #_u  #_  #_q  #_  #_Y]  #_  #_  #_dm  #_  #_  #_y|  #_  #_  #_  HK  #_  #_  V]  #_  #_  _j!%%d++++5$%%**!5,--H5hqjAJGG---"Q,$'NNNH*HHwHHHNNN5$%%**g"Qh"Q"Q"QG"Q"Q"QQ>))',t$!(E+'7E#$$ "#%&")//11 A	r A	rKD%"# "#LN!/!5!5!7!7 #1 #1v'($*+'5=='($ !34 D DC &'9 : D D
 tcz??b00 014/340(1,L!-044U;;;25aCs
OO9K6LR6OQh2i2i/&!RD3J-?*@*CCD* !$88 
 $'>># L0   #Q&&
 23&+K&83&>l#1$#&===.>+&)CCC1D.#'::#!%"677??BGGOOPSTVWW__`cdfgg M4  M  MK  M  MOi  M  M  M  tG  M  M  M&+K&8l#"7^  /~$  /~  /~BR  /~  /~UXY^_lYmUnUn  /~  /~  AF  GS  AT  /~  /~  /~  [`  al  [m  /~  /~  /~  u|  /~  /~  ~E'N"7^.ql@S.q.q.q_o.q.qqE'NNEE^-B-B-D-DEEEFF r1c#,./00
eJ'''  U  U  U;N  U  UUXUa  U  U  Umw  U  U  U  GJ  GO  U  U  U  	U

 ^!' 	%AN,@,F",L,LQTQn,L$*c.KKK $   A%%#*<@U*U*U"&''*==uduuu,<uuCTuu[nuuuu  yC  C
199 8D199D	8+++CHD8888J88J'/BGG4a4aI^I^I`I`4a4a4a,b,bb#	" 	w  v  w  w  wYr   c                 D    d| vr| S |                      d          d         S )Nz@#@#@#r   )r  )r   s    r   _remove_suffix_from_headlinez,CmfSearchEngine._remove_suffix_from_headline$  s)    8##O~~h''**r   c                 	   | j         }|D ]4}d}|j        	|j        d}d|j         d|j         d|j         d|_        5| j        ri }g }|D ]}|                    |j                   |j        r| 	                    |j                  nd}|j        |j
        |j        | t                              |j                  |j        |j        |j        |j        d	||j        <   |t           _        t                               d	           |S i }| j        r7i }|D ]5}|                    |j        g                               |j                   6|                                D ]}	t-          t.                    |	         j        r| j        d
gz   }
d
d||	         gg dg}n| j        }
dd||	         g}|	dk    r|
g dz   }
|	dk    r|
dgz   }
t2                              |	                              |
|| j        | j                  }|D ]B}t-          t.                    |	         j        r|j        r|||j        j        <   8|||j        <   CntA          d           t                               d           g }|D ]}|!                    |j                  }|r|j        r| 	                    |j                  nd}|j"        dk    r|j#        sV|j$        |j#        j%        |j#        j&        j'        | |j        |j        |j        |j        j'        |j
        |j(        |j)        |j*        t                              |j                  |j        d}nrtW          j,        d|j
                  d         }|j        j'        ||j&        j'        | |j        |j        |j        t                              |j                  |j        d	}|j"        dk    r|j-        j'        |d<   	 | j.        s|/                    dd           n| .                    |          st`          | j        D ]9} |j,        d          d         }tc          ||j                 |d          }|||<   :|                    |           # t`          $ r Y w xY w	t                               d	           | j2        sHtg          t.          j4        j5        | j6        t2          7                    |dd                   d           || _8        dS )u5   
        Итоговая обработка
        NoneNz.6fz<BR>r  z<br>r  )	idr   coder   r   r   r   r   r   zfulltext_search ENDcmf_ver_headr  )cmf_ver_curz==Tr  r<  )urlurl_previewurl_preview_imgr  fullsearch_answer)r
   r  include_archivedinclude_deleteduY   DEV: FATAL. Укажите в запросе поиска список полей fields=z"fulltext_search Start check access)r  r   r  r   r   r   r   	attach_idattach_name
attach_urlattach_url_previewattach_url_preview_imgr   r   z DZQTN r   )	r  r   r  r   r   r   r   r   r   T)TEXKOM_skip_failread_auditTEXKOM_ppp_project_simplecheckrC  rn  )r  obj_dict)r  )9rb  r   r   r   r   rT  r  r   r   r  r   r   jsonr  r   r   r  fulltext_search_headlinesr
   
setdefaultr   keysvarsr  cmf_verr  get_model_by_namerd  rX  rY  r  r  r  getrM  parent	parent_idr   r  r  r  r  r  r  r  r  rV  _acl_check_readCmfPermissionErrorr[  rU  schedule_deferred_jobr  _do_calc_statisticsrS  
dumps_dictrc  )r   rc  r  formated_rankr  	result_idr   objectsids_by_modelr  _fields_filterr  r  r  r  r   r	   attrs                      r   r  z%CmfSearchEngine._prepare_final_result*  s    ) 	> 	>A"Mv!#$6=QV==ag=====AGG= 	(*%I!    ***LMJ^4<<QZHHH\^(GJ#+#'::am#<#<WF !
W
7 
7)!(33 +DA'GG)***; 	tL! J J''R88??IIII*//11 . .
<<
+3 C K>*::G .l:6NOQlQlQlmGG KG!4j)ABG00%(Q(Q(QQG..%)<(==G"44Z@@EE""%)]$(L	 F   $ . .CF||J/7 .CK .7: 0 344*-	.).4 rsss	4555 B	 B	A++ah''C @LMJ^4<<QZHHH\^>_44 : !  "m #
 #
 5'/M!" !$%J%(V\'(w&)g.1o252E'+zz!-'@'@!"   HH$ HZ99!<E "fl % #'/M!" !$%J'+zz!-'@'@!"
  
 H >]22474I4OH016 1++tlp+qqqq!88== 100!% / / +C 0 0 3&wqx'8%FF*.JJx(((()    Du@D 	
%&&&   	!$8(,(>GL^L^_bcfdfcf_gLhLhii   
  s   BQ--
Q;:Q;r  rc  r  c                 ,   |D ]\  }}t          |          D ]{}|s nv|d         j        |v r|                    d           +|                    |d                    |                    |d         j                   |                    d           |d S r3  )ranger   r  r  r  )r  r  rc  r  lstrx  _s          r   r  zCmfSearchEngine._add_if_exists  s    " 		 		JC5\\   Eq6=H,,GGAJJJ##CF+++SV]+++



		 		r   	list[str]c                     | sg S t           j        j        j                                                            dd| i          }d |D             S )Na  
                WITH tree_parents AS (
                    WITH RECURSIVE r AS (
                        SELECT obj_id, obj_code, obj_tree_parent_id
                        FROM cmf_full_search
                        WHERE obj_tree_parent_id = :tree_parent_id

                        UNION

                        SELECT cfs.obj_id, cfs.obj_code, cfs.obj_tree_parent_id
                        FROM cmf_full_search AS cfs
                        JOIN r ON cfs.obj_tree_parent_id = r.obj_id
                    )
                    SELECT obj_id FROM r
                    WHERE r.obj_id IN (SELECT obj_tree_parent_id FROM r)

                    UNION

                    SELECT :tree_parent_id
                )
                SELECT * FROM tree_parents;
            r  c                     g | ]
}|d          S )r   r   )rN  r  s     r   rP  z5CmfSearchEngine._get_all_branches.<locals>.<listcomp>  s    &&&!&&&r   )r  r  rg  rh  ri  rj  )r  recordss     r   r  z!CmfSearchEngine._get_all_branches  s`    
  	I&)5==??GG, ~./
 
2 '&g&&&&r   Tc                    dt           _        |                    dd          }t          j        dd|          }t          j        d|          }d}d}d}	|D ]}
t          |
          dk    rt          |
          dk    r|
dv r/|
d	v r	|	d|
 z  }	<|
dk    rC|
d
v r|	dz  }	M|
dv r|	dz  }	Wt          |
          dk    rk|
d         dv r%t          |
          dk    r|d|
dd           z  }|r|
                    dd          }
|dz  }|dk    rd}d}|	r
|	d         dvrd}t          |
          dk    r|                     |
|          }t          t          |          t          t                    z
            }t          |          dk    r|	| d|d          z  }	n6t          |          dk    r"|	| dd                    |          z   dz   z  }	n	 |r|dk    r n
|dk    r n|r)t          |          dk    r|d         dv r
|dd          }|S |	                    dd                              dd                              dd                              d d                                          }	|	r|	d         dv r
|	dd          }	|	r|	d         dv r
|	dd          }	|	r|	d         dv r
|	dd          }	|	r|	dd          dv r
|	d d         }		 t          j        j        j                                                            d!d"|	i          }t          |          d         d         }	n# t&          j        j        $ r}t          j        j        j                                         t          j        j        j                                                            d#d"|i          }t          |          d         d         }	Y d }~nd }~ww xY wt/          |d$|	           |	S )%Nr  zwww.u   [^-A-Za-zА-Яа-я0-9()|&!' ]r  z(,| |&|\||\(|\))r   r!  )r  !z()&|)rR   r   r  z |)r   rO   &z &r  r  r  r  F)r  r  r  )r  z (  | z )rp  )r  r  z OR z or z AND  & z and z!select to_tsquery('russian', :q);q+select websearch_to_tsquery('russian', :q);z->)r  rZ  r  r  r  r  rw  prepare_wordrd  r^  r@  r  r  r  r  rg  rh  ri  rj  
sqlalchemyexcProgrammingErrorrollbackprint)r  r  r  r  
first_wordsearch_query_allowed_symbr  
word_countstopsr  r   oper	sug_wordstsqueryes                  r   r  z"CmfSearchEngine.parse_search_query  s    !))&44$&F+KSR^$_$_!-/HII 
 6	 6	A1vv{{1vv{{
??;;QLA88)))U
&&&U
 1vv{{tz!!q66Q;;QqrrU$ 		#s##A!OJA~~ D "Z//1vvzz,,Q,BB	 Y#n2E2E!EFF	y>>Q&&D119Q<111AA^^a''D

9(=(==DDAA jAoo R 	5zz!||aJ 6 6abb	LIIfe$$,,VU;;CCGUSS[[\cejkkqqss 	1##!""A 	1##!""A 	1##!""A 	233:%%#2#A	$'*6>>@@HHIpsvxyrz{{GWa #AA~. 	$ 	$ 	$ ,55777 '*6>>@@HHAl#% %G Wa #AAAAAA	$ 	lD!$$$s   AL3 3OBOOc           
         d}|                                 D ]}d|v r|d|z   dz   z  }|d|z   z  }|                                }|                    dd          }|                    dd          }	 t          j        j        j                                                            d	d
|i          }n# t          j
        j        $ ru}t          j        j        j                                         t          j        j        j                                                            ddt          i          }Y d }~nd }~ww xY wt          |          d         d         }|                    dd                              dd                              dd                              dd                              dd                              dd          }d}|                                 D ]J}d|vr	|d|z   z  }|dd                    t!          |                     d                              z   z  }Kd                    |                                           }|S )Nr  r  z ()r  r  r  hhhackskiptirez%select to_tsquery('russian', :query);r   r  r  r   r  r  r  )r  r  r  r  r  rg  rh  ri  rj  r   r!  r"  r#  r   rd  r  r^  )r  r   r  r  r+  r,  r  s          r   r  zCmfSearchEngine.text_stemmingb  sP    	 	AaxxTAX^#S1WGGIIyyU##||C!122	'*6>>@@HHItw~  AE  wF  G  GGG~. 
	 
	 
	 ,55777 '*6>>@@HHAe GGGGGG
	 MM!QIIeC  ((R0088sCCKKCPRSS[[\_`bcckkl|  B  C  C 	9 	9A!||sQw sSXXc!''#,,&7&78888hhsyy{{##
s   3>B2 2D;A+D66D;c                 $   t           xj        d| dz  c_        |d         t          j        v rd}t          j        }nd}t          j        }g }t                              |          s6|                    |          }g }t          	                    |          }|r.t           xj        d| dz  c_        |
                    |           t           xj        dz  c_        d}	|D ]}
|	d	k    r nt          |
          d	k    r|
d         |d         k    r|
d
         |d
         k    rCd|
v rJ|
                    dd          }
t           xj        d|
 dz  c_        |
                    |
           |	d
z  }	t           xj        d|
 dz  c_        |
                    |
           |	d
z  }	g }t          j        j        j                                                            dd|i          }d}	|D ]\  }}|	d
k    r nt          |          d	k    r!|d         |d         k    s|d
         |d
         k    rI|                    dd          }t           xj        d| dz  c_        |
                    |           |	d
z  }	t'          |          t'          |          z  |hz  }n|h}t'                      }|D ]_}
t(                              |
          d d	         D ]:}|                    |j                   t           xj        d|j         dz  c_        ;`||z  }t'                      }|rt          j                            ddt1          ||hz            gddgdgdd
g          }|D ]}|j        r|j        j                            d          d d         D ]X}|                                                    dd          }t           xj        d| dz  c_        |                    |           Y||z  t'          |          z  }t1          |          S )Nz|w:z: r   enruzaddNinjaRevers z, zspellError, r  r!  r  r  z	addSpell z
            SELECT
                name, similarity(:word, name) as sim
            FROM cmf_synonym
            WHERE
                :word % name
            ORDER BY "sim" desc
            LIMIT 5;
             rl  zaddSpellTrgm z
normalize r   r  r   r  r  r  r  zsynAdd )r  rZ  stringascii_lettersr  dictionary_endictionary_rudictionary_checksuggestninja_reversr  rw  r  r  r  rg  rh  ri  rj  r^  morphparser  normal_formrd  r   r  r  r  )r  rl  r  lang
dictionaryfiltered_suggestions3suggestionsfiltered_suggestionsnwr   r  filtered_suggestions2suggestions2_listsuggr  all_suggestionsr  r  r  r  r   s                        r   r  zCmfSearchEngine.prepare_word  s   	t7f***D .JJD .J "''-- D	%$,,T22K#% %%d++B 0121111$++B///EE_$EEA   66Eq66Q;;Q447??qttAw!88 		#r**AEE.....EE)00333FA*Q****$++A...Q %'! & 1 4 @ H H J J R R T 
! 
! A,  a66Et99>>7d1g%%aDG););  <<R00DEE5T5555EE)00666FA ""677#>S:T:TTX\W]]OO  $fO 55  	9 	9Akk!nnRaR( 9 9 !$$R^4448bn88889 *,<<  	-!,11&$M]ae`fMfHgHg9h:@&9I<E;9:1 2 ? ?L ( - -< -$\/55c::2A2> - - GGII--c26610%))!,,,,)M9C@U<V<VVO$$$r   )FNNFNNFFNNNN)r  r   r  r  )r   )r  r  )TFF)T).r   r   r   __doc__r  r  rA  r  r   r  rk  classmethodr  rz  r  r{  r  staticmethodr  r  r  r  r~  r  r  r  r:  r>  r  r  rQ  rW  rU  r^  r!  r  r  r  r  r   r   r  r  r  r  r  r   r   r   rI  rI    s        F $_N kO,- O&&((Ff f fH `d$) ~# ~# ~# [~#FFA FA FAR% % %NS S S 
0 
0 [
0 " " " \"J- - -&$ $ $	 	 	(c (c (cV)H )H )HVRb Rb Rbj7a 7a 7at<a <a <a~ " " ["& 	 	 [	 $ $ [$f " " ["
 B B [B # # [# F$ F$ [F$P   [$       FH H H\ + + \+
V  V  V p 
d 
$ 
# 
 
 
 [
  '  '  ' \ 'F i i i [iZ " " ["J q% q% q% [q% q% q%r   rI  )0cmf.includecmf.util.cmfnlputilr  typingr   r   r   r   r  rM  rF  dataclassesr   r	   r
   	tracebacksys	itertoolscollectionsr   rS  enchantr3  	pymorphy3r  r   r  cmf.util.cmfutilr   bs4r   MorphAnalyzerr:  RUSSIAN_STOP_WORDSENGLISH_STOP_WORDSPROMPT_STOP_WORDS_NORMTECH_STOP_WORDSEVA_ARTIFACT_KEYS_STOP_WORDSEVA_ARTIFACT_VALUES_STOP_WORDSr^  r@  ALLOWED_FIELDSr   r   r   rG  rI  r   r   r   <module>r_     s                         - , , , , , , , , , , ,    0 0 0 0 0 0 0 0 0 0         



     " " " " " "        				      , , , , , ,      	!! @  @  @  A  A  A  K  K  K ;;;  M   M   M  "x  "x  "x  '*<<NOO `__, = = = = = = = =|       "i i i i i 0 i i iZ	 	 	 	 	 0 	 	 	]% ]% ]% ]% ]% ]% ]% ]% ]% ]%r   