
    ri                       d dl  d dlmc mZ d dlmZmZmZmZ d dl	Z	d dl
Z
d dlZd dlmZmZmZ d dlZd dlZd dlZd dlZd dlmZ d dlZd dlZd dlZd dlZd dlZd dlZd dlmZ  ej:                         Zg dZg dZ g d	Z!g d
Z"g dZ#g dZ$ e%ee z   e"z         Z&g dZ'e G d d             Z( G d d      Z) G d de)      Z* G d de)      Z+ G d d      Z,y)    )*N)ListSetDictOptional)	dataclassfieldfields)
namedtuple)BeautifulSoup)   иu   вu   воu   неu   чтоu   онu   наu   яu   сu   со   какu   аu   тоu   всеu   онаu   такu   егоu   ноu   даu   тыu   кu   уu   жеu   выu   заu   быu   поu   толькоu   ееu   мнеu   былоu   вотu   отu   меняu   ещеu   нетu   оu   изu   емуu   теперь
   когдаu   дажеu   нуu
   вдругu   лиu   еслиu   уже   илиu   ниu   бытьu   былu   негоu   доu   васu   нибудьu
   опятьu   ужu   вамu   ведьu   тамu
   потомu   себяu   ничегоu   ейu
   можетu   ониu   тут   гдеu   естьu   надоu   нейu   дляu   мыu   тебяu   их   чемu   былаu   самu   чтобu   безu
   будтоu   чегоu   разu   тожеu   себеu   подu
   будетu   жu
   тогда   ктоu   этотu   тогоu   потомуu
   этого
   какойu   совсемu   нимu
   здесьu   этомu   одинu
   почтиu   мойu   темu
   чтобыu   нееu   сейчасu   были   кудаu
   зачемu   всехu   никогдаu
   можноu   приu   наконецu   дваu   обu   другойu   хотьu
   послеu   надu   большеu   тотu
   черезu   этиu   насu   проu
   всегоu   нихu
   какаяu
   многоu
   развеu   триu   этуu   мояu   впрочемu   хорошоu   своюu   этойu
   передu   иногдаu
   лучшеu   чутьu   томu   нельзяu
   такойu   имu
   болееu   всегдаu   конечноu   всюu
   между)imemymyselfweourours	ourselvesyouyouryoursyourself
yourselveshehimhishimselfsheherhersherselfititsitselftheythemtheirtheirs
themselveswhatwhichwhowhomthisthatthesethoseamisarewaswerebebeenbeinghavehashadhavingdodoesdiddoingaantheandbutiforbecauseasuntilwhileofatbyforwithaboutagainstbetweenintothroughduringbeforeafterabovebelowtofromupdowninoutonoffoverunderagainfurtherthenonceheretherewhenwherewhyhowallanybotheachfewmoremostothersomesuchnonornotonlyownsamesothantooverystcanwilljustdonshouldnow)	r   r   u   почемуr   r   r   r   u   можешьr   )docwwwhttphttpsmailto)u
   авторu   бизнес-процессu   владелецu   датu   исполнителu   контрагентu   логическu
   отделu   постановщикu   приоритетu   проектu   процессu
   спискu   стандартнu   статусu   тип)approvbaseclosedefaultdocumentopentasku   бизнесu   документu
   задачu   задачаu   закрытu
   обычнu   системu   созданu   черновик)textml_textnametagscommentsaddon_fieldskey_phrasesc                   ,   e Zd ZU dZeed<   dZeed<   dZeed<   dZeed<   dZ	eed<   dZ
eed<   dZeed<   dZeed	<   dZeed
<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZ	eed<   dZeed<   dZeed<   dZ
eed<   dZeed<   dZeed<   dZeed<   dZ eed<   dZ!eed<   dZ"eed<   dZ#eed <   dZeed<   dZ$eed!<   dZeed	<   dZ%e&ed"<   dZ'e&ed#<   dZ(eed$<   dZ)eed%<   dZ*eed&<   dZ+eed'<   dZ,eed(<   dZ-eed)<   dZ.eed*<   dZ/eed+<   dZ0eed,<   dZ1eed-<   dZ2eed.<   dZ3eed/<   dZ4eed0<   dZ5eed1<   dZ6eed2<   dZ7eed3<   dZ8eed4<   dZ9eed5<   dZ:eed6<   y)7DriverSearchObjectN	obj_modelobj_idobj_codeobj_nameobj_modified_atobj_project_idobj_related_person_loginsobj_ml_textobj_textobj_tagsobj_logic_type_codeobj_activity_codeobj_status_typeobj_result_textobj_commentsobj_owner_nameobj_responsible_namesobj_hrefobj_parent_idobj_tree_parent_idobj_root_parent_idobj_created_atobj_deletedobj_archivedobj_author_nameobj_modified_by_nameobj_addon_fieldsobj_user_ratingobj_key_phrasesobj_company_idobj_breadcrumbs"obj_related_person_logins_tsvectorobj_user_portal_topobj_client_portal_topname_tsvectortext_tsvectortags_tsvectorresult_text_tsvectorcomments_tsvectoraddon_fields_tsvectorkey_phrases_tsvectorml_text_tsvectorresult_tsvectorresult_textheadlineheadline_rawtitlebreadcrumbslabelrankage_daysdebugmark);__name__
__module____qualname__r   str__annotations__r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   boolr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r        !./cmf/models/cmf_search_engine.pyr   r   U   sK   IcF3HSHSOCN3$(c(KHSHS"" c OCOCLN3 $#$HSM#!s!!s!N3OCKLN3OC##SOCOCN3OC$(c(-1&s1K##!%$%M#M#M### c  $#$##SOCKHSLE#KE#DOHSE#DOr   r   c                   *    e Zd Zd Zd Zd Zd Zd Zy)SearchDriverBasec                     t         NNotImplementedselfsearchs     r   select_empty_query_topz'SearchDriverBase.select_empty_query_top       r   c                     t         r   r   r   s     r   select_empty_queryz#SearchDriverBase.select_empty_query   r   r   c                     t         r   r   )r   r   querys      r   select_bm25zSearchDriverBase.select_bm25   r   r   c                     t         r   r   r   s     r   select_defaultzSearchDriverBase.select_default   r   r   c                     t         r   r   r   s     r   select_attachmentz"SearchDriverBase.select_attachment   r   r   N)r   r   r   r   r   r   r   r   r   r   r   r   r      s    r   r   c                   $    e Zd Zd Zd Zd Zd Zy)SearchDriverEvaFullSearchc                     t         j                  j                  |      }g }|D ]  }|j                  t	        di |        |S )Nr   )modelsCmfFullSearchfilter_empty_topappendr   )r   r   
found_objsres	found_objs        r   r   z0SearchDriverEvaFullSearch.select_empty_query_top   sO    ))::6B
# 	IJJ)  	 
r   c                 v   g }g }g }g }g }|j                   dk(  s|j                   dk(  r|t        j                  j                  |j                  t        t        j                  j                        dd|      }t        j                  j                  |j                  dd|      }|j                   dk(  s|j                   dk(  r|t        j                  j                  |j                  t        t        j                  j                        dd|      }t        j                  j                  |j                  dd|      }|j                   dk(  s|j                   dvr.t        j                  j                  |j                  d	dd|
      }g }||z   |z   |z   |z   D ]  }|j                  t        di |        |S )NANYCmfTaskEMPTY   )force_related_person_loginr   r   r   )r   r   r   CmfDocumentr  r  r   )force_field_namer   r   r   r   )
model_namer   r   filter_onceAKA_TASK_MODELSr   gcurrent_userloginAKA_DOC_MODELSAKA_DICT_MODELSr  r   )	r   r   task_empty_query_related_usertask_empty_querydoc_empty_query_related_userdoc_empty_queryother_empty_queryr  r  s	            r   r   z,SearchDriverEvaFullSearch.select_empty_query   s   (*%')$%):):i)G,2,@,@,L,L&&+.q~~/C/C+D -M -)  &33??&&	  @   %):):m)K+1+?+?+K+K%%+.q~~/C/C+D ,L ,( %22>>%%	 ? O %):):B\)\ & 4 4 @ @&&!' !A ! 69IILhhkzz  ~O  O 	8IJJ)6I67	8 
r   c                    g }g }g }t        d|j                  d         }|j                  dk(  s|j                  dk(  r1t        j                  j                  |j                  |d|gdd|      }|j                  dk(  s|j                  d	k(  r1t        j                  j                  |j                  |d|gdd|      }d
}|j                  dk7  s|j                  s|}|j                  dk(  r1t        j                  j                  |j                  |d|gd|      }n?|j                  dvr1t        j                  j                  |j                  g|d|gd|      }g }||z   |z   D ]  }	|j                  t        di |	        |S )Nd      r  r  r   B25SF)force_slicer   include_attachmentr   r     TOP)r   r   r   r  r   )maxslicer  r   r   search_once_top_bm25r  r  topr  r  r   )
r   r   r   result_tasksresult_docsresult_dictslice_toother_slicer  r  s
             r   r   z%SearchDriverEvaFullSearch.select_bm25"  s   sFLLO,%):):i)G!//DDVE[E[]b9:8F?DV E UL %):):m)K ..CCFDYDY[`9:8F?DV D UK
 %VZZ"K% ..CCFDZDZ\a9:;u]c D eK&@@ ..CCVEVEVDWY^9:;u]c D eK %3kA 	8IJJ)6I67	8
r   c                 z   g }g }g }g }g }g }|j                   dk(  r8t        j                  j                  |j                  |j
                  d|      }nF|j                   dvr8t        j                  j                  |j                   g|j
                  d|      }|j                   dk(  s|j                   dk(  rot        j                  j                  |j                  |j                  dd|      }t        j                  j                  |j                  |j
                  d|      }|j                   dk(  s|j                   d	k(  rot        j                  j                  |j                  |j                  dd|      }t        j                  j                  |j                  |j
                  d|      }|j                   dk(  s|j                   d
k(  r7t        j                  j                  |j                  |j
                  d|      }g }||z   |z   |z   |z   |z   D ]  }	|j                  t        di |	        |S )Nr  MAIN)r   r   r  r  SYNsyn)r   r   r   r  CmfAttachmentr   )r  r   r   search_oncer  tsquery_without_synr  tsquery_with_synr  AKA_ATTACHMENT_MODELSr  r   )
r   r   r*  result_tasks_synr(  result_docs_synr)  result_attachsr  r  s
             r   r   z(SearchDriverEvaFullSearch.select_defaultB  sS    % ..::&&**	 ; K &@@ ..::""#**	 ; K %):):i)G%33??@V@VX^XoXo6;%PV  @  X!//;;F<R<RTZTnTn6<V < ML %):):m)K$22>>v?T?TV\VmVm6;%PV ? XO ..::6;P;PRXRlRl6<V ; MK %):):o)M#11==,,**	 > N$ %(88;FX[iilww 	8IJJ)6I67	8 
r   N)r   r   r   r   r   r   r   r   r   r   r   r      s    0d@Jr   r   c                       e Zd Zy)SearchDriverElasticSearchN)r   r   r   r   r   r   r:  r:    s    r   r:  c                      e Zd ZdZdgZdgZdgZdZ e       Z	d Z
e	 	 	 	 	 d'd       Zd Zd	 Zd
 Zed        Zed(d       Zd Zd Zd Zd Zd Zd Zd Zd Zed        Zed        Zed        Zed        Zed        Zed        Z d)dZ!d Z"ed        Z#d Z$ede%d e%d!e&fd"       Z'ed*d#       Z(ed+d$       Z)ed%        Z*ed,d&       Z+y)-CmfSearchEngineu  
    Главная задача: выполнение поиска (без учета дата-драйвера PG или Elastic).

    Алгоритм:
    - Подготовка запроса
    - Запрос делаем через дата-драйвер, получаем сырые предварительные данные (х10 объема)
    - Сортируем по классам
    - Сортируем по bm25eva
    - Проверяем права
    - Смешиваем потоки (оптимизированно с проверкой прав)
    - Формируем вывод

    Драйвер:
    - Уметь делать запросы
    - Не обязательно, но желательно: давать статистику bm25 (даже если используется Elastic,
          мы храним данные в нашей таблице все равно и можем посчитать bm25 статистику из нее)
    r  r  r1  Nc                    d| _         d| _        d | _        d | _        d| _        ddg| _        d | _        d| _        d | _        d| _	        d| _
        d| _        d| _        d | _        d | _        i | _        t!               | _        d | _        d | _        d | _        d | _        | j,                  j.                  it0        j2                  j4                  j7                         D cg c](  }|j8                  r|j:                  dvr|j:                  * c}| j,                  _        t=        t>        d      s| jA                          y y c c}w )Nr  Fr   r   )r  r  r1  FTS_STAT_WORDS)!r  orig_field_name
field_nameorig_search_queryonly_idsr%  r
   no_analitycscheck_access_custom_fnr'  archiveddeletedFSTorig_tag_nameorig_tree_parent_idaddon_filtersetrecent_projecttree_parent_filterquery_qstop_wordssorted_resultfinal_result	__class__r  cmfr   	CmfEntityiter_subclassesfull_search
class_namehasattrAPPbm25_stat_words_load)r   ms     r   __init__zCmfSearchEngine.__init__  s'   $!%#Z
!&*#!#' !e"&!% "  >>))1DGJJDXDXDhDhDj .fq==Q\\9d%d /0ll .fDNN*s,-%%' ..fs   -D?c                 T   t        |      dkD  r|d d }|d}|r|d   dk(  rddg}|sddg}t               }||_        ||_        ||_        ||_        ||_        ||_        ||_        ||_	        ||_
        |	|_        |
|_        ||_        ||_        ||_        ||_        t"        j$                  j'                  d	      }|d
kD  rt)        d|        |j+                          |j-                          t/        |      |_        d|j0                  d<   |j                  j3                  d      r|j                  d d |_        |j                  dk(  rd|_        |j                  dk(  rd|_        d|_        |rt5        |      |_        nt5               |_        |j
                  dk(  r|j                  r|j9                          nM|j
                  dk(  r|j;                          n-|j                  r|j=                          n|j?                          |jA                          |jB                  S )Ni   r>  r     r   2   r  T)is_dirty   u   Идет процесс индексации, могут быть доступны не все результаты поиска. Осталось объектов: Modelr  r   
CmfCommentr   )"lenr<  r  r@  rA  rB  rC  r%  r
   rD  rE  r'  rF  rG  rI  rJ  rK  r   r   countcmf_noteprepare_search_queryprepare_additional_filterlistfullsearch_sliceendswithrL  rM  search_empty_top_recentsearch_empty
search_topsearch_main_prepare_final_resultrQ  )clsr  rA  search_queryrC  r%  r
   rD  check_accessr'  rF  rG  tag_nametree_parent_idrecent_projectskwargsr   dirty_counts                     r   
fts_searchzCmfSearchEngine.fts_search  s4    |S '-L LU1X^GEHE "
 '!+&#/ "*(4%
" '%3"$ **00$0?  }  ~I  }J  K  L 	##%((*"&u+%&" %%g. & 1 1#2 6F% )F , %F *F $'$8F!$'EF!$ ##r)fjj**,%%+!ZZ $$&"""r   c                    t        j                  dd| j                        j                         | _        t        j                  dd| j                        | _        t        j                  dd| j
                        j                         | _        d | _        d | _        t        | j                  j                               dk(  rt        j                  d| j                        s t        j                  d| j                        r| j                  j                         | _        t        j                  d	| j                        r| j                  | _        | j                  | j                  d
d      | _        | j                  | j                        | _        | j                  | j                  W| j
                  | _        t        j                  dd| j                         | _        | j"                  | _        d | _        d | _        y | j+                  | j
                        | _        t/               | _        | j,                  j                         D ]2  }t3        j4                  |      }| j0                  j7                  |       4 t8        j;                  | j,                        | _        | j=                  | j                         | _        d | _        | j                  | _        | j                  | j"                        | _        | j>                  | _        | j@                  r| j$                  | _        y | jC                  | j                         }| j=                  |      }| j                  |      }|| _        y )Nz -[^ ]*r>  z[|&()] u;   ([^A-Za-zА-Яа-я0-9](\s|$)|(\s|^)[^A-Za-zА-Яа-я0-9])r  z^[a-zA-Z0-9]+-[0-9]+$z^[0-9]+$z@^[a-zA-Z0-9]*:[0-9a-f]*-[0-9a-f]*-[0-9a-f]*-[0-9a-f]*-[0-9a-f]*$FT)synonyms
stop_wordsu   [^А-Яа-яA-Za-z0-9.-]-)"resubrB  lowersearch_query_wo_qstoprs  stripquery_obj_codequery_obj_idre  splitmatchupperparse_search_queryrO  text_stemmingsearch_query_ninjasearch_query_normalizer3  r4  
query_likequery_remove_stop_wordssearch_query_wo_stop_wordsrj  search_query_partspeechcmfnlp
partspeechr  cmfutilninjaquery_normalizesearch_query_stemmr'  query_add_syns)r   wr  ninja_with_syns search_query_normalize_with_synssearch_query_stemm_with_synss         r   rh  z$CmfSearchEngine.prepare_search_queryZ  s    &(VVJr4;Q;Q%R%X%X%Z"FF9S$2L2LMFF#abegkgxgxy  B # t%%++-.!3xx0$2H2HIRXXVacgcycyMz&*&<&<&B&B&D#xx[]a]s]st$($:$:!!%!8!89O9OZ_lp!8!q!%!3!3D4J4J!K *d.?.?.K&*&7&7D#*,&&1LSQUQhQh*iD''+'B'BD$$(D!"DO*.*F*FtGXGX*Y' (,v$00668 	<A**1-J((//
;	< #*--0O0O"P '+&:&:4;R;R&S#  44"&"4"4T5P5P"Q#'#:#:  88$($<$<D! #11$2I2IJO/3/C/CO/T,+/+=+=>^+_($@D!r   c           	         g }|j                         D ]C  }|j                  d      }|d   }|g}|dd  D ]"  }t        |      dk\  s|j                  |       $ t        j                  |      }t        |      }	t        j                  j                  ddt        ||hz        gddgdgddg	      }
|
D ]  }|j                  s|j                  j                  j                  d
      d d D ]F  }d|j                         v r|j                         j                  dd      }|	j                  |       H  |j                  dj                  |	             F dj                  |      S )N|r   r     r   INr   ordernofilterr
   order_byr%  ,   r|  r>  )r  re  r  r  tokens_enrich_morphrL  r   
CmfSynonymrj  r   valuer  replaceaddjoin)r   r   r  tokentokensfirst_tokensearch_tokensr  normalized_wordssynonym_wordssynonym_listsynonymr   s                r   r  zCmfSearchEngine.query_add_syns  ss    [[] 	0E[[%F )K(MMABZ , q6Q;!((+	,  &99-HKM!,,11&$M]al`mMmHn9o:@&9I<E;9:1 2 ?L ( -<<$\\//55c:2A> 	- !'')+$GGI--c26%))!,	-- JJsxx./?	0@ xx}r   c                     | j                  | j                  | j                        | _        | j	                  | j
                        | _        y r   )	calc_tagsrI  rB  ru  _get_all_branchesrJ  rN  )r   s    r   ri  z)CmfSearchEngine.prepare_additional_filter  s9    t'9'94;Q;QR"&"8"89Q9Q"Rr   c                     t               }|r:t        |t              r|D ]  }|j                  |        n|j                  |       | j	                  |      \  }}t        |j                  |            S r   )rL  
isinstancerj  r  _extract_tagsunion)rr  ru  rs  r   _tagextracted_tagss         r   r  zCmfSearchEngine.calc_tags  sg    u(D)$ #DHHTN# "'*'8'8'F$nDJJ~.//r   c                 H   t               }| j                  d      }t        |      dk(  r| |fS d} |dd D ]X  }|dk(  r	t        j                  d|d      }|d   dk7  r|j	                  |d          t        |      dk(  sK|  |d    |d    } Z | j                         } | |fS )	zExtrats tags from the given search_query and returns its reminder and a set of extracted tags

        Args:
            search_query (str)

        Returns:
            tuple[str, list[str]]: search_query reminder and a set of extractd tags
        #r  r>  Nz(\W)r   r"     )rL  r  re  r  r  r  )rs  r   sharp_splittedr  
sub_tokenss        r   r  zCmfSearchEngine._extract_tags  s     u%++C0~!#%%#AB' 
	OE{'5!4J!}"A' :!#".
1z!}oN
	O $))+T!!r   c                 R   g }g }g }|D ]  }|j                   }|| j                  v r+|j                  s*|j                  j                  d      d   }|| j                  v r|j                  |       h|| j                  v r|j                  |       |j                  |        |||fS )N:r   )r   r5  r   r  r  r  r  )r   search_objects	aka_tasksaka_docsaka_dictsearch_objectr   s          r   _split_search_objects_to_3_akaz.CmfSearchEngine._split_search_objects_to_3_aka  s    	+ 	/M%//ID666$22)77==cB1E	D000  /d111..	/ (H,,r   c                    g }g }t         j                  rKt         j                  j                  j                  r't         j                  j                  j                  dk(  rg |fS t	        t         j                  j                        }|D ]?  }|j
                  r ||j
                  v r|j                  |       /|j                  |       A ||fS )Nr>  )r  r  r  r  r   r   r  )r   r  relatednot_related
user_loginr  s         r   %_split_search_objects_to_related_userz5CmfSearchEngine._split_search_objects_to_related_user  s    ~~Q^^%9%9%?%?1>>CWCWC]C]acCc~%%--.
+ 	2M66:IpIp;p}-""=1		2
 ##r   c                     g }g }|D ]4  }|j                   |k(  r|j                  |       $|j                  |       6 ||fS r   )r   r  )r   r  keyr   nomarkr  s         r   _split_search_objects_by_markz-CmfSearchEngine._split_search_objects_by_mark!  sM    + 	-M!!S(M*m,		-
 t|r   c                    g }t               }t        j                         }g }| j                  j                  |       }| j	                  |      \  }}}d }ddd}		 t        |      }
 |||	|        |||	|        |||	|        |||	|        |||	|       t        |      }|dk\  s||
k(  rnT|d d | _        t        j                         |z
  dkD  r.t        j                  dt        j                         |z
          y y )Nc                     |d   |d   k\  ry |sy |j                  d      }d|_        | j                  |       |dxx   dz  cc<   y )Nrf  total_limitr   	empty_topr  )popr   r  r  countersobj_listobjs       r   append_if_exists_empty_topzKCmfSearchEngine.search_empty_top_recent.<locals>.append_if_exists_empty_top6  sL     H]$;;,,q/C#CIJJsOW"r   r      rf  r  r^  皙?z3PROF fulltext_search empty_queries_top selects got )	rL  timedriverr   r  re  rP  r  r   )r   rQ  skip_idsprof_str  top_results_taskstop_results_docstop_results_dictsr  counters_empty_topres_count_beforeres_count_afters               r   rm  z'CmfSearchEngine.search_empty_top_recent,  s!   5))+;;DAAEAdAdesAt>+->		# ()<"<0&|5GIZ[&|5GIZ[&|5GIZ[&|5GIYZ&|5GIYZ ",/O"$;K(K  *#2.99; 3&GGI$))+X_J_I`ab 'r   c                 ^   g }t               }t        j                         }| j                  j                  |       }| j	                  |      \  }}}| j                  |      \  }}	| j                  |      \  }
}d}t        ||	|
||g      rU|df|	df|
df|df|dff}| j                  |||       |dz  }|dkD  rt        j                  d       nt        ||	|
||g      rUt        j                         |z
  dkD  r-t        j                  dt        j                         |z
          || _
        y )Nr   r    W   Баг в поиске, много данных, либо не идет вычитка.r  z&PROF fulltext_search search_empty got )rL  r  r  r   r  r  rz   _add_if_existsr  r   rP  )r   rP  r  r  r  results_tasksresults_docsresults_dictsr  r  r  r  
iter_countprocessing_groups                 r   rn  zCmfSearchEngine.search_emptyW  sZ    5 ))+77=595X5XYg5h2|]9=9c9cdq9r7%'77;7a7abn7o5$o
02B.! " /2!1%-q1 !$"   0-J!OJD sw# 02B.! "& 99; 3&GG<TYY[7=R<STU*r   c                     t        j                          }g }t               }i  fd}d }ddddddd}ddd} j                  j                    j                        } j                  |      \  }	}
} j                  |
d	      \  }
} j                  |	d	      \  }	} j                  |
d
      \  }
} j                  |	d
      \  }	} j                  |
d      \  }
} j                  |	d      \  }	}|
}|	}~
~	 j                  | j                        } j                  | j                        } j                  | j                        } j                  | j                        }|d d }|d d } j                  | j                        } j                  | j                        } j                  | j                        } j                  | j                        }t        j                          |z
  dkD  r-t        j                  dt        j                          |z
          t        j                          }t               }g }t        |      dkD  r|d   j                  dkD  r ||||d        t        |      dkD  r|d   j                  dkD  r ||||d        t        |      dkD  r|d   j                  dkD  r ||||d        t        |      dkD  r|d   j                  dkD  r ||||d        t        |      dkD  r ||||d        t        |      dkD  r ||||d        d}d}d}d}t        |      dkD  r|d   j                  }t        |      dkD  r|d   j                  }t        |      dkD  r|d   j                  }t        |      dkD  r|d   j                  } j                  |      } j                  |      } j                  |      } j                  |      }d}||kD  s||kD  rd}t         j                  j                               dk(  r
 ||||       	 t        |      }|rt        |      dkD  rDt        |      dkD  r6|d   j                  |d   j                  kD  r |||||        |||||       n |||||        |||||        |||||        |||||        ||||        ||||d         ||||d         |||||        |||||        ||||d         |||||        |||||        ||||d        nt        |      dkD  rDt        |      dkD  r6|d   j                  |d   j                  kD  r |||||        |||||       n |||||        |||||        |||||        |||||        |||||        |||||        ||||        ||||d         ||||d         |||||        |||||        ||||d         |||||        |||||        ||||d        t        |      }|dk\  s||k(  rn|d d  _        t        j                          |z
  dkD  r.t        j                  dt        j                          |z
          y y )Nc                 b   t        t        j                  j                        }|sy |j	                  d      }|j
                  v s|j                  dk(  rO|j                  v rA|sy |j	                  d      }|j
                  v r#|j                  dk(  r|j                  v rAd|j
                  <   |j                  dk(  rd|j                  <   |r|j                  |z  dk  ry d}|j                  j                  vrd}||   || d   k\  ry | j                  |       ||xx   dz  cc<   y )	Nr   r1  Tg?r   r   _limitr  )r   r  r  r  r  r   r   r   real_rank_bm25r  r  )	r  r  r  max_rankr  r  	cur_classexists_dictr   s	          r   append_if_existsz4CmfSearchEngine.search_top.<locals>.append_if_exists  s-   Q^^112J,,q/C **+/1QVYVgVgkvVvll1o **+/1QVYVgVgkvVv
 '+K

#}}/15C--. &&1T9 I}}D$7$77"	 	"h)F/C&DD JJsOY1$r   c                     |d   |d   k\  ry |sy |j                  d      }| j                  |       |dxx   dz  cc<   y )Nrf  r  r   r  )r  r  r  s       r   append_if_exists_otherz:CmfSearchEngine.search_top.<locals>.append_if_exists_other  sE     H]$;;,,q/CJJsOW"r   r   r     )r   r   c	doc_limit
task_limitc_limitr"  r  flow_idcodelike	flow_nameflow_attach   r  z'PROF fulltext_search TOP25 selects got   TFr     z2PROF fulltext_search TOP25 mixing and prepare got )r  rL  r  r   r3  r  r  	bm25_sortr  r   re  	rank_bm25r  bm25_sort_date4rs  r  rP  )r   r  rQ  r  r  r  r  counters_otherr  r(  r)  result_dictsresult_docs_idcodelikeresult_tasks_idcodelikeresult_docs_nameresult_tasks_nameresult_docs_attachresult_tasks_attachresult_docs_mainresult_tasks_mainrP  max_doc_name_rankmax_task_name_rankmax_doc_main_rankmax_task_main_rankdoc_prior  r  r  s   `                           @r   ro  zCmfSearchEngine.search_top  sl   ))+5*	%X	# aa!#2"F $%Q7* 00t7O7OP262U2UVd2e/k< /3.P.PQ\^o.p++040R0RS_ar0s--(,(J(J;Xc(d%%*.*L*L\[f*g''*.*L*L[Zg*h'',0,N,N|]j,k))&(>>*:D<T<TU NN+<d>V>VW!^^,>@X@XY"nn-@$BZBZ[/31"15!%0FH`H`!a"&..1H$JbJb"c>>*:D<T<TU NN+<d>V>VW 99; 3&GG=diikG>S=TUV ))+5  1$)9!)<)F)F)M]H6FM 1$)9!)<)F)F)M]H6FM 1$)9!)<)F)F)M]H6FM 1$)9!)<)F)F)M]H6FM %&*]H6LdS&'!+]H6MtT   " 0 3 B B !!#!21!5!D!D " 0 3 B B !!#!21!5!D!D  //0@A 001BC//0@A 001BC !226HK\6\Ht  &&()Q."=.,O"=1 )*Q.37G3H13L)!,;;>Nq>Q>`>``$]H>NPab$]H>NPab$]H>NPab$]H>NPab :KM_` :KM_`&}nlS :PRVW :QSWX :JL]^ :JL]^ :LdS :KM_` :KM_` :MtT *+a/C8I4JQ4N)!,;;>OPQ>R>a>aa$]H>OQcd$]H>OQcd$]H>OQcd$]H>OQcd :KM_` :KM_` :JL]^ :JL]^&}nlS :QSWX :PRVW :KM_` :KM_` :MtT :JL]^ :JL]^ :LdS
 "-0O"$;K(Kg j +3B/99; 3&GGHW^I^H_`a 'r   c                    t        j                          }g }t               }t               fd}| j                  j                  | | j                        }| j                  |      \  }}}| j                  || j                  d      }| j                  || j                  d      }| j                  || j                  d      }t        j                          |z
  dkD  r-t        j                  dt        j                          |z
          t        j                          }t               }g }		 t        |	      }
 ||	|        ||	|        ||	|        ||	|        ||	|       t        |	      }|| j                  d   kD  s||
k(  rn\|	| j                  d   | j                  d    | _        t        j                          |z
  dkD  r.t        j                  dt        j                          |z
          y y )	Nc                     |sy |j                  d      }|j                  v r#|sy |j                  d      }|j                  v r#j                  |j                         | j                  |       y Nr   )r  r   r  r  )r  r  r  r  s      r   r  z5CmfSearchEngine.search_main.<locals>.append_if_exists  s`    ,,q/C**+ll1o **+ OOCJJ'JJsOr      )limitr  &PROF fulltext_search main selects got r  r   1PROF fulltext_search main mixing and prepare got )r  rL  r  r   r4  r  r	  r  r   re  r%  rP  )r   r  rQ  r  r  r  r(  r)  r  rP  r  r  r  s               @r   rp  zCmfSearchEngine.search_main  s   ))+5e
	 00t7L7LM262U2UVd2e/k< ~~lD4I4IQT~Unn[$2G2GsnS~~lD4I4IQT~U99; 3&GG<TYY[7=R<STU ))+5"=1]K8]L9]K8]L9]L9!-0OA./EU2U  +4::a=AG99; 3&GGG		V]H]G^_` 'r   c                 D   g }t               }t        j                         }| j                  j                  |       }| j	                  |      \  }}}| j                  |d      \  }}| j                  |d      \  }}	t        j                         |z
  dkD  r-t        j                  dt        j                         |z
          t        j                         }d}
t        |||||	g      rg|df|df|df|df|df|df|df|	df|df|	df|dfg}| j                  |||       |
dz  }
|
dkD  rt        j                  d       nt        |||||	g      rg|d d	 | _
        t        j                         |z
  dkD  r.t        j                  d
t        j                         |z
          y y )Nr0  r  r   r   r  r  r  r  r^  r!  )rL  r  r  r   r  r  r  r   rz   r  rP  )r   rP  r  r  r  r(  r)  r  r6  r7  r  r  s               r   search_main_oldzCmfSearchEngine.search_main_old  s   5 ))+33D9262U2UVd2e/k<)-)K)KLZ_)`&&'+'I'I+W\']$_99; 3&GG<TYY[7=R<STU ))+
<l<Lo^_q!a q!!1%q!!1% q! !$a  !$ q!% 0  0-J!OJD sw= <l<Lo^_@ +3B/99; 3&GGG		V]H]G^_` 'r   c                     |S )u   
        Подчистка оригинального квери, который ввел пользователь:
        - удаление стоп-слов
        )r  re  r  r  )rr  rs  clean_search_query_listr  clean_search_querys        r   _clean_search_queryz#CmfSearchEngine._clean_search_query  s
     r   c                 V    d}|j                         D ]  }|t        v r|d|z   z  } |S )Nr>  r|  )r  ALL_STOP_WORDS)rr  r   r  r  s       r   r  z'CmfSearchEngine.query_remove_stop_words  s=     	AN" 37NC	 
r   c                 t   d}|j                         D ]  }|t        v rd|v r	|d|z   z  }t        j                  |      }|dk(  r4|j	                  dd      j	                  dd      }||k(  rd}d}nd|z   }||k(  r|d|z   |z   z  }vt
        j                  j                         t        j                  ddd      k  r|dz   |z   dz   |z   }|ddj                  |j                               z   z  } dj                  |j                         j                               }t        j                  d	d
|      }|S )Nr>  r  r|  @.i  r  r  u   [^ A-Za-zА-Яа-я0-9|-]r  )r  r)  r  text_normalize_enrichr  datetimedatetodayr  r  r  r  )rr  r   r  r  lemmalegacy_wlegacy_w_strs          r   r  zCmfSearchEngine.query_normalize  s;    	9A N"axsQw44Q7B;99S-55c#>q=#%L!H#&>L:37\11C  }}**,x}}T1a/HH !C( 2S 85 @3%++-!888C7	98 hhsyy{((*+ ff13<
r   c                 8    t        j                  ||dz   z        S )Nr  )mathlog)rr  Ndfs      r   bm25_idfzCmfSearchEngine.bm25_idfL  s     xx2a4!!r   c                 Z   i }i }t         j                  j                  d      |d<   t         j                  j                  d      |d<   t         j                  j                  j
                  j                         j                  d      j                         }t         j                  j                  j
                  j                         j                  d      j                         }|t        |      dz  dz     d   }|t        |      d	z  dz     d   }t        |      d
kD  r|d   d   }|d   d   }|t        |      dz  dz     d   }|t        |      d	z  dz     d   }t        |      d
kD  r|d   d   }|d   d   }d}	|D ]R  }
i ||
j                  <   | j                  |d   |
j                        ||
j                     d<   ||
j                     d   }T |D ]`  }
|
j                  |vri ||
j                  <   | j                  |d   |
j                        ||
j                     d<   ||
j                     d   }b i |d<   |d   d<   |d   d<   i |d<   i |d<   ||   d   |d   d<   ||   d   |d   d<   ||   d   |d   d<   ||   d   |d   d<   |t        _        |t        _        y )Nr  )r   r  z SELECT word, ndoc, nentry  FROM ts_stat('SELECT ml_text_tsvector from cmf_full_search where obj_model=''CmfDocument''  ') where  ndoc > 10  order by ndoc desc limit 10000 z SELECT word, ndoc, nentry  FROM ts_stat('SELECT ml_text_tsvector from cmf_full_search where obj_model=''CmfTask''  ') where  ndoc > 10  order by ndoc desc limit 10000 r  r  r   
   i  i  i  ditiFST_STAT_WORDS_DEFAULTFST_FRQ_MIDFST_FRQ_LOW)r   r   rf  CmfGlobalSettingsdpdata_driverSessionexecutefetchallre  wordr9  ndocrY  r?  
FTS_COUNTS)rr  r?  rI  cmfdoc_statcmftask_statdoc_mid_worddoc_low_wordtask_mid_wordtask_low_wordlastrowlast_doc	last_tasks                r   rZ  z$CmfSearchEngine.bm25_stat_words_loadR  s   
$*$8$8$>$>$>$W
=! & 4 4 : :Y : O
9..11==EEGOO  Q  @  I  I  K//22>>FFHPP  R|  }  F  F  H #3{#3A#5s#:;A>"3{#3B#6#;<Q?{d"&s+A.L&t,Q/L %S%6q%8#%=>qA$S%6r%93%>?B|t#(-a0M(.q1M 	6C')N388$-0\\*]:SUXU]U]-^N388$T*%chh/5H	6   	7Cxx~-+-sxx(-0\\*Y:OQTQYQY-ZN388$T*&sxx06I		7 46/09A/069B/06(*}%(*}%.<\.J4.P}%d+.<\.J4.P}%d+.<].KD.Q}%d+.<].KD.Q}%d++#r   c                 >   |}t        |      dk\  rd}|d   j                  |d   j                  z  dkD  rd}nI|d   j                  |d   j                  z  dkD  rd}n$|d   j                  |d   j                  z  dkD  rd}|dkD  rt        |d | d       ||d  z   }|S )	Nr  r   r"  g333333?r  r  c                     | j                   S r   )r   r   s    r   <lambda>z1CmfSearchEngine.bm25_sort_date4.<locals>.<lambda>  s
    !** r   )r  )re  r  sorted)rr  r  newresr   s       r   r  zCmfSearchEngine.bm25_sort_date4  s     s8a<D1v$$SV%:%::TAQ&&s1v'<'<<tCQ&&s1v'<'<<tC axET
0DEDE
Rr   c                 j   g }d}t        |      }|D ]>  }|dz  }| j                  ||      \  |_        |_        ||_        |j                  |       @ t        |d d      }|d d D ]F  }|j                  s|xj                  dz  c_        d|j                  d	d
|j                   |_        H t        |d d      }d}|d | D ]g  }|dz  }||_	        d|j                   d|j                   d| d|j                  z   d| z   |_        |j                  j                  dd      |_
        i |d | S )Nr   r  c                     | j                   S r   r
  rV  s    r   rW  z+CmfSearchEngine.bm25_sort.<locals>.<lambda>  
    AKK r   T)r  reverser;  r  u    ТОП1.2fr|  c                     | j                   S r   r\  rV  s    r   rW  z+CmfSearchEngine.bm25_sort.<locals>.<lambda>  r]  r   rz b(z) z | tsq=DZQTNr>  )re  	bm25_rankr
  r  debug_sql_posr  rX  r   r   debug_bm25_posr   r  )	r   r  r   r  rY  r   llrQ  ra  s	            r   r	  zCmfSearchEngine.bm25_sort  s_    X 	CFA04sE0J-CM3- !CMM#		
 f"7F 	AA $$t#$Q[[$5Qqwwi@	A f"7F 	8AFA A!//*"Q-=-=,>at2FPU\]b\cSddAG++GB7AJ	8 fu~r   c                    fd} |j                         }i }j                  !j                  dk7  r |j                        }d}d}d}d}	d}
d}d}t               }d}j                  | j                  v rd}i }|j                         D ]c  }||v r	|j                  |       |
dz  }
t               }d}d d t               g d dt               d}|j                  d      D ]  }||vr	|d	   j                  |       |d
   j                  ||   j                  d      D cg c]  }t        |       c}       |dz  }|t        j                  v r,|t        j                  |   v rt        j                  |   |   }nt        j                  d   |   }d}d}t        t        ||   j                  d            d      }|||dz   z  z  ||z   z  }|}d}j                  dk(  r9||v r5j                  r)|j                  j                         v r|	dz  }	d}|dz  }n|dz  }|dxx   d|dz    d| d| d|dd|dd|dd| d|dd| z  cc<   |j                  |        t        |      dk(  r|dz  }t!        |      }| j"                  |
dz
     dv r%|dz  }|dxx   d | j"                  |
dz
      z  cc<   t        |      dkD  r|dxx   d!|d"z  cc<   ||vr|||<   ||d#<   ||d$<   f d}d}d} |j%                         D ]  \  }}d}!d}"|d
   D ]e  }#d}$|j%                         D ]F  \  }%}&||%k(  r|&d
   D ]3  }'t'        |'|#z
        d%k  s|!dz  }!|$dz  }$|d&   j                  |%        F H |$|"kD  sd|$}"g |!dkD  rd'|"z  }(|d#   |(z  |d(<   |d   d)| d*|" d+|d(   dd|d#   dd,	z   |d<   |dz  }|"| kD  r|"} ||!z  }t)        |d&         j+                  d-d      j+                  d.d      j+                  d/d      })|dxx   d0| d1|) z  cc<   |d#   |d(<    j                   d |z   }t-        |j/                         D cg c]  }|d(   	 c}      }|d'| z  z  }dd2dd3j0                  z   z  z  z   }*||*z  }|d4| d5| d6j0                  d7d8|*d"d9j2                  d"
z  }d}+t4        j6                  j8                  j:                  ret4        j6                  j8                  j:                  dk7  r>j<                  r2t4        j6                  j8                  j:                  j<                  v rd:}+d},d;t4        v rj>                  t4        j@                  v rd:},||+z  |,z  }d<|d"d=| d>|+ d?|,d"d 	|z   }|dk(  rd@}|}-j2                  dAk(  r|dAz  }dB|d"d | }| dCdCjC                  |j/                         D .cg c]  }.|.d   	 c}.      z   }/|/_"        ||-fS c c}w c c}w c c}.w )DNc                     j                   j                  dd      } t        j                  dd|       } | d d } t	        j
                  d| z   dz         S )N'"z":([0-9,]*)z":"\1", z{ z })r   r  r  r  orjsonloads)tmpr  s    r   tsvector2dictz0CmfSearchEngine.bm25_rank.<locals>.tsvector2dict  sT     %%--c37C&&c:Ccr(C
 <<s
T 122r   r>  r   r  r<  r=  )rd  
smart_rank
found_synsvector_positionstoken_found_idxr   near_tokensr  rr  rs  r  r>  g      ?g      ?r  Fr  Tr   z wr,  =z bm25t(bm25)=z.1frb  z) idf=z fqd=z ord=z	 in_name=)VERBINFNr  r|  z
 SYNW_AWG=r_  rd  rt  r  ru  g?rq  z maxNRC[z]=z smrt(bm25t)=)rj  {}z #: r;     z nearC=z
 sumNearC= age=.0fz ageF=z	 sqlRank=g?FTS_RECENT_PROJECTSzrbm25=z wc=z rur=z rpr=gMbP?r  z codez<br>)#r   r   rL  r   r  r  r  extendintrY  r?  minre  r   r   r  r$  r  itemsabsr   r  sumvaluesr   r   r  r  r  r  r   r   r  r  r   )0r   r  r   rp  doc_vector_dictname_vector_dictr   
rank_debug	w_doc_cnt
w_name_cntsynw_cntorder_factorfound_tokens_cnt
done_words	model_keymatched_tokenssynwrank_setw_cntr  r  r   idfkbfqdbm25	bm25_origin_namew_avgnear_tokens_cntsum_near_tokens_cntmax_of_max_near_contexthave_near_token_cntmax_near_contextpostmp_max_near_contextsynw2token2pos2mltru  
age_factorrelated_user_rankrecent_project_rank	real_rankr   html_rank_debugs0    `                                              r   rd  zCmfSearchEngine.bm25_rank  s   	3 ((;(;<(S->->"-D,S->->?
	
U
	==D000I

 KKM G	8Dz!NN4 MH
 uHE ""!e$&#'"uE ZZ_  #O+l#''*()00/RSBTBZBZ[^B_1`Q#a&1`a
***yC<N<Nq<Q/Q,,Q/	:C,,-EFyQC#oa066s;<a@cQqSk*CE2 	88{*q4D/DZ[_b_k_k_q_q_sZs!OJ"G!8DNI
 gB'7'9&:!E7!A3mTXY\S]]^_hil^mmstwx{s|  }B  CF  BG  GL  MY  Z]  L^  ^g  ho  gp  #q  qT"A #D 8}!!ME++HQJ7;KK	gAd&B&B8A:&N%O"PP8}q gJuSk"::>)',t$!&E+'7E#$OG	8V "#)//1 "	9KD%"# /0 <'($%3%9%9%; "ME6u}  &'9 : 	"
 tcz?b0/14/0A50!-044U;!	"" (*::';$!<" #Q& --&+K&83&>l#!&wHTF"EUDVVcdijvdwx{c||}  D  EP  Q  RU  ~V  VW  3X  "Xg1$#&==.>+#'::#!%"67??BGOOPSTVW__`cdfggBtfB{m"<<&+K&8l#E"	9F z^j0
^-B-B-DEAlOEFs556 r1c#,,./00
j 0
;N:OuUXUaUabeTfflmwx{l|  }F  GJ  GO  GO  PS  FT  U  	U

 ^^!!''ANN,@,@,F,F",LQTQnQn$$**c.K.KK $   A%#*<*<@U@U*U"&''*==d3Zt,<+=UCTBUUZ[norZsstu  yC  C
19 D	88tDLD c
!J<8J'L-Q_QfQfQh<iAQwZ<i0jj#	Yw 2ap FB =js   %W)W.	W3
c                 6    d| vr| S | j                  d      d   S )Nz@#@#@#r   )r  )r   s    r   _remove_suffix_from_headlinez,CmfSearchEngine._remove_suffix_from_headline  s#    8#O~~h'**r   c                    | j                   | j                  d   | j                  d    }|D ]m  }d}|j                  |j                  d}|j                   d|j                   d| d|j
                  d	d|j                   d
t        j                   |_        o | j                  ri }g }|D ]  }|j                  |j                         |j                  r| j                  |j                        nd}|j                  |j                  |j                  | t         j#                  |j$                        |j                  |j                  |j
                  |j                  d	||j                  <    |t        _        t        j                  d       |S i }| j(                  rXi }|D ]7  }|j+                  |j,                  g       j                  |j                         9 |j/                         D ]  }	t1        t2              |	   j4                  r| j(                  dgz   }
dd||	   gg dg}n| j(                  }
dd||	   g}|	dk(  r|
g dz   }
|	dk(  r|
dgz   }
t6        j9                  |	      j;                  |
|| j<                  | j>                        }|D ]]  }t1        t2              j,                     j4                  r&|j4                  r|||j@                  jB                  <   O|||jB                  <   _  ntE        d       t        j                  d       g }|D ]a  }|jG                  |j                        }|s"|j                  r| j                  |j                        nd}|jH                  dk(  r|jJ                  sg|jL                  |jJ                  jN                  |jJ                  jP                  jR                  | |j                  |j                  |j
                  |jB                  jR                  |j                  |jT                  |jV                  |jX                  t         j#                  |j$                        |j                  d}nt[        j\                  d|j                        d   }|jB                  jR                  ||jP                  jR                  | |j                  |j                  |j
                  t         j#                  |j$                        |j                  d	}	 | j^                  s|ja                  dd       n| j_                  |      stb        | j(                  D ]6  } |j\                  d      d   }te        ||j                     |d      }|||<   8 |j                  |       d t        j                  d       | jf                  sHti        t2        jj                  jl                  | jn                  t6        jq                  |dd       d !       || _9        y# tb        $ r Y w xY w)"u5   
        Итоговая обработка
        r   r  NoneNz.6fr|  z rank=r~  r  z
<br>words=r>  )	idr   coder   r   r   r   r   r   zfulltext_search ENDcmf_ver_headr  )cmf_ver_curz==Tr  r1  )urlurl_previewurl_preview_imgr  fullsearch_answer)r
   r  include_archivedinclude_deleteduY   DEV: FATAL. Укажите в запросе поиска список полей fields=z"fulltext_search Start check access)r  r   r  r   r   r   r   	attach_idattach_name
attach_urlattach_url_previewattach_url_preview_imgr   r   z DZQTN )	r  r   r  r   r   r   r   r   r   T)TEXKOM_skip_failread_auditTEXKOM_ppp_project_simplecheckr,  r^  )rs  obj_dict)rx  ):rP  r%  r   r   r   r   r   r  rH  rC  r  r   r   r  r   r   jsonrn  r   fulltext_search_headlinesr
   
setdefaultr   keysvarsr   cmf_verr  get_model_by_namerj  rF  rG  r  r  	cmf_alertgetrW  parent	parent_idr   r  r  r  r  r  r  r  rE  _acl_check_readCmfPermissionErrorgetattrrD  schedule_deferred_jobr   _do_calc_statisticsrB  
dumps_dictrQ  )r   rQ  ra  formated_rankr  	result_idr   objectsids_by_modelr  _fields_filterr  r  r  r  r   r	   attrs                      r   rq  z%CmfSearchEngine._prepare_final_result  s7    ))$**Q-

1F  	qA"Mvv!#$66#,!''&uQZZPSDTTUVWV]V]U^^hijininhopAG		q ==(*%I!   *LMJJ4<<QZZH\^((GGJJ#+*#'::amm#<WWFF !

WW
7)!((3  +DA'GG)*;;L! J''R8??IJ +//1 .
<
+33 KK>*::G .l:6NOQlmG KKG!4j)ABG0%(QQG.%)<(==G"44Z@EE""%)]]$(LL	 F  $ .CF|AKK088S[[7: 0 0 3 34*-	.).4 qs	45 B	A++ahh'CLMJJ4<<QZZH\^>>_4 ::  "mm #

 #

 5 5'/j!" !$%JJ%(VV\\'(ww&)gg.1oo252E2E'+zz!--'@!" H$ HHZ9!<E "ffll % #'/j!" !$%JJ'+zz!--'@!"
 H66++tlp+q!88=00!% / +C 0 3&wqxx'8%F*./ JJx(wB	H 	
%&   !$$88(,(>(>GL^L^_bcfdf_gLhi
  ) *  s   6BW77	XXr  rQ  r  c                    |D ]  \  }}t        |      D ]l  }|s |d   j                  |v r|j                  d       *|j                  |d          |j	                  |d   j                         |j                  d       n  y r  )ranger   r  r  r  )rr  r  rQ  r  lstrf  _s          r   r  zCmfSearchEngine._add_if_exists#  s}    " 		JC5\ q6==H,GGAJ##CF+SV]]+
		r   c                     | sg S t         j                  j                  j                  j	                         j                  dd| i      }|D cg c]  }|d   	 c}S c c}w )Na  
                WITH tree_parents AS (
                    WITH RECURSIVE r AS (
                        SELECT obj_id, obj_code, obj_tree_parent_id
                        FROM cmf_full_search
                        WHERE obj_tree_parent_id = :tree_parent_id

                        UNION

                        SELECT cfs.obj_id, cfs.obj_code, cfs.obj_tree_parent_id
                        FROM cmf_full_search AS cfs
                        JOIN r ON cfs.obj_tree_parent_id = r.obj_id
                    )
                    SELECT obj_id FROM r
                    WHERE r.obj_id IN (SELECT obj_tree_parent_id FROM r)

                    UNION

                    SELECT :tree_parent_id
                )
                SELECT * FROM tree_parents;
            rv  r   )r   r   rB  rC  rD  rE  )rv  recordsra  s      r   r  z!CmfSearchEngine._get_all_branches1  s`    
 I&&))55==?GG, ~./
2 &&!&&&s   Ac                    dt         _        |j                  dd      }t        j                  dd|      }t        j
                  d|      }d}d}d}	|D ]I  }
t        |
      dk(  rt        |
      dk(  r|
dv r&|
d	v r	|	d|
 z  }	3|
dk(  r9|
d
v r|	dz  }	C|
dv r|	dz  }	Mt        |
      dk(  r\|
d   dv rt        |
      dk(  rr|d|
dd   z  }~|r|
j                  dd      }
|dz  }|dkD  rd}d}|	r|	d   dvrd}t        |
      dkD  r| j                  |
|      }t        t        |      t        t              z
        }t        |      dk(  r|	| d|d    z  }	n-t        |      dkD  r|	| ddj                  |      z   dz   z  }	n	 |r|dk\  r n
|dk\  r nL |rt        |      dkD  r|d   dv r|dd  }|S |	j                  dd      j                  dd      j                  dd      j                  d d      j                         }	|	r|	d   dv r|	dd  }	|	r|	d   dv r|	dd  }	|	r|	d   dv r|	dd  }	|	r|	dd  dv r|	d d }		 t        j                  j                  j                   j#                         j%                  d!d"|	i      }t        |      d   d   }	t/        |d$|	       |	S # t&        j(                  j*                  $ r}t        j                  j                  j                   j-                          t        j                  j                  j                   j#                         j%                  d#d"|i      }t        |      d   d   }	Y d }~d }~ww xY w)%Nr>  zwww.u   [^-A-Za-zА-Яа-я0-9()|&!' ]r|  z(,| |&|\||\(|\))r   r  )r  !z()&|)rQ   r   r  z |)r   rN   &z &r  r  r  r  F)r  r  r  )r}  z (  | z )r;  )r  r  z OR z or z AND  & z and z!select to_tsquery('russian', :q);q+select websearch_to_tsquery('russian', :q);z->)r  rH  r  r  r  r  re  prepare_wordrj  rL  r)  r  r  r   r  rB  rC  rD  rE  
sqlalchemyexcProgrammingErrorrollbackprint)rr  rs  r}  r~  
first_wordsearch_query_allowed_symbr  
word_countstopsr  r   oper	sug_wordstsqueryes                  r   r  z"CmfSearchEngine.parse_search_queryU  s    !))&4$&FF+KSR^$_!-/HI 
 6	A1v{1v{
?;1QCLA8))r
&&r
 1v{tz!q6Q;1QqrUG$		#s#A!OJA~ D"Z/1vz,,Q,B	 Y#n2E!EF	y>Q&D69Q<.11A^a'D6

9(==DDAjAo Rm6	p 5z!|aJ 6ab	LIIfe$,,VU;CCGUS[[\cejkqqs1#!"A1#!"A1#!"A23:%#2A	$''**66>>@HHIpsvxyrz{GWa #A 	lD!$ ~~.. 	$  ,,557 ''**66>>@HHAl#%G Wa #A	$s   AJ+ +MBMMc           
      j   d}|j                         D ]  }d|v r|d|z   dz   z  }|d|z   z  } |j                         }|j                  dd      }|j                  dd      }	 t        j                  j
                  j                  j                         j                  d	d
|i      }t        |      d   d   }|j                  dd      j                  dd      j                  dd      j                  dd      j                  dd      j                  dd      }d}|j                         D ]>  }d|vr	|d|z   z  }|ddj                  t!        |j                  d                  z   z  }@ dj                  |j                               }|S # t        j                  j                  $ r}t        j                  j
                  j                  j                          t        j                  j
                  j                  j                         j                  ddt        i      }Y d }~d }~ww xY w)Nr>  r  z (ry  r|  r  r  hhhackskiptirez%select to_tsquery('russian', :query);r   r  r  r   rj  r  rb  )r  r  r  r   r  rB  rC  rD  rE  r  r  r  r  r   rj  r  rL  )rr  r   r  r  r  r  r  s          r   r  zCmfSearchEngine.text_stemming  s    	AaxTAX^#S1W		
 GGIyyU#||C!12	''**66>>@HHItw~  AE  wF  GG M!QIIeC ((R088sCKKCPRS[[\_`bckkl|  B  C 	9A!|sQw sSXXc!''#,&7888	9 hhsyy{#
- ~~.. 
	  ,,557 ''**66>>@HHAeG
	s   %AF H2-A:H--H2c                    t         xj                  d| dz  c_        |d   t        j                  v rd}t        j
                  }nd}t        j                  }g }t        j                  |      s|j                  |      }g }t        j                  |      }|r.t         xj                  d| dz  c_        |j                  |       t         xj                  dz  c_        d}	|D ]  }
|	d	k\  r nt        |
      d	k  r|
d   |d   k7  r|
d
   |d
   k7  r0d|
v rF|
j                  dd      }
t         xj                  d|
 dz  c_        |j                  |
       |	d
z  }	zt         xj                  d|
 dz  c_        |j                  |
       |	d
z  }	 g }t        j                  j                  j                   j#                         j%                  dd|i      }d}	|D ]w  \  }}|	d
k\  r nmt        |      d	k  r|d   |d   k(  s|d
   |d
   k(  s3|j                  dd      }t         xj                  d| dz  c_        |j                  |       |	d
z  }	y t'        |      t'        |      z  |hz  }n|h}t'               }|D ]a  }
t(        j+                  |
      d d	 D ]D  }|j-                  |j.                         t         xj                  d|j.                   dz  c_        F c ||z  }t'               }|rt        j                  j1                  ddt1        ||hz        gddgdgdd
g      }|D ]  }|j2                  s|j2                  j4                  j7                  d      d d D ]P  }|j9                         j                  dd      }t         xj                  d| dz  c_        |j-                  |       R  ||z  t'        |      z  }t1        |      S )Nz|w:r|  r   enruzaddNinjaRevers z, zspellError, r  r  r|  r>  z	addSpell z
            SELECT
                name, similarity(:word, name) as sim
            FROM cmf_synonym
            WHERE
                :word % name
            ORDER BY "sim" desc
            LIMIT 5;
             rG  zaddSpellTrgm z
normalize r   r  r   r  r  r  r  zsynAdd )r  rH  stringascii_lettersr  dictionary_endictionary_rudictionary_checksuggestninja_reversr  re  r  r   r  rB  rC  rD  rE  rL  morphparser  normal_formrj  r   r  r  r  )rr  rG  r}  lang
dictionaryfiltered_suggestions3suggestionsfiltered_suggestionsnwr   r  filtered_suggestions2suggestions2_listsuggr  all_suggestionsr  r  r  r  r   s                        r   r  zCmfSearchEngine.prepare_word  s   	3tfB7f***D ..JD ..J "''-$,,T2K#% %%d+B?2$b11$++B/EE|$EA  6q6Q;Q447?qttAw!8 		#r*AEEy2..E)003FA9QCr**$++A.Q)4 %'! & 1 1 4 4 @ @ H H J R R T 
! A, a6t9>7d1g%aDG);  <<R0DEE}TF"55E)006FA ""67#>S:TTX\W]]O  $fO 5  	9Akk!nRa( 9 !$$R^^4:bnn%5R889	9 *,<< !,,11&$M]ae`fMfHg9h:@&9I<E;9:1 2 ?L ( -<<$\\//55c:2A> - GGI--c2671#R0%))!,-- *M9C@U<VVO$$r   )FNNFNNFFNNN)rs  r   returnztuple[str, set[str]])r  )r  z	list[str])TFF)T),r   r   r   __doc__r  r  r5  r  r   r  r\  classmethodrz  rh  r  ri  r  staticmethodr  r  r  r  rm  rn  ro  rp  r#  r'  r  r  r9  rZ  r  r	  rd  r  rq  r   r   r  r  r  r  r  r   r   r   r<  r<    s   F $_N kO,- O&(F(D `d$) m# m#dDAN%NS 
0 
0 " "J-&$	(cV)+X}b@5ap<a~ " "& 	 	 " "b " "
 1$ 1$f  $ FBP + +
W r 
d 
$ 
# 
 
  '  'F i iZ " "J q% q%r   r<  )-cmf.includecmf.util.cmfnlputilr  typingr   r   r   r   r  r5  r.  dataclassesr   r	   r
   	tracebacksys	itertoolscollectionsr   enchantr  	pymorphy3r  r  rm  bs4r   MorphAnalyzerr  RUSSIAN_STOP_WORDSENGLISH_STOP_WORDSPROMPT_STOP_WORDS_NORMTECH_STOP_WORDSEVA_ARTIFACT_KEYS_STOP_WORDSEVA_ARTIFACT_VALUES_STOP_WORDSrL  r)  ALLOWED_FIELDSr   r   r   r:  r<  r   r   r   <module>r)     s         - ,    0 0   
  "    	   	! @  A  K ;  M  "x  '*<<NO `, = = =| "g 0 gV	 0 	G% G%r   