
    wki                       d dl  d dlmc mZ d dlmZmZmZmZ d dl	Z	d dl
Z
d dlZd dlmZmZmZ d dlZd dlZd dlZd dlZd dlmZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlmZ d dlmZ  ej@                         Z!g dZ"g d	Z#g d
Z$g dZ%g dZ&g dZ' e(e"e#z   e%z         Z)g dZ*e G d d             Z+ G d d      Z, G d de,      Z- G d de,      Z. G d d      Z/y)    )*N)ListSetDictOptional)	dataclassfieldfields)
namedtuple)make_prof_point)BeautifulSoup)   иu   вu   воu   неu   чтоu   онu   наu   яu   сu   со   какu   аu   тоu   всеu   онаu   такu   егоu   ноu   даu   тыu   кu   уu   жеu   выu   заu   быu   поu   толькоu   ееu   мнеu   былоu   вотu   отu   меняu   ещеu   нетu   оu   изu   емуu   теперь
   когдаu   дажеu   нуu
   вдругu   лиu   еслиu   уже   илиu   ниu   бытьu   былu   негоu   доu   васu   нибудьu
   опятьu   ужu   вамu   ведьu   тамu
   потомu   себяu   ничегоu   ейu
   можетu   ониu   тут   гдеu   естьu   надоu   нейu   дляu   мыu   тебяu   их   чемu   былаu   самu   чтобu   безu
   будтоu   чегоu   разu   тожеu   себеu   подu
   будетu   жu
   тогда   ктоu   этотu   тогоu   потомуu
   этого
   какойu   совсемu   нимu
   здесьu   этомu   одинu
   почтиu   мойu   темu
   чтобыu   нееu   сейчасu   были   кудаu
   зачемu   всехu   никогдаu
   можноu   приu   наконецu   дваu   обu   другойu   хотьu
   послеu   надu   большеu   тотu
   черезu   этиu   насu   проu
   всегоu   нихu
   какаяu
   многоu
   развеu   триu   этуu   мояu   впрочемu   хорошоu   своюu   этойu
   передu   иногдаu
   лучшеu   чутьu   томu   нельзяu
   такойu   имu
   болееu   всегдаu   конечноu   всюu
   между)imemymyselfweourours	ourselvesyouyouryoursyourself
yourselveshehimhishimselfsheherhersherselfititsitselftheythemtheirtheirs
themselveswhatwhichwhowhomthisthatthesethoseamisarewaswerebebeenbeinghavehashadhavingdodoesdiddoingaantheandbutiforbecauseasuntilwhileofatbyforwithaboutagainstbetweenintothroughduringbeforeafterabovebelowtofromupdowninoutonoffoverunderagainfurtherthenonceheretherewhenwherewhyhowallanybotheachfewmoremostothersomesuchnonornotonlyownsamesothantooverystcanwilljustdonshouldnow)	r   r   u   почемуr   r   r   r   u   можешьr   )docwwwhttphttpsmailto)u
   авторu   бизнес-процессu   владелецu   датu   исполнителu   контрагентu   логическu
   отделu   постановщикu   приоритетu   проектu   процессu
   спискu   стандартнu   статусu   тип)approvbaseclosedefaultdocumentopentasku   бизнесu   документu
   задачu   задачаu   закрытu
   обычнu   системu   созданu   черновик)textml_textnametagscommentsaddon_fieldskey_phrasesc                   ,   e Zd ZU dZeed<   dZeed<   dZeed<   dZeed<   dZ	eed<   dZ
eed<   dZeed<   dZeed	<   dZeed
<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZ	eed<   dZeed<   dZeed<   dZ
eed<   dZeed<   dZeed<   dZeed<   dZ eed<   dZ!eed<   dZ"eed<   dZ#eed <   dZeed<   dZ$eed!<   dZeed	<   dZ%e&ed"<   dZ'e&ed#<   dZ(eed$<   dZ)eed%<   dZ*eed&<   dZ+eed'<   dZ,eed(<   dZ-eed)<   dZ.eed*<   dZ/eed+<   dZ0eed,<   dZ1eed-<   dZ2eed.<   dZ3eed/<   dZ4eed0<   dZ5eed1<   dZ6eed2<   dZ7eed3<   dZ8eed4<   dZ9eed5<   dZ:eed6<   y)7DriverSearchObjectN	obj_modelobj_idobj_codeobj_nameobj_modified_atobj_project_idobj_related_person_loginsobj_ml_textobj_textobj_tagsobj_logic_type_codeobj_activity_codeobj_status_typeobj_result_textobj_commentsobj_owner_nameobj_responsible_namesobj_hrefobj_parent_idobj_tree_parent_idobj_root_parent_idobj_created_atobj_deletedobj_archivedobj_author_nameobj_modified_by_nameobj_addon_fieldsobj_user_ratingobj_key_phrasesobj_company_idobj_breadcrumbs"obj_related_person_logins_tsvectorobj_user_portal_topobj_client_portal_topname_tsvectortext_tsvectortags_tsvectorresult_text_tsvectorcomments_tsvectoraddon_fields_tsvectorkey_phrases_tsvectorml_text_tsvectorresult_tsvectorresult_textheadlineheadline_rawtitlebreadcrumbslabelrankage_daysdebugmark);__name__
__module____qualname__r   str__annotations__r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   boolr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r        !./cmf/models/cmf_search_engine.pyr   r   \   sK   IcF3HSHSOCN3$(c(KHSHS"" c OCOCLN3 $#$HSM#!s!!s!N3OCKLN3OC##SOCOCN3OC$(c(-1&s1K##!%$%M#M#M### c  $#$##SOCKHSLE#KE#DOHSE#DOr   r   c                   *    e Zd Zd Zd Zd Zd Zd Zy)SearchDriverBasec                     t         NNotImplementedselfsearchs     r   select_empty_query_topz'SearchDriverBase.select_empty_query_top       r   c                     t         r   r   r   s     r   select_empty_queryz#SearchDriverBase.select_empty_query   r   r   c                     t         r   r   r   r   querys      r   select_bm25zSearchDriverBase.select_bm25   r   r   c                     t         r   r   r   s     r   
select_synzSearchDriverBase.select_syn   r   r   c                     t         r   r   r   s     r   select_attachmentz"SearchDriverBase.select_attachment   r   r   N)r   r   r   r   r   r   r   r   r   r   r   r   r      s    r   r   c                   ,    e Zd Zd Zd ZddZd Zd Zy)SearchDriverEvaFullSearchc                     t         j                  j                  |      }g }|D ]  }|j                  t	        di |        |S )Nr   )modelsCmfFullSearchfilter_empty_topappendr   )r   r   
found_objsres	found_objs        r   r   z0SearchDriverEvaFullSearch.select_empty_query_top   sO    ))::6B
# 	IJJ)  	 
r   c                 v   g }g }g }g }g }|j                   dk(  s|j                   dk(  r|t        j                  j                  |j                  t        t        j                  j                        dd|      }t        j                  j                  |j                  dd|      }|j                   dk(  s|j                   dk(  r|t        j                  j                  |j                  t        t        j                  j                        dd|      }t        j                  j                  |j                  dd|      }|j                   dk(  s|j                   dvr.t        j                  j                  |j                  d	dd|
      }g }||z   |z   |z   |z   D ]  }|j                  t        di |        |S )NANYCmfTaskEMPTY   )force_related_person_loginr   r   r   )r   r   r   CmfDocumentr
  r  r   )force_field_namer   r   r   r   )
model_namer  r  filter_onceAKA_TASK_MODELSr   gcurrent_userloginAKA_DOC_MODELSAKA_DICT_MODELSr  r   )	r   r   task_empty_query_related_usertask_empty_querydoc_empty_query_related_userdoc_empty_queryother_empty_queryr  r  s	            r   r   z,SearchDriverEvaFullSearch.select_empty_query   s   (*%')$%):):i)G,2,@,@,L,L&&+.q~~/C/C+D -M -)  &33??&&	  @   %):):m)K+1+?+?+K+K%%+.q~~/C/C+D ,L ,( %22>>%%	 ? O %):):B\)\ & 4 4 @ @&&!' !A ! 69IILhhkzz  ~O  O 	8IJJ)6I67	8 
r   c                 &   g }g }g }t        d|j                  d         }|j                  dk(  s|j                  dk(  r4t        j                  j                  |j                  |d|g| dd|      }|j                  dk(  s|j                  d	k(  r4t        j                  j                  |j                  |d|g| d
d|      }d}|j                  dk7  s|j                  r|}|j                  dk(  r4t        j                  j                  |j                  |d|g| d|      }nB|j                  dvr4t        j                  j                  |j                  g|d|g| d|      }g }	||z   |z   D ]  }
|	j                  t        di |
        |	S )Nd      r	  r
  r   TSK25F)force_slicer   include_attachmentr   r  DOC25   DICT25)r"  r   r   r  MDL25r   )maxslicer  r  r  search_once_top_bm25r  r  extendedr  r  r   )r   r   r   pfxresult_tasksresult_docsresult_dictslice_toother_slicer  r  s              r   r   z%SearchDriverEvaFullSearch.select_bm25)  s   sFLLO,%):):i)G!//DDVE[E[]b9:8seSXM?DV E UL %):):m)K ..CCFDYDY[`9:8seSXM?DV D UK
 %"K% ..CCFDZDZ\a9:;RUQVV\~fl D nK&@@ ..CCVEVEVDWY^9:;RUQVV[}ek D mK %3kA 	8IJJ)6I67	8
r   c                 (    | j                  ||d      S )Nsyn)r   r   s      r   r   z$SearchDriverEvaFullSearch.select_synH  s    u55r   c                 z   g }g }g }g }g }g }|j                   dk(  r8t        j                  j                  |j                  |j
                  d|      }nF|j                   dvr8t        j                  j                  |j                   g|j
                  d|      }|j                   dk(  s|j                   dk(  rot        j                  j                  |j                  |j                  dd|	      }t        j                  j                  |j                  |j
                  d
|      }|j                   dk(  s|j                   dk(  rot        j                  j                  |j                  |j                  dd|	      }t        j                  j                  |j                  |j
                  d|      }|j                   dk(  s|j                   dk(  r7t        j                  j                  |j                  |j
                  d|      }g }||z   |z   |z   |z   |z   D ]  }	|j                  t        di |	        |S )Nr	  DICM2)r   r   r  MDLM2r
  TSKSYNM2r3  )r   r   r   TSKM2r  DOCSYNM2DOCM2CmfAttachmentATCHM2r   )r  r  r  search_oncer  tsquery_without_synr  tsquery_with_synr  AKA_ATTACHMENT_MODELSr  r   )
r   r   r/  result_tasks_synr-  result_docs_synr.  result_attachsr  r  s
             r   select_default_OLDz,SearchDriverEvaFullSearch.select_default_OLDK  sS    % ..::&&**	 ; K &@@ ..::""#**	 ; K %):):i)G%33??@V@VX^XoXo6@uU[  @  ]!//;;F<R<RTZTnTn6=f < NL %):):m)K$22>>v?T?TV\VmVm6@uU[ ? ]O ..::6;P;PRXRlRl6=f ; NK %):):o)M#11==,,**	 > N$ %(88;FX[iilww 	8IJJ)6I67	8 
r   N) )r   r   r   r   r   r   r   rD  r   r   r   r   r      s    0d>6Jr   r   c                       e Zd Zy)SearchDriverElasticSearchN)r   r   r   r   r   r   rG  rG    s    r   rG  c                      e Zd ZdZdgZdgZdgZdZ e       Z	d Z
e	 	 	 	 	 	 d)d       Zd Zd	 Zd
 Zed        Zed*d       Zd Zd Zd Zd Zd Zd Zd Zd Zed        Zed        Zed        Zed        Zed        Zed        Z ed        Z!ed        Z"d+dZ#d Z$ed        Z%d  Z&ed!e'd"e'd#e(fd$       Z)ed,d%       Z*ed-d&       Z+ed'        Z,ed.d(       Z-y)/CmfSearchEngineu  
    Главная задача: выполнение поиска (без учета дата-драйвера PG или Elastic).

    Алгоритм:
    - Подготовка запроса
    - Запрос делаем через дата-драйвер, получаем сырые предварительные данные (х10 объема)
    - Сортируем по классам
    - Сортируем по bm25eva
    - Проверяем права
    - Смешиваем потоки (оптимизированно с проверкой прав)
    - Формируем вывод

    Драйвер:
    - Уметь делать запросы
    - Не обязательно, но желательно: давать статистику bm25 (даже если используется Elastic,
          мы храним данные в нашей таблице все равно и можем посчитать bm25 статистику из нее)
    r  r
  r;  Nc                 t   d| _         d| _        d | _        d | _        d| _        ddg| _        d | _        d| _        d | _        d| _	        d| _
        d| _        d| _        d| _        d | _        d | _        i | _        t#               | _        d | _        d | _        d | _        d | _        t/               | _        | j2                  j4                  jt6        j8                  j:                  j=                         D cg c](  }|j>                  r|j@                  dvr|j@                  * c}| j2                  _        y y c c}w )Nr	  Fr   r  rE  )r  r
  r;  )!r  orig_field_name
field_nameorig_search_queryonly_idsr)  r
   no_analitycscheck_access_custom_fntopr+  archiveddeletedFSTorig_tag_nameorig_tree_parent_idaddon_filtersetrecent_projecttree_parent_filterquery_qstop_wordssorted_resultfinal_resultlistsearch_query_partspeech	__class__r  cmfr  	CmfEntityiter_subclassesfull_search
class_name)r   ms     r   __init__zCmfSearchEngine.__init__  s   $!%#Z
!&*#!#' !e"&!% " '+v$>>))1DGJJDXDXDhDhDj .fq==Q\\9d%d /0ll .fDNN* 2.fs   4-D5c                 B   t        dd      }t        |      dkD  r|d d }|d}|r|d   dk(  rddg}|sdd	g}t               }||_        ||_        ||_        ||_        ||_        ||_        ||_	        ||_
        ||_        |	|_        ||_        |
|_        ||_        ||_        ||_        ||_        t        d
d      }t&        j(                  j+                  d      }|dkD  rt-        d|         |        |j/                          |j1                          t3        |      |_        d|j4                  d<   |j                  j7                  d      r|j                  d d |_        |j
                  dk(  rd|_        |j                  dk(  rd|_        d|_        |rt9        |      |_        nt9               |_         |        |j                  dk(  r)|j                  st        dd      }|j=                          nq|j                  dk(  rt        dd      }|j?                          nE|j                  rt        dd      }|jA                          nt        dd      }|jC                           |        t        dd      }|jE                           |        |jF                  S )Nz#CmfSearchEngine.fts_search__prepare   i   rE  r      r   2   r  z'CmfSearchEngine.fts_search__dirty_count
   T)is_dirtyu   Идет процесс индексации, могут быть доступны не все результаты поиска. Осталось объектов: Modelr	  r   
CmfCommentr   z/CmfSearchEngine.fts_search__do_empty_top_recent   z$CmfSearchEngine.fts_search__do_emptyz#CmfSearchEngine.fts_search__do_mainz"CmfSearchEngine.fts_search__do_topz0CmfSearchEngine.fts_search__prepare_final_result  )$r   lenrI  r  rK  rL  rM  rN  r)  r
   rO  rP  rQ  r+  rR  rS  rU  rV  rW  r  r  countcmf_noteprepare_search_queryprepare_additional_filterr^  fullsearch_sliceendswithrX  rY  search_empty_top_recentsearch_emptysearch_main
search_top_prepare_final_resultr]  )clsr  rL  search_queryrN  r)  r
   rO  check_accessrQ  rR  rS  tag_nametree_parent_idrecent_projectsr+  kwargs
prof_pointr   prof_point_dcdirty_counts                        r   
fts_searchzCmfSearchEngine.fts_search  s    %%JCP
 |S '-L LU1X^GEHE "
 '!+&#/ "*(4%
"" '%3"$
 ((QSUV**00$0?  }  ~I  }J  K  L 	##%((*"&u+%&" %%g. & 1 1#2 6F% )F , %F *F $'$8F!$'EF!& 	##r)&//()Z\_`J**,%%+()OQTUJ!__()NPSTJ ()MsSJ$%WY\]
$$&"""r   c                    t        j                  dd| j                        j                         | _        t        j                  dd| j                        | _        t        j                  dd| j
                        j                         | _        d | _        d | _        t        | j                  j                               dk(  rt        j                  d| j                        s t        j                  d| j                        r| j                  j                         | _        t        j                  d	| j                        r| j                  | _        | j                  | j                  d
d      | _        | j                  | j                        | _        | j                  | j                  || j
                  | _        t        j                  dd| j                         | _        | j"                  | _        | j$                  | _        d | _        | j*                  j-                  d       y | j/                  | j
                        | _        | j0                  j                         D ]2  }t3        j4                  |      }| j*                  j-                  |       4 t6        j9                  | j0                        | _        | j;                  | j                         | _        d | _        | j                  | _        | j                  | j"                        | _        | j<                  | _        | j>                  s| j$                  | _        y | jA                  | j                         }| j;                  |      }| j                  |      }|| _        y )Nz -[^ ]*rE  z[|&()] u;   ([^A-Za-zА-Яа-я0-9](\s|$)|(\s|^)[^A-Za-zА-Яа-я0-9])r   z'^[a-zA-Z0-9]+(?:-[a-zA-Z0-9]+)*-[0-9]+$z^[0-9]+$z@^[a-zA-Z0-9]*:[0-9a-f]*-[0-9a-f]*-[0-9a-f]*-[0-9a-f]*-[0-9a-f]*$FT)synonyms
stop_wordsu   [^А-Яа-яA-Za-z0-9.-]-CODE)!resubrM  lowersearch_query_wo_qstopr  stripquery_obj_codequery_obj_idrs  splitmatchupperparse_search_queryr[  text_stemmingsearch_query_ninjasearch_query_normalizer>  r?  
query_liker_  r  query_remove_stop_wordssearch_query_wo_stop_wordscmfnlp
partspeechcmfutilninjaquery_normalizesearch_query_stemmr+  query_add_syns)r   wr  ninja_with_syns search_query_normalize_with_synssearch_query_stemm_with_synss         r   rv  z$CmfSearchEngine.prepare_search_queryv  s    &(VVJr4;Q;Q%R%X%X%Z"FF9S$2L2LMFF#abegkgxgxy  B # t%%++-.!3xxBDDZDZ[_a_g_ghsuy  vL  vL  `M&*&<&<&B&B&D#xx[]a]s]st$($:$:!!%!8!89O9OZ_lp!8!q!%!3!3D4J4J!K *d.?.?.K&*&7&7D#*,&&1LSQUQhQh*iD''+'B'BD$ %)$<$<D!"DO((//7*.*F*FtGXGX*Y' 00668 	<A**1-J((//
;	< #*--0O0O"P '+&:&:4;R;R&S#  44"&"4"4T5P5P"Q#'#:#:  }}$($<$<D! #11$2I2IJO/3/C/CO/T,+/+=+=>^+_($@D!r   c           	         g }|j                         D ]C  }|j                  d      }|d   }|g}|dd  D ]"  }t        |      dk\  s|j                  |       $ t        j                  |      }t        |      }	t        j                  j                  ddt        ||hz        gddgdgddg	      }
|
D ]  }|j                  s|j                  j                  j                  d
      d d D ]F  }d|j                         v r|j                         j                  dd      }|	j                  |       H  |j                  dj                  |	             F dj                  |      S )N|r   r      r   INr   ordernofilterr
   order_byr)  ,   r  rE  )r  rs  r  r  tokens_enrich_morphrX  r  
CmfSynonymr^  r   valuer  replaceaddjoin)r   r   r  tokentokensfirst_tokensearch_tokensr  normalized_wordssynonym_wordssynonym_listsynonymr   s                r   r  zCmfSearchEngine.query_add_syns  ss    [[] 	0E[[%F )K(MMABZ , q6Q;!((+	,  &99-HKM!,,11&$M]al`mMmHn9o:@&9I<E;9:1 2 ?L ( -<<$\\//55c:2A> 	- !'')+$GGI--c26%))!,	-- JJsxx./?	0@ xx}r   c                     | j                  | j                  | j                        | _        | j	                  | j
                        | _        y r   )	calc_tagsrU  rM  r  _get_all_branchesrV  rZ  )r   s    r   rw  z)CmfSearchEngine.prepare_additional_filter  s9    t'9'94;Q;QR"&"8"89Q9Q"Rr   c                     t               }|r:t        |t              r|D ]  }|j                  |        n|j                  |       | j	                  |      \  }}t        |j                  |            S r   )rX  
isinstancer^  r  _extract_tagsunion)r  r  r  r   _tagextracted_tagss         r   r  zCmfSearchEngine.calc_tags  sg    u(D)$ #DHHTN# "'*'8'8'F$nDJJ~.//r   c                 H   t               }| j                  d      }t        |      dk(  r| |fS d} |dd D ]X  }|dk(  r	t        j                  d|d      }|d   dk7  r|j	                  |d          t        |      dk(  sK|  |d    |d    } Z | j                         } | |fS )	zExtrats tags from the given search_query and returns its reminder and a set of extracted tags

        Args:
            search_query (str)

        Returns:
            tuple[str, list[str]]: search_query reminder and a set of extractd tags
        #r   rE  Nz(\W)r   r%     )rX  r  rs  r  r  r  )r  r   sharp_splittedr  
sub_tokenss        r   r  zCmfSearchEngine._extract_tags  s     u%++C0~!#%%#AB' 
	OE{'5!4J!}"A' :!#".
1z!}oN
	O $))+T!!r   c                 R   g }g }g }|D ]  }|j                   }|| j                  v r+|j                  s*|j                  j                  d      d   }|| j                  v r|j                  |       h|| j                  v r|j                  |       |j                  |        |||fS )N:r   )r   r@  r   r  r  r  r  )r   search_objects	aka_tasksaka_docsaka_dictsearch_objectr   s          r   _split_search_objects_to_3_akaz.CmfSearchEngine._split_search_objects_to_3_aka  s    	+ 	/M%//ID666$22)77==cB1E	D000  /d111..	/ (H,,r   c                    g }g }t         j                  rKt         j                  j                  j                  r't         j                  j                  j                  dk(  rg |fS t	        t         j                  j                        }|D ]?  }|j
                  r ||j
                  v r|j                  |       /|j                  |       A ||fS )NrE  )r  r  r  r  r   r   r  )r   r  relatednot_related
user_loginr  s         r   %_split_search_objects_to_related_userz5CmfSearchEngine._split_search_objects_to_related_user0  s    ~~Q^^%9%9%?%?1>>CWCWC]C]acCc~%%--.
+ 	2M66:IpIp;p}-""=1		2
 ##r   c                     g }g }|D ]4  }|j                   |k(  r|j                  |       $|j                  |       6 ||fS r   )r   r  )r   r  keyr   nomarkr  s         r   _split_search_objects_by_markz-CmfSearchEngine._split_search_objects_by_mark?  sM    + 	-M!!S(M*m,		-
 t|r   c                    g }t               }t        j                         }g }| j                  j                  |       }| j	                  |      \  }}}d }ddd}		 t        |      }
 |||	|        |||	|        |||	|        |||	|        |||	|       t        |      }|dk\  s||
k(  rnT|d d | _        t        j                         |z
  dkD  r.t        j                  dt        j                         |z
          y y )Nc                     |d   |d   k\  ry |sy |j                  d      }d|_        | j                  |       |dxx   dz  cc<   y )Nrt  total_limitr   	empty_topr   )popr   r  r  countersobj_listobjs       r   append_if_exists_empty_topzKCmfSearchEngine.search_empty_top_recent.<locals>.append_if_exists_empty_topT  sL     H]$;;,,q/C#CIJJsOW"r   r      rt  r  rj  皙?z3PROF fulltext_search empty_queries_top selects got )	rX  timedriverr   r  rs  r\  r  r   )r   r]  skip_idsprof_str  top_results_taskstop_results_docstop_results_dictsr  counters_empty_topres_count_beforeres_count_afters               r   rz  z'CmfSearchEngine.search_empty_top_recentJ  s!   5))+;;DAAEAdAdesAt>+->		# ()<"<0&|5GIZ[&|5GIZ[&|5GIZ[&|5GIYZ&|5GIYZ ",/O"$;K(K  *#2.99; 3&GGI$))+X_J_I`ab 'r   c                    g }t               }t        j                         }| j                  j                  |       }| j	                  |      \  }}}| j                  |      \  }}	| j                  |      \  }
}d}t        ||	|
||g      rU|df|	df|
df|df|dff}| j                  |||       |dz  }|dkD  rt        j                  d       nt        ||	|
||g      rUt        j                         |z
  dkD  r-t        j                  dt        j                         |z
          || j                  d   | j                  d    | _        y )Nr   r     W   Баг в поиске, много данных, либо не идет вычитка.r  z&PROF fulltext_search search_empty got )rX  r  r  r   r  r  r{   _add_if_existsr  r   r)  r\  )r   r\  r  r  r  results_tasksresults_docsresults_dictsr  r  r  r  
iter_countprocessing_groups                 r   r{  zCmfSearchEngine.search_emptyu  sq    5 ))+77=595X5XYg5h2|]9=9c9cdq9r7%'77;7a7abn7o5$o
02B.! " /2!1%-q1 !$"   0-J!OJD sw# 02B.! "& 99; 3&GG<TYY[7=R<STU*4::a=AGr   c                 .    t        j                          }g }t               }i  fd}d }ddddddd}ddd}t        d	d
      } j                  j	                    j
                        }	 |         j                  |	      \  }
}} j                  |d      \  }} j                  |
d      \  }
} j                  |d      \  }} j                  |
d      \  }
} j                  |d      \  }} j                  |
d      \  }
}|}|
}~~
 j                  | j
                        } j                  | j
                        } j                  | j
                        } j                  | j
                        }|d d }|d d } j                  | j
                        } j                  | j
                        } j                  | j
                        } j                  | j
                        } j                  | j
                        }t        j                          |z
  dkD  r-t        j                  dt        j                          |z
          t        j                          }t               }g }t        |      dkD  r|d   j                  dkD  r ||||d        t        |      dkD  r|d   j                  dkD  r ||||d        t        |      dkD  r|d   j                  dkD  r ||||d        t        |      dkD  r|d   j                  dkD  r ||||d        t        |      dkD  r ||||d        t        |      dkD  r ||||d        d}d}d}d}t        |      dkD  r|d   j                  }t        |      dkD  r|d   j                  }t        |      dkD  r|d   j                  }t        |      dkD  r|d   j                  } j                  |      } j                  |      } j                  |      } j                  |      }d}|dz  |kD  s|dz  |kD  rd}t         j                  j!                               dk(  r
 ||||       	 t        |      }|rt        |      dkD  rDt        |      dkD  r6|d   j                  |d   j                  kD  r |||||        |||||       n |||||        |||||        |||||        |||||        ||||        ||||d         ||||d         |||||        |||||        ||||d         |||||        |||||        ||||d        nt        |      dkD  rDt        |      dkD  r6|d   j                  |d   j                  kD  r |||||        |||||       n |||||        |||||        |||||        |||||        ||||        ||||d         ||||d         |||||        |||||        ||||d         |||||        |||||        ||||d        t        |      }|dk\  s||k(  rn|d d  _        t        j                          |z
  dkD  r.t        j                  dt        j                          |z
          y y )Nc                 b   t        t        j                  j                        }|sy |j	                  d      }|j
                  v s|j                  dk(  rO|j                  v rA|sy |j	                  d      }|j
                  v r#|j                  dk(  r|j                  v rAd|j
                  <   |j                  dk(  rd|j                  <   |r|j                  |z  dk  ry d}|j                  j                  vrd}||   || d   k\  ry | j                  |       ||xx   dz  cc<   y )	Nr   r;  Tg?r   r   _limitr   )r   r  r  r  r  r   r   r   real_rank_bm25r  r  )	r  r  r  max_rankr  r  	cur_classexists_dictr   s	          r   append_if_existsz4CmfSearchEngine.search_top.<locals>.append_if_exists  s-   Q^^112J,,q/C **+/1QVYVgVgkvVvll1o **+/1QVYVgVgkvVv
 '+K

#}}/15C--. &&1T9 I}}D$7$77"	 	"h)F/C&DD JJsOY1$r   c                     |d   |d   k\  ry |sy |j                  d      }| j                  |       |dxx   dz  cc<   y )Nrt  r  r   r   )r  r  r  s       r   append_if_exists_otherz:CmfSearchEngine.search_top.<locals>.append_if_exists_other  sE     H]$;;,,q/CJJsOW"r   r   r     )r   r   c	doc_limit
task_limitc_limitr%  r  z'CmfSearchEngine.search_top__select_bm25rr  flow_idcodelike	flow_nameflow_attach   r  z'PROF fulltext_search TOP25 selects got   T      ?Fr      z2PROF fulltext_search TOP25 mixing and prepare got )r  rX  r   r  r   r>  r  r  	bm25_sortr  r   rs  	rank_bm25r  bm25_sort_date4r  r  r\  )r   r  r]  r  r
  r  r  counters_otherr  r  r-  r.  result_dictsresult_docs_idcodelikeresult_tasks_idcodelikeresult_docs_nameresult_tasks_nameresult_docs_attachresult_tasks_attachresult_docs_mainresult_tasks_mainr\  max_doc_name_rankmax_task_name_rankmax_doc_main_rankmax_task_main_rankdoc_prior  r  r	  s   `                            @r   r}  zCmfSearchEngine.search_top  s   ))+5*	%X	# aa!#2"F $%Q7* %%NPST
00t7O7OP262U2UVd2e/k< /3.P.PQ\^o.p++040R0RS_ar0s--(,(J(J;Xc(d%%*.*L*L\[f*g''*.*L*L[Zg*h'',0,N,N|]j,k))&(>>*:D<T<TU NN+<d>V>VW!^^,>@X@XY"nn-@$BZBZ[/31"15!%0FH`H`!a"&..1H$JbJb"c>>*:D<T<TU NN+<d>V>VW~~lD4L4LM& 99; 3&GG=diikG>S=TUV ))+5  1$)9!)<)F)F)M]H6FM 1$)9!)<)F)F)M]H6FM 1$)9!)<)F)F)M]H6FM 1$)9!)<)F)F)M]H6FM %&*]H6LdS&'!+]H6MtT   " 0 3 B B !!#!21!5!D!D " 0 3 B B !!#!21!5!D!D  //0@A 001BC//0@A 001BC s"%66:LS:PSd:dHt  &&()Q."=.,O"=1 )*Q.37G3H13L)!,;;>Nq>Q>`>``$]H>NPab$]H>NPab$]H>NPab$]H>NPab :KM_` :KM_`&}nlS :PRVW :QSWX :JL]^ :JL]^ :LdS :KM_` :KM_` :MtT *+a/C8I4JQ4N)!,;;>OPQ>R>a>aa$]H>OQcd$]H>OQcd$]H>OQcd$]H>OQcd :JL]^ :JL]^&}nlS :QSWX :PRVW :KM_` :KM_` :MtT :JL]^ :JL]^ :LdS
 "-0O"$;K(Ko r +3B/99; 3&GGHW^I^H_`a 'r   c                    t        j                          }g }t               }t               fd}t        dd      }| j                  j	                  | | j
                        } |        | j                  |      \  }}}	| j                  || j
                  d      }| j                  || j
                  d      }| j                  |	| j
                  d      }	t        j                          |z
  dkD  r-t        j                  dt        j                          |z
          t        j                          }t               }g }
	 t        |
      } ||
|        ||
|        ||
|        ||
|        ||
|	       t        |
      }|| j                  d   kD  s||k(  rn\|
| j                  d	   | j                  d    | _        t        j                          |z
  dkD  r.t        j                  d
t        j                          |z
          y y )Nc                     |sy |j                  d      }|j                  v r#|sy |j                  d      }|j                  v r#j                  |j                         | j                  |       y Nr   )r  r   r  r  )r  r  r  r	  s      r   r
  z5CmfSearchEngine.search_main.<locals>.append_if_exists  s`    ,,q/C**+ll1o **+ OOCJJ'JJsOr   z'CmfSearchEngine.search_main__select_synrr     )limitr  &PROF fulltext_search main selects got r   r   1PROF fulltext_search main mixing and prepare got )r  rX  r   r  r   r?  r  r  r  r   rs  r)  r\  )r   r  r]  r  r
  r  r  r-  r.  r  r\  r  r  r	  s                @r   r|  zCmfSearchEngine.search_main  s   ))+5e
	 %%NPST
//d6K6KL262U2UVd2e/k< ~~lD4I4IQT~Unn[$2G2GsnS~~lD4I4IQT~U99; 3&GG<TYY[7=R<STU ))+5"=1]K8]L9]K8]L9]L9!-0OA./EU2U  +4::a=AG99; 3&GGG		V]H]G^_` 'r   c                 D   g }t               }t        j                         }| j                  j                  |       }| j	                  |      \  }}}| j                  |d      \  }}| j                  |d      \  }}	t        j                         |z
  dkD  r-t        j                  dt        j                         |z
          t        j                         }d}
t        |||||	g      rg|df|df|df|df|df|df|df|	df|df|	df|dfg}| j                  |||       |
dz  }
|
dkD  rt        j                  d       nt        |||||	g      rg|d d	 | _
        t        j                         |z
  dkD  r.t        j                  d
t        j                         |z
          y y )Nr3  r  r0  r   r  r   r  r  rj  r1  )rX  r  r  select_defaultr  r  r  r   r{   r  r\  )r   r\  r  r  r  r-  r.  r  rA  rB  r  r  s               r   search_main_oldzCmfSearchEngine.search_main_old  s   5 ))+33D9262U2UVd2e/k<)-)K)KLZ_)`&&'+'I'I+W\']$_99; 3&GG<TYY[7=R<STU ))+
<l<Lo^_q!a q!!1%q!!1% q! !$a  !$ q!% 0  0-J!OJD sw= <l<Lo^_@ +3B/99; 3&GGG		V]H]G^_` 'r   c                     |S )u   
        Подчистка оригинального квери, который ввел пользователь:
        - удаление стоп-слов
        )r  rs  r  r  )r  r  clean_search_query_listr  clean_search_querys        r   _clean_search_queryz#CmfSearchEngine._clean_search_query.  s
     r   c                 V    d}|j                         D ]  }|t        v r|d|z   z  } |S )NrE  r  )r  ALL_STOP_WORDS)r  r   r  r  s       r   r  z'CmfSearchEngine.query_remove_stop_wordsB  s=     	AN" 37NC	 
r   c                    d}|j                         D ]  }|t        v rd|v r	|d|z   z  }t        j                  |      }|dk(  r4|j	                  dd      j	                  dd      }||k(  rd}d}nd|z   }||k(  r|d|z   |z   z  }vt
        j                  j                         t        j                  ddd      k  r|dz   |z   dz   |z   }|ddj                  |j                               z   z  } dj                  |j                         j                               }dj                  |j                  d      D cg c]  }|s|	 c}      }t        j                  d	d
|      }|S c c}w )NrE  r  r  @.i  r  r   u   [^ A-Za-zА-Яа-я0-9|-]r  )r  r:  r  text_normalize_enrichr  datetimedatetodayr  r  r  r  )r  r   r  r  lemmalegacy_wlegacy_w_strs          r   r  zCmfSearchEngine.query_normalizeN  sc    	9A N"axsQw44Q7B;99S-55c#>q=#%L!H#&>L:37\11C  }}**,x}}T1a/HH !C( 2S 85 @3%++-!888C7	98 hhsyy{((*+hh399S>7aQ78 ff13<
	 8s   ?E+E+c                 8    t        j                  ||dz   z        S )Nr   )mathlog)r  Ndfs      r   bm25_idfzCmfSearchEngine.bm25_idf  s     xx2a4!!r   c                 B    t        j                  | j                         y r   )geventspawn'_gevent_bm25_stat_words_load_with_delayr  s    r   &gevent_bm25_stat_words_load_with_delayz6CmfSearchEngine.gevent_bm25_stat_words_load_with_delay  s    S@@Ar   c                     t        t        dd       ry t        j                  d       t        t        dd       ry | j	                          y )NFTS_STAT_WORDS<   )getattrAPPrL  sleepbm25_stat_words_loadrO  s    r   rN  z7CmfSearchEngine._gevent_bm25_stat_words_load_with_delay  s<     3($/R3($/  "r   c                 2   i }i }t         j                  j                  d      |d<   t         j                  j                  d      |d<   t         j                  j                  j
                  j                         j                  d      j                         }t         j                  j                  j
                  j                         j                  d      j                         }t        |      dkD  rM|t        |      dz  dz     d   }|t        |      d	z  dz     d   }t        |      d
kD  r|d   d   }|d   d   }nd}d}t        |      dkD  rM|t        |      dz  dz     d   }|t        |      d	z  dz     d   }t        |      d
kD  r|d   d   }|d   d   }nd}d}d}	|D ]R  }
i ||
j                  <   | j                  |d   |
j                        ||
j                     d<   ||
j                     d   }T |D ]`  }
|
j                  |vri ||
j                  <   | j                  |d   |
j                        ||
j                     d<   ||
j                     d   }b |sd}|sd}i |d<   |d   d<   |d   d<   i |d<   i |d<   t        |      dkD  r||   d   |d   d<   ||   d   |d   d<   nd|d   d<   d|d   d<   t        |      dkD  r||   d   |d   d<   ||   d   |d   d<   nd|d   d<   d|d   d<   |t        _        |t        _        y )Nr  )r   r
  z SELECT word, ndoc, nentry  FROM ts_stat('SELECT ml_text_tsvector from cmf_full_search where obj_model=''CmfDocument''  ') where  ndoc > 10  order by ndoc desc limit 10000 z SELECT word, ndoc, nentry  FROM ts_stat('SELECT ml_text_tsvector from cmf_full_search where obj_model=''CmfTask''  ') where  ndoc > 10  order by ndoc desc limit 10000 r  r  r   rl    rr  i  ditir   FST_STAT_WORDS_DEFAULTFST_FRQ_MIDFST_FRQ_LOW)r  r  rt  CmfGlobalSettingsdpdata_driverSessionexecutefetchallrs  wordrJ  ndocrU  rR  
FTS_COUNTS)r  rR  rg  cmfdoc_statcmftask_statdoc_mid_worddoc_low_wordtask_mid_wordtask_low_wordlastrowlast_doc	last_tasks                r   rW  z$CmfSearchEngine.bm25_stat_words_load  s   
$*$8$8$>$>$>$W
=! & 4 4 : :Y : O
9..11==EEGOO  Q  @  I  I  K//22>>FFHPP  R|  }  F  F  H{C&s;'7'93'>?BL&s;'7':C'?@CL;$&*3/2*403LL|S (\):1)<c)AB1EM(\):2)=s)BCAFM< 4' ,S 1! 4 ,T 21 5MM 	6C')N388$-0\\*]:SUXU]U]-^N388$T*%chh/5H	6   	7Cxx~-+-sxx(-0\\*Y:OQTQYQY-ZN388$T*&sxx06I		7
 HI35/09A/069B/06(*}%(*}%{C2@2Nt2TN=)$/2@2Nt2TN=)$/23N=)$/23N=)$/|S 2@2OPT2UN=)$/2@2OPT2UN=)$/23N=)$/23N=)$/+#r   c                 >   |}t        |      dk\  rd}|d   j                  |d   j                  z  dkD  rd}nI|d   j                  |d   j                  z  dkD  rd}n$|d   j                  |d   j                  z  dkD  rd}|dkD  rt        |d | d       ||d  z   }|S )	Nr  r   r%  g333333?r  r   c                     | j                   S r   )r   r   s    r   <lambda>z1CmfSearchEngine.bm25_sort_date4.<locals>.<lambda>  s
    !** r   )r  )rs  r  sorted)r  r  newresr   s       r   r  zCmfSearchEngine.bm25_sort_date4  s     s8a<D1v$$SV%:%::TAQ&&s1v'<'<<tCQ&&s1v'<'<<tC axET
0DEDE
Rr   c                 ^   g }d}t        |      }|D ]>  }|dz  }| j                  ||      \  |_        |_        ||_        |j                  |       @ t        |d d      }|d d D ]F  }|j                  s|xj                  dz  c_        d|j                  d	d
|j                   |_        H t        |d d      }d}|d | D ]a  }|dz  }||_	        d|j                   d|j                   d| d
|j                  z   |_        |j                  j                  dd      |_
        c |d | S )Nr   r   c                     | j                   S r   r  rt  s    r   ru  z+CmfSearchEngine.bm25_sort.<locals>.<lambda>  
    AKK r   T)r  reverserl  r  u    ТОП.2fr  c                     | j                   S r   rz  rt  s    r   ru  z+CmfSearchEngine.bm25_sort.<locals>.<lambda>  r{  r   Rz Bz LDZQTNrE  )rs  	bm25_rankr  r  debug_sql_posr  rv  r   r   debug_bm25_posr   r  )	r   r  r   r/  rw  r   llro  rs	            r   r  zCmfSearchEngine.bm25_sort  sP    X 	CFA04sE0J-CM3- !CMM#		
 f"7F 	@A $$t##AKK#4AaggY?	@ f"7F 	8AFA A!//*"Q-=-=,>bAFPAG++GB7AJ	8 fu~r   c                 ^   fd} |j                         }i }j                  !j                  dk7  r |j                        }d}d}d}d}	d}
d}d}t               }d}d}d}j                  | j                  v rd}d	}i }|j                         D ]  }||v r	|j                  |       |
dz  }
d}d d t               g d dt               t               d
}|j                  d      D ]  }||vr	|d   j                  |       |d   j                  ||   j                  d      D cg c]  }t        |       c}       |dz  }t        t        d      rt        j                  sd}d}nT|t        j                  v r,|t        j                  |   v rt        j                  |   |   }nt        j                  d   |   }d}d}t        t        ||   j                  d            d      }|||dz   z  z  ||z   z  }|}d}j                  dk(  r9||v r5j                  r)|j                  j!                         v r|	dz  }	d}|dz  }n|dz  }|dxx   d|dz    d| d| d|dd|dd|dd| d| z  cc<   |d    j                  |        t        |d          dk(  r|dz  }t#        |d          }| j$                  |
dz
     }d} |}!|d!v r|dz  }!d"} |dxx   d#| d$|  z  cc<   t        |d          dk\  r|dxx   d%|dd&|!dz  cc<   ||vr|||<   |!|d'<   ||d(<    d}"d}#d}$d}%d}&|j'                         D ]  \  }}d}'d}(d})d}*d}+|j'                         D ]  \  },}-d}.d}/||,k(  rd}.|d   D ]x  }0|-d   D ]n  }1t)        |1|0z
        d)k  r |'dz  }'d}.|*dz  }*|d*   j                  |,       t#        dd)t)        |1|0z
        z   z  d+z  |/      }/|+dd)t)        |1|0z
        z   z  d+z  z  }+p z |(|.z  }(|)|/z  })|&|*z  }& |)dkD  rwd,|)z  }2|d'   |2z  |d-<   |"dz  }"|(|$kD  r|(}$|)|%kD  r|)}%|#|'z  }#t+        |d*         j-                  d.d      j-                  d/d      j-                  d0d      }3d1| d2|3 d3|%d4d5|)d4}4n
d6}4|d'   |d-<   |d   d7| d8|( d$t        |d*          d9|d-   dd|d'   dd:|4 z   |d<   |d   d;|d-   dd<|& z   |d<    t/        |j1                         D cg c]  }|d-   	 c}      }dd=dd>j2                  z   z  z  z   }5||5z  }|d?|" d@|# dAj2                  dBdC|5d4dDj4                  d4
z  }d}6t6        j8                  j:                  j<                  ret6        j8                  j:                  j<                  dk7  r>j>                  r2t6        j8                  j:                  j<                  j>                  v rdE}6d}7dFt6        v rj@                  t6        jB                  v rdE}7||6z  |7z  }dG|d4dH| dI|6 dJ|7d4d$	|z   }|dk(  rj4                  }|dk(  rdK}|}8j4                  dLv r|j4                  z  }dM|d4d$| }| djE                  |j1                         D 9cg c]  }9|9d   	 c}9      z   }:|:_#        |rtI        dN       ||8fS c c}w c c}w c c}9w )ONc                     j                   j                  dd      } t        j                  dd|       } | d d } t	        j
                  d| z   dz         S )N'"z":([0-9,]*)z":"\1", z{ z })r   r  r  r  orjsonloads)tmpr  s    r   tsvector2dictz0CmfSearchEngine.bm25_rank.<locals>.tsvector2dict  sT     %%--c37C&&c:Ccr(C
 <<s
T 122r   rE  r   r   FrZ  il  r[  rq  )r  
smart_rank
found_synsvector_positionstoken_found_idxr   near_tokensrank_setr  r  r  r  rR  Tr\  r  g      ?r  r  r  r   z<br>wr=  (z) bm25nm(bm25)=z.1fz) idf=z fqd=z	 in_name=r  )VERBINFNz
verbdiv/2 z<BR>&nbsp;&nbsp; r  z	 synRank=z psRank=r  r  r  r  (   gffffff?r  r  {}z<BR>&nbsp;&nbsp;#near(z)=[z] MFNCr}  z FNC0z	 maxNear[z]=z bm25near(bm25)=z) z<br>#BM25final=z SSSS=rl     z nearC=z
 sumNearC=z age=z.0fz ageF=z	 sqlRank=g?FTS_RECENT_PROJECTSzrbm25=z wc=z rur=z rpr=g-C6?)r  r  rY  z codeu   Система еще загружает кеш для алгоритма ранжирования. Если поисковая выдача некорректна, повторите запрос через минуту)%r   r   rX  r   r  r  r  extendinthasattrrU  rR  minrs  r   r   r  r(  r_  itemsabsr   r  sumvaluesr   r   r  r  r  r  r   r   r  r  r   	cmf_alert);r   r  r   r  doc_vector_dictname_vector_dictr   
rank_debug	w_doc_cnt
w_name_cntsynw_cntorder_factorfound_tokens_cnt
done_wordsalert_fts_stat_not_load	model_keyAVGDmatched_tokenssynww_cntr  r  r   idfkbfqdbm25	bm25_origin_namesyn_rankr  verbdivps_ranknear_tokens_cntsum_near_tokens_cntmax_of_max_near_contextmax_of_factor_near_contextsum_tmp_near_allhave_near_token_cntmax_near_contextfactor_near_contexttmp_near_alltmp_factor_allsynw2token2tmp_max_near_contexttmp_factor_near_contextpospos2mltr  d_nears
age_factorrelated_user_rankrecent_project_rank	real_rankr   html_rank_debugs;    `                                                         r   r  zCmfSearchEngine.bm25_rank  sm	   
	3 ((;(;<(S->->"-D,S->->?
	
U
"'	 ==D000ID
 KKM W	8Dz!NN4 MH
 E ""!e$&#'"uE	E ZZ_ +,O+l#''*()00/RSBTBZBZ[^B_1`Q#a&1`a
s$45S=O=OC.2+#,,,c>P>PQR>S1S,,Q/	:C,,-EFyQC#oa066s;<a@ cQqSk*CE2 	88{*q4D/DZ[_b_k_k_q_q_sZs!OJ"G!8DNI gE*:1*<)=QugQqcY]^aXbbcdmnqcrrxy|  ~A  yB  BG  HK  GL  LU  V]  U^  #_  _j!%%d+W+,Z 5$%*!5,-H55hqjAJGG--"Q,$'N 1*QwiHHN5$%*gIhs^8GC="QQ>)',t$!(E+'7E#$oW	8v "#%&")//1 A	rKD%"# "#LN!/!5!5!7 #1v'($*+'5='($ !34 DC &'9 : D
 tcz?b0 014/340(1,L!-044U;25aCs
O9K6LR6OQh2i/&!RD3J-?*@*CCDD, !$88 
 $'>># L0 G#1L #Q&
 23&+K&83&>l#1$#&==.>+&)CC1D.#'::#!%"67??BGOOPSTVW__`cdfg24&K=OijmNnnr  tG  HK  sL  M&+K&8l#"7^	$rBRASSTUXY^_lYmUnToo  AF  GS  AT  UX  @Y  YZ  [`  al  [m  nq  Zr  rt  u|  t}  /~  ~E'N"7^l@STW?XX^_o^p.qqE'NCA	rF ^-B-B-DEAlOEF r1c#,,./00
j 0
;N:OuUXUaUabeTfflmwx{l|  }F  GJ  GO  GO  PS  FT  U  	U

 ^^!!''ANN,@,@,F,F",LQTQnQn$$**c.K.KK $   A%#*<*<@U@U*U"&''*==d3Zt,<+=UCTBUUZ[norZsstu  yC  C
19 88D19D	88++CHHD c
!J<8J'LBGGI^I^I`4aAQwZ4a,bb#	"  u  wYo 2aT FR 5bs   *Z *Z%3Z*
c                 6    d| vr| S | j                  d      d   S )Nz@#@#@#r   )r  )r   s    r   _remove_suffix_from_headlinez,CmfSearchEngine._remove_suffix_from_headline#  s#    8#O~~h'**r   c                    | j                   }|D ]M  }d}|j                  |j                  d}d|j                   d|j                   d|j                   d|_        O | j
                  ri }g }|D ]  }|j                  |j                         |j                  r| j                  |j                        nd}|j                  |j                  |j                  | t        j                  |j                        |j                  |j                  |j                  |j                  d	||j                  <    |t         _        t         j	                  d	       |S i }| j$                  rLi }|D ]7  }|j'                  |j(                  g       j                  |j                         9 |j+                         D ]  }	t-        t.              |	   j0                  r| j$                  d
gz   }
d
d||	   gg dg}n| j$                  }
dd||	   g}|	dk(  r|
g dz   }
|	dk(  r|
dgz   }
t2        j5                  |	      j7                  |
|| j8                  | j:                        }|D ]S  }t-        t.              |	   j0                  r&|j0                  r|||j<                  j>                  <   E|||j>                  <   U  ntA        d       t         j	                  d       g }|D ]  }|jC                  |j                        }|s"|j                  r| j                  |j                        nd}|jD                  dk(  r|jF                  sg|jH                  |jF                  jJ                  |jF                  jL                  jN                  | |j                  |j                  |j                  |j>                  jN                  |j                  |jP                  |jR                  |jT                  t        j                  |j                        |j                  d}ntW        jX                  d|j                        d   }|j>                  jN                  ||jL                  jN                  | |j                  |j                  |j                  t        j                  |j                        |j                  d	}|jD                  dk(  r|jZ                  jN                  |d<   	 | j\                  s|j_                  dd       n| j]                  |      st`        | j$                  D ]6  } |jX                  d      d   }tc        ||j                     |d      }|||<   8 |j                  |        t         j	                  d	       | jd                  sHtg        t.        jh                  jj                  | jl                  t2        jo                  |dd       d       || _8        y# t`        $ r Y 
w xY w)u5   
        Итоговая обработка
        NoneNz.6fz<BR>r  z<br>rE  )	idr   coder   r   r   r   r   r   zfulltext_search ENDcmf_ver_headr  )cmf_ver_curz==Tr  r;  )urlurl_previewurl_preview_imgr  fullsearch_answer)r
   r  include_archivedinclude_deleteduY   DEV: FATAL. Укажите в запросе поиска список полей fields=z"fulltext_search Start check access)r  r   r  r   r   r   r   	attach_idattach_name
attach_urlattach_url_previewattach_url_preview_imgr   r   z DZQTN r   )	r  r   r  r   r   r   r   r   r   T)TEXKOM_skip_failread_auditTEXKOM_ppp_project_simplecheckr=  rj  )r  obj_dict)r  )9r\  r   r   r   r   rN  r  r   r   r  r   r   jsonr  r   r   r  fulltext_search_headlinesr
   
setdefaultr   keysvarsr  cmf_verr  get_model_by_namer^  rR  rS  r  r  r  getre  parent	parent_idr   r  r  r  r  r  r  r  r  rP  _acl_check_readCmfPermissionErrorrT  rO  schedule_deferred_jobr  _do_calc_statisticsrM  
dumps_dictr]  )r   r]  r  formated_rankr  	result_idr   objectsids_by_modelr  _fields_filterr  r  r  r  r   r	   attrs                      r   r~  z%CmfSearchEngine._prepare_final_result)  s    )) 	>A"Mvv!#$66#,QVVHAaggYay=AG		> ==(*%I!   *LMJJ4<<QZZH\^((GGJJ#+*#'::amm#<WWFF !

WW
7)!((3  +DA'GG)*;;L! J''R8??IJ +//1 .
<
+33 KK>*::G .l:6NOQlmG KKG!4j)ABG0%(QQG.%)<(==G"44Z@EE""%)]]$(LL	 F  $ .CF|J/77CKK7: 0 0 3 34*-	.).4 qs	45 B	A++ahh'CLMJJ4<<QZZH\^>>_4 ::  "mm #

 #

 5 5'/j!" !$%JJ%(VV\\'(ww&)gg.1oo252E2E'+zz!--'@!" H$ HHZ9!<E "ffll % #'/j!" !$%JJ'+zz!--'@!"
 H >>]2474I4I4O4OH0166++tlp+q!88=00!% / +C 0 3&wqxx'8%F*./ JJx(wB	H 	
%&   !$$88(,(>(>GL^L^_bcfdf_gLhi
  ) *  s   BW	W#"W#r  r]  r  c                    |D ]  \  }}t        |      D ]l  }|s |d   j                  |v r|j                  d       *|j                  |d          |j	                  |d   j                         |j                  d       n  y r-  )ranger   r  r  r  )r  r  r]  r  lstrt  _s          r   r  zCmfSearchEngine._add_if_exists  s}    " 		JC5\ q6==H,GGAJ##CF+SV]]+
		r   c                     | sg S t         j                  j                  j                  j	                         j                  dd| i      }|D cg c]  }|d   	 c}S c c}w )Na  
                WITH tree_parents AS (
                    WITH RECURSIVE r AS (
                        SELECT obj_id, obj_code, obj_tree_parent_id
                        FROM cmf_full_search
                        WHERE obj_tree_parent_id = :tree_parent_id

                        UNION

                        SELECT cfs.obj_id, cfs.obj_code, cfs.obj_tree_parent_id
                        FROM cmf_full_search AS cfs
                        JOIN r ON cfs.obj_tree_parent_id = r.obj_id
                    )
                    SELECT obj_id FROM r
                    WHERE r.obj_id IN (SELECT obj_tree_parent_id FROM r)

                    UNION

                    SELECT :tree_parent_id
                )
                SELECT * FROM tree_parents;
            r  r   )r  r  r`  ra  rb  rc  )r  recordsr  s      r   r  z!CmfSearchEngine._get_all_branches  s`    
 I&&))55==?GG, ~./
2 &&!&&&s   Ac                    dt         _        |j                  dd      }t        j                  dd|      }t        j
                  d|      }d}d}d}	|D ]I  }
t        |
      dk(  rt        |
      dk(  r|
dv r&|
d	v r	|	d|
 z  }	3|
dk(  r9|
d
v r|	dz  }	C|
dv r|	dz  }	Mt        |
      dk(  r\|
d   dv rt        |
      dk(  rr|d|
dd   z  }~|r|
j                  dd      }
|dz  }|dkD  rd}d}|	r|	d   dvrd}t        |
      dkD  r| j                  |
|      }t        t        |      t        t              z
        }t        |      dk(  r|	| d|d    z  }	n-t        |      dkD  r|	| ddj                  |      z   dz   z  }	n	 |r|dk\  r n
|dk\  r nL |rt        |      dkD  r|d   dv r|dd  }|S |	j                  dd      j                  dd      j                  dd      j                  d d      j                         }	|	r|	d   dv r|	dd  }	|	r|	d   dv r|	dd  }	|	r|	d   dv r|	dd  }	|	r|	dd  dv r|	d d }		 t        j                  j                  j                   j#                         j%                  d!d"|	i      }t        |      d   d   }	t/        |d$|	       |	S # t&        j(                  j*                  $ r}t        j                  j                  j                   j-                          t        j                  j                  j                   j#                         j%                  d#d"|i      }t        |      d   d   }	Y d }~d }~ww xY w)%NrE  zwww.u   [^-A-Za-zА-Яа-я0-9()|&!' ]r  z(,| |&|\||\(|\))r   r   )r  !z()&|)rR   r   r  z |)r   rO   &z &r  r
  r  r  F)r  r  r  )r  z (  | z )rl  )r  r  z OR z or z AND  & z and z!select to_tsquery('russian', :q);q+select websearch_to_tsquery('russian', :q);z->)r  rT  r  r  r  r  rs  prepare_wordr^  rX  r:  r  r  r  r  r`  ra  rb  rc  
sqlalchemyexcProgrammingErrorrollbackprint)r  r  r  r  
first_wordsearch_query_allowed_symbr  
word_countstopsr  r   oper	sug_wordstsqueryes                  r   r  z"CmfSearchEngine.parse_search_query  s    !))&4$&FF+KSR^$_!-/HI 
 6	A1v{1v{
?;1QCLA8))r
&&r
 1v{tz!q6Q;1QqrUG$		#s#A!OJA~ D"Z/1vz,,Q,B	 Y#n2E!EF	y>Q&D69Q<.11A^a'D6

9(==DDAjAo Rm6	p 5z!|aJ 6ab	LIIfe$,,VU;CCGUS[[\cejkqqs1#!"A1#!"A1#!"A23:%#2A	$''**66>>@HHIpsvxyrz{GWa #A 	lD!$ ~~.. 	$  ,,557 ''**66>>@HHAl#%G Wa #A	$s   AJ+ +MBMMc           
      j   d}|j                         D ]  }d|v r|d|z   dz   z  }|d|z   z  } |j                         }|j                  dd      }|j                  dd      }	 t        j                  j
                  j                  j                         j                  d	d
|i      }t        |      d   d   }|j                  dd      j                  dd      j                  dd      j                  dd      j                  dd      j                  dd      }d}|j                         D ]>  }d|vr	|d|z   z  }|ddj                  t!        |j                  d                  z   z  }@ dj                  |j                               }|S # t        j                  j                  $ r}t        j                  j
                  j                  j                          t        j                  j
                  j                  j                         j                  ddt        i      }Y d }~d }~ww xY w)NrE  r  z ()r  r  r  hhhackskiptirez%select to_tsquery('russian', :query);r   r  r  r   r  r  r  )r  r  r  r  r  r`  ra  rb  rc  r  r  r  r  r   r^  r  rX  )r  r   r  r  r  r  r  s          r   r  zCmfSearchEngine.text_stemminga  s    	AaxTAX^#S1W		
 GGIyyU#||C!12	''**66>>@HHItw~  AE  wF  GG M!QIIeC ((R088sCKKCPRS[[\_`bckkl|  B  C 	9A!|sQw sSXXc!''#,&7888	9 hhsyy{#
- ~~.. 
	  ,,557 ''**66>>@HHAeG
	s   %AF H2-A:H--H2c                    t         xj                  d| dz  c_        |d   t        j                  v rd}t        j
                  }nd}t        j                  }g }t        j                  |      s|j                  |      }g }t        j                  |      }|r.t         xj                  d| dz  c_        |j                  |       t         xj                  dz  c_        d}	|D ]  }
|	d	k\  r nt        |
      d	k  r|
d   |d   k7  r|
d
   |d
   k7  r0d|
v rF|
j                  dd      }
t         xj                  d|
 dz  c_        |j                  |
       |	d
z  }	zt         xj                  d|
 dz  c_        |j                  |
       |	d
z  }	 g }t        j                  j                  j                   j#                         j%                  dd|i      }d}	|D ]w  \  }}|	d
k\  r nmt        |      d	k  r|d   |d   k(  s|d
   |d
   k(  s3|j                  dd      }t         xj                  d| dz  c_        |j                  |       |	d
z  }	y t'        |      t'        |      z  |hz  }n|h}t'               }|D ]a  }
t(        j+                  |
      d d	 D ]D  }|j-                  |j.                         t         xj                  d|j.                   dz  c_        F c ||z  }t'               }|rt        j                  j1                  ddt1        ||hz        gddgdgdd
g      }|D ]  }|j2                  s|j2                  j4                  j7                  d      d d D ]P  }|j9                         j                  dd      }t         xj                  d| dz  c_        |j-                  |       R  ||z  t'        |      z  }t1        |      S )Nz|w:z: r   enruzaddNinjaRevers z, zspellError, r  r   r  rE  z	addSpell z
            SELECT
                name, similarity(:word, name) as sim
            FROM cmf_synonym
            WHERE
                :word % name
            ORDER BY "sim" desc
            LIMIT 5;
             re  zaddSpellTrgm z
normalize r   r  r   r  r  r  r  zsynAdd )r  rT  stringascii_lettersr  dictionary_endictionary_rudictionary_checksuggestninja_reversr  rs  r  r  r  r`  ra  rb  rc  rX  morphparser  normal_formr^  r   r  r  r  )r  re  r  lang
dictionaryfiltered_suggestions3suggestionsfiltered_suggestionsnwr   r  filtered_suggestions2suggestions2_listsuggr  all_suggestionsr  r  r  r  r   s                        r   r  zCmfSearchEngine.prepare_word  s   	3tfB7f***D ..JD ..J "''-$,,T2K#% %%d+B?2$b11$++B/EE|$EA  6q6Q;Q447?qttAw!8 		#r*AEEy2..E)003FA9QCr**$++A.Q)4 %'! & 1 1 4 4 @ @ H H J R R T 
! A, a6t9>7d1g%aDG);  <<R0DEE}TF"55E)006FA ""67#>S:TTX\W]]O  $fO 5  	9Akk!nRa( 9 !$$R^^4:bnn%5R889	9 *,<< !,,11&$M]ae`fMfHg9h:@&9I<E;9:1 2 ?L ( -<<$\\//55c:2A> - GGI--c2671#R0%))!,-- *M9C@U<VVO$$r   )FNNFNNFFNNNN)r  r   returnztuple[str, set[str]])r  )r9  z	list[str])TFF)T).r   r   r   __doc__r  r  r@  r  r   r  rg  classmethodr  rv  r  rw  r  staticmethodr  r  r  r  rz  r{  r}  r|  r4  r8  r  r  rJ  rP  rN  rW  r  r  r  r  r~  r   r   r  r  r  r  r  r   r   r   rI  rI    s   F $_N kO,- O&(FfH `d$) ~# ~#FFAR%NS 
0 
0 " "J-&$	(cV)HVRbj7at<a~ " "& 	 	 $ $f " "
 B B # # F$ F$P  $ FGZ + +
V p 
d 
$ 
# 
 
  '  'F i iZ " "J q% q%r   rI  )0cmf.includecmf.util.cmfnlputilr  typingr   r   r   r   r  rF  r?  dataclassesr   r	   r
   	tracebacksys	itertoolscollectionsr   rL  enchantr%  	pymorphy3r  r  r  cmf.util.cmfutilr   bs4r   MorphAnalyzerr,  RUSSIAN_STOP_WORDSENGLISH_STOP_WORDSPROMPT_STOP_WORDS_NORMTECH_STOP_WORDSEVA_ARTIFACT_KEYS_STOP_WORDSEVA_ARTIFACT_VALUES_STOP_WORDSrX  r:  ALLOWED_FIELDSr   r   r   rG  rI  r   r   r   <module>rR     s        - ,    0 0   
  "     	   , 	! @  A  K ;  M  "x  '*<<NO `, = = =| "i 0 iZ	 0 	\% \%r   