
    Иmi                       d dl  d dlmc mZ d dlmZmZmZmZ d dl	Z	d dl
Z
d dlZd dlmZmZmZ d dlZd dlZd dlZd dlZd dlmZ d dlZd dlZd dlZd dlZd dlZd dlZd dlmZ  ej:                         Zg dZg dZ g d	Z!g d
Z"g dZ#g dZ$ e%ee z   e"z         Z&g dZ'e G d d             Z( G d d      Z) G d de)      Z* G d de)      Z+ G d d      Z,y)    )*N)ListSetDictOptional)	dataclassfieldfields)
namedtuple)BeautifulSoup)   иu   вu   воu   неu   чтоu   онu   наu   яu   сu   со   какu   аu   тоu   всеu   онаu   такu   егоu   ноu   даu   тыu   кu   уu   жеu   выu   заu   быu   поu   толькоu   ееu   мнеu   былоu   вотu   отu   меняu   ещеu   нетu   оu   изu   емуu   теперь
   когдаu   дажеu   нуu
   вдругu   лиu   еслиu   уже   илиu   ниu   бытьu   былu   негоu   доu   васu   нибудьu
   опятьu   ужu   вамu   ведьu   тамu
   потомu   себяu   ничегоu   ейu
   можетu   ониu   тут   гдеu   естьu   надоu   нейu   дляu   мыu   тебяu   их   чемu   былаu   самu   чтобu   безu
   будтоu   чегоu   разu   тожеu   себеu   подu
   будетu   жu
   тогда   ктоu   этотu   тогоu   потомуu
   этого
   какойu   совсемu   нимu
   здесьu   этомu   одинu
   почтиu   мойu   темu
   чтобыu   нееu   сейчасu   были   кудаu
   зачемu   всехu   никогдаu
   можноu   приu   наконецu   дваu   обu   другойu   хотьu
   послеu   надu   большеu   тотu
   черезu   этиu   насu   проu
   всегоu   нихu
   какаяu
   многоu
   развеu   триu   этуu   мояu   впрочемu   хорошоu   своюu   этойu
   передu   иногдаu
   лучшеu   чутьu   томu   нельзяu
   такойu   имu
   болееu   всегдаu   конечноu   всюu
   между)imemymyselfweourours	ourselvesyouyouryoursyourself
yourselveshehimhishimselfsheherhersherselfititsitselftheythemtheirtheirs
themselveswhatwhichwhowhomthisthatthesethoseamisarewaswerebebeenbeinghavehashadhavingdodoesdiddoingaantheandbutiforbecauseasuntilwhileofatbyforwithaboutagainstbetweenintothroughduringbeforeafterabovebelowtofromupdowninoutonoffoverunderagainfurtherthenonceheretherewhenwherewhyhowallanybotheachfewmoremostothersomesuchnonornotonlyownsamesothantooverystcanwilljustdonshouldnow)	r   r   u   почемуr   r   r   r   u   можешьr   )docwwwhttphttpsmailto)u
   авторu   бизнес-процессu   владелецu   датu   исполнителu   контрагентu   логическu
   отделu   постановщикu   приоритетu   проектu   процессu
   спискu   стандартнu   статусu   тип)approvbaseclosedefaultdocumentopentasku   бизнесu   документu
   задачu   задачаu   закрытu
   обычнu   системu   созданu   черновик)textml_textnametagscommentsaddon_fieldskey_phrasesc                      e Zd ZU dZeed<   dZeed<   dZeed<   dZeed<   dZ	eed<   dZ
eed<   dZeed<   dZeed	<   dZeed
<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZ	eed<   dZeed<   dZeed<   dZ
eed<   dZeed<   dZeed<   dZeed<   dZ eed<   dZ!eed<   dZ"eed<   dZ#eed <   dZeed<   dZ$eed!<   dZeed	<   dZ%eed"<   dZ&eed#<   dZ'eed$<   dZ(eed%<   dZ)eed&<   dZ*eed'<   dZ+eed(<   dZ,eed)<   dZ-eed*<   dZ.eed+<   dZ/eed,<   dZ0eed-<   dZ1eed.<   dZ2eed/<   dZ3eed0<   dZ4eed1<   dZ5eed2<   y)3DriverSearchObjectN	obj_modelobj_idobj_codeobj_nameobj_modified_atobj_project_idobj_related_person_loginsobj_ml_textobj_textobj_tagsobj_logic_type_codeobj_activity_codeobj_status_typeobj_result_textobj_commentsobj_owner_nameobj_responsible_namesobj_hrefobj_parent_idobj_tree_parent_idobj_root_parent_idobj_created_atobj_deletedobj_archivedobj_author_nameobj_modified_by_nameobj_addon_fieldsobj_user_ratingobj_key_phrasesobj_company_idobj_breadcrumbs"obj_related_person_logins_tsvectorname_tsvectortext_tsvectortags_tsvectorresult_text_tsvectorcomments_tsvectoraddon_fields_tsvectorkey_phrases_tsvectorml_text_tsvectorheadlineheadline_rawtitlebreadcrumbslabelrankage_daysdebug_labelmark)6__name__
__module____qualname__r   str__annotations__r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r        !./cmf/models/cmf_search_engine.pyr   r   U   s!   IcF3HSHSOCN3$(c(KHSHS"" c OCOCLN3 $#$HSM#!s!!s!N3OCKLN3OC##SOCOCN3OC$(c(-1&s1KM#M#M### c  $#$##SHSLE#KE#DOHSKDOr   r   c                   *    e Zd Zd Zd Zd Zd Zd Zy)SearchDriverBasec                     t         NNotImplementedselfsearchs     r   select_empty_query_topz'SearchDriverBase.select_empty_query_top       r   c                     t         r   r   r   s     r   select_empty_queryz#SearchDriverBase.select_empty_query   r   r   c                     t         r   r   r   s     r   select_bm25zSearchDriverBase.select_bm25   r   r   c                     t         r   r   r   s     r   select_defaultzSearchDriverBase.select_default   r   r   c                     t         r   r   r   s     r   select_attachmentz"SearchDriverBase.select_attachment   r   r   N)r   r   r   r   r   r   r   r   r   r   r   r   r      s    r   r   c                   $    e Zd Zd Zd Zd Zd Zy)SearchDriverEvaFullSearchc                     t         j                  j                  |      }g }|D ]  }|j                  t	        di |        |S )Nr   )modelsCmfFullSearchfilter_empty_topappendr   )r   r   
found_objsres	found_objs        r   r   z0SearchDriverEvaFullSearch.select_empty_query_top   sO    ))::6B
# 	IJJ)  	 
r   c                 v   g }g }g }g }g }|j                   dk(  s|j                   dk(  r|t        j                  j                  |j                  t        t        j                  j                        dd|      }t        j                  j                  |j                  dd|      }|j                   dk(  s|j                   dk(  r|t        j                  j                  |j                  t        t        j                  j                        dd|      }t        j                  j                  |j                  dd|      }|j                   dk(  s|j                   dvr.t        j                  j                  |j                  d	dd|
      }g }||z   |z   |z   |z   D ]  }|j                  t        di |        |S )NANYCmfTaskEMPTY   )force_related_person_loginr   r   r   )r   r   r   CmfDocumentr  r  r   )force_field_namer   r   r   r   )
model_namer   r   filter_onceAKA_TASK_MODELSr   gcurrent_userloginAKA_DOC_MODELSAKA_DICT_MODELSr   r   )	r   r   task_empty_query_related_usertask_empty_querydoc_empty_query_related_userdoc_empty_queryother_empty_queryr   r   s	            r   r   z,SearchDriverEvaFullSearch.select_empty_query   s   (*%')$%):):i)G,2,@,@,L,L&&+.q~~/C/C+D -M -)  &33??&&	  @   %):):m)K+1+?+?+K+K%%+.q~~/C/C+D ,L ,( %22>>%%	 ? O %):):B\)\ & 4 4 @ @&&!' !A ! 69IILhhkzz  ~O  O 	8IJJ)6I67	8 
r   c                    g }g }g }|j                   dk(  s|j                   dk(  r;t        j                  j                  |j                  |j
                  ddgdd|      }|j                   dk(  s|j                   dk(  r;t        j                  j                  |j                  |j
                  ddgdd|      }d	}|j                   dk7  rd
}|j                   dk(  r;t        j                  j                  |j                  |j
                  d|gd|      }nI|j                   dvr;t        j                  j                  |j                   g|j
                  d|gd|      }g }||z   |z   D ]  }|j                  t        di |        |S )Nr  r  r   d   B25SF)force_slicer   include_attachmentr   r        TOP)r  r   r   r  r   )
r	  r   r   search_once_top_bm25r  tsquery_without_synr  r  r   r   )r   r   result_tasksresult_docsresult_dictother_slicer   r   s           r   r   z%SearchDriverEvaFullSearch.select_bm25  s   %):):i)G!//DDVE[E[]c]w]w9:3v?DV E UL %):):m)K ..CCFDYDY[a[u[u9:3v?DV D UK
 %K% ..CCFDZDZ\b\v\v9:;u]c D eK&@@ ..CCVEVEVDWY_YsYs9:;u]c D eK %3kA 	8IJJ)6I67	8
r   c                 z   g }g }g }g }g }g }|j                   dk(  r8t        j                  j                  |j                  |j
                  d|      }nF|j                   dvr8t        j                  j                  |j                   g|j
                  d|      }|j                   dk(  s|j                   dk(  rot        j                  j                  |j                  |j                  dd|      }t        j                  j                  |j                  |j
                  d|      }|j                   dk(  s|j                   d	k(  rot        j                  j                  |j                  |j                  dd|      }t        j                  j                  |j                  |j
                  d|      }|j                   dk(  s|j                   d
k(  r7t        j                  j                  |j                  |j
                  d|      }g }||z   |z   |z   |z   |z   D ]  }	|j                  t        di |	        |S )Nr  MAIN)r   r   r  r  SYNsyn)r   r   r   r  CmfAttachmentr   )r	  r   r   search_oncer  r  r  tsquery_with_synr  AKA_ATTACHMENT_MODELSr   r   )
r   r   r"  result_tasks_synr   result_docs_synr!  result_attachsr   r   s
             r   r   z(SearchDriverEvaFullSearch.select_default<  sS    % ..::&&**	 ; K &@@ ..::""#**	 ; K %):):i)G%33??@V@VX^XoXo6;%PV  @  X!//;;F<R<RTZTnTn6<V < ML %):):m)K$22>>v?T?TV\VmVm6;%PV ? XO ..::6;P;PRXRlRl6<V ; MK %):):o)M#11==,,**	 > N$ %(88;FX[iilww 	8IJJ)6I67	8 
r   N)r   r   r   r   r   r   r   r   r   r   r   r      s    0d>Jr   r   c                       e Zd Zy)SearchDriverElasticSearchN)r   r   r   r   r   r   r0  r0    s    r   r0  c                      e Zd ZdZdgZdgZdgZdZ e       Z	d Z
e	 	 	 	 	 d%d       Zd Zd	 Zed
        Zed&d       Zd Zd Zd Zd Zd Zd Zd Zed        Zed        Zed        Zed        Zed        Zed        Zd Zd Z ed        Z!d Z"ede#de#de$fd        Z%ed'd!       Z&ed(d"       Z'ed#        Z(ed)d$       Z)y)*CmfSearchEngineu  
    Главная задача: выполнение поиска (без учета дата-драйвера PG или Elastic).

    Алгоритм:
    - Подготовка запроса
    - Запрос делаем через дата-драйвер, получаем сырые предварительные данные (х10 объема)
    - Сортируем по классам
    - Сортируем по bm25eva
    - Проверяем права
    - Смешиваем потоки (оптимизированно с проверкой прав)
    - Формируем вывод

    Драйвер:
    - Уметь делать запросы
    - Не обязательно, но желательно: давать статистику bm25 (даже если используется Elastic,
          мы храним данные в нашей таблице все равно и можем посчитать bm25 статистику из нее)
    r  r  r(  Nc                    d| _         d| _        d | _        d | _        d| _        ddg| _        d | _        d| _        d | _        d| _	        d| _
        d| _        d| _        d | _        d | _        i | _        t!               | _        d | _        d | _        d | _        d | _        | j,                  j.                  it0        j2                  j4                  j7                         D cg c](  }|j8                  r|j:                  dvr|j:                  * c}| j,                  _        t=        t>        d      s| jA                          y y c c}w )Nr  Fr   r   )r  r  r(  FTS_STAT_WORDS)!r	  orig_field_name
field_nameorig_search_queryonly_idsslicer
   no_analitycscheck_access_custom_fntoparchiveddeletedFSTorig_tag_nameorig_tree_parent_idaddon_filtersetrecent_projecttree_parent_filterquery_qstop_wordssorted_resultfinal_result	__class__r  cmfr   	CmfEntityiter_subclassesfull_search
class_namehasattrAPPbm25_stat_words_load)r   ms     r   __init__zCmfSearchEngine.__init__  s'   $!%#Z
!&*#!#' !e"&!% "  >>))1DGJJDXDXDhDhDj .fq==Q\\9d%d /0ll .fDNN*s,-%%' ..fs   -D?c                 b   t        |      dkD  r|d d }|d}|r|d   dk(  rddg}|sddg}t               }||_        ||_        ||_        ||_        ||_        ||_        ||_        ||_	        ||_
        |	|_        |
|_        ||_        ||_        ||_        ||_        t"        j$                  j'                  d	      }|d
kD  rt)        d|        |j+                          |j-                          t/        |      |_        d|j0                  d<   |j                  j3                  d      r|j                  d d |_        |j                  dk(  rd|_        |j                  dk(  rd|_        d|_        |rt5        |      |_        nt5               |_        d|_        |j
                  dk(  r|j                  r|j9                          nM|j
                  dk(  r|j;                          n-|j                  r|j=                          n|j?                          |jA                          |jB                  S )Ni   r4        r   2   r  T)is_dirty   u   Идет процесс индексации, могут быть доступны не все результаты поиска. Осталось объектов: Modelr  r   
CmfCommentr   )"lenr2  r	  r6  r7  r8  r9  r:  r
   r;  r<  r=  r>  r?  rA  rB  rC  r   r   countcmf_noteprepare_search_queryprepare_additional_filterlistfullsearch_sliceendswithrD  rE  search_empty_top_recentsearch_empty
search_topsearch_main_prepare_final_resultrI  )clsr	  r7  search_queryr9  r:  r
   r;  check_accessr=  r>  r?  tag_nametree_parent_idrecent_projectskwargsr   dirty_counts                     r   
fts_searchzCmfSearchEngine.fts_search  s;    |S '-L LU1X^GEHE "
 '!+&#/ "*(4%
" '%3"$ **00$0?  }  ~I  }J  K  L 	##%((*"&u+%&" %%g. & 1 1#2 6F% )F , %F *F $'$8F!$'EF!$ 
##r)fjj**,%%+!ZZ $$&"""r   c                 D   t        j                  dd| j                        j                         | _        t        j                  dd| j                        | _        t        j                  dd| j
                        j                         | _        d | _        d | _        t        | j                  j                               dk(  rt        j                  d| j                        s t        j                  d| j                        r| j                  j                         | _        t        j                  d	| j                        r| j                  | _        | j                  | j                  d
d      | _        | j                  | j                        | _        | j                  | j                  W| j
                  | _        t        j                  dd| j                         | _        | j"                  | _        d | _        d | _        y | j+                  | j
                        | _        t/               | _        | j,                  j                         D ]2  }t3        j4                  |      }| j0                  j7                  |       4 t8        j;                  | j,                        | _        | j=                  | j                         | _        d | _        | j                  | _        | j                  | j"                        | _        | j>                  | _        | j$                  | _        y )Nz -[^ ]*r4  z[|&()] u;   ([^A-Za-zА-Яа-я0-9](\s|$)|(\s|^)[^A-Za-zА-Яа-я0-9])rV  z^[a-zA-Z0-9]+-[0-9]+$z^[0-9]+$z@^[a-zA-Z0-9]*:[0-9a-f]*-[0-9a-f]*-[0-9a-f]*-[0-9a-f]*-[0-9a-f]*$FT)synonyms
stop_wordsu   [^А-Яа-яA-Za-z0-9.-]-) resubr8  lowersearch_query_wo_qstoprl  stripquery_obj_codequery_obj_idr^  splitmatchupperparse_search_queryrG  text_stemmingsearch_query_ninjasearch_query_normalizer  r*  
query_likequery_remove_stop_wordssearch_query_wo_stop_wordsrc  search_query_partspeechcmfnlp
partspeechr   cmfutilninjaquery_normalizesearch_query_stemm)r   wr  s      r   ra  z$CmfSearchEngine.prepare_search_queryS  s    &(VVJr4;Q;Q%R%X%X%Z"FF9S$2L2LMFF#abegkgxgxy  B # t%%++-.!3xx0$2H2HIRXXVacgcycyMz&*&<&<&B&B&D#xx[]a]s]st$($:$:!!%!8!89O9OZ_lp!8!q!%!3!3D4J4J!K *d.?.?.K&*&7&7D#*,&&1LSQUQhQh*iD''+'B'BD$$(D!"DO*.*F*FtGXGX*Y' (,v$00668 	<A**1-J((//
;	< #*--0O0O"P '+&:&:4;R;R&S#  44"&"4"4T5P5P"Q#'#:#: 
 !% 8 8r   c                     | j                  | j                  | j                        | _        | j	                  | j
                        | _        y r   )	calc_tagsrA  r8  rn  _get_all_branchesrB  rF  )r   s    r   rb  z)CmfSearchEngine.prepare_additional_filter  s9    t'9'94;Q;QR"&"8"89Q9Q"Rr   c                     t               }|r:t        |t              r|D ]  }|j                  |        n|j                  |       | j	                  |      \  }}t        |j                  |            S r   )rD  
isinstancerc  add_extract_tagsunion)rk  rn  rl  r   _tagextracted_tagss         r   r  zCmfSearchEngine.calc_tags  sg    u(D)$ #DHHTN# "'*'8'8'F$nDJJ~.//r   c                 H   t               }| j                  d      }t        |      dk(  r| |fS d} |dd D ]X  }|dk(  r	t        j                  d|d      }|d   dk7  r|j	                  |d          t        |      dk(  sK|  |d    |d    } Z | j                         } | |fS )	zExtrats tags from the given search_query and returns its reminder and a set of extracted tags

        Args:
            search_query (str)

        Returns:
            tuple[str, list[str]]: search_query reminder and a set of extractd tags
        #rV  r4  Nz(\W)r   r     )rD  r  r^  ry  r  r}  )rl  r   sharp_splittedtoken
sub_tokenss        r   r  zCmfSearchEngine._extract_tags  s     u%++C0~!#%%#AB' 
	OE{'5!4J!}"A' :!#".
1z!}oN
	O $))+T!!r   c                 J   g }g }g }g }|D ]  }|j                   | j                  v r|j                  |       -|j                   | j                  v r|j                  |       W|j                   | j                  v r|j                  |       |j                  |        ||||fS r   )r   r  r   r  r+  )r   search_objects	aka_tasksaka_docsaka_dict
aka_attachsearch_objects          r   _split_search_objects_to_4_akaz.CmfSearchEngine._split_search_objects_to_4_aka  s    	
+ 	/M&&$*>*>>  /((D,?,??.((D,F,FF!!-0.	/ (Hj88r   c                    g }g }t         j                  rKt         j                  j                  j                  r't         j                  j                  j                  dk(  rg |fS t	        t         j                  j                        }|D ]?  }|j
                  r ||j
                  v r|j                  |       /|j                  |       A ||fS )Nr4  )r  r  r  valuer   r   r   )r   r  relatednot_related
user_loginr  s         r   %_split_search_objects_to_related_userz5CmfSearchEngine._split_search_objects_to_related_user  s    ~~Q^^%9%9%?%?1>>CWCWC]C]acCc~%%--.
+ 	2M66:IpIp;p}-""=1		2
 ##r   c                     g }g }|D ]4  }|j                   |k(  r|j                  |       $|j                  |       6 ||fS r   )r   r   )r   r  keyr   nomarkr  s         r   _split_search_objects_by_markz-CmfSearchEngine._split_search_objects_by_mark  sM    + 	-M!!S(M*m,		-
 t|r   c                    g }t               }t        j                         }g }| j                  j                  |       }| j	                  |      \  }}}}d }	ddd}
	 t        |      } |	||
|        |	||
|        |	||
|        |	||
|        |	||
|        |	||
|       t        |      }|dk\  s||k(  rn^|d d | _        t        j                         |z
  dkD  r.t        j                  dt        j                         |z
          y y )Nc                     |d   |d   k\  ry |sy |j                  d      }d|_        | j                  |       |dxx   dz  cc<   y )Nr_  total_limitr   	empty_toprV  popr   r   r   countersobj_listobjs       r   append_if_exists_empty_topzKCmfSearchEngine.search_empty_top_recent.<locals>.append_if_exists_empty_top  sL     H]$;;,,q/C)COJJsOW"r   r      r_  r  rW  皙?z3PROF fulltext_search empty_queries_top selects got )	rD  timedriverr   r  r^  rH  r  debug)r   rI  skip_idsprof_str  top_results_taskstop_results_docstop_results_dictstop_results_attachr  counters_empty_topres_count_beforeres_count_afters                r   rf  z'CmfSearchEngine.search_empty_top_recent  s8   5))+;;DAUYUxUx  zH  VIR+->@R		# ()<"<0&|5GIZ[&|5GIZ[&|5GIZ[&|5GIYZ&|5GIYZ&|5GI[\!,/O"$;K(K  *#2.99; 3&GGI$))+X_J_I`ab 'r   c                 f   g }t               }t        j                         }| j                  j                  |       }| j	                  |      \  }}}}| j                  |      \  }	}
| j                  |      \  }}d}t        |	|
|||g      rX|	df|
df|df|df|df|dff}| j                  |||       |dz  }|dkD  rt        j                  d       nt        |	|
|||g      rXt        j                         |z
  dkD  r-t        j                  dt        j                         |z
          || _
        y )Nr   rV    W   Баг в поиске, много данных, либо не идет вычитка.r  z&PROF fulltext_search search_empty got )rD  r  r  r   r  r  rz   _add_if_existsr  r  rH  )r   rH  r  r  r  results_tasksresults_docsresults_dictsresults_attachr  r  r  r  
iter_countprocessing_groups                  r   rg  zCmfSearchEngine.search_empty  sd    5 ))+77=EIEhEhiwExB|]N9=9c9cdq9r7%'77;7a7abn7o5$o
02B.! " /2!1%-q1 !$"#   0-J!OJD sw# 02B.! "& 99; 3&GG<TYY[7=R<STU*r   c                     t        j                          }g }t               } j                  dk(  rg  _        y  j                  dk7  rg  _        y  j
                  j                         } j                  |      \  }}}} j                  |d      \  }}	 j                  |d      \  }}
 j                  |d      \  }} j                  |d      \  }}|}|}~~ j                  | j                        } j                  | j                        } j                  |	 j                        }	 j                  |
 j                        }
 j                  | j                        } j                  | j                        }t        j                          |z
  dkD  r-t        j                  dt        j                          |z
          t        j                          }t               }g }i  fd}d }d	d	d	d
d
dd}d	dd}t        |      d	kD  r|d	   j                  dkD  r ||||d        t        |      d	kD  r|d	   j                  dkD  r ||||d        t        |      d	kD  r|d	   j                  dkD  r ||||d        t        |      d	kD  r|d	   j                  dkD  r ||||d        t        |	      d	kD  r ||||	d        t        |
      d	kD  r ||||
d        d	}d	}d	}d	}t        |      d	kD  r|d	   j                  }t        |      d	kD  r|d	   j                  }t        |      d	kD  r|d	   j                  }t        |      d	kD  r|d	   j                  } j!                  |      } j!                  |      } j!                  |      } j!                  |      }d}||kD  s||kD  rd}t         j"                  j%                               dk(  r
 ||||       	 t        |      }|rt        |      d	kD  rDt        |      d	kD  r6|d	   j                  |d	   j                  kD  r |||||        |||||       n |||||        |||||        |||||        |||||        ||||        ||||	d         ||||
d         |||||        |||||        |||||        |||||       nt        |      d	kD  rDt        |      d	kD  r6|d	   j                  |d	   j                  kD  r |||||        |||||       n |||||        |||||        |||||        |||||        |||||        |||||        ||||        ||||
d         ||||	d         |||||        |||||        |||||        |||||       t        |      }|dk\  s||k(  rn|d d  _        t        j                          |z
  dkD  r.t        j                  dt        j                          |z
          y y )Nr   r  flow_idcodelike	flow_namer  z'PROF fulltext_search TOP25 selects got c                    t        t        j                  j                        }|sy |j	                  d      }|j
                  v r#|sy |j	                  d      }|j
                  v r#d|j
                  <   |r|j                  |z  dk  ry d}|j                  j                  vrd}||   || d   k\  ry | j                  |       ||xx   dz  cc<   y )Nr   T皙?r   r   _limitrV  )
r   r  r  r  r  r   real_rank_bm25r   r  r   )	r   r  r  max_rankr  r  	cur_classexists_dictr   s	          r   append_if_existsz4CmfSearchEngine.search_top.<locals>.append_if_exists  s    Q^^112J,,q/C **+ll1o **+
 '+K

# &&1T9 I}}D$7$77"	 	"h)F/C&DD JJsOY1$r   c                     |d   |d   k\  ry |sy |j                  d      }d|_        | j                  |       |dxx   dz  cc<   y )Nr_  r  r   r   rV  r  r  s       r   append_if_exists_otherz:CmfSearchEngine.search_top.<locals>.append_if_exists_other  sL     H]$;;,,q/C%COJJsOW"r   r   r     )r   r   c	doc_limit
task_limitc_limitr  r    TFrV  rW  z2PROF fulltext_search TOP25 mixing and prepare got )r  rD  r7  rH  r6  r  r   r  r  	bm25_sortr  r  r  r^  	rank_bm25r  bm25_sort_date4rl  r  )r   r  rI  r  r  r   r!  result_dictsr  result_docs_idcodelikeresult_tasks_idcodelikeresult_docs_nameresult_tasks_nameresult_docs_mainresult_tasks_mainrH  r  r  r  counters_othermax_doc_name_rankmax_task_name_rankmax_doc_main_rankmax_task_main_rankdoc_prior  r  r  s   `                          @r   rh  zCmfSearchEngine.search_topH  s   ))+5 ??j(!#D5(!#D006BFBeBeftBu?k< /3.P.PQ\^o.p++040R0RS_ar0s--(,(J(J;Xc(d%%*.*L*L\[f*g''&(>>*:D<T<TU NN+<d>V>VW!%0FH`H`!a"&..1H$JbJb"c>>*:D<T<TU NN+<d>V>VW 99; 3&GG=diikG>S=TUV ))+5 '	%R	# aa!#2"F $%Q7
  1$)9!)<)F)F)M]H6FM 1$)9!)<)F)F)M]H6FM 1$)9!)<)F)F)M]H6FM 1$)9!)<)F)F)M]H6FM %&*]H6LdS&'!+]H6MtT   " 0 3 B B !!#!21!5!D!D " 0 3 B B !!#!21!5!D!D  //0@A 001BC//0@A 001BC !226HK\6\Ht  &&()Q."=.,O"=1 )*Q.37G3H13L)!,;;>Nq>Q>`>``$]H>NPab$]H>NPab$]H>NPab$]H>NPab :KM_` :KM_`&}nlS :PRVW :QSWX :JL]^ :JL]^ :KM_` :KM_` *+a/C8I4JQ4N)!,;;>OPQ>R>a>aa$]H>OQcd$]H>OQcd$]H>OQcd$]H>OQcd :KM_` :KM_` :JL]^ :JL]^&}nlS :QSWX :PRVW :KM_` :KM_` :JL]^ :JL]^ "-0O"$;K(K_ b +3B/99; 3&GGHW^I^H_`a 'r   c                 \   g }t               }t        j                         }| j                  j                  |       }| j	                  |      \  }}}}| j                  |d      \  }}	| j                  |d      \  }}
t        j                         |z
  dkD  r-t        j                  dt        j                         |z
          t        j                         }d}t        |||||	|
g      rq|df|df|df|df|	df|df|	df|df|df|
df|df|
df|df|dfg}| j                  |||       |dz  }|dkD  rt        j                  d       nt        |||||	|
g      rq|d d	 | _
        t        j                         |z
  dkD  r.t        j                  d
t        j                         |z
          y y )Nr'  r  z&PROF fulltext_search main selects got r      rV  r  r  rW  z1PROF fulltext_search main mixing and prepare got )rD  r  r  r   r  r  r  r  rz   r  rH  )r   rH  r  r  r  r   r!  r  r.  r,  r-  r  r  s                r   ri  zCmfSearchEngine.search_main3  s   5 ))+33D9BFBeBeftBu?k<)-)K)KLY^)_&&'+'I'I+V['\$_99; 3&GG<TYY[7=R<STU ))+
<lNL\^mnoq!a #q!!1%q!!1%#q! !$a  !$#q!% 0  0-J!OJD sw= <lNL\^mno@ +3B/99; 3&GGG		V]H]G^_` 'r   c                     |S )u   
        Подчистка оригинального квери, который ввел пользователь:
        - удаление стоп-слов
        )r  r^  r   join)rk  rl  clean_search_query_listr  clean_search_querys        r   _clean_search_queryz#CmfSearchEngine._clean_search_queryr  s
     r   c                 V    d}|j                         D ]  }|t        v r|d|z   z  } |S )Nr4  ru  )r  ALL_STOP_WORDS)rk  queryr   r  s       r   r  z'CmfSearchEngine.query_remove_stop_words  s=     	AN" 37NC	 
r   c                 t   d}|j                         D ]  }|t        v rd|v r	|d|z   z  }t        j                  |      }|dk(  r4|j	                  dd      j	                  dd      }||k(  rd}d}nd|z   }||k(  r|d|z   |z   z  }vt
        j                  j                         t        j                  ddd      k  r|dz   |z   dz   |z   }|ddj                  |j                               z   z  } dj                  |j                         j                               }t        j                  d	d
|      }|S )Nr4  |ru  @.i     rV  u   [^ A-Za-zА-Яа-я0-9|-]rx  )r  r  r  text_normalize_enrichreplacedatetimedatetodayr  r}  ry  rz  )rk  r  r   r  lemmalegacy_wlegacy_w_strs          r   r  zCmfSearchEngine.query_normalize  s;    	9A N"axsQw44Q7B;99S-55c#>q=#%L!H#&>L:37\11C  }}**,x}}T1a/HH !C( 2S 85 @3%++-!888C7	98 hhsyy{((*+ ff13<
r   c                 8    t        j                  ||dz   z        S )NrV  )mathlog)rk  Ndfs      r   bm25_idfzCmfSearchEngine.bm25_idf  s     xx2a4!!r   c                 Z   i }i }t         j                  j                  d      |d<   t         j                  j                  d      |d<   t         j                  j                  j
                  j                         j                  d      j                         }t         j                  j                  j
                  j                         j                  d      j                         }|t        |      dz  dz     d   }|t        |      d	z  dz     d   }t        |      d
kD  r|d   d   }|d   d   }|t        |      dz  dz     d   }|t        |      d	z  dz     d   }t        |      d
kD  r|d   d   }|d   d   }d}	|D ]R  }
i ||
j                  <   | j                  |d   |
j                        ||
j                     d<   ||
j                     d   }T |D ]`  }
|
j                  |vri ||
j                  <   | j                  |d   |
j                        ||
j                     d<   ||
j                     d   }b i |d<   |d   d<   |d   d<   i |d<   i |d<   ||   d   |d   d<   ||   d   |d   d<   ||   d   |d   d<   ||   d   |d   d<   |t        _        |t        _        y )Nr  )r   r  z SELECT word, ndoc, nentry  FROM ts_stat('SELECT ml_text_tsvector from cmf_full_search where obj_model=''CmfDocument''  ') where  ndoc > 10  order by ndoc desc limit 10000 z SELECT word, ndoc, nentry  FROM ts_stat('SELECT ml_text_tsvector from cmf_full_search where obj_model=''CmfTask''  ') where  ndoc > 10  order by ndoc desc limit 10000 r  r  r   
   i  i  i  ditiFST_STAT_WORDS_DEFAULTFST_FRQ_MIDFST_FRQ_LOW)r   r   r_  CmfGlobalSettingsdpdata_driverSessionexecutefetchallr^  wordr  ndocrQ  r5  
FTS_COUNTS)rk  r5  r'  cmfdoc_statcmftask_statdoc_mid_worddoc_low_wordtask_mid_wordtask_low_wordlastrowlast_doc	last_tasks                r   rR  z$CmfSearchEngine.bm25_stat_words_load  s   
$*$8$8$>$>$>$W
=! & 4 4 : :Y : O
9..11==EEGOO  Q  @  I  I  K//22>>FFHPP  R|  }  F  F  H #3{#3A#5s#:;A>"3{#3B#6#;<Q?{d"&s+A.L&t,Q/L %S%6q%8#%=>qA$S%6r%93%>?B|t#(-a0M(.q1M 	6C')N388$-0\\*]:SUXU]U]-^N388$T*%chh/5H	6   	7Cxx~-+-sxx(-0\\*Y:OQTQYQY-ZN388$T*&sxx06I		7 46/09A/069B/06(*}%(*}%.<\.J4.P}%d+.<\.J4.P}%d+.<].KD.Q}%d+.<].KD.Q}%d++#r   c                 >   |}t        |      dk\  rd}|d   j                  |d   j                  z  dkD  rd}nI|d   j                  |d   j                  z  dkD  rd}n$|d   j                  |d   j                  z  dkD  rd}|dkD  rt        |d | d       ||d  z   }|S )	Nr  r   r  g333333?r  rV  c                     | j                   S r   )r   r   s    r   <lambda>z1CmfSearchEngine.bm25_sort_date4.<locals>.<lambda>  s
    !** r   )r  )r^  r  sorted)rk  r   newresr   s       r   r  zCmfSearchEngine.bm25_sort_date4  s     s8a<D1v$$SV%:%::TAQ&&s1v'<'<<tCQ&&s1v'<'<<tC axET
0DEDE
Rr   c                    g }d}t        |      }|D ]U  }|dz  }| j                  ||      \  |_        |_        |_        |xj
                  d| z  c_        |j                  |       W t        |d d      }|d d D ]H  }d|j                  v s|xj                  d	z  c_        d
|j                  dd|j                   |_        J t        |d d      }d}|d d D ]i  }|dz  }|xj
                  d| d| z  c_        |j                  dz   |j                   d| z   |_	        |j                  j                  dd      |_
        k |d d S )Nr   rV  z - rc                     | j                   S r   r  r4  s    r   r5  z+CmfSearchEngine.bm25_sort.<locals>.<lambda>  
    AKK r   T)r  reverser  u   ТОП1r  u    ТОП1.2fru  c                     | j                   S r   r:  r4  s    r   r5  z+CmfSearchEngine.bm25_sort.<locals>.<lambda>$  r;  r   r  z bz - z | tsq=DZQTNr4  )r^  	bm25_rankr  r  
rank_debugr   r   r6  r   r   r   r  )r   r   r  r7  r   llr/  rs           r   r  zCmfSearchEngine.bm25_sort  sf    X 	CFA@DsTY@Z=CM3-s~II4s#IMM#		
 f"7F 	KAAJJ&t#!)!++c):!ALL>J		K
 f"7F 	8AFAGGA3c"&GggmgeW&EEAG++GB7AJ		8 ds|r   c                    fd} |j                         }i }j                  !j                  dk7  r |j                        }d}d}d}d}	d}
d}d}t               }d}j                  | j                  v rd}i }|j                         D ]_  }||v r	|j                  |       |
dz  }
t               }d}d d t               g d dt               d}|j                  d      D ]  }||vr	|d	   j                  |       |d
   j                  ||   j                  d      D cg c]  }t        |       c}       |dz  }|t        j                  v r,|t        j                  |   v rt        j                  |   |   }nt        j                  d   |   }d}d}t        t        ||   j                  d            d      }|||dz   z  z  ||z   z  }|}d}j                  dk(  r9||v r5j                  r)|j                  j                         v r|	dz  }	d}|dz  }n|dz  }|
dkD  r
|dkD  r|dz
  }||z  }|d|dz    d| d| d|dd|dd|dd| d|dd| z  }|j                  |        t        |      dk(  r|dz  }t!        |      }| j"                  |
dz
     dv r|d z  }|d!| j"                  |
dz
      z  }t        |      dkD  r	|d"|d#z  }||vr|||<   ||d$<   ||d%<   b d}d}d} |j%                         D ]  \  }}d}!d}"|d
   D ]e  }#d}$|j%                         D ]F  \  }%}&||%k(  r|&d
   D ]3  }'t'        |'|#z
        d&k  s|!dz  }!|$dz  }$|d'   j                  |%        F H |$|"kD  sd|$}"g |!dkD  rd(|"z  }(|d$   |(z  |d)<   d*| d+|" d,|d)   dd|d$   dd-	|z   }|dz  }|"| kD  r|"} ||!z  }t)        |d'         j+                  d.d      j+                  d/d      j+                  d0d      })d1| d2|) d!z   j,                  z   _        |d$   |d)<    j                   d!j,                  z   _        t/        |j1                         D cg c]  }|d)   	 c}      }|d(| z  z  }dd3d d4j2                  z   z  z  z   }*||*z  }d5| d6| d7j2                  d8d9|*d#d:j4                  d#d;|z   d<z   }d}+t6        j8                  j:                  j<                  ret6        j8                  j:                  j<                  dk7  r>j>                  r2t6        j8                  j:                  j<                  j>                  v rd=}+d},d>t6        v rj@                  t6        jB                  v rd=},||+z  |,z  }d?|d#d@| dA|+ dB|,d#d!	|z   }|dk(  rdC}|}-j4                  dDk(  r|dDz  }dE|d#d!| }||-|fS c c}w c c}w )FNc                     j                   j                  dd      } t        j                  dd|       } | d d } t	        j
                  d| z   dz         S )N'"z":([0-9,]*)z":"\1", z{ z })r   r  ry  rz  orjsonloads)tmpr  s    r   tsvector2dictz0CmfSearchEngine.bm25_rank.<locals>.tsvector2dict4  sT     &&..sC8C&&c:Ccr(C
 <<s
T 122r   r4  r   rV  r  r  )r@  
smart_rank
found_synsvector_positionstoken_found_idxr  near_tokensr  rN  rO  ,r  g      ?g      ?r  Fr  Tr  r  z wr  =z bm25t(bm25)=z.1f(z) idf=z fqd=z ord=z	 in_name=)VERBINFNr  ru  z
 SYNW_AWG=r=  r@  rP  r  rQ  g?rM  z maxNRC[z]=z smrt(bm25t)=z) rF  {}r  : r     z nearC=z
 sumNearC=z age=.0fz ageF=z	 sqlRank= ()g?FTS_RECENT_PROJECTSz rbm25=z wc=z rur=z rpr=gMbP?r  z code)"r   r   rD  r   r  r  r  extendintrQ  r5  minr^  r   r   r{  maxr  itemsabsr   r  r   sumvaluesr   r   r  r  r  r  r   r   r^  ).r   r  r  rL  doc_vector_dictname_vector_dictr   rA  	w_doc_cnt
w_name_cntsynw_cntorder_factorfound_tokens_cnt
done_words	model_keymatched_tokenssynwrank_setw_cntr  r  r   idfkbfqdbm25	bm25_origin_namew_avgnear_tokens_cntsum_near_tokens_cntmax_of_max_near_contexthave_near_token_cntmax_near_contextpostmp_max_near_contextsynw2token2pos2mltrQ  
age_factorrelated_user_rankrecent_project_rank	real_ranks.    `                                            r   r@  zCmfSearchEngine.bm25_rank2  s   	3 ((<(<=(S->->"-D,S->->?
	
U
	==D000I

 KKM G	8Dz!NN4 MH
 uHE ""!e$&#'"uE ZZ_  #O+l#''*()00/RSBTBZBZ[^B_1`Q#a&1`a
***yC<N<Nq<Q/Q,,Q/	:C,,-EFyQC#oa066s;<a@cQqSk*CE2 	88{*q4D/DZ[_b_k_k_q_q_sZs!OJ"G!8DNIa<L3$6#/##5Ll*#3A#5"6awas-PTUXzYZ[dehZiiopstwoxx}  B  ~C  CH  IU  VY  HZ  Zc  dk  cl  m  m
T"A #D 8}!!ME++HQJ7;KK	$">">xz"J!KLL
8}q 
5+66
>)',t$!&E+'7E#$OG	8V "#)//1 "	9KD%"# /0 <'($%3%9%9%; "ME6u}  &'9 : 	"
 tcz?b0/14/0A50!-044U;!	"" (*::';$!<" #Q& --&+K&83&>l#'vR0@/AuUaObcfNgghinozi{|  iA  AC  D  GQ  Q
1$#&==.>+#'::#!%"67??BGOOPSTVW__`cdfg!"4&;-83>M&+K&8l#E"	9F ((1~4^-B-B-DEAlOEFs556 r1c#,,./00
j /z:M9NeTWT`T`adSeeklvwzk{  |E  FI  FN  FN  OR  ES  SU  V  Yc  c  fi  i

 ^^!!''ANN,@,@,F,F",LQTQnQn$$**c.K.KK $   A%#*<*<@U@U*U"&''*==tCj-=,>eDUCVV[\ops[ttuv  xB  B
19 D	88tDLD c
!J<8JY
**s 2ap Fs   %W Wc                 6    d| vr| S | j                  d      d   S )Nz@#@#@#r   )r  )r   s    r   _remove_suffix_from_headlinez,CmfSearchEngine._remove_suffix_from_headline  s#    8#O~~h'**r   c                    | j                   | j                  d   | j                  d    }| j                  rGi }g }|D ]  }|j                  |j                         |j
                  r| j                  |j
                        nd}d}|j                  |j                  d}|j                  }|j                  r|dz   |j                  z   }|j                  |j                  |j                  | d| d	| d	|j                  d
dt        j                   	t        j!                  |j"                        ||j                  |j                  d||j                  <    |t        _        t        j'                  d       |S i }| j(                  rXi }	|D ]7  }|	j+                  |j,                  g       j                  |j                         9 |	j/                         D ]  }
t1        t2              |
   j4                  r| j(                  dgz   }dd|	|
   gg dg}n| j(                  }dd|	|
   g}|
dk(  r|g dz   }|
dk(  r|dgz   }t6        j9                  |
      j;                  ||| j<                  | j>                        }|D ]]  }t1        t2              j,                     j4                  r&|j4                  r|||j@                  jB                  <   O|||jB                  <   _  ntE        d       t        j'                  d       g }|D ]  }|jG                  |j                        }|s"|j
                  r| j                  |j
                        nd}d}|j                  |j                  d}|j                  }|j                  r|dz   |j                  z   }|jH                  dk(  r|jJ                  s|jL                  |jJ                  jN                  |jJ                  jP                  jR                  | d| d	| d	|j                  d
dt        j                   	||j                  |j                  |jB                  jR                  |j                  |jT                  |jV                  |jX                  t        j!                  |j"                        d}nt[        j\                  d|j                        d   }|jB                  jR                  ||jP                  jR                  | d| d	| d	|j                  d
dt        j                   	||j                  |j                  t        j!                  |j"                        d}	 | j^                  s|ja                  dd       n| j_                  |      stb        | j(                  D ]6  } |j\                  d      d   }te        ||j                     |d      }|||<   8 |j                  |        t        j'                  d       | jf                  sHti        t2        jj                  jl                  | jn                  t6        jq                  |dd       d !       || _9        y# tb        $ r Y Jw xY w)"u5   
        Итоговая обработка
        r   rV  r4  NoneNz.6frx  z ||| ru  r[  z words=)idr   coder   r   r   r   r   zfulltext_search ENDcmf_ver_headIN)cmf_ver_curz==Tr  r(  )urlurl_previewurl_preview_imgr  fullsearch_answer)r
   filterinclude_archivedinclude_deleteduY   DEV: FATAL. Укажите в запросе поиска список полей fields=z"fulltext_search Start check access)r  r   r  r   r   r   r   	attach_idattach_name
attach_urlattach_url_previewattach_url_preview_imgr   z DZQTN )r  r   r  r   r   r   r   r   T)TEXKOM_skip_failread_auditTEXKOM_ppp_project_simplecheckr  rW  )rl  obj_dict)rq  ):rH  r:  r9  r   r   r   r  r   r   r   r   r   r   r  r@  jsonrJ  r   fulltext_search_headlinesr  r
   
setdefaultr   keysvarsr   cmf_verr  get_model_by_namerc  r>  r?  r  r  	cmf_alertgetrO  parent	parent_idr   r  r  r  r  r  ry  r  r<  _acl_check_readCmfPermissionErrorgetattrr;  schedule_deferred_jobr   _do_calc_statisticsr8  
dumps_dictrI  )r   rI  r  	result_idrC  r   formated_rankr   objectsids_by_modelr	  _fields_filterr  r  r   r  r   r	   attrs                       r   rj  z%CmfSearchEngine._prepare_final_result  s    ))$**Q-

1F==(*%I!   *LMJJ4<<QZZH\^ &66%'(vvclM==!CK!--7E((GGJJ#+*E%-!**UXIYY`abafaf`g h#'::amm#<"FF !

	7)!((3* +DA'GG)*;;L! J''R8??IJ +//1 .
<
+33 KK>*::G .l:6NOQlmG KKG!4j)ABG0%(QQG.%)<(==G"44Z@EE""%)]]$(LL	 F  $ .CF|AKK088S[[7: 0 0 3 34*-	.).4 qs	45 F	A++ahh'CLMJJ4<<QZZH\^ &66%'(vvclM==!CK!--7E>>_4 ::  "mm #

 #

 5 5'/jeWAm_AajjY\M]]defejejdk$l!& !$%JJ%(VV\\'(ww&)gg.1oo252E2E'+zz!--'@ H" HHZ9!<E "ffll % #'/jeWAm_AajjY\M]]defejejdk$l!& !$%JJ'+zz!--'@	 H66++tlp+q!88=00!% / +C 0 3&wqxx'8%F*./ JJx(F	P 	
%&   !$$88(,(>(>GL^L^_bcfdf_gLhi
  ) *  s   BY	YYr  rI  r  c                    |D ]  \  }}t        |      D ]l  }|s |d   j                  |v r|j                  d       *|j                  |d          |j	                  |d   j                         |j                  d       n  y )Nr   )ranger   r  r   r  )rk  r  rI  r  lstr_  _s          r   r  zCmfSearchEngine._add_if_exists  s}    " 		JC5\ q6==H,GGAJ##CF+SV]]+
		r   c                     | sg S t         j                  j                  j                  j	                         j                  dd| i      }|D cg c]  }|d   	 c}S c c}w )Na  
                WITH tree_parents AS (
                    WITH RECURSIVE r AS (
                        SELECT obj_id, obj_code, obj_tree_parent_id
                        FROM cmf_full_search
                        WHERE obj_tree_parent_id = :tree_parent_id

                        UNION

                        SELECT cfs.obj_id, cfs.obj_code, cfs.obj_tree_parent_id
                        FROM cmf_full_search AS cfs
                        JOIN r ON cfs.obj_tree_parent_id = r.obj_id
                    )
                    SELECT obj_id FROM r
                    WHERE r.obj_id IN (SELECT obj_tree_parent_id FROM r)

                    UNION

                    SELECT :tree_parent_id
                )
                SELECT * FROM tree_parents;
            ro  r   )r   r   r   r!  r"  r#  )ro  recordsrC  s      r   r  z!CmfSearchEngine._get_all_branches  s`    
 I&&))55==?GG, ~./
2 &&!&&&s   Ac                    dt         _        |j                  dd      }t        j                  dd|      }t        j
                  d|      }d}d}d}	|D ]I  }
t        |
      dk(  rt        |
      dk(  r|
dv r&|
d	v r	|	d|
 z  }	3|
dk(  r9|
d
v r|	dz  }	C|
dv r|	dz  }	Mt        |
      dk(  r\|
d   dv rt        |
      dk(  rr|d|
dd   z  }~|r|
j                  dd      }
|dz  }|dkD  rd}d}|	r|	d   dvrd}t        |
      dkD  r| j                  |
|      }t        t        |      t        t              z
        }t        |      dk(  r|	| d|d    z  }	n-t        |      dkD  r|	| ddj                  |      z   dz   z  }	n	 |r|dk\  r n
|dk\  r nL |rt        |      dkD  r|d   dv r|dd  }|S |	j                  dd      j                  dd      j                  dd      j                  d d      j                         }	|	r|	d   dv r|	dd  }	|	r|	d   dv r|	dd  }	|	r|	d   dv r|	dd  }	|	r|	dd  dv r|	d d }		 t        j                  j                  j                   j#                         j%                  d!d"|	i      }t        |      d   d   }	t/        |d$|	       |	S # t&        j(                  j*                  $ r}t        j                  j                  j                   j-                          t        j                  j                  j                   j#                         j%                  d#d"|i      }t        |      d   d   }	Y d }~d }~ww xY w)%Nr4  zwww.u   [^-A-Za-zА-Яа-я0-9()|&!' ]ru  z(,| |&|\||\(|\))r   rV  )rx  !z()&|)rQ   r   r  z |)r   rN   &z &r  r  rx  r  F)r  r  r  )rv  z (  | z )r  )r  r  z OR z or z AND  & z and z!select to_tsquery('russian', :q);q+select websearch_to_tsquery('russian', :q);z->)r  r@  r  ry  rz  r  r^  prepare_wordrc  rD  r  r  r}  r   
CmfSynonymr   r!  r"  r#  
sqlalchemyexcProgrammingErrorrollbackprint)rk  rl  rv  rw  
first_wordsearch_query_allowed_symbtokens
word_countstopsr  r   oper	sug_wordstsqueryes                  r   r  z"CmfSearchEngine.parse_search_query  s    !))&4$&FF+KSR^$_!-/HI 
 6	A1v{1v{
?;1QCLA8))r
&&r
 1v{tz!q6Q;1QqrUG$		#s#A!OJA~ D"Z/1vz,,Q,B	 Y#n2E!EF	y>Q&D69Q<.11A^a'D6

9(==DDAjAo Rm6	p 5z!|aJ 6ab	LIIfe$,,VU;CCGUS[[\cejkqqs1#!"A1#!"A1#!"A23:%#2A	$''**66>>@HHIpsvxyrz{GWa #A 	lD!$ ~~.. 	$  ,,557 ''**66>>@HHAl#%G Wa #A	$s   AJ+ +MBMMc           
      j   d}|j                         D ]  }d|v r|d|z   dz   z  }|d|z   z  } |j                         }|j                  dd      }|j                  dd      }	 t        j                  j
                  j                  j                         j                  d	d
|i      }t        |      d   d   }|j                  dd      j                  dd      j                  dd      j                  dd      j                  dd      j                  dd      }d}|j                         D ]>  }d|vr	|d|z   z  }|ddj                  t!        |j                  d                  z   z  }@ dj                  |j                               }|S # t        j                  j                  $ r}t        j                  j
                  j                  j                          t        j                  j
                  j                  j                         j                  ddt        i      }Y d }~d }~ww xY w)Nr4  r  r\  r]  ru  r  rx  hhhackskiptirez%select to_tsquery('russian', :query);r  r  r  r   rF  r  rT  )r  r}  r  r   r  r   r!  r"  r#  r  r  r  r  r  rc  r  rD  )rk  r   r  r  r  r  r   s          r   r  zCmfSearchEngine.text_stemming9  s    	AaxTAX^#S1W		
 GGIyyU#||C!12	''**66>>@HHItw~  AE  wF  GG M!QIIeC ((R088sCKKCPRS[[\_`bckkl|  B  C 	9A!|sQw sSXXc!''#,&7888	9 hhsyy{#
- ~~.. 
	  ,,557 ''**66>>@HHAeG
	s   %AF H2-A:H--H2c                    t         xj                  d| dz  c_        |d   t        j                  v rd}t        j
                  }nd}t        j                  }g }t        j                  |      s|j                  |      }g }t        j                  |      }|r.t         xj                  d| dz  c_        |j                  |       t         xj                  dz  c_        d}	|D ]  }
|	d	k\  r nt        |
      d	k  r|
d   |d   k7  r|
d
   |d
   k7  r0d|
v rF|
j                  dd      }
t         xj                  d|
 dz  c_        |j                  |
       |	d
z  }	zt         xj                  d|
 dz  c_        |j                  |
       |	d
z  }	 g }t        j                  j                  j                   j#                         j%                  dd|i      }d}	|D ]w  \  }}|	d
k\  r nmt        |      d	k  r|d   |d   k(  s|d
   |d
   k(  s3|j                  dd      }t         xj                  d| dz  c_        |j                  |       |	d
z  }	y t'        |      t'        |      z  |hz  }n|h}t'               }|D ]a  }
t(        j+                  |
      d d	 D ]D  }|j-                  |j.                         t         xj                  d|j.                   dz  c_        F c ||z  }t'               }|rt        j                  j1                  ddt1        ||hz        gddgdgdd
g      }|D ]  }|j2                  s|j2                  j4                  j7                  d      d d D ]P  }|j9                         j                  dd      }t         xj                  d| dz  c_        |j-                  |       R  ||z  t'        |      z  }t1        |      S )Nz|w:rY  r   enruzaddNinjaRevers z, zspellError, r  rV  ru  r4  z	addSpell z
            SELECT
                name, similarity(:word, name) as sim
            FROM cmf_synonym
            WHERE
                :word % name
            ORDER BY "sim" desc
            LIMIT 5;
             r%  zaddSpellTrgm z
normalize r   r  r   orderno)r  r
   order_byr:  rR  r	  zsynAdd )r  r@  stringascii_lettersr  dictionary_endictionary_rudictionary_checksuggestninja_reversr   r^  r  r   r  r   r!  r"  r#  rD  morphparser  normal_formrc  r   r  r  r}  )rk  r%  rv  lang
dictionaryfiltered_suggestions3suggestionsfiltered_suggestionsnwr   r  filtered_suggestions2suggestions2_listsuggr  all_suggestionsnormalized_wordssynonym_wordssynonym_listsynonymr   s                        r   r  zCmfSearchEngine.prepare_word_  s   	3tfB7f***D ..JD ..J "''-$,,T2K#% %%d+B?2$b11$++B/EE|$EA  6q6Q;Q447?qttAw!8 		#r*AEEy2..E)003FA9QCr**$++A.Q)4 %'! & 1 1 4 4 @ @ H H J R R T 
! A, a6t9>7d1g%aDG);  <<R0DEE}TF"55E)006FA ""67#>S:TTX\W]]O  $fO 5  	9Akk!nRa( 9 !$$R^^4:bnn%5R889	9 *,<< !,,11&$M]ae`fMfHg9h:@&9I<E;9:1 2 ?L ( -<<$\\//55c:2A> - GGI--c2671#R0%))!,-- *M9C@U<VVO$$r   )FNNFNNFFNNN)rl  r   returnztuple[str, set[str]])r  z	list[str])TFF)T)*r   r   r   __doc__r  r  r+  r  r   r  rT  classmethodrs  ra  rb  r  staticmethodr  r  r  r  rf  rg  rh  ri  r  r  r  r  rR  r  r  r@  r  rj  r   r   r  r  r  r  r  r   r   r   r2  r2    s   F $_N kO,- O&(F(D `d$) l# l#b:9zS 
0 
0 " "J9 $	(cV)+XhbV<a~ " "& 	 	 " "b " "
 1$ 1$f  $B@+L + +
Y v 
d 
$ 
# 
 
  '  'F i iZ " "J q% q%r   r2  )-cmf.includecmf.util.cmfnlputilr  typingr   r   r   r   r  r  r  dataclassesr   r	   r
   	tracebacksys	itertoolscollectionsr   enchantr  	pymorphy3ry  r  rI  bs4r   MorphAnalyzerr  RUSSIAN_STOP_WORDSENGLISH_STOP_WORDSPROMPT_STOP_WORDS_NORMTECH_STOP_WORDSEVA_ARTIFACT_KEYS_STOP_WORDSEVA_ARTIFACT_VALUES_STOP_WORDSrD  r  ALLOWED_FIELDSr   r   r   r0  r2  r   r   r   <module>r     s         - ,    0 0   
  "    	   	! @  A  K ;  M  "x  '*<<NO `, 8 8 8r "f 0 fT	 0 	C% C%r   