
    ;i1                       d dl  d dlmZmZmZmZ d dlZd dlZd dlZd dl	m
Z
mZmZ d dl	Z	d dlZd dlZd dlZd dlmZ d dlZd dlZd dlZd dlZd dlZd dlmZ  ej2                         Zg dZg dZg d	Zeg d
z  Zeg dz  Z eeez   ez         Zg dZg dZ eg dz  Z eeez         Z!g dZ"e
 G d d             Z# G d d      Z$ G d de$      Z% G d de$      Z& G d d      Z'y)    )*)ListSetDictOptionalN)	dataclassfieldfields)
namedtuple)BeautifulSoup)   и   в   во   не   что   он   на   я   с   со   как   а   то   все   она   так   его   но   да   ты   к   у   же   вы   за   бы   по   только   ее   мне   было   вот   от   меня   еще   нет   о   из   ему   теперь
   когда   даже   ну
   вдруг   ли   если   уже   или   ни   быть   был   него   до   вас   нибудь
   опять   уж   вам   ведь   там
   потом   себя   ничего   ей
   может   они   тут   где   есть   надо   ней   для   мы   тебя   их   чем   была   сам   чтоб   без
   будто   чего   раз   тоже   себе   под
   будет   ж
   тогда   кто   этот   того   потому
   этого
   какой   совсем   ним
   здесь   этом   один
   почти   мой   тем
   чтобы   нее   сейчас   были   куда
   зачем   всех   никогда
   можно   при   наконецu   два   об   другой   хоть
   после   надu   больше   тот
   через   эти   нас   про
   всего   них
   какаяu
   много
   развеu   три   эту   моя   впрочемu   хорошо   свою   этой
   перед   иногда
   лучше   чуть   томu   нельзя
   такой   им
   более   всегда   конечно   всю
   между)imemymyselfweourours	ourselvesyouyouryoursyourself
yourselveshehimhishimselfsheherhersherselfititsitselftheythemtheirtheirs
themselveswhatwhichwhowhomthisthatthesethoseamisarewaswerebebeenbeinghavehashadhavingdodoesdiddoingaantheandbutiforbecauseasuntilwhileofatbyforwithaboutagainstbetweenintothroughduringbeforeafterabovebelowtofromupdowninoutonoffoverunderagainfurtherthenonceheretherewhenwherewhyhowallanybotheachfewmoremostothersomesuchnonornotonlyownsamesothantooverystcanwilljustdonshouldnow)r   r   r   r   r   r   r   r   r   r   r   r   u   всёr   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r   r   r>   r,   r-   r   u   ещёr0   r1   r2   r   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r>   r   rA   r$   rC   u   нибытьrD   rE   r$   rG   rH   rI   rJ   rK   r   u   мочьrN   rO   rP   rQ   rR   r   rT   rU   r    rN   rX   r>   rZ   r[   r\   r]   r   r_   r`   rJ   rb   r>   rd   re   rf   rg   r   ri      этоrk   rl   rN   rn   r  rp   rq   rr   rs   rt   r   rv   r>   rx   ry      весьr{   r|   r}   r~   r1   r   r   r   r   r   r   rg   rU   r   r  rN   rk   r   rg   rr   r   u   свойrg   r   r   r   r   r   rN   r   r   r   r  r   ).u
   такоеrh   r   r   r   rV   rL   rY   r   rc   r   r*   r3   rm   r@   r/   r   r   rj   ra   rz   r   rF   r   rM   rB   r)   r   r   ro   rw   r.   r   rS   r   r   r+   r   rW   rC   r   r   r^   r?   r   ru   )u
   нужноu   хотяu   затоu   именноu   вообщеu   тудаu   такиu   делатьu   помнитьu
   менееu   оноu
   кромеu   твойu   откудаu   считатьu   внеu   тойu   каждыйu
   никтоu   которыйu
   затем   вопросu   оба)r   u   ещu   нибu   моч   этr      веr!  u   своr!  r   u   нужнu   хот   затu
   вообщu   тудr   u   менr   u   кромu   твоu   & внеr   u   каждu   никтu
   которr"  r  r   )u
   авторu   бизнес-процессu   владелецu   датu   исполнителu   контрагентu   логическu
   отделu   постановщикu   приоритетu   проектu   процессu
   спискu   стандартнu   статусu   тип)approvbaseclosedefaultdocumentopentasku   бизнесu   документu
   задачu   задачаu   закрытu
   обычнu   системu   созданu   черновик)textml_textnametagscommentsaddon_fieldskey_phrasesc                      e Zd ZU dZeed<   dZeed<   dZeed<   dZeed<   dZ	eed<   dZ
eed<   dZeed<   dZeed	<   dZeed
<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZeed<   dZ	eed<   dZeed<   dZeed<   dZ
eed<   dZeed<   dZeed<   dZeed<   dZ eed<   dZ!eed<   dZ"eed<   dZ#eed <   dZeed<   dZ$eed!<   dZeed	<   dZ%eed"<   dZ&eed#<   dZ'eed$<   dZ(eed%<   dZ)eed&<   dZ*eed'<   dZ+eed(<   dZ,eed)<   dZ-eed*<   dZ.eed+<   dZ/eed,<   dZ0eed-<   dZ1eed.<   dZ2eed/<   dZ3eed0<   dZ4eed1<   dZ5eed2<   y)3DriverSearchObjectN	obj_modelobj_idobj_codeobj_nameobj_modified_atobj_project_idobj_related_person_loginsobj_ml_textobj_textobj_tagsobj_logic_type_codeobj_activity_codeobj_status_typeobj_result_textobj_commentsobj_owner_nameobj_responsible_namesobj_hrefobj_parent_idobj_tree_parent_idobj_root_parent_idobj_created_atobj_deletedobj_archivedobj_author_nameobj_modified_by_nameobj_addon_fieldsobj_user_ratingobj_key_phrasesobj_company_idobj_breadcrumbs"obj_related_person_logins_tsvectorname_tsvectortext_tsvectortags_tsvectorresult_text_tsvectorcomments_tsvectoraddon_fields_tsvectorkey_phrases_tsvectorml_text_tsvectorheadlineheadline_rawtitlebreadcrumbslabelrankage_daysdebug_labelmark)6__name__
__module____qualname__r3  str__annotations__r4  r5  r6  r7  r8  r9  r:  r;  r<  r=  r>  r?  r@  rA  rB  rC  rD  rE  rF  rG  rH  rI  rJ  rK  rL  rM  rN  rO  rP  rQ  rR  rS  rT  rU  rV  rW  rX  rY  rZ  r[  r\  r]  r^  r_  r`  ra  rb  rc       !./cmf/models/cmf_search_engine.pyr2  r2  J   s!   IcF3HSHSOCN3$(c(KHSHS"" c OCOCLN3 $#$HSM#!s!!s!N3OCKLN3OC##SOCOCN3OC$(c(-1&s1KM#M#M### c  $#$##SHSLE#KE#DOHSKDOrj  r2  c                   *    e Zd Zd Zd Zd Zd Zd Zy)SearchDriverBasec                     t         NNotImplementedselfsearchs     rk  select_empty_query_topz'SearchDriverBase.select_empty_query_top       rj  c                     t         ro  rp  rr  s     rk  select_empty_queryz#SearchDriverBase.select_empty_query   rv  rj  c                     t         ro  rp  rr  s     rk  select_bm25zSearchDriverBase.select_bm25   rv  rj  c                     t         ro  rp  rr  s     rk  select_defaultzSearchDriverBase.select_default   rv  rj  c                     t         ro  rp  rr  s     rk  select_attachmentz"SearchDriverBase.select_attachment   rv  rj  N)rd  re  rf  ru  rx  rz  r|  r~  ri  rj  rk  rm  rm     s    rj  rm  c                   $    e Zd Zd Zd Zd Zd Zy)SearchDriverEvaFullSearchc                     t         j                  j                  |      }g }|D ]  }|j                  t	        di |        |S )Nri  )modelsCmfFullSearchfilter_empty_topappendr2  )rs  rt  
found_objsres	found_objs        rk  ru  z0SearchDriverEvaFullSearch.select_empty_query_top   sO    ))::6B
# 	IJJ)  	 
rj  c                 v   g }g }g }g }g }|j                   dk(  s|j                   dk(  r|t        j                  j                  |j                  t        t        j                  j                        dd|      }t        j                  j                  |j                  dd|      }|j                   dk(  s|j                   dk(  r|t        j                  j                  |j                  t        t        j                  j                        dd|      }t        j                  j                  |j                  dd|      }|j                   dk(  s|j                   dvr.t        j                  j                  |j                  d	dd|
      }g }||z   |z   |z   |z   D ]  }|j                  t        di |        |S )NANYCmfTaskEMPTY   )force_related_person_loginr_  ra  rt  )r_  ra  rt  CmfDocumentr  r  r*  )force_field_namer_  ra  rt  ri  )
model_namer  r  filter_onceAKA_TASK_MODELSrg  gcurrent_userloginAKA_DOC_MODELSAKA_DICT_MODELSr  r2  )	rs  rt  task_empty_query_related_usertask_empty_querydoc_empty_query_related_userdoc_empty_queryother_empty_queryr  r  s	            rk  rx  z,SearchDriverEvaFullSearch.select_empty_query   s   (*%')$%):):i)G,2,@,@,L,L&&+.q~~/C/C+D -M -)  &33??&&	  @   %):):m)K+1+?+?+K+K%%+.q~~/C/C+D ,L ,( %22>>%%	 ? O %):):B\)\ & 4 4 @ @&&!' !A ! 69IILhhkzz  ~O  O 	8IJJ)6I67	8 
rj  c           	      "   |j                   dk(  s|j                   dk(  r=t        j                  j                  |j                  |j
                  dddgdd|      }ng }|j                   dk(  s|j                   d	k(  r=t        j                  j                  |j                  |j
                  dddgdd|      }ng }d
}|j                   dk7  rd}|j                   dk(  r<t        j                  j                  |j                  |j
                  dd|gd|      }nM|j                   dvr=t        j                  j                  |j                   g|j
                  dd|gd|      }ng }g }||z   |z   D ]  }|j                  t        di |        |S )Nr  r  r,  r   i,  TOPT)r  force_slicer_  include_attachmentrt  r        )r  r  r_  rt  r  ri  )r  r  r  search_once_top_bm25r  tsquery_without_synr  search_oncer  r  r2  )rs  rt  result_tasksresult_docsother_sliceresult_dictr  r  s           rk  rz  z%SearchDriverEvaFullSearch.select_bm25  s   %):):i)G!//DDVE[E[]c]w]w=C9:3u?CF E TL
 L%):):m)K ..CCFDYDY[a[u[u=C9:3u?CF D TK
 K%K% ..::6;Q;QSYSmSm=CRST_Q`hmv| ; ~K&@@ ..::F<M<M;NPVPjPj=CRST_Q`hmv| ; ~K K%3kA 	8IJJ)6I67	8 
rj  c                 z   g }g }g }g }g }g }|j                   dk(  r8t        j                  j                  |j                  |j
                  d|      }nF|j                   dvr8t        j                  j                  |j                   g|j
                  d|      }|j                   dk(  s|j                   dk(  rot        j                  j                  |j                  |j                  dd|      }t        j                  j                  |j                  |j
                  d|      }|j                   dk(  s|j                   d	k(  rot        j                  j                  |j                  |j                  dd|      }t        j                  j                  |j                  |j
                  d|      }|j                   dk(  s|j                   d
k(  r7t        j                  j                  |j                  |j
                  d|      }g }||z   |z   |z   |z   |z   D ]  }	|j                  t        di |	        |S )Nr  MAIN)r_  rt  r  r  SYNsyn)r_  rc  rt  r  CmfAttachmentri  )r  r  r  r  r  r  r  tsquery_with_synr  AKA_ATTACHMENT_MODELSr  r2  )
rs  rt  r  result_tasks_synr  result_docs_synr  result_attachsr  r  s
             rk  r|  z(SearchDriverEvaFullSearch.select_default6  sS    % ..::&&**	 ; K &@@ ..::""#**	 ; K %):):i)G%33??@V@VX^XoXo6;%PV  @  X!//;;F<R<RTZTnTn6<V < ML %):):m)K$22>>v?T?TV\VmVm6;%PV ? XO ..::6;P;PRXRlRl6<V ; MK %):):o)M#11==,,**	 > N$ %(88;FX[iilww 	8IJJ)6I67	8 
rj  N)rd  re  rf  ru  rx  rz  r|  ri  rj  rk  r  r     s    0d"HJrj  r  c                       e Zd Zy)SearchDriverElasticSearchN)rd  re  rf  ri  rj  rk  r  r    s    rj  r  c                   "   e Zd ZdZdgZdgZdgZej                  j                  j                         D  cg c](  }|j                  r|j                  dvr|j                  * c}} Z e       ZdZdZe	 	 	 	 	 d!d       Zd Zd	 Zd
 Zd Zd Zd Zd Zed        Zd Zd Zed        Zed"d       Z ed        Z!ed        Z"ed        Z#ed        Z$ed        Z%d Z&ede'de'de(fd       Z)ed#d       Z*ed$d       Z+ed%d        Z,yc c}} w )&CmfSearchEngineu  
    Главная задача: выполнение поиска (без учета дата-драйвера PG или Elastic).

    Алгоритм:
    - Подготовка запроса
    - Запрос делаем через дата-драйвер, получаем сырые предварительные данные (х10 объема)
    - Сортируем по классам
    - Сортируем по bm25eva
    - Проверяем права
    - Смешиваем потоки (оптимизированно с проверкой прав)
    - Формируем вывод

    Драйвер:
    - Уметь делать запросы
    - Не обязательно, но желательно: давать статистику bm25 (даже если используется Elastic,
          мы храним данные в нашей таблице все равно и можем посчитать bm25 статистику из нее)
    r  r  r  )r  r  r  Nc                    t        |      dkD  rt        dd       |d}|sddg}t               }||_        ||_        ||_        ||_        ||_        ||_        ||_	        ||_
        ||_        |	|_        |
|_        ||_        d|_        ||_        ||_        ||_        t&        j(                  j+                  d      }|d	kD  rt-        d
|        t/        t0        d      s| j3                          |j5                          |j7                          t9        |      |_        d|j:                  d<   |j                  j=                  d      r|j                  d d |_        |j
                  dk(  rd|_        |j                  dk(  rd|_        d|_        |rt?        |      |_         nt?               |_         |jB                  dk(  r|j                  r|jE                          nM|jB                  dk(  r|jG                          n-|j                  r|jI                          n|jK                          |jM                          |jN                  S )Ni   uY   Превышена максимально допустимая длина запроса!T)abort r   d   )is_dirty   u   Идет процесс индексации, могут быть доступны не все результаты поиска. Осталось объектов: FTS_STAT_WORDSModelr  r+  
CmfCommentr.  )(len	cmf_alertr  r  orig_field_name
field_nameorig_search_queryonly_idsslicer
   no_analitycscheck_access_custom_fntoparchiveddeletedFSTorig_tag_nameorig_tree_parent_idaddon_filterr  r  countcmf_notehasattrAPPbm25_stat_words_loadprepare_search_queryprepare_additional_filterlistfullsearch_sliceendswithsetrecent_projectlike_search_querysearch_empty_top_recentsearch_empty
search_topsearch_main_prepare_final_resultfinal_result)clsr  r  search_queryr  r  r
   r  check_accessr  r  r  tag_nametree_parent_idrecent_projectskwargsrt  dirty_counts                     rk  
fts_searchzCmfSearchEngine.fts_search  s:    |S qy}~LHE " '!+&#/ "*(4%
" 
'%3"$ **00$0?  }  ~I  }J  K  Ls,-$$& 	##%((*"&u+%&" %%g. & 1 1#2 6F% )F , %F *F $'$8F!$'EF!& ##r)fjj**,%%+!ZZ $$&"""rj  c                 J   g }g }g }g }|D ]  }|j                   | j                  v r|j                  |       -|j                   | j                  v r|j                  |       W|j                   | j                  v r|j                  |       |j                  |        ||||fS ro  )r3  r  r  r  r  )rs  search_objects	aka_tasksaka_docsaka_dict
aka_attachsearch_objects          rk  _split_search_objects_to_4_akaz.CmfSearchEngine._split_search_objects_to_4_aka'  s    	
+ 	/M&&$*>*>>  /((D,?,??.((D,F,FF!!-0.	/ (Hj88rj  c                    g }g }t         j                  rKt         j                  j                  j                  r't         j                  j                  j                  dk(  rg |fS t	        t         j                  j                        }|D ]?  }|j
                  r ||j
                  v r|j                  |       /|j                  |       A ||fS )Nr  )r  r  r  valuerg  r9  r  )rs  r  relatednot_related
user_loginr  s         rk  %_split_search_objects_to_related_userz5CmfSearchEngine._split_search_objects_to_related_user7  s    ~~Q^^%9%9%?%?1>>CWCWC]C]acCc~%%--.
+ 	2M66:IpIp;p}-""=1		2
 ##rj  c                     g }g }|D ]4  }|j                   dk(  r|j                  |       $|j                  |       6 ||fS )Nr  )rc  r  )rs  r  r  nosynr  s        rk  "_split_search_objects_to_nosyn_synz2CmfSearchEngine._split_search_objects_to_nosyn_synF  sM    + 	,M!!U*

=)]+		,
 czrj  c                    g }t               }t        j                         }g }| j                  j                  |       }| j	                  |      \  }}}}d }	ddd}
	 t        |      } |	||
|        |	||
|        |	||
|        |	||
|        |	||
|        |	||
|       t        |      }|dk\  s||k(  rn^|d d | _        t        j                         |z
  dkD  r.t        j                  dt        j                         |z
          y y )Nc                     |d   |d   k\  ry |sy |j                  d      }d|_        | j                  |       |dxx   dz  cc<   y )Nr  total_limitr   	empty_top   poprb  r  r  countersobj_listobjs       rk  append_if_exists_empty_topzKCmfSearchEngine.search_empty_top_recent.<locals>.append_if_exists_empty_top[  sL     H]$;;,,q/C)COJJsOW"rj  r      r  r     皙?z3PROF fulltext_search empty_queries_top selects got )	r  timedriverru  r  r  sorted_resultr  debug)rs  r  skip_idsprof_str  top_results_taskstop_results_docstop_results_dictstop_results_attachr  counters_empty_topres_count_beforeres_count_afters                rk  r  z'CmfSearchEngine.search_empty_top_recentQ  s8   5))+;;DAUYUxUx  zH  VIR+->@R		# ()<"<0&|5GIZ[&|5GIZ[&|5GIZ[&|5GIYZ&|5GIYZ&|5GI[\!,/O"$;K(K  *#2.99; 3&GGI$))+X_J_I`ab 'rj  c                 f   g }t               }t        j                         }| j                  j                  |       }| j	                  |      \  }}}}| j                  |      \  }	}
| j                  |      \  }}d}t        |	|
|||g      rX|	df|
df|df|df|df|dff}| j                  |||       |dz  }|dkD  rt        j                  d       nt        |	|
|||g      rXt        j                         |z
  dkD  r-t        j                  dt        j                         |z
          || _
        y )Nr   r    W   Баг в поиске, много данных, либо не идет вычитка.r  z&PROF fulltext_search search_empty got )r  r  r  rx  r  r  r  _add_if_existsr  r  r  )rs  r  r  r  r  results_tasksresults_docsresults_dictsresults_attachr  r  r  r  
iter_countprocessing_groups                  rk  r  zCmfSearchEngine.search_empty|  sd    5 ))+77=EIEhEhiwExB|]N9=9c9cdq9r7%'77;7a7abn7o5$o
02B.! " /2!1%-q1 !$"#   0-J!OJD sw# 02B.! "& 99; 3&GG<TYY[7=R<STU*rj  c                    t        j                          }g }t               }| j                  dk(  rg S | j                  dk7  rg S | j                  j                  |       }| j                  |      \  }}}}| j                  || j                        }| j                  || j                        }t        j                          |z
  dkD  r-t        j                  dt        j                          |z
          t        j                          }t               }g }	d }
d }ddddddd	}dd
d}	 t        |	      } |
|	||        |
|	||        |
|	||        |
|	||        ||	||        |
|	||        |
|	||        |
|	||        |
|	||       t        |	      }|dk\  s||k(  rn||	d d | _        t        j                          |z
  dkD  r.t        j                  dt        j                          |z
          y y )Nr.  r  r  z'PROF fulltext_search TOP25 selects got c                     t        t        j                  j                        }|sy |j	                  d      }d}| j                  |       ||xx   dz  cc<   y )Nr   cr  )rg  r  r  r  r  r  )r  r	  r
  r  r  	cur_classs         rk  append_if_existsz4CmfSearchEngine.search_top.<locals>.append_if_exists  sM    Q^^112J,,q/C I JJsOY1$rj  c                     |d   |d   k\  ry |sy |j                  d      }d|_        | j                  |       |dxx   dz  cc<   y )Nr  r  r   r  r  r  r  s       rk  append_if_exists_otherz:CmfSearchEngine.search_top.<locals>.append_if_exists_other  sL     H]$;;,,q/C%COJJsOW"rj  r      )r   br*  a_limitb_limitc_limitr  r  r  z2PROF fulltext_search TOP25 mixing and prepare got )r  r  r  r  r  rz  r  	bm25_sortr  r  r  r  r  )rs  r  r  r  r  r  r  result_dictsr%  r  r,  r.  r	  counters_otherr  r  s                   rk  r  zCmfSearchEngine.search_top  s   ))+5 ??j(I5(I006BFBeBeftBu?k< nn[$2J2JK~~lD4L4LM 99; 3&GG=diikG>S=TUV ))+5
	%*	#  Q2? $%Q7"=1]HlC]HlC]HkB]HkB"=.,O]HlC]HlC]HkB]HkB!-0O"$;K(K " +3B/99; 3&GGHW^I^H_`a 'rj  c                 X   g }t               }t        j                         }| j                  j                  |       }| j	                  |      \  }}}}| j                  |      \  }}	| j                  |      \  }}
t        j                         |z
  dkD  r-t        j                  dt        j                         |z
          t        j                         }d}t        |||||	|
g      rq|df|df|df|df|	df|df|	df|df|df|
df|df|
df|df|dfg}| j                  |||       |dz  }|dkD  rt        j                  d       nt        |||||	|
g      rq|d d | _
        t        j                         |z
  dkD  r.t        j                  d	t        j                         |z
          y y )
Nr  z&PROF fulltext_search main selects got r      r  r  r   r  z1PROF fulltext_search main mixing and prepare got )r  r  r  r|  r  r   r  r  r  r!  r  )rs  r  r  r  r  r  r  r5  r  r  r  r&  r'  s                rk  r  zCmfSearchEngine.search_main  s   5 ))+33D9BFBeBeftBu?k<)-)P)PQ])^&&'+'N'N{'[$_99; 3&GG<TYY[7=R<STU ))+
<lNL\^mnoq!a #q!!1%q!!1%#q! !$a  !$#q!% 0  0-J!OJD sw= <lNL\^mno@ +3B/99; 3&GGG		V]H]G^_` 'rj  c                     |S )u   
        Подчистка оригинального квери, который ввел пользователь:
        - удаление стоп-слов
        )splitr  r  join)r  r  clean_search_query_listwclean_search_querys        rk  _clean_search_queryz#CmfSearchEngine._clean_search_queryR  s
     rj  c                 d   | j                   }|j                         }t        j                  |      }|| _        | j                   j                         | _        d | _        | j
                  r_t        j                  d| j
                        s t        j                  d| j
                        r| j
                  j                         | _        | j                  | j                  dd      | _        | j                  | j                  d      | _        | j                  | j                  d      | _        y )Nz^[a-zA-Z0-9]+-[0-9]+$z^[0-9]+$FT)synonyms
stop_wordsrA  )r  lowercmfutilninjar  r  search_obj_coderematchupperparse_search_queryrB  r  r  )rs  r  s     rk  r  z$CmfSearchEngine.prepare_search_queryf  s    --#))+}}\2(!%!7!7!=!=!?  $!!XX.0F0FG288T_aeawawKx#'#9#9#?#?#AD  11$2C2Ce`d1e#'#:#:4;L;LW\#:#]  !% 7 78I8ITX 7 Yrj  c                     | j                  | j                  | j                        | _        | j	                  | j
                        | _        y ro  )	calc_tagsr  r  r  _get_all_branchesr  tree_parent_filter)rs  s    rk  r  z)CmfSearchEngine.prepare_additional_filter}  s9    t'9'94;Q;QR"&"8"89Q9Q"Rrj  c                     t               }|r:t        |t              r|D ]  }|j                  |        n|j                  |       | j	                  |      \  }}t        |j                  |            S ro  )r  
isinstancer  add_extract_tagsunion)r  r  r  r-  _tagextracted_tagss         rk  rM  zCmfSearchEngine.calc_tags  sg    u(D)$ #DHHTN# "'*'8'8'F$nDJJ~.//rj  c                 H   t               }| j                  d      }t        |      dk(  r| |fS d} |dd D ]X  }|dk(  r	t        j                  d|d      }|d   dk7  r|j	                  |d          t        |      dk(  sK|  |d    |d    } Z | j                         } | |fS )	zExtrats tags from the given search_query and returns its reminder and a set of extracted tags

        Args:
            search_query (str)

        Returns:
            tuple[str, list[str]]: search_query reminder and a set of extractd tags
        #r  r  Nz(\W)r   r     )r  r:  r  rH  rR  strip)r  r-  sharp_splittedtoken
sub_tokenss        rk  rS  zCmfSearchEngine._extract_tags  s     u%++C0~!#%%#AB' 
	OE{'5!4J!}"A' :!#".
1z!}oN
	O $))+T!!rj  c                 8    t        j                  ||dz   z        S )Nr  )mathlog)r  Ndfs      rk  bm25_idfzCmfSearchEngine.bm25_idf  s     xx2a4!!rj  c                 Z   i }i }t         j                  j                  d      |d<   t         j                  j                  d      |d<   t         j                  j                  j
                  j                         j                  d      j                         }t         j                  j                  j
                  j                         j                  d      j                         }|t        |      dz  dz     d   }|t        |      d	z  dz     d   }t        |      d
kD  r|d   d   }|d   d   }|t        |      dz  dz     d   }|t        |      d	z  dz     d   }t        |      d
kD  r|d   d   }|d   d   }d}	|D ]R  }
i ||
j                  <   | j                  |d   |
j                        ||
j                     d<   ||
j                     d   }T |D ]`  }
|
j                  |vri ||
j                  <   | j                  |d   |
j                        ||
j                     d<   ||
j                     d   }b i |d<   |d   d<   |d   d<   i |d<   i |d<   ||   d   |d   d<   ||   d   |d   d<   ||   d   |d   d<   ||   d   |d   d<   |t        _        |t        _        y )Nr  )r3  r  z SELECT word, ndoc, nentry  FROM ts_stat('SELECT ml_text_tsvector from cmf_full_search where obj_model=''CmfDocument''  ') where  ndoc > 10  order by ndoc desc limit 10000 z SELECT word, ndoc, nentry  FROM ts_stat('SELECT ml_text_tsvector from cmf_full_search where obj_model=''CmfTask''  ') where  ndoc > 10  order by ndoc desc limit 10000 r  r  r   
   i  i  i  ditiFST_STAT_WORDS_DEFAULTFST_FRQ_MIDFST_FRQ_LOW)r  r  r  CmfGlobalSettingsdpdata_driverSessionexecutefetchallr  wordrc  ndocr  r  
FTS_COUNTS)r  r  rs  cmfdoc_statcmftask_statdoc_mid_worddoc_low_wordtask_mid_wordtask_low_wordlastrowlast_doc	last_tasks                rk  r  z$CmfSearchEngine.bm25_stat_words_load  s   
$*$8$8$>$>$>$W
=! & 4 4 : :Y : O
9..11==EEGOO  Q  @  I  I  K//22>>FFHPP  R|  }  F  F  H #3{#3A#5s#:;A>"3{#3B#6#;<Q?{d"&s+A.L&t,Q/L %S%6q%8#%=>qA$S%6r%93%>?B|t#(-a0M(.q1M 	6C')N388$-0\\*]:SUXU]U]-^N388$T*%chh/5H	6   	7Cxx~-+-sxx(-0\\*Y:OQTQYQY-ZN388$T*&sxx06I		7 46/09A/069B/06(*}%(*}%.<\.J4.P}%d+.<\.J4.P}%d+.<].KD.Q}%d+.<].KD.Q}%d++#rj  c                    g }|D ]2  }| j                  ||      \  |_        |_        |j                  |       4 t	        |d d      }t        |      dk\  rd}|d   j                  |d   j                  z  dkD  rd}nI|d   j                  |d   j                  z  dkD  rd}n$|d	   j                  |d   j                  z  dkD  rd}|dkD  rt	        |d | d
       ||d  z   }|d d D ]~  }|j                  dz   d|j                  dd|j                   d| z   |_        t        |j                  |j                  dd|j                  d|j                  |j                          |S )Nc                     | j                   S ro  )	rank_bm25r   s    rk  <lambda>z+CmfSearchEngine.bm25_sort.<locals>.<lambda>  s
    AKK rj  T)keyreverser8  r   r  g333333?rY  r  c                     | j                   S ro  )ra  r  s    rk  r  z+CmfSearchEngine.bm25_sort.<locals>.<lambda>
  s
    QZZ rj  )r  r   zrbm25=.2fz | tsq=z.4f)	bm25_rankr  
rank_debugr  sortedr  r_  printr6  r`  r5  )r  r  tsquerynewresr{  r  rs          rk  r4  zCmfSearchEngine.bm25_sort  s}     	C,/MM#w,G)CM3>MM#	 f"7Fv;?Day""6!9#6#66=$$VAY%8%884?$$VAY%8%884? axu3GH6RVRW=X 	[Aggmq{{3.?qgV]U^&__AG!**S 1166#,?qzzZ	[ rj  c                    fd} |j                         }i }j                  !j                  dk7  r |j                        }t        j                  dd|      j	                         }d}d}d}	d}
d}d}g }|D ]8  }||v r	|j                  |       |dz  }||vr$d}j                  dk(  rd	}|t        j                  v r,|t        j                  |   v rt        j                  |   |   }nt        j                  d
   |   }d}d}t        ||   j	                  d            }|dkD  rd}|||dz   z  z  ||z   z  }|}d}||v r5j                  r)|j                  j                         v r|
dz  }
d}|dz  }n|	dz  }	|dkD  r
|dkD  r|dz
  }||z  }|d| d|dd|dd| d|dd| z  }||z  }; |}t        j                  j                  t        j                  j                        j                   z
  j#                         dz  dz  }dddd |z   z  z  z   }||z  }d!|d"d#|dd|z   }d}t$        j&                  j(                  j*                  ret$        j&                  j(                  j*                  dk7  r>j,                  r2t$        j&                  j(                  j*                  j,                  v rd$}d}d%t$        v rj.                  t$        j0                  v rd$}d&|	|
z    d'| d(|dd|z   }||z  |z  }||fS ))Nc                     j                   j                  dd      } t        j                  dd|       } | d d } t        j                  d| z   dz         S )N'"z":([0-9,]*)z":"\1", z{ z })rZ  replacerH  subjsonloads)tmpr  s    rk  tsvector2dictz0CmfSearchEngine.bm25_rank.<locals>.tsvector2dict  sR    &&..sC8C&&c:Ccr(C ::dSj4/00rj  r  z[()!|&']r  r   r  rf  r  rg  rh  g      ?g      ?,r  FTr8  r  g?z w=z
 bm25Orig=r  z idf=z.1fz fqd=z ord=z	 in_name=i     re  rY     z age=.0fz ageF=g?FTS_RECENT_PROJECTSz wc=z rur=z rpr=)rZ  rS  rH  r  r:  r  r3  r  r  r  r6  rD  datetimer  timezoneutcr7  total_secondsr  r  r  r  r9  r8  r  )r  r  r  r  doc_vector_dictname_vector_dictqueryr`  r  	w_doc_cnt
w_name_cntw_cntorder_factor
done_wordsr=  	model_keyidfkr0  fqdbm25	bm25_origin_namera  
age_factorrelated_user_rankrecent_project_ranks    `                         rk  r  zCmfSearchEngine.bm25_rank  s   	1 ((<(<=(S->->"-D,S->->?z#w/557
	

 %	AJa QJE'I}}	) 	C&&&98J8J18M+M((+I6(()AB9MAAoa(..s34CQw#1+&A.DIG$$!s||?Q?Q?S:Sa
axQ	qy\C/+c1,&DCs*YsO5S	seSXYefiXjjst{s|}}JDLDK%	N %%))(*;*;*?*?@ATATTccefjjkmm r1c(l+,,
j XcN&C0@B:M

 ^^!!''ANN,@,@,F,F",LQTQnQn$$**c.K.KK $   A%#*<*<@U@U*U"&Ij017H6IObcfNgghijtt
''*==Zrj  c                 6    d| vr| S | j                  d      d   S )Nz@#@#@#r   )r:  )r;  s    rk  _remove_suffix_from_headlinez,CmfSearchEngine._remove_suffix_from_headlinem  s#    8#O~~h'**rj  c                    | j                   | j                  d   | j                  d    }| j                  rGi }g }|D ]  }|j                  |j                         |j
                  r| j                  |j
                        nd}d}|j                  |j                  d}|j                  }|j                  r|dz   |j                  z   }|j                  |j                  |j                  | d| d	| d	|j                  d
dt        j                   	t        j!                  |j"                        ||j                  |j                  d||j                  <    |t        _        t        j'                  d       |S i }| j(                  rXi }	|D ]7  }|	j+                  |j,                  g       j                  |j                         9 |	j/                         D ]  }
t1        t2              |
   j4                  r| j(                  dgz   }dd|	|
   gg dg}n| j(                  }dd|	|
   g}|
dk(  r|g dz   }|
dk(  r|dgz   }t6        j9                  |
      j;                  ||| j<                  | j>                        }|D ]]  }t1        t2              j,                     j4                  r&|j4                  r|||j@                  jB                  <   O|||jB                  <   _  ntE        d       t        j'                  d       g }|D ]  }|jG                  |j                        }|s"|j
                  r| j                  |j
                        nd}d}|j                  |j                  d}|j                  }|j                  r|dz   |j                  z   }|jH                  dk(  r|jJ                  s|jL                  |jJ                  jN                  |jJ                  jP                  jR                  | d| d	| d	|j                  d
dt        j                   	||j                  |j                  |jB                  jR                  |j                  |jT                  |jV                  |jX                  t        j!                  |j"                        d}nt[        j\                  d|j                        d   }|jB                  jR                  ||jP                  jR                  | d| d	| d	|j                  d
dt        j                   	||j                  |j                  t        j!                  |j"                        d}|jH                  dk(  r|j^                  jR                  |d<   	 | j`                  s|jc                  dd       n| ja                  |      std        | j(                  D ]6  } |j\                  d      d   }tg        ||j                     |d      }|||<   8 |j                  |        t        j'                  d       | jh                  sHtk        t2        jl                  jn                  | jp                  t6        js                  |dd       d !       || _:        y# td        $ r Y rw xY w)"u5   
        Итоговая обработка
        r   r  r  NoneNz.6f-z ||| r  r  z words=)idr,  coder[  r^  r_  r`  ra  zfulltext_search ENDcmf_ver_headIN)cmf_ver_curz==Tr  r  )urlurl_previewurl_preview_imgr  fullsearch_answer)r
   filterinclude_archivedinclude_deleteduY   DEV: FATAL. Укажите в запросе поиска список полей fields=z"fulltext_search Start check access)r  r,  r  r[  r_  r`  ra  	attach_idattach_name
attach_urlattach_url_previewattach_url_preview_imgr^  z DZQTN )r  r,  r  r[  r_  r`  ra  r^  T)TEXKOM_skip_failread_auditTEXKOM_ppp_project_simplecheck.r  )r  obj_dict)r  );r  r  r  r  r4  r[  r  r`  r_  rb  r]  r5  ra  r  r  r  r  r^  fulltext_search_headlinesr  r
   
setdefaultr3  keysvarsr  cmf_verrE  get_model_by_namer  r  r  r  r  r  get
class_nameparent	parent_idr,  r  r  r  r  r  rH  r:  r  r  _acl_check_readCmfPermissionErrorgetattrr  schedule_deferred_jobr  _do_calc_statisticsr  
dumps_dictr  )rs  r  r  	result_idr  r[  formated_rankr_  objectsids_by_modelr  _fields_filterr
  r  r  r  r]  r	   attrs                       rk  r  z%CmfSearchEngine._prepare_final_results  s    ))$**Q-

1F==(*%I!   *LMJJ4<<QZZH\^ &66%'(vvclM==!CK!--7E((GGJJ#+*E%-!**UXIYY`abafaf`g h#'::amm#<"FF !

	7)!((3* +DA'GG)*;;L! J''R8??IJ +//1 .
<
+33 KK>*::G .l:6NOQlmG KKG!4j)ABG0%(QQG.%)<(==G"44Z@EE""%)]]$(LL	 F  $ .CF|AKK088S[[7: 0 0 3 34*-	.).4 qs	45 F	A++ahh'CLMJJ4<<QZZH\^ &66%'(vvclM==!CK!--7E>>_4 ::  "mm #

 #

 5 5'/jeWAm_AajjY\M]]defejejdk$l!& !$%JJ%(VV\\'(ww&)gg.1oo252E2E'+zz!--'@ H" HHZ9!<E "ffll % #'/jeWAm_AajjY\M]]defejejdk$l!& !$%JJ'+zz!--'@	 H >>]2474I4I4O4OH0166++tlp+q!88=00!% / +C 0 3&wqxx'8%F*./ JJx(F	P 	
%&   !$$88(,(>(>GL^L^_bcfdf_gLhi
  ) *  s   .BY//	Y<;Y<r
  r  r  c                    |D ]  \  }}t        |      D ]l  }|s |d   j                  |v r|j                  d       *|j                  |d          |j	                  |d   j                         |j                  d       n  y )Nr   )ranger4  r  r  rR  )r  r
  r  r  lstr  _s          rk  r!  zCmfSearchEngine._add_if_exists  s}    " 		JC5\ q6==H,GGAJ##CF+SV]]+
		rj  c                     | sg S t         j                  j                  j                  j	                         j                  dd| i      }|D cg c]  }|d   	 c}S c c}w )Na  
                WITH tree_parents AS (
                    WITH RECURSIVE r AS (
                        SELECT obj_id, obj_code, obj_tree_parent_id
                        FROM cmf_full_search
                        WHERE obj_tree_parent_id = :tree_parent_id

                        UNION

                        SELECT cfs.obj_id, cfs.obj_code, cfs.obj_tree_parent_id
                        FROM cmf_full_search AS cfs
                        JOIN r ON cfs.obj_tree_parent_id = r.obj_id
                    )
                    SELECT obj_id FROM r
                    WHERE r.obj_id IN (SELECT obj_tree_parent_id FROM r)

                    UNION

                    SELECT :tree_parent_id
                )
                SELECT * FROM tree_parents;
            r  r   )r  r  rl  rm  rn  ro  )r  recordsr  s      rk  rN  z!CmfSearchEngine._get_all_branches  s`    
 I&&))55==?GG, ~./
2 &&!&&&s   Ac                    dt         _        |j                  dd      }t        j                  dd|      }t        j
                  d|      }d}d}d}	|D ]T  }
t        |
      dk(  rt        |
      dk(  r|
dv r&|
d	v r	|	d|
 z  }	3|
dk(  r9|
d
v r|	dz  }	C|
dv r|	dz  }	Mt        |
      dk(  r\|
d   dv r&t        |
      dk(  rr|	d|
dd   z  }	|d|
dd   z  }|r|
j                  dd      }
|dz  }|dkD  rd}d}|	r|	d   dvrd}t        |
      dkD  r| j                  |
|      }t        t        |      t        t              z
        }t        |      dk(  r|	| d|d    z  }	n-t        |      dkD  r|	| ddj                  |      z   dz   z  }	n	 |r|dk\  r n
|dk\  r nW |rt        |      dkD  r|d   dv r|dd  }|S |	j                  dd      j                  dd      j                  dd       j                  d!d       j                         }	|	r|	d   dv r|	dd  }	|	r|	d   dv r|	dd  }	|	r|	d   dv r|	dd  }	|	r|	dd  dv r|	d d }		 t        j                  j                  j                   j#                         j%                  d"d#|	i      }t        |      d   d   }	t/        |d%|	       |	S # t&        j(                  j*                  $ r}t        j                  j                  j                   j-                          t        j                  j                  j                   j#                         j%                  d$d#|i      }t        |      d   d   }	Y d }~d }~ww xY w)&Nr  zwww.u   [^-A-Za-zА-Яа-я0-9()|&!' ]r  z(,| |&|\||\(|\))r   r  )r  !z()&|)r   r<   |z |)r   r   &z &rY  z& !r  r  r8  F)r  r  r  rC  z ( z | z )re  )r  r  z OR z or z AND z & z and z!select to_tsquery('russian', :q);qz+select websearch_to_tsquery('russian', :q);z->)r  r  r  rH  r  r:  r  prepare_wordr  r  EVA_OR_QUERY_STOP_WORDS_NORMr;  rZ  r  
CmfSynonymrl  rm  rn  ro  
sqlalchemyexcProgrammingErrorrollbackr  )r  r  rA  rB  
first_wordsearch_query_allowed_symbtokens
word_countstopsr  r  oper	sug_wordsr  es                  rk  rK  z"CmfSearchEngine.parse_search_query@  s    !))&4$&FF+KSR^$_!-/HI 
 7	A1v{1v{
?;1QCLA8))r
&&r
 1v{tz!q6Q;s1QR5']"3qug&		#s#A!OJA~ D"Z/1vz,,Q,B	 Y#6R2S!ST	y>Q&D69Q<.11A^a'D6

9(==DDAjAo Ro7	r 5z!|aJ 6ab	LIIfe$,,VU;CCGUS[[\cejkqqs1#!"A1#!"A1#!"A23:%#2A	$''**66>>@HHIpsvxyrz{GWa #A 	lD!$ ~~.. 	$  ,,557 ''**66>>@HHAl#%G Wa #A	$s   AJ6 6M$BMM$c                    t         xj                  d| dz  c_        |d   t        j                  v rd}t        j
                  }nd}t        j                  }g }t        j                  |      s|j                  |      }g }t        j                  |      }|r.t         xj                  d| dz  c_        |j                  |       t         xj                  dz  c_        d}	|D ]  }
|	d	k\  r nt        |
      d	k  r|
d   |d   k7  r|
d
   |d
   k7  r0d|
v rF|
j                  dd      }
t         xj                  d|
 dz  c_        |j                  |
       |	d
z  }	zt         xj                  d|
 dz  c_        |j                  |
       |	d
z  }	 g }t        j                  j                  j                   j#                         j%                  dd|i      }d}	|D ]w  \  }}|	d
k\  r nmt        |      d	k  r|d   |d   k(  s|d
   |d
   k(  s3|j                  dd      }t         xj                  d| dz  c_        |j                  |       |	d
z  }	y t'        |      t'        |      z  |hz  }n|h}t'               }|D ]a  }
t(        j+                  |
      d d	 D ]D  }|j-                  |j.                         t         xj                  d|j.                   dz  c_        F c ||z  }t'               }|rt        j                  j1                  ddt1        ||hz        gddgdgdd
g      }|D ]  }|j2                  s|j2                  j4                  j7                  d      d d D ]P  }|j9                         j                  dd      }t         xj                  d| dz  c_        |j-                  |       R  ||z  t'        |      z  }t1        |      S )Nz|w:z: r   enruzaddNinjaRevers z, zspellError, rY  r  r  r  z	addSpell z
            SELECT
                name, similarity(:word, name) as sim
            FROM cmf_synonym
            WHERE
                :word % name
            ORDER BY "sim" desc
            LIMIT 5;
             rq  z<->zaddSpellTrgm z
normalize r,  r  r*  orderno)r  r
   order_byr  r     zsynAdd )r  r  stringascii_lettersrE  dictionary_endictionary_rudictionary_checksuggestninja_reversr  r  r  r  r  rl  rm  rn  ro  r  morphparserR  normal_formr  r*  r  r:  rZ  )r  rq  rA  lang
dictionaryfiltered_suggestions3suggestionsfiltered_suggestionsnwr   r=  filtered_suggestions2suggestions2_listsuggr  all_suggestionsnormalized_wordssynonym_wordssynonym_listsynonymr  s                        rk  r  zCmfSearchEngine.prepare_word  s   	3tfB7f***D ..JD ..J "''-$,,T2K#% %%d+B?2$b11$++B/EE|$EA  6q6Q;Q447?qttAw!8 		#r*AEEy2..E)003FA9QCr**$++A.Q)4 %'! & 1 1 4 4 @ @ H H J R R T 
! A, 	a6t9>7d1g%aDG);<<U3DEE}TF"55E)006FA	 ""67#>S:TTX\W]]O  $fO 5  	9Akk!nRa( 9 !$$R^^4:bnn%5R889	9 *,<< !,,11&$M]ae`fMfHg9h:@&9I<E;9:1 2 ?L ( -<<$\\//55c:2A> -GGI--c5971#R0%))!,	-- *M9C@U<VVO$$rj  )FNNFNNFFNNN)r  rg  returnztuple[str, set[str]])r  z	list[str])TFF)T)-rd  re  rf  __doc__r  r  r  cmfr  	CmfEntityiter_subclassesfull_searchr  r  r  r  r  r  classmethodr  r  r  r   r  r  r  r  r?  r  r  rM  staticmethodrS  rc  r  r4  r  r  r  r   r   r!  rN  rK  r  ).0ms   00rk  r  r    s   F $_N kO,--0ZZ-A-A-Q-Q-S n==Q\\Al-l || nO '(F ML `d$) h# h#T9 $	(cV)+XhbV<a~ " "&Z.S 
0 
0 " "Z " "
 1$ 1$f  6 T  T t + +
Y v 
d 
$ 
# 
 
  '  'F k kZ m% m%A ns   -Dr  )(cmf.includetypingr   r   r   r   r  r_  r  dataclassesr   r	   r
   	tracebacksys	itertoolscollectionsr   enchantr  	pymorphy3rH  r  bs4r   MorphAnalyzerr  RUSSIAN_STOP_WORDSENGLISH_STOP_WORDSr  r  EVA_OR_STOP_TSQUERYEVA_ARTIFACT_STOP_WORDSALL_STOP_WORDSALLOWED_FIELDSr2  rm  r  r  r  ri  rj  rk  <module>r:     s0    , ,    0 0   
  "    	  	! @  A   ^    !]  ]    !d  d "#58J#JMi#ij    H    n  n  '*<<=_, 8 8 8r "k 0 k^	 0 	T% T%rj  