From bb2358103e109121440d452192bda7c3cc945b36 Mon Sep 17 00:00:00 2001 From: chenyujie28 Date: Fri, 9 Jul 2021 18:11:12 +0800 Subject: [PATCH] Mod:fix_union_query_bugs --- src/search_local/index_read/key_format.h | 10 --- .../index_read/process/bool_query_process.cc | 84 +++++++++++++------ .../index_read/process/bool_query_process.h | 19 +---- .../index_read/process/match_query_process.cc | 9 +- .../index_read/valid_doc_filter.cc | 4 +- 5 files changed, 66 insertions(+), 60 deletions(-) diff --git a/src/search_local/index_read/key_format.h b/src/search_local/index_read/key_format.h index 76d20db..e1a11a9 100644 --- a/src/search_local/index_read/key_format.h +++ b/src/search_local/index_read/key_format.h @@ -14,16 +14,6 @@ class KeyFormat { public: typedef std::vector > UnionKey; - struct MemCompUnionNode{ - uint32_t ui_field_type; - std::string s_key; - - MemCompUnionNode(uint32_t field_type , std::string key) - : ui_field_type(field_type) - , s_key(key) - { } - }; - public: static std::string Encode(const UnionKey& oUnionKey); static bool Decode(const std::string& sKey, UnionKey& oUnionKey); diff --git a/src/search_local/index_read/process/bool_query_process.cc b/src/search_local/index_read/process/bool_query_process.cc index f51fb37..3686325 100644 --- a/src/search_local/index_read/process/bool_query_process.cc +++ b/src/search_local/index_read/process/bool_query_process.cc @@ -31,8 +31,8 @@ void BoolQueryProcess::HandleUnifiedIndex(){ for (; iter != and_keys.end(); ++iter){ fieldid_fieldinfos_map.insert(std::make_pair(((*iter)[0]).field , *iter)); } - component_->AndKeys().clear(); + std::vector > union_field_infos; std::vector union_key_vec; DBManager::Instance()->GetUnionKeyField(component_->Appid() , union_key_vec); std::vector::iterator union_key_iter = union_key_vec.begin(); @@ -48,32 +48,52 @@ void BoolQueryProcess::HandleUnifiedIndex(){ } } if(hit_union_key == true){ - std::vector > keys_vvec; + log_debug("hit union key combination"); + std::vector > keys_vvec; std::vector unionFieldInfos; bool b_has_range = false; for(union_field_iter = union_field_vec.begin(); union_field_iter != union_field_vec.end(); union_field_iter++){ std::vector field_info_vec = fieldid_fieldinfos_map.at(*union_field_iter); - std::vector key_vec; + std::vector key_vec; GetKeyFromFieldInfo(field_info_vec, key_vec , b_has_range); keys_vvec.push_back(key_vec); fieldid_fieldinfos_map.erase(*union_field_iter); // 命中union_key的需要从fieldid_fieldinfos_map中删除 } - std::vector union_keys = Combination(keys_vvec); + log_debug("has range query flag:%d" , (int)b_has_range); + std::vector union_keys = Combination(keys_vvec); for(int m = 0 ; m < (int)union_keys.size(); m++){ FieldInfo info; info.field = 0; info.field_type = FIELD_INDEX; - info.query_type= (b_has_range ? E_INDEX_READ_RANGE : E_INDEX_READ_TERM); + + for (uint32_t ui_query_type = E_INDEX_READ_PRE_TERM ; + ui_query_type < E_INDEX_READ_TOTAL_NUM ; + ++ui_query_type){ + if (query_bitset_.test(ui_query_type)){ + info.query_type = ui_query_type; + break; + } + } info.segment_tag = (b_has_range ? SEGMENT_RANGE : SEGMENT_DEFAULT); - info.word = union_keys[m].s_key; + info.word = union_keys[m]; + log_debug("union key[%d]:%s" , m, info.word.c_str()); unionFieldInfos.push_back(info); } - component_->AddToFieldList(ANDKEY, unionFieldInfos); + union_field_infos.push_back(unionFieldInfos); } } - std::map >::iterator field_key_map_iter = fieldid_fieldinfos_map.begin(); - for(; field_key_map_iter != fieldid_fieldinfos_map.end(); field_key_map_iter++){ - component_->AddToFieldList(ANDKEY, field_key_map_iter->second); + + if (!union_field_infos.empty()){ + log_debug("replace andkey database"); + component_->AndKeys().clear(); + std::vector >::iterator field_info_vet_iter = union_field_infos.begin(); + for (; field_info_vet_iter != union_field_infos.end();++field_info_vet_iter){ + component_->AddToFieldList(ANDKEY, *field_info_vet_iter); + } + std::map >::iterator field_key_map_iter = fieldid_fieldinfos_map.begin(); + for(; field_key_map_iter != fieldid_fieldinfos_map.end(); field_key_map_iter++){ + component_->AddToFieldList(ANDKEY, field_key_map_iter->second); + } } } @@ -112,7 +132,7 @@ int BoolQueryProcess::GetValidDoc(int logic_type , const std::vector& int BoolQueryProcess::GetValidDoc(){ if (query_bitset_.test(E_INDEX_READ_PRE_TERM) && query_bitset_.test(E_INDEX_READ_TERM)){ return query_process_map_[E_INDEX_READ_PRE_TERM]->GetValidDoc(); - } + } for (uint32_t ui_key_type = ORKEY; ui_key_type < KEYTOTALNUM; ++ui_key_type){ std::vector >::const_iterator filedinfo_vet_iter = \ @@ -308,15 +328,28 @@ int BoolQueryProcess::InitQueryProcess( return 0; } -void BoolQueryProcess::GetKeyFromFieldInfo(const std::vector& field_info_vec, std::vector& key_vec, bool& b_has_range){ +void BoolQueryProcess::GetKeyFromFieldInfo(const std::vector& field_info_vec, std::vector& key_vec, bool& b_has_range){ std::vector::const_iterator iter = field_info_vec.cbegin(); - for(; iter != field_info_vec.end(); iter++){ - if (SEGMENT_RANGE == iter->segment_tag ){ + for(; iter != field_info_vec.cend(); iter++){ + KeyFormat::UnionKey o_keyinfo_vet; + std::string s_format_key = ""; + if (E_INDEX_READ_RANGE == iter->query_type || + E_INDEX_READ_PRE_TERM == iter->query_type){ b_has_range = true; - key_vec.push_back(MemCompUnionNode(iter->field_type , std::to_string(iter->start))); - key_vec.push_back(MemCompUnionNode(iter->field_type , std::to_string(iter->end))); + + o_keyinfo_vet.push_back(std::make_pair(iter->field_type , std::to_string(iter->start))); + s_format_key = KeyFormat::Encode(o_keyinfo_vet); + key_vec.push_back(s_format_key); + + o_keyinfo_vet.clear(); + o_keyinfo_vet.push_back(std::make_pair(iter->field_type , std::to_string(iter->end))); + s_format_key = KeyFormat::Encode(o_keyinfo_vet); + key_vec.push_back(s_format_key); }else{ - key_vec.push_back(MemCompUnionNode(iter->field_type , iter->word)); + o_keyinfo_vet.push_back(std::make_pair(iter->field_type , iter->word)); + s_format_key = KeyFormat::Encode(o_keyinfo_vet); + key_vec.push_back(s_format_key); + log_debug("field type:%d , word:%s" , iter->field_type , iter->word.c_str()); } } } @@ -326,27 +359,24 @@ void BoolQueryProcess::GetKeyFromFieldInfo(const std::vector& field_i ** 输入:[[a],[b1,b2],[c1,c2,c3]] ** 输出:[a_b1_c1,a_b1_c2,a_b1_c3,a_b2_c1,a_b2_c2,a_b2_c3] */ -std::vector BoolQueryProcess::Combination(std::vector >& dimensionalArr){ +std::vector BoolQueryProcess::Combination( + std::vector >& dimensionalArr) +{ int FLength = dimensionalArr.size(); if(FLength >= 2){ int SLength1 = dimensionalArr[0].size(); int SLength2 = dimensionalArr[1].size(); int DLength = SLength1 * SLength2; - std::vector temporary(DLength); + std::vector temporary(DLength); int index = 0; for(int i = 0; i < SLength1; i++){ for (int j = 0; j < SLength2; j++) { - KeyFormat::UnionKey o_keyinfo_vet; - o_keyinfo_vet.push_back(std::make_pair(dimensionalArr[0][i].ui_field_type , dimensionalArr[0][i].s_key)); - o_keyinfo_vet.push_back(std::make_pair(dimensionalArr[1][j].ui_field_type , dimensionalArr[1][j].s_key)); - std::string s_format_key = KeyFormat::Encode(o_keyinfo_vet); - - temporary[index].s_key = s_format_key; - temporary[index].ui_field_type = FIELD_STRING; + temporary[index].append(dimensionalArr[0][i]); + temporary[index].append(dimensionalArr[1][j]); index++; } } - std::vector > new_arr; + std::vector > new_arr; new_arr.push_back(temporary); for(int i = 2; i < (int)dimensionalArr.size(); i++){ new_arr.push_back(dimensionalArr[i]); diff --git a/src/search_local/index_read/process/bool_query_process.h b/src/search_local/index_read/process/bool_query_process.h index 50fa47d..acbc98d 100644 --- a/src/search_local/index_read/process/bool_query_process.h +++ b/src/search_local/index_read/process/bool_query_process.h @@ -9,21 +9,6 @@ class QueryProcess; class PreTerminal; class GeoDistanceQueryProcess; -struct MemCompUnionNode{ - uint32_t ui_field_type; - std::string s_key; - - MemCompUnionNode() - : ui_field_type(-1) - , s_key("") - {} - - MemCompUnionNode(uint32_t field_type , std::string key) - : ui_field_type(field_type) - , s_key(key) - { } -}; - class BoolQueryProcess : public QueryProcess{ public: BoolQueryProcess(const Json::Value& value); @@ -44,8 +29,8 @@ private: int ParseRequest(const Json::Value& request, int logic_type); int InitQueryProcess(uint32_t type , const std::string& query_key, const Json::Value& parse_value); void HandleUnifiedIndex(); - void GetKeyFromFieldInfo(const std::vector& field_info_vec, std::vector& key_vec , bool& b_has_range); - std::vector Combination(std::vector >& dimensionalArr); + void GetKeyFromFieldInfo(const std::vector& field_info_vec, std::vector& key_vec , bool& b_has_range); + std::vector Combination(std::vector >& dimensionalArr); private: std::map query_process_map_; diff --git a/src/search_local/index_read/process/match_query_process.cc b/src/search_local/index_read/process/match_query_process.cc index 299aa97..b119d06 100644 --- a/src/search_local/index_read/process/match_query_process.cc +++ b/src/search_local/index_read/process/match_query_process.cc @@ -44,7 +44,7 @@ int MatchQueryProcess::ParseContent(int logic_type){ fieldInfos.push_back(fieldInfo); } } - else if (uiRet != 0 && segment_tag != SEGMENT_RANGE){ + else if (uiRet != 0){ fieldInfo.word = field_value.asString(); fieldInfos.push_back(fieldInfo); }else{ @@ -64,12 +64,11 @@ int MatchQueryProcess::GetValidDoc(int logic_type, const std::vector& std::vector index_info_vet; int iret = -1; uint32_t segment_tag = keys[FIRST_SPLIT_WORD_INDEX].segment_tag; - if (SEGMENT_DEFAULT == segment_tag - || SEGMENT_NONE == segment_tag){ - iret = ValidDocFilter::Instance()->TextInvertIndexSearch(keys , index_info_vet); - }else if(SEGMENT_CHINESE == segment_tag + if(SEGMENT_CHINESE == segment_tag || SEGMENT_ENGLISH == segment_tag){ iret = ValidDocFilter::Instance()->HanPinTextInvertIndexSearch(keys , index_info_vet); + }else{ + iret = ValidDocFilter::Instance()->TextInvertIndexSearch(keys , index_info_vet); } if (iret != 0) { return iret; } diff --git a/src/search_local/index_read/valid_doc_filter.cc b/src/search_local/index_read/valid_doc_filter.cc index 75811a0..24deb77 100644 --- a/src/search_local/index_read/valid_doc_filter.cc +++ b/src/search_local/index_read/valid_doc_filter.cc @@ -129,7 +129,9 @@ int ValidDocFilter::TextInvertIndexSearch(const std::vector& keys, st if (doc_info.size() == 0) continue; if (!p_data_base_->GetHasGisFlag() || !isAllNumber(iter->word)){ - ResultContext::Instance()->SetHighLightWordSet(iter->word); + if (iter->field_type != FIELD_INDEX){ + ResultContext::Instance()->SetHighLightWordSet(iter->word); + } } if(!p_data_base_->GetHasGisFlag() && (SORT_RELEVANCE == p_data_base_->SortType() || SORT_TIMESTAMP == p_data_base_->SortType())){ CalculateByWord(*iter, doc_info);