Mod:remove_skiplist_fix_query_bugs
This commit is contained in:
parent
149dc03bfc
commit
4ccbea999a
@ -25,6 +25,7 @@
|
|||||||
#include <tr1/unordered_map>
|
#include <tr1/unordered_map>
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
#include <map>
|
#include <map>
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
#define DOC_CNT 10000
|
#define DOC_CNT 10000
|
||||||
#define MAX_DOCID_LENGTH 32
|
#define MAX_DOCID_LENGTH 32
|
||||||
@ -37,6 +38,8 @@ const uint32_t MAX_SEARCH_LEN = 60;
|
|||||||
const uint32_t SINGLE_WORD_LEN = 18;
|
const uint32_t SINGLE_WORD_LEN = 18;
|
||||||
const uint32_t MAX_VALUE_LEN = 51200;
|
const uint32_t MAX_VALUE_LEN = 51200;
|
||||||
|
|
||||||
|
const double DOUBLE_EPS = 1e-3;
|
||||||
|
|
||||||
typedef std::tr1::unordered_map<std::string, double> hash_double_map;
|
typedef std::tr1::unordered_map<std::string, double> hash_double_map;
|
||||||
typedef std::tr1::unordered_map<std::string, std::string> hash_string_map;
|
typedef std::tr1::unordered_map<std::string, std::string> hash_string_map;
|
||||||
|
|
||||||
@ -91,20 +94,20 @@ enum SORTTYPE {
|
|||||||
};
|
};
|
||||||
|
|
||||||
enum FieldType{
|
enum FieldType{
|
||||||
FIELD_INT = 1,
|
FIELD_INT = 1,
|
||||||
FIELD_STRING,
|
FIELD_STRING = 2,
|
||||||
FIELD_TEXT,
|
FIELD_TEXT = 3,
|
||||||
FIELD_IP,
|
FIELD_IP = 4,
|
||||||
FIELD_LNG,
|
FIELD_GEO_POINT = 5,
|
||||||
FIELD_LAT,
|
FIELD_LAT = 6,
|
||||||
FIELD_GIS,
|
FIELD_GIS = 7,
|
||||||
FIELD_DISTANCE,
|
FIELD_DISTANCE = 8,
|
||||||
FIELD_DOUBLE,
|
FIELD_DOUBLE = 9,
|
||||||
FIELD_LONG,
|
FIELD_LONG = 10,
|
||||||
FIELD_INDEX = 11,
|
FIELD_INDEX = 11,
|
||||||
FIELD_LNG_ARRAY,
|
FIELD_LNG_ARRAY = 12,
|
||||||
FIELD_LAT_ARRAY,
|
FIELD_LAT_ARRAY = 13,
|
||||||
FIELD_WKT,
|
FIELD_GEO_SHAPE = 14
|
||||||
};
|
};
|
||||||
|
|
||||||
enum SEGMENTTAG {
|
enum SEGMENTTAG {
|
||||||
@ -251,6 +254,23 @@ enum KeyType
|
|||||||
INVERTKEY,
|
INVERTKEY,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// One scored search hit: a document id paired with the score it is ranked by.
// Used as the key type of an ordered std::set, so operator< must define a
// strict weak ordering.
struct ScoreDocIdNode{
    double d_score;       // ranking key
    std::string s_docid;  // document id; breaks ties between equal scores

    // score: ranking key of the document.
    // docid: document id (copied into the node).
    ScoreDocIdNode(double score , const std::string& docid)
        : d_score(score)
        , s_docid(docid)
    { }

    // Orders nodes by score first and by doc id second.
    //
    // The score comparison is deliberately exact. Treating scores that lie
    // within a tolerance as "equal" is not transitive (a~b and b~c does not
    // imply a~c), which can produce cycles such as c < b < a < c and breaks
    // the ordering contract required by std::set.
    bool operator<(const ScoreDocIdNode& score_docid_node) const {
        if (d_score < score_docid_node.d_score) { return true; }
        if (score_docid_node.d_score < d_score) { return false; }
        return s_docid.compare(score_docid_node.s_docid) < 0;
    }
};
|
||||||
|
|
||||||
struct IndexInfo {
|
struct IndexInfo {
|
||||||
uint32_t appid;
|
uint32_t appid;
|
||||||
std::string doc_id;
|
std::string doc_id;
|
||||||
|
@ -18,9 +18,9 @@ BoolQueryProcess::BoolQueryProcess(const Json::Value& value)
|
|||||||
query_process_map_.insert(std::make_pair(E_INDEX_READ_TERM
|
query_process_map_.insert(std::make_pair(E_INDEX_READ_TERM
|
||||||
, new TermQueryProcess(parse_value_ )));
|
, new TermQueryProcess(parse_value_ )));
|
||||||
query_process_map_.insert(std::make_pair(E_INDEX_READ_RANGE
|
query_process_map_.insert(std::make_pair(E_INDEX_READ_RANGE
|
||||||
, RangeQueryGenerator::Instance()->GetRangeQueryProcess(E_INDEX_READ_RANGE , parse_value_)));
|
, new RangeQueryProcess(parse_value_ )));
|
||||||
query_process_map_.insert(std::make_pair(E_INDEX_READ_RANGE_PRE_TERM
|
query_process_map_.insert(std::make_pair(E_INDEX_READ_PRE_TERM
|
||||||
, RangeQueryGenerator::Instance()->GetRangeQueryProcess(E_INDEX_READ_RANGE_PRE_TERM , parse_value_)));
|
, new PreTerminal(parse_value_ )));
|
||||||
}
|
}
|
||||||
|
|
||||||
BoolQueryProcess::~BoolQueryProcess()
|
BoolQueryProcess::~BoolQueryProcess()
|
||||||
@ -122,7 +122,7 @@ int BoolQueryProcess::ParseContent(int logic_type){
|
|||||||
int BoolQueryProcess::GetValidDoc(){
|
int BoolQueryProcess::GetValidDoc(){
|
||||||
bool bRet = false;
|
bool bRet = false;
|
||||||
if (component_->TerminalTag()){
|
if (component_->TerminalTag()){
|
||||||
range_query_pre_term_ = dynamic_cast<RangeQueryPreTerminal*>(query_process_map_[E_INDEX_READ_RANGE_PRE_TERM]);
|
range_query_pre_term_ = dynamic_cast<PreTerminal*>(query_process_map_[E_INDEX_READ_PRE_TERM]);
|
||||||
if (range_query_pre_term_ != NULL){
|
if (range_query_pre_term_ != NULL){
|
||||||
return range_query_pre_term_->GetValidDoc();
|
return range_query_pre_term_->GetValidDoc();
|
||||||
}
|
}
|
||||||
@ -230,7 +230,7 @@ int BoolQueryProcess::InitQueryProcess(uint32_t type , const Json::Value& value)
|
|||||||
} else if(value.isMember(RANGE)){
|
} else if(value.isMember(RANGE)){
|
||||||
parse_value = parse_value_[RANGE];
|
parse_value = parse_value_[RANGE];
|
||||||
if (component_->TerminalTag()){
|
if (component_->TerminalTag()){
|
||||||
query_type = E_INDEX_READ_RANGE_PRE_TERM;
|
query_type = E_INDEX_READ_PRE_TERM;
|
||||||
|
|
||||||
}else{
|
}else{
|
||||||
query_type = E_INDEX_READ_RANGE;
|
query_type = E_INDEX_READ_RANGE;
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
#include "query_process.h"
|
#include "query_process.h"
|
||||||
|
|
||||||
class RangeQueryProcess;
|
class RangeQueryProcess;
|
||||||
class RangeQueryPreTerminal;
|
class PreTerminal;
|
||||||
class GeoDistanceQueryProcess;
|
class GeoDistanceQueryProcess;
|
||||||
|
|
||||||
class BoolQueryProcess : public QueryProcess{
|
class BoolQueryProcess : public QueryProcess{
|
||||||
@ -31,7 +31,7 @@ private:
|
|||||||
private:
|
private:
|
||||||
std::map<int , QueryProcess*> query_process_map_;
|
std::map<int , QueryProcess*> query_process_map_;
|
||||||
RangeQueryProcess* range_query_;
|
RangeQueryProcess* range_query_;
|
||||||
RangeQueryPreTerminal* range_query_pre_term_;
|
PreTerminal* range_query_pre_term_;
|
||||||
GeoDistanceQueryProcess* geo_distance_query_;
|
GeoDistanceQueryProcess* geo_distance_query_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -69,7 +69,7 @@ int GeoDistanceQueryProcess::ParseContent(int logic_type){
|
|||||||
|
|
||||||
int GeoDistanceQueryProcess::GetValidDoc(){
|
int GeoDistanceQueryProcess::GetValidDoc(){
|
||||||
std::vector<IndexInfo> index_info_vet;
|
std::vector<IndexInfo> index_info_vet;
|
||||||
int iret = ValidDocFilter::Instance()->MixTextInvertIndexSearch(component_->AndKeys(), index_info_vet
|
int iret = ValidDocFilter::Instance()->TextInvertIndexSearch(component_->AndKeys(), index_info_vet
|
||||||
, high_light_word_, docid_keyinfovet_map_ , key_doccount_map_);
|
, high_light_word_, docid_keyinfovet_map_ , key_doccount_map_);
|
||||||
if (iret != 0) { return iret; }
|
if (iret != 0) { return iret; }
|
||||||
|
|
||||||
@ -86,6 +86,8 @@ int GeoDistanceQueryProcess::GetScore(){
|
|||||||
{
|
{
|
||||||
case SORT_RELEVANCE:
|
case SORT_RELEVANCE:
|
||||||
case SORT_TIMESTAMP:
|
case SORT_TIMESTAMP:
|
||||||
|
case SORT_FIELD_ASC:
|
||||||
|
case SORT_FIELD_DESC:
|
||||||
{
|
{
|
||||||
hash_double_map::iterator dis_iter = o_distance_.begin();
|
hash_double_map::iterator dis_iter = o_distance_.begin();
|
||||||
for(; dis_iter != o_distance_.end(); ++dis_iter){
|
for(; dis_iter != o_distance_.end(); ++dis_iter){
|
||||||
@ -93,17 +95,23 @@ int GeoDistanceQueryProcess::GetScore(){
|
|||||||
double score = dis_iter->second;
|
double score = dis_iter->second;
|
||||||
if ((o_geo_point_.d_distance > -0.0001 && o_geo_point_.d_distance < 0.0001)
|
if ((o_geo_point_.d_distance > -0.0001 && o_geo_point_.d_distance < 0.0001)
|
||||||
|| (score + 1e-6 <= o_geo_point_.d_distance)){
|
|| (score + 1e-6 <= o_geo_point_.d_distance)){
|
||||||
skipList_.InsertNode(score, doc_id.c_str());
|
scoredocid_set_.insert(ScoreDocIdNode(score , doc_id));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case DONT_SORT:
|
case DONT_SORT:
|
||||||
case SORT_FIELD_ASC:
|
|
||||||
case SORT_FIELD_DESC:
|
|
||||||
{
|
{
|
||||||
// do nothing
|
hash_double_map::iterator dis_iter = o_distance_.begin();
|
||||||
|
for(; dis_iter != o_distance_.end(); ++dis_iter){
|
||||||
|
std::string doc_id = dis_iter->first;
|
||||||
|
if ((o_geo_point_.d_distance > -0.0001 && o_geo_point_.d_distance < 0.0001)
|
||||||
|
|| (dis_iter->second + 1e-6 <= o_geo_point_.d_distance)){
|
||||||
|
scoredocid_set_.insert(ScoreDocIdNode(1 , doc_id));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -113,5 +121,10 @@ int GeoDistanceQueryProcess::GetScore(){
|
|||||||
|
|
||||||
void GeoDistanceQueryProcess::SortScore(int& i_sequence , int& i_rank)
|
void GeoDistanceQueryProcess::SortScore(int& i_sequence , int& i_rank)
|
||||||
{
|
{
|
||||||
SortForwardBySkipList(i_sequence , i_rank);
|
// 默认升序,距离近在前
|
||||||
|
if (SORT_FIELD_ASC == component_->SortType()){
|
||||||
|
AscSort(i_sequence , i_rank);
|
||||||
|
}else { // 降序和不排序处理
|
||||||
|
DescSort(i_sequence , i_rank);
|
||||||
|
}
|
||||||
}
|
}
|
@ -62,8 +62,24 @@ int MatchQueryProcess::ParseContent(int logic_type){
|
|||||||
|
|
||||||
int MatchQueryProcess::GetValidDoc(){
|
int MatchQueryProcess::GetValidDoc(){
|
||||||
std::vector<IndexInfo> index_info_vet;
|
std::vector<IndexInfo> index_info_vet;
|
||||||
int iret = ValidDocFilter::Instance()->PureTextInvertIndexSearch(component_->OrKeys()
|
if (component_->OrKeys().empty()){
|
||||||
, index_info_vet , high_light_word_, docid_keyinfovet_map_);
|
return -RT_GET_FIELD_ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
|
int iret = 0;
|
||||||
|
if (SEGMENT_DEFAULT == component_->OrKeys()[FIRST_TEST_INDEX][FIRST_SPLIT_WORD_INDEX].segment_tag){
|
||||||
|
iret = ValidDocFilter::Instance()->TextInvertIndexSearch(component_->OrKeys()
|
||||||
|
, index_info_vet
|
||||||
|
, high_light_word_
|
||||||
|
, docid_keyinfovet_map_
|
||||||
|
, key_doccount_map_);
|
||||||
|
}else{
|
||||||
|
iret = ValidDocFilter::Instance()->HanPinTextInvertIndexSearch(component_->OrKeys()
|
||||||
|
, index_info_vet
|
||||||
|
, high_light_word_
|
||||||
|
, docid_keyinfovet_map_);
|
||||||
|
}
|
||||||
|
|
||||||
if (iret != 0) { return iret; }
|
if (iret != 0) { return iret; }
|
||||||
|
|
||||||
bool bRet = doc_manager_->GetDocContent(index_info_vet , valid_docs_);
|
bool bRet = doc_manager_->GetDocContent(index_info_vet , valid_docs_);
|
||||||
|
@ -21,6 +21,9 @@
|
|||||||
|
|
||||||
#include "query_process.h"
|
#include "query_process.h"
|
||||||
|
|
||||||
|
#define FIRST_TEST_INDEX 0
|
||||||
|
#define FIRST_SPLIT_WORD_INDEX 0
|
||||||
|
|
||||||
class MatchQueryProcess: public QueryProcess{
|
class MatchQueryProcess: public QueryProcess{
|
||||||
public:
|
public:
|
||||||
MatchQueryProcess(const Json::Value& value);
|
MatchQueryProcess(const Json::Value& value);
|
||||||
|
@ -2,26 +2,20 @@
|
|||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include "../valid_doc_filter.h"
|
#include "../valid_doc_filter.h"
|
||||||
#include "../order_op.h"
|
#include "../order_op.h"
|
||||||
#include "../result_cache.h"
|
|
||||||
#include "cachelist_unit.h"
|
|
||||||
|
|
||||||
extern CCacheListUnit* cachelist;
|
|
||||||
|
|
||||||
QueryProcess::QueryProcess(const Json::Value& value)
|
QueryProcess::QueryProcess(const Json::Value& value)
|
||||||
: component_(NULL)
|
: component_(NULL)
|
||||||
, doc_manager_(NULL)
|
, doc_manager_(NULL)
|
||||||
, request_(NULL)
|
, request_(NULL)
|
||||||
, parse_value_(value)
|
, parse_value_(value)
|
||||||
, skipList_()
|
, scoredocid_set_()
|
||||||
, response_()
|
, response_()
|
||||||
, valid_docs_()
|
, valid_docs_()
|
||||||
, high_light_word_()
|
, high_light_word_()
|
||||||
, docid_keyinfovet_map_()
|
, docid_keyinfovet_map_()
|
||||||
, key_doccount_map_()
|
, key_doccount_map_()
|
||||||
, sort_field_type_()
|
, sort_field_type_()
|
||||||
{
|
{ }
|
||||||
skipList_.InitList();
|
|
||||||
}
|
|
||||||
|
|
||||||
QueryProcess::~QueryProcess()
|
QueryProcess::~QueryProcess()
|
||||||
{ }
|
{ }
|
||||||
@ -73,13 +67,13 @@ int QueryProcess::GetScore()
|
|||||||
{
|
{
|
||||||
switch (component_->SortType())
|
switch (component_->SortType())
|
||||||
{
|
{
|
||||||
case SORT_RELEVANCE:
|
case SORT_RELEVANCE: // 按照相关度得分,并以此排序
|
||||||
{
|
{
|
||||||
// 范围查的时候如果不指定排序类型,需要在这里对skipList进行赋值
|
// 范围查的时候如果不指定排序类型,需要在这里对skipList进行赋值
|
||||||
if (docid_keyinfovet_map_.empty() && skipList_.GetSize() == 0) {
|
if (docid_keyinfovet_map_.empty() && scoredocid_set_.empty()) {
|
||||||
std::set<std::string>::iterator iter = valid_docs_.begin();
|
std::set<std::string>::iterator iter = valid_docs_.begin();
|
||||||
for(; iter != valid_docs_.end(); iter++){
|
for(; iter != valid_docs_.end(); iter++){
|
||||||
skipList_.InsertNode(1, (*iter).c_str());
|
scoredocid_set_.insert(ScoreDocIdNode(1,*iter));
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -101,11 +95,11 @@ int QueryProcess::GetScore()
|
|||||||
score += log((DOC_CNT - ui_doc_count + 0.5) / (ui_doc_count + 0.5)) * ((D_BM25_K1 + 1)*ui_word_freq) \
|
score += log((DOC_CNT - ui_doc_count + 0.5) / (ui_doc_count + 0.5)) * ((D_BM25_K1 + 1)*ui_word_freq) \
|
||||||
/ (D_BM25_K + ui_word_freq) * (D_BM25_K2 + 1) * 1 / (D_BM25_K2 + 1);
|
/ (D_BM25_K + ui_word_freq) * (D_BM25_K2 + 1) * 1 / (D_BM25_K2 + 1);
|
||||||
}
|
}
|
||||||
skipList_.InsertNode(score, doc_id.c_str());
|
scoredocid_set_.insert(ScoreDocIdNode(score , doc_id));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case SORT_TIMESTAMP:
|
case SORT_TIMESTAMP: // 按照时间戳得分,并以此排序
|
||||||
{
|
{
|
||||||
std::map<std::string, KeyInfoVet>::iterator docid_keyinfovet_iter = docid_keyinfovet_map_.begin();
|
std::map<std::string, KeyInfoVet>::iterator docid_keyinfovet_iter = docid_keyinfovet_map_.begin();
|
||||||
for (; docid_keyinfovet_iter != docid_keyinfovet_map_.end(); ++ docid_keyinfovet_iter){
|
for (; docid_keyinfovet_iter != docid_keyinfovet_map_.end(); ++ docid_keyinfovet_iter){
|
||||||
@ -121,20 +115,20 @@ int QueryProcess::GetScore()
|
|||||||
}
|
}
|
||||||
|
|
||||||
double score = (double)key_info[0].created_time;
|
double score = (double)key_info[0].created_time;
|
||||||
skipList_.InsertNode(score, doc_id.c_str());
|
scoredocid_set_.insert(ScoreDocIdNode(score , doc_id));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case DONT_SORT:
|
case DONT_SORT: // 不排序,docid有序
|
||||||
{
|
{
|
||||||
std::set<std::string>::iterator valid_docs_iter = valid_docs_.begin();
|
std::set<std::string>::iterator valid_docs_iter = valid_docs_.begin();
|
||||||
for(; valid_docs_iter != valid_docs_.end(); valid_docs_iter++){
|
for(; valid_docs_iter != valid_docs_.end(); valid_docs_iter++){
|
||||||
std::string doc_id = *valid_docs_iter;
|
std::string doc_id = *valid_docs_iter;
|
||||||
skipList_.InsertNode(1, doc_id.c_str());
|
scoredocid_set_.insert(ScoreDocIdNode(1 , doc_id));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case SORT_FIELD_ASC:
|
case SORT_FIELD_ASC: // 按照指定字段进行升降排序
|
||||||
case SORT_FIELD_DESC:
|
case SORT_FIELD_DESC:
|
||||||
{
|
{
|
||||||
std::set<std::string>::iterator valid_docs_iter = valid_docs_.begin();
|
std::set<std::string>::iterator valid_docs_iter = valid_docs_.begin();
|
||||||
@ -155,12 +149,10 @@ int QueryProcess::GetScore()
|
|||||||
void QueryProcess::SortScore(int& i_sequence , int& i_rank)
|
void QueryProcess::SortScore(int& i_sequence , int& i_rank)
|
||||||
{
|
{
|
||||||
if ((SORT_FIELD_DESC == component_->SortType() || SORT_FIELD_ASC == component_->SortType())
|
if ((SORT_FIELD_DESC == component_->SortType() || SORT_FIELD_ASC == component_->SortType())
|
||||||
&& 0 == skipList_.GetSize()){
|
&& scoredocid_set_.empty()){
|
||||||
SortByCOrderOp(i_rank);
|
SortByCOrderOp(i_rank);
|
||||||
}else if(SORT_FIELD_ASC == component_->SortType()){
|
}else{ // 默认降序,分高的在前(地理位置查询除外)
|
||||||
SortForwardBySkipList(i_sequence , i_rank);
|
DescSort(i_sequence, i_rank);
|
||||||
}else{
|
|
||||||
SortBackwardBySkipList(i_sequence, i_rank);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -190,61 +182,54 @@ void QueryProcess::SortByCOrderOp(int& i_rank)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void QueryProcess::SortForwardBySkipList(int& i_sequence , int& i_rank)
|
void QueryProcess::AscSort(int& i_sequence , int& i_rank)
|
||||||
{
|
{
|
||||||
log_debug("m_has_gis, size:%d ", skipList_.GetSize());
|
log_debug("m_has_gis, size:%d ", scoredocid_set_.size());
|
||||||
SkipListNode* tmp = skipList_.GetHeader()->level[0].forward;
|
|
||||||
|
|
||||||
int i_limit_start = component_->PageSize() * (component_->PageIndex() - 1);
|
int i_limit_start = component_->PageSize() * (component_->PageIndex() - 1);
|
||||||
int i_limit_end = component_->PageSize() * component_->PageIndex() - 1;
|
int i_limit_end = component_->PageSize() * component_->PageIndex() - 1;
|
||||||
|
|
||||||
while (tmp->level[0].forward != NULL) {
|
std::set<ScoreDocIdNode>::iterator iter = scoredocid_set_.begin();
|
||||||
|
for( ;iter != scoredocid_set_.end(); ++iter){
|
||||||
// 通过extra_filter_keys进行额外过滤(针对区分度不高的字段)
|
// 通过extra_filter_keys进行额外过滤(针对区分度不高的字段)
|
||||||
if(doc_manager_->CheckDocByExtraFilterKey(tmp->value) == false){
|
if(doc_manager_->CheckDocByExtraFilterKey(iter->s_docid) == false){
|
||||||
log_debug("CheckDocByExtraFilterKey failed, %s", tmp->value);
|
log_debug("CheckDocByExtraFilterKey failed, %s", iter->s_docid);
|
||||||
tmp = tmp->level[0].forward;
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
i_sequence ++;
|
i_sequence ++;
|
||||||
i_rank ++;
|
i_rank ++;
|
||||||
if(component_->ReturnAll() == 0){
|
if(component_->ReturnAll() == 0){
|
||||||
if (i_sequence < i_limit_start || i_sequence > i_limit_end) {
|
if (i_sequence < i_limit_start || i_sequence > i_limit_end) {
|
||||||
tmp = tmp->level[0].forward;
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Json::Value doc_info;
|
Json::Value doc_info;
|
||||||
doc_info["doc_id"] = Json::Value(tmp->value);
|
doc_info["doc_id"] = Json::Value(iter->s_docid);
|
||||||
doc_info["score"] = Json::Value(tmp->key);
|
doc_info["score"] = Json::Value(iter->d_score);
|
||||||
response_["result"].append(doc_info);
|
response_["result"].append(doc_info);
|
||||||
tmp = tmp->level[0].forward;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void QueryProcess::SortBackwardBySkipList(int& i_sequence , int& i_rank)
|
void QueryProcess::DescSort(int& i_sequence , int& i_rank)
|
||||||
{
|
{
|
||||||
int i_limit_start = component_->PageSize() * (component_->PageIndex() - 1);
|
int i_limit_start = component_->PageSize() * (component_->PageIndex() - 1);
|
||||||
int i_limit_end = component_->PageSize() * component_->PageIndex() - 1;
|
int i_limit_end = component_->PageSize() * component_->PageIndex() - 1;
|
||||||
|
|
||||||
SkipListNode *tmp = skipList_.GetFooter()->backward;
|
std::set<ScoreDocIdNode>::iterator riter = scoredocid_set_.rbegin();
|
||||||
while(tmp->backward != NULL) {
|
for( ;riter != scoredocid_set_.rend(); ++riter){
|
||||||
if(doc_manager_->CheckDocByExtraFilterKey(tmp->value) == false){
|
if(doc_manager_->CheckDocByExtraFilterKey(riter->s_docid) == false){
|
||||||
tmp = tmp->backward;
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
i_sequence++;
|
i_sequence++;
|
||||||
i_rank++;
|
i_rank++;
|
||||||
if (component_->ReturnAll() == 0){
|
if (component_->ReturnAll() == 0){
|
||||||
if (i_sequence < i_limit_start || i_sequence > i_limit_end) {
|
if (i_sequence < i_limit_start || i_sequence > i_limit_end) {
|
||||||
tmp = tmp->backward;
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Json::Value doc_info;
|
Json::Value doc_info;
|
||||||
doc_info["doc_id"] = Json::Value(tmp->value);
|
doc_info["doc_id"] = Json::Value(riter->s_docid);
|
||||||
doc_info["score"] = Json::Value(tmp->key);
|
doc_info["score"] = Json::Value(riter->d_score);
|
||||||
response_["result"].append(doc_info);
|
response_["result"].append(doc_info);
|
||||||
tmp = tmp->backward;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -47,7 +47,7 @@ enum E_INDEX_READ_QUERY_PROCESS{
|
|||||||
E_INDEX_READ_MATCH,
|
E_INDEX_READ_MATCH,
|
||||||
E_INDEX_READ_TERM,
|
E_INDEX_READ_TERM,
|
||||||
E_INDEX_READ_RANGE,
|
E_INDEX_READ_RANGE,
|
||||||
E_INDEX_READ_RANGE_PRE_TERM
|
E_INDEX_READ_PRE_TERM
|
||||||
};
|
};
|
||||||
|
|
||||||
class QueryProcess{
|
class QueryProcess{
|
||||||
@ -81,8 +81,8 @@ protected:
|
|||||||
|
|
||||||
protected:
|
protected:
|
||||||
void SortByCOrderOp(int& i_rank);
|
void SortByCOrderOp(int& i_rank);
|
||||||
void SortForwardBySkipList(int& i_sequence , int& i_rank);
|
void AscSort(int& i_sequence , int& i_rank);
|
||||||
void SortBackwardBySkipList(int& i_sequence , int& i_rank);
|
void DescSort(int& i_sequence , int& i_rank);
|
||||||
void AppendHighLightWord();
|
void AppendHighLightWord();
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
@ -91,7 +91,7 @@ protected:
|
|||||||
CTaskRequest* request_;
|
CTaskRequest* request_;
|
||||||
|
|
||||||
Json::Value parse_value_;
|
Json::Value parse_value_;
|
||||||
SkipList skipList_;
|
std::set<ScoreDocIdNode> scoredocid_set_;
|
||||||
Json::Value response_;
|
Json::Value response_;
|
||||||
|
|
||||||
ValidDocSet valid_docs_;
|
ValidDocSet valid_docs_;
|
||||||
|
@ -106,15 +106,25 @@ int RangeQueryProcess::GetValidDoc()
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
RangeQueryPreTerminal::RangeQueryPreTerminal(const Json::Value& value)
|
PreTerminal::PreTerminal(const Json::Value& value)
|
||||||
: RangeQueryProcess(value)
|
: QueryProcess(value)
|
||||||
, candidate_doc_()
|
, candidate_doc_()
|
||||||
{}
|
{}
|
||||||
|
|
||||||
RangeQueryPreTerminal::~RangeQueryPreTerminal()
|
PreTerminal::~PreTerminal()
|
||||||
{}
|
{}
|
||||||
|
|
||||||
int RangeQueryPreTerminal::GetValidDoc(){
|
int PreTerminal::ParseContent(int logic_type){
|
||||||
|
log_info("PreTerminal do not need parse content");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int PreTerminal::ParseContent(){
|
||||||
|
log_info("PreTerminal do not need parse content");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int PreTerminal::GetValidDoc(){
|
||||||
uint32_t count = 0;
|
uint32_t count = 0;
|
||||||
uint32_t N = 2;
|
uint32_t N = 2;
|
||||||
uint32_t limit_start = 0;
|
uint32_t limit_start = 0;
|
||||||
@ -162,12 +172,12 @@ int RangeQueryPreTerminal::GetValidDoc(){
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int RangeQueryPreTerminal::GetScore(){
|
// Scoring step for PreTerminal: nothing is scored, the call is only logged.
// Returns 0.
int PreTerminal::GetScore(){
    // The message names this class, matching the ParseContent log lines.
    log_info("PreTerminal do not need get score");
    return 0;
}
|
||||||
|
|
||||||
void RangeQueryPreTerminal::SetResponse(){
|
void PreTerminal::SetResponse(){
|
||||||
response_["code"] = 0;
|
response_["code"] = 0;
|
||||||
int sequence = -1;
|
int sequence = -1;
|
||||||
int rank = 0;
|
int rank = 0;
|
||||||
|
@ -25,12 +25,16 @@ private:
|
|||||||
friend class BoolQueryProcess;
|
friend class BoolQueryProcess;
|
||||||
};
|
};
|
||||||
|
|
||||||
class RangeQueryPreTerminal : public RangeQueryProcess{
|
class PreTerminal : public QueryProcess{
|
||||||
public:
|
public:
|
||||||
RangeQueryPreTerminal(const Json::Value& value);
|
PreTerminal(const Json::Value& value);
|
||||||
virtual~ RangeQueryPreTerminal();
|
virtual~ PreTerminal();
|
||||||
|
|
||||||
|
public:
|
||||||
|
virtual int ParseContent(int logic_type);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
virtual int ParseContent();
|
||||||
virtual int GetValidDoc();
|
virtual int GetValidDoc();
|
||||||
virtual int GetScore();
|
virtual int GetScore();
|
||||||
virtual void SetResponse();
|
virtual void SetResponse();
|
||||||
@ -42,39 +46,39 @@ private:
|
|||||||
friend class BoolQueryProcess;
|
friend class BoolQueryProcess;
|
||||||
};
|
};
|
||||||
|
|
||||||
class RangeQueryGenerator : private noncopyable{
|
// class RangeQueryGenerator : private noncopyable{
|
||||||
public:
|
// public:
|
||||||
RangeQueryGenerator() { };
|
// RangeQueryGenerator() { };
|
||||||
virtual~ RangeQueryGenerator() { };
|
// virtual~ RangeQueryGenerator() { };
|
||||||
|
|
||||||
public:
|
// public:
|
||||||
static RangeQueryGenerator* Instance(){
|
// static RangeQueryGenerator* Instance(){
|
||||||
return CSingleton<RangeQueryGenerator>::Instance();
|
// return CSingleton<RangeQueryGenerator>::Instance();
|
||||||
};
|
// };
|
||||||
|
|
||||||
static void Destroy(){
|
// static void Destroy(){
|
||||||
CSingleton<RangeQueryGenerator>::Destroy();
|
// CSingleton<RangeQueryGenerator>::Destroy();
|
||||||
};
|
// };
|
||||||
|
|
||||||
public:
|
// public:
|
||||||
// 内存释放由调用方处理
|
// // 内存释放由调用方处理
|
||||||
QueryProcess* GetRangeQueryProcess(int iType , const Json::Value& parse_value){
|
// QueryProcess* GetRangeQueryProcess(int iType , const Json::Value& parse_value){
|
||||||
QueryProcess* current_range_query = NULL;
|
// QueryProcess* current_range_query = NULL;
|
||||||
switch (iType){
|
// switch (iType){
|
||||||
case E_INDEX_READ_RANGE:{
|
// case E_INDEX_READ_RANGE:{
|
||||||
current_range_query = new RangeQueryProcess(parse_value);
|
// current_range_query = new RangeQueryProcess(parse_value);
|
||||||
}
|
// }
|
||||||
break;
|
// break;
|
||||||
case E_INDEX_READ_RANGE_PRE_TERM:{
|
// case E_INDEX_READ_PRE_TERM:{
|
||||||
current_range_query = new RangeQueryPreTerminal(parse_value);
|
// current_range_query = new PreTerminal(parse_value);
|
||||||
}
|
// }
|
||||||
break;
|
// break;
|
||||||
default:
|
// default:
|
||||||
break;
|
// break;
|
||||||
}
|
// }
|
||||||
|
|
||||||
return current_range_query;
|
// return current_range_query;
|
||||||
}
|
// }
|
||||||
};
|
// };
|
||||||
|
|
||||||
#endif
|
#endif
|
@ -48,7 +48,7 @@ int TermQueryProcess::ParseContent(int logic_type){
|
|||||||
|
|
||||||
int TermQueryProcess::GetValidDoc(){
|
int TermQueryProcess::GetValidDoc(){
|
||||||
std::vector<IndexInfo> index_info_vet;
|
std::vector<IndexInfo> index_info_vet;
|
||||||
int iret = ValidDocFilter::Instance()->MixTextInvertIndexSearch(component_->OrKeys()
|
int iret = ValidDocFilter::Instance()->TextInvertIndexSearch(component_->OrKeys()
|
||||||
, index_info_vet , high_light_word_, docid_keyinfovet_map_ , key_doccount_map_);
|
, index_info_vet , high_light_word_, docid_keyinfovet_map_ , key_doccount_map_);
|
||||||
if (iret != 0) { return iret; }
|
if (iret != 0) { return iret; }
|
||||||
|
|
||||||
|
@ -76,13 +76,7 @@ int SearchTask::Process(CTaskRequest *request)
|
|||||||
}else if (query.isMember(GEOSHAPE)){
|
}else if (query.isMember(GEOSHAPE)){
|
||||||
query_process_ = new GeoShapeQueryProcess(query[GEOSHAPE]);
|
query_process_ = new GeoShapeQueryProcess(query[GEOSHAPE]);
|
||||||
}else if (query.isMember(RANGE)){
|
}else if (query.isMember(RANGE)){
|
||||||
if (component_->TerminalTag()){
|
query_process_ = new RangeQueryProcess(query[RANGE]);
|
||||||
query_process_ = RangeQueryGenerator::Instance()->GetRangeQueryProcess(E_INDEX_READ_RANGE_PRE_TERM
|
|
||||||
, query[RANGE]);
|
|
||||||
}else{
|
|
||||||
query_process_ = RangeQueryGenerator::Instance()->GetRangeQueryProcess(E_INDEX_READ_RANGE
|
|
||||||
, query[RANGE]);
|
|
||||||
}
|
|
||||||
}else if (query.isMember(BOOL)){
|
}else if (query.isMember(BOOL)){
|
||||||
query_process_ = new BoolQueryProcess(query[BOOL]);
|
query_process_ = new BoolQueryProcess(query[BOOL]);
|
||||||
}else{
|
}else{
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -57,7 +57,7 @@ set<string> sets_intersection(set<string> v1, set<string> v2); // 集合求交
|
|||||||
set<string> sets_union(set<string> v1, set<string> v2); // 集合求并集
|
set<string> sets_union(set<string> v1, set<string> v2); // 集合求并集
|
||||||
set<string> sets_difference(set<string> v1, set<string> v2); // 集合求差集
|
set<string> sets_difference(set<string> v1, set<string> v2); // 集合求差集
|
||||||
double strToDouble(const string& str);
|
double strToDouble(const string& str);
|
||||||
bool GetGisDistance(uint32_t appid, const string& latLeft, const string& lngLeft, hash_double_map& distances, hash_string_map& doc_content);
|
bool GetGisDistance(uint32_t appid, const string& Latitude, const string& Longtitude, hash_double_map& distances, hash_string_map& doc_content);
|
||||||
void ConvertCharIntelligent(const string word, IntelligentInfo &info, int &len);
|
void ConvertCharIntelligent(const string word, IntelligentInfo &info, int &len);
|
||||||
void ConvertIntelligent(const vector<Content> &result, IntelligentInfo &info, bool &flag);
|
void ConvertIntelligent(const vector<Content> &result, IntelligentInfo &info, bool &flag);
|
||||||
bool GetGisCode(string lng, string lat, string ip, double distance, vector<string>& gisCode);
|
bool GetGisCode(string lng, string lat, string ip, double distance, vector<string>& gisCode);
|
||||||
|
@ -275,12 +275,12 @@ int ValidDocFilter::Process(const std::vector<std::vector<FieldInfo> >& keys, st
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int ValidDocFilter::PureTextInvertIndexSearch(const std::vector<std::vector<FieldInfo> >& keys
|
int ValidDocFilter::HanPinTextInvertIndexSearch(const std::vector<std::vector<FieldInfo> >& keys
|
||||||
, std::vector<IndexInfo>& index_info_vet
|
, std::vector<IndexInfo>& index_info_vet
|
||||||
, std::set<std::string>& highlightWord
|
, std::set<std::string>& highlightWord
|
||||||
, std::map<std::string, KeyInfoVet>& docid_keyinfo_map){
|
, std::map<std::string, KeyInfoVet>& docid_keyinfo_map){
|
||||||
if (keys.empty() || keys.size() > 1){
|
if (keys.empty() || keys.size() > 1){
|
||||||
return -RT_GET_DOC_ERR;
|
return -RT_GET_FIELD_ERROR;
|
||||||
}
|
}
|
||||||
const std::vector<FieldInfo>& key_field_info_vet = keys[0];
|
const std::vector<FieldInfo>& key_field_info_vet = keys[0];
|
||||||
std::vector<FieldInfo>::const_iterator iter = key_field_info_vet.cbegin();
|
std::vector<FieldInfo>::const_iterator iter = key_field_info_vet.cbegin();
|
||||||
@ -350,13 +350,13 @@ int ValidDocFilter::RangeQueryInvertIndexSearch(const std::vector<std::vector<Fi
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int ValidDocFilter::MixTextInvertIndexSearch(const std::vector<std::vector<FieldInfo> >& keys
|
int ValidDocFilter::TextInvertIndexSearch(const std::vector<std::vector<FieldInfo> >& keys
|
||||||
, std::vector<IndexInfo>& index_info_vet
|
, std::vector<IndexInfo>& index_info_vet
|
||||||
, std::set<std::string>& highlightWord
|
, std::set<std::string>& highlightWord
|
||||||
, std::map<std::string, KeyInfoVet>& docid_keyinfo_map
|
, std::map<std::string, KeyInfoVet>& docid_keyinfo_map
|
||||||
, std::map<std::string, uint32_t>& key_doccount_map){
|
, std::map<std::string, uint32_t>& key_doccount_map){
|
||||||
if (keys.empty() || keys.size() > 1){
|
if (keys.empty() || keys.size() > 1){
|
||||||
return -RT_GET_DOC_ERR;
|
return -RT_GET_FIELD_ERROR;
|
||||||
}
|
}
|
||||||
const std::vector<FieldInfo>& key_field_info_vet = keys[0];
|
const std::vector<FieldInfo>& key_field_info_vet = keys[0];
|
||||||
std::vector<FieldInfo>::const_iterator iter = key_field_info_vet.cbegin();
|
std::vector<FieldInfo>::const_iterator iter = key_field_info_vet.cbegin();
|
||||||
|
@ -52,7 +52,7 @@ public:
|
|||||||
, std::set<std::string>& highlightWord, std::map<std::string, KeyInfoVet>& docid_keyinfo_map
|
, std::set<std::string>& highlightWord, std::map<std::string, KeyInfoVet>& docid_keyinfo_map
|
||||||
, std::map<std::string, uint32_t>& key_doccount_map);
|
, std::map<std::string, uint32_t>& key_doccount_map);
|
||||||
|
|
||||||
int PureTextInvertIndexSearch(const std::vector<std::vector<FieldInfo> >& keys
|
int HanPinTextInvertIndexSearch(const std::vector<std::vector<FieldInfo> >& keys
|
||||||
, std::vector<IndexInfo>& index_info_vet
|
, std::vector<IndexInfo>& index_info_vet
|
||||||
, std::set<std::string>& highlightWord
|
, std::set<std::string>& highlightWord
|
||||||
, std::map<std::string, KeyInfoVet>& docid_keyinfo_map);
|
, std::map<std::string, KeyInfoVet>& docid_keyinfo_map);
|
||||||
@ -60,7 +60,7 @@ public:
|
|||||||
int RangeQueryInvertIndexSearch(const std::vector<std::vector<FieldInfo> >& keys
|
int RangeQueryInvertIndexSearch(const std::vector<std::vector<FieldInfo> >& keys
|
||||||
, std::vector<IndexInfo>& index_info_vet);
|
, std::vector<IndexInfo>& index_info_vet);
|
||||||
|
|
||||||
int MixTextInvertIndexSearch(const std::vector<std::vector<FieldInfo> >& keys
|
int TextInvertIndexSearch(const std::vector<std::vector<FieldInfo> >& keys
|
||||||
, std::vector<IndexInfo>& index_info_vet
|
, std::vector<IndexInfo>& index_info_vet
|
||||||
, std::set<std::string>& highlightWord
|
, std::set<std::string>& highlightWord
|
||||||
, std::map<std::string, KeyInfoVet>& docid_keyinfo_map
|
, std::map<std::string, KeyInfoVet>& docid_keyinfo_map
|
||||||
|
Loading…
Reference in New Issue
Block a user