Mod:remove_skiplist_fix_query_bugs
This commit is contained in:
parent 149dc03bfc
commit 4ccbea999a
@@ -25,6 +25,7 @@
 #include <tr1/unordered_map>
 #include <limits.h>
 #include <map>
+#include <cmath>
 
 #define DOC_CNT 10000
 #define MAX_DOCID_LENGTH 32
@@ -37,6 +38,8 @@ const uint32_t MAX_SEARCH_LEN = 60;
 const uint32_t SINGLE_WORD_LEN = 18;
 const uint32_t MAX_VALUE_LEN = 51200;
 
+const double DOUBLE_EPS = 1e-3;
+
 typedef std::tr1::unordered_map<std::string, double> hash_double_map;
 typedef std::tr1::unordered_map<std::string, std::string> hash_string_map;
 
@@ -92,19 +95,19 @@ enum SORTTYPE {
 
 enum FieldType{
     FIELD_INT = 1,
-    FIELD_STRING,
-    FIELD_TEXT,
-    FIELD_IP,
-    FIELD_LNG,
-    FIELD_LAT,
-    FIELD_GIS,
-    FIELD_DISTANCE,
-    FIELD_DOUBLE,
-    FIELD_LONG,
+    FIELD_STRING = 2,
+    FIELD_TEXT = 3,
+    FIELD_IP = 4,
+    FIELD_GEO_POINT = 5,
+    FIELD_LAT = 6,
+    FIELD_GIS = 7,
+    FIELD_DISTANCE = 8,
+    FIELD_DOUBLE = 9,
+    FIELD_LONG = 10,
     FIELD_INDEX = 11,
-    FIELD_LNG_ARRAY,
-    FIELD_LAT_ARRAY,
-    FIELD_WKT,
+    FIELD_LNG_ARRAY = 12,
+    FIELD_LAT_ARRAY = 13,
+    FIELD_GEO_SHAPE = 14
 };
 
 enum SEGMENTTAG {
@@ -251,6 +254,23 @@ enum KeyType
     INVERTKEY,
 };
 
+struct ScoreDocIdNode{
+    double d_score;
+    std::string s_docid;
+
+    ScoreDocIdNode(double score , std::string docid)
+        : d_score(score)
+        , s_docid(docid)
+    { }
+
+    bool operator<(const ScoreDocIdNode& score_docid_node) const {
+        if (fabs(d_score - score_docid_node.d_score) < DOUBLE_EPS){
+            return s_docid.compare(score_docid_node.s_docid) < 0;
+        }
+        return (d_score + DOUBLE_EPS) < score_docid_node.d_score;
+    }
+};
+
 struct IndexInfo {
     uint32_t appid;
     std::string doc_id;
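The ScoreDocIdNode comparator above is what lets a plain std::set stand in for the removed skiplist: scores that differ by less than DOUBLE_EPS are treated as ties and ordered by doc id, and everything else is ordered by score. A minimal standalone sketch of that ordering (struct and constant copied from the hunk; the main() and sample data are illustrative only, and the epsilon-based comparison only approximates a strict weak ordering):

```cpp
#include <cmath>
#include <cstdio>
#include <set>
#include <string>

const double DOUBLE_EPS = 1e-3;

struct ScoreDocIdNode {
    double d_score;
    std::string s_docid;
    ScoreDocIdNode(double score, std::string docid) : d_score(score), s_docid(docid) {}
    bool operator<(const ScoreDocIdNode& rhs) const {
        // Scores within DOUBLE_EPS compare as "equal": fall back to doc id order.
        if (std::fabs(d_score - rhs.d_score) < DOUBLE_EPS) {
            return s_docid.compare(rhs.s_docid) < 0;
        }
        return (d_score + DOUBLE_EPS) < rhs.d_score;
    }
};

int main() {
    std::set<ScoreDocIdNode> scoredocid_set;
    scoredocid_set.insert(ScoreDocIdNode(2.5, "doc_b"));
    scoredocid_set.insert(ScoreDocIdNode(2.5004, "doc_a"));  // near-tie with doc_b, ordered by id
    scoredocid_set.insert(ScoreDocIdNode(1.0, "doc_c"));
    for (std::set<ScoreDocIdNode>::iterator it = scoredocid_set.begin(); it != scoredocid_set.end(); ++it) {
        std::printf("%s %.4f\n", it->s_docid.c_str(), it->d_score);
    }
    // Prints doc_c first (lowest score), then doc_a and doc_b (near-equal scores, id order).
    return 0;
}
```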
@@ -18,9 +18,9 @@ BoolQueryProcess::BoolQueryProcess(const Json::Value& value)
     query_process_map_.insert(std::make_pair(E_INDEX_READ_TERM
                 , new TermQueryProcess(parse_value_ )));
     query_process_map_.insert(std::make_pair(E_INDEX_READ_RANGE
-                , RangeQueryGenerator::Instance()->GetRangeQueryProcess(E_INDEX_READ_RANGE , parse_value_)));
-    query_process_map_.insert(std::make_pair(E_INDEX_READ_RANGE_PRE_TERM
-                , RangeQueryGenerator::Instance()->GetRangeQueryProcess(E_INDEX_READ_RANGE_PRE_TERM , parse_value_)));
+                , new RangeQueryProcess(parse_value_ )));
+    query_process_map_.insert(std::make_pair(E_INDEX_READ_PRE_TERM
+                , new PreTerminal(parse_value_ )));
 }
 
 BoolQueryProcess::~BoolQueryProcess()
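BoolQueryProcess now builds its sub-processors directly and dispatches through query_process_map_. A hedged sketch of that map-based dispatch with stubbed types (the class bodies and enum values here are placeholders for illustration, not the real definitions):

```cpp
#include <cstdio>
#include <map>

// Illustrative stand-ins; the real classes live in the query_process headers.
struct QueryProcess { virtual ~QueryProcess() {} virtual int GetValidDoc() = 0; };
struct RangeQueryProcess : QueryProcess { int GetValidDoc() { std::puts("range"); return 0; } };
struct PreTerminal       : QueryProcess { int GetValidDoc() { std::puts("pre-terminal"); return 0; } };

enum { E_INDEX_READ_RANGE = 2, E_INDEX_READ_PRE_TERM = 3 };  // values assumed for the sketch

int main() {
    std::map<int, QueryProcess*> query_process_map;
    query_process_map[E_INDEX_READ_RANGE]    = new RangeQueryProcess();
    query_process_map[E_INDEX_READ_PRE_TERM] = new PreTerminal();

    int query_type = E_INDEX_READ_PRE_TERM;   // chosen by InitQueryProcess in the real code
    int ret = query_process_map[query_type]->GetValidDoc();

    for (std::map<int, QueryProcess*>::iterator it = query_process_map.begin();
         it != query_process_map.end(); ++it) { delete it->second; }
    return ret;
}
```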
@@ -122,7 +122,7 @@ int BoolQueryProcess::ParseContent(int logic_type){
 int BoolQueryProcess::GetValidDoc(){
     bool bRet = false;
     if (component_->TerminalTag()){
-        range_query_pre_term_ = dynamic_cast<RangeQueryPreTerminal*>(query_process_map_[E_INDEX_READ_RANGE_PRE_TERM]);
+        range_query_pre_term_ = dynamic_cast<PreTerminal*>(query_process_map_[E_INDEX_READ_PRE_TERM]);
         if (range_query_pre_term_ != NULL){
             return range_query_pre_term_->GetValidDoc();
         }
@@ -230,7 +230,7 @@ int BoolQueryProcess::InitQueryProcess(uint32_t type , const Json::Value& value)
     } else if(value.isMember(RANGE)){
         parse_value = parse_value_[RANGE];
         if (component_->TerminalTag()){
-            query_type = E_INDEX_READ_RANGE_PRE_TERM;
+            query_type = E_INDEX_READ_PRE_TERM;
 
         }else{
             query_type = E_INDEX_READ_RANGE;
 
@@ -4,7 +4,7 @@
 #include "query_process.h"
 
 class RangeQueryProcess;
-class RangeQueryPreTerminal;
+class PreTerminal;
 class GeoDistanceQueryProcess;
 
 class BoolQueryProcess : public QueryProcess{
@@ -31,7 +31,7 @@ private:
 private:
     std::map<int , QueryProcess*> query_process_map_;
     RangeQueryProcess* range_query_;
-    RangeQueryPreTerminal* range_query_pre_term_;
+    PreTerminal* range_query_pre_term_;
     GeoDistanceQueryProcess* geo_distance_query_;
 };
 
@@ -69,7 +69,7 @@ int GeoDistanceQueryProcess::ParseContent(int logic_type){
 
 int GeoDistanceQueryProcess::GetValidDoc(){
     std::vector<IndexInfo> index_info_vet;
-    int iret = ValidDocFilter::Instance()->MixTextInvertIndexSearch(component_->AndKeys(), index_info_vet
+    int iret = ValidDocFilter::Instance()->TextInvertIndexSearch(component_->AndKeys(), index_info_vet
                     , high_light_word_, docid_keyinfovet_map_ , key_doccount_map_);
     if (iret != 0) { return iret; }
 
@@ -86,6 +86,8 @@ int GeoDistanceQueryProcess::GetScore(){
     {
         case SORT_RELEVANCE:
         case SORT_TIMESTAMP:
+        case SORT_FIELD_ASC:
+        case SORT_FIELD_DESC:
         {
             hash_double_map::iterator dis_iter = o_distance_.begin();
             for(; dis_iter != o_distance_.end(); ++dis_iter){
@@ -93,17 +95,23 @@ int GeoDistanceQueryProcess::GetScore(){
                 double score = dis_iter->second;
                 if ((o_geo_point_.d_distance > -0.0001 && o_geo_point_.d_distance < 0.0001)
                     || (score + 1e-6 <= o_geo_point_.d_distance)){
-                    skipList_.InsertNode(score, doc_id.c_str());
+                    scoredocid_set_.insert(ScoreDocIdNode(score , doc_id));
                 }
             }
         }
         break;
         case DONT_SORT:
-        case SORT_FIELD_ASC:
-        case SORT_FIELD_DESC:
         {
-            // do nothing
+            hash_double_map::iterator dis_iter = o_distance_.begin();
+            for(; dis_iter != o_distance_.end(); ++dis_iter){
+                std::string doc_id = dis_iter->first;
+                if ((o_geo_point_.d_distance > -0.0001 && o_geo_point_.d_distance < 0.0001)
+                    || (dis_iter->second + 1e-6 <= o_geo_point_.d_distance)){
+                    scoredocid_set_.insert(ScoreDocIdNode(1 , doc_id));
+                }
+            }
         }
         break;
         default:
             break;
     }
@@ -113,5 +121,10 @@ int GeoDistanceQueryProcess::GetScore(){
 
 void GeoDistanceQueryProcess::SortScore(int& i_sequence , int& i_rank)
 {
-    SortForwardBySkipList(i_sequence , i_rank);
+    // ascending by default: the nearest documents come first
+    if (SORT_FIELD_ASC == component_->SortType()){
+        AscSort(i_sequence , i_rank);
+    }else { // descending sort and "no sort" are handled the same way
+        DescSort(i_sequence , i_rank);
+    }
 }
@@ -62,8 +62,24 @@ int MatchQueryProcess::ParseContent(int logic_type){
 
 int MatchQueryProcess::GetValidDoc(){
     std::vector<IndexInfo> index_info_vet;
-    int iret = ValidDocFilter::Instance()->PureTextInvertIndexSearch(component_->OrKeys()
-                    , index_info_vet , high_light_word_, docid_keyinfovet_map_);
+    if (component_->OrKeys().empty()){
+        return -RT_GET_FIELD_ERROR;
+    }
+
+    int iret = 0;
+    if (SEGMENT_DEFAULT == component_->OrKeys()[FIRST_TEST_INDEX][FIRST_SPLIT_WORD_INDEX].segment_tag){
+        iret = ValidDocFilter::Instance()->TextInvertIndexSearch(component_->OrKeys()
+                            , index_info_vet
+                            , high_light_word_
+                            , docid_keyinfovet_map_
+                            , key_doccount_map_);
+    }else{
+        iret = ValidDocFilter::Instance()->HanPinTextInvertIndexSearch(component_->OrKeys()
+                            , index_info_vet
+                            , high_light_word_
+                            , docid_keyinfovet_map_);
+    }
+
     if (iret != 0) { return iret; }
 
     bool bRet = doc_manager_->GetDocContent(index_info_vet , valid_docs_);
 
@@ -21,6 +21,9 @@
 
 #include "query_process.h"
 
+#define FIRST_TEST_INDEX 0
+#define FIRST_SPLIT_WORD_INDEX 0
+
 class MatchQueryProcess: public QueryProcess{
 public:
     MatchQueryProcess(const Json::Value& value);
 
@@ -2,26 +2,20 @@
 #include <assert.h>
 #include "../valid_doc_filter.h"
 #include "../order_op.h"
 #include "../result_cache.h"
 #include "cachelist_unit.h"
 
 extern CCacheListUnit* cachelist;
 
 QueryProcess::QueryProcess(const Json::Value& value)
     : component_(NULL)
     , doc_manager_(NULL)
     , request_(NULL)
     , parse_value_(value)
-    , skipList_()
+    , scoredocid_set_()
     , response_()
     , valid_docs_()
     , high_light_word_()
     , docid_keyinfovet_map_()
     , key_doccount_map_()
     , sort_field_type_()
-{
-    skipList_.InitList();
-}
+{ }
 
 QueryProcess::~QueryProcess()
 { }
@@ -73,13 +67,13 @@ int QueryProcess::GetScore()
 {
     switch (component_->SortType())
     {
-    case SORT_RELEVANCE:
+    case SORT_RELEVANCE: // score by relevance and sort by that score
         {
            // for range queries with no sort type specified, the skipList needs to be populated here
-            if (docid_keyinfovet_map_.empty() && skipList_.GetSize() == 0) {
+            if (docid_keyinfovet_map_.empty() && scoredocid_set_.empty()) {
                 std::set<std::string>::iterator iter = valid_docs_.begin();
                 for(; iter != valid_docs_.end(); iter++){
-                    skipList_.InsertNode(1, (*iter).c_str());
+                    scoredocid_set_.insert(ScoreDocIdNode(1,*iter));
                 }
                 break;
             }
@@ -101,11 +95,11 @@ int QueryProcess::GetScore()
                     score += log((DOC_CNT - ui_doc_count + 0.5) / (ui_doc_count + 0.5)) * ((D_BM25_K1 + 1)*ui_word_freq) \
                             / (D_BM25_K + ui_word_freq) * (D_BM25_K2 + 1) * 1 / (D_BM25_K2 + 1);
                 }
-                skipList_.InsertNode(score, doc_id.c_str());
+                scoredocid_set_.insert(ScoreDocIdNode(score , doc_id));
             }
         }
         break;
-    case SORT_TIMESTAMP:
+    case SORT_TIMESTAMP: // score by timestamp and sort by that score
         {
             std::map<std::string, KeyInfoVet>::iterator docid_keyinfovet_iter = docid_keyinfovet_map_.begin();
             for (; docid_keyinfovet_iter != docid_keyinfovet_map_.end(); ++ docid_keyinfovet_iter){
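For reference, the relevance branch above accumulates a BM25-style weight per matched term. Since the trailing (D_BM25_K2 + 1) * 1 / (D_BM25_K2 + 1) factor cancels to 1, the effective per-term contribution is (symbol mapping assumed from the code):

```latex
\Delta\,\text{score}
  \;=\; \log\!\left(\frac{\mathrm{DOC\_CNT} - n_t + 0.5}{n_t + 0.5}\right)
        \cdot \frac{(\mathrm{D\_BM25\_K1} + 1)\, f_{t,d}}{\mathrm{D\_BM25\_K} + f_{t,d}}
```

where n_t is ui_doc_count (the number of documents containing the term) and f_{t,d} is ui_word_freq (the term's frequency in the current document).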
@@ -121,20 +115,20 @@ int QueryProcess::GetScore()
                 }
 
                 double score = (double)key_info[0].created_time;
-                skipList_.InsertNode(score, doc_id.c_str());
+                scoredocid_set_.insert(ScoreDocIdNode(score , doc_id));
             }
         }
         break;
-    case DONT_SORT:
+    case DONT_SORT: // no sorting; doc ids stay in order
         {
             std::set<std::string>::iterator valid_docs_iter = valid_docs_.begin();
             for(; valid_docs_iter != valid_docs_.end(); valid_docs_iter++){
                 std::string doc_id = *valid_docs_iter;
-                skipList_.InsertNode(1, doc_id.c_str());
+                scoredocid_set_.insert(ScoreDocIdNode(1 , doc_id));
             }
         }
         break;
-    case SORT_FIELD_ASC:
+    case SORT_FIELD_ASC: // ascending/descending sort on a specified field
     case SORT_FIELD_DESC:
         {
             std::set<std::string>::iterator valid_docs_iter = valid_docs_.begin();
@@ -155,12 +149,10 @@ int QueryProcess::GetScore()
 void QueryProcess::SortScore(int& i_sequence , int& i_rank)
 {
     if ((SORT_FIELD_DESC == component_->SortType() || SORT_FIELD_ASC == component_->SortType())
-        && 0 == skipList_.GetSize()){
+        && scoredocid_set_.empty()){
         SortByCOrderOp(i_rank);
-    }else if(SORT_FIELD_ASC == component_->SortType()){
-        SortForwardBySkipList(i_sequence , i_rank);
-    }else{
-        SortBackwardBySkipList(i_sequence, i_rank);
+    }else{ // descending by default, higher scores first (geo distance queries are the exception)
+        DescSort(i_sequence, i_rank);
     }
 }
 
@@ -190,61 +182,54 @@ void QueryProcess::SortByCOrderOp(int& i_rank)
     }
 }
 
-void QueryProcess::SortForwardBySkipList(int& i_sequence , int& i_rank)
+void QueryProcess::AscSort(int& i_sequence , int& i_rank)
 {
-    log_debug("m_has_gis, size:%d ", skipList_.GetSize());
-    SkipListNode* tmp = skipList_.GetHeader()->level[0].forward;
+    log_debug("m_has_gis, size:%d ", (int)scoredocid_set_.size());
 
     int i_limit_start = component_->PageSize() * (component_->PageIndex() - 1);
     int i_limit_end = component_->PageSize() * component_->PageIndex() - 1;
 
-    while (tmp->level[0].forward != NULL) {
+    std::set<ScoreDocIdNode>::iterator iter = scoredocid_set_.begin();
+    for( ; iter != scoredocid_set_.end(); ++iter){
         // extra filtering via extra_filter_keys (for fields with low selectivity)
-        if(doc_manager_->CheckDocByExtraFilterKey(tmp->value) == false){
-            log_debug("CheckDocByExtraFilterKey failed, %s", tmp->value);
-            tmp = tmp->level[0].forward;
+        if(doc_manager_->CheckDocByExtraFilterKey(iter->s_docid) == false){
+            log_debug("CheckDocByExtraFilterKey failed, %s", iter->s_docid.c_str());
             continue;
         }
         i_sequence ++;
         i_rank ++;
         if(component_->ReturnAll() == 0){
             if (i_sequence < i_limit_start || i_sequence > i_limit_end) {
-                tmp = tmp->level[0].forward;
                 continue;
             }
         }
         Json::Value doc_info;
-        doc_info["doc_id"] = Json::Value(tmp->value);
-        doc_info["score"] = Json::Value(tmp->key);
+        doc_info["doc_id"] = Json::Value(iter->s_docid);
+        doc_info["score"] = Json::Value(iter->d_score);
         response_["result"].append(doc_info);
-        tmp = tmp->level[0].forward;
     }
 }
 
-void QueryProcess::SortBackwardBySkipList(int& i_sequence , int& i_rank)
+void QueryProcess::DescSort(int& i_sequence , int& i_rank)
 {
     int i_limit_start = component_->PageSize() * (component_->PageIndex() - 1);
     int i_limit_end = component_->PageSize() * component_->PageIndex() - 1;
 
-    SkipListNode *tmp = skipList_.GetFooter()->backward;
-    while(tmp->backward != NULL) {
-        if(doc_manager_->CheckDocByExtraFilterKey(tmp->value) == false){
-            tmp = tmp->backward;
+    std::set<ScoreDocIdNode>::reverse_iterator riter = scoredocid_set_.rbegin();
+    for( ; riter != scoredocid_set_.rend(); ++riter){
+        if(doc_manager_->CheckDocByExtraFilterKey(riter->s_docid) == false){
            continue;
        }
        i_sequence++;
        i_rank++;
        if (component_->ReturnAll() == 0){
            if (i_sequence < i_limit_start || i_sequence > i_limit_end) {
-                tmp = tmp->backward;
                continue;
            }
        }
        Json::Value doc_info;
-        doc_info["doc_id"] = Json::Value(tmp->value);
-        doc_info["score"] = Json::Value(tmp->key);
+        doc_info["doc_id"] = Json::Value(riter->s_docid);
+        doc_info["score"] = Json::Value(riter->d_score);
        response_["result"].append(doc_info);
-        tmp = tmp->backward;
    }
 }
 
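AscSort and DescSort are now just a forward and a reverse walk over the same ordered scoredocid_set_, with paging handled by skipping positions outside the requested window. A condensed, self-contained sketch of that pattern (page_size, page_index and the simplified comparator are stand-ins for the component_ and doc_manager_ calls in the real code):

```cpp
#include <cstdio>
#include <set>
#include <string>

struct Node {
    double score; std::string id;
    Node(double s, const std::string& i) : score(s), id(i) {}
    bool operator<(const Node& r) const { return score != r.score ? score < r.score : id < r.id; }
};

// Emit one page of results in descending score order (reverse walk over the set).
void desc_page(const std::set<Node>& docs, int page_size, int page_index) {
    int limit_start = page_size * (page_index - 1);
    int limit_end   = page_size * page_index - 1;
    int sequence = -1;
    for (std::set<Node>::const_reverse_iterator it = docs.rbegin(); it != docs.rend(); ++it) {
        ++sequence;                                                    // position among surviving docs
        if (sequence < limit_start || sequence > limit_end) continue;  // outside the requested page
        std::printf("%s %.3f\n", it->id.c_str(), it->score);
    }
}

int main() {
    std::set<Node> docs;
    docs.insert(Node(3.2, "a")); docs.insert(Node(1.7, "b")); docs.insert(Node(2.9, "c"));
    desc_page(docs, 2, 1);   // prints "a" then "c" (the two highest scores)
    return 0;
}
```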
@@ -47,7 +47,7 @@ enum E_INDEX_READ_QUERY_PROCESS{
     E_INDEX_READ_MATCH,
     E_INDEX_READ_TERM,
     E_INDEX_READ_RANGE,
-    E_INDEX_READ_RANGE_PRE_TERM
+    E_INDEX_READ_PRE_TERM
 };
 
 class QueryProcess{
@@ -81,8 +81,8 @@ protected:
 
 protected:
     void SortByCOrderOp(int& i_rank);
-    void SortForwardBySkipList(int& i_sequence , int& i_rank);
-    void SortBackwardBySkipList(int& i_sequence , int& i_rank);
+    void AscSort(int& i_sequence , int& i_rank);
+    void DescSort(int& i_sequence , int& i_rank);
     void AppendHighLightWord();
 
 protected:
@@ -91,7 +91,7 @@ protected:
     CTaskRequest* request_;
 
     Json::Value parse_value_;
-    SkipList skipList_;
+    std::set<ScoreDocIdNode> scoredocid_set_;
     Json::Value response_;
 
     ValidDocSet valid_docs_;
 
@@ -106,15 +106,25 @@ int RangeQueryProcess::GetValidDoc()
 
 
-RangeQueryPreTerminal::RangeQueryPreTerminal(const Json::Value& value)
-    : RangeQueryProcess(value)
+PreTerminal::PreTerminal(const Json::Value& value)
+    : QueryProcess(value)
     , candidate_doc_()
 {}
 
-RangeQueryPreTerminal::~RangeQueryPreTerminal()
+PreTerminal::~PreTerminal()
 {}
 
-int RangeQueryPreTerminal::GetValidDoc(){
+int PreTerminal::ParseContent(int logic_type){
+    log_info("PreTerminal do not need parse content");
+    return 0;
+}
+
+int PreTerminal::ParseContent(){
+    log_info("PreTerminal do not need parse content");
+    return 0;
+}
+
+int PreTerminal::GetValidDoc(){
     uint32_t count = 0;
     uint32_t N = 2;
     uint32_t limit_start = 0;
@@ -162,12 +172,12 @@ int RangeQueryPreTerminal::GetValidDoc(){
     return 0;
 }
 
-int RangeQueryPreTerminal::GetScore(){
+int PreTerminal::GetScore(){
     log_info("RangeQueryPreTerminal do not need get score");
     return 0;
 }
 
-void RangeQueryPreTerminal::SetResponse(){
+void PreTerminal::SetResponse(){
     response_["code"] = 0;
     int sequence = -1;
     int rank = 0;
 
@@ -25,12 +25,16 @@ private:
     friend class BoolQueryProcess;
 };
 
-class RangeQueryPreTerminal : public RangeQueryProcess{
+class PreTerminal : public QueryProcess{
 public:
-    RangeQueryPreTerminal(const Json::Value& value);
-    virtual~ RangeQueryPreTerminal();
+    PreTerminal(const Json::Value& value);
+    virtual~ PreTerminal();
 
+public:
+    virtual int ParseContent(int logic_type);
+
 private:
     virtual int ParseContent();
     virtual int GetValidDoc();
     virtual int GetScore();
     virtual void SetResponse();
@@ -42,39 +46,39 @@ private:
     friend class BoolQueryProcess;
 };
 
-class RangeQueryGenerator : private noncopyable{
-public:
-    RangeQueryGenerator() { };
-    virtual~ RangeQueryGenerator() { };
+// class RangeQueryGenerator : private noncopyable{
+// public:
+//     RangeQueryGenerator() { };
+//     virtual~ RangeQueryGenerator() { };
 
-public:
-    static RangeQueryGenerator* Instance(){
-        return CSingleton<RangeQueryGenerator>::Instance();
-    };
+// public:
+//     static RangeQueryGenerator* Instance(){
+//         return CSingleton<RangeQueryGenerator>::Instance();
+//     };
 
-    static void Destroy(){
-        CSingleton<RangeQueryGenerator>::Destroy();
-    };
+//     static void Destroy(){
+//         CSingleton<RangeQueryGenerator>::Destroy();
+//     };
 
-public:
-    // memory release is handled by the caller
-    QueryProcess* GetRangeQueryProcess(int iType , const Json::Value& parse_value){
-        QueryProcess* current_range_query = NULL;
-        switch (iType){
-        case E_INDEX_READ_RANGE:{
-                current_range_query = new RangeQueryProcess(parse_value);
-            }
-            break;
-        case E_INDEX_READ_RANGE_PRE_TERM:{
-                current_range_query = new RangeQueryPreTerminal(parse_value);
-            }
-            break;
-        default:
-            break;
-        }
+// public:
+//     // memory release is handled by the caller
+//     QueryProcess* GetRangeQueryProcess(int iType , const Json::Value& parse_value){
+//         QueryProcess* current_range_query = NULL;
+//         switch (iType){
+//         case E_INDEX_READ_RANGE:{
+//                 current_range_query = new RangeQueryProcess(parse_value);
+//             }
+//             break;
+//         case E_INDEX_READ_PRE_TERM:{
+//                 current_range_query = new PreTerminal(parse_value);
+//             }
+//             break;
+//         default:
+//             break;
+//         }
 
-        return current_range_query;
-    }
-};
+//         return current_range_query;
+//     }
+// };
 
 #endif
@@ -48,7 +48,7 @@ int TermQueryProcess::ParseContent(int logic_type){
 
 int TermQueryProcess::GetValidDoc(){
     std::vector<IndexInfo> index_info_vet;
-    int iret = ValidDocFilter::Instance()->MixTextInvertIndexSearch(component_->OrKeys()
+    int iret = ValidDocFilter::Instance()->TextInvertIndexSearch(component_->OrKeys()
                 , index_info_vet , high_light_word_, docid_keyinfovet_map_ , key_doccount_map_);
     if (iret != 0) { return iret; }
 
@@ -76,13 +76,7 @@ int SearchTask::Process(CTaskRequest *request)
     }else if (query.isMember(GEOSHAPE)){
         query_process_ = new GeoShapeQueryProcess(query[GEOSHAPE]);
     }else if (query.isMember(RANGE)){
-        if (component_->TerminalTag()){
-            query_process_ = RangeQueryGenerator::Instance()->GetRangeQueryProcess(E_INDEX_READ_RANGE_PRE_TERM
-                            , query[RANGE]);
-        }else{
-            query_process_ = RangeQueryGenerator::Instance()->GetRangeQueryProcess(E_INDEX_READ_RANGE
-                            , query[RANGE]);
-        }
+        query_process_ = new RangeQueryProcess(query[RANGE]);
     }else if (query.isMember(BOOL)){
         query_process_ = new BoolQueryProcess(query[BOOL]);
     }else{
@@ -1285,79 +1285,42 @@ double distanceSimplify(double lat1, double lng1, double lat2, double lng2) {
     return sqrt(Lx * Lx + Ly * Ly); // total distance from the planar rectangular diagonal formula
 }
 
-bool GetGisDistance(uint32_t appid, const string& latLeft, const string& lngLeft, hash_double_map& distances, hash_string_map& doc_content)
+bool GetGisDistance(uint32_t appid, const string& Latitude, const string& Longtitude, hash_double_map& distances, hash_string_map& doc_content)
 {
-    double lat1 = strToDouble(latLeft);
-    double lng1 = strToDouble(lngLeft);
-    int lon_len = strlen("longitude\":\"");
-    int lat_len = strlen("latitude\":\"");
+    double d_query_lat = strToDouble(Latitude);
+    double d_query_lng = strToDouble(Longtitude);
 
-    hash_string_map::iterator doc_it;
-    for (doc_it = doc_content.begin(); doc_it != doc_content.end(); doc_it++) {
+    hash_string_map::iterator doc_it = doc_content.begin();
+    for ( ; doc_it != doc_content.end(); doc_it++) {
         if (doc_it->second == "") {
             log_error("content is invalid, appid:%d, doc_id:%s, content:%s.",appid, (doc_it->first).c_str(), (doc_it->second).c_str());
             continue;
         }
-        // for aoi data, compute the shortest distance from the query point to the aoi instead
-        if(doc_it->second.find("longitude_list") != string::npos){
-            Json::Reader reader;
-            Json::Value data;
-            bool result = reader.parse(doc_it->second, data);
-            if (result && data.isMember("latitude_list") && data["latitude_list"].isArray()
-                && data.isMember("longitude_list") && data["longitude_list"].isArray()) {
-                if(data["latitude_list"].size() != data["longitude_list"].size()){
-                    log_error("latitude_list size not equal longitude_list size, appid:%d, doc_id:%s, content:%s",appid, (doc_it->first).c_str(), (doc_it->second).c_str());
-                    continue;
-                }
-                Polygon polygon;
-                for (uint32_t idx = 0; idx < data["longitude_list"].size(); ++idx) {
-                    vector<double> point;
-                    if(!data["longitude_list"][idx].isString() || !data["latitude_list"][idx].isString()){
-                        log_error("longitude or latitude is not string!");
-                        continue;
-                    }
-                    point.push_back(strToDouble(data["longitude_list"][idx].asString()));
-                    point.push_back(strToDouble(data["latitude_list"][idx].asString()));
-                    polygon.push_back(point);
-                }
-                distances[doc_it->first] = GetShortestDistance(lng1, lat1, polygon) / 1000;
-            }
-        } else {
-            double lat2;
-            double lng2;
-            /*
-            rapidjson::Document document;
-            bool result = document.Parse(doc_it->second.data()).HasParseError();
-            if(!result && document.HasMember("latitude") && document["latitude"].IsString() && document.HasMember("longitude") && document["longitude"].IsString()){
-                lat2 = strToDouble(document["latitude"].GetString());
-                lng2 = strToDouble(document["longitude"].GetString());
-            } else {
-                log_error("content hasn't gis info, appid:%d, doc_id:%s, content:%s.",appid, (doc_it->first).c_str(), (doc_it->second).c_str());
-                continue;
-            }*/
 
-            size_t pos1 = doc_it->second.find("longitude");
-            size_t pos2 = doc_it->second.find_first_of(",", pos1);
-            if(pos1 != string::npos && pos2 != string::npos){
-                string longitude = doc_it->second.substr(pos1+lon_len, pos2-pos1-lon_len-1);
-                lng2 = strToDouble(longitude);
-            } else {
-                log_debug("content has no longitude info, appid:%d, doc_id:%s, content:%s.",appid, (doc_it->first).c_str(), (doc_it->second).c_str());
-                distances[doc_it->first] = 1;
-                continue;
-            }
-            size_t pos3 = doc_it->second.find("latitude");
-            size_t pos4 = doc_it->second.find_first_of(",", pos3);
-            if(pos3 != string::npos && pos4 != string::npos){
-                string latitude = doc_it->second.substr(pos3+lat_len, pos4-pos3-lat_len-1);
-                lat2 = strToDouble(latitude);
-            } else {
-                log_debug("content has no latitude info, appid:%d, doc_id:%s, content:%s.",appid, (doc_it->first).c_str(), (doc_it->second).c_str());
-                continue;
-            }
-
-            double dis = distanceSimplify(lat1, lng1, lat2, lng2);
-            distances[doc_it->first] = round(dis * 1000)/1000;
-        }
+        Json::Reader read(Json::Features::strictMode());
+        Json::Value snap_json;
+        int ret = read.parse(doc_it->second , snap_json);
+        if (0 == ret){
+            log_error("parse json error [%s], errmsg : %s", doc_it->second.c_str(), read.getFormattedErrorMessages().c_str());
+            return false;
+        }
+
+        Json::Value::Members member = snap_json.getMemberNames();
+        Json::Value::Members::iterator iter = member.begin();
+        for (; iter != member.end(); ++iter){
+            uint32_t segment_tag = 0;
+            FieldInfo field_info;
+            uint32_t uiret = DBManager::Instance()->GetWordField(segment_tag, appid, *iter, field_info);
+            if (FIELD_GEO_POINT == field_info.field_type){
+                GeoPointContext geo_point(snap_json[*iter]);
+                double d_target_lat = strToDouble(geo_point.sLatitude);
+                double d_target_lng = strToDouble(geo_point.sLongtitude);
+                double dis = distanceSimplify(d_query_lat, d_query_lng, d_target_lat, d_target_lng);
+                distances[doc_it->first] = round(dis * 1000)/1000;
+            }else if (FIELD_GEO_SHAPE == field_info.field_type){
+                // temporarily not handled; to be added later
+                distances[doc_it->first] = 1;
+            }
+        }
     }
     return true;
 
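The rewritten GetGisDistance parses each stored document as JSON, asks the schema which members are geo points, and measures the query point against each one with distanceSimplify. A standalone sketch of the per-point extraction and of an equirectangular-style approximation (the "location" field name, the 111.32 km/degree constant and the formula itself are assumptions for illustration; the real code goes through GeoPointContext and DBManager, and distanceSimplify's body is not shown here beyond its final line):

```cpp
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <string>
#include <json/json.h>   // jsoncpp, as used in the diff

// Planar approximation in the spirit of distanceSimplify(): scale the longitude
// delta by cos(latitude) and take the rectangular diagonal. Result in kilometres.
static double approx_distance_km(double lat1, double lng1, double lat2, double lng2) {
    const double kDegKm = 111.32;                      // rough km per degree of latitude
    double dy = (lat2 - lat1) * kDegKm;
    double dx = (lng2 - lng1) * kDegKm * std::cos(lat1 * M_PI / 180.0);
    return std::sqrt(dx * dx + dy * dy);
}

int main() {
    // A stored document snapshot with one geo-point field (field name assumed).
    std::string content = "{\"location\":{\"latitude\":\"39.9042\",\"longitude\":\"116.4074\"}}";
    Json::Reader reader(Json::Features::strictMode());
    Json::Value snap;
    if (!reader.parse(content, snap)) {
        std::fprintf(stderr, "parse json error: %s\n", reader.getFormattedErrorMessages().c_str());
        return 1;
    }
    double target_lat = std::atof(snap["location"]["latitude"].asString().c_str());
    double target_lng = std::atof(snap["location"]["longitude"].asString().c_str());
    double dis = approx_distance_km(31.2304, 121.4737, target_lat, target_lng);  // query point: Shanghai
    std::printf("distance ~ %.3f km\n", dis);
    return 0;
}
```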
@@ -57,7 +57,7 @@ set<string> sets_intersection(set<string> v1, set<string> v2); // set intersection
 set<string> sets_union(set<string> v1, set<string> v2); // set union
 set<string> sets_difference(set<string> v1, set<string> v2); // set difference
 double strToDouble(const string& str);
-bool GetGisDistance(uint32_t appid, const string& latLeft, const string& lngLeft, hash_double_map& distances, hash_string_map& doc_content);
+bool GetGisDistance(uint32_t appid, const string& Latitude, const string& Longtitude, hash_double_map& distances, hash_string_map& doc_content);
 void ConvertCharIntelligent(const string word, IntelligentInfo &info, int &len);
 void ConvertIntelligent(const vector<Content> &result, IntelligentInfo &info, bool &flag);
 bool GetGisCode(string lng, string lat, string ip, double distance, vector<string>& gisCode);
 
@@ -275,12 +275,12 @@ int ValidDocFilter::Process(const std::vector<std::vector<FieldInfo> >& keys, st
     return 0;
 }
 
-int ValidDocFilter::PureTextInvertIndexSearch(const std::vector<std::vector<FieldInfo> >& keys
+int ValidDocFilter::HanPinTextInvertIndexSearch(const std::vector<std::vector<FieldInfo> >& keys
                     , std::vector<IndexInfo>& index_info_vet
                     , std::set<std::string>& highlightWord
                     , std::map<std::string, KeyInfoVet>& docid_keyinfo_map){
     if (keys.empty() || keys.size() > 1){
-        return -RT_GET_DOC_ERR;
+        return -RT_GET_FIELD_ERROR;
     }
     const std::vector<FieldInfo>& key_field_info_vet = keys[0];
     std::vector<FieldInfo>::const_iterator iter = key_field_info_vet.cbegin();
@@ -350,13 +350,13 @@ int ValidDocFilter::RangeQueryInvertIndexSearch(const std::vector<std::vector<Fi
     return 0;
 }
 
-int ValidDocFilter::MixTextInvertIndexSearch(const std::vector<std::vector<FieldInfo> >& keys
+int ValidDocFilter::TextInvertIndexSearch(const std::vector<std::vector<FieldInfo> >& keys
                     , std::vector<IndexInfo>& index_info_vet
                     , std::set<std::string>& highlightWord
                     , std::map<std::string, KeyInfoVet>& docid_keyinfo_map
                     , std::map<std::string, uint32_t>& key_doccount_map){
     if (keys.empty() || keys.size() > 1){
-        return -RT_GET_DOC_ERR;
+        return -RT_GET_FIELD_ERROR;
     }
     const std::vector<FieldInfo>& key_field_info_vet = keys[0];
     std::vector<FieldInfo>::const_iterator iter = key_field_info_vet.cbegin();
@@ -52,7 +52,7 @@ public:
                     , std::set<std::string>& highlightWord, std::map<std::string, KeyInfoVet>& docid_keyinfo_map
                     , std::map<std::string, uint32_t>& key_doccount_map);
 
-    int PureTextInvertIndexSearch(const std::vector<std::vector<FieldInfo> >& keys
+    int HanPinTextInvertIndexSearch(const std::vector<std::vector<FieldInfo> >& keys
                     , std::vector<IndexInfo>& index_info_vet
                     , std::set<std::string>& highlightWord
                     , std::map<std::string, KeyInfoVet>& docid_keyinfo_map);
@@ -60,7 +60,7 @@ public:
     int RangeQueryInvertIndexSearch(const std::vector<std::vector<FieldInfo> >& keys
                     , std::vector<IndexInfo>& index_info_vet);
 
-    int MixTextInvertIndexSearch(const std::vector<std::vector<FieldInfo> >& keys
+    int TextInvertIndexSearch(const std::vector<std::vector<FieldInfo> >& keys
                     , std::vector<IndexInfo>& index_info_vet
                     , std::set<std::string>& highlightWord
                     , std::map<std::string, KeyInfoVet>& docid_keyinfo_map