Mod:remove_skiplist_fix_query_bugs

This commit is contained in:
chenyujie28 2021-06-10 19:30:38 +08:00
parent 149dc03bfc
commit 4ccbea999a
16 changed files with 1408 additions and 1400 deletions

View File

@ -25,6 +25,7 @@
#include <tr1/unordered_map>
#include <limits.h>
#include <map>
#include <cmath>
#define DOC_CNT 10000
#define MAX_DOCID_LENGTH 32
@ -37,6 +38,8 @@ const uint32_t MAX_SEARCH_LEN = 60;
const uint32_t SINGLE_WORD_LEN = 18;
const uint32_t MAX_VALUE_LEN = 51200;
const double DOUBLE_EPS = 1e-3;
typedef std::tr1::unordered_map<std::string, double> hash_double_map;
typedef std::tr1::unordered_map<std::string, std::string> hash_string_map;
@ -92,19 +95,19 @@ enum SORTTYPE {
enum FieldType{
FIELD_INT = 1,
FIELD_STRING,
FIELD_TEXT,
FIELD_IP,
FIELD_LNG,
FIELD_LAT,
FIELD_GIS,
FIELD_DISTANCE,
FIELD_DOUBLE,
FIELD_LONG,
FIELD_STRING = 2,
FIELD_TEXT = 3,
FIELD_IP = 4,
FIELD_GEO_POINT = 5,
FIELD_LAT = 6,
FIELD_GIS = 7,
FIELD_DISTANCE = 8,
FIELD_DOUBLE = 9,
FIELD_LONG = 10,
FIELD_INDEX = 11,
FIELD_LNG_ARRAY,
FIELD_LAT_ARRAY,
FIELD_WKT,
FIELD_LNG_ARRAY = 12,
FIELD_LAT_ARRAY = 13,
FIELD_GEO_SHAPE = 14
};
enum SEGMENTTAG {
@ -251,6 +254,23 @@ enum KeyType
INVERTKEY,
};
// A (score, doc id) pair used as the element type of the ordered result set.
//
// Ordering: primarily by score (ascending), ties broken by doc id
// (lexicographic, ascending). Scores are compared exactly on purpose: a
// tolerance-based comparison ("equal if within epsilon") is not a strict weak
// ordering, because three scores can each be within epsilon of their neighbour
// while the outer two are not, which lets a < c, c < b and b < a all hold.
// std::set requires a strict weak ordering, so such a comparator makes the
// container's behaviour undefined.
struct ScoreDocIdNode{
    double d_score;       // sort key
    std::string s_docid;  // tie-breaker and payload

    ScoreDocIdNode(double score , const std::string& docid)
        : d_score(score)
        , s_docid(docid)
    { }

    // Returns true when *this sorts strictly before score_docid_node.
    bool operator<(const ScoreDocIdNode& score_docid_node) const {
        if (d_score < score_docid_node.d_score) {
            return true;
        }
        if (score_docid_node.d_score < d_score) {
            return false;
        }
        // Scores compare equal: fall back to the doc id so that distinct
        // documents with the same score are both kept by the set.
        return s_docid.compare(score_docid_node.s_docid) < 0;
    }
};
struct IndexInfo {
uint32_t appid;
std::string doc_id;

View File

@ -18,9 +18,9 @@ BoolQueryProcess::BoolQueryProcess(const Json::Value& value)
query_process_map_.insert(std::make_pair(E_INDEX_READ_TERM
, new TermQueryProcess(parse_value_ )));
query_process_map_.insert(std::make_pair(E_INDEX_READ_RANGE
, RangeQueryGenerator::Instance()->GetRangeQueryProcess(E_INDEX_READ_RANGE , parse_value_)));
query_process_map_.insert(std::make_pair(E_INDEX_READ_RANGE_PRE_TERM
, RangeQueryGenerator::Instance()->GetRangeQueryProcess(E_INDEX_READ_RANGE_PRE_TERM , parse_value_)));
, new RangeQueryProcess(parse_value_ )));
query_process_map_.insert(std::make_pair(E_INDEX_READ_PRE_TERM
, new PreTerminal(parse_value_ )));
}
BoolQueryProcess::~BoolQueryProcess()
@ -122,7 +122,7 @@ int BoolQueryProcess::ParseContent(int logic_type){
int BoolQueryProcess::GetValidDoc(){
bool bRet = false;
if (component_->TerminalTag()){
range_query_pre_term_ = dynamic_cast<RangeQueryPreTerminal*>(query_process_map_[E_INDEX_READ_RANGE_PRE_TERM]);
range_query_pre_term_ = dynamic_cast<PreTerminal*>(query_process_map_[E_INDEX_READ_PRE_TERM]);
if (range_query_pre_term_ != NULL){
return range_query_pre_term_->GetValidDoc();
}
@ -230,7 +230,7 @@ int BoolQueryProcess::InitQueryProcess(uint32_t type , const Json::Value& value)
} else if(value.isMember(RANGE)){
parse_value = parse_value_[RANGE];
if (component_->TerminalTag()){
query_type = E_INDEX_READ_RANGE_PRE_TERM;
query_type = E_INDEX_READ_PRE_TERM;
}else{
query_type = E_INDEX_READ_RANGE;

View File

@ -4,7 +4,7 @@
#include "query_process.h"
class RangeQueryProcess;
class RangeQueryPreTerminal;
class PreTerminal;
class GeoDistanceQueryProcess;
class BoolQueryProcess : public QueryProcess{
@ -31,7 +31,7 @@ private:
private:
std::map<int , QueryProcess*> query_process_map_;
RangeQueryProcess* range_query_;
RangeQueryPreTerminal* range_query_pre_term_;
PreTerminal* range_query_pre_term_;
GeoDistanceQueryProcess* geo_distance_query_;
};

View File

@ -69,7 +69,7 @@ int GeoDistanceQueryProcess::ParseContent(int logic_type){
int GeoDistanceQueryProcess::GetValidDoc(){
std::vector<IndexInfo> index_info_vet;
int iret = ValidDocFilter::Instance()->MixTextInvertIndexSearch(component_->AndKeys(), index_info_vet
int iret = ValidDocFilter::Instance()->TextInvertIndexSearch(component_->AndKeys(), index_info_vet
, high_light_word_, docid_keyinfovet_map_ , key_doccount_map_);
if (iret != 0) { return iret; }
@ -86,6 +86,8 @@ int GeoDistanceQueryProcess::GetScore(){
{
case SORT_RELEVANCE:
case SORT_TIMESTAMP:
case SORT_FIELD_ASC:
case SORT_FIELD_DESC:
{
hash_double_map::iterator dis_iter = o_distance_.begin();
for(; dis_iter != o_distance_.end(); ++dis_iter){
@ -93,17 +95,23 @@ int GeoDistanceQueryProcess::GetScore(){
double score = dis_iter->second;
if ((o_geo_point_.d_distance > -0.0001 && o_geo_point_.d_distance < 0.0001)
|| (score + 1e-6 <= o_geo_point_.d_distance)){
skipList_.InsertNode(score, doc_id.c_str());
scoredocid_set_.insert(ScoreDocIdNode(score , doc_id));
}
}
}
break;
case DONT_SORT:
case SORT_FIELD_ASC:
case SORT_FIELD_DESC:
{
// do nothing
hash_double_map::iterator dis_iter = o_distance_.begin();
for(; dis_iter != o_distance_.end(); ++dis_iter){
std::string doc_id = dis_iter->first;
if ((o_geo_point_.d_distance > -0.0001 && o_geo_point_.d_distance < 0.0001)
|| (dis_iter->second + 1e-6 <= o_geo_point_.d_distance)){
scoredocid_set_.insert(ScoreDocIdNode(1 , doc_id));
}
}
}
break;
default:
break;
}
@ -113,5 +121,10 @@ int GeoDistanceQueryProcess::GetScore(){
// Emits the scored documents for a geo-distance query, choosing the iteration
// direction from the component's sort type: AscSort for SORT_FIELD_ASC,
// DescSort for every other sort type.
// i_sequence / i_rank are forwarded by reference to the chosen sort routine.
void GeoDistanceQueryProcess::SortScore(int& i_sequence , int& i_rank)
{
// NOTE(review): this SortForwardBySkipList call looks like the pre-change line
// of the diff (the declaration is replaced by AscSort/DescSort elsewhere in
// this commit) - confirm it is not meant to run alongside the branch below.
SortForwardBySkipList(i_sequence , i_rank);
// Ascending by default: nearer distances come first.
// NOTE(review): the code only sorts ascending for SORT_FIELD_ASC - confirm the
// intended default.
if (SORT_FIELD_ASC == component_->SortType()){
AscSort(i_sequence , i_rank);
}else { // Handles descending order and the "no sort" case.
DescSort(i_sequence , i_rank);
}
}

View File

@ -62,8 +62,24 @@ int MatchQueryProcess::ParseContent(int logic_type){
int MatchQueryProcess::GetValidDoc(){
std::vector<IndexInfo> index_info_vet;
int iret = ValidDocFilter::Instance()->PureTextInvertIndexSearch(component_->OrKeys()
, index_info_vet , high_light_word_, docid_keyinfovet_map_);
if (component_->OrKeys().empty()){
return -RT_GET_FIELD_ERROR;
}
int iret = 0;
if (SEGMENT_DEFAULT == component_->OrKeys()[FIRST_TEST_INDEX][FIRST_SPLIT_WORD_INDEX].segment_tag){
iret = ValidDocFilter::Instance()->TextInvertIndexSearch(component_->OrKeys()
, index_info_vet
, high_light_word_
, docid_keyinfovet_map_
, key_doccount_map_);
}else{
iret = ValidDocFilter::Instance()->HanPinTextInvertIndexSearch(component_->OrKeys()
, index_info_vet
, high_light_word_
, docid_keyinfovet_map_);
}
if (iret != 0) { return iret; }
bool bRet = doc_manager_->GetDocContent(index_info_vet , valid_docs_);

View File

@ -21,6 +21,9 @@
#include "query_process.h"
#define FIRST_TEST_INDEX 0
#define FIRST_SPLIT_WORD_INDEX 0
class MatchQueryProcess: public QueryProcess{
public:
MatchQueryProcess(const Json::Value& value);

View File

@ -2,26 +2,20 @@
#include <assert.h>
#include "../valid_doc_filter.h"
#include "../order_op.h"
#include "../result_cache.h"
#include "cachelist_unit.h"
extern CCacheListUnit* cachelist;
QueryProcess::QueryProcess(const Json::Value& value)
: component_(NULL)
, doc_manager_(NULL)
, request_(NULL)
, parse_value_(value)
, skipList_()
, scoredocid_set_()
, response_()
, valid_docs_()
, high_light_word_()
, docid_keyinfovet_map_()
, key_doccount_map_()
, sort_field_type_()
{
skipList_.InitList();
}
{ }
QueryProcess::~QueryProcess()
{ }
@ -73,13 +67,13 @@ int QueryProcess::GetScore()
{
switch (component_->SortType())
{
case SORT_RELEVANCE:
case SORT_RELEVANCE: // 按照相关度得分,并以此排序
{
// 范围查的时候如果不指定排序类型需要在这里对skipList进行赋值
if (docid_keyinfovet_map_.empty() && skipList_.GetSize() == 0) {
if (docid_keyinfovet_map_.empty() && scoredocid_set_.empty()) {
std::set<std::string>::iterator iter = valid_docs_.begin();
for(; iter != valid_docs_.end(); iter++){
skipList_.InsertNode(1, (*iter).c_str());
scoredocid_set_.insert(ScoreDocIdNode(1,*iter));
}
break;
}
@ -101,11 +95,11 @@ int QueryProcess::GetScore()
score += log((DOC_CNT - ui_doc_count + 0.5) / (ui_doc_count + 0.5)) * ((D_BM25_K1 + 1)*ui_word_freq) \
/ (D_BM25_K + ui_word_freq) * (D_BM25_K2 + 1) * 1 / (D_BM25_K2 + 1);
}
skipList_.InsertNode(score, doc_id.c_str());
scoredocid_set_.insert(ScoreDocIdNode(score , doc_id));
}
}
break;
case SORT_TIMESTAMP:
case SORT_TIMESTAMP: // 按照时间戳得分,并以此排序
{
std::map<std::string, KeyInfoVet>::iterator docid_keyinfovet_iter = docid_keyinfovet_map_.begin();
for (; docid_keyinfovet_iter != docid_keyinfovet_map_.end(); ++ docid_keyinfovet_iter){
@ -121,20 +115,20 @@ int QueryProcess::GetScore()
}
double score = (double)key_info[0].created_time;
skipList_.InsertNode(score, doc_id.c_str());
scoredocid_set_.insert(ScoreDocIdNode(score , doc_id));
}
}
break;
case DONT_SORT:
case DONT_SORT: // 不排序docid有序
{
std::set<std::string>::iterator valid_docs_iter = valid_docs_.begin();
for(; valid_docs_iter != valid_docs_.end(); valid_docs_iter++){
std::string doc_id = *valid_docs_iter;
skipList_.InsertNode(1, doc_id.c_str());
scoredocid_set_.insert(ScoreDocIdNode(1 , doc_id));
}
}
break;
case SORT_FIELD_ASC:
case SORT_FIELD_ASC: // 按照指定字段进行升降排序
case SORT_FIELD_DESC:
{
std::set<std::string>::iterator valid_docs_iter = valid_docs_.begin();
@ -155,12 +149,10 @@ int QueryProcess::GetScore()
// Emits the query results in sorted order.
// When the sort type is SORT_FIELD_ASC or SORT_FIELD_DESC and no scored nodes
// were collected, sorting is delegated to SortByCOrderOp; otherwise the scored
// nodes are walked by one of the score-ordered routines below.
// NOTE(review): this span shows both the pre-change (skipList_) and post-change
// (scoredocid_set_) lines of the diff without markers - read accordingly.
void QueryProcess::SortScore(int& i_sequence , int& i_rank)
{
if ((SORT_FIELD_DESC == component_->SortType() || SORT_FIELD_ASC == component_->SortType())
&& 0 == skipList_.GetSize()){
&& scoredocid_set_.empty()){
SortByCOrderOp(i_rank);
}else if(SORT_FIELD_ASC == component_->SortType()){
SortForwardBySkipList(i_sequence , i_rank);
}else{
SortBackwardBySkipList(i_sequence, i_rank);
}else{ // Descending by default: higher scores first (geo-location queries excepted).
DescSort(i_sequence, i_rank);
}
}
@ -190,61 +182,54 @@ void QueryProcess::SortByCOrderOp(int& i_rank)
}
}
void QueryProcess::SortForwardBySkipList(int& i_sequence , int& i_rank)
void QueryProcess::AscSort(int& i_sequence , int& i_rank)
{
log_debug("m_has_gis, size:%d ", skipList_.GetSize());
SkipListNode* tmp = skipList_.GetHeader()->level[0].forward;
log_debug("m_has_gis, size:%d ", scoredocid_set_.size());
int i_limit_start = component_->PageSize() * (component_->PageIndex() - 1);
int i_limit_end = component_->PageSize() * component_->PageIndex() - 1;
while (tmp->level[0].forward != NULL) {
std::set<ScoreDocIdNode>::iterator iter = scoredocid_set_.begin();
for( ;iter != scoredocid_set_.end(); ++iter){
// 通过extra_filter_keys进行额外过滤针对区分度不高的字段
if(doc_manager_->CheckDocByExtraFilterKey(tmp->value) == false){
log_debug("CheckDocByExtraFilterKey failed, %s", tmp->value);
tmp = tmp->level[0].forward;
if(doc_manager_->CheckDocByExtraFilterKey(iter->s_docid) == false){
log_debug("CheckDocByExtraFilterKey failed, %s", iter->s_docid);
continue;
}
i_sequence ++;
i_rank ++;
if(component_->ReturnAll() == 0){
if (i_sequence < i_limit_start || i_sequence > i_limit_end) {
tmp = tmp->level[0].forward;
continue;
}
}
Json::Value doc_info;
doc_info["doc_id"] = Json::Value(tmp->value);
doc_info["score"] = Json::Value(tmp->key);
doc_info["doc_id"] = Json::Value(iter->s_docid);
doc_info["score"] = Json::Value(iter->d_score);
response_["result"].append(doc_info);
tmp = tmp->level[0].forward;
}
}
void QueryProcess::SortBackwardBySkipList(int& i_sequence , int& i_rank)
void QueryProcess::DescSort(int& i_sequence , int& i_rank)
{
int i_limit_start = component_->PageSize() * (component_->PageIndex() - 1);
int i_limit_end = component_->PageSize() * component_->PageIndex() - 1;
SkipListNode *tmp = skipList_.GetFooter()->backward;
while(tmp->backward != NULL) {
if(doc_manager_->CheckDocByExtraFilterKey(tmp->value) == false){
tmp = tmp->backward;
std::set<ScoreDocIdNode>::iterator riter = scoredocid_set_.rbegin();
for( ;riter != scoredocid_set_.rend(); ++riter){
if(doc_manager_->CheckDocByExtraFilterKey(riter->s_docid) == false){
continue;
}
i_sequence++;
i_rank++;
if (component_->ReturnAll() == 0){
if (i_sequence < i_limit_start || i_sequence > i_limit_end) {
tmp = tmp->backward;
continue;
}
}
Json::Value doc_info;
doc_info["doc_id"] = Json::Value(tmp->value);
doc_info["score"] = Json::Value(tmp->key);
doc_info["doc_id"] = Json::Value(riter->s_docid);
doc_info["score"] = Json::Value(riter->d_score);
response_["result"].append(doc_info);
tmp = tmp->backward;
}
}

View File

@ -47,7 +47,7 @@ enum E_INDEX_READ_QUERY_PROCESS{
E_INDEX_READ_MATCH,
E_INDEX_READ_TERM,
E_INDEX_READ_RANGE,
E_INDEX_READ_RANGE_PRE_TERM
E_INDEX_READ_PRE_TERM
};
class QueryProcess{
@ -81,8 +81,8 @@ protected:
protected:
void SortByCOrderOp(int& i_rank);
void SortForwardBySkipList(int& i_sequence , int& i_rank);
void SortBackwardBySkipList(int& i_sequence , int& i_rank);
void AscSort(int& i_sequence , int& i_rank);
void DescSort(int& i_sequence , int& i_rank);
void AppendHighLightWord();
protected:
@ -91,7 +91,7 @@ protected:
CTaskRequest* request_;
Json::Value parse_value_;
SkipList skipList_;
std::set<ScoreDocIdNode> scoredocid_set_;
Json::Value response_;
ValidDocSet valid_docs_;

View File

@ -106,15 +106,25 @@ int RangeQueryProcess::GetValidDoc()
RangeQueryPreTerminal::RangeQueryPreTerminal(const Json::Value& value)
: RangeQueryProcess(value)
PreTerminal::PreTerminal(const Json::Value& value)
: QueryProcess(value)
, candidate_doc_()
{}
RangeQueryPreTerminal::~RangeQueryPreTerminal()
PreTerminal::~PreTerminal()
{}
int RangeQueryPreTerminal::GetValidDoc(){
// PreTerminal has nothing to parse: logs that fact and returns 0.
// logic_type is accepted but not used by this body.
int PreTerminal::ParseContent(int logic_type){
log_info("PreTerminal do not need parse content");
return 0;
}
// Parameterless overload: PreTerminal has nothing to parse, so this only logs
// that fact and returns 0.
int PreTerminal::ParseContent(){
log_info("PreTerminal do not need parse content");
return 0;
}
int PreTerminal::GetValidDoc(){
uint32_t count = 0;
uint32_t N = 2;
uint32_t limit_start = 0;
@ -162,12 +172,12 @@ int RangeQueryPreTerminal::GetValidDoc(){
return 0;
}
int RangeQueryPreTerminal::GetScore(){
int PreTerminal::GetScore(){
log_info("RangeQueryPreTerminal do not need get score");
return 0;
}
void RangeQueryPreTerminal::SetResponse(){
void PreTerminal::SetResponse(){
response_["code"] = 0;
int sequence = -1;
int rank = 0;

View File

@ -25,12 +25,16 @@ private:
friend class BoolQueryProcess;
};
class RangeQueryPreTerminal : public RangeQueryProcess{
class PreTerminal : public QueryProcess{
public:
RangeQueryPreTerminal(const Json::Value& value);
virtual~ RangeQueryPreTerminal();
PreTerminal(const Json::Value& value);
virtual~ PreTerminal();
public:
virtual int ParseContent(int logic_type);
private:
virtual int ParseContent();
virtual int GetValidDoc();
virtual int GetScore();
virtual void SetResponse();
@ -42,39 +46,39 @@ private:
friend class BoolQueryProcess;
};
class RangeQueryGenerator : private noncopyable{
public:
RangeQueryGenerator() { };
virtual~ RangeQueryGenerator() { };
// class RangeQueryGenerator : private noncopyable{
// public:
// RangeQueryGenerator() { };
// virtual~ RangeQueryGenerator() { };
public:
static RangeQueryGenerator* Instance(){
return CSingleton<RangeQueryGenerator>::Instance();
};
// public:
// static RangeQueryGenerator* Instance(){
// return CSingleton<RangeQueryGenerator>::Instance();
// };
static void Destroy(){
CSingleton<RangeQueryGenerator>::Destroy();
};
// static void Destroy(){
// CSingleton<RangeQueryGenerator>::Destroy();
// };
public:
// 内存释放由调用方处理
QueryProcess* GetRangeQueryProcess(int iType , const Json::Value& parse_value){
QueryProcess* current_range_query = NULL;
switch (iType){
case E_INDEX_READ_RANGE:{
current_range_query = new RangeQueryProcess(parse_value);
}
break;
case E_INDEX_READ_RANGE_PRE_TERM:{
current_range_query = new RangeQueryPreTerminal(parse_value);
}
break;
default:
break;
}
// public:
// // 内存释放由调用方处理
// QueryProcess* GetRangeQueryProcess(int iType , const Json::Value& parse_value){
// QueryProcess* current_range_query = NULL;
// switch (iType){
// case E_INDEX_READ_RANGE:{
// current_range_query = new RangeQueryProcess(parse_value);
// }
// break;
// case E_INDEX_READ_PRE_TERM:{
// current_range_query = new PreTerminal(parse_value);
// }
// break;
// default:
// break;
// }
return current_range_query;
}
};
// return current_range_query;
// }
// };
#endif

View File

@ -48,7 +48,7 @@ int TermQueryProcess::ParseContent(int logic_type){
int TermQueryProcess::GetValidDoc(){
std::vector<IndexInfo> index_info_vet;
int iret = ValidDocFilter::Instance()->MixTextInvertIndexSearch(component_->OrKeys()
int iret = ValidDocFilter::Instance()->TextInvertIndexSearch(component_->OrKeys()
, index_info_vet , high_light_word_, docid_keyinfovet_map_ , key_doccount_map_);
if (iret != 0) { return iret; }

View File

@ -76,13 +76,7 @@ int SearchTask::Process(CTaskRequest *request)
}else if (query.isMember(GEOSHAPE)){
query_process_ = new GeoShapeQueryProcess(query[GEOSHAPE]);
}else if (query.isMember(RANGE)){
if (component_->TerminalTag()){
query_process_ = RangeQueryGenerator::Instance()->GetRangeQueryProcess(E_INDEX_READ_RANGE_PRE_TERM
, query[RANGE]);
}else{
query_process_ = RangeQueryGenerator::Instance()->GetRangeQueryProcess(E_INDEX_READ_RANGE
, query[RANGE]);
}
query_process_ = new RangeQueryProcess(query[RANGE]);
}else if (query.isMember(BOOL)){
query_process_ = new BoolQueryProcess(query[BOOL]);
}else{

View File

@ -1285,79 +1285,42 @@ double distanceSimplify(double lat1, double lng1, double lat2, double lng2) {
return sqrt(Lx * Lx + Ly * Ly); // 用平面的矩形对角距离公式计算总距离
}
bool GetGisDistance(uint32_t appid, const string& latLeft, const string& lngLeft, hash_double_map& distances, hash_string_map& doc_content)
bool GetGisDistance(uint32_t appid, const string& Latitude, const string& Longtitude, hash_double_map& distances, hash_string_map& doc_content)
{
double lat1 = strToDouble(latLeft);
double lng1 = strToDouble(lngLeft);
int lon_len = strlen("longitude\":\"");
int lat_len = strlen("latitude\":\"");
double d_query_lat = strToDouble(Latitude);
double d_query_lng = strToDouble(Longtitude);
hash_string_map::iterator doc_it;
for (doc_it = doc_content.begin(); doc_it != doc_content.end(); doc_it++) {
hash_string_map::iterator doc_it = doc_content.begin();
for ( ; doc_it != doc_content.end(); doc_it++) {
if (doc_it->second == "") {
log_error("content is invalid, appid:%d, doc_id:%s, content:%s.",appid, (doc_it->first).c_str(), (doc_it->second).c_str());
continue;
}
// 如果是aoi数据则改为计算查询点到aoi的最短距离
if(doc_it->second.find("longitude_list") != string::npos){
Json::Reader reader;
Json::Value data;
bool result = reader.parse(doc_it->second, data);
if (result && data.isMember("latitude_list") && data["latitude_list"].isArray()
&& data.isMember("longitude_list") && data["longitude_list"].isArray()) {
if(data["latitude_list"].size() != data["longitude_list"].size()){
log_error("latitude_list size not equal longitude_list size, appid:%d, doc_id:%s, content:%s",appid, (doc_it->first).c_str(), (doc_it->second).c_str());
continue;
}
Polygon polygon;
for (uint32_t idx = 0; idx < data["longitude_list"].size(); ++idx) {
vector<double> point;
if(!data["longitude_list"][idx].isString() || !data["latitude_list"][idx].isString()){
log_error("longitude or latitude is not string!");
continue;
}
point.push_back(strToDouble(data["longitude_list"][idx].asString()));
point.push_back(strToDouble(data["latitude_list"][idx].asString()));
polygon.push_back(point);
}
distances[doc_it->first] = GetShortestDistance(lng1, lat1, polygon) / 1000;
}
} else {
double lat2;
double lng2;
/*
rapidjson::Document document;
bool result = document.Parse(doc_it->second.data()).HasParseError();
if(!result && document.HasMember("latitude") && document["latitude"].IsString() && document.HasMember("longitude") && document["longitude"].IsString()){
lat2 = strToDouble(document["latitude"].GetString());
lng2 = strToDouble(document["longitude"].GetString());
} else {
log_error("content hasn't gis info, appid:%d, doc_id:%s, content:%s.",appid, (doc_it->first).c_str(), (doc_it->second).c_str());
continue;
}*/
size_t pos1 = doc_it->second.find("longitude");
size_t pos2 = doc_it->second.find_first_of(",", pos1);
if(pos1 != string::npos && pos2 != string::npos){
string longitude = doc_it->second.substr(pos1+lon_len, pos2-pos1-lon_len-1);
lng2 = strToDouble(longitude);
} else {
log_debug("content has no longitude info, appid:%d, doc_id:%s, content:%s.",appid, (doc_it->first).c_str(), (doc_it->second).c_str());
distances[doc_it->first] = 1;
continue;
}
size_t pos3 = doc_it->second.find("latitude");
size_t pos4 = doc_it->second.find_first_of(",", pos3);
if(pos3 != string::npos && pos4 != string::npos){
string latitude = doc_it->second.substr(pos3+lat_len, pos4-pos3-lat_len-1);
lat2 = strToDouble(latitude);
} else {
log_debug("content has no latitude info, appid:%d, doc_id:%s, content:%s.",appid, (doc_it->first).c_str(), (doc_it->second).c_str());
continue;
Json::Reader read(Json::Features::strictMode());
Json::Value snap_json;
int ret = read.parse(doc_it->second , snap_json);
if (0 == ret){
log_error("parse json error [%s], errmsg : %s", doc_it->second.c_str(), read.getFormattedErrorMessages().c_str());
return false;
}
double dis = distanceSimplify(lat1, lng1, lat2, lng2);
Json::Value::Members member = snap_json.getMemberNames();
Json::Value::Members::iterator iter = member.begin();
for (; iter != member.end(); ++iter){
uint32_t segment_tag = 0;
FieldInfo field_info;
uint32_t uiret = DBManager::Instance()->GetWordField(segment_tag, appid, *iter, field_info);
if (FIELD_GEO_POINT == field_info.field_type){
GeoPointContext geo_point(snap_json[*iter]);
double d_target_lat = strToDouble(geo_point.sLatitude);
double d_target_lng = strToDouble(geo_point.sLongtitude);
double dis = distanceSimplify(d_query_lat, d_query_lng, d_target_lat, d_target_lng);
distances[doc_it->first] = round(dis * 1000)/1000;
}else if (FIELD_GEO_SHAPE == field_info.field_type){
// temp no handle ,latter add
distances[doc_it->first] = 1;
}
}
}
return true;

View File

@ -57,7 +57,7 @@ set<string> sets_intersection(set<string> v1, set<string> v2); // 集合求交
set<string> sets_union(set<string> v1, set<string> v2); // 集合求并集
set<string> sets_difference(set<string> v1, set<string> v2); // 集合求差集
double strToDouble(const string& str);
bool GetGisDistance(uint32_t appid, const string& latLeft, const string& lngLeft, hash_double_map& distances, hash_string_map& doc_content);
bool GetGisDistance(uint32_t appid, const string& Latitude, const string& Longtitude, hash_double_map& distances, hash_string_map& doc_content);
void ConvertCharIntelligent(const string word, IntelligentInfo &info, int &len);
void ConvertIntelligent(const vector<Content> &result, IntelligentInfo &info, bool &flag);
bool GetGisCode(string lng, string lat, string ip, double distance, vector<string>& gisCode);

View File

@ -275,12 +275,12 @@ int ValidDocFilter::Process(const std::vector<std::vector<FieldInfo> >& keys, st
return 0;
}
int ValidDocFilter::PureTextInvertIndexSearch(const std::vector<std::vector<FieldInfo> >& keys
int ValidDocFilter::HanPinTextInvertIndexSearch(const std::vector<std::vector<FieldInfo> >& keys
, std::vector<IndexInfo>& index_info_vet
, std::set<std::string>& highlightWord
, std::map<std::string, KeyInfoVet>& docid_keyinfo_map){
if (keys.empty() || keys.size() > 1){
return -RT_GET_DOC_ERR;
return -RT_GET_FIELD_ERROR;
}
const std::vector<FieldInfo>& key_field_info_vet = keys[0];
std::vector<FieldInfo>::const_iterator iter = key_field_info_vet.cbegin();
@ -350,13 +350,13 @@ int ValidDocFilter::RangeQueryInvertIndexSearch(const std::vector<std::vector<Fi
return 0;
}
int ValidDocFilter::MixTextInvertIndexSearch(const std::vector<std::vector<FieldInfo> >& keys
int ValidDocFilter::TextInvertIndexSearch(const std::vector<std::vector<FieldInfo> >& keys
, std::vector<IndexInfo>& index_info_vet
, std::set<std::string>& highlightWord
, std::map<std::string, KeyInfoVet>& docid_keyinfo_map
, std::map<std::string, uint32_t>& key_doccount_map){
if (keys.empty() || keys.size() > 1){
return -RT_GET_DOC_ERR;
return -RT_GET_FIELD_ERROR;
}
const std::vector<FieldInfo>& key_field_info_vet = keys[0];
std::vector<FieldInfo>::const_iterator iter = key_field_info_vet.cbegin();

View File

@ -52,7 +52,7 @@ public:
, std::set<std::string>& highlightWord, std::map<std::string, KeyInfoVet>& docid_keyinfo_map
, std::map<std::string, uint32_t>& key_doccount_map);
int PureTextInvertIndexSearch(const std::vector<std::vector<FieldInfo> >& keys
int HanPinTextInvertIndexSearch(const std::vector<std::vector<FieldInfo> >& keys
, std::vector<IndexInfo>& index_info_vet
, std::set<std::string>& highlightWord
, std::map<std::string, KeyInfoVet>& docid_keyinfo_map);
@ -60,7 +60,7 @@ public:
int RangeQueryInvertIndexSearch(const std::vector<std::vector<FieldInfo> >& keys
, std::vector<IndexInfo>& index_info_vet);
int MixTextInvertIndexSearch(const std::vector<std::vector<FieldInfo> >& keys
int TextInvertIndexSearch(const std::vector<std::vector<FieldInfo> >& keys
, std::vector<IndexInfo>& index_info_vet
, std::set<std::string>& highlightWord
, std::map<std::string, KeyInfoVet>& docid_keyinfo_map