Compare commits
34 Commits
master
...
chenyujie-
Author | SHA1 | Date | |
---|---|---|---|
|
3736d4972b | ||
|
b1494d633a | ||
|
b93e25166f | ||
|
6c9b1e64a7 | ||
|
3847045aa5 | ||
|
cce43b936f | ||
|
2248c8747c | ||
|
9a9c901cc4 | ||
|
26aaa94920 | ||
|
2825798e5c | ||
|
2bae879f75 | ||
|
ba68bc2117 | ||
|
5c04c247ab | ||
|
199e540a58 | ||
|
6fa36f3a4c | ||
|
88e5846294 | ||
|
008427a214 | ||
|
101d872c09 | ||
|
bb2358103e | ||
|
97408057e8 | ||
|
27b4d80a36 | ||
|
f93694baca | ||
|
6be870dfb6 | ||
|
77afde66a1 | ||
|
78562a2ae6 | ||
|
5d35cff86a | ||
|
2942e539a7 | ||
|
787aee4566 | ||
|
4ccbea999a | ||
|
149dc03bfc | ||
|
1ecc06d0f8 | ||
|
801f486365 | ||
|
a50af991c1 | ||
|
3b2b3784ef |
@ -160,14 +160,14 @@ vector<string> GetArroundGeoHash(GeoPoint& circle_center, double distance, int p
|
||||
return list;
|
||||
}
|
||||
|
||||
vector<string> GetArroundGeoHash(double lng_max, double lng_min, double lat_max, double lat_min, int precision)
|
||||
vector<string> GetArroundGeoHash(const EnclosingRectangle& oEnclosingRectangle, int precision)
|
||||
{
|
||||
vector<string> list;
|
||||
GeoPoint top_left, bottom_right;
|
||||
top_left.lat = lat_max;
|
||||
top_left.lon = lng_min;
|
||||
bottom_right.lat = lat_min;
|
||||
bottom_right.lon = lng_max;
|
||||
top_left.lat = oEnclosingRectangle.dlatMax;
|
||||
top_left.lon = oEnclosingRectangle.dlngMin;
|
||||
bottom_right.lat = oEnclosingRectangle.dlatMin;
|
||||
bottom_right.lon = oEnclosingRectangle.dlngMax;
|
||||
double min_lat, min_lon;
|
||||
std::set <std::string> result;
|
||||
getMinLatLng(precision, min_lat, min_lon);
|
||||
@ -181,7 +181,7 @@ vector<string> GetArroundGeoHash(double lng_max, double lng_min, double lat_max,
|
||||
list.push_back(geohash);
|
||||
}
|
||||
}
|
||||
top_left.lon = lng_min;
|
||||
top_left.lon = oEnclosingRectangle.dlngMin;
|
||||
}
|
||||
return list;
|
||||
}
|
||||
|
@ -25,18 +25,47 @@
|
||||
#include <stdlib.h>
|
||||
using namespace std;
|
||||
|
||||
const double DOUBLE_EPS = 1e-10;
|
||||
|
||||
struct GeoPoint {
|
||||
double lon;
|
||||
double lat;
|
||||
double lon;
|
||||
double lat;
|
||||
};
|
||||
|
||||
struct EnclosingRectangle{
|
||||
double dlngMax;
|
||||
double dlngMin;
|
||||
double dlatMax;
|
||||
double dlatMin;
|
||||
|
||||
EnclosingRectangle()
|
||||
: dlngMax(0.0)
|
||||
, dlngMin(0.0)
|
||||
, dlatMax(0.0)
|
||||
, dlatMin(0.0)
|
||||
{ }
|
||||
|
||||
EnclosingRectangle(double _dlngMax, double _dlngMin
|
||||
, double _dlatMax , double _dlatMin)
|
||||
: dlngMax(_dlngMax)
|
||||
, dlngMin(_dlngMin)
|
||||
, dlatMax(_dlatMax)
|
||||
, dlatMin(_dlatMin)
|
||||
{ }
|
||||
|
||||
bool IsVaild(){
|
||||
return (!(fabs(dlngMax - dlngMin) < DOUBLE_EPS))
|
||||
&& (!(fabs(dlatMax - dlatMin) < DOUBLE_EPS));
|
||||
}
|
||||
};
|
||||
|
||||
string encode(double lat, double lng, int precision);
|
||||
vector<string> getArroundGeoHash(double lat, double lon, int precision);
|
||||
GeoPoint GetTerminalGeo(GeoPoint& beg , // 初始的geo坐标
|
||||
double distance,// 距离
|
||||
double angle //角度
|
||||
double distance,// 距离
|
||||
double angle //角度
|
||||
);
|
||||
vector<string> GetArroundGeoHash(GeoPoint& circle_center, double distance, int precision);
|
||||
vector<string> GetArroundGeoHash(double lng_max, double lng_min, double lat_max, double lat_min, int precision);
|
||||
vector<string> GetArroundGeoHash(const EnclosingRectangle& oEnclosingRectangle, int precision);
|
||||
|
||||
#endif
|
21
src/comm/noncopyable.h
Normal file
21
src/comm/noncopyable.h
Normal file
@ -0,0 +1,21 @@
|
||||
/* $Id: noncopyable.h 602 2009-01-08 02:27:44Z jackda $ */
|
||||
#ifndef __TTC_NONCOPY_H__
|
||||
#define __TTC_NONCOPY_H__
|
||||
|
||||
#include "namespace.h"
|
||||
|
||||
TTC_BEGIN_NAMESPACE
|
||||
|
||||
class noncopyable
|
||||
{
|
||||
protected:
|
||||
noncopyable(void){}
|
||||
~noncopyable(void){}
|
||||
private:
|
||||
noncopyable(const noncopyable&);
|
||||
const noncopyable& operator= (const noncopyable&);
|
||||
};
|
||||
|
||||
TTC_END_NAMESPACE
|
||||
|
||||
#endif
|
@ -3,13 +3,14 @@ cmake_minimum_required(VERSION 2.6)
|
||||
PROJECT(index_read)
|
||||
EXECUTE_PROCESS(COMMAND git log -1 --pretty=format:%h . OUTPUT_VARIABLE version)
|
||||
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -lrt -Wall -O2")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x -g -lrt -Wall")
|
||||
|
||||
AUX_SOURCE_DIRECTORY(. main)
|
||||
AUX_SOURCE_DIRECTORY(./index_sync index_sync)
|
||||
AUX_SOURCE_DIRECTORY(./utils utils)
|
||||
AUX_SOURCE_DIRECTORY(./query query)
|
||||
AUX_SOURCE_DIRECTORY(./process process)
|
||||
AUX_SOURCE_DIRECTORY(./sort_operator sort_operator)
|
||||
|
||||
LINK_DIRECTORIES(
|
||||
${PROJECT_SOURCE_DIR}/../../comm
|
||||
@ -17,7 +18,7 @@ ${PROJECT_SOURCE_DIR}/../../3rdlib/jsoncpp/lib
|
||||
${PROJECT_SOURCE_DIR}/../../comm/stat
|
||||
)
|
||||
|
||||
ADD_EXECUTABLE(index_read ${main} ${index_sync} ${utils} ${query} ${process})
|
||||
ADD_EXECUTABLE(index_read ${main} ${index_sync} ${utils} ${query} ${process} ${sort_operator})
|
||||
|
||||
target_include_directories(index_read PUBLIC
|
||||
../../3rdlib/jsoncpp/include
|
||||
|
@ -21,17 +21,27 @@
|
||||
#include <string>
|
||||
#include <stdint.h>
|
||||
#include <vector>
|
||||
#include <tr1/unordered_map>
|
||||
#include <set>
|
||||
#include <unordered_map>
|
||||
#include <limits.h>
|
||||
using namespace std;
|
||||
#include <map>
|
||||
#include <cmath>
|
||||
|
||||
#define DOC_CNT 10000
|
||||
#define MAX_DOCID_LENGTH 32
|
||||
|
||||
const double D_BM25_K = 1.65;
|
||||
const double D_BM25_K1 = 1.2;
|
||||
const double D_BM25_K2 = 200;
|
||||
|
||||
const uint32_t MAX_SEARCH_LEN = 60;
|
||||
const uint32_t SINGLE_WORD_LEN = 18;
|
||||
const uint32_t MAX_VALUE_LEN = 51200;
|
||||
typedef std::tr1::unordered_map<string, double> hash_double_map;
|
||||
typedef std::tr1::unordered_map<string, string> hash_string_map;
|
||||
|
||||
const double DOUBLE_EPS_3 = 1e-3;
|
||||
|
||||
typedef std::unordered_map<std::string, double> hash_double_map;
|
||||
typedef std::unordered_map<std::string, std::string> hash_string_map;
|
||||
|
||||
enum RetCode{
|
||||
RT_PARSE_JSON_ERR = 10001,
|
||||
@ -76,31 +86,32 @@ enum KEYLOCATE{
|
||||
};
|
||||
|
||||
enum SORTTYPE {
|
||||
SORT_RELEVANCE = 1, // 按相关性排序
|
||||
SORT_TIMESTAMP = 2, // 按时间排序
|
||||
SORT_RELEVANCE = 1, // 默认,按相关性排序
|
||||
DONT_SORT = 3, //不排序
|
||||
SORT_FIELD_ASC = 4, // 按字段升序
|
||||
SORT_FIELD_DESC = 5, // 按字段降序
|
||||
SORT_GEO_DISTANCE = 6 // 按距离升序
|
||||
};
|
||||
|
||||
enum FieldType{
|
||||
FIELD_INT = 1,
|
||||
FIELD_STRING,
|
||||
FIELD_TEXT,
|
||||
FIELD_IP,
|
||||
FIELD_LNG,
|
||||
FIELD_LAT,
|
||||
FIELD_GIS,
|
||||
FIELD_DISTANCE,
|
||||
FIELD_DOUBLE,
|
||||
FIELD_LONG,
|
||||
FIELD_INDEX = 11,
|
||||
FIELD_LNG_ARRAY,
|
||||
FIELD_LAT_ARRAY,
|
||||
FIELD_WKT,
|
||||
FIELD_INT = 1,
|
||||
FIELD_STRING = 2,
|
||||
FIELD_TEXT = 3,
|
||||
FIELD_IP = 4,
|
||||
FIELD_GEO_POINT = 5,
|
||||
FIELD_LAT = 6,
|
||||
FIELD_GIS = 7,
|
||||
FIELD_DISTANCE = 8,
|
||||
FIELD_DOUBLE = 9,
|
||||
FIELD_LONG = 10,
|
||||
FIELD_INDEX = 11,
|
||||
FIELD_LNG_ARRAY = 12,
|
||||
FIELD_LAT_ARRAY = 13,
|
||||
FIELD_GEO_SHAPE = 14
|
||||
};
|
||||
|
||||
enum SEGMENTTAG {
|
||||
SEGMENT_NONE = 0,
|
||||
SEGMENT_DEFAULT = 1,
|
||||
SEGMENT_NGRAM = 2,
|
||||
SEGMENT_CHINESE = 3,
|
||||
@ -151,36 +162,37 @@ enum RANGTYPE {
|
||||
|
||||
struct Content {
|
||||
uint32_t type;
|
||||
string str;
|
||||
std::string str;
|
||||
};
|
||||
|
||||
struct Info {
|
||||
string title;
|
||||
string content;
|
||||
string classify;
|
||||
string keywords;
|
||||
string url;
|
||||
std::string title;
|
||||
std::string content;
|
||||
std::string classify;
|
||||
std::string keywords;
|
||||
std::string url;
|
||||
};
|
||||
|
||||
struct KeyInfo {
|
||||
string word;
|
||||
std::string word;
|
||||
uint32_t field;
|
||||
uint32_t word_freq;
|
||||
uint32_t created_time;
|
||||
vector<int> pos_vec;
|
||||
std::vector<int> pos_vec;
|
||||
};
|
||||
|
||||
struct FieldInfo
|
||||
{
|
||||
string word;
|
||||
std::string word;
|
||||
uint32_t field;
|
||||
uint32_t field_type;
|
||||
uint32_t segment_tag;
|
||||
uint32_t segment_feature;
|
||||
uint32_t start;
|
||||
uint32_t end;
|
||||
double start;
|
||||
double end;
|
||||
uint32_t index_tag;
|
||||
RANGTYPE range_type;
|
||||
uint32_t query_type;
|
||||
FieldInfo() {
|
||||
field = 1;
|
||||
field_type = 0;
|
||||
@ -190,6 +202,7 @@ struct FieldInfo
|
||||
end = 0;
|
||||
range_type = RANGE_INIT;
|
||||
index_tag = 0;
|
||||
query_type = -1;
|
||||
}
|
||||
};
|
||||
|
||||
@ -200,14 +213,14 @@ struct AppFieldInfo {
|
||||
uint16_t segment_tag;
|
||||
uint16_t field_value;
|
||||
uint16_t segment_feature;
|
||||
string index_info;
|
||||
std::string index_info;
|
||||
};
|
||||
|
||||
struct ScoreInfo
|
||||
{
|
||||
double score;
|
||||
FIELDTYPE type;
|
||||
string str;
|
||||
std::string str;
|
||||
int i;
|
||||
double d;
|
||||
ScoreInfo(){
|
||||
@ -225,8 +238,8 @@ struct CacheQueryInfo
|
||||
uint32_t sort_type;
|
||||
uint32_t page_index;
|
||||
uint32_t page_size;
|
||||
string last_score;
|
||||
string last_id;
|
||||
std::string last_score;
|
||||
std::string last_id;
|
||||
CacheQueryInfo(){
|
||||
appid = 0;
|
||||
sort_field = 0;
|
||||
@ -241,17 +254,36 @@ enum KeyType
|
||||
ORKEY,
|
||||
ANDKEY,
|
||||
INVERTKEY,
|
||||
KEYTOTALNUM
|
||||
};
|
||||
|
||||
struct ScoreDocIdNode{
|
||||
double d_score;
|
||||
std::string s_docid;
|
||||
|
||||
ScoreDocIdNode(double score , const std::string& docid)
|
||||
: d_score(score)
|
||||
, s_docid(docid)
|
||||
{ }
|
||||
|
||||
bool operator<(const ScoreDocIdNode& score_docid_node) const {
|
||||
if (fabs(d_score - score_docid_node.d_score) < DOUBLE_EPS_3){
|
||||
return s_docid.compare(score_docid_node.s_docid) < 0;
|
||||
}
|
||||
return (d_score + DOUBLE_EPS_3) < score_docid_node.d_score;
|
||||
}
|
||||
};
|
||||
|
||||
struct IndexInfo {
|
||||
uint32_t appid;
|
||||
string doc_id;
|
||||
std::string doc_id;
|
||||
uint32_t doc_version;
|
||||
uint32_t field;
|
||||
uint32_t word_freq;
|
||||
uint32_t created_time;
|
||||
string pos;
|
||||
string extend;
|
||||
std::string pos;
|
||||
std::string extend;
|
||||
double distance;
|
||||
|
||||
IndexInfo(){
|
||||
appid = 0;
|
||||
@ -259,6 +291,7 @@ struct IndexInfo {
|
||||
field = 0;
|
||||
word_freq = 0;
|
||||
created_time = 0;
|
||||
distance = 0.0;
|
||||
}
|
||||
|
||||
bool operator<(const IndexInfo& src) const {
|
||||
@ -271,29 +304,29 @@ struct IndexInfo {
|
||||
|
||||
struct ExtraFilterKey
|
||||
{
|
||||
string field_name;
|
||||
string field_value;
|
||||
std::string field_name;
|
||||
std::string field_value;
|
||||
uint16_t field_type;
|
||||
};
|
||||
|
||||
struct TerminalQryCond{
|
||||
uint32_t sort_type;
|
||||
string sort_field;
|
||||
string last_id;
|
||||
string last_score;
|
||||
std::string sort_field;
|
||||
std::string last_id;
|
||||
std::string last_score;
|
||||
uint32_t limit_start;
|
||||
uint32_t page_size;
|
||||
};
|
||||
|
||||
struct OrderOpCond{
|
||||
string last_id;
|
||||
std::string last_id;
|
||||
uint32_t limit_start;
|
||||
uint32_t count;
|
||||
bool has_extra_filter;
|
||||
};
|
||||
|
||||
struct TerminalRes{
|
||||
string doc_id;
|
||||
std::string doc_id;
|
||||
double score;
|
||||
};
|
||||
|
||||
@ -306,4 +339,13 @@ enum QUERYRTPE{
|
||||
QUERY_TYPE_GEO_SHAPE,
|
||||
};
|
||||
|
||||
|
||||
typedef std::set<std::string> ValidDocSet;
|
||||
typedef std::set<std::string> HighLightWordSet;
|
||||
|
||||
typedef std::vector<KeyInfo> KeyInfoVet;
|
||||
typedef std::map<std::string, KeyInfoVet> DocKeyinfosMap;
|
||||
|
||||
typedef std::map<std::string, uint32_t> KeywordDoccountMap;
|
||||
|
||||
#endif
|
@ -1,791 +0,0 @@
|
||||
/*
|
||||
* =====================================================================================
|
||||
*
|
||||
* Filename: component.h
|
||||
*
|
||||
* Description: component class definition.
|
||||
*
|
||||
* Version: 1.0
|
||||
* Created: 09/08/2019
|
||||
* Revision: none
|
||||
* Compiler: gcc
|
||||
*
|
||||
* Author: zhulin, shzhulin3@jd.com
|
||||
* Company: JD.com, Inc.
|
||||
*
|
||||
* =====================================================================================
|
||||
*/
|
||||
|
||||
#include "component.h"
|
||||
#include "split_manager.h"
|
||||
#include "db_manager.h"
|
||||
#include "utf8_str.h"
|
||||
#include "query/bool_query_parser.h"
|
||||
#include "query/geo_distance_parser.h"
|
||||
#include "query/range_query_parser.h"
|
||||
#include "query/match_query_parser.h"
|
||||
#include "query/term_query_parser.h"
|
||||
#include "query/geo_shape_parser.h"
|
||||
#include <sstream>
|
||||
using namespace std;
|
||||
|
||||
Component::Component(){
|
||||
SGlobalConfig &global_cfg = SearchConf::Instance()->GetGlobalConfig();
|
||||
m_default_query = global_cfg.sDefaultQuery;
|
||||
m_jdq_switch = global_cfg.iJdqSwitch;
|
||||
m_page_index = 0;
|
||||
m_page_size = 0;
|
||||
m_return_all = 0;
|
||||
m_cache_switch = 0;
|
||||
m_top_switch = 0;
|
||||
m_snapshot_switch = 0;
|
||||
m_sort_type = SORT_RELEVANCE;
|
||||
m_appid = 10001;
|
||||
m_last_id = "";
|
||||
m_last_score = "";
|
||||
m_search_after = false;
|
||||
distance = 0;
|
||||
m_terminal_tag = 0;
|
||||
m_terminal_tag_valid = false;
|
||||
m_query_type = 0;
|
||||
}
|
||||
|
||||
Component::~Component(){
|
||||
if(NULL != query_parser){
|
||||
delete query_parser;
|
||||
}
|
||||
if(NULL != query_parser_res){
|
||||
delete query_parser_res;
|
||||
}
|
||||
}
|
||||
|
||||
int Component::ParseJson(const char *sz_json, int json_len, Json::Value &recv_packet)
|
||||
{
|
||||
Json::Reader r(Json::Features::strictMode());
|
||||
int ret;
|
||||
ret = r.parse(sz_json, sz_json + json_len, recv_packet);
|
||||
if (0 == ret)
|
||||
{
|
||||
log_error("the err json string is : %s", sz_json);
|
||||
log_error("parse json error , errmsg : %s", r.getFormattedErrorMessages().c_str());
|
||||
return -RT_PARSE_JSON_ERR;
|
||||
}
|
||||
|
||||
if (recv_packet.isMember("appid") && recv_packet["appid"].isUInt())
|
||||
{
|
||||
m_appid = recv_packet["appid"].asUInt();
|
||||
}
|
||||
else {
|
||||
m_appid = 10001;
|
||||
}
|
||||
|
||||
if(recv_packet.isMember("query")){
|
||||
m_query = recv_packet["query"];
|
||||
}
|
||||
|
||||
if (recv_packet.isMember("key") && recv_packet["key"].isString())
|
||||
{
|
||||
m_Data = recv_packet["key"].asString();
|
||||
}
|
||||
else {
|
||||
m_Data = "";
|
||||
}
|
||||
|
||||
if (recv_packet.isMember("key_and") && recv_packet["key_and"].isString())
|
||||
{
|
||||
m_Data_and = recv_packet["key_and"].asString();
|
||||
}
|
||||
else {
|
||||
m_Data_and = "";
|
||||
}
|
||||
|
||||
if (recv_packet.isMember("key_invert") && recv_packet["key_invert"].isString())
|
||||
{
|
||||
m_Data_invert = recv_packet["key_invert"].asString();
|
||||
}
|
||||
else {
|
||||
m_Data_invert = "";
|
||||
}
|
||||
|
||||
if (recv_packet.isMember("key_complete") && recv_packet["key_complete"].isString())
|
||||
{
|
||||
m_Data_complete = recv_packet["key_complete"].asString();
|
||||
}
|
||||
else {
|
||||
m_Data_complete = "";
|
||||
}
|
||||
|
||||
if (recv_packet.isMember("page_index") && recv_packet["page_index"].isString())
|
||||
{
|
||||
m_page_index = atoi(recv_packet["page_index"].asString().c_str());
|
||||
}
|
||||
else {
|
||||
m_page_index = 1 ;
|
||||
}
|
||||
|
||||
if (recv_packet.isMember("page_size") && recv_packet["page_size"].isString())
|
||||
{
|
||||
m_page_size = atoi(recv_packet["page_size"].asString().c_str());
|
||||
}
|
||||
else {
|
||||
m_page_size = 10;
|
||||
}
|
||||
|
||||
if(recv_packet.isMember("sort_type") && recv_packet["sort_type"].isString())
|
||||
{
|
||||
m_sort_type = atoi(recv_packet["sort_type"].asString().c_str());
|
||||
}
|
||||
else {
|
||||
m_sort_type = SORT_RELEVANCE;
|
||||
}
|
||||
|
||||
if(recv_packet.isMember("sort_field") && recv_packet["sort_field"].isString())
|
||||
{
|
||||
m_sort_field = recv_packet["sort_field"].asString();
|
||||
}
|
||||
else {
|
||||
m_sort_field = "";
|
||||
}
|
||||
|
||||
if (recv_packet.isMember("return_all") && recv_packet["return_all"].isString())
|
||||
{
|
||||
m_return_all = atoi(recv_packet["return_all"].asString().c_str());
|
||||
}
|
||||
else {
|
||||
m_return_all = 0;
|
||||
}
|
||||
|
||||
if(recv_packet.isMember("fields") && recv_packet["fields"].isString())
|
||||
{
|
||||
string fields = recv_packet["fields"].asString();
|
||||
m_fields = splitEx(fields, ",");
|
||||
}
|
||||
|
||||
if (recv_packet.isMember("terminal_tag") && recv_packet["terminal_tag"].isString())
|
||||
{
|
||||
m_terminal_tag = atoi(recv_packet["terminal_tag"].asString().c_str());
|
||||
}
|
||||
else {
|
||||
m_terminal_tag = 0;
|
||||
}
|
||||
|
||||
if(m_terminal_tag == 1){
|
||||
if(m_Data_and == "" || m_Data != "" || m_Data_invert != ""){
|
||||
log_error("terminal_tag is true, only key_and is available.");
|
||||
return -RT_PARSE_JSON_ERR;
|
||||
}
|
||||
}
|
||||
|
||||
if(recv_packet.isMember("last_id") && recv_packet["last_id"].isString())
|
||||
{
|
||||
m_last_id = recv_packet["last_id"].asString();
|
||||
}
|
||||
else {
|
||||
m_last_id = "";
|
||||
}
|
||||
|
||||
bool score_flag = true;
|
||||
if (recv_packet.isMember("last_score") && recv_packet["last_score"].isString())
|
||||
{
|
||||
m_last_score = recv_packet["last_score"].asString();
|
||||
}
|
||||
else {
|
||||
score_flag = false;
|
||||
m_last_score = "0";
|
||||
}
|
||||
if(m_last_id != "" && score_flag == true){
|
||||
m_search_after = true;
|
||||
}
|
||||
if(m_search_after == true && m_sort_type != SORT_FIELD_DESC && m_sort_type != SORT_FIELD_ASC){
|
||||
log_error("in search_after mode, sort_type must be SORT_FIELD_DESC or SORT_FIELD_ASC.");
|
||||
return -RT_PARSE_JSON_ERR;
|
||||
}
|
||||
if ("" == m_Data && "" == m_Data_and && "" == m_Data_complete) {
|
||||
m_Data = m_default_query;
|
||||
}
|
||||
log_debug("parse success, m_Data: %s, m_Data_and: %s, m_Data_invert: %s, m_page_index: %u, m_return_all: %u",
|
||||
m_Data.c_str(), m_Data_and.c_str(), m_Data_invert.c_str(), m_page_index, m_return_all);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void Component::InitSwitch()
|
||||
{
|
||||
AppInfo app_info;
|
||||
bool res = SearchConf::Instance()->GetAppInfo(m_appid, app_info);
|
||||
if (true == res){
|
||||
m_cache_switch = app_info.cache_switch;
|
||||
m_top_switch = app_info.top_switch;
|
||||
m_snapshot_switch = app_info.snapshot_switch;
|
||||
}
|
||||
}
|
||||
|
||||
int Component::GetQueryWord(uint32_t &m_has_gis, string &err_msg){
|
||||
if(m_query.isObject()){
|
||||
if(m_query.isMember("bool")){
|
||||
m_query_type = QUERY_TYPE_BOOL;
|
||||
query_parser = new BoolQueryParser(m_appid, m_query["bool"]);
|
||||
} else if(m_query.isMember("geo_distance")){
|
||||
m_query_type = QUERY_TYPE_GEO_DISTANCE;
|
||||
query_parser = new GeoDistanceParser(m_appid, m_query["geo_distance"]);
|
||||
} else if(m_query.isMember("range")){
|
||||
m_query_type = QUERY_TYPE_RANGE;
|
||||
query_parser = new RangeQueryParser(m_appid, m_query["range"]);
|
||||
} else if(m_query.isMember("match")){
|
||||
m_query_type = QUERY_TYPE_MATCH;
|
||||
query_parser = new MatchQueryParser(m_appid, m_query["match"]);
|
||||
} else if(m_query.isMember("term")){
|
||||
m_query_type = QUERY_TYPE_TERM;
|
||||
query_parser = new TermQueryParser(m_appid, m_query["term"]);
|
||||
} else if(m_query.isMember("geo_shape")){
|
||||
m_query_type = QUERY_TYPE_GEO_SHAPE;
|
||||
query_parser = new GeoShapeParser(m_appid, m_query["geo_shape"]);
|
||||
} else {
|
||||
log_error("query type error!");
|
||||
return -RT_QUERY_TYPE_ERROR;
|
||||
}
|
||||
query_parser_res = new QueryParserRes();
|
||||
int ret = query_parser->ParseContent(query_parser_res);
|
||||
if(ret != 0){
|
||||
err_msg = query_parser_res->ErrMsg();
|
||||
log_error("query_parser ParseContent error, ret: %d", ret);
|
||||
return ret;
|
||||
}
|
||||
map<uint32_t, vector<FieldInfo> >::iterator field_key_map_iter = query_parser_res->FieldKeysMap().begin();
|
||||
for(; field_key_map_iter != query_parser_res->FieldKeysMap().end(); field_key_map_iter++){
|
||||
AddToFieldList(ANDKEY, field_key_map_iter->second);
|
||||
}
|
||||
map<uint32_t, vector<FieldInfo> >::iterator or_key_map_iter = query_parser_res->OrFieldKeysMap().begin();
|
||||
for(; or_key_map_iter != query_parser_res->OrFieldKeysMap().end(); or_key_map_iter++){
|
||||
AddToFieldList(ORKEY, or_key_map_iter->second);
|
||||
}
|
||||
map<uint32_t, vector<FieldInfo> >::iterator invert_key_map_iter = query_parser_res->InvertFieldKeysMap().begin();
|
||||
for(; invert_key_map_iter != query_parser_res->InvertFieldKeysMap().end(); invert_key_map_iter++){
|
||||
AddToFieldList(INVERTKEY, invert_key_map_iter->second);
|
||||
}
|
||||
m_has_gis = query_parser_res->HasGis();
|
||||
if(m_has_gis){
|
||||
latitude = query_parser_res->Latitude();
|
||||
longitude = query_parser_res->Longitude();
|
||||
distance = query_parser_res->Distance();
|
||||
log_debug("lat: %s, lon: %s, distance: %f", latitude.c_str(), longitude.c_str(), distance);
|
||||
}
|
||||
extra_filter_keys.assign(query_parser_res->ExtraFilterKeys().begin(), query_parser_res->ExtraFilterKeys().end());
|
||||
extra_filter_and_keys.assign(query_parser_res->ExtraFilterAndKeys().begin(), query_parser_res->ExtraFilterAndKeys().end());
|
||||
extra_filter_invert_keys.assign(query_parser_res->ExtraFilterInvertKeys().begin(), query_parser_res->ExtraFilterInvertKeys().end());
|
||||
} else {
|
||||
GetFieldWords(ORKEY, m_Data, m_appid, m_has_gis);
|
||||
GetFieldWords(ANDKEY, m_Data_and, m_appid, m_has_gis);
|
||||
GetFieldWords(INVERTKEY, m_Data_invert, m_appid, m_has_gis);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void Component::GetFieldWords(int type, string dataStr, uint32_t appid, uint32_t &m_has_gis){
|
||||
if (dataStr == "")
|
||||
return ;
|
||||
string latitude_tmp = "";
|
||||
string longitude_tmp = "";
|
||||
string gisip_tmp = "";
|
||||
string field_Data = "";
|
||||
string primary_Data = "";
|
||||
vector<FieldInfo> joinFieldInfos;
|
||||
int i = dataStr.find(":");
|
||||
if (i == -1) {
|
||||
primary_Data = dataStr;
|
||||
} else {
|
||||
int j = dataStr.substr(0, i).rfind(" ");
|
||||
if (j == -1) {
|
||||
field_Data = dataStr;
|
||||
primary_Data = "";
|
||||
} else {
|
||||
primary_Data = dataStr.substr(0, j);
|
||||
field_Data = dataStr.substr(j+1);
|
||||
}
|
||||
}
|
||||
|
||||
if (type == 0) {
|
||||
m_Query_Word = primary_Data;
|
||||
}
|
||||
|
||||
if (primary_Data.length() > MAX_SEARCH_LEN) { // 超长进行截断
|
||||
primary_Data = primary_Data.substr(0, MAX_SEARCH_LEN);
|
||||
}
|
||||
|
||||
string probably_key = "";
|
||||
bool is_correct = false;
|
||||
if(primary_Data != "" && primary_Data.length() <= SINGLE_WORD_LEN) // 判断输入的词语是否正确,如果超过一定长度,则认为是多个词组成
|
||||
{
|
||||
JudgeWord(appid, primary_Data, is_correct, probably_key);
|
||||
m_probably_data = probably_key;
|
||||
}
|
||||
vector<FieldInfo> primaryInfo;
|
||||
FieldInfo pInfo;
|
||||
string split_data;
|
||||
if (is_correct == true) {
|
||||
pInfo.field = INT_MAX;
|
||||
pInfo.word = primary_Data;
|
||||
primaryInfo.push_back(pInfo);
|
||||
DataManager::Instance()->GetSynonymByKey(primary_Data, primaryInfo);
|
||||
}
|
||||
else if (probably_key != "") {
|
||||
pInfo.word = probably_key;
|
||||
primaryInfo.push_back(pInfo);
|
||||
DataManager::Instance()->GetSynonymByKey(probably_key, primaryInfo);
|
||||
}
|
||||
else if (primary_Data != ""){
|
||||
split_data = SplitManager::Instance()->split(primary_Data, appid);
|
||||
log_debug("split_data: %s", split_data.c_str());
|
||||
vector<string> split_datas = splitEx(split_data, "|");
|
||||
for(size_t i = 0; i < split_datas.size(); i++) //是否有重复的同义词存在?
|
||||
{
|
||||
pInfo.field = INT_MAX;
|
||||
pInfo.word = split_datas[i];
|
||||
primaryInfo.push_back(pInfo);
|
||||
DataManager::Instance()->GetSynonymByKey(split_datas[i], primaryInfo);
|
||||
}
|
||||
}
|
||||
AddToFieldList(type, primaryInfo);
|
||||
|
||||
vector<string> gisCode;
|
||||
vector<string> vec = splitEx(field_Data, " ");
|
||||
vector<string>::iterator iter;
|
||||
map<uint32_t, vector<FieldInfo> > field_keys_map;
|
||||
uint32_t range_query = 0;
|
||||
vector<string> lng_arr;
|
||||
vector<string> lat_arr;
|
||||
for (iter = vec.begin(); iter < vec.end(); iter++)
|
||||
{
|
||||
vector<FieldInfo> fieldInfos;
|
||||
if ((*iter)[0] == '\0')
|
||||
continue;
|
||||
vector<string> tmp = splitEx(*iter, ":");
|
||||
if (tmp.size() != 2)
|
||||
continue;
|
||||
uint32_t segment_tag = 0;
|
||||
FieldInfo fieldInfo;
|
||||
string fieldname = tmp[0];
|
||||
|
||||
uint32_t field = DBManager::Instance()->GetWordField(segment_tag, appid, fieldname, fieldInfo);
|
||||
if(field != 0 && fieldInfo.index_tag == 0){
|
||||
ExtraFilterKey extra_filter_key;
|
||||
extra_filter_key.field_name = fieldname;
|
||||
extra_filter_key.field_value = tmp[1];
|
||||
extra_filter_key.field_type = fieldInfo.field_type;
|
||||
if(type == 0){
|
||||
extra_filter_keys.push_back(extra_filter_key);
|
||||
} else if (type == 1) {
|
||||
extra_filter_and_keys.push_back(extra_filter_key);
|
||||
} else if (type == 2) {
|
||||
extra_filter_invert_keys.push_back(extra_filter_key);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (field != 0 && segment_tag == 1)
|
||||
{
|
||||
string split_data = SplitManager::Instance()->split(tmp[1], appid);
|
||||
log_debug("split_data: %s", split_data.c_str());
|
||||
vector<string> split_datas = splitEx(split_data, "|");
|
||||
for(size_t index = 0; index < split_datas.size(); index++)
|
||||
{
|
||||
FieldInfo info;
|
||||
info.field = fieldInfo.field;
|
||||
info.field_type = fieldInfo.field_type;
|
||||
info.word = split_datas[index];
|
||||
info.segment_tag = fieldInfo.segment_tag;
|
||||
fieldInfos.push_back(info);
|
||||
}
|
||||
}
|
||||
else if (field != 0 && segment_tag == 5) {
|
||||
range_query++;
|
||||
string str = tmp[1];
|
||||
str.erase(0, str.find_first_not_of(" "));
|
||||
str.erase(str.find_last_not_of(" ") + 1);
|
||||
if (str.size() == 0) {
|
||||
log_error("field[%s] content is null", fieldname.c_str());
|
||||
continue;
|
||||
}
|
||||
if (str[0] == '[') { // 范围查询
|
||||
int l = str.find(",");
|
||||
if (l == -1 || (str[str.size() - 1] != ']' && str[str.size() - 1] != ')')) {
|
||||
log_error("field[%s] content[%s] invalid", fieldname.c_str(), str.c_str());
|
||||
continue;
|
||||
}
|
||||
istringstream iss(str.substr(1, l).c_str());
|
||||
iss >> fieldInfo.start;
|
||||
string end_str = str.substr(l + 1, str.size() - l - 2);
|
||||
end_str.erase(0, end_str.find_first_not_of(" "));
|
||||
istringstream end_iss(end_str);
|
||||
end_iss >> fieldInfo.end;
|
||||
|
||||
if (str[str.size() - 1] == ']') {
|
||||
fieldInfo.range_type = RANGE_GELE;
|
||||
}
|
||||
else {
|
||||
if (end_str.size() == 0) {
|
||||
fieldInfo.range_type = RANGE_GE;
|
||||
}
|
||||
else {
|
||||
fieldInfo.range_type = RANGE_GELT;
|
||||
}
|
||||
}
|
||||
fieldInfos.push_back(fieldInfo);
|
||||
}
|
||||
else if (str[0] == '(') {
|
||||
int l = str.find(",");
|
||||
if (l == -1 || (str[str.size() - 1] != ']' && str[str.size() - 1] != ')')) {
|
||||
log_error("field[%s] content[%s] invalid", fieldname.c_str(), str.c_str());
|
||||
continue;
|
||||
}
|
||||
string start_str = str.substr(1, l).c_str();
|
||||
string end_str = str.substr(l + 1, str.size() - l - 2);
|
||||
start_str.erase(0, start_str.find_first_not_of(" "));
|
||||
end_str.erase(0, end_str.find_first_not_of(" "));
|
||||
istringstream start_iss(start_str);
|
||||
start_iss >> fieldInfo.start;
|
||||
istringstream end_iss(end_str);
|
||||
end_iss >> fieldInfo.end;
|
||||
|
||||
if (str[str.size() - 1] == ']') {
|
||||
if (start_str.size() == 0) {
|
||||
fieldInfo.range_type = RANGE_LE;
|
||||
}
|
||||
else {
|
||||
fieldInfo.range_type = RANGE_GTLE;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (start_str.size() != 0 && end_str.size() != 0) {
|
||||
fieldInfo.range_type = RANGE_GTLT;
|
||||
}
|
||||
else if (start_str.size() == 0 && end_str.size() != 0) {
|
||||
fieldInfo.range_type = RANGE_LT;
|
||||
}
|
||||
else if (start_str.size() != 0 && end_str.size() == 0) {
|
||||
fieldInfo.range_type = RANGE_GT;
|
||||
}
|
||||
else {
|
||||
log_error("field[%s] content[%s] invalid", fieldname.c_str(), str.c_str());
|
||||
continue;
|
||||
}
|
||||
}
|
||||
fieldInfos.push_back(fieldInfo);
|
||||
}
|
||||
else {
|
||||
fieldInfo.word = tmp[1];
|
||||
fieldInfos.push_back(fieldInfo);
|
||||
}
|
||||
log_debug("range_type: %d, start: %u, end: %u, segment_tag: %d, word: %s", fieldInfo.range_type, fieldInfo.start, fieldInfo.end, fieldInfo.segment_tag, fieldInfo.word.c_str());
|
||||
}
|
||||
else if (field != 0)
|
||||
{
|
||||
fieldInfo.word = tmp[1];
|
||||
fieldInfos.push_back(fieldInfo);
|
||||
}
|
||||
else if (field == 0)
|
||||
{
|
||||
if (fieldInfo.field_type == 5) {
|
||||
longitude_tmp = tmp[1];
|
||||
longitude = longitude_tmp;
|
||||
} else if (fieldInfo.field_type == 6) {
|
||||
latitude_tmp = tmp[1];
|
||||
latitude = latitude_tmp;
|
||||
} else if (fieldInfo.field_type == 8) {
|
||||
distance = strToDouble(tmp[1]);
|
||||
} else if (fieldInfo.field_type == 7) {
|
||||
gisip_tmp = tmp[1];
|
||||
gisip = gisip_tmp;
|
||||
} else if (fieldInfo.field_type == FIELD_WKT) {
|
||||
string str = tmp[1];
|
||||
str = delPrefix(str);
|
||||
vector<string> str_vec = splitEx(str, ",");
|
||||
for(uint32_t str_vec_idx = 0; str_vec_idx < str_vec.size(); str_vec_idx++){
|
||||
string wkt_str = trim(str_vec[str_vec_idx]);
|
||||
vector<string> wkt_vec = splitEx(wkt_str, "-");
|
||||
if(wkt_vec.size() == 2){
|
||||
lng_arr.push_back(wkt_vec[0]);
|
||||
lat_arr.push_back(wkt_vec[1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (fieldInfos.size() != 0) {
|
||||
field_keys_map.insert(make_pair(fieldInfo.field, fieldInfos));
|
||||
}
|
||||
}
|
||||
|
||||
double distance_tmp = 2; // 不指定distance时最多返回2km内的数据
|
||||
if(distance > 1e-6 && distance_tmp > distance + 1e-6){ // distance大于0小于2时取distance的值
|
||||
distance_tmp = distance;
|
||||
}
|
||||
GetGisCode(longitude_tmp, latitude_tmp, gisip_tmp, distance_tmp, gisCode);
|
||||
log_debug("lng_arr size: %d, lat_arr size: %d", (int)lng_arr.size(), (int)lat_arr.size());
|
||||
if (gisCode.size() == 0 && lng_arr.size() > 0){
|
||||
GetGisCode(lng_arr, lat_arr, gisCode);
|
||||
}
|
||||
if(gisCode.size() > 0){
|
||||
vector<FieldInfo> fieldInfos;
|
||||
uint32_t segment_tag = 0;
|
||||
FieldInfo fieldInfo;
|
||||
uint32_t field = DBManager::Instance()->GetWordField(segment_tag, appid, "gis", fieldInfo);
|
||||
if (field != 0 && segment_tag == 0) {
|
||||
m_has_gis = 1;
|
||||
for (size_t index = 0; index < gisCode.size(); index++) {
|
||||
FieldInfo info;
|
||||
info.field = fieldInfo.field;
|
||||
info.field_type = fieldInfo.field_type;
|
||||
info.segment_tag = fieldInfo.segment_tag;
|
||||
info.word = gisCode[index];
|
||||
fieldInfos.push_back(info);
|
||||
}
|
||||
}
|
||||
if (fieldInfos.size() != 0) {
|
||||
field_keys_map.insert(make_pair(fieldInfo.field, fieldInfos));
|
||||
}
|
||||
}
|
||||
|
||||
//如果key_and查询的field匹配到联合索引,则将查询词拼接起来作为新的查询词
|
||||
if(type == 1){
|
||||
vector<string> union_key_vec;
|
||||
DBManager::Instance()->GetUnionKeyField(appid, union_key_vec);
|
||||
vector<string>::iterator union_key_iter = union_key_vec.begin();
|
||||
for(; union_key_iter != union_key_vec.end(); union_key_iter++){
|
||||
string union_key = *union_key_iter;
|
||||
vector<int> union_field_vec = splitInt(union_key, ",");
|
||||
vector<int>::iterator union_field_iter = union_field_vec.begin();
|
||||
bool hit_union_key = true;
|
||||
for(; union_field_iter != union_field_vec.end(); union_field_iter++){
|
||||
if(field_keys_map.find(*union_field_iter) == field_keys_map.end()){
|
||||
hit_union_key = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(hit_union_key == true){
|
||||
vector<vector<string> > keys_vvec;
|
||||
vector<FieldInfo> unionFieldInfos;
|
||||
for(union_field_iter = union_field_vec.begin(); union_field_iter != union_field_vec.end(); union_field_iter++){
|
||||
vector<FieldInfo> field_info_vec = field_keys_map.at(*union_field_iter);
|
||||
vector<string> key_vec;
|
||||
GetKeyFromFieldInfo(field_info_vec, key_vec);
|
||||
keys_vvec.push_back(key_vec);
|
||||
field_keys_map.erase(*union_field_iter); // 命中union_key的需要从field_keys_map中删除
|
||||
}
|
||||
vector<string> union_keys = Combination(keys_vvec);
|
||||
for(int m = 0 ; m < (int)union_keys.size(); m++){
|
||||
FieldInfo info;
|
||||
info.field = 0;
|
||||
info.field_type = FIELD_STRING;
|
||||
info.segment_tag = 1;
|
||||
info.word = union_keys[m];
|
||||
unionFieldInfos.push_back(info);
|
||||
}
|
||||
AddToFieldList(type, unionFieldInfos);
|
||||
log_debug("hit union key index.");
|
||||
break;
|
||||
}
|
||||
}
|
||||
map<uint32_t, vector<FieldInfo> >::iterator field_key_map_iter = field_keys_map.begin();
|
||||
for(; field_key_map_iter != field_keys_map.end(); field_key_map_iter++){
|
||||
AddToFieldList(type, field_key_map_iter->second);
|
||||
}
|
||||
} else {
|
||||
map<uint32_t, vector<FieldInfo> >::iterator field_key_map_iter = field_keys_map.begin();
|
||||
for(; field_key_map_iter != field_keys_map.end(); field_key_map_iter++){
|
||||
AddToFieldList(type, field_key_map_iter->second);
|
||||
}
|
||||
}
|
||||
|
||||
if(type == 1){ // terminal_tag为1时,key_and中必须只带有一个范围查询
|
||||
if(m_terminal_tag == 1 && range_query == 1 && and_keys.size() == 1){
|
||||
m_terminal_tag_valid = true;
|
||||
}
|
||||
}
|
||||
|
||||
return ;
|
||||
}
|
||||
|
||||
void Component::AddToFieldList(int type, vector<FieldInfo>& fields)
|
||||
{
|
||||
if (fields.size() == 0)
|
||||
return ;
|
||||
if (type == 0) {
|
||||
keys.push_back(fields);
|
||||
} else if (type == 1) {
|
||||
and_keys.push_back(fields);
|
||||
} else if (type == 2) {
|
||||
invert_keys.push_back(fields);
|
||||
}
|
||||
return ;
|
||||
}
|
||||
|
||||
const vector<vector<FieldInfo> >& Component::Keys(){
|
||||
return keys;
|
||||
}
|
||||
|
||||
const vector<vector<FieldInfo> >& Component::AndKeys(){
|
||||
return and_keys;
|
||||
}
|
||||
|
||||
const vector<vector<FieldInfo> >& Component::InvertKeys(){
|
||||
return invert_keys;
|
||||
}
|
||||
|
||||
const vector<ExtraFilterKey>& Component::ExtraFilterKeys(){
|
||||
return extra_filter_keys;
|
||||
}
|
||||
|
||||
const vector<ExtraFilterKey>& Component::ExtraFilterAndKeys(){
|
||||
return extra_filter_and_keys;
|
||||
}
|
||||
|
||||
const vector<ExtraFilterKey>& Component::ExtraFilterInvertKeys(){
|
||||
return extra_filter_invert_keys;
|
||||
}
|
||||
|
||||
string Component::QueryWord(){
|
||||
return m_Query_Word;
|
||||
}
|
||||
|
||||
void Component::SetQueryWord(string query_word){
|
||||
m_Query_Word = query_word;
|
||||
}
|
||||
|
||||
string Component::ProbablyData(){
|
||||
return m_probably_data;
|
||||
}
|
||||
|
||||
void Component::SetProbablyData(string probably_data){
|
||||
m_probably_data = probably_data;
|
||||
}
|
||||
|
||||
string Component::Latitude(){
|
||||
return latitude;
|
||||
}
|
||||
|
||||
string Component::Longitude(){
|
||||
return longitude;
|
||||
}
|
||||
|
||||
double Component::Distance(){
|
||||
return distance;
|
||||
}
|
||||
|
||||
string Component::Data(){
|
||||
return m_Data;
|
||||
}
|
||||
|
||||
uint32_t Component::JdqSwitch(){
|
||||
return m_jdq_switch;
|
||||
}
|
||||
|
||||
uint32_t Component::Appid(){
|
||||
return m_appid;
|
||||
}
|
||||
|
||||
string Component::DataAnd(){
|
||||
return m_Data_and;
|
||||
}
|
||||
|
||||
string Component::DataInvert(){
|
||||
return m_Data_invert;
|
||||
}
|
||||
|
||||
string Component::DataComplete(){
|
||||
return m_Data_complete;
|
||||
}
|
||||
|
||||
uint32_t Component::SortType(){
|
||||
return m_sort_type;
|
||||
}
|
||||
|
||||
uint32_t Component::PageIndex(){
|
||||
return m_page_index;
|
||||
}
|
||||
uint32_t Component::PageSize(){
|
||||
return m_page_size;
|
||||
}
|
||||
|
||||
uint32_t Component::ReturnAll(){
|
||||
return m_return_all;
|
||||
}
|
||||
|
||||
uint32_t Component::CacheSwitch(){
|
||||
return m_cache_switch;
|
||||
}
|
||||
|
||||
uint32_t Component::TopSwitch(){
|
||||
return m_top_switch;
|
||||
}
|
||||
|
||||
uint32_t Component::SnapshotSwitch(){
|
||||
return m_snapshot_switch;
|
||||
}
|
||||
|
||||
string Component::SortField(){
|
||||
return m_sort_field;
|
||||
}
|
||||
|
||||
string Component::LastId(){
|
||||
return m_last_id;
|
||||
}
|
||||
|
||||
string Component::LastScore(){
|
||||
return m_last_score;
|
||||
}
|
||||
|
||||
bool Component::SearchAfter(){
|
||||
return m_search_after;
|
||||
}
|
||||
|
||||
vector<string>& Component::Fields(){
|
||||
return m_fields;
|
||||
}
|
||||
|
||||
uint32_t Component::TerminalTag(){
|
||||
return m_terminal_tag;
|
||||
}
|
||||
|
||||
bool Component::TerminalTagValid(){
|
||||
return m_terminal_tag_valid;
|
||||
}
|
||||
|
||||
Json::Value& Component::GetQuery(){
|
||||
return m_query;
|
||||
}
|
||||
|
||||
void Component::GetKeyFromFieldInfo(const vector<FieldInfo>& field_info_vec, vector<string>& key_vec){
|
||||
vector<FieldInfo>::const_iterator iter = field_info_vec.begin();
|
||||
for(; iter != field_info_vec.end(); iter++){
|
||||
key_vec.push_back((*iter).word);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
** 通过递归求出二维vector每一维vector中取一个数的各种组合
|
||||
** 输入:[[a],[b1,b2],[c1,c2,c3]]
|
||||
** 输出:[a_b1_c1,a_b1_c2,a_b1_c3,a_b2_c1,a_b2_c2,a_b2_c3]
|
||||
*/
|
||||
vector<string> Component::Combination(vector<vector<string> > &dimensionalArr){
|
||||
int FLength = dimensionalArr.size();
|
||||
if(FLength >= 2){
|
||||
int SLength1 = dimensionalArr[0].size();
|
||||
int SLength2 = dimensionalArr[1].size();
|
||||
int DLength = SLength1 * SLength2;
|
||||
vector<string> temporary(DLength);
|
||||
int index = 0;
|
||||
for(int i = 0; i < SLength1; i++){
|
||||
for (int j = 0; j < SLength2; j++) {
|
||||
temporary[index] = dimensionalArr[0][i] +"_"+ dimensionalArr[1][j];
|
||||
index++;
|
||||
}
|
||||
}
|
||||
vector<vector<string> > new_arr;
|
||||
new_arr.push_back(temporary);
|
||||
for(int i = 2; i < (int)dimensionalArr.size(); i++){
|
||||
new_arr.push_back(dimensionalArr[i]);
|
||||
}
|
||||
return Combination(new_arr);
|
||||
} else {
|
||||
return dimensionalArr[0];
|
||||
}
|
||||
}
|
@ -1,118 +0,0 @@
|
||||
/*
|
||||
* =====================================================================================
|
||||
*
|
||||
* Filename: component.h
|
||||
*
|
||||
* Description: component class definition.
|
||||
*
|
||||
* Version: 1.0
|
||||
* Created: 09/08/2019
|
||||
* Revision: none
|
||||
* Compiler: gcc
|
||||
*
|
||||
* Author: zhulin, shzhulin3@jd.com
|
||||
* Company: JD.com, Inc.
|
||||
*
|
||||
* =====================================================================================
|
||||
*/
|
||||
|
||||
#ifndef __COMPONENT_H__
|
||||
#define __COMPONENT_H__
|
||||
#include "comm.h"
|
||||
#include "json/json.h"
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "query/query_parser.h"
|
||||
using namespace std;
|
||||
|
||||
class Component
|
||||
{
|
||||
public:
|
||||
Component();
|
||||
~Component();
|
||||
|
||||
int GetQueryWord(uint32_t &m_has_gis, string &err_msg);
|
||||
const vector<vector<FieldInfo> >& Keys();
|
||||
const vector<vector<FieldInfo> >& AndKeys();
|
||||
const vector<vector<FieldInfo> >& InvertKeys();
|
||||
const vector<ExtraFilterKey>& ExtraFilterKeys();
|
||||
const vector<ExtraFilterKey>& ExtraFilterAndKeys();
|
||||
const vector<ExtraFilterKey>& ExtraFilterInvertKeys();
|
||||
int ParseJson(const char *sz_json, int json_len, Json::Value &recv_packet);
|
||||
void InitSwitch();
|
||||
string QueryWord();
|
||||
void SetQueryWord(string query_word);
|
||||
string ProbablyData();
|
||||
void SetProbablyData(string probably_data);
|
||||
string Latitude();
|
||||
string Longitude();
|
||||
double Distance();
|
||||
string Data();
|
||||
string DataAnd();
|
||||
string DataInvert();
|
||||
string DataComplete();
|
||||
uint32_t JdqSwitch();
|
||||
uint32_t Appid();
|
||||
uint32_t SortType();
|
||||
uint32_t PageIndex();
|
||||
uint32_t PageSize();
|
||||
uint32_t ReturnAll();
|
||||
uint32_t CacheSwitch();
|
||||
uint32_t TopSwitch();
|
||||
uint32_t SnapshotSwitch();
|
||||
string SortField();
|
||||
string LastId();
|
||||
string LastScore();
|
||||
bool SearchAfter();
|
||||
vector<string>& Fields();
|
||||
uint32_t TerminalTag();
|
||||
bool TerminalTagValid();
|
||||
Json::Value& GetQuery();
|
||||
|
||||
void GetFieldWords(int type, string dataStr, uint32_t appid, uint32_t &m_has_gis);
|
||||
void AddToFieldList(int type, vector<FieldInfo>& fields);
|
||||
void GetKeyFromFieldInfo(const vector<FieldInfo>& field_info_vec, vector<string>& key_vec);
|
||||
vector<string> Combination(vector<vector<string> > &dimensionalArr);
|
||||
|
||||
private:
|
||||
vector<vector<FieldInfo> > keys;
|
||||
vector<vector<FieldInfo> > and_keys;
|
||||
vector<vector<FieldInfo> > invert_keys;
|
||||
vector<ExtraFilterKey> extra_filter_keys;
|
||||
vector<ExtraFilterKey> extra_filter_and_keys;
|
||||
vector<ExtraFilterKey> extra_filter_invert_keys;
|
||||
|
||||
string m_Query_Word;
|
||||
string m_probably_data;
|
||||
string latitude;
|
||||
string longitude;
|
||||
string gisip;
|
||||
double distance;
|
||||
|
||||
string m_Data; //查询词
|
||||
string m_Data_and; // 包含该查询词
|
||||
string m_Data_invert; // 不包含该查询词
|
||||
string m_Data_complete; // 完整关键词
|
||||
uint32_t m_page_index;
|
||||
uint32_t m_page_size;
|
||||
uint32_t m_return_all;
|
||||
uint32_t m_cache_switch;
|
||||
uint32_t m_top_switch;
|
||||
uint32_t m_snapshot_switch;
|
||||
uint32_t m_sort_type;
|
||||
uint32_t m_appid;
|
||||
uint32_t m_query_type;
|
||||
string m_sort_field;
|
||||
string m_last_id;
|
||||
string m_last_score;
|
||||
bool m_search_after;
|
||||
vector<string> m_fields;
|
||||
string m_default_query;
|
||||
uint32_t m_jdq_switch;
|
||||
uint32_t m_terminal_tag;
|
||||
bool m_terminal_tag_valid;
|
||||
Json::Value m_query;
|
||||
QueryParser* query_parser;
|
||||
QueryParserRes* query_parser_res;
|
||||
};
|
||||
#endif
|
@ -23,8 +23,8 @@
|
||||
[
|
||||
{
|
||||
"ip": "127.0.0.1",
|
||||
"bid": 2153,
|
||||
"port": 20000,
|
||||
"bid": 2113,
|
||||
"port": 30311,
|
||||
"weight": 1,
|
||||
"status": 1
|
||||
}
|
||||
|
@ -16,288 +16,329 @@
|
||||
* =====================================================================================
|
||||
*/
|
||||
|
||||
#include "component.h"
|
||||
#include "request_context.h"
|
||||
#include "doc_manager.h"
|
||||
#include "log.h"
|
||||
#include "search_util.h"
|
||||
#include "db_manager.h"
|
||||
#include "process/geo_distance_query_process.h"
|
||||
#include <math.h>
|
||||
#include <sstream>
|
||||
|
||||
DocManager::DocManager(Component *c): component(c){
|
||||
}
|
||||
extern CIndexTableManager g_IndexInstance;
|
||||
|
||||
DocManager::DocManager(RequestContext *c)
|
||||
: score_str_map()
|
||||
, score_int_map()
|
||||
, score_double_map()
|
||||
, valid_version_()
|
||||
, doc_content_map_()
|
||||
, component(c)
|
||||
{ }
|
||||
|
||||
DocManager::~DocManager(){
|
||||
|
||||
}
|
||||
|
||||
bool DocManager::CheckDocByExtraFilterKey(string doc_id){
|
||||
vector<ExtraFilterKey> extra_filter_vec = component->ExtraFilterKeys();
|
||||
vector<ExtraFilterKey> extra_filter_and_vec = component->ExtraFilterAndKeys();
|
||||
vector<ExtraFilterKey> extra_filter_invert_vec = component->ExtraFilterInvertKeys();
|
||||
if(extra_filter_vec.size() == 0 && extra_filter_and_vec.size() == 0 && extra_filter_invert_vec.size() == 0){
|
||||
return true;
|
||||
} else {
|
||||
vector<string> fields;
|
||||
for(int i = 0; i < (int)extra_filter_vec.size(); i++){
|
||||
fields.push_back(extra_filter_vec[i].field_name);
|
||||
}
|
||||
for(int i = 0; i < (int)extra_filter_and_vec.size(); i++){
|
||||
fields.push_back(extra_filter_and_vec[i].field_name);
|
||||
}
|
||||
for(int i = 0; i < (int)extra_filter_invert_vec.size(); i++){
|
||||
fields.push_back(extra_filter_invert_vec[i].field_name);
|
||||
}
|
||||
Json::Value value;
|
||||
uint32_t doc_version = 0;
|
||||
if(valid_version.find(doc_id) != valid_version.end()){
|
||||
doc_version = valid_version[doc_id];
|
||||
}
|
||||
if(doc_content_map.find(doc_id) != doc_content_map.end()){
|
||||
string extend = doc_content_map[doc_id];
|
||||
Json::Reader r(Json::Features::strictMode());
|
||||
int ret2 = r.parse(extend.c_str(), extend.c_str() + extend.length(), value);
|
||||
if (0 == ret2){
|
||||
log_error("the err json is %s, errmsg : %s", extend.c_str(), r.getFormattedErrorMessages().c_str());
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
bool bRet = g_IndexInstance.GetContentByField(component->Appid(), doc_id, doc_version, fields, value);
|
||||
if(bRet == false){
|
||||
log_error("get field content error, appid[%d] doc_id[%s].", component->Appid(), doc_id.c_str());
|
||||
return true;
|
||||
}
|
||||
}
|
||||
bool DocManager::CheckDocByExtraFilterKey(std::string doc_id){
|
||||
std::vector<ExtraFilterKey> extra_filter_vec = component->ExtraFilterOrKeys();
|
||||
std::vector<ExtraFilterKey> extra_filter_and_vec = component->ExtraFilterAndKeys();
|
||||
std::vector<ExtraFilterKey> extra_filter_invert_vec = component->ExtraFilterInvertKeys();
|
||||
if(extra_filter_vec.size() == 0 && extra_filter_and_vec.size() == 0 && extra_filter_invert_vec.size() == 0){
|
||||
return true;
|
||||
} else {
|
||||
std::vector<std::string> fields;
|
||||
for(int i = 0; i < (int)extra_filter_vec.size(); i++){
|
||||
fields.push_back(extra_filter_vec[i].field_name);
|
||||
}
|
||||
for(int i = 0; i < (int)extra_filter_and_vec.size(); i++){
|
||||
fields.push_back(extra_filter_and_vec[i].field_name);
|
||||
}
|
||||
for(int i = 0; i < (int)extra_filter_invert_vec.size(); i++){
|
||||
fields.push_back(extra_filter_invert_vec[i].field_name);
|
||||
}
|
||||
Json::Value value;
|
||||
uint32_t doc_version = 0;
|
||||
if(valid_version_.find(doc_id) != valid_version_.end()){
|
||||
doc_version = valid_version_[doc_id];
|
||||
}
|
||||
if(doc_content_map_.find(doc_id) != doc_content_map_.end()){
|
||||
std::string extend = doc_content_map_[doc_id];
|
||||
Json::Reader r(Json::Features::strictMode());
|
||||
int ret2 = r.parse(extend.c_str(), extend.c_str() + extend.length(), value);
|
||||
if (0 == ret2){
|
||||
log_error("the err json is %s, errmsg : %s", extend.c_str(), r.getFormattedErrorMessages().c_str());
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
bool bRet = g_IndexInstance.GetContentByField(component->Appid(), doc_id, doc_version, fields, value);
|
||||
if(bRet == false){
|
||||
log_error("get field content error, appid[%d] doc_id[%s].", component->Appid(), doc_id.c_str());
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
bool key_or_valid = false;
|
||||
CheckIfKeyValid(extra_filter_vec, value, true, key_or_valid);
|
||||
if(extra_filter_vec.size() > 0 && key_or_valid == false){
|
||||
return false;
|
||||
}
|
||||
bool key_or_valid = false;
|
||||
CheckIfKeyValid(extra_filter_vec, value, true, key_or_valid);
|
||||
if(extra_filter_vec.size() > 0 && key_or_valid == false){
|
||||
return false;
|
||||
}
|
||||
|
||||
bool key_and_valid = true;
|
||||
CheckIfKeyValid(extra_filter_and_vec, value, false, key_and_valid);
|
||||
if(key_and_valid == false){
|
||||
return false;
|
||||
}
|
||||
bool key_and_valid = true;
|
||||
CheckIfKeyValid(extra_filter_and_vec, value, false, key_and_valid);
|
||||
if(key_and_valid == false){
|
||||
return false;
|
||||
}
|
||||
|
||||
bool key_invert_valid = false;
|
||||
CheckIfKeyValid(extra_filter_invert_vec, value, true, key_invert_valid);
|
||||
if(key_invert_valid == true){
|
||||
return false;
|
||||
}
|
||||
bool key_invert_valid = false;
|
||||
CheckIfKeyValid(extra_filter_invert_vec, value, true, key_invert_valid);
|
||||
if(key_invert_valid == true){
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
void DocManager::CheckIfKeyValid(const vector<ExtraFilterKey>& extra_filter_vec, const Json::Value &value, bool flag, bool &key_valid){
|
||||
for(int i = 0; i < (int)extra_filter_vec.size(); i++){
|
||||
bool the_same = false;
|
||||
string field_name = extra_filter_vec[i].field_name;
|
||||
if(extra_filter_vec[i].field_type == FIELD_INT){
|
||||
string query = extra_filter_vec[i].field_value;
|
||||
vector<string> query_vec = splitEx(query, "|");
|
||||
if(query_vec.size() > 1){
|
||||
for(int i = 0 ; i < (int)query_vec.size(); i++){
|
||||
if(atoi(query_vec[i].c_str()) == value[field_name.c_str()].asInt()){
|
||||
the_same = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
the_same = (atoi(extra_filter_vec[i].field_value.c_str()) == value[field_name.c_str()].asInt());
|
||||
}
|
||||
} else if(extra_filter_vec[i].field_type == FIELD_DOUBLE){
|
||||
double d_field_value = atof(extra_filter_vec[i].field_value.c_str());
|
||||
double d_extend = value[field_name.c_str()].asDouble();
|
||||
the_same = (fabs(d_field_value - d_extend) < 1e-15);
|
||||
} else if(extra_filter_vec[i].field_type == FIELD_STRING){
|
||||
string snapshot = value[field_name.c_str()].asString();
|
||||
string query = extra_filter_vec[i].field_value;
|
||||
set<string> snapshot_set = splitStr(snapshot, "|");
|
||||
vector<string> query_vec = splitEx(query, "|");
|
||||
for(int i = 0 ; i < (int)query_vec.size(); i++){
|
||||
if(snapshot_set.find(query_vec[i]) != snapshot_set.end()){
|
||||
the_same = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if(the_same == flag){
|
||||
key_valid = flag;
|
||||
break;
|
||||
}
|
||||
}
|
||||
void DocManager::CheckIfKeyValid(const std::vector<ExtraFilterKey>& extra_filter_vec, const Json::Value &value, bool flag, bool &key_valid){
|
||||
for(int i = 0; i < (int)extra_filter_vec.size(); i++){
|
||||
bool the_same = false;
|
||||
std::string field_name = extra_filter_vec[i].field_name;
|
||||
if(extra_filter_vec[i].field_type == FIELD_INT){
|
||||
std::string query = extra_filter_vec[i].field_value;
|
||||
std::vector<std::string> query_vec = splitEx(query, "|");
|
||||
if(query_vec.size() > 1){
|
||||
for(int i = 0 ; i < (int)query_vec.size(); i++){
|
||||
if(atoi(query_vec[i].c_str()) == value[field_name.c_str()].asInt()){
|
||||
the_same = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
the_same = (atoi(extra_filter_vec[i].field_value.c_str()) == value[field_name.c_str()].asInt());
|
||||
}
|
||||
} else if(extra_filter_vec[i].field_type == FIELD_DOUBLE){
|
||||
double d_field_value = atof(extra_filter_vec[i].field_value.c_str());
|
||||
double d_extend = value[field_name.c_str()].asDouble();
|
||||
the_same = (fabs(d_field_value - d_extend) < 1e-15);
|
||||
} else if(extra_filter_vec[i].field_type == FIELD_STRING){
|
||||
std::string snapshot = value[field_name.c_str()].asString();
|
||||
std::string query = extra_filter_vec[i].field_value;
|
||||
std::set<std::string> snapshot_set = splitStr(snapshot, "|");
|
||||
std::vector<std::string> query_vec = splitEx(query, "|");
|
||||
for(int i = 0 ; i < (int)query_vec.size(); i++){
|
||||
if(snapshot_set.find(query_vec[i]) != snapshot_set.end()){
|
||||
the_same = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if(the_same == flag){
|
||||
key_valid = flag;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool DocManager::GetDocContent(uint32_t m_has_gis, vector<IndexInfo> &doc_id_ver_vec, set<string> &valid_docs, hash_double_map &distances){
|
||||
if (!m_has_gis && component->SnapshotSwitch() == 1 && doc_id_ver_vec.size() <= 1000) {
|
||||
bool need_version = false;
|
||||
if(component->Fields().size() > 0){
|
||||
need_version = true;
|
||||
}
|
||||
bool bRet = g_IndexInstance.DocValid(component->Appid(), doc_id_ver_vec, valid_docs, need_version, valid_version, doc_content_map);
|
||||
if (false == bRet) {
|
||||
log_error("GetDocInfo by snapshot error.");
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
for(size_t i = 0 ; i < doc_id_ver_vec.size(); i++){
|
||||
if(!m_has_gis){
|
||||
valid_docs.insert(doc_id_ver_vec[i].doc_id);
|
||||
}
|
||||
if(doc_id_ver_vec[i].extend != ""){
|
||||
doc_content_map.insert(make_pair(doc_id_ver_vec[i].doc_id, doc_id_ver_vec[i].extend));
|
||||
}
|
||||
}
|
||||
}
|
||||
bool DocManager::GetDocContent(){
|
||||
const std::vector<IndexInfo>& o_index_info_vet = ResultContext::Instance()->GetIndexInfos();
|
||||
if (component->SnapshotSwitch() == 1 && o_index_info_vet.size() <= 1000) {
|
||||
bool need_version = false;
|
||||
if(component->RequiredFields().size() > 0){
|
||||
need_version = true;
|
||||
}
|
||||
bool bRet = g_IndexInstance.DocValid(component->Appid(), o_index_info_vet, need_version, valid_version_, doc_content_map_);
|
||||
if (false == bRet) {
|
||||
log_error("GetDocInfo by snapshot error.");
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
for(size_t i = 0 ; i < o_index_info_vet.size(); i++){
|
||||
ResultContext::Instance()->SetValidDocs(o_index_info_vet[i].doc_id);
|
||||
if(o_index_info_vet[i].extend != ""){
|
||||
doc_content_map_.insert(std::make_pair(o_index_info_vet[i].doc_id, o_index_info_vet[i].extend));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
log_debug("doc_id_ver_vec size: %d", (int)doc_id_ver_vec.size());
|
||||
if (m_has_gis) {
|
||||
if(doc_content_map.size() == 0){
|
||||
g_IndexInstance.GetDocContent(component->Appid(), doc_id_ver_vec, doc_content_map);
|
||||
}
|
||||
GetGisDistance(component->Appid(), component->Latitude(), component->Longitude(), distances, doc_content_map);
|
||||
}
|
||||
return true;
|
||||
log_debug("doc_id_ver_vec size: %d", (int)o_index_info_vet.size());
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DocManager::AppendFieldsToRes(Json::Value &response, vector<string> &m_fields){
|
||||
for(int i = 0; i < (int)response["result"].size(); i++){
|
||||
Json::Value doc_info = response["result"][i];
|
||||
bool DocManager::GetDocContent(
|
||||
const GeoPointContext& geo_point ,
|
||||
std::vector<IndexInfo>& index_infos)
|
||||
{
|
||||
std::vector<IndexInfo>::iterator iter = index_infos.begin();
|
||||
for( ;iter != index_infos.end(); ++iter){
|
||||
if((iter->extend) != ""){
|
||||
doc_content_map_.insert(make_pair(iter->doc_id, iter->extend));
|
||||
}
|
||||
}
|
||||
|
||||
string doc_id = doc_info["doc_id"].asString();
|
||||
if(doc_content_map.find(doc_id) != doc_content_map.end()){
|
||||
string extend = doc_content_map[doc_id];
|
||||
Json::Reader r(Json::Features::strictMode());
|
||||
Json::Value recv_packet;
|
||||
int ret2 = r.parse(extend.c_str(), extend.c_str() + extend.length(), recv_packet);
|
||||
if (0 == ret2){
|
||||
log_error("parse json error [%s], errmsg : %s", extend.c_str(), r.getFormattedErrorMessages().c_str());
|
||||
return false;
|
||||
}
|
||||
if(doc_content_map_.empty()){
|
||||
g_IndexInstance.GetDocContent(component->Appid(),index_infos , doc_content_map_);
|
||||
}
|
||||
|
||||
Json::Value &value = response["result"][i];
|
||||
for(int i = 0; i < (int)m_fields.size(); i++){
|
||||
if (recv_packet.isMember(m_fields[i].c_str()))
|
||||
{
|
||||
if(recv_packet[m_fields[i].c_str()].isUInt()){
|
||||
value[m_fields[i].c_str()] = recv_packet[m_fields[i].c_str()].asUInt();
|
||||
} else if(recv_packet[m_fields[i].c_str()].isString()){
|
||||
value[m_fields[i].c_str()] = recv_packet[m_fields[i].c_str()].asString();
|
||||
} else if(recv_packet[m_fields[i].c_str()].isDouble()){
|
||||
value[m_fields[i].c_str()] = recv_packet[m_fields[i].c_str()].asDouble();
|
||||
} else {
|
||||
log_error("field[%s] data type error.", m_fields[i].c_str());
|
||||
}
|
||||
} else {
|
||||
log_error("appid[%u] field[%s] invalid.", component->Appid(), m_fields[i].c_str());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
uint32_t doc_version = 0;
|
||||
if(valid_version.find(doc_info["doc_id"].asString()) != valid_version.end()){
|
||||
doc_version = valid_version[doc_info["doc_id"].asString()];
|
||||
}
|
||||
bool bRet = g_IndexInstance.GetContentByField(component->Appid(), doc_info["doc_id"].asString(), doc_version, m_fields, response["result"][i]);
|
||||
if(bRet == false){
|
||||
log_error("get field content error.");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
hash_double_map docid_dis_map;
|
||||
bool bret = GetGisDistance(component->Appid(), geo_point, doc_content_map_, docid_dis_map);
|
||||
if (!bret){
|
||||
return bret;
|
||||
}
|
||||
|
||||
std::vector<IndexInfo> o_valid_index_infos;
|
||||
hash_double_map::iterator docid_dis_iter = docid_dis_map.begin();
|
||||
for ( ; docid_dis_iter != docid_dis_map.end(); ++docid_dis_iter){
|
||||
iter = index_infos.begin();
|
||||
for( ;iter != index_infos.end(); ++iter){
|
||||
if ((docid_dis_iter->first) == (iter->doc_id)){
|
||||
iter->distance = docid_dis_iter->second;
|
||||
o_valid_index_infos.push_back(*iter);
|
||||
}
|
||||
}
|
||||
}
|
||||
index_infos.swap(o_valid_index_infos);
|
||||
return bret;
|
||||
}
|
||||
|
||||
bool DocManager::GetScoreMap(string doc_id, uint32_t m_sort_type, string m_sort_field, FIELDTYPE &m_sort_field_type, uint32_t appid){
|
||||
if(doc_content_map.find(doc_id) != doc_content_map.end()){
|
||||
uint32_t field_type = 0;
|
||||
bool bRet = DBManager::Instance()->GetFieldType(appid, m_sort_field, field_type);
|
||||
if(false == bRet){
|
||||
log_error("appid[%d] field[%s] not find.", appid, m_sort_field.c_str());
|
||||
return false;
|
||||
}
|
||||
string extend = doc_content_map[doc_id];
|
||||
bool DocManager::AppendFieldsToRes(Json::Value &response, std::vector<std::string> &m_fields){
|
||||
for(int i = 0; i < (int)response["result"].size(); i++){
|
||||
Json::Value doc_info = response["result"][i];
|
||||
|
||||
if(field_type == FIELD_INT){
|
||||
int len = strlen(m_sort_field.c_str()) + strlen("\":");
|
||||
size_t pos1 = extend.find(m_sort_field);
|
||||
size_t pos2 = extend.find_first_of(",", pos1);
|
||||
if(pos2 == string::npos){
|
||||
pos2 = extend.find_first_of("}", pos1);
|
||||
}
|
||||
if(pos1 != string::npos && pos2 != string::npos){
|
||||
string field_str = extend.substr(pos1+len, pos2-pos1-len);
|
||||
int field_int;
|
||||
istringstream iss(field_str);
|
||||
iss >> field_int;
|
||||
m_sort_field_type = FIELDTYPE_INT;
|
||||
score_int_map.insert(make_pair(doc_id, field_int));
|
||||
} else {
|
||||
m_sort_field_type = FIELDTYPE_INT;
|
||||
score_int_map.insert(make_pair(doc_id, 0));
|
||||
}
|
||||
} else {
|
||||
Json::Reader r(Json::Features::strictMode());
|
||||
Json::Value recv_packet;
|
||||
int ret2 = r.parse(extend.c_str(), extend.c_str() + extend.length(), recv_packet);
|
||||
if (0 == ret2){
|
||||
log_error("the err json is %s, errmsg : %s", extend.c_str(), r.getFormattedErrorMessages().c_str());
|
||||
return false;
|
||||
}
|
||||
std::string doc_id = doc_info["doc_id"].asString();
|
||||
if(doc_content_map_.find(doc_id) != doc_content_map_.end()){
|
||||
std::string extend = doc_content_map_[doc_id];
|
||||
Json::Reader r(Json::Features::strictMode());
|
||||
Json::Value recv_packet;
|
||||
int ret2 = r.parse(extend.c_str(), extend.c_str() + extend.length(), recv_packet);
|
||||
if (0 == ret2){
|
||||
log_error("parse json error [%s], errmsg : %s", extend.c_str(), r.getFormattedErrorMessages().c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
if(recv_packet.isMember(m_sort_field.c_str()))
|
||||
{
|
||||
if(recv_packet[m_sort_field.c_str()].isUInt()){
|
||||
m_sort_field_type = FIELDTYPE_INT;
|
||||
score_int_map.insert(make_pair(doc_id, recv_packet[m_sort_field.c_str()].asUInt()));
|
||||
} else if(recv_packet[m_sort_field.c_str()].isString()){
|
||||
m_sort_field_type = FIELDTYPE_STRING;
|
||||
score_str_map.insert(make_pair(doc_id, recv_packet[m_sort_field.c_str()].asString()));
|
||||
} else if(recv_packet[m_sort_field.c_str()].isDouble()){
|
||||
m_sort_field_type = FIELDTYPE_DOUBLE;
|
||||
score_double_map.insert(make_pair(doc_id, recv_packet[m_sort_field.c_str()].asDouble()));
|
||||
} else {
|
||||
log_error("sort_field[%s] data type error.", m_sort_field.c_str());
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
log_error("appid[%u] sort_field[%s] invalid.", component->Appid(), m_sort_field.c_str());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
ScoreInfo score_info;
|
||||
bool bRet = g_IndexInstance.GetScoreByField(component->Appid(), doc_id, m_sort_field, m_sort_type, score_info);
|
||||
if(bRet == false){
|
||||
log_error("get score by field error.");
|
||||
return false;
|
||||
}
|
||||
m_sort_field_type = score_info.type;
|
||||
if(score_info.type == FIELDTYPE_INT){
|
||||
score_int_map.insert(make_pair(doc_id, score_info.i));
|
||||
} else if(score_info.type == FIELDTYPE_STRING){
|
||||
score_str_map.insert(make_pair(doc_id, score_info.str));
|
||||
} else if(score_info.type == FIELDTYPE_DOUBLE){
|
||||
score_double_map.insert(make_pair(doc_id, score_info.d));
|
||||
}
|
||||
}
|
||||
return true;
|
||||
Json::Value &value = response["result"][i];
|
||||
for(int i = 0; i < (int)m_fields.size(); i++){
|
||||
if (recv_packet.isMember(m_fields[i].c_str()))
|
||||
{
|
||||
if(recv_packet[m_fields[i].c_str()].isUInt()){
|
||||
value[m_fields[i].c_str()] = recv_packet[m_fields[i].c_str()].asUInt();
|
||||
} else if(recv_packet[m_fields[i].c_str()].isString()){
|
||||
value[m_fields[i].c_str()] = recv_packet[m_fields[i].c_str()].asString();
|
||||
} else if(recv_packet[m_fields[i].c_str()].isDouble()){
|
||||
value[m_fields[i].c_str()] = recv_packet[m_fields[i].c_str()].asDouble();
|
||||
} else if(recv_packet[m_fields[i].c_str()].isObject()
|
||||
|| recv_packet[m_fields[i].c_str()].isArray()){
|
||||
value[m_fields[i].c_str()] = recv_packet[m_fields[i].c_str()];
|
||||
}else{
|
||||
log_error("field[%s] data type error.", m_fields[i].c_str());
|
||||
}
|
||||
} else {
|
||||
log_error("appid[%u] field[%s] invalid.", component->Appid(), m_fields[i].c_str());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
uint32_t doc_version = 0;
|
||||
if(valid_version_.find(doc_info["doc_id"].asString()) != valid_version_.end()){
|
||||
doc_version = valid_version_[doc_info["doc_id"].asString()];
|
||||
}
|
||||
bool bRet = g_IndexInstance.GetContentByField(component->Appid(), doc_info["doc_id"].asString(), doc_version, m_fields, response["result"][i]);
|
||||
if(bRet == false){
|
||||
log_error("get field content error.");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
map<string, string>& DocManager::ScoreStrMap(){
|
||||
return score_str_map;
|
||||
bool DocManager::GetScoreMap(std::string doc_id, uint32_t m_sort_type, std::string m_sort_field, FIELDTYPE &m_sort_field_type){
|
||||
if(doc_content_map_.find(doc_id) != doc_content_map_.end()){
|
||||
uint32_t field_type = 0;
|
||||
bool bRet = DBManager::Instance()->GetFieldType(component->Appid(), m_sort_field, field_type);
|
||||
if(false == bRet){
|
||||
log_error("appid[%d] field[%s] not find.", component->Appid(), m_sort_field.c_str());
|
||||
return false;
|
||||
}
|
||||
std::string extend = doc_content_map_[doc_id];
|
||||
|
||||
if(field_type == FIELD_INT){
|
||||
int len = strlen(m_sort_field.c_str()) + strlen("\":");
|
||||
size_t pos1 = extend.find(m_sort_field);
|
||||
size_t pos2 = extend.find_first_of(",", pos1);
|
||||
if(pos2 == std::string::npos){
|
||||
pos2 = extend.find_first_of("}", pos1);
|
||||
}
|
||||
if(pos1 != std::string::npos && pos2 != std::string::npos){
|
||||
string field_str = extend.substr(pos1+len, pos2-pos1-len);
|
||||
int field_int;
|
||||
istringstream iss(field_str);
|
||||
iss >> field_int;
|
||||
m_sort_field_type = FIELDTYPE_INT;
|
||||
score_int_map.insert(std::make_pair(doc_id, field_int));
|
||||
} else {
|
||||
m_sort_field_type = FIELDTYPE_INT;
|
||||
score_int_map.insert(std::make_pair(doc_id, 0));
|
||||
}
|
||||
} else {
|
||||
Json::Reader r(Json::Features::strictMode());
|
||||
Json::Value recv_packet;
|
||||
int ret2 = r.parse(extend.c_str(), extend.c_str() + extend.length(), recv_packet);
|
||||
if (0 == ret2){
|
||||
log_error("the err json is %s, errmsg : %s", extend.c_str(), r.getFormattedErrorMessages().c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
if(recv_packet.isMember(m_sort_field.c_str()))
|
||||
{
|
||||
if(recv_packet[m_sort_field.c_str()].isUInt()){
|
||||
m_sort_field_type = FIELDTYPE_INT;
|
||||
score_int_map.insert(make_pair(doc_id, recv_packet[m_sort_field.c_str()].asUInt()));
|
||||
} else if(recv_packet[m_sort_field.c_str()].isString()){
|
||||
m_sort_field_type = FIELDTYPE_STRING;
|
||||
score_str_map.insert(make_pair(doc_id, recv_packet[m_sort_field.c_str()].asString()));
|
||||
} else if(recv_packet[m_sort_field.c_str()].isDouble()){
|
||||
m_sort_field_type = FIELDTYPE_DOUBLE;
|
||||
score_double_map.insert(make_pair(doc_id, recv_packet[m_sort_field.c_str()].asDouble()));
|
||||
} else {
|
||||
log_error("sort_field[%s] data type error.", m_sort_field.c_str());
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
log_error("appid[%u] sort_field[%s] invalid.", component->Appid(), m_sort_field.c_str());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
ScoreInfo score_info;
|
||||
bool bRet = g_IndexInstance.GetScoreByField(component->Appid(), doc_id, m_sort_field, m_sort_type, score_info);
|
||||
if(bRet == false){
|
||||
log_error("get score by field error.");
|
||||
return false;
|
||||
}
|
||||
m_sort_field_type = score_info.type;
|
||||
if(score_info.type == FIELDTYPE_INT){
|
||||
score_int_map.insert(make_pair(doc_id, score_info.i));
|
||||
} else if(score_info.type == FIELDTYPE_STRING){
|
||||
score_str_map.insert(make_pair(doc_id, score_info.str));
|
||||
} else if(score_info.type == FIELDTYPE_DOUBLE){
|
||||
score_double_map.insert(make_pair(doc_id, score_info.d));
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
map<string, int>& DocManager::ScoreIntMap(){
|
||||
return score_int_map;
|
||||
std::map<std::string, std::string>& DocManager::ScoreStrMap(){
|
||||
return score_str_map;
|
||||
}
|
||||
|
||||
map<string, double>& DocManager::ScoreDoubleMap(){
|
||||
return score_double_map;
|
||||
std::map<std::string, int>& DocManager::ScoreIntMap(){
|
||||
return score_int_map;
|
||||
}
|
||||
|
||||
map<string, uint32_t>& DocManager::ValidVersion(){
|
||||
return valid_version;
|
||||
std::map<std::string, double>& DocManager::ScoreDoubleMap(){
|
||||
return score_double_map;
|
||||
}
|
||||
|
||||
std::map<std::string, uint32_t>& DocManager::ValidVersion(){
|
||||
return valid_version_;
|
||||
}
|
@ -22,33 +22,36 @@
|
||||
#include "json/json.h"
|
||||
#include <map>
|
||||
#include <set>
|
||||
using namespace std;
|
||||
|
||||
class Component;
|
||||
class RequestContext;
|
||||
struct GeoPointContext;
|
||||
|
||||
class DocManager{
|
||||
public:
|
||||
DocManager(Component *c);
|
||||
~DocManager();
|
||||
DocManager(RequestContext *c);
|
||||
~DocManager();
|
||||
|
||||
bool CheckDocByExtraFilterKey(string doc_id);
|
||||
bool GetDocContent(uint32_t m_has_gis, vector<IndexInfo> &doc_id_ver_vec, set<string> &valid_docs, hash_double_map &distances);
|
||||
bool AppendFieldsToRes(Json::Value &response, vector<string> &m_fields);
|
||||
bool GetScoreMap(string doc_id, uint32_t m_sort_type, string m_sort_field, FIELDTYPE &m_sort_field_type, uint32_t appid);
|
||||
map<string, string>& ScoreStrMap();
|
||||
map<string, int>& ScoreIntMap();
|
||||
map<string, double>& ScoreDoubleMap();
|
||||
map<string, uint32_t>& ValidVersion();
|
||||
bool CheckDocByExtraFilterKey(std::string doc_id);
|
||||
bool GetDocContent();
|
||||
bool GetDocContent(const GeoPointContext& geo_point , std::vector<IndexInfo>& index_infos);
|
||||
|
||||
bool AppendFieldsToRes(Json::Value &response, std::vector<std::string> &m_fields);
|
||||
bool GetScoreMap(std::string doc_id, uint32_t m_sort_type, std::string m_sort_field, FIELDTYPE &m_sort_field_type);
|
||||
std::map<std::string, std::string>& ScoreStrMap();
|
||||
std::map<std::string, int>& ScoreIntMap();
|
||||
std::map<std::string, double>& ScoreDoubleMap();
|
||||
std::map<std::string, uint32_t>& ValidVersion();
|
||||
|
||||
private:
|
||||
void CheckIfKeyValid(const vector<ExtraFilterKey>& extra_filter_vec, const Json::Value &value, bool flag, bool &key_valid);
|
||||
void CheckIfKeyValid(const std::vector<ExtraFilterKey>& extra_filter_vec, const Json::Value &value, bool flag, bool &key_valid);
|
||||
|
||||
private:
|
||||
map<string, string> score_str_map;
|
||||
map<string, int> score_int_map;
|
||||
map<string, double> score_double_map;
|
||||
map<string, uint32_t> valid_version;
|
||||
hash_string_map doc_content_map;
|
||||
Component *component;
|
||||
std::map<std::string, std::string> score_str_map;
|
||||
std::map<std::string, int> score_int_map;
|
||||
std::map<std::string, double> score_double_map;
|
||||
std::map<std::string, uint32_t> valid_version_;
|
||||
hash_string_map doc_content_map_;
|
||||
RequestContext* component;
|
||||
};
|
||||
|
||||
#endif
|
@ -95,8 +95,8 @@ struct LimitCond
|
||||
int sLimitStart;
|
||||
int sLimitStep;
|
||||
LimitCond(){
|
||||
sLimitStart = 0;
|
||||
sLimitStep = 10;
|
||||
sLimitStart = -1;
|
||||
sLimitStep = -1;
|
||||
}
|
||||
|
||||
void reset() { sLimitStart = -1, sLimitStep = -1; }
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include "log.h"
|
||||
#include "../index_tbl_op.h"
|
||||
#include "rocksdb_direct_context.h"
|
||||
#include "../key_format.h"
|
||||
#include <algorithm>
|
||||
#include <iomanip>
|
||||
|
||||
@ -33,12 +34,23 @@ const char *INDEX_SYMBOL = "00";
|
||||
const char *MAX_BORDER_SYMBOL = "10";
|
||||
const char *MIN_BORDER_SYMBOL = "00";
|
||||
|
||||
static string gen_dtc_key_string(string appid, string type, double key) {
|
||||
stringstream ssKey;
|
||||
ssKey << setw(20) << setfill('0') << (int)key;
|
||||
stringstream ss;
|
||||
ss << appid << "#" << type << "#" << ssKey.str();
|
||||
return ss.str();
|
||||
static string gen_dtc_key_string(string appid, string type, uint32_t key_type, double key) {
|
||||
log_debug("fieldtype:%d , key:%f " , key_type , key);
|
||||
KeyFormat::UnionKey o_keyinfo_vet;
|
||||
o_keyinfo_vet.push_back(std::make_pair(key_type , std::to_string(key)));
|
||||
std::string s_format_key = KeyFormat::Encode(o_keyinfo_vet);
|
||||
|
||||
#if 0
|
||||
KeyFormat::UnionKey o_output_vet;
|
||||
o_output_vet.push_back(std::make_pair(key_type , ""));
|
||||
KeyFormat::Decode(s_format_key , o_output_vet);
|
||||
for (int i = 0; i < o_output_vet.size(); i++){
|
||||
log_error("decode string:%s", o_output_vet[i].second.c_str());
|
||||
}
|
||||
#endif
|
||||
std::stringstream stream_key;
|
||||
stream_key << appid << "#" << type << "#" << s_format_key;
|
||||
return stream_key.str();
|
||||
}
|
||||
|
||||
static string gen_dtc_key_string(string appid, string type, string key) {
|
||||
@ -403,7 +415,7 @@ vector<string> split(const string& str, const string& delim) {
|
||||
}
|
||||
|
||||
std::string getPath(const char *bind_addr){
|
||||
string s = "/tmp/domain_socket/rocks_direct_20000.sock";
|
||||
string s = "/tmp/domain_socket/rocks_direct_30311.sock";
|
||||
return s;
|
||||
}
|
||||
|
||||
@ -471,6 +483,8 @@ int SearchRocksDBIndex::getFieldIndex(const char *fieldName){
|
||||
|
||||
|
||||
void SearchRocksDBIndex::GetRangeIndex(uint32_t range_type, InvertIndexEntry &startEntry, InvertIndexEntry &endEntry, std::vector<InvertIndexEntry>& resultEntry){
|
||||
log_debug("range_type:%d", range_type);
|
||||
|
||||
if (range_type == RANGE_GELE) {
|
||||
GetRangeIndexGELE(startEntry, endEntry, resultEntry);
|
||||
}
|
||||
@ -493,7 +507,7 @@ void SearchRocksDBIndex::GetRangeIndex(uint32_t range_type, InvertIndexEntry &st
|
||||
GetRangeIndexGT(startEntry, resultEntry);
|
||||
}
|
||||
else if (range_type == RANGE_LT) {
|
||||
GetRangeIndexLT(startEntry, resultEntry);
|
||||
GetRangeIndexLT(endEntry, resultEntry);
|
||||
}
|
||||
}
|
||||
|
||||
@ -535,10 +549,6 @@ void SearchRocksDBIndex::setQueryCond(QueryCond& query_cond, int field_index, in
|
||||
|
||||
|
||||
void SearchRocksDBIndex::GetRangeIndexGELE(InvertIndexEntry& begin_key, const InvertIndexEntry& end_key, std::vector<InvertIndexEntry>& entry){
|
||||
|
||||
DirectRequestContext direct_request_context;
|
||||
stringstream ss;
|
||||
|
||||
if(getFieldIndex("field") == -1){
|
||||
log_error("GetRangeIndexGELE get field Index error");
|
||||
return;
|
||||
@ -549,25 +559,31 @@ void SearchRocksDBIndex::GetRangeIndexGELE(InvertIndexEntry& begin_key, const In
|
||||
return;
|
||||
}
|
||||
|
||||
DirectRequestContext direct_request_context;
|
||||
QueryCond query_cond1;
|
||||
query_cond1.sFieldIndex = getFieldIndex("field");
|
||||
query_cond1.sCondOpr = 0;
|
||||
query_cond1.sCondOpr = eEQ;
|
||||
|
||||
stringstream ss;
|
||||
ss << begin_key._InvertIndexField;
|
||||
query_cond1.sCondValue = ss.str();
|
||||
|
||||
direct_request_context.sFieldConds.push_back(query_cond1);
|
||||
|
||||
|
||||
QueryCond query_cond2;
|
||||
query_cond2.sFieldIndex = getFieldIndex("key");
|
||||
query_cond2.sCondOpr = 5;
|
||||
query_cond2.sCondValue = gen_dtc_key_string(begin_key._InvertIndexAppid, INDEX_SYMBOL, begin_key._InvertIndexKey);
|
||||
query_cond2.sCondOpr = eGE;
|
||||
query_cond2.sCondValue = gen_dtc_key_string(begin_key._InvertIndexAppid, INDEX_SYMBOL
|
||||
, begin_key._InvertIndex_key_type ,begin_key._InvertIndexKey);
|
||||
direct_request_context.sFieldConds.push_back(query_cond2);
|
||||
|
||||
|
||||
QueryCond query_cond3;
|
||||
query_cond3.sFieldIndex = getFieldIndex("key");
|
||||
query_cond3.sCondOpr = 3;
|
||||
query_cond3.sCondValue = gen_dtc_key_string(end_key._InvertIndexAppid, INDEX_SYMBOL, end_key._InvertIndexKey);
|
||||
query_cond3.sCondOpr = eLE;
|
||||
query_cond3.sCondValue = gen_dtc_key_string(end_key._InvertIndexAppid, INDEX_SYMBOL
|
||||
, end_key._InvertIndex_key_type , end_key._InvertIndexKey);
|
||||
direct_request_context.sFieldConds.push_back(query_cond3);
|
||||
|
||||
setEntry(direct_request_context, entry);
|
||||
@ -590,7 +606,7 @@ void SearchRocksDBIndex::GetRangeIndexGE(InvertIndexEntry& begin_key, std::vecto
|
||||
|
||||
QueryCond query_cond1;
|
||||
query_cond1.sFieldIndex = getFieldIndex("field");
|
||||
query_cond1.sCondOpr = 0;
|
||||
query_cond1.sCondOpr = eEQ;
|
||||
ss << begin_key._InvertIndexField;
|
||||
query_cond1.sCondValue = ss.str();
|
||||
direct_request_context.sFieldConds.push_back(query_cond1);
|
||||
@ -598,13 +614,14 @@ void SearchRocksDBIndex::GetRangeIndexGE(InvertIndexEntry& begin_key, std::vecto
|
||||
|
||||
QueryCond query_cond2;
|
||||
query_cond2.sFieldIndex = getFieldIndex("key");
|
||||
query_cond2.sCondOpr = 5;
|
||||
query_cond2.sCondValue = gen_dtc_key_string(begin_key._InvertIndexAppid, INDEX_SYMBOL, begin_key._InvertIndexKey);
|
||||
query_cond2.sCondOpr = eGE;
|
||||
query_cond2.sCondValue = gen_dtc_key_string(begin_key._InvertIndexAppid, INDEX_SYMBOL,
|
||||
begin_key._InvertIndex_key_type , begin_key._InvertIndexKey);
|
||||
direct_request_context.sFieldConds.push_back(query_cond2);
|
||||
|
||||
QueryCond query_cond3;
|
||||
query_cond3.sFieldIndex = getFieldIndex("key");
|
||||
query_cond3.sCondOpr = 2;
|
||||
query_cond3.sCondOpr = eLT;
|
||||
query_cond3.sCondValue = gen_dtc_key_string(begin_key._InvertIndexAppid, MAX_BORDER_SYMBOL, "");
|
||||
direct_request_context.sFieldConds.push_back(query_cond3);
|
||||
|
||||
@ -628,20 +645,21 @@ void SearchRocksDBIndex::GetRangeIndexLE(InvertIndexEntry& end_key, std::vector<
|
||||
|
||||
QueryCond query_cond1;
|
||||
query_cond1.sFieldIndex = getFieldIndex("field");
|
||||
query_cond1.sCondOpr = 0;
|
||||
query_cond1.sCondOpr = eEQ;
|
||||
ss << end_key._InvertIndexField;
|
||||
query_cond1.sCondValue = ss.str();
|
||||
direct_request_context.sFieldConds.push_back(query_cond1);
|
||||
|
||||
QueryCond query_cond2;
|
||||
query_cond2.sFieldIndex = getFieldIndex("key");
|
||||
query_cond2.sCondOpr = 3;
|
||||
query_cond2.sCondValue = gen_dtc_key_string(end_key._InvertIndexAppid, INDEX_SYMBOL, end_key._InvertIndexKey);
|
||||
query_cond2.sCondOpr = eLE;
|
||||
query_cond2.sCondValue = gen_dtc_key_string(end_key._InvertIndexAppid, INDEX_SYMBOL
|
||||
, end_key._InvertIndex_key_type , end_key._InvertIndexKey);
|
||||
direct_request_context.sFieldConds.push_back(query_cond2);
|
||||
|
||||
QueryCond query_cond3;
|
||||
query_cond3.sFieldIndex = getFieldIndex("key");
|
||||
query_cond3.sCondOpr = 4;
|
||||
query_cond3.sCondOpr = eGT;
|
||||
query_cond3.sCondValue = gen_dtc_key_string(end_key._InvertIndexAppid, MIN_BORDER_SYMBOL, "");
|
||||
direct_request_context.sFieldConds.push_back(query_cond3);
|
||||
|
||||
@ -665,21 +683,23 @@ void SearchRocksDBIndex::GetRangeIndexGTLT(InvertIndexEntry& begin_key, const In
|
||||
|
||||
QueryCond query_cond1;
|
||||
query_cond1.sFieldIndex = getFieldIndex("field");
|
||||
query_cond1.sCondOpr = 0;
|
||||
query_cond1.sCondOpr = eEQ;
|
||||
ss << begin_key._InvertIndexField;
|
||||
query_cond1.sCondValue = ss.str();
|
||||
direct_request_context.sFieldConds.push_back(query_cond1);
|
||||
|
||||
QueryCond query_cond2;
|
||||
query_cond2.sFieldIndex = getFieldIndex("key");
|
||||
query_cond2.sCondOpr = 4;
|
||||
query_cond2.sCondValue = gen_dtc_key_string(begin_key._InvertIndexAppid, INDEX_SYMBOL, begin_key._InvertIndexKey);
|
||||
query_cond2.sCondOpr = eGT;
|
||||
query_cond2.sCondValue = gen_dtc_key_string(begin_key._InvertIndexAppid, INDEX_SYMBOL
|
||||
, begin_key._InvertIndex_key_type , begin_key._InvertIndexKey);
|
||||
direct_request_context.sFieldConds.push_back(query_cond2);
|
||||
|
||||
QueryCond query_cond3;
|
||||
query_cond3.sFieldIndex = getFieldIndex("key");
|
||||
query_cond3.sCondOpr = 2;
|
||||
query_cond3.sCondValue = gen_dtc_key_string(end_key._InvertIndexAppid, INDEX_SYMBOL, end_key._InvertIndexKey);
|
||||
query_cond3.sCondOpr = eLT;
|
||||
query_cond3.sCondValue = gen_dtc_key_string(end_key._InvertIndexAppid, INDEX_SYMBOL
|
||||
, end_key._InvertIndex_key_type , end_key._InvertIndexKey);
|
||||
direct_request_context.sFieldConds.push_back(query_cond3);
|
||||
|
||||
|
||||
@ -703,21 +723,23 @@ void SearchRocksDBIndex::GetRangeIndexGTLE(InvertIndexEntry& begin_key, const In
|
||||
|
||||
QueryCond query_cond1;
|
||||
query_cond1.sFieldIndex = getFieldIndex("field");
|
||||
query_cond1.sCondOpr = 0;
|
||||
query_cond1.sCondOpr = eEQ;
|
||||
ss << begin_key._InvertIndexField;
|
||||
query_cond1.sCondValue = ss.str();
|
||||
direct_request_context.sFieldConds.push_back(query_cond1);
|
||||
|
||||
QueryCond query_cond2;
|
||||
query_cond2.sFieldIndex = getFieldIndex("key");
|
||||
query_cond2.sCondOpr = 4;
|
||||
query_cond2.sCondValue = gen_dtc_key_string(begin_key._InvertIndexAppid, INDEX_SYMBOL, begin_key._InvertIndexKey);
|
||||
query_cond2.sCondOpr = eGT;
|
||||
query_cond2.sCondValue = gen_dtc_key_string(begin_key._InvertIndexAppid, INDEX_SYMBOL
|
||||
, begin_key._InvertIndex_key_type , begin_key._InvertIndexKey);
|
||||
direct_request_context.sFieldConds.push_back(query_cond2);
|
||||
|
||||
QueryCond query_cond3;
|
||||
query_cond3.sFieldIndex = getFieldIndex("key");
|
||||
query_cond3.sCondOpr = 3;
|
||||
query_cond3.sCondValue = gen_dtc_key_string(end_key._InvertIndexAppid, INDEX_SYMBOL, end_key._InvertIndexKey);
|
||||
query_cond3.sCondOpr = eLE;
|
||||
query_cond3.sCondValue = gen_dtc_key_string(end_key._InvertIndexAppid, INDEX_SYMBOL
|
||||
, end_key._InvertIndex_key_type , end_key._InvertIndexKey);
|
||||
direct_request_context.sFieldConds.push_back(query_cond3);
|
||||
|
||||
setEntry(direct_request_context, entry);
|
||||
@ -740,21 +762,23 @@ void SearchRocksDBIndex::GetRangeIndexGELT(InvertIndexEntry& begin_key, const In
|
||||
|
||||
QueryCond query_cond1;
|
||||
query_cond1.sFieldIndex = getFieldIndex("field");
|
||||
query_cond1.sCondOpr = 0;
|
||||
query_cond1.sCondOpr = eEQ;
|
||||
ss << begin_key._InvertIndexField;
|
||||
query_cond1.sCondValue = ss.str();
|
||||
direct_request_context.sFieldConds.push_back(query_cond1);
|
||||
|
||||
QueryCond query_cond2;
|
||||
query_cond2.sFieldIndex = getFieldIndex("key");
|
||||
query_cond2.sCondOpr = 5;
|
||||
query_cond2.sCondValue = gen_dtc_key_string(begin_key._InvertIndexAppid, INDEX_SYMBOL, begin_key._InvertIndexKey);
|
||||
query_cond2.sCondOpr = eGE;
|
||||
query_cond2.sCondValue = gen_dtc_key_string(begin_key._InvertIndexAppid, INDEX_SYMBOL
|
||||
, begin_key._InvertIndex_key_type , begin_key._InvertIndexKey);
|
||||
direct_request_context.sFieldConds.push_back(query_cond2);
|
||||
|
||||
QueryCond query_cond3;
|
||||
query_cond3.sFieldIndex = getFieldIndex("key");
|
||||
query_cond3.sCondOpr = 2;
|
||||
query_cond3.sCondValue = gen_dtc_key_string(end_key._InvertIndexAppid, INDEX_SYMBOL, end_key._InvertIndexKey);
|
||||
query_cond3.sCondOpr = eLT;
|
||||
query_cond3.sCondValue = gen_dtc_key_string(end_key._InvertIndexAppid, INDEX_SYMBOL
|
||||
, end_key._InvertIndex_key_type , end_key._InvertIndexKey);
|
||||
direct_request_context.sFieldConds.push_back(query_cond3);
|
||||
|
||||
setEntry(direct_request_context, entry);
|
||||
@ -777,20 +801,21 @@ void SearchRocksDBIndex::GetRangeIndexGT(InvertIndexEntry& begin_key, std::vecto
|
||||
|
||||
QueryCond query_cond1;
|
||||
query_cond1.sFieldIndex = getFieldIndex("field");
|
||||
query_cond1.sCondOpr = 0;
|
||||
query_cond1.sCondOpr = eEQ;
|
||||
ss << begin_key._InvertIndexField;
|
||||
query_cond1.sCondValue = ss.str();
|
||||
direct_request_context.sFieldConds.push_back(query_cond1);
|
||||
|
||||
QueryCond query_cond2;
|
||||
query_cond2.sFieldIndex = getFieldIndex("key");
|
||||
query_cond2.sCondOpr = 4;
|
||||
query_cond2.sCondValue = gen_dtc_key_string(begin_key._InvertIndexAppid, INDEX_SYMBOL, begin_key._InvertIndexKey);
|
||||
query_cond2.sCondOpr = eGT;
|
||||
query_cond2.sCondValue = gen_dtc_key_string(begin_key._InvertIndexAppid, INDEX_SYMBOL
|
||||
, begin_key._InvertIndex_key_type , begin_key._InvertIndexKey);
|
||||
direct_request_context.sFieldConds.push_back(query_cond2);
|
||||
|
||||
QueryCond query_cond3;
|
||||
query_cond3.sFieldIndex = getFieldIndex("key");
|
||||
query_cond3.sCondOpr = 2;
|
||||
query_cond3.sCondOpr = eLT;
|
||||
query_cond3.sCondValue = gen_dtc_key_string(begin_key._InvertIndexAppid, MAX_BORDER_SYMBOL, "");
|
||||
direct_request_context.sFieldConds.push_back(query_cond3);
|
||||
|
||||
@ -814,21 +839,22 @@ void SearchRocksDBIndex::GetRangeIndexLT(InvertIndexEntry& end_key, std::vector<
|
||||
|
||||
QueryCond query_cond1;
|
||||
query_cond1.sFieldIndex = getFieldIndex("field");
|
||||
query_cond1.sCondOpr = 0;
|
||||
query_cond1.sCondOpr = eEQ;
|
||||
ss << end_key._InvertIndexField;
|
||||
query_cond1.sCondValue = ss.str();
|
||||
direct_request_context.sFieldConds.push_back(query_cond1);
|
||||
|
||||
QueryCond query_cond2;
|
||||
query_cond2.sFieldIndex = getFieldIndex("key");
|
||||
query_cond2.sCondOpr = 2;
|
||||
query_cond2.sCondValue = gen_dtc_key_string(end_key._InvertIndexAppid, INDEX_SYMBOL, end_key._InvertIndexKey);
|
||||
query_cond2.sCondOpr = eLT;
|
||||
query_cond2.sCondValue = gen_dtc_key_string(end_key._InvertIndexAppid, INDEX_SYMBOL
|
||||
, end_key._InvertIndex_key_type , end_key._InvertIndexKey);
|
||||
direct_request_context.sFieldConds.push_back(query_cond2);
|
||||
|
||||
QueryCond query_cond3;
|
||||
query_cond3.sFieldIndex = getFieldIndex("key");
|
||||
query_cond3.sCondOpr = 4;
|
||||
query_cond3.sCondValue = gen_dtc_key_string(end_key._InvertIndexAppid, MAX_BORDER_SYMBOL, "");
|
||||
query_cond3.sCondOpr = eGT;
|
||||
query_cond3.sCondValue = gen_dtc_key_string(end_key._InvertIndexAppid, MIN_BORDER_SYMBOL, "");
|
||||
direct_request_context.sFieldConds.push_back(query_cond3);
|
||||
|
||||
setEntry(direct_request_context, entry);
|
||||
@ -879,7 +905,7 @@ void SearchRocksDBIndex::GetRangeIndexInTerminal(RANGTYPE range_type, const Inve
|
||||
|
||||
QueryCond query_cond1;
|
||||
query_cond1.sFieldIndex = getFieldIndex("field");
|
||||
query_cond1.sCondOpr = 0;
|
||||
query_cond1.sCondOpr = eEQ;
|
||||
stringstream ss;
|
||||
ss << end_key._InvertIndexField;
|
||||
query_cond1.sCondValue = ss.str();
|
||||
@ -889,14 +915,16 @@ void SearchRocksDBIndex::GetRangeIndexInTerminal(RANGTYPE range_type, const Inve
|
||||
QueryCond query_cond2;
|
||||
query_cond2.sFieldIndex = getFieldIndex("key");
|
||||
query_cond2.sCondOpr = greater_type;
|
||||
query_cond2.sCondValue = gen_dtc_key_string(end_key._InvertIndexAppid, begin_symbol, begin_key._InvertIndexKey);
|
||||
query_cond2.sCondValue = gen_dtc_key_string(end_key._InvertIndexAppid, begin_symbol
|
||||
, begin_key._InvertIndex_key_type , begin_key._InvertIndexKey);
|
||||
direct_request_context.sFieldConds.push_back(query_cond2);
|
||||
|
||||
|
||||
QueryCond query_cond3;
|
||||
query_cond3.sFieldIndex = getFieldIndex("key");
|
||||
query_cond3.sCondOpr = less_type;
|
||||
query_cond3.sCondValue = gen_dtc_key_string(end_key._InvertIndexAppid, end_symbol, end_key._InvertIndexKey);
|
||||
query_cond3.sCondValue = gen_dtc_key_string(end_key._InvertIndexAppid, end_symbol
|
||||
, end_key._InvertIndex_key_type , end_key._InvertIndexKey);
|
||||
direct_request_context.sFieldConds.push_back(query_cond3);
|
||||
|
||||
// key和docd_id对应的field值分别为0和1
|
||||
|
@ -55,13 +55,15 @@ struct InvertIndexEntry {
|
||||
_IsValid = true;
|
||||
}
|
||||
|
||||
InvertIndexEntry(std::string appid, int field, double key){
|
||||
InvertIndexEntry(std::string appid, int field, uint32_t key_type, double key){
|
||||
_InvertIndexAppid = appid;
|
||||
_InvertIndexField = field;
|
||||
_InvertIndex_key_type = key_type;
|
||||
_InvertIndexKey = key;
|
||||
}
|
||||
|
||||
InvertIndexEntry(const InvertIndexEntry& src) {
|
||||
this->_InvertIndex_key_type = src._InvertIndex_key_type;
|
||||
this->_InvertIndexKey = src._InvertIndexKey;
|
||||
this->_InvertIndexDocId = src._InvertIndexDocId;
|
||||
this->_InvertIndexAppid = src._InvertIndexAppid;
|
||||
@ -72,6 +74,7 @@ struct InvertIndexEntry {
|
||||
}
|
||||
|
||||
InvertIndexEntry& operator=(const InvertIndexEntry& src) {
|
||||
this->_InvertIndex_key_type = src._InvertIndex_key_type;
|
||||
this->_InvertIndexKey = src._InvertIndexKey;
|
||||
this->_InvertIndexDocId = src._InvertIndexDocId;
|
||||
this->_InvertIndexAppid = src._InvertIndexAppid;
|
||||
@ -109,6 +112,7 @@ struct InvertIndexEntry {
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
uint32_t _InvertIndex_key_type;
|
||||
double _InvertIndexKey;
|
||||
std::string _InvertIndexDocId;
|
||||
std::string _InvertIndexAppid;
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -22,9 +22,11 @@
|
||||
#include "dtcapi.h"
|
||||
#include "chash.h"
|
||||
|
||||
#include "comm.h"
|
||||
#include "search_conf.h"
|
||||
#include "search_util.h"
|
||||
#include "json/value.h"
|
||||
#include "result_context.h"
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <vector>
|
||||
@ -52,18 +54,18 @@ class CIndexTableManager
|
||||
public:
|
||||
int InitServer(const SDTCHost &dtchost, string bindAddr);
|
||||
int InitServer2(const SDTCHost &dtchost);
|
||||
bool GetDocInfo(uint32_t appid, string word, uint32_t key_locate, vector<IndexInfo> &doc_info);
|
||||
bool GetDocInfo(uint32_t appid, string word, uint32_t field_id, vector<IndexInfo> &doc_info);
|
||||
int GetDocCnt(uint32_t appid);
|
||||
|
||||
bool get_snapshot_execute(int left, int right,uint32_t appid, vector<IndexInfo>& no_filter_docs, vector<DocVersionInfo>& docVersionInfo);
|
||||
bool get_snapshot_execute(int left, int right,uint32_t appid, const vector<IndexInfo>& no_filter_docs, vector<DocVersionInfo>& docVersionInfo);
|
||||
bool get_top_snapshot_execute(int left, int right, uint32_t appid, vector<TopDocInfo>& no_filter_docs, vector<DocVersionInfo>& docVersionInfo);
|
||||
bool TopDocValid(uint32_t appid, vector<TopDocInfo>& no_filter_docs, vector<TopDocInfo>& doc_info);
|
||||
bool DocValid(uint32_t appid, vector<IndexInfo>& vecs, set<string>& valid_set, bool need_version, map<string, uint32_t>& valid_version, hash_string_map& doc_content_map);
|
||||
bool DocValid(uint32_t appid, const vector<IndexInfo>& vecs, bool need_version, map<string, uint32_t>& valid_version, hash_string_map& doc_content_map);
|
||||
bool GetTopDocInfo(uint32_t appid, string word, vector<TopDocInfo>& doc_info);
|
||||
bool GetDocContent(uint32_t appid, vector<IndexInfo> &doc_id_set, hash_string_map& doc_content);
|
||||
bool GetSnapshotContent(int left, int right, uint32_t appid, vector<IndexInfo>& docs, hash_string_map& doc_content);
|
||||
bool GetSuggestDoc(uint32_t appid, int index, uint32_t len, uint32_t field, const IntelligentInfo &info, vector<IndexInfo> &doc_id_set, set<string>& hlWord);
|
||||
bool GetSuggestDocWithoutCharacter(uint32_t appid, int index, uint32_t len, uint32_t field, const IntelligentInfo &info, vector<IndexInfo> &doc_id_set, set<string>& hlWord);
|
||||
bool GetDocContent(uint32_t appid, const std::vector<IndexInfo>& index_infos, hash_string_map& doc_content);
|
||||
bool GetSnapshotContent(int left, int right, uint32_t appid , const std::vector<IndexInfo>& index_infos, hash_string_map& doc_content);
|
||||
bool GetSuggestDoc(uint32_t appid, int index, uint32_t len, uint32_t field, const IntelligentInfo &info, vector<IndexInfo> &doc_id_set);
|
||||
bool GetSuggestDocWithoutCharacter(uint32_t appid, int index, uint32_t len, uint32_t field, const IntelligentInfo &info, vector<IndexInfo> &doc_id_set);
|
||||
bool GetScoreByField(uint32_t appid, string doc_id, string sort_field, uint32_t sort_type, ScoreInfo &score_info);
|
||||
bool DocValid(uint32_t appid, string doc_id, bool &is_valid);
|
||||
bool GetContentByField(uint32_t appid, string doc_id, uint32_t doc_version, const vector<string>& fields, Json::Value &value);
|
||||
|
213
src/search_local/index_read/key_format.cc
Normal file
213
src/search_local/index_read/key_format.cc
Normal file
@ -0,0 +1,213 @@
|
||||
#include "key_format.h"
|
||||
#include <iostream>
|
||||
#include <utility>
|
||||
#include "comm.h"
|
||||
|
||||
#define SEGMENT_SIZE 8
|
||||
|
||||
const std::string SEG_SYMBOL = "|";
|
||||
|
||||
const char ENCODER_MARKER = 127;
|
||||
const uint64_t signMask = 0x8000000000000000;
|
||||
|
||||
uint64_t encode_into_cmp_uint(int64_t src) {
|
||||
|
||||
return uint64_t(src) ^ signMask;
|
||||
}
|
||||
|
||||
uint64_t htonll(uint64_t val) {
|
||||
return (((uint64_t)htonl(val)) << 32) + htonl(val >> 32);
|
||||
}
|
||||
|
||||
uint64_t ntohll(uint64_t val)
|
||||
{
|
||||
return (((uint64_t)ntohl(val)) << 32) + ntohl(val >> 32);
|
||||
}
|
||||
|
||||
std::string KeyFormat::Encode(const UnionKey& oUnionKey)
|
||||
{
|
||||
std::string sUnionKey;
|
||||
for (size_t i = 0; i < oUnionKey.size(); ++i)
|
||||
{
|
||||
switch (oUnionKey[i].first)
|
||||
{
|
||||
case FIELD_INT:
|
||||
case FIELD_LONG:
|
||||
case FIELD_IP:
|
||||
{
|
||||
sUnionKey.append(EncodeBytes((int64_t)strtoll(oUnionKey[i].second.c_str(), NULL, 10)));
|
||||
}
|
||||
break;
|
||||
case FIELD_DOUBLE:
|
||||
sUnionKey.append(EncodeBytes(strtod(oUnionKey[i].second.c_str(), NULL)));
|
||||
break;
|
||||
case FIELD_STRING:
|
||||
case FIELD_TEXT:
|
||||
case FIELD_GEO_POINT:
|
||||
case FIELD_GEO_SHAPE:
|
||||
sUnionKey.append(EncodeBytes(oUnionKey[i].second));
|
||||
break;
|
||||
default:
|
||||
sUnionKey.clear();
|
||||
break;
|
||||
}
|
||||
}
|
||||
return sUnionKey;
|
||||
}
|
||||
|
||||
bool KeyFormat::Decode(const std::string& sKey, UnionKey& oUnionKey)
|
||||
{
|
||||
if (oUnionKey.empty()){
|
||||
return false;
|
||||
}
|
||||
|
||||
int iPos = 0;
|
||||
for (size_t i = 0; i < oUnionKey.size(); ++i)
|
||||
{
|
||||
switch (oUnionKey[i].first)
|
||||
{
|
||||
case FIELD_INT:
|
||||
case FIELD_LONG:
|
||||
case FIELD_IP:
|
||||
{
|
||||
int64_t lValue;
|
||||
DecodeBytes(sKey.substr(iPos, 8), lValue);
|
||||
iPos += 8;
|
||||
oUnionKey[i].second = std::to_string((long long)lValue);
|
||||
}
|
||||
break;
|
||||
case FIELD_DOUBLE:
|
||||
{
|
||||
double dValue;
|
||||
DecodeBytes(sKey.substr(iPos, 8), dValue);
|
||||
iPos += 8;
|
||||
oUnionKey[i].second = std::to_string((long double)dValue);
|
||||
}
|
||||
break;
|
||||
case FIELD_STRING:
|
||||
case FIELD_TEXT:
|
||||
{
|
||||
int begin_pos = iPos;
|
||||
iPos += SEGMENT_SIZE ;
|
||||
for ( ; ENCODER_MARKER == sKey[ iPos - 1 ] ; iPos += SEGMENT_SIZE) {
|
||||
}
|
||||
DecodeBytes(sKey.substr(begin_pos, iPos - begin_pos ), oUnionKey[i].second);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
std::string KeyFormat::EncodeBytes(const std::string & src)
|
||||
{
|
||||
unsigned char padding_bytes;
|
||||
size_t left_length = src.length();
|
||||
size_t pos = 0;
|
||||
std::stringstream oss_dst;
|
||||
while (true) {
|
||||
unsigned char copy_len = SEGMENT_SIZE - 1 < left_length ? SEGMENT_SIZE - 1 : left_length;
|
||||
padding_bytes = SEGMENT_SIZE - 1 - copy_len;
|
||||
oss_dst << src.substr(pos, copy_len);
|
||||
pos += copy_len;
|
||||
left_length -= copy_len;
|
||||
|
||||
if (padding_bytes) {
|
||||
oss_dst << std::string(padding_bytes, '\0');
|
||||
oss_dst << (char)(ENCODER_MARKER - padding_bytes);
|
||||
break;
|
||||
}
|
||||
else {
|
||||
oss_dst << ENCODER_MARKER;
|
||||
}
|
||||
}
|
||||
return oss_dst.str();
|
||||
}
|
||||
|
||||
std::string KeyFormat::EncodeBytes(int src)
|
||||
{
|
||||
return EncodeBytes((int64_t)src);
|
||||
}
|
||||
|
||||
std::string KeyFormat::EncodeBytes(int64_t src)
|
||||
{
|
||||
uint64_t host_bytes = encode_into_cmp_uint(src);
|
||||
uint64_t net_bytes = htonll(host_bytes);
|
||||
char dst_bytes[8];
|
||||
memcpy(dst_bytes, &net_bytes, sizeof(uint64_t));
|
||||
std::string dst = std::string(8, '\0');
|
||||
for (size_t i = 0; i < dst.length(); i++) {
|
||||
dst[i] = dst_bytes[i];
|
||||
}
|
||||
return dst;
|
||||
}
|
||||
|
||||
std::string KeyFormat::EncodeBytes(double src)
|
||||
{
|
||||
uint64_t u;
|
||||
memcpy(&u, &src, sizeof(double));
|
||||
if (src >= 0) {
|
||||
u |= signMask;
|
||||
}
|
||||
else {
|
||||
u = ~u;
|
||||
}
|
||||
|
||||
return EncodeBytes(u);
|
||||
}
|
||||
|
||||
std::string KeyFormat::EncodeBytes(uint64_t src)
|
||||
{
|
||||
uint64_t net_bytes = htonll(src);
|
||||
char dst_bytes[8];
|
||||
memcpy(dst_bytes, &net_bytes, sizeof(uint64_t));
|
||||
std::string dst = std::string(8, '\0');
|
||||
for (size_t i = 0; i < dst.length(); i++) {
|
||||
dst[i] = dst_bytes[i];
|
||||
}
|
||||
return dst;
|
||||
}
|
||||
|
||||
void KeyFormat::DecodeBytes(const std::string & src, int64_t& dst)
|
||||
{
|
||||
uint64_t net_bytes;
|
||||
memcpy(&net_bytes, src.c_str(), sizeof(uint64_t));
|
||||
uint64_t host_bytes = ntohll(net_bytes);
|
||||
dst = int64_t(host_bytes ^ signMask);
|
||||
}
|
||||
|
||||
void KeyFormat::DecodeBytes(const std::string & src, std::string & dst)
|
||||
{
|
||||
if (src.length() == 0) {
|
||||
dst = "";
|
||||
}
|
||||
std::stringstream oss_dst;
|
||||
for (size_t i = 0; i < src.length(); i += SEGMENT_SIZE) {
|
||||
char padding_bytes = ENCODER_MARKER - src[i + 7];
|
||||
oss_dst << src.substr(i, SEGMENT_SIZE - 1 - padding_bytes);
|
||||
}
|
||||
dst = oss_dst.str();
|
||||
}
|
||||
|
||||
void KeyFormat::DecodeBytes(const std::string & src, uint64_t & dst)
|
||||
{
|
||||
uint64_t net_bytes;
|
||||
memcpy(&net_bytes, src.c_str(), sizeof(uint64_t));
|
||||
dst = ntohll(net_bytes);
|
||||
}
|
||||
|
||||
// Inverse of EncodeBytes(double): decode the 8-byte payload as a uint64_t,
// then undo the order-preserving transform -- clear the sign bit for values
// that were non-negative, or flip all bits for values that were negative --
// and bit-cast the result back to a double.
void KeyFormat::DecodeBytes(const std::string & src, double & dst)
{
	uint64_t u;
	DecodeBytes(src, u);

	if ((u & signMask) > 0) {
		// sign bit set => original value was >= 0; just clear the bit
		u &= (~signMask);
	}
	else {
		// sign bit clear => original value was negative; undo the ~u flip
		u = ~u;
	}
	// bit-cast back without violating strict aliasing
	memcpy(&dst, &u, sizeof(dst));
}
|
33
src/search_local/index_read/key_format.h
Normal file
33
src/search_local/index_read/key_format.h
Normal file
@ -0,0 +1,33 @@
|
||||
#ifndef __KEY_FORMAT_H__
|
||||
#define __KEY_FORMAT_H__
|
||||
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <stdint.h>
|
||||
#include <arpa/inet.h>
|
||||
#include <inttypes.h>
|
||||
#include <string.h>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
// Serializes composite ("union") index keys into fixed-layout byte strings
// whose lexicographic (memcmp) order matches the natural order of the
// encoded values, so encoded keys can be range-scanned directly.
class KeyFormat {
public:
	// A composite key: ordered list of (field type, raw value as string).
	typedef std::vector<std::pair<int,std::string> > UnionKey;

public:
	// Serialize a composite key to its byte-comparable form.
	static std::string Encode(const UnionKey& oUnionKey);
	// Parse an encoded key back into (field type, value) pairs; oUnionKey
	// must carry the expected field types. Returns false on failure.
	static bool Decode(const std::string& sKey, UnionKey& oUnionKey);

	// Per-type encoders; each produces a memcmp-orderable segment.
	static std::string EncodeBytes(const std::string& src);
	static std::string EncodeBytes(int src);
	static std::string EncodeBytes(int64_t src);
	static std::string EncodeBytes(uint64_t src);
	static std::string EncodeBytes(double src);
	// Per-type decoders; inverses of the corresponding EncodeBytes overloads.
	static void DecodeBytes(const std::string& src, int64_t& dst);
	static void DecodeBytes(const std::string& src, std::string& dst);
	static void DecodeBytes(const std::string& src, uint64_t& dst);
	static void DecodeBytes(const std::string& src, double& dst);
};
|
||||
|
||||
|
||||
#endif
|
@ -1,341 +0,0 @@
|
||||
/*
|
||||
* =====================================================================================
|
||||
*
|
||||
* Filename: logical_operate.h
|
||||
*
|
||||
* Description: logical operate class definition.
|
||||
*
|
||||
* Version: 1.0
|
||||
* Created: 09/08/2018
|
||||
* Revision: none
|
||||
* Compiler: gcc
|
||||
*
|
||||
* Author: zhulin, shzhulin3@jd.com
|
||||
* Company: JD.com, Inc.
|
||||
*
|
||||
* =====================================================================================
|
||||
*/
|
||||
|
||||
#include "logical_operate.h"
|
||||
#include "search_util.h"
|
||||
#include "cachelist_unit.h"
|
||||
#include "data_manager.h"
|
||||
#include "json/reader.h"
|
||||
#include "json/writer.h"
|
||||
#include "index_tbl_op.h"
|
||||
#include "index_sync/sync_index_timer.h"
|
||||
#include "index_sync/sequence_search_index.h"
|
||||
#include "stem.h"
|
||||
#include <sstream>
|
||||
#include <iomanip>
|
||||
using namespace std;
|
||||
|
||||
extern SyncIndexTimer *globalSyncIndexTimer;
|
||||
extern CCacheListUnit *indexcachelist;
|
||||
|
||||
// Construct with the request context: appid, sort type, gis flag and the
// index-cache switch. The combining function must be set via SetFunc()
// before Process() is called.
LogicalOperate::LogicalOperate(uint32_t a, uint32_t s, uint32_t h, uint32_t c):m_appid(a), m_sort_type(s), m_has_gis(h), m_cache_switch(c)
{

}
|
||||
|
||||
// No owned resources to release.
LogicalOperate::~LogicalOperate(){

}
|
||||
|
||||
// Install the set operation (e.g. union or intersection) used by Process()
// to combine successive key-groups' document lists.
void LogicalOperate::SetFunc(logical_func func){
	m_func = func;
}
|
||||
|
||||
// For each group in `keys`, resolve every FieldInfo to a document list
// (dispatching on segment_tag), union the lists within the group, then fold
// the groups together with the function installed via SetFunc().
// Outputs: vecs (combined doc list), highlightWord (terms to highlight),
// ves (per-doc KeyInfo lists), key_in_doc (per-word doc counts).
// Returns 0 on success, -RT_GET_DOC_ERR on lookup failure.
int LogicalOperate::Process(const vector<vector<FieldInfo> >& keys, vector<IndexInfo>& vecs, set<string>& highlightWord, map<string, vec> &ves, map<string, uint32_t> &key_in_doc){
	for (size_t index = 0; index < keys.size(); index++)
	{
		vector<IndexInfo> doc_id_vec;
		vector<FieldInfo> fieldInfos = keys[index];
		vector<FieldInfo>::iterator it;
		for (it = fieldInfos.begin(); it != fieldInfos.end(); it++) {
			vector<IndexInfo> doc_info;
			if ((*it).segment_tag == 3) {
				// segment_tag 3: shift-word lookup (see GetDocByShiftWord);
				// matched docs get a synthetic KeyInfo with word_freq 1.
				int ret = GetDocByShiftWord(*it, doc_info, m_appid, highlightWord);
				if (ret != 0) {
					doc_id_vec.clear();
					return -RT_GET_DOC_ERR;
				}
				sort(doc_info.begin(), doc_info.end());
				for (size_t doc_info_idx = 0; doc_info_idx < doc_info.size(); doc_info_idx++){
					KeyInfo info;
					info.word_freq = 1;
					info.field = (*it).field;
					info.word = (*it).word;
					ves[doc_info[doc_info_idx].doc_id].push_back(info);
				}
			} else if ((*it).segment_tag == 4) {
				// segment_tag 4: English shift-word lookup; same bookkeeping.
				int ret = GetDocByShiftEnWord(*it, doc_info, m_appid, highlightWord);
				if (ret != 0) {
					doc_id_vec.clear();
					return -RT_GET_DOC_ERR;
				}
				sort(doc_info.begin(), doc_info.end());
				for (size_t doc_info_idx = 0; doc_info_idx < doc_info.size(); doc_info_idx++){
					KeyInfo info;
					info.word_freq = 1;
					info.field = (*it).field;
					info.word = (*it).word;
					ves[doc_info[doc_info_idx].doc_id].push_back(info);
				}
			} else if ((*it).segment_tag == 5 && (*it).word == "") { // range query
				// Query the invert index for [start, end] on this field.
				stringstream ss;
				ss << m_appid;
				InvertIndexEntry startEntry(ss.str(), (*it).field, (double)(*it).start);
				InvertIndexEntry endEntry(ss.str(), (*it).field, (double)(*it).end);
				std::vector<InvertIndexEntry> resultEntry;
				globalSyncIndexTimer->GetSearchIndex()->GetRangeIndex((*it).range_type, startEntry, endEntry, resultEntry);
				std::vector<InvertIndexEntry>::iterator iter = resultEntry.begin();
				for (; iter != resultEntry.end(); iter ++) {
					IndexInfo info;
					info.doc_id = (*iter)._InvertIndexDocId;
					info.doc_version = (*iter)._InvertIndexDocVersion;
					doc_info.push_back(info);
				}
				log_debug("appid: %s, field: %d, count: %d", startEntry._InvertIndexAppid.c_str(), (*it).field, (int)resultEntry.size());
			} else {
				// Default path: plain inverted-index lookup by word.
				int ret = GetDocIdSetByWord(*it, doc_info);
				if (ret != 0){
					return -RT_GET_DOC_ERR;
				}
				if (doc_info.size() == 0)
					continue;
				// Pure numbers are skipped for highlighting in gis mode.
				if (!m_has_gis || !isAllNumber((*it).word))
					highlightWord.insert((*it).word);
				if(!m_has_gis && (m_sort_type == SORT_RELEVANCE || m_sort_type == SORT_TIMESTAMP)){
					CalculateByWord(*it, doc_info, ves, key_in_doc);
				}
			}
			// Words within one group are OR-combined.
			doc_id_vec = vec_union(doc_id_vec, doc_info);
		}
		if(index == 0){ // first group seeds vecs; later groups are folded in with the configured logical function
			vecs.assign(doc_id_vec.begin(), doc_id_vec.end());
		} else {
			vecs = m_func(vecs, doc_id_vec);
		}
	}
	return 0;
}
|
||||
|
||||
// Terminal (cursor-paged) query path: only handled when the request is a
// single AND group containing exactly one SEGMENT_RANGE condition; anything
// else returns 0 without producing results.
int LogicalOperate::ProcessTerminal(const vector<vector<FieldInfo> >& and_keys, const TerminalQryCond& query_cond, vector<TerminalRes>& vecs){
	if(and_keys.size() != 1){
		return 0;
	}
	vector<FieldInfo> field_vec = and_keys[0];
	if(field_vec.size() != 1){
		return 0;
	}
	FieldInfo field_info = field_vec[0];
	if(field_info.segment_tag != SEGMENT_RANGE){
		return 0;
	}

	// Range-scan the invert index under the paging condition; each hit's
	// index key doubles as its score.
	stringstream ss;
	ss << m_appid;
	InvertIndexEntry beginEntry(ss.str(), field_info.field, (double)field_info.start);
	InvertIndexEntry endEntry(ss.str(), field_info.field, (double)field_info.end);
	std::vector<InvertIndexEntry> resultEntry;
	globalSyncIndexTimer->GetSearchIndex()->GetRangeIndexInTerminal(field_info.range_type, beginEntry, endEntry, query_cond, resultEntry);
	std::vector<InvertIndexEntry>::iterator iter = resultEntry.begin();
	for (; iter != resultEntry.end(); iter ++) {
		TerminalRes info;
		info.doc_id = (*iter)._InvertIndexDocId;
		info.score = (*iter)._InvertIndexKey;
		vecs.push_back(info);
	}
	return 0;
}
|
||||
|
||||
// "Complete match" path: every key must match, so per-word doc lists are
// intersected. Also records each word in word_vec and, for relevance /
// timestamp sorting, accumulates per-doc KeyInfo via CalculateByWord.
// Returns 0 on success, -RT_GET_DOC_ERR on lookup failure.
int LogicalOperate::ProcessComplete(const vector<FieldInfo>& complete_keys, vector<IndexInfo>& complete_vecs, vector<string>& word_vec, map<string, vec> &ves, map<string, uint32_t> &key_in_doc){
	vector<FieldInfo>::const_iterator iter;
	for (iter = complete_keys.begin(); iter != complete_keys.end(); iter++) {
		vector<IndexInfo> doc_info;
		int ret = GetDocIdSetByWord(*iter, doc_info);
		if (ret != 0) {
			return -RT_GET_DOC_ERR;
		}

		word_vec.push_back((*iter).word);

		if(m_sort_type == SORT_RELEVANCE || m_sort_type == SORT_TIMESTAMP){
			CalculateByWord(*iter, doc_info, ves, key_in_doc);
		}

		// First word seeds the result; later words are intersected in.
		if(iter == complete_keys.begin()){
			complete_vecs.assign(doc_info.begin(), doc_info.end());
		} else {
			complete_vecs = vec_intersection(complete_vecs, doc_info);
		}
	}
	return 0;
}
|
||||
|
||||
// For one query word, convert its per-document index hits into KeyInfo
// records (frequency, field, creation time, term positions) keyed by doc id,
// and record the word's document count in key_in_doc.
void LogicalOperate::CalculateByWord(FieldInfo fieldInfo, const vector<IndexInfo> &doc_info, map<string, vec> &ves, map<string, uint32_t> &key_in_doc) {
	string doc_id;
	uint32_t word_freq = 0;
	uint32_t field = 0;
	uint32_t created_time;
	string pos_str = "";
	for (size_t i = 0; i < doc_info.size(); i++) {
		doc_id = doc_info[i].doc_id;
		word_freq = doc_info[i].word_freq;
		field = doc_info[i].field;
		created_time = doc_info[i].created_time;
		pos_str = doc_info[i].pos;
		vector<int> pos_vec;
		// pos is serialized like "[1,5,9]"; strip the brackets before
		// splitting on commas.
		if (pos_str != "" && pos_str.size() > 2) {
			pos_str = pos_str.substr(1, pos_str.size() - 2);
			pos_vec = splitInt(pos_str, ",");
		}
		KeyInfo info;
		info.word_freq = word_freq;
		info.field = field;
		info.word = fieldInfo.word;
		info.created_time = created_time;
		info.pos_vec = pos_vec;
		ves[doc_id].push_back(info);
	}
	key_in_doc[fieldInfo.word] = doc_info.size();
}
|
||||
|
||||
|
||||
// Look up cached index results for (word, field) in the shared index cache.
// The cached payload is a JSON array of per-document index records; every
// record must carry all expected members or the whole cache entry is
// discarded. Returns true and fills doc_info only on a complete, valid hit.
bool LogicalOperate::GetDocIndexCache(string word, uint32_t field, vector<IndexInfo> &doc_info) {
	log_debug("get doc index start");
	bool res = false;
	uint8_t value[MAX_VALUE_LEN] = { 0 };
	unsigned vsize = 0;
	string output = "";
	// Cache key format: "<word>|<field>".
	string indexCache = word + "|" + ToString(field);
	if (m_cache_switch == 1 && indexcachelist->in_list(indexCache.c_str(), indexCache.size(), value, vsize))
	{
		log_debug("hit index cache.");
		value[vsize] = '\0';
		output = (char *)value;
		res = true;
	}

	if (res) {
		Json::Value packet;
		Json::Reader r(Json::Features::strictMode());
		int ret;
		ret = r.parse(output.c_str(), output.c_str() + output.size(), packet);
		if (0 == ret)
		{
			// Json::Reader::parse returns false (0) on failure.
			log_error("the err json string is : %s, errmsg : %s", output.c_str(), r.getFormattedErrorMessages().c_str());
			res = false;
			return res;
		}

		for (uint32_t i = 0; i < packet.size(); ++i) {
			IndexInfo info;
			Json::Value& index_cache = packet[i];
			if (index_cache.isMember("appid") && index_cache["appid"].isUInt() &&
				index_cache.isMember("id") && index_cache["id"].isString() &&
				index_cache.isMember("version") && index_cache["version"].isUInt() &&
				index_cache.isMember("field") && index_cache["field"].isUInt() &&
				index_cache.isMember("freq") && index_cache["freq"].isUInt() &&
				index_cache.isMember("time") && index_cache["time"].isUInt() &&
				index_cache.isMember("pos") && index_cache["pos"].isString())
			{
				info.appid = index_cache["appid"].asUInt();
				info.doc_id = index_cache["id"].asString();
				info.doc_version = index_cache["version"].asUInt();
				info.field = index_cache["field"].asUInt();
				info.word_freq = index_cache["freq"].asUInt();
				info.created_time = index_cache["time"].asUInt();
				info.pos = index_cache["pos"].asString();
				doc_info.push_back(info);
			}
			else {
				// Any malformed record invalidates the whole cached list.
				log_error("parse index_cache error, no appid");
				doc_info.clear();
				res = false;
				break;
			}
		}
	}
	return res;
}
|
||||
|
||||
// Serialize a document-index list to the JSON array format consumed by
// GetDocIndexCache (members: appid/id/version/field/freq/time/pos).
void LogicalOperate::SetDocIndexCache(const vector<IndexInfo> &doc_info, string& indexJsonStr) {
	Json::Value indexJson;
	Json::FastWriter writer;
	for (size_t i = 0; i < doc_info.size(); i++) {
		Json::Value json_tmp;
		json_tmp["appid"] = doc_info[i].appid;
		json_tmp["id"] = doc_info[i].doc_id;
		json_tmp["version"] = doc_info[i].doc_version;
		json_tmp["field"] = doc_info[i].field;
		json_tmp["freq"] = doc_info[i].word_freq;
		json_tmp["time"] = doc_info[i].created_time;
		json_tmp["pos"] = doc_info[i].pos;
		indexJson.append(json_tmp);
	}
	indexJsonStr = writer.write(indexJson);
}
|
||||
|
||||
|
||||
// Resolve one query term to its document list. Builds the index key
// "<appid>#00#<normalized word>" (zero-padded for range tags, raw for
// numeric types, ip->number for FIELD_IP, stemmed otherwise), consults the
// index cache in gis mode, then queries the index table; small result sets
// are written back to the cache. Returns 0 on success (including sensitive
// words, which are silently skipped) or -RT_DTC_ERR on storage failure.
int LogicalOperate::GetDocIdSetByWord(FieldInfo fieldInfo, vector<IndexInfo> &doc_info) {
	bool bRet = false;
	if (DataManager::Instance()->IsSensitiveWord(fieldInfo.word)) {
		log_debug("%s is a sensitive word.", fieldInfo.word.c_str());
		return 0;
	}

	stringstream ss_key;
	ss_key << m_appid;
	ss_key << "#00#";
	if(fieldInfo.segment_tag == 5){
		// Range tag: left-pad to 20 chars so string order matches numeric order.
		stringstream ss;
		ss << setw(20) << setfill('0') << fieldInfo.word;
		ss_key << ss.str();
	}
	else if (fieldInfo.field_type == FIELD_INT || fieldInfo.field_type == FIELD_DOUBLE || fieldInfo.field_type == FIELD_LONG) {
		ss_key << fieldInfo.word;
	}
	else if (fieldInfo.field_type == FIELD_IP) {
		uint32_t word_id = GetIpNum(fieldInfo.word);
		if (word_id == 0)
			return 0;
		ss_key << word_id;
	}
	else if (fieldInfo.word.find("_") != string::npos) { // composite (union) index key
		ss_key << fieldInfo.word;
	}
	else {
		// Plain text terms are stemmed before lookup.
		string word_new = stem(fieldInfo.word);
		ss_key << word_new;
	}

	log_debug("appid [%u], key[%s]", m_appid, ss_key.str().c_str());
	// In gis mode try the index cache first.
	if (m_has_gis && GetDocIndexCache(ss_key.str(), fieldInfo.field, doc_info)) {
		return 0;
	}

	bRet = g_IndexInstance.GetDocInfo(m_appid, ss_key.str(), fieldInfo.field, doc_info);
	if (false == bRet) {
		log_error("GetDocInfo error.");
		return -RT_DTC_ERR;
	}

	// Cache write-back: only in gis mode, only non-empty lists up to 1000
	// docs, and only if the serialized payload fits the cache value limit.
	if (m_cache_switch == 1 && m_has_gis == 1 && doc_info.size() > 0 && doc_info.size() <= 1000) {
		string index_str;
		SetDocIndexCache(doc_info, index_str);
		if (index_str != "" && index_str.size() < MAX_VALUE_LEN) {
			string indexCache = ss_key.str() + "|" + ToString(fieldInfo.field);
			unsigned data_size = indexCache.size();
			int ret = indexcachelist->add_list(indexCache.c_str(), index_str.c_str(), data_size, index_str.size());
			if (ret != 0) {
				log_error("add to index_cache_list error, ret: %d.", ret);
			}
			else {
				log_debug("add to index_cache_list: %s.", indexCache.c_str());
			}
		}
	}
	return 0;
}
|
@ -1,54 +0,0 @@
|
||||
/*
|
||||
* =====================================================================================
|
||||
*
|
||||
* Filename: logical_operate.h
|
||||
*
|
||||
* Description: logical operate class definition.
|
||||
*
|
||||
* Version: 1.0
|
||||
* Created: 09/08/2018
|
||||
* Revision: none
|
||||
* Compiler: gcc
|
||||
*
|
||||
* Author: zhulin, shzhulin3@jd.com
|
||||
* Company: JD.com, Inc.
|
||||
*
|
||||
* =====================================================================================
|
||||
*/
|
||||
|
||||
#ifndef LOGICAL_OP_H
|
||||
#define LOGICAL_OP_H
|
||||
|
||||
#include "component.h"
|
||||
#include <map>
|
||||
#include <set>
|
||||
using namespace std;
|
||||
|
||||
// Per-document key information list.
typedef vector<KeyInfo> vec;
// Set operation (e.g. union/intersection) used to combine successive
// key-groups' document lists in LogicalOperate::Process.
typedef vector<IndexInfo> (*logical_func)(vector<IndexInfo> &a, vector<IndexInfo> &b);

// Resolves query terms to document lists via the inverted index and
// combines them with a configurable logical function; also maintains an
// optional result cache when gis mode is enabled.
class LogicalOperate
{
public:
	LogicalOperate(uint32_t appid, uint32_t sort_type, uint32_t has_gis, uint32_t cache_switch);
	~LogicalOperate();

	// Resolve and combine all key groups; see logical_operate.cc for the
	// per-segment_tag dispatch details.
	int Process(const vector<vector<FieldInfo> >& keys, vector<IndexInfo>& vecs, set<string>& highlightWord, map<string, vec> &ves, map<string, uint32_t> &key_in_doc);
	// "Complete match": intersect the per-word document lists.
	int ProcessComplete(const vector<FieldInfo>& complete_keys, vector<IndexInfo>& complete_vecs, vector<string>& word_vec, map<string, vec> &ves, map<string, uint32_t> &key_in_doc);
	// Install the group-combining function used by Process().
	void SetFunc(logical_func func);
	// Cursor-paged single-range terminal query.
	int ProcessTerminal(const vector<vector<FieldInfo> >& and_keys, const TerminalQryCond& query_cond, vector<TerminalRes>& vecs);

	// Convert one word's index hits into per-doc KeyInfo records.
	void CalculateByWord(FieldInfo fieldInfo, const vector<IndexInfo> &doc_info, map<string, vec> &ves, map<string, uint32_t> &key_in_doc);
	// Serialize / look up cached index results (JSON payloads).
	void SetDocIndexCache(const vector<IndexInfo> &doc_info, string& indexJsonStr);
	bool GetDocIndexCache(string word, uint32_t field, vector<IndexInfo> &doc_info);
	// Resolve one term to its document list via the index table.
	int GetDocIdSetByWord(FieldInfo fieldInfo, vector<IndexInfo> &doc_info);

private:
	uint32_t m_appid;        // application id scoping all index lookups
	uint32_t m_sort_type;    // SORT_RELEVANCE / SORT_TIMESTAMP / ...
	uint32_t m_has_gis;      // non-zero enables gis mode + index cache reads
	uint32_t m_cache_switch; // non-zero enables index cache writes
	logical_func m_func;     // group-combining operation (set via SetFunc)
};
|
||||
|
||||
#endif
|
@ -58,7 +58,7 @@ private:
|
||||
template<typename T>
|
||||
void COrderOp<T>::Process(const std::map<std::string, T>& score_map, T last_value, OrderOpCond order_op_cond, Json::Value& response, DocManager *doc_manager)
|
||||
{
|
||||
DocIdEntry<T> last_entry;
|
||||
DocIdEntry<T> last_entry(order_op_cond.last_id , last_value , _OrderFieldType, _SortType);
|
||||
for (typename std::map<std::string, T>::const_iterator it = score_map.begin(); it != score_map.end(); it++) {
|
||||
DocIdEntry<T> doc_entry(it->first, it->second, _OrderFieldType, _SortType);
|
||||
if(order_op_cond.has_extra_filter){
|
||||
@ -66,9 +66,6 @@ void COrderOp<T>::Process(const std::map<std::string, T>& score_map, T last_valu
|
||||
} else {
|
||||
_ScoreVec.push_back(doc_entry);
|
||||
}
|
||||
if(it->second == last_value && it->first == order_op_cond.last_id){
|
||||
last_entry = doc_entry;
|
||||
}
|
||||
}
|
||||
|
||||
if (_SearchAfter) {
|
||||
|
363
src/search_local/index_read/process/bool_query_process.cc
Normal file
363
src/search_local/index_read/process/bool_query_process.cc
Normal file
@ -0,0 +1,363 @@
|
||||
#include "bool_query_process.h"
|
||||
#include "geo_distance_query_process.h"
|
||||
#include "geo_shape_query_process.h"
|
||||
#include "match_query_process.h"
|
||||
#include "term_query_process.h"
|
||||
#include "range_query_process.h"
|
||||
#include "../key_format.h"
|
||||
|
||||
// Construct a bool-query dispatcher. Sub-query processors are created
// lazily in InitQueryProcess() while the request is parsed.
BoolQueryProcess::BoolQueryProcess(const Json::Value& value)
	: QueryProcess(value)
	, query_process_map_()
	, query_bitset_()
	, has_and_logic_(false)
{ }
|
||||
|
||||
// Release the sub-query processors owned through query_process_map_.
BoolQueryProcess::~BoolQueryProcess()
{
	std::map<int , QueryProcess*>::iterator iter = query_process_map_.begin();
	for ( ; iter != query_process_map_.end(); ++iter){
		if (iter->second != NULL){
			delete iter->second;
			iter->second = NULL;
		}
	}
}
|
||||
|
||||
// Rewrite the AND-key list using configured union (composite) keys: when a
// set of AND conditions covers every field of a union key, those conditions
// are replaced by composite FieldInfo entries whose word is the
// KeyFormat-encoded combination of the individual field values. Conditions
// not covered by any union key are kept as-is.
void BoolQueryProcess::HandleUnifiedIndex(){
	std::vector<std::vector<FieldInfo> >& and_keys = component_->AndKeys();

	// Index the AND groups by their (first entry's) field id.
	std::map<uint32_t , std::vector<FieldInfo> > fieldid_fieldinfos_map;
	std::vector<std::vector<FieldInfo> >::iterator iter = and_keys.begin();
	for (; iter != and_keys.end(); ++iter){
		fieldid_fieldinfos_map.insert(std::make_pair(((*iter)[0]).field , *iter));
	}

	std::vector<std::vector<FieldInfo> > union_field_infos;
	std::vector<std::string> union_key_vec;
	DBManager::Instance()->GetUnionKeyField(component_->Appid() , union_key_vec);
	std::vector<std::string>::iterator union_key_iter = union_key_vec.begin();
	for(; union_key_iter != union_key_vec.end(); union_key_iter++){
		// A union key is a comma-separated list of field ids; it is "hit"
		// only when every one of its fields appears in the AND conditions.
		std::string union_key = *union_key_iter;
		std::vector<int> union_field_vec = splitInt(union_key, ",");
		std::vector<int>::iterator union_field_iter = union_field_vec.begin();
		bool hit_union_key = true;
		for(; union_field_iter != union_field_vec.end(); union_field_iter++){
			if(fieldid_fieldinfos_map.find(*union_field_iter) == fieldid_fieldinfos_map.end()){
				hit_union_key = false;
				break;
			}
		}
		if(hit_union_key == true){
			log_debug("hit union key combination");
			std::vector<std::vector<string> > keys_vvec;
			std::vector<FieldInfo> unionFieldInfos;
			bool b_has_range = false;
			for(union_field_iter = union_field_vec.begin(); union_field_iter != union_field_vec.end(); union_field_iter++){
				std::vector<FieldInfo> field_info_vec = fieldid_fieldinfos_map.at(*union_field_iter);
				std::vector<std::string> key_vec;
				GetKeyFromFieldInfo(field_info_vec, key_vec , b_has_range);
				keys_vvec.push_back(key_vec);
				fieldid_fieldinfos_map.erase(*union_field_iter); // conditions absorbed by the union key are removed from the map
			}
			log_debug("has range query flag:%d" , (int)b_has_range);
			// Cartesian combination of the per-field encoded key values.
			std::vector<std::string> union_keys = Combination(keys_vvec);
			for(int m = 0 ; m < (int)union_keys.size(); m++){
				FieldInfo info;
				info.field = 0;
				info.field_type = FIELD_INDEX;

				// Tag the composite condition with the first active query
				// type (bitset scan in enum order).
				for (uint32_t ui_query_type = E_INDEX_READ_PRE_TERM ;
					ui_query_type < E_INDEX_READ_TOTAL_NUM ;
					++ui_query_type){
					if (query_bitset_.test(ui_query_type)){
						info.query_type = ui_query_type;
						break;
					}
				}
				info.segment_tag = (b_has_range ? SEGMENT_RANGE : SEGMENT_DEFAULT);
				info.word = union_keys[m];
				log_debug("union key[%d]:%s" , m, info.word.c_str());
				unionFieldInfos.push_back(info);
			}
			union_field_infos.push_back(unionFieldInfos);
		}
	}

	if (!union_field_infos.empty()){
		// Rebuild the AND list: composite conditions first, then whatever
		// original conditions were not absorbed by a union key.
		log_debug("replace andkey database");
		component_->AndKeys().clear();
		std::vector<std::vector<FieldInfo> >::iterator field_info_vet_iter = union_field_infos.begin();
		for (; field_info_vet_iter != union_field_infos.end();++field_info_vet_iter){
			component_->AddToFieldList(ANDKEY, *field_info_vet_iter);
		}
		std::map<uint32_t, std::vector<FieldInfo> >::iterator field_key_map_iter = fieldid_fieldinfos_map.begin();
		for(; field_key_map_iter != fieldid_fieldinfos_map.end(); field_key_map_iter++){
			component_->AddToFieldList(ANDKEY, field_key_map_iter->second);
		}
	}
}
|
||||
|
||||
// Parse the bool query's logical sections in order: must (AND), should (OR),
// must_not (NOT). The unified-index rewrite runs right after the must
// section so composite keys can replace individual AND conditions before
// the other sections are parsed. Returns 0 on success.
int BoolQueryProcess::ParseContent(){
	int ret = 0;
	if(parse_value_.isMember(MUST)){
		has_and_logic_ = true;
		log_debug("must parse:%s" , parse_value_[MUST].toStyledString().c_str() );
		ret = ParseRequest(parse_value_[MUST] , ANDKEY);
		if (ret != 0) { return ret; }
	}
	HandleUnifiedIndex();

	if (parse_value_.isMember(SHOULD)){
		log_debug("should parse:%s" , parse_value_[SHOULD].toStyledString().c_str() );
		ret = ParseRequest(parse_value_[SHOULD] , ORKEY);
		if (ret != 0) { return ret; }
	}
	if (parse_value_.isMember(MUST_NOT)){
		log_debug("must not parse:%s" , parse_value_[MUST_NOT].toStyledString().c_str() );
		ret = ParseRequest(parse_value_[MUST_NOT] , INVERTKEY);
		if (ret != 0) { return ret; }
	}
	return ret;
}
|
||||
|
||||
// Bool queries drive parsing through ParseContent(); the per-logic-type
// overload required by the QueryProcess interface is intentionally a no-op.
int BoolQueryProcess::ParseContent(int logic_type){
	log_info("BoolQueryProcess no need parse content by logictype");
	return 0;
}
|
||||
|
||||
// Per-logic-type document retrieval is delegated to the sub-query
// processors inside GetValidDoc(); this interface overload is a no-op.
int BoolQueryProcess::GetValidDoc(int logic_type , const std::vector<FieldInfo>& keys){
	log_info("BoolQueryProcess no need get valid doc by logictype");
	return 0;
}
|
||||
|
||||
// Fan document retrieval out to the sub-query processors. Special case:
// when both PRE_TERM and TERM are active, the PRE_TERM processor handles
// everything. Otherwise every non-empty field group of every key type is
// dispatched to the processor registered for its query_type; an active
// group whose query_type was never registered is an internal error (-1).
int BoolQueryProcess::GetValidDoc(){
	if (query_bitset_.test(E_INDEX_READ_PRE_TERM) && query_bitset_.test(E_INDEX_READ_TERM)){
		return query_process_map_[E_INDEX_READ_PRE_TERM]->GetValidDoc();
	}

	for (uint32_t ui_key_type = ORKEY; ui_key_type < KEYTOTALNUM; ++ui_key_type){
		std::vector<std::vector<FieldInfo> >::const_iterator filedinfo_vet_iter = \
			component_->GetFieldList(ui_key_type).cbegin();

		for (;filedinfo_vet_iter != component_->GetFieldList(ui_key_type).cend();
			++ filedinfo_vet_iter){
			if (filedinfo_vet_iter->empty()){
				continue;
			}
			// The group's first entry carries the query type for the group.
			uint32_t query_type = (*filedinfo_vet_iter)[FIRST_TEST_INDEX].query_type;

			if (!query_bitset_.test(query_type)){
				log_error("get valid doc query type:%d , logic type:%d" , \
					query_type , ui_key_type);
				return -1;
			}

			query_process_map_[query_type]->GetValidDoc(ui_key_type , *filedinfo_vet_iter);
		}
	}
	return 0;
}
|
||||
|
||||
// Delegate scoring to the first active sub-query (in enum order). Geo
// sub-queries only score when the request had AND logic, no explicit sort
// field, and no range query; otherwise they are passed over in favor of
// the next active type. Returns -1 when no sub-query scores.
int BoolQueryProcess::GetScore(){
	for (uint32_t ui_query_type = E_INDEX_READ_PRE_TERM
		; ui_query_type < E_INDEX_READ_TOTAL_NUM
		; ++ui_query_type){
		if (!query_bitset_.test(ui_query_type)){
			continue;
		}

		if (E_INDEX_READ_GEO_DISTANCE == ui_query_type || E_INDEX_READ_GEO_SHAPE == ui_query_type){
			if (has_and_logic_ &&
				component_->SortField().empty() &&
				!query_bitset_.test(E_INDEX_READ_RANGE)){
				return query_process_map_[ui_query_type]->GetScore();
			}
			continue;
		}
		return query_process_map_[ui_query_type]->GetScore();
	}
	return -1;
}
|
||||
|
||||
// Build the response via the first active sub-query, mirroring GetScore():
// geo sub-queries produce the response only under AND logic with no sort
// field and no range query; otherwise the next active type is used. Falls
// back to the current response_ when nothing is active.
const Json::Value& BoolQueryProcess::SetResponse(){
	for (uint32_t ui_query_type = E_INDEX_READ_PRE_TERM
		; ui_query_type < E_INDEX_READ_TOTAL_NUM
		; ++ui_query_type){
		if (!query_bitset_.test(ui_query_type)){
			continue;
		}

		if (E_INDEX_READ_GEO_DISTANCE == ui_query_type || E_INDEX_READ_GEO_SHAPE == ui_query_type){
			if (has_and_logic_ &&
				component_->SortField().empty() &&
				!query_bitset_.test(E_INDEX_READ_RANGE)){
				response_ = query_process_map_[ui_query_type]->SetResponse();
				return response_;
			}
			continue;
		}

		response_ = query_process_map_[ui_query_type]->SetResponse();
		return response_;
	}
	return response_;
}
|
||||
|
||||
// Walk one logical section (must/should/must_not), which JSON allows to be
// either an array of single-key objects or one object; every member key
// (term/match/range/...) is routed to InitQueryProcess with the section's
// logic type. Returns 0 on success, -RT_PARSE_CONTENT_ERROR on failure.
int BoolQueryProcess::ParseRequest(
	const Json::Value& request,
	int logic_type)
{
	int iret = 0;
	if(request.isArray()){
		log_debug("array parse");
		for(int i = 0; i < (int)request.size(); i++){
			Json::Value::Members search_member = request[i].getMemberNames();
			Json::Value::Members::iterator iter = search_member.begin();
			for (; iter != search_member.end(); ++iter){
				iret = InitQueryProcess(logic_type , *iter , request[i][*iter]);
				if(iret != 0){
					log_error("InitQueryProcess error!");
					return -RT_PARSE_CONTENT_ERROR;
				}
			}
		}
	} else if (request.isObject()) {
		log_debug("object parse");
		Json::Value::Members search_member = request.getMemberNames();
		Json::Value::Members::iterator iter = search_member.begin();
		for (; iter != search_member.end(); ++iter){
			iret = InitQueryProcess(logic_type, *iter , request[*iter]);
			if(iret != 0){
				log_error("InitQueryProcess error!");
				return -RT_PARSE_CONTENT_ERROR;
			}
		}
	}
	return 0;
}
|
||||
|
||||
// Map one sub-query key (term/match/range/geo_distance/geo_shape) to its
// query type, lazily create the matching processor (a "range" under a
// terminal-tagged request becomes PRE_TERM), wire it up on first use, mark
// it active in query_bitset_, then hand it the sub-query value to parse.
// Returns -RT_PARSE_CONTENT_ERROR for an unsupported key.
int BoolQueryProcess::InitQueryProcess(
	uint32_t type,
	const std::string& query_key,
	const Json::Value& parse_value)
{
	log_debug("InitQueryProcess start");
	int query_type = -1;

	if(0 == query_key.compare(TERM)){
		query_type = E_INDEX_READ_TERM;
		if (query_process_map_.find(query_type) == query_process_map_.end()){
			query_process_map_.insert(std::make_pair(query_type
				, new TermQueryProcess(parse_value)));
			log_debug("bool query term process init");
		}
	} else if(0 == query_key.compare(MATCH)){
		query_type = E_INDEX_READ_MATCH;
		if (query_process_map_.find(query_type) == query_process_map_.end()){
			query_process_map_.insert(std::make_pair(query_type
				, new MatchQueryProcess(parse_value)));
			log_debug("bool query match process init");
		}
	} else if(0 == query_key.compare(RANGE)){
		// Terminal-tagged requests use the cursor-paged PRE_TERM path.
		if (component_->TerminalTag()){
			query_type = E_INDEX_READ_PRE_TERM;
		}else{
			query_type = E_INDEX_READ_RANGE;
		}
		if (query_process_map_.find(query_type) == query_process_map_.end()){
			query_process_map_.insert(std::make_pair(query_type
				, RangeQueryGenerator::Instance()->GetRangeQueryProcess(query_type , parse_value)));
			log_debug("bool query range process init");
		}
	} else if(0 == query_key.compare(GEODISTANCE)){
		query_type = E_INDEX_READ_GEO_DISTANCE;
		if (query_process_map_.find(query_type) == query_process_map_.end()){
			query_process_map_.insert(std::make_pair(query_type
				, new GeoDistanceQueryProcess(parse_value)));
			log_debug("bool query geo distance process init");
		}
	} else if(0 == query_key.compare(GEOSHAPE)){
		query_type = E_INDEX_READ_GEO_SHAPE;
		if (query_process_map_.find(query_type) == query_process_map_.end()){
			query_process_map_.insert(std::make_pair(query_type
				, new GeoShapeQueryProcess(parse_value)));
			log_debug("bool query geo shape process init");
		}
	} else {
		log_error("BoolQueryParser only support term/match/range/geo_distance/geoshape!");
		return -RT_PARSE_CONTENT_ERROR;
	}

	// First use of this query type: share the request context with the
	// newly created processor and mark the type active.
	if (!query_bitset_.test(query_type)){
		query_bitset_.set(query_type);

		query_process_map_[query_type]->SetRequest(request_);
		query_process_map_[query_type]->SetComponent(component_);
		query_process_map_[query_type]->SetDocManager(doc_manager_);
		log_debug("query bitset has type:%d" , query_type);
	}
	log_debug("current query type:%d , parse value:%s" , query_type , parse_value.toStyledString().c_str());
	query_process_map_[query_type]->SetParseJsonValue(parse_value);
	query_process_map_[query_type]->ParseContent(type);
	return 0;
}
|
||||
|
||||
// Encode each FieldInfo's value(s) with KeyFormat for use in a composite
// key. Range / pre-term conditions contribute two encoded keys (start and
// end) and set b_has_range; everything else contributes one key encoded
// from its word.
void BoolQueryProcess::GetKeyFromFieldInfo(const std::vector<FieldInfo>& field_info_vec, std::vector<std::string>& key_vec, bool& b_has_range){
	std::vector<FieldInfo>::const_iterator iter = field_info_vec.cbegin();
	for(; iter != field_info_vec.cend(); iter++){
		KeyFormat::UnionKey o_keyinfo_vet;
		std::string s_format_key = "";
		if (E_INDEX_READ_RANGE == iter->query_type ||
			E_INDEX_READ_PRE_TERM == iter->query_type){
			b_has_range = true;

			// Encode the range bounds separately: start key then end key.
			o_keyinfo_vet.push_back(std::make_pair(iter->field_type , std::to_string(iter->start)));
			s_format_key = KeyFormat::Encode(o_keyinfo_vet);
			key_vec.push_back(s_format_key);

			o_keyinfo_vet.clear();
			o_keyinfo_vet.push_back(std::make_pair(iter->field_type , std::to_string(iter->end)));
			s_format_key = KeyFormat::Encode(o_keyinfo_vet);
			key_vec.push_back(s_format_key);
		}else{
			o_keyinfo_vet.push_back(std::make_pair(iter->field_type , iter->word));
			s_format_key = KeyFormat::Encode(o_keyinfo_vet);
			key_vec.push_back(s_format_key);
			log_debug("field type:%d , word:%s" , iter->field_type , iter->word.c_str());
		}
	}
}
|
||||
|
||||
/*
|
||||
** 通过递归求出二维vector每一维vector中取一个数的各种组合
|
||||
** 输入:[[a],[b1,b2],[c1,c2,c3]]
|
||||
** 输出:[a_b1_c1,a_b1_c2,a_b1_c3,a_b2_c1,a_b2_c2,a_b2_c3]
|
||||
*/
|
||||
std::vector<std::string> BoolQueryProcess::Combination(
|
||||
std::vector<std::vector<std::string> >& dimensionalArr)
|
||||
{
|
||||
int FLength = dimensionalArr.size();
|
||||
if(FLength >= 2){
|
||||
int SLength1 = dimensionalArr[0].size();
|
||||
int SLength2 = dimensionalArr[1].size();
|
||||
int DLength = SLength1 * SLength2;
|
||||
std::vector<std::string> temporary(DLength);
|
||||
int index = 0;
|
||||
for(int i = 0; i < SLength1; i++){
|
||||
for (int j = 0; j < SLength2; j++) {
|
||||
temporary[index].append(dimensionalArr[0][i]);
|
||||
temporary[index].append(dimensionalArr[1][j]);
|
||||
index++;
|
||||
}
|
||||
}
|
||||
std::vector<std::vector<std::string> > new_arr;
|
||||
new_arr.push_back(temporary);
|
||||
for(int i = 2; i < (int)dimensionalArr.size(); i++){
|
||||
new_arr.push_back(dimensionalArr[i]);
|
||||
}
|
||||
return Combination(new_arr);
|
||||
} else {
|
||||
return dimensionalArr[0];
|
||||
}
|
||||
}
|
39
src/search_local/index_read/process/bool_query_process.h
Normal file
39
src/search_local/index_read/process/bool_query_process.h
Normal file
@ -0,0 +1,39 @@
|
||||
#ifndef BOOL_QUERY_PROCESS_H_
|
||||
#define BOOL_QUERY_PROCESS_H_
|
||||
|
||||
#include "query_process.h"
|
||||
#include <bitset>
|
||||
#include <algorithm>
|
||||
|
||||
class QueryProcess;
|
||||
class PreTerminal;
|
||||
class GeoDistanceQueryProcess;
|
||||
|
||||
class BoolQueryProcess : public QueryProcess{
|
||||
public:
|
||||
BoolQueryProcess(const Json::Value& value);
|
||||
virtual ~BoolQueryProcess();
|
||||
|
||||
private:
|
||||
virtual int ParseContent(int logic_type);
|
||||
virtual int GetValidDoc(int logic_type , const std::vector<FieldInfo>& keys);
|
||||
|
||||
virtual int ParseContent();
|
||||
virtual int GetValidDoc();
|
||||
virtual int GetScore();
|
||||
virtual const Json::Value& SetResponse();
|
||||
|
||||
private:
|
||||
int ParseRequest(const Json::Value& request, int logic_type);
|
||||
int InitQueryProcess(uint32_t type , const std::string& query_key, const Json::Value& parse_value);
|
||||
void HandleUnifiedIndex();
|
||||
void GetKeyFromFieldInfo(const std::vector<FieldInfo>& field_info_vec, std::vector<std::string>& key_vec , bool& b_has_range);
|
||||
std::vector<std::string> Combination(std::vector<std::vector<std::string> >& dimensionalArr);
|
||||
|
||||
private:
|
||||
std::map<int , QueryProcess*> query_process_map_;
|
||||
std::bitset<E_INDEX_READ_TOTAL_NUM> query_bitset_;
|
||||
bool has_and_logic_;
|
||||
};
|
||||
|
||||
#endif
|
@ -0,0 +1,124 @@
|
||||
#include "geo_distance_query_process.h"
|
||||
#include "../sort_operator/geo_query_sort_operator.h"
|
||||
#include "../valid_doc_filter.h"
|
||||
|
||||
GeoDistanceQueryProcess::GeoDistanceQueryProcess(const Json::Value& value)
|
||||
: QueryProcess(value)
|
||||
, logictype_geopoint_map_()
|
||||
{
|
||||
response_["type"] = 1;
|
||||
}
|
||||
|
||||
GeoDistanceQueryProcess::~GeoDistanceQueryProcess()
|
||||
{ }
|
||||
|
||||
int GeoDistanceQueryProcess::ParseContent(){
|
||||
return ParseContent(ANDKEY);
|
||||
}
|
||||
|
||||
int GeoDistanceQueryProcess::ParseContent(int logic_type)
|
||||
{
|
||||
std::string s_geo_distance_fieldname("");
|
||||
GeoPointContext o_geo_point;
|
||||
Json::Value::Members member = parse_value_.getMemberNames();
|
||||
Json::Value::Members::iterator iter = member.begin();
|
||||
for(; iter != member.end(); ++iter){
|
||||
Json::Value geo_value = parse_value_[*iter];
|
||||
if (DISTANCE == (*iter)){
|
||||
if (geo_value.isString()){
|
||||
o_geo_point.SetDistance(atof(geo_value.asString().c_str()));
|
||||
} else {
|
||||
log_error("GeoDistanceParser distance should be string, the unit is km.");
|
||||
return -RT_PARSE_CONTENT_ERROR;
|
||||
}
|
||||
} else {
|
||||
s_geo_distance_fieldname = (*iter);
|
||||
o_geo_point(geo_value);
|
||||
}
|
||||
}
|
||||
|
||||
logictype_geopoint_map_.insert(std::make_pair(logic_type , o_geo_point));
|
||||
|
||||
GeoPoint geo;
|
||||
geo.lon = atof(o_geo_point.sLongtitude.c_str());
|
||||
geo.lat = atof(o_geo_point.sLatitude.c_str());
|
||||
double d_distance = o_geo_point.d_distance;
|
||||
log_debug("geo lng:%f ,lat:%f , dis:%f" , geo.lon , geo.lat , d_distance);
|
||||
|
||||
std::vector<std::string> gisCode = GetArroundGeoHash(geo, d_distance, GEO_PRECISION);
|
||||
if(!gisCode.empty()){
|
||||
uint32_t segment_tag = SEGMENT_NONE;
|
||||
FieldInfo fieldInfo;
|
||||
fieldInfo.query_type = E_INDEX_READ_GEO_DISTANCE;
|
||||
|
||||
uint32_t uiRet = DBManager::Instance()->GetWordField(segment_tag, component_->Appid()
|
||||
, s_geo_distance_fieldname , fieldInfo);
|
||||
|
||||
if (0 == uiRet){
|
||||
log_error("field_name:[%s] error ,not in the app_field_define", s_geo_distance_fieldname.c_str());
|
||||
return -RT_PARSE_CONTENT_ERROR;
|
||||
}
|
||||
|
||||
std::vector<FieldInfo> fieldInfos;
|
||||
if (uiRet != 0 && SEGMENT_NONE == segment_tag) {
|
||||
component_->SetHasGisFlag(true);
|
||||
for (size_t index = 0; index < gisCode.size(); index++) {
|
||||
fieldInfo.word = gisCode[index];
|
||||
log_debug("geo point:%s", fieldInfo.word.c_str());
|
||||
fieldInfos.push_back(fieldInfo);
|
||||
}
|
||||
}
|
||||
|
||||
component_->AddToFieldList(logic_type, fieldInfos);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int GeoDistanceQueryProcess::GetValidDoc()
|
||||
{
|
||||
if (component_->GetFieldList(ANDKEY).empty()){
|
||||
return -RT_GET_FIELD_ERROR;
|
||||
}
|
||||
return GetValidDoc(ANDKEY , component_->GetFieldList(ANDKEY)[FIRST_TEST_INDEX]);
|
||||
}
|
||||
|
||||
int GeoDistanceQueryProcess::GetValidDoc(
|
||||
int logic_type,
|
||||
const std::vector<FieldInfo>& keys)
|
||||
{
|
||||
log_debug("geo related query GetValidDoc beginning...");
|
||||
std::vector<IndexInfo> index_info_vet;
|
||||
int iret = ValidDocFilter::Instance()->TextInvertIndexSearch(keys, index_info_vet);
|
||||
if (iret != 0) { return iret; }
|
||||
|
||||
bool bRet = doc_manager_->GetDocContent(logictype_geopoint_map_[logic_type] , index_info_vet);
|
||||
if (false == bRet){
|
||||
log_error("GetDocContent error.");
|
||||
return -RT_DTC_ERR;
|
||||
}
|
||||
ResultContext::Instance()->SetIndexInfos(logic_type , index_info_vet);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int GeoDistanceQueryProcess::GetScore()
|
||||
{
|
||||
log_debug("geo related query GetScore beginning...");
|
||||
sort_operator_base_ = new GeoQuerySortOperator(component_ , doc_manager_);
|
||||
p_scoredocid_set_ = sort_operator_base_->GetSortOperator((uint32_t)component_->SortType());
|
||||
return 0;
|
||||
}
|
||||
|
||||
void GeoDistanceQueryProcess::SortScore(int& i_sequence , int& i_rank)
|
||||
{
|
||||
log_debug("geo related query SortScore beginning...");
|
||||
|
||||
if ((SORT_FIELD_DESC == component_->SortType() || SORT_FIELD_ASC == component_->SortType())
|
||||
&& p_scoredocid_set_->empty()){
|
||||
SortByCOrderOp(i_rank);
|
||||
}else if (SORT_FIELD_DESC == component_->SortType()
|
||||
|| DONT_SORT == component_->SortType()){ // 降序和不排序处理
|
||||
DescSort(i_sequence , i_rank);
|
||||
}else { // 不指定情况下,默认升序,距离近在前
|
||||
AscSort(i_sequence , i_rank);
|
||||
}
|
||||
}
|
115
src/search_local/index_read/process/geo_distance_query_process.h
Normal file
115
src/search_local/index_read/process/geo_distance_query_process.h
Normal file
@ -0,0 +1,115 @@
|
||||
/*
|
||||
* =====================================================================================
|
||||
*
|
||||
* Filename: geo_distance_query_process.h
|
||||
*
|
||||
* Description: geo_distance_query_process class definition.
|
||||
*
|
||||
* Version: 1.0
|
||||
* Created: 17/05/2021
|
||||
* Revision: none
|
||||
* Compiler: gcc
|
||||
*
|
||||
* Author: chenyujie, chenyujie28@jd.com
|
||||
* Company: JD.com, Inc.
|
||||
*
|
||||
* =====================================================================================
|
||||
*/
|
||||
#ifndef GEO_DISTANCE_QUERY_PROCESS_H_
|
||||
#define GEO_DISTANCE_QUERY_PROCESS_H_
|
||||
|
||||
#include "query_process.h"
|
||||
#include "geohash.h"
|
||||
|
||||
const double DEFAULT_DISTANCE = 2.0;
|
||||
const int GEO_PRECISION = 6;
|
||||
|
||||
struct GeoPointContext
|
||||
{
|
||||
std::string sLatitude;
|
||||
std::string sLongtitude;
|
||||
double d_distance;
|
||||
|
||||
GeoPointContext()
|
||||
: sLatitude("")
|
||||
, sLongtitude("")
|
||||
, d_distance(DEFAULT_DISTANCE)
|
||||
{}
|
||||
|
||||
GeoPointContext(const Json::Value& oJsonValue){
|
||||
ParseJson(oJsonValue);
|
||||
}
|
||||
|
||||
GeoPointContext(const std::string& sLat, const std::string& sLng
|
||||
, double dDis = DEFAULT_DISTANCE)
|
||||
: sLatitude(sLat)
|
||||
, sLongtitude(sLng)
|
||||
, d_distance(dDis)
|
||||
{ }
|
||||
|
||||
void operator()(const Json::Value& oJsonValue)
|
||||
{
|
||||
ParseJson(oJsonValue);
|
||||
}
|
||||
|
||||
void SetDistance(const double& dDis){
|
||||
d_distance = dDis;
|
||||
}
|
||||
|
||||
bool IsGeoPointFormat() const{
|
||||
return ((!sLatitude.empty()) && (!sLongtitude.empty()));
|
||||
}
|
||||
|
||||
void Clear(){
|
||||
sLatitude.clear();
|
||||
sLongtitude.clear();
|
||||
}
|
||||
|
||||
private:
|
||||
void ParseJson(const Json::Value& oJsonValue){
|
||||
if (oJsonValue.isString()){
|
||||
std::string sValue = oJsonValue.asString();
|
||||
std::size_t iPos = sValue.find(",");
|
||||
sLatitude = sValue.substr(0,iPos);
|
||||
sLongtitude = sValue.substr(iPos + 1);
|
||||
}
|
||||
|
||||
if (oJsonValue.isArray()){
|
||||
if (oJsonValue[0].isString()){
|
||||
sLatitude = oJsonValue[0].asString();
|
||||
}
|
||||
if (oJsonValue[1].isString()){
|
||||
sLongtitude = oJsonValue[1].asString();
|
||||
}
|
||||
}
|
||||
|
||||
if (oJsonValue.isObject()){
|
||||
if (oJsonValue["latitude"].isString()){
|
||||
sLatitude = oJsonValue["latitude"].asString();
|
||||
}
|
||||
if (oJsonValue["longitude"].isString()){
|
||||
sLongtitude = oJsonValue["longitude"].asString();
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class GeoDistanceQueryProcess: public QueryProcess{
|
||||
public:
|
||||
GeoDistanceQueryProcess(const Json::Value& value);
|
||||
virtual ~GeoDistanceQueryProcess();
|
||||
|
||||
public:
|
||||
virtual int ParseContent(int logic_type);
|
||||
virtual int GetValidDoc(int logic_type, const std::vector<FieldInfo>& keys);
|
||||
|
||||
virtual int ParseContent();
|
||||
virtual int GetValidDoc();
|
||||
virtual int GetScore();
|
||||
virtual void SortScore(int& i_sequence , int& i_rank);
|
||||
|
||||
private:
|
||||
std::map<int , GeoPointContext> logictype_geopoint_map_;
|
||||
};
|
||||
|
||||
#endif
|
@ -0,0 +1,55 @@
|
||||
#include "geo_shape_query_process.h"
|
||||
|
||||
GeoShapeQueryProcess::GeoShapeQueryProcess(const Json::Value& value)
|
||||
: GeoDistanceQueryProcess(value)
|
||||
{ }
|
||||
|
||||
GeoShapeQueryProcess::~GeoShapeQueryProcess()
|
||||
{ }
|
||||
|
||||
int GeoShapeQueryProcess::ParseContent(int logic_type){
|
||||
Json::Value::Members member = parse_value_.getMemberNames();
|
||||
Json::Value::Members::iterator iter = member.begin();
|
||||
if(iter == member.end()){ // 一个geo_shape下只对应一个字段
|
||||
return -RT_PARSE_CONTENT_ERROR;
|
||||
}
|
||||
std::string fieldname = *iter;
|
||||
Json::Value field_value = parse_value_[fieldname];
|
||||
GeoShapeContext o_geo_shape;
|
||||
if(field_value.isMember(POINTS)){
|
||||
o_geo_shape(field_value[POINTS]);
|
||||
} else {
|
||||
return -RT_PARSE_CONTENT_ERROR;
|
||||
}
|
||||
|
||||
if (o_geo_shape.IsGeoShapeFormat()){
|
||||
std::vector<std::string> gisCode = GetArroundGeoHash(o_geo_shape.GetMinEnclosRect(), GEO_PRECISION);
|
||||
if(!gisCode.empty()){
|
||||
vector<FieldInfo> fieldInfos;
|
||||
uint32_t segment_tag = SEGMENT_NONE;
|
||||
FieldInfo fieldInfo;
|
||||
fieldInfo.query_type = E_INDEX_READ_GEO_SHAPE;
|
||||
|
||||
uint32_t uiRet = DBManager::Instance()->GetWordField(segment_tag, component_->Appid()
|
||||
, fieldname, fieldInfo);
|
||||
|
||||
if (0 == uiRet){
|
||||
log_error("field_name:[%s] error ,not in the app_field_define", fieldname.c_str());
|
||||
return -RT_PARSE_CONTENT_ERROR;
|
||||
}
|
||||
|
||||
if (uiRet != 0 && SEGMENT_NONE == segment_tag) {
|
||||
component_->SetHasGisFlag(true);
|
||||
for (size_t index = 0; index < gisCode.size(); index++) {
|
||||
fieldInfo.word = gisCode[index];
|
||||
log_debug("geo shape point:%s", fieldInfo.word.c_str());
|
||||
fieldInfos.push_back(fieldInfo);
|
||||
}
|
||||
}
|
||||
if (!fieldInfos.empty()) {
|
||||
component_->AddToFieldList(logic_type, fieldInfos);
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
@ -0,0 +1,77 @@
|
||||
#ifndef GEO_SHAPE_QUERY_PROCESS_H_
|
||||
#define GEO_SHAPE_QUERY_PROCESS_H_
|
||||
|
||||
#include "query_process.h"
|
||||
#include "geo_distance_query_process.h"
|
||||
|
||||
const char* const POINTS ="points";
|
||||
|
||||
struct GeoShapeContext
|
||||
{
|
||||
std::vector<GeoPointContext> oGeoShapeVet;
|
||||
|
||||
void operator()(const Json::Value& oJsonValue)
|
||||
{
|
||||
if (oJsonValue.isString()){
|
||||
std::string sValue = oJsonValue.asString();
|
||||
sValue = delPrefix(sValue);
|
||||
std::vector<std::string> oValueVet = splitEx(sValue, ",");
|
||||
for(uint32_t str_vec_idx = 0; str_vec_idx < oValueVet.size(); str_vec_idx++){
|
||||
std::string wkt_str = trim(oValueVet[str_vec_idx]);
|
||||
std::vector<std::string> wkt_vec = splitEx(wkt_str, " ");
|
||||
if(wkt_vec.size() == 2){
|
||||
oGeoShapeVet.push_back(GeoPointContext(wkt_vec[1], wkt_vec[0]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (oJsonValue.isArray()){
|
||||
for(int i = 0; i < (int)oJsonValue.size(); i++){
|
||||
GeoPointContext o_geo_point(oJsonValue[i]);
|
||||
oGeoShapeVet.push_back(o_geo_point);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
EnclosingRectangle GetMinEnclosRect(){
|
||||
std::vector<double> oLatVet;
|
||||
std::vector<double> oLngVet;
|
||||
for (size_t i = 0; i < oGeoShapeVet.size(); ++i){
|
||||
oLatVet.push_back(atof(oGeoShapeVet[i].sLatitude.c_str()));
|
||||
oLngVet.push_back(atof(oGeoShapeVet[i].sLongtitude.c_str()));
|
||||
}
|
||||
if (oLatVet.empty() || oLngVet.empty()){
|
||||
return EnclosingRectangle();
|
||||
}
|
||||
|
||||
std::sort(oLatVet.begin(), oLatVet.end());
|
||||
std::sort(oLngVet.begin(), oLngVet.end());
|
||||
|
||||
return EnclosingRectangle(*(oLngVet.end() - 1), *(oLngVet.begin())
|
||||
, *(oLatVet.end() - 1), *(oLatVet.begin()));
|
||||
}
|
||||
|
||||
bool IsGeoShapeFormat(){
|
||||
bool bRet = !oGeoShapeVet.empty();
|
||||
for (size_t i = 0; i < oGeoShapeVet.size(); i++)
|
||||
{
|
||||
bRet &= oGeoShapeVet[i].IsGeoPointFormat();
|
||||
}
|
||||
return bRet;
|
||||
}
|
||||
|
||||
void Clear(){
|
||||
oGeoShapeVet.clear();
|
||||
}
|
||||
};
|
||||
|
||||
class GeoShapeQueryProcess : public GeoDistanceQueryProcess{
|
||||
public:
|
||||
GeoShapeQueryProcess(const Json::Value& value);
|
||||
virtual~ GeoShapeQueryProcess();
|
||||
|
||||
public:
|
||||
virtual int ParseContent(int logic_type);
|
||||
};
|
||||
|
||||
#endif
|
@ -1,329 +1,80 @@
|
||||
#include "match_query_process.h"
|
||||
#include "math.h"
|
||||
#include "../order_op.h"
|
||||
#include "../valid_doc_filter.h"
|
||||
|
||||
#define DOC_CNT 10000
|
||||
|
||||
MatchQueryProcess::MatchQueryProcess(uint32_t appid, Json::Value& value, Component* component)
|
||||
:QueryProcess(appid, value, component){
|
||||
appid_ = component_->Appid();
|
||||
sort_type_ = component_->SortType();
|
||||
sort_field_ = component_->SortField();
|
||||
has_gis_ = false;
|
||||
}
|
||||
MatchQueryProcess::MatchQueryProcess(const Json::Value& value)
|
||||
: QueryProcess(value)
|
||||
{ }
|
||||
|
||||
MatchQueryProcess::~MatchQueryProcess(){
|
||||
|
||||
}
|
||||
MatchQueryProcess::~MatchQueryProcess()
|
||||
{ }
|
||||
|
||||
int MatchQueryProcess::ParseContent(){
|
||||
return ParseContent(ORKEY);
|
||||
}
|
||||
|
||||
int MatchQueryProcess::ParseContent(uint32_t type){
|
||||
int MatchQueryProcess::ParseContent(int logic_type){
|
||||
vector<FieldInfo> fieldInfos;
|
||||
Json::Value::Members member = value_.getMemberNames();
|
||||
Json::Value::Members member = parse_value_.getMemberNames();
|
||||
Json::Value::Members::iterator iter = member.begin();
|
||||
string fieldname;
|
||||
std::string fieldname;
|
||||
Json::Value field_value;
|
||||
if(iter != member.end()){ // 一个match下只对应一个字段
|
||||
fieldname = *iter;
|
||||
field_value = value_[fieldname];
|
||||
field_value = parse_value_[fieldname];
|
||||
} else {
|
||||
log_error("MatchQueryProcess error, value is null");
|
||||
return -RT_PARSE_CONTENT_ERROR;
|
||||
}
|
||||
uint32_t segment_tag = 0;
|
||||
FieldInfo fieldInfo;
|
||||
uint32_t field = DBManager::Instance()->GetWordField(segment_tag, appid_, fieldname, fieldInfo);
|
||||
if (field != 0 && segment_tag == 1)
|
||||
{
|
||||
string split_data = SplitManager::Instance()->split(field_value.asString(), appid_);
|
||||
fieldInfo.query_type = E_INDEX_READ_MATCH;
|
||||
|
||||
uint32_t segment_tag = SEGMENT_NONE;
|
||||
uint32_t uiRet = DBManager::Instance()->GetWordField(segment_tag, component_->Appid()
|
||||
, fieldname, fieldInfo);
|
||||
if (uiRet != 0 && SEGMENT_DEFAULT == segment_tag){
|
||||
std::string split_data = SplitManager::Instance()->split(field_value.asString(), component_->Appid());
|
||||
log_debug("split_data: %s", split_data.c_str());
|
||||
vector<string> split_datas = splitEx(split_data, "|");
|
||||
for(size_t index = 0; index < split_datas.size(); index++)
|
||||
{
|
||||
FieldInfo info;
|
||||
info.field = fieldInfo.field;
|
||||
info.field_type = fieldInfo.field_type;
|
||||
info.word = split_datas[index];
|
||||
info.segment_tag = fieldInfo.segment_tag;
|
||||
fieldInfos.push_back(info);
|
||||
std::vector<std::string> split_datas = splitEx(split_data, "|");
|
||||
for(size_t index = 0; index < split_datas.size(); index++){
|
||||
fieldInfo.word = split_datas[index];
|
||||
fieldInfos.push_back(fieldInfo);
|
||||
}
|
||||
}
|
||||
else if (field != 0)
|
||||
{
|
||||
else if (uiRet != 0){
|
||||
fieldInfo.word = field_value.asString();
|
||||
fieldInfos.push_back(fieldInfo);
|
||||
}else{
|
||||
log_error("field_name:[%s] error ,not in the app_field_define or segmentTag error", fieldname.c_str());
|
||||
return -RT_PARSE_CONTENT_ERROR;
|
||||
}
|
||||
|
||||
component_->AddToFieldList(type, fieldInfos);
|
||||
component_->AddToFieldList(logic_type, fieldInfos);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int MatchQueryProcess::GetValidDoc(){
|
||||
doc_manager_ = new DocManager(component_);
|
||||
logical_operate_ = new LogicalOperate(appid_, sort_type_, has_gis_, component_->CacheSwitch());
|
||||
|
||||
for (size_t index = 0; index < component_->Keys().size(); index++)
|
||||
{
|
||||
vector<IndexInfo> doc_id_vec;
|
||||
vector<FieldInfo> fieldInfos = component_->Keys()[index];
|
||||
vector<FieldInfo>::iterator it;
|
||||
for (it = fieldInfos.begin(); it != fieldInfos.end(); it++) {
|
||||
vector<IndexInfo> doc_info;
|
||||
if ((*it).segment_tag == 3) {
|
||||
int ret = GetDocByShiftWord(*it, doc_info, appid_, highlightWord_);
|
||||
if (ret != 0) {
|
||||
doc_id_vec.clear();
|
||||
return -RT_GET_DOC_ERR;
|
||||
}
|
||||
sort(doc_info.begin(), doc_info.end());
|
||||
for (size_t doc_info_idx = 0; doc_info_idx < doc_info.size(); doc_info_idx++){
|
||||
KeyInfo info;
|
||||
info.word_freq = 1;
|
||||
info.field = (*it).field;
|
||||
info.word = (*it).word;
|
||||
doc_info_map_[doc_info[doc_info_idx].doc_id].push_back(info);
|
||||
}
|
||||
} else if ((*it).segment_tag == 4) {
|
||||
int ret = GetDocByShiftEnWord(*it, doc_info, appid_, highlightWord_);
|
||||
if (ret != 0) {
|
||||
doc_id_vec.clear();
|
||||
return -RT_GET_DOC_ERR;
|
||||
}
|
||||
sort(doc_info.begin(), doc_info.end());
|
||||
for (size_t doc_info_idx = 0; doc_info_idx < doc_info.size(); doc_info_idx++){
|
||||
KeyInfo info;
|
||||
info.word_freq = 1;
|
||||
info.field = (*it).field;
|
||||
info.word = (*it).word;
|
||||
doc_info_map_[doc_info[doc_info_idx].doc_id].push_back(info);
|
||||
}
|
||||
} else {
|
||||
int ret = logical_operate_->GetDocIdSetByWord(*it, doc_info);
|
||||
if (ret != 0){
|
||||
return -RT_GET_DOC_ERR;
|
||||
}
|
||||
if (doc_info.size() == 0)
|
||||
continue;
|
||||
if (!isAllNumber((*it).word))
|
||||
highlightWord_.insert((*it).word);
|
||||
if(sort_type_ == SORT_RELEVANCE){
|
||||
logical_operate_->CalculateByWord(*it, doc_info, doc_info_map_, key_in_doc_);
|
||||
}
|
||||
}
|
||||
doc_id_vec = vec_union(doc_id_vec, doc_info);
|
||||
}
|
||||
if(index == 0){ // 第一个直接赋值给vecs,后续的依次与前面的进行逻辑运算
|
||||
doc_vec_.assign(doc_id_vec.begin(), doc_id_vec.end());
|
||||
} else {
|
||||
doc_vec_ = vec_union(doc_vec_, doc_id_vec);
|
||||
}
|
||||
}
|
||||
|
||||
bool bRet = doc_manager_->GetDocContent(has_gis_, doc_vec_, valid_docs_, distances_);
|
||||
if (false == bRet) {
|
||||
log_error("GetDocContent error.");
|
||||
return -RT_DTC_ERR;
|
||||
if (component_->GetFieldList(ORKEY).empty()){
|
||||
return -RT_GET_FIELD_ERROR;
|
||||
}
|
||||
return GetValidDoc(ORKEY , component_->GetFieldList(ORKEY)[FIRST_TEST_INDEX]);
|
||||
}
|
||||
|
||||
int MatchQueryProcess::GetValidDoc(int logic_type, const std::vector<FieldInfo>& keys){
|
||||
std::vector<IndexInfo> index_info_vet;
|
||||
int iret = -1;
|
||||
uint32_t segment_tag = keys[FIRST_SPLIT_WORD_INDEX].segment_tag;
|
||||
if(SEGMENT_CHINESE == segment_tag
|
||||
|| SEGMENT_ENGLISH == segment_tag){
|
||||
iret = ValidDocFilter::Instance()->HanPinTextInvertIndexSearch(keys , index_info_vet);
|
||||
}else{
|
||||
iret = ValidDocFilter::Instance()->TextInvertIndexSearch(keys , index_info_vet);
|
||||
}
|
||||
|
||||
if (iret != 0) { return iret; }
|
||||
ResultContext::Instance()->SetIndexInfos(logic_type , index_info_vet);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int MatchQueryProcess::GetScoreAndSort(){
|
||||
// BM25 algorithm
|
||||
uint32_t doc_cnt = DOC_CNT;
|
||||
double k1 = 1.2;
|
||||
double k2 = 200;
|
||||
double K = 1.65;
|
||||
string doc_id;
|
||||
string keyword;
|
||||
uint32_t word_freq = 0;
|
||||
uint32_t field = 0;
|
||||
|
||||
if(sort_type_ == SORT_RELEVANCE || sort_type_ == SORT_TIMESTAMP){
|
||||
map<string, vec>::iterator ves_iter = doc_info_map_.begin();
|
||||
for (; ves_iter != doc_info_map_.end(); ves_iter++) {
|
||||
double score = 0;
|
||||
uint32_t key_docs = 0;
|
||||
|
||||
doc_id = ves_iter->first;
|
||||
vector<KeyInfo> &key_info = ves_iter->second;
|
||||
if(valid_docs_.find(doc_id) == valid_docs_.end()){
|
||||
continue;
|
||||
}
|
||||
|
||||
set<string> word_set;
|
||||
map<string, vector<int> > pos_map;
|
||||
map<string, vector<int> > title_pos_map;
|
||||
for (uint32_t i = 0; i < key_info.size(); i++) {
|
||||
keyword = key_info[i].word;
|
||||
if (word_set.find(keyword) == word_set.end()) {
|
||||
word_set.insert(keyword);
|
||||
}
|
||||
word_freq = key_info[i].word_freq;
|
||||
field = key_info[i].field;
|
||||
if (field == LOCATE_ANY) {
|
||||
pos_map[keyword] = key_info[i].pos_vec;
|
||||
}
|
||||
if (field == LOCATE_TITLE) {
|
||||
title_pos_map[keyword] = key_info[i].pos_vec;
|
||||
}
|
||||
key_docs = key_in_doc_[keyword];
|
||||
score += log((doc_cnt - key_docs + 0.5) / (key_docs + 0.5)) * ((k1 + 1)*word_freq) / (K + word_freq) * (k2 + 1) * 1 / (k2 + 1);
|
||||
}
|
||||
/*if (!complete_keys.empty()) { // 完全匹配
|
||||
if (word_set.size() != word_vec.size()) { // 文章中出现的词语数量与输入的不一致,则不满足完全匹配
|
||||
continue;
|
||||
}
|
||||
else { // 在标题和正文中都不连续出现,则不满足
|
||||
if (CheckWordContinus(word_vec, pos_map) == false && CheckWordContinus(word_vec, title_pos_map) == false) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}*/
|
||||
skipList_.InsertNode(score, doc_id.c_str());
|
||||
}
|
||||
|
||||
} else {
|
||||
set<string>::iterator set_iter = valid_docs_.begin();
|
||||
for(; set_iter != valid_docs_.end(); set_iter++){
|
||||
doc_id = *set_iter;
|
||||
|
||||
if (sort_type_ == SORT_FIELD_ASC || sort_type_ == SORT_FIELD_DESC){
|
||||
doc_manager_->GetScoreMap(doc_id, sort_type_, sort_field_, sort_field_type_, appid_);
|
||||
} else {
|
||||
skipList_.InsertNode(1, doc_id.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void MatchQueryProcess::TaskEnd(){
|
||||
Json::FastWriter writer;
|
||||
Json::Value response;
|
||||
response["code"] = 0;
|
||||
int sequence = -1;
|
||||
int rank = 0;
|
||||
int page_size = component_->PageSize();
|
||||
int limit_start = page_size * (component_->PageIndex()-1);
|
||||
int limit_end = page_size * (component_->PageIndex()-1) + page_size - 1;
|
||||
|
||||
log_debug("search result begin.");
|
||||
|
||||
if((sort_type_ == SORT_FIELD_DESC || sort_type_ == SORT_FIELD_ASC) && skipList_.GetSize() == 0){
|
||||
OrderOpCond order_op_cond;
|
||||
order_op_cond.last_id = component_->LastId();
|
||||
order_op_cond.limit_start = limit_start;
|
||||
order_op_cond.count = page_size;
|
||||
order_op_cond.has_extra_filter = false;
|
||||
if(component_->ExtraFilterKeys().size() != 0 || component_->ExtraFilterAndKeys().size() != 0 || component_->ExtraFilterInvertKeys().size() != 0){
|
||||
order_op_cond.has_extra_filter = true;
|
||||
}
|
||||
if(sort_field_type_ == FIELDTYPE_INT){
|
||||
rank += doc_manager_->ScoreIntMap().size();
|
||||
COrderOp<int> orderOp(FIELDTYPE_INT, component_->SearchAfter(), sort_type_);
|
||||
orderOp.Process(doc_manager_->ScoreIntMap(), atoi(component_->LastScore().c_str()), order_op_cond, response, doc_manager_);
|
||||
} else if(sort_field_type_ == FIELDTYPE_DOUBLE) {
|
||||
rank += doc_manager_->ScoreDoubleMap().size();
|
||||
COrderOp<double> orderOp(FIELDTYPE_DOUBLE, component_->SearchAfter(), sort_type_);
|
||||
orderOp.Process(doc_manager_->ScoreDoubleMap(), atof(component_->LastScore().c_str()), order_op_cond, response, doc_manager_);
|
||||
} else {
|
||||
rank += doc_manager_->ScoreStrMap().size();
|
||||
COrderOp<string> orderOp(FIELDTYPE_STRING, component_->SearchAfter(), sort_type_);
|
||||
orderOp.Process(doc_manager_->ScoreStrMap(), component_->LastScore(), order_op_cond, response, doc_manager_);
|
||||
}
|
||||
} else if (has_gis_ || sort_type_ == SORT_FIELD_ASC) {
|
||||
log_debug("m_has_gis or SORT_FIELD_ASC, size:%d ", skipList_.GetSize());
|
||||
SkipListNode *tmp = skipList_.GetHeader()->level[0].forward;
|
||||
while (tmp->level[0].forward != NULL) {
|
||||
// 通过extra_filter_keys进行额外过滤(针对区分度不高的字段)
|
||||
if(doc_manager_->CheckDocByExtraFilterKey(tmp->value) == false){
|
||||
log_debug("CheckDocByExtraFilterKey failed, %s", tmp->value);
|
||||
tmp = tmp->level[0].forward;
|
||||
continue;
|
||||
}
|
||||
sequence++;
|
||||
rank++;
|
||||
if(component_->ReturnAll() == 0){
|
||||
if (sequence < limit_start || sequence > limit_end) {
|
||||
tmp = tmp->level[0].forward;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
Json::Value doc_info;
|
||||
doc_info["doc_id"] = Json::Value(tmp->value);
|
||||
doc_info["score"] = Json::Value(tmp->key);
|
||||
response["result"].append(doc_info);
|
||||
tmp = tmp->level[0].forward;
|
||||
}
|
||||
} else {
|
||||
SkipListNode *tmp = skipList_.GetFooter()->backward;
|
||||
while(tmp->backward != NULL) {
|
||||
if(doc_manager_->CheckDocByExtraFilterKey(tmp->value) == false){
|
||||
tmp = tmp->backward;
|
||||
continue;
|
||||
}
|
||||
sequence++;
|
||||
rank++;
|
||||
if (component_->ReturnAll() == 0){
|
||||
if (sequence < limit_start || sequence > limit_end) {
|
||||
tmp = tmp->backward;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
Json::Value doc_info;
|
||||
doc_info["doc_id"] = Json::Value(tmp->value);
|
||||
doc_info["score"] = Json::Value(tmp->key);
|
||||
response["result"].append(doc_info);
|
||||
tmp = tmp->backward;
|
||||
}
|
||||
}
|
||||
|
||||
if(component_->Fields().size() > 0){
|
||||
doc_manager_->AppendFieldsToRes(response, component_->Fields());
|
||||
}
|
||||
|
||||
if (rank > 0)
|
||||
AppendHighLightWord(response);
|
||||
if (has_gis_) {
|
||||
response["type"] = 1;
|
||||
}
|
||||
else {
|
||||
response["type"] = 0;
|
||||
}
|
||||
response["count"] = rank;
|
||||
/*if(m_index_set_cnt != 0){
|
||||
response["count"] = m_index_set_cnt;
|
||||
}*/
|
||||
log_debug("search result end: %lld.", (long long int)GetSysTimeMicros());
|
||||
std::string outputConfig = writer.write(response);
|
||||
request_->setResult(outputConfig);
|
||||
/*if (component_->ReturnAll() == 0 && component_->CacheSwitch() == 1 && component_->PageIndex() == 1 && has_gis_ == 0
|
||||
&& rank > 0 && outputConfig.size() < MAX_VALUE_LEN) {
|
||||
string m_Data_Cache = m_Primary_Data + "|" + component_->DataAnd() + "|" + component_->DataInvert() + "|" + component_->DataComplete() + "|" +
|
||||
ToString(sort_type_) + "|" + ToString(appid_);
|
||||
unsigned data_size = m_Data_Cache.size();
|
||||
int ret = cachelist->add_list(m_Data_Cache.c_str(), outputConfig.c_str(), data_size, outputConfig.size());
|
||||
if (ret != 0) {
|
||||
log_error("add to cache_list error, ret: %d.", ret);
|
||||
}
|
||||
else {
|
||||
log_debug("add to cache_list: %s.", m_Data_Cache.c_str());
|
||||
}
|
||||
}*/
|
||||
}
|
||||
|
||||
void MatchQueryProcess::AppendHighLightWord(Json::Value& response)
|
||||
{
|
||||
int count = 0;
|
||||
set<string>::iterator iter = highlightWord_.begin();
|
||||
for (; iter != highlightWord_.end(); iter++) {
|
||||
if (count >= 10)
|
||||
break;
|
||||
count = count + 1;
|
||||
response["hlWord"].append((*iter).c_str());
|
||||
}
|
||||
return ;
|
||||
}
|
@ -23,28 +23,16 @@
|
||||
|
||||
class MatchQueryProcess: public QueryProcess{
|
||||
public:
|
||||
MatchQueryProcess(uint32_t appid, Json::Value& value, Component* component);
|
||||
~MatchQueryProcess();
|
||||
int ParseContent();
|
||||
int GetValidDoc();
|
||||
int GetScoreAndSort();
|
||||
void TaskEnd();
|
||||
MatchQueryProcess(const Json::Value& value);
|
||||
virtual~ MatchQueryProcess();
|
||||
|
||||
int ParseContent(uint32_t type);
|
||||
void AppendHighLightWord(Json::Value& response);
|
||||
public:
|
||||
virtual int ParseContent(int logic_type);
|
||||
virtual int GetValidDoc(int logic_type, const std::vector<FieldInfo>& keys);
|
||||
|
||||
private:
|
||||
set<string> highlightWord_;
|
||||
map<string, vec> doc_info_map_;
|
||||
map<string, uint32_t> key_in_doc_;
|
||||
vector<IndexInfo> doc_vec_;
|
||||
hash_double_map distances_;
|
||||
set<string> valid_docs_;
|
||||
uint32_t appid_;
|
||||
uint32_t sort_type_;
|
||||
string sort_field_;
|
||||
bool has_gis_;
|
||||
FIELDTYPE sort_field_type_;
|
||||
virtual int ParseContent();
|
||||
virtual int GetValidDoc();
|
||||
};
|
||||
|
||||
#endif
|
@ -1,50 +1,191 @@
|
||||
#include "query_process.h"
|
||||
#include <assert.h>
|
||||
#include "../valid_doc_filter.h"
|
||||
#include "../order_op.h"
|
||||
|
||||
QueryProcess::QueryProcess(uint32_t appid, Json::Value& value, Component* component)
|
||||
:component_(component),
|
||||
appid_(appid),
|
||||
value_(value)
|
||||
{
|
||||
QueryProcess::QueryProcess(const Json::Value& value)
|
||||
: component_(NULL)
|
||||
, doc_manager_(NULL)
|
||||
, request_(NULL)
|
||||
, sort_operator_base_(NULL)
|
||||
, p_scoredocid_set_(NULL)
|
||||
, parse_value_(value)
|
||||
, response_()
|
||||
{ }
|
||||
|
||||
QueryProcess::~QueryProcess()
|
||||
{
|
||||
DELETE(sort_operator_base_);
|
||||
ResultContext::Instance()->Clear();
|
||||
}
|
||||
|
||||
QueryProcess::~QueryProcess(){
|
||||
int QueryProcess::StartQuery(){
|
||||
assert(component_ != NULL);
|
||||
assert(doc_manager_ != NULL);
|
||||
assert(request_ != NULL);
|
||||
|
||||
int iret = ParseContent();
|
||||
if (0 == iret){
|
||||
iret = GetValidDoc();
|
||||
if (0 == iret){
|
||||
iret = CheckValidDoc();
|
||||
if (0 == iret){
|
||||
iret = GetScore();
|
||||
if (0 == iret){
|
||||
SetResponse();
|
||||
|
||||
Json::FastWriter writer;
|
||||
std::string outputConfig = writer.write(response_);
|
||||
request_->setResult(outputConfig);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return iret;
|
||||
}
|
||||
|
||||
int QueryProcess::DoJob(){
|
||||
TaskBegin();
|
||||
ParseContent();
|
||||
GetValidDoc();
|
||||
GetScoreAndSort();
|
||||
TaskEnd();
|
||||
int QueryProcess::CheckValidDoc(){
|
||||
log_debug("query base CheckValidDoc beginning...");
|
||||
bool bRet = doc_manager_->GetDocContent();
|
||||
if (false == bRet){
|
||||
log_error("GetDocContent error.");
|
||||
return -RT_DTC_ERR;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void QueryProcess::SetSkipList(SkipList& skipList){
|
||||
skipList_ = skipList;
|
||||
int QueryProcess::GetScore()
|
||||
{
|
||||
log_debug("query base GetScore beginning...");
|
||||
sort_operator_base_ = new SortOperatorBase(component_ , doc_manager_);
|
||||
p_scoredocid_set_ = sort_operator_base_->GetSortOperator((uint32_t)component_->SortType());
|
||||
return 0;
|
||||
}
|
||||
|
||||
void QueryProcess::SetRequest(CTaskRequest* request){
|
||||
request_ = request;
|
||||
void QueryProcess::SortScore(int& i_sequence , int& i_rank)
|
||||
{
|
||||
log_debug("query base sortscore beginning...");
|
||||
if ((SORT_FIELD_DESC == component_->SortType() || SORT_FIELD_ASC == component_->SortType())
|
||||
&& p_scoredocid_set_->empty()){
|
||||
SortByCOrderOp(i_rank);
|
||||
}else if(SORT_FIELD_ASC == component_->SortType()){
|
||||
AscSort(i_sequence, i_rank);
|
||||
}else{ // 不指定情况下,默认降序,分高的在前,时间新的在前,docid大的在前(地理位置查询除外)
|
||||
DescSort(i_sequence, i_rank);
|
||||
}
|
||||
}
|
||||
|
||||
void QueryProcess::TaskBegin(){
|
||||
// Assembles the JSON response: runs the sort, appends requested fields and
// highlight words, and records the returned-document count.
const Json::Value& QueryProcess::SetResponse()
{
    log_debug("search result begin.");
    response_["code"] = 0;

    int i_seq = -1;
    int i_rank = 0;
    response_["type"] = 0;
    SortScore(i_seq , i_rank);

    if (!component_->RequiredFields().empty()){
        doc_manager_->AppendFieldsToRes(response_, component_->RequiredFields());
    }

    // Only attach highlight words when at least one document was returned.
    if (i_rank > 0){
        AppendHighLightWord();
    }
    response_["count"] = i_rank;
    log_debug("search result end: %lld.", (long long int)GetSysTimeMicros());
    return response_;
}
|
||||
|
||||
int QueryProcess::ParseContent(){
|
||||
return 0;
|
||||
void QueryProcess::SortByCOrderOp(int& i_rank)
|
||||
{
|
||||
log_debug("query base SortByCOrderOp beginning...");
|
||||
OrderOpCond order_op_cond;
|
||||
order_op_cond.last_id = component_->LastId();
|
||||
order_op_cond.limit_start = component_->PageSize() * (component_->PageIndex()-1);
|
||||
order_op_cond.count = component_->PageSize();
|
||||
order_op_cond.has_extra_filter = false;
|
||||
if(component_->ExtraFilterOrKeys().size() != 0 || component_->ExtraFilterAndKeys().size() != 0
|
||||
|| component_->ExtraFilterInvertKeys().size() != 0){
|
||||
order_op_cond.has_extra_filter = true;
|
||||
}
|
||||
if(FIELDTYPE_INT == sort_operator_base_->GetSortFieldType()){
|
||||
i_rank += doc_manager_->ScoreIntMap().size();
|
||||
COrderOp<int> orderOp(FIELDTYPE_INT, component_->SearchAfter(), component_->SortType());
|
||||
orderOp.Process(doc_manager_->ScoreIntMap(), atoi(component_->LastScore().c_str()), order_op_cond, response_, doc_manager_);
|
||||
} else if(FIELDTYPE_DOUBLE == sort_operator_base_->GetSortFieldType()) {
|
||||
i_rank += doc_manager_->ScoreDoubleMap().size();
|
||||
COrderOp<double> orderOp(FIELDTYPE_DOUBLE, component_->SearchAfter(), component_->SortType());
|
||||
orderOp.Process(doc_manager_->ScoreDoubleMap(), atof(component_->LastScore().c_str()), order_op_cond, response_, doc_manager_);
|
||||
} else {
|
||||
i_rank += doc_manager_->ScoreStrMap().size();
|
||||
COrderOp<std::string> orderOp(FIELDTYPE_STRING, component_->SearchAfter(), component_->SortType());
|
||||
orderOp.Process(doc_manager_->ScoreStrMap(), component_->LastScore(), order_op_cond, response_, doc_manager_);
|
||||
}
|
||||
}
|
||||
|
||||
int QueryProcess::GetValidDoc(){
|
||||
return 0;
|
||||
void QueryProcess::AscSort(int& i_sequence , int& i_rank)
|
||||
{
|
||||
log_debug("ascsort, result size:%d ", (uint32_t)p_scoredocid_set_->size());
|
||||
int i_limit_start = component_->PageSize() * (component_->PageIndex() - 1);
|
||||
int i_limit_end = component_->PageSize() * component_->PageIndex() - 1;
|
||||
|
||||
std::set<ScoreDocIdNode>::iterator iter = p_scoredocid_set_->begin();
|
||||
for( ;iter != p_scoredocid_set_->end(); ++iter){
|
||||
// 通过extra_filter_keys进行额外过滤(针对区分度不高的字段)
|
||||
if(doc_manager_->CheckDocByExtraFilterKey(iter->s_docid) == false){
|
||||
log_debug("CheckDocByExtraFilterKey failed, %s", iter->s_docid.c_str());
|
||||
continue;
|
||||
}
|
||||
i_sequence ++;
|
||||
i_rank ++;
|
||||
if(component_->ReturnAll() == 0){
|
||||
if (i_sequence < i_limit_start || i_sequence > i_limit_end) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
Json::Value doc_info;
|
||||
doc_info["doc_id"] = Json::Value(iter->s_docid);
|
||||
doc_info["score"] = Json::Value(iter->d_score);
|
||||
response_["result"].append(doc_info);
|
||||
}
|
||||
}
|
||||
|
||||
int QueryProcess::GetScoreAndSort(){
|
||||
return 0;
|
||||
void QueryProcess::DescSort(int& i_sequence , int& i_rank)
|
||||
{
|
||||
log_debug("descsort, result size:%d ", (uint32_t)p_scoredocid_set_->size());
|
||||
int i_limit_start = component_->PageSize() * (component_->PageIndex() - 1);
|
||||
int i_limit_end = component_->PageSize() * component_->PageIndex() - 1;
|
||||
log_debug("limit_start:%d , limit_end:%d", i_limit_start, i_limit_end);
|
||||
|
||||
std::set<ScoreDocIdNode>::reverse_iterator riter = p_scoredocid_set_->rbegin();
|
||||
for( ;riter != p_scoredocid_set_->rend(); ++riter){
|
||||
if(doc_manager_->CheckDocByExtraFilterKey(riter->s_docid) == false){
|
||||
continue;
|
||||
}
|
||||
i_sequence++;
|
||||
i_rank++;
|
||||
if (component_->ReturnAll() == 0){
|
||||
if (i_sequence < i_limit_start || i_sequence > i_limit_end) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
Json::Value doc_info;
|
||||
doc_info["doc_id"] = Json::Value(riter->s_docid);
|
||||
doc_info["score"] = Json::Value(riter->d_score);
|
||||
response_["result"].append(doc_info);
|
||||
}
|
||||
}
|
||||
|
||||
void QueryProcess::TaskEnd(){
|
||||
|
||||
}
|
||||
void QueryProcess::AppendHighLightWord()
|
||||
{
|
||||
int count = 0;
|
||||
const HighLightWordSet& highlight_word_set = ResultContext::Instance()->GetHighLightWordSet();
|
||||
std::set<std::string>::const_iterator iter = highlight_word_set.cbegin();
|
||||
for (; iter != highlight_word_set.cend(); iter++) {
|
||||
if (count >= 10)
|
||||
break;
|
||||
++count;
|
||||
response_["hlWord"].append((*iter).c_str());
|
||||
}
|
||||
}
|
@ -19,38 +19,82 @@
|
||||
#ifndef __QUERY_PROCESS_H__
|
||||
#define __QUERY_PROCESS_H__
|
||||
|
||||
#include "../component.h"
|
||||
#include "../logical_operate.h"
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include "../request_context.h"
|
||||
#include "../result_context.h"
|
||||
#include "../valid_doc_filter.h"
|
||||
#include "../doc_manager.h"
|
||||
#include "../comm.h"
|
||||
#include "../db_manager.h"
|
||||
#include "../split_manager.h"
|
||||
#include "../comm.h"
|
||||
#include "../sort_operator/sort_operator_base.h"
|
||||
#include "skiplist.h"
|
||||
#include "task_request.h"
|
||||
|
||||
const char* const BOOL ="bool";
|
||||
const char* const MUST ="must";
|
||||
const char* const SHOULD ="should";
|
||||
const char* const MUST_NOT ="must_not";
|
||||
const char* const TERM ="term";
|
||||
const char* const MATCH ="match";
|
||||
const char* const RANGE ="range";
|
||||
const char* const GEODISTANCE ="geo_distance";
|
||||
const char* const DISTANCE = "distance";
|
||||
const char* const GEOSHAPE ="geo_polygon";
|
||||
|
||||
#define FIRST_TEST_INDEX 0
|
||||
#define FIRST_SPLIT_WORD_INDEX 0
|
||||
|
||||
// query process definition has priorities ,beacause of BOOL query
|
||||
enum E_INDEX_READ_QUERY_PROCESS{
|
||||
E_INDEX_READ_PRE_TERM,
|
||||
E_INDEX_READ_RANGE,
|
||||
E_INDEX_READ_GEO_DISTANCE,
|
||||
E_INDEX_READ_GEO_SHAPE,
|
||||
E_INDEX_READ_MATCH,
|
||||
E_INDEX_READ_TERM,
|
||||
E_INDEX_READ_TOTAL_NUM
|
||||
};
|
||||
|
||||
class QueryProcess{
|
||||
public:
|
||||
QueryProcess(uint32_t appid, Json::Value& value, Component* component);
|
||||
~QueryProcess();
|
||||
int DoJob();
|
||||
void SetSkipList(SkipList& skipList);
|
||||
void SetRequest(CTaskRequest* request);
|
||||
QueryProcess(const Json::Value& value);
|
||||
virtual~ QueryProcess();
|
||||
|
||||
public:
|
||||
int StartQuery();
|
||||
void SetRequest(CTaskRequest* const request) { request_ = request; };
|
||||
void SetParseJsonValue(const Json::Value& value) { parse_value_ = value; };
|
||||
void SetComponent(RequestContext* const component) { component_ = component;};
|
||||
void SetDocManager(DocManager* const doc_manager) { doc_manager_ = doc_manager;};
|
||||
|
||||
public:
|
||||
virtual int ParseContent(int logic_type) = 0;
|
||||
virtual int GetValidDoc(int logic_type , const std::vector<FieldInfo>& keys) = 0;
|
||||
|
||||
virtual int ParseContent() = 0;
|
||||
virtual int GetValidDoc() = 0;
|
||||
virtual int GetScore();
|
||||
virtual void SortScore(int& i_sequence , int& i_rank);
|
||||
virtual const Json::Value& SetResponse();
|
||||
|
||||
protected:
|
||||
void TaskBegin();
|
||||
virtual int ParseContent();
|
||||
virtual int GetValidDoc();
|
||||
virtual int GetScoreAndSort();
|
||||
virtual void TaskEnd();
|
||||
void SortByCOrderOp(int& i_rank);
|
||||
void AscSort(int& i_sequence , int& i_rank);
|
||||
void DescSort(int& i_sequence , int& i_rank);
|
||||
void AppendHighLightWord();
|
||||
int CheckValidDoc();
|
||||
|
||||
protected:
|
||||
Component* component_;
|
||||
LogicalOperate* logical_operate_;
|
||||
RequestContext* component_;
|
||||
DocManager* doc_manager_;
|
||||
uint32_t appid_;
|
||||
Json::Value value_;
|
||||
SkipList skipList_;
|
||||
CTaskRequest* request_;
|
||||
SortOperatorBase* sort_operator_base_;
|
||||
std::set<ScoreDocIdNode>* p_scoredocid_set_;
|
||||
|
||||
Json::Value parse_value_;
|
||||
Json::Value response_;
|
||||
};
|
||||
|
||||
#endif
|
205
src/search_local/index_read/process/range_query_process.cc
Normal file
205
src/search_local/index_read/process/range_query_process.cc
Normal file
@ -0,0 +1,205 @@
|
||||
#include "range_query_process.h"
|
||||
#include "../valid_doc_filter.h"
|
||||
|
||||
RangeQueryProcess::RangeQueryProcess(const Json::Value& value, uint32_t ui_query_type)
|
||||
: QueryProcess(value)
|
||||
, ui_query_type_(ui_query_type)
|
||||
{ }
|
||||
|
||||
RangeQueryProcess::~RangeQueryProcess()
|
||||
{ }
|
||||
|
||||
int RangeQueryProcess::ParseContent(){
|
||||
return ParseContent(ORKEY);
|
||||
}
|
||||
|
||||
int RangeQueryProcess::ParseContent(int logic_type)
|
||||
{
|
||||
std::vector<FieldInfo> fieldInfos;
|
||||
Json::Value::Members member = parse_value_.getMemberNames();
|
||||
Json::Value::Members::iterator iter = member.begin();
|
||||
if(iter != member.end()){ // 一个range下只对应一个字段
|
||||
std::string fieldname = *iter;
|
||||
uint32_t segment_tag = 0;
|
||||
FieldInfo fieldInfo;
|
||||
fieldInfo.query_type = ui_query_type_;
|
||||
|
||||
uint32_t uiRet = DBManager::Instance()->GetWordField(segment_tag, component_->Appid()
|
||||
, fieldname, fieldInfo);
|
||||
if (0 == uiRet){
|
||||
return -RT_GET_FIELD_ERROR;
|
||||
}
|
||||
|
||||
Json::Value field_value = parse_value_[fieldname];
|
||||
if(field_value.isObject()){
|
||||
Json::Value start;
|
||||
Json::Value end;
|
||||
RANGTYPE ui_range_type;
|
||||
if(field_value.isMember(GTE)){
|
||||
start = field_value[GTE];
|
||||
if(field_value.isMember(LTE)){
|
||||
end = field_value[LTE];
|
||||
ui_range_type = RANGE_GELE;
|
||||
} else if(field_value.isMember(LT)){
|
||||
end = field_value[LT];
|
||||
ui_range_type = RANGE_GELT;
|
||||
} else {
|
||||
ui_range_type = RANGE_GE;
|
||||
}
|
||||
} else if(field_value.isMember(GT)){
|
||||
start = field_value[GT];
|
||||
if(field_value.isMember(LTE)){
|
||||
end = field_value[LTE];
|
||||
ui_range_type = RANGE_GTLE;
|
||||
} else if(field_value.isMember(LT)){
|
||||
end = field_value[LT];
|
||||
ui_range_type = RANGE_GTLT;
|
||||
} else {
|
||||
ui_range_type = RANGE_GT;
|
||||
}
|
||||
} else if(field_value.isMember(LTE)){
|
||||
end = field_value[LTE];
|
||||
ui_range_type = RANGE_LE;
|
||||
} else if(field_value.isMember(LT)){
|
||||
end = field_value[LT];
|
||||
ui_range_type = RANGE_LT;
|
||||
}
|
||||
fieldInfo.range_type = ui_range_type;
|
||||
|
||||
log_debug("range_type:%d", ui_range_type);
|
||||
if(start.isInt()){
|
||||
fieldInfo.start = start.asInt();
|
||||
} else if (start.isDouble()){
|
||||
fieldInfo.start = start.asDouble();
|
||||
} else {
|
||||
log_error("range query lower value only support int/double");
|
||||
}
|
||||
|
||||
if (end.isInt()){
|
||||
fieldInfo.end = end.asInt();
|
||||
}else if (end.isDouble()){
|
||||
fieldInfo.end = end.asDouble();
|
||||
} else {
|
||||
log_error("range query upper limit value only support int/double");
|
||||
}
|
||||
|
||||
log_debug("start:%f , end:%f" , fieldInfo.start , fieldInfo.end);
|
||||
fieldInfos.push_back(fieldInfo);
|
||||
}
|
||||
if (!fieldInfos.empty()) {
|
||||
component_->AddToFieldList(logic_type, fieldInfos);
|
||||
}
|
||||
} else {
|
||||
log_error("RangeQueryParser error, value is null");
|
||||
return -RT_PARSE_CONTENT_ERROR;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int RangeQueryProcess::GetValidDoc(){
|
||||
if (component_->GetFieldList(ORKEY).empty()){
|
||||
return -RT_GET_FIELD_ERROR;
|
||||
}
|
||||
|
||||
return GetValidDoc(ORKEY , component_->GetFieldList(ORKEY)[FIRST_TEST_INDEX]);
|
||||
}
|
||||
|
||||
int RangeQueryProcess::GetValidDoc(int logic_type, const std::vector<FieldInfo>& keys)
|
||||
{
|
||||
std::vector<IndexInfo> index_info_vet;
|
||||
int iret = ValidDocFilter::Instance()->RangeQueryInvertIndexSearch(keys, index_info_vet);
|
||||
if (iret != 0) { return iret;}
|
||||
ResultContext::Instance()->SetIndexInfos(logic_type , index_info_vet);
|
||||
return iret;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
PreTerminal::PreTerminal(const Json::Value& value, uint32_t ui_query_type)
|
||||
: RangeQueryProcess(value , ui_query_type)
|
||||
, candidate_doc_()
|
||||
{}
|
||||
|
||||
PreTerminal::~PreTerminal()
|
||||
{}
|
||||
|
||||
int PreTerminal::GetValidDoc(int logic_type, const std::vector<FieldInfo>& keys){
|
||||
return 0;
|
||||
}
|
||||
|
||||
int PreTerminal::GetValidDoc(){
|
||||
uint32_t count = 0;
|
||||
uint32_t N = 2;
|
||||
uint32_t limit_start = 0;
|
||||
int try_times = 0;
|
||||
while(count < component_->PageSize()){
|
||||
if(try_times++ > 10){
|
||||
log_debug("ProcessTerminal try_times is the max, return");
|
||||
break;
|
||||
}
|
||||
vector<TerminalRes> and_vecs;
|
||||
TerminalQryCond query_cond;
|
||||
query_cond.sort_type = component_->SortType();
|
||||
query_cond.sort_field = component_->SortField();
|
||||
query_cond.last_id = component_->LastId();
|
||||
query_cond.last_score = component_->LastScore();
|
||||
query_cond.limit_start = limit_start;
|
||||
query_cond.page_size = component_->PageSize() * N;
|
||||
int ret = ValidDocFilter::Instance()->ProcessTerminal(component_->AndKeys(), query_cond, and_vecs);
|
||||
if(0 != ret){
|
||||
log_error("ProcessTerminal error.");
|
||||
return -RT_GET_DOC_ERR;
|
||||
}
|
||||
for(int i = 0; i < (int)and_vecs.size(); i++){
|
||||
std::string doc_id = and_vecs[i].doc_id;
|
||||
std::stringstream ss;
|
||||
ss << (int)and_vecs[i].score;
|
||||
std::string ss_key = ss.str();
|
||||
log_debug("last_score: %s, ss_key: %s, score: %lf", query_cond.last_score.c_str(), ss_key.c_str(), and_vecs[i].score);
|
||||
if(component_->LastId() != "" && ss_key == query_cond.last_score){ // 翻页时过滤掉已经返回过的文档编号
|
||||
if(component_->SortType() == SORT_FIELD_DESC && doc_id >= component_->LastId()){
|
||||
continue;
|
||||
}
|
||||
if(component_->SortType() == SORT_FIELD_ASC && doc_id <= component_->LastId()){
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if(doc_manager_->CheckDocByExtraFilterKey(doc_id) == true){
|
||||
count++;
|
||||
candidate_doc_.push_back(and_vecs[i]);
|
||||
}
|
||||
}
|
||||
limit_start += component_->PageSize() * N;
|
||||
N *= 2;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Pre-terminal results are already scored when collected in GetValidDoc(),
// so there is nothing to compute here.
int PreTerminal::GetScore(){
    log_info("RangeQueryPreTerminal do not need get score");
    return 0;
}
|
||||
|
||||
// Serializes at most one page of the collected candidate documents into the
// JSON response.
const Json::Value& PreTerminal::SetResponse(){
    response_["code"] = 0;
    int i_rank = 0;
    for (std::vector<TerminalRes>::const_iterator it = candidate_doc_.begin();
         it != candidate_doc_.end() && i_rank < (int)component_->PageSize(); ++it){
        ++i_rank;
        Json::Value doc_info;
        doc_info["doc_id"] = Json::Value(it->doc_id.c_str());
        doc_info["score"] = Json::Value(it->score);
        response_["result"].append(doc_info);
    }
    response_["type"] = 0;
    response_["count"] = i_rank; // TODO: this is the page count, not the overall total
    return response_;
}
|
81
src/search_local/index_read/process/range_query_process.h
Normal file
81
src/search_local/index_read/process/range_query_process.h
Normal file
@ -0,0 +1,81 @@
|
||||
#ifndef RANGE_QUERY_PROCESS_H_
|
||||
#define RANGE_QUERY_PROCESS_H_
|
||||
|
||||
#include "singleton.h"
|
||||
#include "noncopyable.h"
|
||||
#include "query_process.h"
|
||||
|
||||
const char* const GTE ="gte";
|
||||
const char* const GT ="gt";
|
||||
const char* const LTE ="lte";
|
||||
const char* const LT ="lt";
|
||||
|
||||
class RangeQueryProcess: public QueryProcess{
|
||||
public:
|
||||
RangeQueryProcess(const Json::Value& value , uint32_t ui_query_type);
|
||||
virtual~ RangeQueryProcess();
|
||||
|
||||
public:
|
||||
virtual int ParseContent(int logic_type);
|
||||
virtual int GetValidDoc(int logic_type, const std::vector<FieldInfo>& keys);
|
||||
virtual int GetValidDoc();
|
||||
|
||||
protected:
|
||||
virtual int ParseContent();
|
||||
|
||||
private:
|
||||
uint32_t ui_query_type_;
|
||||
};
|
||||
|
||||
class PreTerminal : public RangeQueryProcess{
|
||||
public:
|
||||
PreTerminal(const Json::Value& value, uint32_t ui_query_type);
|
||||
virtual~ PreTerminal();
|
||||
|
||||
public:
|
||||
virtual int GetValidDoc(int logic_type, const std::vector<FieldInfo>& keys);
|
||||
|
||||
virtual int GetValidDoc();
|
||||
virtual int GetScore();
|
||||
virtual const Json::Value& SetResponse();
|
||||
|
||||
private:
|
||||
std::vector<TerminalRes> candidate_doc_;
|
||||
};
|
||||
|
||||
class RangeQueryGenerator : private noncopyable{
|
||||
public:
|
||||
RangeQueryGenerator() { };
|
||||
virtual~ RangeQueryGenerator() { };
|
||||
|
||||
public:
|
||||
static RangeQueryGenerator* Instance(){
|
||||
return CSingleton<RangeQueryGenerator>::Instance();
|
||||
};
|
||||
|
||||
static void Destroy(){
|
||||
CSingleton<RangeQueryGenerator>::Destroy();
|
||||
};
|
||||
|
||||
public:
|
||||
// 内存释放由调用方处理
|
||||
QueryProcess* GetRangeQueryProcess(int iType , const Json::Value& parse_value){
|
||||
QueryProcess* current_range_query = NULL;
|
||||
switch (iType){
|
||||
case E_INDEX_READ_RANGE:{
|
||||
current_range_query = new RangeQueryProcess(parse_value , E_INDEX_READ_RANGE);
|
||||
}
|
||||
break;
|
||||
case E_INDEX_READ_PRE_TERM:{
|
||||
current_range_query = new PreTerminal(parse_value , E_INDEX_READ_PRE_TERM);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return current_range_query;
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
69
src/search_local/index_read/process/term_query_process.cc
Normal file
69
src/search_local/index_read/process/term_query_process.cc
Normal file
@ -0,0 +1,69 @@
|
||||
#include "term_query_process.h"
|
||||
|
||||
TermQueryProcess::TermQueryProcess(const Json::Value& value)
|
||||
: QueryProcess(value)
|
||||
{}
|
||||
|
||||
TermQueryProcess::~TermQueryProcess(){
|
||||
|
||||
}
|
||||
|
||||
int TermQueryProcess::ParseContent(){
|
||||
return ParseContent(ORKEY);
|
||||
}
|
||||
|
||||
int TermQueryProcess::ParseContent(int logic_type){
|
||||
std::vector<FieldInfo> field_info_vec;
|
||||
Json::Value::Members member = parse_value_.getMemberNames();
|
||||
Json::Value::Members::iterator iter = member.begin();
|
||||
std::string field_name;
|
||||
Json::Value field_value;
|
||||
if(iter != member.end()){ // 一个term下只对应一个字段
|
||||
field_name = *iter;
|
||||
field_value = parse_value_[field_name];
|
||||
} else {
|
||||
log_error("TermQueryProcess error, value is null");
|
||||
return -RT_PARSE_CONTENT_ERROR;
|
||||
}
|
||||
uint32_t segment_tag = 0;
|
||||
FieldInfo field_info;
|
||||
field_info.query_type = E_INDEX_READ_TERM;
|
||||
|
||||
uint32_t uiRet = DBManager::Instance()->GetWordField(segment_tag, component_->Appid()
|
||||
, field_name, field_info);
|
||||
if(uiRet != 0 && field_info.index_tag == 0){
|
||||
ExtraFilterKey extra_filter_key;
|
||||
extra_filter_key.field_name = field_name;
|
||||
extra_filter_key.field_value = field_value.asString();
|
||||
extra_filter_key.field_type = field_info.field_type;
|
||||
component_->AddToExtraFieldList(logic_type , extra_filter_key);
|
||||
} else if(uiRet != 0){
|
||||
field_info.word = field_value.asString();
|
||||
field_info_vec.push_back(field_info);
|
||||
} else {
|
||||
log_error("field_name:%s error, not in the app_field_define", field_name.c_str());
|
||||
return -RT_PARSE_CONTENT_ERROR;
|
||||
}
|
||||
component_->AddToFieldList(logic_type, field_info_vec);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int TermQueryProcess::GetValidDoc(){
|
||||
if (component_->GetFieldList(ORKEY).empty()){
|
||||
return -RT_GET_FIELD_ERROR;
|
||||
}
|
||||
return GetValidDoc(ORKEY , component_->GetFieldList(ORKEY)[FIRST_TEST_INDEX]);
|
||||
}
|
||||
|
||||
int TermQueryProcess::GetValidDoc(int logic_type, const std::vector<FieldInfo>& keys){
|
||||
log_debug("term query GetValidDoc beginning...");
|
||||
if (0 == keys[FIRST_SPLIT_WORD_INDEX].index_tag){
|
||||
return -RT_GET_FIELD_ERROR;
|
||||
}
|
||||
|
||||
std::vector<IndexInfo> index_info_vet;
|
||||
int iret = ValidDocFilter::Instance()->TextInvertIndexSearch(keys, index_info_vet);
|
||||
if (iret != 0) { return iret; }
|
||||
ResultContext::Instance()->SetIndexInfos(logic_type , index_info_vet);
|
||||
return 0;
|
||||
}
|
19
src/search_local/index_read/process/term_query_process.h
Normal file
19
src/search_local/index_read/process/term_query_process.h
Normal file
@ -0,0 +1,19 @@
|
||||
#ifndef TERM_QUERY_PROCESS_H_
|
||||
#define TERM_QUERY_PROCESS_H_
|
||||
|
||||
#include "query_process.h"
|
||||
|
||||
class TermQueryProcess : public QueryProcess{
|
||||
public:
|
||||
TermQueryProcess(const Json::Value& value);
|
||||
virtual ~TermQueryProcess();
|
||||
|
||||
public:
|
||||
virtual int ParseContent(int logic_type);
|
||||
virtual int GetValidDoc(int logic_type, const std::vector<FieldInfo>& keys);
|
||||
|
||||
private:
|
||||
virtual int ParseContent();
|
||||
virtual int GetValidDoc();
|
||||
};
|
||||
#endif
|
@ -1,121 +0,0 @@
|
||||
#include "bool_query_parser.h"
|
||||
#include "../db_manager.h"
|
||||
#include "../split_manager.h"
|
||||
#include "range_query_parser.h"
|
||||
#include "term_query_parser.h"
|
||||
#include "match_query_parser.h"
|
||||
#include "geo_distance_parser.h"
|
||||
|
||||
const char* const NAME ="bool";
|
||||
const char* const MUST ="must";
|
||||
const char* const SHOULD ="should";
|
||||
const char* const MUST_NOT ="must_not";
|
||||
const char* const TERM ="term";
|
||||
const char* const MATCH ="match";
|
||||
const char* const RANGE ="range";
|
||||
const char* const GEODISTANCE ="geo_distance";
|
||||
|
||||
// Constructor. The sub-parser pointers MUST start as NULL: the destructor
// deletes whichever ones DoJobByType() allocated, and reading an
// uninitialized pointer there is undefined behavior (they were previously
// left uninitialized).
BoolQueryParser::BoolQueryParser(uint32_t a, Json::Value& v)
    : appid(a)
    , value(v)
    , range_query_parser(NULL)
    , term_query_parser(NULL)
    , match_query_parser(NULL)
    , geo_query_parser(NULL)
{

}
||||
|
||||
BoolQueryParser::~BoolQueryParser(){
|
||||
if(NULL != range_query_parser){
|
||||
delete range_query_parser;
|
||||
}
|
||||
if(NULL != term_query_parser){
|
||||
delete term_query_parser;
|
||||
}
|
||||
if(NULL != match_query_parser){
|
||||
delete match_query_parser;
|
||||
}
|
||||
if(NULL != geo_query_parser){
|
||||
delete geo_query_parser;
|
||||
}
|
||||
}
|
||||
|
||||
// Dispatches one bool sub-clause to the matching sub-parser (term / match /
// range / geo_distance). Returns the sub-parser's result, or
// -RT_PARSE_CONTENT_ERROR for an unsupported clause.
// NOTE(review): when one section contains several clauses of the same kind the
// member pointer is overwritten and the earlier parser leaks -- TODO fix
// alongside pointer ownership.
int BoolQueryParser::DoJobByType(Json::Value& value, uint32_t type, QueryParserRes* query_parser_res){
    if(value.isMember(TERM)){
        term_query_parser = new TermQueryParser(appid, value[TERM]);
        return term_query_parser->ParseContent(query_parser_res, type);
    } else if(value.isMember(MATCH)){
        match_query_parser = new MatchQueryParser(appid, value[MATCH]);
        return match_query_parser->ParseContent(query_parser_res, type);
    } else if(value.isMember(RANGE)){
        range_query_parser = new RangeQueryParser(appid, value[RANGE]);
        return range_query_parser->ParseContent(query_parser_res, type);
    } else if(value.isMember(GEODISTANCE)){
        geo_query_parser = new GeoDistanceParser(appid, value[GEODISTANCE]);
        return geo_query_parser->ParseContent(query_parser_res);
    } else {
        string err_msg = "BoolQueryParser only support term/match/range/geo_distance!";
        // Fixed: the message was passed as the printf-style format string;
        // any '%' in it would be undefined behavior.
        log_error("%s", err_msg.c_str());
        query_parser_res->ErrMsg() = err_msg;
        return -RT_PARSE_CONTENT_ERROR;
    }
}
|
||||
|
||||
// Walks the three bool sections in order (must -> ANDKEY, should -> ORKEY,
// must_not -> INVERTKEY). Each section may hold one clause object or an array
// of them; every clause is dispatched through DoJobByType().
int BoolQueryParser::ParseContent(QueryParserRes* query_parser_res){
    const char* const section_names[] = { MUST, SHOULD, MUST_NOT };
    const int section_types[] = { ANDKEY, ORKEY, INVERTKEY };

    for (int s = 0; s < 3; ++s){
        if (!value.isMember(section_names[s])){
            continue;
        }
        Json::Value node = value[section_names[s]];
        if (node.isArray()){
            for (int i = 0; i < (int)node.size(); ++i){
                if (DoJobByType(node[i], section_types[s], query_parser_res) != 0){
                    log_error("DoJobByType error!");
                    return -RT_PARSE_CONTENT_ERROR;
                }
            }
        } else if (node.isObject()){
            if (DoJobByType(node, section_types[s], query_parser_res) != 0){
                log_error("DoJobByType error!");
                return -RT_PARSE_CONTENT_ERROR;
            }
        }
    }
    return 0;
}
|
@ -1,48 +0,0 @@
|
||||
/*
|
||||
* =====================================================================================
|
||||
*
|
||||
* Filename: bool_query_parser.h
|
||||
*
|
||||
* Description: bool_query_parser class definition.
|
||||
*
|
||||
* Version: 1.0
|
||||
* Created: 05/03/2021
|
||||
* Revision: none
|
||||
* Compiler: gcc
|
||||
*
|
||||
* Author: zhulin, shzhulin3@jd.com
|
||||
* Company: JD.com, Inc.
|
||||
*
|
||||
* =====================================================================================
|
||||
*/
|
||||
|
||||
#ifndef __BOOL_QUERY_PARSER_H__
|
||||
#define __BOOL_QUERY_PARSER_H__
|
||||
#include "query_parser.h"
|
||||
#include "json/json.h"
|
||||
|
||||
class RangeQueryParser;
|
||||
class TermQueryParser;
|
||||
class MatchQueryParser;
|
||||
class GeoDistanceParser;
|
||||
class BoolQueryParser : public QueryParser
|
||||
{
|
||||
public:
|
||||
BoolQueryParser(uint32_t a, Json::Value& v);
|
||||
~BoolQueryParser();
|
||||
|
||||
int ParseContent(QueryParserRes* query_parser_res);
|
||||
|
||||
private:
|
||||
int DoJobByType(Json::Value& value, uint32_t type, QueryParserRes* query_parser_res);
|
||||
|
||||
private:
|
||||
uint32_t appid;
|
||||
Json::Value value;
|
||||
RangeQueryParser* range_query_parser;
|
||||
TermQueryParser* term_query_parser;
|
||||
MatchQueryParser* match_query_parser;
|
||||
GeoDistanceParser* geo_query_parser;
|
||||
};
|
||||
|
||||
#endif
|
@ -1,140 +0,0 @@
|
||||
#include "geo_distance_parser.h"
|
||||
#include "../db_manager.h"
|
||||
#include <sstream>
|
||||
|
||||
const char* const DISTANCE ="distance";
|
||||
const int GEO_PRECISION = 6;
|
||||
const int DEFAULT_DISTANCE = 2;
|
||||
|
||||
GeoDistanceParser::GeoDistanceParser(uint32_t a, Json::Value& v)
|
||||
:appid(a),value(v)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
GeoDistanceParser::~GeoDistanceParser(){
|
||||
|
||||
}
|
||||
|
||||
// Splits `src` on `separate_character` (multi-char separators supported) and
// parses each non-" " token with atof. Empty tokens parse as 0.0, matching the
// original behavior.
// Fixed: the search position was held in an `int` and compared against -1,
// relying on std::string::npos truncating to -1 -- now uses
// string::size_type and npos as the standard specifies.
// NOTE(review): an empty separator would loop forever (as before) -- confirm
// callers never pass one.
vector<double> splitDouble(const string& src, string separate_character)
{
    vector<double> vals;

    const string::size_type sep_len = separate_character.size();
    string::size_type last_pos = 0;
    string::size_type found_pos;
    while (string::npos != (found_pos = src.find(separate_character, last_pos)))
    {
        const string token = src.substr(last_pos, found_pos - last_pos);
        if (token != " ") {
            vals.push_back(atof(token.c_str()));
        }
        last_pos = found_pos + sep_len;
    }
    // Whatever follows the final separator is also a token.
    const string tail = src.substr(last_pos);
    if (!tail.empty() && tail != " "){
        vals.push_back(atof(tail.c_str()));
    }
    return vals;
}
|
||||
|
||||
void SetErrMsg(QueryParserRes* query_parser_res, string err_msg){
|
||||
log_error(err_msg.c_str());
|
||||
query_parser_res->ErrMsg() = err_msg;
|
||||
}
|
||||
|
||||
// Parses a geo_distance clause of the form
//   {"distance": "<km string>", "<field>": <point>}
// where <point> is a "lat,lon" string, a [lon, lat] array, or a
// {"lat":.., "lon":..} object. Expands the center into surrounding geohash
// cells and records them as query keys.
// Fixed: removed a dead function-scope `vector<FieldInfo> fieldInfos` that was
// shadowed by the identically named vector inside the gisCode block.
int GeoDistanceParser::ParseContent(QueryParserRes* query_parser_res){
    // NOTE(review): this local shadows the `distance` member, which is never
    // assigned -- confirm the member is intentionally unused.
    double distance = 0;
    string fieldname;
    Json::Value::Members member = value.getMemberNames();
    for(Json::Value::Members::iterator iter = member.begin(); iter != member.end(); ++iter){
        Json::Value geo_value = value[*iter];
        if(DISTANCE == *iter){
            if(geo_value.isString()){
                distance = atof(geo_value.asString().c_str());
            } else {
                SetErrMsg(query_parser_res, "GeoDistanceParser distance should be string, the unit is km.");
                return -RT_PARSE_CONTENT_ERROR;
            }
        } else {
            fieldname = *iter;
            if(geo_value.isString()){
                // "lat,lon" string form.
                string geo_str = geo_value.asString();
                vector<double> res = splitDouble(geo_str, ",");
                if(res.size() >= 2){
                    geo.lat = res[0];
                    geo.lon = res[1];
                } else {
                    SetErrMsg(query_parser_res, "GeoDistanceParser format error.");
                    return -RT_PARSE_CONTENT_ERROR;
                }
            } else if (geo_value.isArray()){
                // Array form uses [lon, lat] ordering.
                if(geo_value.size() >= 2){
                    if(geo_value[0].isDouble()){
                        geo.lon = geo_value[0].asDouble();
                    }
                    if(geo_value[1].isDouble()){
                        geo.lat = geo_value[1].asDouble();
                    }
                } else {
                    SetErrMsg(query_parser_res, "GeoDistanceParser format error.");
                    return -RT_PARSE_CONTENT_ERROR;
                }
            } else if (geo_value.isObject()){
                if(geo_value.isMember("lat") && geo_value["lat"].isDouble()){
                    geo.lat = geo_value["lat"].asDouble();
                } else {
                    SetErrMsg(query_parser_res, "GeoDistanceParser lat format error.");
                    return -RT_PARSE_CONTENT_ERROR;
                }
                if(geo_value.isMember("lon") && geo_value["lon"].isDouble()){
                    geo.lon = geo_value["lon"].asDouble();
                } else {
                    SetErrMsg(query_parser_res, "GeoDistanceParser lon format error.");
                    return -RT_PARSE_CONTENT_ERROR;
                }
            } else {
                SetErrMsg(query_parser_res, "GeoDistanceParser error, value is not string/array/object.");
                return -RT_PARSE_CONTENT_ERROR;
            }
        }
    }
    if(distance == 0){
        distance = DEFAULT_DISTANCE;  // fall back when no/zero distance supplied
    }
    vector<string> gisCode = GetArroundGeoHash(geo, distance, GEO_PRECISION);
    if(gisCode.size() > 0){
        vector<FieldInfo> fieldInfos;
        uint32_t segment_tag = 0;
        FieldInfo fieldInfo;
        uint32_t field = DBManager::Instance()->GetWordField(segment_tag, appid, fieldname, fieldInfo);
        if (field != 0 && segment_tag == 0) {
            query_parser_res->HasGis() = 1;
            // One query key per surrounding geohash cell.
            for (size_t index = 0; index < gisCode.size(); index++) {
                FieldInfo info;
                info.field = fieldInfo.field;
                info.field_type = fieldInfo.field_type;
                info.segment_tag = fieldInfo.segment_tag;
                info.word = gisCode[index];
                fieldInfos.push_back(info);
            }
        }
        if (fieldInfos.size() != 0) {
            query_parser_res->FieldKeysMap().insert(make_pair(fieldInfo.field, fieldInfos));
        }
        stringstream sslat;
        stringstream sslon;
        sslat << geo.lat;
        query_parser_res->Latitude() = sslat.str();
        sslon << geo.lon;
        query_parser_res->Longitude() = sslon.str();
        query_parser_res->Distance() = distance;
    }
    return 0;
}
|
@ -1,39 +0,0 @@
|
||||
/*
|
||||
* =====================================================================================
|
||||
*
|
||||
* Filename: geo_distance_parser.h
|
||||
*
|
||||
* Description: geo_distance_parser class definition.
|
||||
*
|
||||
* Version: 1.0
|
||||
* Created: 20/04/2021
|
||||
* Revision: none
|
||||
* Compiler: gcc
|
||||
*
|
||||
* Author: zhulin, shzhulin3@jd.com
|
||||
* Company: JD.com, Inc.
|
||||
*
|
||||
* =====================================================================================
|
||||
*/
|
||||
|
||||
#ifndef __GEO_DISTANCE_PARSER_H__
|
||||
#define __GEO_DISTANCE_PARSER_H__
|
||||
#include "query_parser.h"
|
||||
#include "json/json.h"
|
||||
#include "geohash.h"
|
||||
|
||||
// Parses a "geo_distance" query clause: a centre point plus a radius.
// The clause is converted into surrounding geohash terms that are pushed
// into QueryParserRes for inverted-index lookup.
class GeoDistanceParser : public QueryParser
{
public:
	// a: application id the query belongs to; v: JSON body of the clause.
	GeoDistanceParser(uint32_t a, Json::Value& v);
	~GeoDistanceParser();
	// Extracts the centre point and distance from `value`, generates the
	// surrounding geohash codes and records them in query_parser_res.
	// Returns 0 on success, -RT_PARSE_CONTENT_ERROR on malformed input.
	int ParseContent(QueryParserRes* query_parser_res);

private:
	uint32_t appid;      // application id
	Json::Value value;   // raw JSON of the geo_distance clause
	GeoPoint geo;        // parsed centre point
	double distance;     // search radius; 0 means "use the default"
};
|
||||
|
||||
#endif
|
@ -1,141 +0,0 @@
|
||||
#include "geo_shape_parser.h"
|
||||
#include "../db_manager.h"
|
||||
#include <sstream>
|
||||
|
||||
const char* const POINTS ="points";
|
||||
const int GEO_PRECISION = 6;
|
||||
|
||||
// Construct a geo-shape parser for application `a`; `v` is the JSON body
// of the geo_shape clause (stored by value in `value`).
GeoShapeParser::GeoShapeParser(uint32_t a, Json::Value& v)
	:appid(a),value(v)
{

}

GeoShapeParser::~GeoShapeParser(){

}
|
||||
|
||||
// Split `src` on `separate_character` (which may be longer than one
// character, e.g. ",,") and convert each token to double with atof().
// Tokens that are empty or a single space are skipped, so consecutive
// separators no longer yield spurious 0.0 coordinates.
// Returns the parsed values in input order.
vector<double> GeoShapeParser::splitDouble(const string& src, string separate_character)
{
	vector<double> strs;
	if (separate_character.empty()) {
		// A zero-length separator would make find() match at every
		// position and loop forever; treat the whole input as one token.
		if (!src.empty() && src != " ") {
			strs.push_back(atof(src.c_str()));
		}
		return strs;
	}
	const string::size_type sep_len = separate_character.size();
	string::size_type last_pos = 0;
	string::size_type index;
	// Compare against string::npos rather than -1: find() returns
	// string::size_type, and storing it in an int truncates on large inputs.
	while (string::npos != (index = src.find(separate_character, last_pos)))
	{
		string token = src.substr(last_pos, index - last_pos);
		if (!token.empty() && token != " ") {
			strs.push_back(atof(token.c_str()));
		}
		last_pos = index + sep_len;
	}
	// Content after the final separator (or the whole string when no
	// separator occurred).
	string last_token = src.substr(last_pos);
	if (!last_token.empty() && last_token != " "){
		strs.push_back(atof(last_token.c_str()));
	}
	return strs;
}
|
||||
|
||||
// Log `err_msg` and surface it to the caller via the result object.
void GeoShapeParser::SetErrMsg(QueryParserRes* query_parser_res, string err_msg){
	// Use an explicit "%s" format: err_msg may contain '%' sequences and
	// must never be interpreted as a printf format string (CWE-134).
	log_error("%s", err_msg.c_str());
	query_parser_res->ErrMsg() = err_msg;
}
|
||||
|
||||
// Parse a geo_shape clause: {"field": {"points": [<point>, ...]}}.
// Each point may be a "lat,lon" string, a [lon, lat] array (GeoJSON order)
// or a {"lat":..,"lon":..} object.  The enclosing rectangle of all points
// is turned into geohash codes that are added to query_parser_res.
// Returns 0 on success, -RT_PARSE_CONTENT_ERROR on malformed input.
int GeoShapeParser::ParseContent(QueryParserRes* query_parser_res){
	Json::Value::Members member = value.getMemberNames();
	Json::Value::Members::iterator iter = member.begin();
	if(iter == member.end()){ // a geo_shape clause holds exactly one field
		SetErrMsg(query_parser_res, "GeoShapeParser format error, content is null.");
		return -RT_PARSE_CONTENT_ERROR;
	}
	set<double> lat_arr;
	set<double> lon_arr;
	string fieldname = *iter;
	Json::Value field_value = value[fieldname];
	if(!field_value.isMember(POINTS)){
		SetErrMsg(query_parser_res, "GeoShapeParser error, no points content provide.");
		return -RT_PARSE_CONTENT_ERROR;
	}
	Json::Value points = field_value[POINTS];
	if(!points.isArray()){
		SetErrMsg(query_parser_res, "GeoShapeParser error, points is not a array.");
		return -RT_PARSE_CONTENT_ERROR;
	}
	for(int i = 0; i < (int)points.size(); i++){
		// Fix: lat/lon were previously uninitialized; the array branch could
		// fall through without assigning them and insert indeterminate
		// values into the coordinate sets.
		double lat = 0.0;
		double lon = 0.0;
		Json::Value geo_value = points[i];
		if(geo_value.isString()){
			string geo_str = geo_value.asString();
			vector<double> res = splitDouble(geo_str, ",");
			if(res.size() >= 2){
				lat = res[0];
				lon = res[1];
			} else {
				SetErrMsg(query_parser_res, "GeoShapeParser format error.");
				return -RT_PARSE_CONTENT_ERROR;
			}
		} else if (geo_value.isArray()){
			// GeoJSON-style element order: [lon, lat].  Reject elements of
			// the wrong type instead of silently using stale values.
			if(geo_value.size() >= 2 && geo_value[0].isDouble() && geo_value[1].isDouble()){
				lon = geo_value[0].asDouble();
				lat = geo_value[1].asDouble();
			} else {
				SetErrMsg(query_parser_res, "GeoShapeParser format error.");
				return -RT_PARSE_CONTENT_ERROR;
			}
		} else if (geo_value.isObject()){
			if(geo_value.isMember("lat") && geo_value["lat"].isDouble()){
				lat = geo_value["lat"].asDouble();
			} else {
				SetErrMsg(query_parser_res, "GeoShapeParser lat format error.");
				return -RT_PARSE_CONTENT_ERROR;
			}
			if(geo_value.isMember("lon") && geo_value["lon"].isDouble()){
				lon = geo_value["lon"].asDouble();
			} else {
				SetErrMsg(query_parser_res, "GeoShapeParser lon format error.");
				return -RT_PARSE_CONTENT_ERROR;
			}
		} else {
			SetErrMsg(query_parser_res, "GeoShapeParser error, value is not string/array/object.");
			return -RT_PARSE_CONTENT_ERROR;
		}
		lat_arr.insert(lat);
		lon_arr.insert(lon);
	}
	if(lon_arr.size() > 0 && lat_arr.size() > 0){
		// Bounding box of all points: (lon max, lon min, lat max, lat min).
		vector<string> gisCode = GetArroundGeoHash(*lon_arr.rbegin(), *lon_arr.begin(), *lat_arr.rbegin(), *lat_arr.begin(), GEO_PRECISION);
		if(gisCode.size() > 0){
			vector<FieldInfo> fieldInfos;
			uint32_t segment_tag = 0;
			FieldInfo fieldInfo;
			uint32_t field = DBManager::Instance()->GetWordField(segment_tag, appid, fieldname, fieldInfo);
			// Only non-segmented fields participate in geohash matching.
			if (field != 0 && segment_tag == 0) {
				query_parser_res->HasGis() = 1;
				for (size_t index = 0; index < gisCode.size(); index++) {
					FieldInfo info;
					info.field = fieldInfo.field;
					info.field_type = fieldInfo.field_type;
					info.segment_tag = fieldInfo.segment_tag;
					info.word = gisCode[index];
					fieldInfos.push_back(info);
				}
			}
			if (fieldInfos.size() != 0) {
				query_parser_res->FieldKeysMap().insert(make_pair(fieldInfo.field, fieldInfos));
			}
		}
	}
	return 0;
}
|
@ -1,43 +0,0 @@
|
||||
/*
|
||||
* =====================================================================================
|
||||
*
|
||||
* Filename: geo_shape_parser.h
|
||||
*
|
||||
* Description: geo_shape_parser class definition.
|
||||
*
|
||||
* Version: 1.0
|
||||
* Created: 08/05/2021
|
||||
* Revision: none
|
||||
* Compiler: gcc
|
||||
*
|
||||
* Author: zhulin, shzhulin3@jd.com
|
||||
* Company: JD.com, Inc.
|
||||
*
|
||||
* =====================================================================================
|
||||
*/
|
||||
|
||||
#ifndef __GEO_SHAPE_PARSER_H__
|
||||
#define __GEO_SHAPE_PARSER_H__
|
||||
#include "query_parser.h"
|
||||
#include "json/json.h"
|
||||
#include "geohash.h"
|
||||
|
||||
// Parses a "geo_shape" query clause: a polygon given as a list of points.
// The bounding rectangle of the points is converted into geohash terms for
// index lookup.
class GeoShapeParser : public QueryParser
{
public:
	// a: application id the query belongs to; v: JSON body of the clause.
	GeoShapeParser(uint32_t a, Json::Value& v);
	~GeoShapeParser();
	// Returns 0 on success, -RT_PARSE_CONTENT_ERROR on malformed input.
	int ParseContent(QueryParserRes* query_parser_res);

private:
	// Logs err_msg and stores it in the result object.
	void SetErrMsg(QueryParserRes* query_parser_res, string err_msg);
	// Splits src on separate_character and converts tokens with atof().
	vector<double> splitDouble(const string& src, string separate_character);

private:
	uint32_t appid;      // application id
	Json::Value value;   // raw JSON of the geo_shape clause
	// NOTE(review): geo and distance appear to be carried over from
	// GeoDistanceParser and are not referenced by the visible .cc — confirm
	// before relying on them.
	GeoPoint geo;
	double distance;
};
|
||||
|
||||
#endif
|
@ -1,66 +0,0 @@
|
||||
#include "match_query_parser.h"
|
||||
#include "../db_manager.h"
|
||||
#include "../split_manager.h"
|
||||
|
||||
// Construct a match-clause parser for application `a`; `v` is the JSON body
// of the match clause (stored by value in `value`).
MatchQueryParser::MatchQueryParser(uint32_t a, Json::Value& v)
	:appid(a),value(v)
{

}

MatchQueryParser::~MatchQueryParser(){

}
|
||||
|
||||
// Default entry point: parse the clause with OR semantics.
int MatchQueryParser::ParseContent(QueryParserRes* query_parser_res){
	return ParseContent(query_parser_res, ORKEY);
}
|
||||
|
||||
// Parse a match clause: {"field": "text"}.  When the field is configured
// for word segmentation (segment_tag == 1) the text is split into terms
// first; otherwise the raw text is used as a single term.  The resulting
// terms are stored under `type` (OR / AND / INVERT) in query_parser_res.
// Returns 0 on success, -RT_PARSE_CONTENT_ERROR when the clause is empty.
int MatchQueryParser::ParseContent(QueryParserRes* query_parser_res, uint32_t type){
	vector<FieldInfo> fieldInfos;
	Json::Value::Members member = value.getMemberNames();
	Json::Value::Members::iterator iter = member.begin();
	string fieldname;
	Json::Value field_value;
	if(iter != member.end()){ // a match clause holds exactly one field
		fieldname = *iter;
		field_value = value[fieldname];
	} else {
		// Fixed copy/paste bug: message previously said "TermQueryParser".
		log_error("MatchQueryParser error, value is null");
		return -RT_PARSE_CONTENT_ERROR;
	}
	uint32_t segment_tag = 0;
	FieldInfo fieldInfo;
	uint32_t field = DBManager::Instance()->GetWordField(segment_tag, appid, fieldname, fieldInfo);
	if (field != 0 && segment_tag == 1)
	{
		// Segmented field: run the text through the splitter and emit one
		// FieldInfo per produced term.
		string split_data = SplitManager::Instance()->split(field_value.asString(), appid);
		log_debug("split_data: %s", split_data.c_str());
		vector<string> split_datas = splitEx(split_data, "|");
		for(size_t index = 0; index < split_datas.size(); index++)
		{
			FieldInfo info;
			info.field = fieldInfo.field;
			info.field_type = fieldInfo.field_type;
			info.word = split_datas[index];
			info.segment_tag = fieldInfo.segment_tag;
			fieldInfos.push_back(info);
		}
	}
	else if (field != 0)
	{
		// Unsegmented field: the whole text is a single term.
		fieldInfo.word = field_value.asString();
		fieldInfos.push_back(fieldInfo);
	}

	if(fieldInfos.size() != 0){
		if(type == ORKEY){
			query_parser_res->OrFieldKeysMap().insert(make_pair(fieldInfo.field, fieldInfos));
		} else if(type == ANDKEY){
			query_parser_res->FieldKeysMap().insert(make_pair(fieldInfo.field, fieldInfos));
		} else if(type == INVERTKEY){
			query_parser_res->InvertFieldKeysMap().insert(make_pair(fieldInfo.field, fieldInfos));
		}
	}
	return 0;
}
|
@ -1,37 +0,0 @@
|
||||
/*
|
||||
* =====================================================================================
|
||||
*
|
||||
* Filename: match_query_parser.h
|
||||
*
|
||||
* Description: match_query_parser class definition.
|
||||
*
|
||||
* Version: 1.0
|
||||
* Created: 20/04/2021
|
||||
* Revision: none
|
||||
* Compiler: gcc
|
||||
*
|
||||
* Author: zhulin, shzhulin3@jd.com
|
||||
* Company: JD.com, Inc.
|
||||
*
|
||||
* =====================================================================================
|
||||
*/
|
||||
|
||||
#ifndef __MATCH_QUERY_PARSER_H__
|
||||
#define __MATCH_QUERY_PARSER_H__
|
||||
#include "query_parser.h"
|
||||
#include "json/json.h"
|
||||
|
||||
// Parses a "match" query clause: full-text match against one field, with
// optional word segmentation depending on the field's configuration.
class MatchQueryParser : public QueryParser
{
public:
	// a: application id the query belongs to; v: JSON body of the clause.
	MatchQueryParser(uint32_t a, Json::Value& v);
	~MatchQueryParser();
	// Parses with OR semantics (delegates to the two-argument overload).
	int ParseContent(QueryParserRes* query_parser_res);
	// Parses with the given logic type: ORKEY / ANDKEY / INVERTKEY.
	int ParseContent(QueryParserRes* query_parser_res, uint32_t type);

private:
	uint32_t appid;      // application id
	Json::Value value;   // raw JSON of the match clause
};
|
||||
|
||||
#endif
|
@ -1,82 +0,0 @@
|
||||
/*
|
||||
* =====================================================================================
|
||||
*
|
||||
* Filename: query_parser.h
|
||||
*
|
||||
* Description: query_parser class definition.
|
||||
*
|
||||
* Version: 1.0
|
||||
* Created: 19/04/2021
|
||||
* Revision: none
|
||||
* Compiler: gcc
|
||||
*
|
||||
* Author: zhulin, shzhulin3@jd.com
|
||||
* Company: JD.com, Inc.
|
||||
*
|
||||
* =====================================================================================
|
||||
*/
|
||||
|
||||
#ifndef __QUERY_PARSER_H__
|
||||
#define __QUERY_PARSER_H__
|
||||
#include "../comm.h"
|
||||
#include <map>
|
||||
|
||||
// Accumulates the output of all QueryParser implementations for one request:
// term lists grouped by logic type, extra (non-indexed) filters, and the
// geo-search parameters.  Accessors return mutable references by design —
// parsers write into this object directly.
class QueryParserRes{
public:
	QueryParserRes(){
		m_has_gis = 0;
	}
	// AND terms, keyed by field id.
	map<uint32_t, vector<FieldInfo> >& FieldKeysMap(){
		return field_keys_map;
	}
	// OR terms, keyed by field id.
	map<uint32_t, vector<FieldInfo> >& OrFieldKeysMap(){
		return or_field_keys_map;
	}
	// NOT (inverted) terms, keyed by field id.
	map<uint32_t, vector<FieldInfo> >& InvertFieldKeysMap(){
		return invert_field_keys_map;
	}
	// Post-filters applied outside the inverted index (OR semantics).
	vector<ExtraFilterKey>& ExtraFilterKeys(){
		return extra_filter_keys;
	}
	// Post-filters with AND semantics.
	vector<ExtraFilterKey>& ExtraFilterAndKeys(){
		return extra_filter_and_keys;
	}
	// Post-filters with NOT semantics.
	vector<ExtraFilterKey>& ExtraFilterInvertKeys(){
		return extra_filter_invert_keys;
	}
	// Non-zero when the request contains a geo clause.
	uint32_t& HasGis(){
		return m_has_gis;
	}
	// Centre latitude of the geo search, as a string.
	string& Latitude(){
		return latitude;
	}
	// Centre longitude of the geo search, as a string.
	string& Longitude(){
		return longitude;
	}
	// Geo search radius.
	double& Distance(){
		return distance;
	}
	// Last parse error message, if any.
	string& ErrMsg(){
		return err_msg;
	}
private:
	uint32_t m_has_gis;
	string latitude;
	string longitude;
	double distance;
	string err_msg;
	map<uint32_t, vector<FieldInfo> > field_keys_map;
	map<uint32_t, vector<FieldInfo> > or_field_keys_map;
	map<uint32_t, vector<FieldInfo> > invert_field_keys_map;
	vector<ExtraFilterKey> extra_filter_keys;
	vector<ExtraFilterKey> extra_filter_and_keys;
	vector<ExtraFilterKey> extra_filter_invert_keys;
};
|
||||
|
||||
// Abstract base for all query-clause parsers.  Each implementation consumes
// one JSON clause and writes its terms/filters into QueryParserRes.
class QueryParser{
public:
	// Returns 0 on success, negative error code on malformed input.
	virtual int ParseContent(QueryParserRes* query_parser_res) = 0;
	virtual ~QueryParser() {};
};
|
||||
|
||||
#endif
|
@ -1,95 +0,0 @@
|
||||
#include "range_query_parser.h"
|
||||
#include "../db_manager.h"
|
||||
|
||||
const char* const GTE ="gte";
|
||||
const char* const GT ="gt";
|
||||
const char* const LTE ="lte";
|
||||
const char* const LT ="lt";
|
||||
|
||||
// Construct a range-clause parser for application `a`; `v` is the JSON body
// of the range clause (stored by value in `value`).
RangeQueryParser::RangeQueryParser(uint32_t a, Json::Value& v)
	:appid(a),value(v)
{

}

RangeQueryParser::~RangeQueryParser(){

}
|
||||
|
||||
// Default entry point: parse the clause with OR semantics.
int RangeQueryParser::ParseContent(QueryParserRes* query_parser_res){
	return ParseContent(query_parser_res, ORKEY);
}
|
||||
|
||||
// Parse a range clause: {"field": {"gte"/"gt"/"lte"/"lt": int, ...}}.
// The combination of present bounds selects a RANGE_* type; only integer
// bounds are supported.  Results are stored under `type` (OR/AND/INVERT).
// Returns 0 on success, -RT_PARSE_CONTENT_ERROR on error.
int RangeQueryParser::ParseContent(QueryParserRes* query_parser_res, uint32_t type){
	vector<FieldInfo> fieldInfos;
	Json::Value::Members member = value.getMemberNames();
	Json::Value::Members::iterator iter = member.begin();
	if(iter != member.end()){ // a range clause holds exactly one field
		string fieldname = *iter;
		uint32_t segment_tag = 0;
		FieldInfo fieldInfo;
		DBManager::Instance()->GetWordField(segment_tag, appid, fieldname, fieldInfo);
		Json::Value field_value = value[fieldname];
		if(field_value.isObject()){
			FieldInfo info;
			Json::Value start;
			Json::Value end;
			// Map the combination of bounds to the matching range type.
			// Note "gte" takes precedence over "gt" when both are present.
			if(field_value.isMember(GTE)){
				start = field_value[GTE];
				if(field_value.isMember(LTE)){
					end = field_value[LTE];
					info.range_type = RANGE_GELE;
				} else if(field_value.isMember(LT)){
					end = field_value[LT];
					info.range_type = RANGE_GELT;
				} else {
					info.range_type = RANGE_GE;
				}
			} else if(field_value.isMember(GT)){
				start = field_value[GT];
				if(field_value.isMember(LTE)){
					end = field_value[LTE];
					info.range_type = RANGE_GTLE;
				} else if(field_value.isMember(LT)){
					end = field_value[LT];
					info.range_type = RANGE_GTLT;
				} else {
					info.range_type = RANGE_GT;
				}
			} else if(field_value.isMember(LTE)){
				end = field_value[LTE];
				info.range_type = RANGE_LE;
			} else if(field_value.isMember(LT)){
				end = field_value[LT];
				info.range_type = RANGE_LT;
			}
			// Only integer bounds are accepted; a bound of any other JSON
			// type is an error (null means "bound absent").
			if(!start.isInt() && !start.isNull()){
				log_error("range query only support integer");
				return -RT_PARSE_CONTENT_ERROR;
			}
			if(!end.isInt() && !end.isNull()){
				log_error("range query only support integer");
				return -RT_PARSE_CONTENT_ERROR;
			}
			if(start.isInt() || end.isInt()){
				// Absent bounds default to 0; range_type disambiguates.
				fieldInfo.start = start.isInt() ? start.asInt() : 0;
				fieldInfo.end = end.isInt() ? end.asInt() : 0;
				fieldInfo.range_type = info.range_type;
				fieldInfos.push_back(fieldInfo);
			}
		}
		if(fieldInfos.size() != 0){
			if(type == ORKEY){
				query_parser_res->OrFieldKeysMap().insert(make_pair(fieldInfo.field, fieldInfos));
			} else if(type == ANDKEY){
				query_parser_res->FieldKeysMap().insert(make_pair(fieldInfo.field, fieldInfos));
			} else if(type == INVERTKEY){
				query_parser_res->InvertFieldKeysMap().insert(make_pair(fieldInfo.field, fieldInfos));
			}
		}
	} else {
		log_error("RangeQueryParser error, value is null");
		return -RT_PARSE_CONTENT_ERROR;
	}
	return 0;
}
|
@ -1,37 +0,0 @@
|
||||
/*
|
||||
* =====================================================================================
|
||||
*
|
||||
* Filename: range_query_parser.h
|
||||
*
|
||||
* Description: range_query_parser class definition.
|
||||
*
|
||||
* Version: 1.0
|
||||
* Created: 19/04/2021
|
||||
* Revision: none
|
||||
* Compiler: gcc
|
||||
*
|
||||
* Author: zhulin, shzhulin3@jd.com
|
||||
* Company: JD.com, Inc.
|
||||
*
|
||||
* =====================================================================================
|
||||
*/
|
||||
|
||||
#ifndef __RANGE_QUERY_PARSER_H__
|
||||
#define __RANGE_QUERY_PARSER_H__
|
||||
#include "query_parser.h"
|
||||
#include "json/json.h"
|
||||
|
||||
// Parses a "range" query clause: integer comparisons (gte/gt/lte/lt)
// against one field.
class RangeQueryParser : public QueryParser
{
public:
	// a: application id the query belongs to; v: JSON body of the clause.
	RangeQueryParser(uint32_t a, Json::Value& v);
	~RangeQueryParser();
	// Parses with OR semantics (delegates to the two-argument overload).
	int ParseContent(QueryParserRes* query_parser_res);
	// Parses with the given logic type: ORKEY / ANDKEY / INVERTKEY.
	int ParseContent(QueryParserRes* query_parser_res, uint32_t type);

private:
	uint32_t appid;      // application id
	Json::Value value;   // raw JSON of the range clause
};
|
||||
|
||||
#endif
|
@ -1,64 +0,0 @@
|
||||
#include "term_query_parser.h"
|
||||
#include "../db_manager.h"
|
||||
|
||||
|
||||
// Construct a term-clause parser for application `a`; `v` is the JSON body
// of the term clause (stored by value in `value`).
TermQueryParser::TermQueryParser(uint32_t a, Json::Value& v)
	:appid(a),value(v)
{

}

TermQueryParser::~TermQueryParser(){

}
|
||||
|
||||
// Default entry point: parse the clause with OR semantics.
int TermQueryParser::ParseContent(QueryParserRes* query_parser_res){
	return ParseContent(query_parser_res, ORKEY);
}
|
||||
|
||||
// Parse a term clause: {"field": "value"} — an exact, unanalyzed match.
// Non-indexed fields (index_tag == 0) become extra post-filters; indexed
// fields become inverted-index terms.  Results go under `type`
// (OR / AND / INVERT).  Returns 0 on success, -RT_PARSE_CONTENT_ERROR when
// the clause is empty.
int TermQueryParser::ParseContent(QueryParserRes* query_parser_res, uint32_t type){
	vector<FieldInfo> fieldInfos;
	Json::Value::Members member = value.getMemberNames();
	Json::Value::Members::iterator iter = member.begin();
	string fieldname;
	string field_value;
	Json::Value json_value;
	if(iter != member.end()){ // a term clause holds exactly one field
		fieldname = *iter;
		json_value = value[fieldname];
		// NOTE(review): asString() on a non-string JSON value depends on the
		// jsoncpp version's conversion rules — confirm inputs are strings.
		field_value = json_value.asString();
	} else {
		log_error("TermQueryParser error, value is null");
		return -RT_PARSE_CONTENT_ERROR;
	}
	uint32_t segment_tag = 0;
	FieldInfo fieldInfo;
	uint32_t field = DBManager::Instance()->GetWordField(segment_tag, appid, fieldname, fieldInfo);
	if(field != 0 && fieldInfo.index_tag == 0){
		// Field exists but is not in the inverted index: filter after lookup.
		ExtraFilterKey extra_filter_key;
		extra_filter_key.field_name = fieldname;
		extra_filter_key.field_value = field_value;
		extra_filter_key.field_type = fieldInfo.field_type;
		if(type == ORKEY){
			query_parser_res->ExtraFilterKeys().push_back(extra_filter_key);
		} else if (type == ANDKEY) {
			query_parser_res->ExtraFilterAndKeys().push_back(extra_filter_key);
		} else if (type == INVERTKEY) {
			query_parser_res->ExtraFilterInvertKeys().push_back(extra_filter_key);
		}
	} else if (field != 0)
	{
		// Indexed field: the value is a single exact term.
		fieldInfo.word = field_value;
		fieldInfos.push_back(fieldInfo);
	}
	if(fieldInfos.size() != 0){
		if(type == ORKEY){
			query_parser_res->OrFieldKeysMap().insert(make_pair(fieldInfo.field, fieldInfos));
		} else if(type == ANDKEY) {
			query_parser_res->FieldKeysMap().insert(make_pair(fieldInfo.field, fieldInfos));
		} else if(type == INVERTKEY){
			query_parser_res->InvertFieldKeysMap().insert(make_pair(fieldInfo.field, fieldInfos));
		}
	}
	return 0;
}
|
@ -1,37 +0,0 @@
|
||||
/*
|
||||
* =====================================================================================
|
||||
*
|
||||
* Filename: term_query_parser.h
|
||||
*
|
||||
* Description: term_query_parser class definition.
|
||||
*
|
||||
* Version: 1.0
|
||||
* Created: 20/04/2021
|
||||
* Revision: none
|
||||
* Compiler: gcc
|
||||
*
|
||||
* Author: zhulin, shzhulin3@jd.com
|
||||
* Company: JD.com, Inc.
|
||||
*
|
||||
* =====================================================================================
|
||||
*/
|
||||
|
||||
#ifndef __TERM_QUERY_PARSER_H__
|
||||
#define __TERM_QUERY_PARSER_H__
|
||||
#include "query_parser.h"
|
||||
#include "json/json.h"
|
||||
|
||||
// Parses a "term" query clause: an exact, unanalyzed match on one field.
class TermQueryParser : public QueryParser
{
public:
	// a: application id the query belongs to; v: JSON body of the clause.
	TermQueryParser(uint32_t a, Json::Value& v);
	~TermQueryParser();
	// Parses with OR semantics (delegates to the two-argument overload).
	int ParseContent(QueryParserRes* query_parser_res);
	// Parses with the given logic type: ORKEY / ANDKEY / INVERTKEY.
	int ParseContent(QueryParserRes* query_parser_res, uint32_t type);

private:
	uint32_t appid;      // application id
	Json::Value value;   // raw JSON of the term clause
};
|
||||
|
||||
#endif
|
288
src/search_local/index_read/request_context.cc
Normal file
288
src/search_local/index_read/request_context.cc
Normal file
@ -0,0 +1,288 @@
|
||||
/*
|
||||
* =====================================================================================
|
||||
*
|
||||
* Filename: component.h
|
||||
*
|
||||
* Description: component class definition.
|
||||
*
|
||||
* Version: 1.0
|
||||
* Created: 09/08/2019
|
||||
* Revision: none
|
||||
* Compiler: gcc
|
||||
*
|
||||
* Author: zhulin, shzhulin3@jd.com
|
||||
* Company: JD.com, Inc.
|
||||
*
|
||||
* =====================================================================================
|
||||
*/
|
||||
|
||||
#include "request_context.h"
|
||||
#include "split_manager.h"
|
||||
#include "db_manager.h"
|
||||
#include "utf8_str.h"
|
||||
#include <sstream>
|
||||
|
||||
// Initialize every field to its documented default; ParseJson() overwrites
// most of them from the request body.
RequestContext::RequestContext()
	: or_keys_()
	, and_keys_()
	, invert_keys_()
	, extra_filter_or_keys_()
	, extra_filter_and_keys_()
	, extra_filter_invert_keys_()
	, page_index_(0)
	, page_size_(0)
	, cache_switch_(0)
	, snapshot_switch_(0)
	, sort_type_(SORT_RELEVANCE)
	, appid_(10001)        // default application id, same fallback as ParseJson
	, return_all_(0)
	, sort_field_("")
	, last_id_("")
	, last_score_("")
	, search_after_(false)
	, required_fields_()
	, preterminal_tag_(0)
	, query_value_()
	, has_gis_(false)
{ }

RequestContext::~RequestContext(){

}
|
||||
|
||||
// Parse the raw request body into this context.  Missing members fall back
// to defaults (appid 10001, page_index 1, page_size 10, sort SORT_RELEVANCE).
// search_after mode is enabled only when both last_id and last_score are
// present, and then requires a field sort.  The parsed JSON is also returned
// to the caller through recv_packet.
// Returns 0 on success, -RT_PARSE_JSON_ERR on malformed JSON or an invalid
// search_after/sort combination.
int RequestContext::ParseJson(const char *sz_json, int json_len, Json::Value &recv_packet)
{
	Json::Reader r(Json::Features::strictMode());
	int ret;
	// Reader::parse returns bool; 0 therefore means "parse failed".
	ret = r.parse(sz_json, sz_json + json_len, recv_packet);
	if (0 == ret)
	{
		log_error("the err json string is : %s", sz_json);
		log_error("parse json error , errmsg : %s", r.getFormattedErrorMessages().c_str());
		return -RT_PARSE_JSON_ERR;
	}

	if (recv_packet.isMember("appid")){
		appid_ = ParseJsonReturnInt("appid" , recv_packet);
	}
	else {
		appid_ = 10001;
	}

	if(recv_packet.isMember("query")){
		query_value_ = recv_packet["query"];
	}

	if (recv_packet.isMember("page_index")){
		page_index_ = ParseJsonReturnInt("page_index" , recv_packet);
	}
	else {
		page_index_ = 1 ;
	}

	if (recv_packet.isMember("page_size")){
		page_size_ = ParseJsonReturnInt("page_size" , recv_packet);
	}
	else {
		page_size_ = 10;
	}

	if(recv_packet.isMember("sort_type")){
		sort_type_ = ParseJsonReturnInt("sort_type" , recv_packet);
	}
	else {
		sort_type_ = SORT_RELEVANCE;
	}

	if(recv_packet.isMember("sort_field") && recv_packet["sort_field"].isString()){
		sort_field_ = recv_packet["sort_field"].asString();
	}
	else {
		sort_field_ = "";
	}

	if (recv_packet.isMember("return_all")){
		return_all_ = ParseJsonReturnInt("return_all" , recv_packet);
	}
	else {
		return_all_ = 0;
	}

	// "fields" is a comma-separated list of fields to return.
	if(recv_packet.isMember("fields") && recv_packet["fields"].isString()){
		std::string fields = recv_packet["fields"].asString();
		required_fields_ = splitEx(fields, ",");
	}

	if (recv_packet.isMember("terminal_tag")){
		preterminal_tag_ = ParseJsonReturnInt("terminal_tag" , recv_packet);
	}
	else {
		preterminal_tag_ = 0;
	}

	if(recv_packet.isMember("last_id") && recv_packet["last_id"].isString()){
		last_id_ = recv_packet["last_id"].asString();
	}
	else {
		last_id_ = "";
	}

	// search_after pagination needs both last_id and last_score.
	bool score_flag = true;
	if (recv_packet.isMember("last_score") && recv_packet["last_score"].isString()){
		last_score_ = recv_packet["last_score"].asString();
	}
	else {
		score_flag = false;
		last_score_ = "0";
	}
	if(last_id_ != "" && score_flag == true){
		search_after_ = true;
	}
	// search_after only makes sense with a deterministic field ordering.
	if(search_after_ == true && sort_type_ != SORT_FIELD_DESC && sort_type_ != SORT_FIELD_ASC){
		log_error("in search_after mode, sort_type must be SORT_FIELD_DESC or SORT_FIELD_ASC.");
		return -RT_PARSE_JSON_ERR;
	}

	log_debug("sort_type:%d , sort_field:%s", sort_type_ , sort_field_.c_str());
	return 0;
}
|
||||
|
||||
uint32_t RequestContext::ParseJsonReturnInt(
|
||||
const std::string& field,
|
||||
const Json::Value& json_value)
|
||||
{
|
||||
uint32_t ui_value = 0;
|
||||
if (json_value[field].isString()){
|
||||
ui_value = atoi(json_value[field].asString().c_str());
|
||||
}else if (json_value[field].isInt()){
|
||||
ui_value = json_value[field].asInt();
|
||||
}
|
||||
return ui_value;
|
||||
}
|
||||
|
||||
// Load the cache/snapshot switches from this appid's configuration.
// Leaves the defaults (0) untouched when the appid is unknown.
void RequestContext::InitSwitch()
{
	AppInfo app_info;
	bool res = SearchConf::Instance()->GetAppInfo(appid_, app_info);
	if (true == res){
		cache_switch_ = app_info.cache_switch;
		snapshot_switch_ = app_info.snapshot_switch;
	}
}
|
||||
|
||||
void RequestContext::AddToFieldList(int type, vector<FieldInfo>& fields)
|
||||
{
|
||||
if (fields.size() == 0)
|
||||
return ;
|
||||
if (type == ORKEY) {
|
||||
or_keys_.push_back(fields);
|
||||
} else if (type == ANDKEY) {
|
||||
and_keys_.push_back(fields);
|
||||
} else if (type == INVERTKEY) {
|
||||
invert_keys_.push_back(fields);
|
||||
}
|
||||
return ;
|
||||
}
|
||||
|
||||
// Return the term-list container for the given logic type.  Unknown types
// are logged and mapped to a shared empty container (function-local static,
// so the returned reference stays valid).
const std::vector<std::vector<FieldInfo> >& RequestContext::GetFieldList(int logic_type){
	if (ORKEY == logic_type){
		return or_keys_;
	}else if (ANDKEY == logic_type){
		return and_keys_;
	}else if (INVERTKEY == logic_type){
		return invert_keys_;
	}
	log_error("get field list with error type, please check");
	static std::vector<std::vector<FieldInfo> > empty_fieldinfos;
	return empty_fieldinfos;
}
|
||||
|
||||
// Append an extra (post-index) filter to the container matching the logic
// type; unknown types are silently ignored.
void RequestContext::AddToExtraFieldList(int type , const ExtraFilterKey& extra_field){
	if (ORKEY == type){
		extra_filter_or_keys_.push_back(extra_field);
	}else if (ANDKEY == type){
		extra_filter_and_keys_.push_back(extra_field);
	}else if (INVERTKEY == type){
		extra_filter_invert_keys_.push_back(extra_field);
	}
	return;
}
|
||||
|
||||
// ---- Trivial accessors for the parsed request state. ----------------------
// Key containers are returned by reference (AndKeys mutably, by design);
// scalar getters return copies.

const std::vector<std::vector<FieldInfo> >& RequestContext::OrKeys(){
	return or_keys_;
}

std::vector<std::vector<FieldInfo> >& RequestContext::AndKeys(){
	return and_keys_;
}

const std::vector<std::vector<FieldInfo> >& RequestContext::InvertKeys(){
	return invert_keys_;
}

const std::vector<ExtraFilterKey>& RequestContext::ExtraFilterOrKeys(){
	return extra_filter_or_keys_;
}

const std::vector<ExtraFilterKey>& RequestContext::ExtraFilterAndKeys(){
	return extra_filter_and_keys_;
}

const std::vector<ExtraFilterKey>& RequestContext::ExtraFilterInvertKeys(){
	return extra_filter_invert_keys_;
}

uint32_t RequestContext::Appid(){
	return appid_;
}

uint32_t RequestContext::SortType(){
	return sort_type_;
}

uint32_t RequestContext::PageIndex(){
	return page_index_;
}
uint32_t RequestContext::PageSize(){
	return page_size_;
}

uint32_t RequestContext::ReturnAll(){
	return return_all_;
}

uint32_t RequestContext::CacheSwitch(){
	return cache_switch_;
}

uint32_t RequestContext::SnapshotSwitch(){
	return snapshot_switch_;
}

string RequestContext::SortField(){
	return sort_field_;
}

string RequestContext::LastId(){
	return last_id_;
}

string RequestContext::LastScore(){
	return last_score_;
}

bool RequestContext::SearchAfter(){
	return search_after_;
}

vector<string>& RequestContext::RequiredFields(){
	return required_fields_;
}

uint32_t RequestContext::TerminalTag(){
	return preterminal_tag_;
}

Json::Value& RequestContext::GetQuery(){
	return query_value_;
}
|
97
src/search_local/index_read/request_context.h
Normal file
97
src/search_local/index_read/request_context.h
Normal file
@ -0,0 +1,97 @@
|
||||
/*
|
||||
* =====================================================================================
|
||||
*
|
||||
* Filename: component.h
|
||||
*
|
||||
* Description: component class definition.
|
||||
*
|
||||
* Version: 1.0
|
||||
* Created: 09/08/2019
|
||||
* Revision: none
|
||||
* Compiler: gcc
|
||||
*
|
||||
* Author: zhulin, shzhulin3@jd.com
|
||||
* Modified by: chenyujie ,chenyujie28@jd.com
|
||||
* Company: JD.com, Inc.
|
||||
*
|
||||
* =====================================================================================
|
||||
*/
|
||||
|
||||
#ifndef __COMPONENT_H__
|
||||
#define __COMPONENT_H__
|
||||
|
||||
#include "comm.h"
|
||||
#include "json/json.h"
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
// Holds the fully parsed state of one search request: pagination, sorting,
// search_after cursor, per-logic-type term lists and extra filters, plus the
// raw "query" sub-tree for the clause parsers.
class RequestContext
{
public:
	RequestContext();
	~RequestContext();

public:
	// Parse the request body; returns 0 or -RT_PARSE_JSON_ERR.  The parsed
	// tree is also handed back through recv_packet.
	int ParseJson(const char* sz_json, int json_len, Json::Value &recv_packet);
	// Load cache/snapshot switches from the appid's configuration.
	void InitSwitch();

	const std::vector<std::vector<FieldInfo> >& OrKeys();
	std::vector<std::vector<FieldInfo> >& AndKeys();
	const std::vector<std::vector<FieldInfo> >& InvertKeys();
	const std::vector<ExtraFilterKey>& ExtraFilterOrKeys();
	const std::vector<ExtraFilterKey>& ExtraFilterAndKeys();
	const std::vector<ExtraFilterKey>& ExtraFilterInvertKeys();

	uint32_t Appid();
	uint32_t SortType();
	uint32_t PageIndex();
	uint32_t PageSize();
	uint32_t ReturnAll();
	uint32_t CacheSwitch();
	// NOTE(review): TopSwitch() has no definition in the visible .cc —
	// confirm it is implemented elsewhere before calling (linker error
	// otherwise).
	uint32_t TopSwitch();
	uint32_t SnapshotSwitch();
	std::string SortField();
	std::string LastId();
	std::string LastScore();
	bool SearchAfter();
	std::vector<std::string>& RequiredFields();
	uint32_t TerminalTag();
	Json::Value& GetQuery();

	// Append a non-empty term list under the given logic type.
	void AddToFieldList(int type, std::vector<FieldInfo>& fields);
	const std::vector<std::vector<FieldInfo> >& GetFieldList(int logic_type);

	// Append an extra (post-index) filter under the given logic type.
	void AddToExtraFieldList(int type , const ExtraFilterKey& extra_field);

	void SetHasGisFlag(bool bFlag) { has_gis_ = bFlag; };
	bool GetHasGisFlag() { return has_gis_; };

private:
	// Read json_value[field] as uint32_t (numeric string or int; else 0).
	uint32_t ParseJsonReturnInt(const std::string& field, const Json::Value& json_value);

private:
	std::vector<std::vector<FieldInfo> > or_keys_;
	std::vector<std::vector<FieldInfo> > and_keys_;
	std::vector<std::vector<FieldInfo> > invert_keys_;
	std::vector<ExtraFilterKey> extra_filter_or_keys_;
	std::vector<ExtraFilterKey> extra_filter_and_keys_;
	std::vector<ExtraFilterKey> extra_filter_invert_keys_;

	uint32_t page_index_;
	uint32_t page_size_;

	uint32_t cache_switch_;
	uint32_t snapshot_switch_;
	uint32_t sort_type_;
	uint32_t appid_;
	uint32_t return_all_;
	std::string sort_field_;
	std::string last_id_;       // search_after cursor: last document id
	std::string last_score_;    // search_after cursor: last sort value
	bool search_after_;
	std::vector<std::string> required_fields_;
	uint32_t preterminal_tag_;
	Json::Value query_value_;   // raw "query" sub-tree
	bool has_gis_;
};
|
||||
#endif
|
111
src/search_local/index_read/result_context.cc
Normal file
111
src/search_local/index_read/result_context.cc
Normal file
@ -0,0 +1,111 @@
|
||||
#include "result_context.h"
|
||||
#include <algorithm>
|
||||
|
||||
ResultContext::ResultContext()
|
||||
: index_info_vet_()
|
||||
, valid_docs_set_()
|
||||
, highlight_word_set_()
|
||||
, docid_keyinfovet_map_()
|
||||
, key_doccount_map_()
|
||||
{ }
|
||||
|
||||
ResultContext::~ResultContext(){
|
||||
|
||||
}
|
||||
|
||||
void ResultContext::Clear(){
|
||||
index_info_vet_.clear();
|
||||
valid_docs_set_.clear();
|
||||
highlight_word_set_.clear();
|
||||
docid_keyinfovet_map_.clear();
|
||||
key_doccount_map_.clear();
|
||||
}
|
||||
|
||||
void ResultContext::SetHighLightWordSet(const std::string& highlight_word){
|
||||
highlight_word_set_.insert(highlight_word);
|
||||
}
|
||||
|
||||
void ResultContext::SetDocKeyinfoMap(const std::string& s_doc_id, const KeyInfo& key_info){
|
||||
docid_keyinfovet_map_[s_doc_id].push_back(key_info);
|
||||
}
|
||||
|
||||
void ResultContext::SetWordDoccountMap(const std::string& s_word, uint32_t ui_doc_count){
|
||||
key_doccount_map_.insert(std::make_pair(s_word , ui_doc_count));
|
||||
}
|
||||
|
||||
void ResultContext::SetValidDocs(const std::string& valid_docid){
|
||||
valid_docs_set_.insert(valid_docid);
|
||||
}
|
||||
|
||||
void ResultContext::SetIndexInfos(int logic_type , std::vector<IndexInfo>& index_info_vet){
|
||||
if (ORKEY == logic_type){
|
||||
SetOrIndexInfos(index_info_vet);
|
||||
}else if(ANDKEY == logic_type){
|
||||
SetAndIndexInfos(index_info_vet);
|
||||
}else if(INVERTKEY == logic_type){
|
||||
SetInvertIndexInfos(index_info_vet);
|
||||
}
|
||||
}
|
||||
|
||||
void ResultContext::SetOrIndexInfos(std::vector<IndexInfo>& or_index_info_vet){
|
||||
if (index_info_vet_.empty()){
|
||||
index_info_vet_ = or_index_info_vet;
|
||||
}else{
|
||||
std::vector<IndexInfo> index_info_result;
|
||||
int i_max_size = index_info_vet_.size() + or_index_info_vet.size();
|
||||
index_info_result.resize(i_max_size);
|
||||
|
||||
std::sort(index_info_vet_.begin() , index_info_vet_.end());
|
||||
std::sort(or_index_info_vet.begin() , or_index_info_vet.end());
|
||||
|
||||
std::vector<IndexInfo>::iterator iter = std::set_union(
|
||||
index_info_vet_.begin(),index_info_vet_.end(),
|
||||
or_index_info_vet.begin() ,or_index_info_vet.end(),
|
||||
index_info_result.begin());
|
||||
|
||||
index_info_result.resize(iter - index_info_result.begin());
|
||||
index_info_vet_.swap(index_info_result);
|
||||
}
|
||||
}
|
||||
|
||||
void ResultContext::SetAndIndexInfos(std::vector<IndexInfo>& and_index_info_vet){
|
||||
if (index_info_vet_.empty()){
|
||||
index_info_vet_ = and_index_info_vet;
|
||||
}else{
|
||||
std::vector<IndexInfo> index_info_result;
|
||||
int i_min_size = (index_info_vet_.size() <= and_index_info_vet.size() ? index_info_vet_.size() : and_index_info_vet.size());
|
||||
index_info_result.resize(i_min_size);
|
||||
|
||||
std::sort(index_info_vet_.begin() , index_info_vet_.end());
|
||||
std::sort(and_index_info_vet.begin() , and_index_info_vet.end());
|
||||
|
||||
std::vector<IndexInfo>::iterator iter = std::set_intersection(
|
||||
index_info_vet_.begin(), index_info_vet_.end(),
|
||||
and_index_info_vet.begin() , and_index_info_vet.end(),
|
||||
index_info_result.begin());
|
||||
|
||||
index_info_result.resize(iter - index_info_result.begin());
|
||||
index_info_vet_.swap(index_info_result);
|
||||
}
|
||||
}
|
||||
|
||||
void ResultContext::SetInvertIndexInfos(std::vector<IndexInfo>& invert_index_info_vet){
|
||||
if (index_info_vet_.empty()){
|
||||
index_info_vet_ = invert_index_info_vet;
|
||||
}else{
|
||||
std::vector<IndexInfo> index_info_result;
|
||||
int i_max_size = index_info_vet_.size() + invert_index_info_vet.size();
|
||||
index_info_result.resize(i_max_size);
|
||||
|
||||
std::sort(index_info_vet_.begin() , index_info_vet_.end());
|
||||
std::sort(invert_index_info_vet.begin() , invert_index_info_vet.end());
|
||||
|
||||
std::vector<IndexInfo>::iterator iter = std::set_difference(
|
||||
index_info_vet_.begin(), index_info_vet_.end(),
|
||||
invert_index_info_vet.begin() , invert_index_info_vet.end() ,
|
||||
index_info_result.begin());
|
||||
|
||||
index_info_result.resize(iter - index_info_result.begin());
|
||||
index_info_vet_.swap(index_info_result);
|
||||
}
|
||||
}
|
52
src/search_local/index_read/result_context.h
Normal file
52
src/search_local/index_read/result_context.h
Normal file
@ -0,0 +1,52 @@
|
||||
#ifndef SYSTEM_STATUS_H_
|
||||
#define SYSTEM_STATUS_H_
|
||||
|
||||
#include "comm.h"
|
||||
#include "singleton.h"
|
||||
#include "noncopyable.h"
|
||||
|
||||
class ResultContext: private noncopyable{
|
||||
public:
|
||||
ResultContext();
|
||||
virtual ~ResultContext();
|
||||
|
||||
public:
|
||||
static ResultContext* Instance(){
|
||||
return CSingleton<ResultContext>::Instance();
|
||||
};
|
||||
|
||||
static void Destroy(){
|
||||
CSingleton<ResultContext>::Destroy();
|
||||
};
|
||||
|
||||
public:
|
||||
void Clear();
|
||||
|
||||
void SetHighLightWordSet(const std::string& highlight_word);
|
||||
const HighLightWordSet& GetHighLightWordSet() const {return highlight_word_set_;};
|
||||
|
||||
void SetDocKeyinfoMap(const std::string& s_doc_id, const KeyInfo& key_info);
|
||||
const DocKeyinfosMap& GetDocKeyinfosMap() const { return docid_keyinfovet_map_;};
|
||||
|
||||
void SetWordDoccountMap(const std::string& s_word, uint32_t ui_doc_count);
|
||||
uint32_t GetKeywordDoccountMap(const std::string& s_word) { return key_doccount_map_[s_word];};
|
||||
|
||||
void SetIndexInfos(int logic_type , std::vector<IndexInfo>& index_info_vet);
|
||||
const std::vector<IndexInfo>& GetIndexInfos() const { return index_info_vet_;};
|
||||
|
||||
void SetValidDocs(const std::string& valid_docid);
|
||||
ValidDocSet* GetValidDocs() { return &valid_docs_set_;};
|
||||
|
||||
private:
|
||||
void SetOrIndexInfos(std::vector<IndexInfo>& or_index_info_vet);
|
||||
void SetAndIndexInfos(std::vector<IndexInfo>& and_index_info_vet);
|
||||
void SetInvertIndexInfos(std::vector<IndexInfo>& invert_index_info_vet);
|
||||
|
||||
private:
|
||||
std::vector<IndexInfo> index_info_vet_;
|
||||
ValidDocSet valid_docs_set_;
|
||||
HighLightWordSet highlight_word_set_;
|
||||
DocKeyinfosMap docid_keyinfovet_map_;
|
||||
KeywordDoccountMap key_doccount_map_;
|
||||
};
|
||||
#endif
|
@ -11,567 +11,43 @@
|
||||
* Compiler: gcc
|
||||
*
|
||||
* Author: zhulin, shzhulin3@jd.com
|
||||
* Modified by: chenyujie ,chenyujie28@jd.com
|
||||
* Company: JD.com, Inc.
|
||||
*
|
||||
* =====================================================================================
|
||||
*/
|
||||
|
||||
#include "split_manager.h"
|
||||
#include "search_util.h"
|
||||
#include "search_task.h"
|
||||
#include "json/reader.h"
|
||||
#include "json/writer.h"
|
||||
#include "timemanager.h"
|
||||
#include "cpa_md5.h"
|
||||
#include "data_manager.h"
|
||||
#include "stem.h"
|
||||
#include "result_cache.h"
|
||||
#include "cachelist_unit.h"
|
||||
#include <netinet/in.h>
|
||||
#include <algorithm>
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
#include <math.h>
|
||||
#include "stat_index.h"
|
||||
#include "db_manager.h"
|
||||
#include "utf8_str.h"
|
||||
#include "split_manager.h"
|
||||
#include "monitor.h"
|
||||
#include "index_sync/sync_index_timer.h"
|
||||
#include "index_sync/sequence_search_index.h"
|
||||
#include "order_op.h"
|
||||
|
||||
#include "index_tbl_op.h"
|
||||
#include "valid_doc_filter.h"
|
||||
#include "process/geo_distance_query_process.h"
|
||||
#include "process/geo_shape_query_process.h"
|
||||
#include "process/match_query_process.h"
|
||||
|
||||
using namespace std;
|
||||
#define DOC_CNT 10000
|
||||
|
||||
typedef pair<string, double> PAIR;
|
||||
extern CCacheListUnit *cachelist;
|
||||
extern SyncIndexTimer *globalSyncIndexTimer;
|
||||
|
||||
struct CmpByValue {
|
||||
bool operator()(const PAIR& lhs, const PAIR& rhs) {
|
||||
if(fabs(lhs.second - rhs.second) < 0.000001){
|
||||
return lhs.first.compare(rhs.first) > 0;
|
||||
}
|
||||
return lhs.second > rhs.second;
|
||||
}
|
||||
};
|
||||
#include "process/range_query_process.h"
|
||||
#include "process/term_query_process.h"
|
||||
#include "process/bool_query_process.h"
|
||||
|
||||
SearchTask::SearchTask()
|
||||
{
|
||||
m_index_set_cnt = 0;
|
||||
m_has_gis = 0;
|
||||
component = new Component();
|
||||
: ProcessTask()
|
||||
, component_(new RequestContext())
|
||||
, doc_manager_(new DocManager(component_))
|
||||
, query_process_(NULL)
|
||||
{
|
||||
ValidDocFilter::Instance()->BindDataBasePointer(component_);
|
||||
}
|
||||
|
||||
int SearchTask::GetTopDocIdSetByWord(FieldInfo fieldInfo, vector<TopDocInfo>& doc_info) {
|
||||
if (DataManager::Instance()->IsSensitiveWord(fieldInfo.word)) {
|
||||
log_debug("%s is a sensitive word.", fieldInfo.word.c_str());
|
||||
return 0;
|
||||
SearchTask::~SearchTask() {
|
||||
if(component_ != NULL){
|
||||
delete component_;
|
||||
}
|
||||
|
||||
string word_new = stem(fieldInfo.word);
|
||||
bool bRet = false;
|
||||
vector<TopDocInfo> no_filter_docs;
|
||||
bRet = g_IndexInstance.GetTopDocInfo(m_appid, word_new, no_filter_docs);
|
||||
if (false == bRet) {
|
||||
log_error("GetTopDocInfo error.");
|
||||
return -RT_DTC_ERR;
|
||||
if(doc_manager_ != NULL){
|
||||
delete doc_manager_;
|
||||
}
|
||||
|
||||
if (0 == no_filter_docs.size())
|
||||
return 0;
|
||||
|
||||
if (component->SnapshotSwitch() == 1) {
|
||||
bRet = g_IndexInstance.TopDocValid(m_appid, no_filter_docs, doc_info);
|
||||
if (false == bRet) {
|
||||
log_error("GetTopDocInfo by snapshot error.");
|
||||
return -RT_DTC_ERR;
|
||||
}
|
||||
if (query_process_ != NULL){
|
||||
delete query_process_;
|
||||
}
|
||||
else {
|
||||
for (size_t i = 0; i < no_filter_docs.size(); i++)
|
||||
{
|
||||
TopDocInfo info = no_filter_docs[i];
|
||||
doc_info.push_back(info);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int SearchTask::GetTopDocScore(map<string, double>& top_doc_score)
|
||||
{
|
||||
vector<TopDocInfo> doc_info;
|
||||
for (size_t index = 0; index < component->Keys().size(); index++) {
|
||||
vector<FieldInfo> topInfos = component->Keys()[index];
|
||||
vector<FieldInfo>::iterator iter;
|
||||
for (iter = topInfos.begin(); iter != topInfos.end(); iter++) {
|
||||
int ret = GetTopDocIdSetByWord(*iter, doc_info);
|
||||
if (ret != 0) {
|
||||
return -RT_GET_DOC_ERR;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
double score = 0;
|
||||
for(size_t i = 0; i < doc_info.size(); i++)
|
||||
{
|
||||
score = (double)doc_info[i].weight;
|
||||
if (m_sort_type == DONT_SORT) {
|
||||
score = 1;
|
||||
} else if (m_sort_type == SORT_TIMESTAMP) {
|
||||
score = (double)doc_info[i].created_time;
|
||||
}
|
||||
|
||||
top_doc_score[doc_info[i].doc_id] = score;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int SearchTask::GetValidDoc(map<string, vec> &ves, vector<string> &word_vec, map<string, uint32_t> &key_in_doc, hash_double_map &distances, set<string> &valid_docs){
|
||||
vector<IndexInfo> doc_id_ver_vec; // 最终求完交集并集差集的结果
|
||||
|
||||
// key_or
|
||||
vector<IndexInfo> or_vecs;
|
||||
logical_operate->SetFunc(vec_union);
|
||||
int ret = logical_operate->Process(component->Keys(), or_vecs, highlightWord, ves, key_in_doc);
|
||||
if (ret != 0) {
|
||||
log_debug("logical_operate error.");
|
||||
return -RT_GET_DOC_ERR;
|
||||
}
|
||||
doc_id_ver_vec.assign(or_vecs.begin(), or_vecs.end());
|
||||
|
||||
if ((doc_id_ver_vec.size() == 0) && (component->Keys().size() != 0)) {
|
||||
log_debug("search result of keys is empty.");
|
||||
return 0;
|
||||
}
|
||||
log_debug("logical_operate begin: %lld.", (long long int)GetSysTimeMicros());
|
||||
// key_and
|
||||
vector<IndexInfo> and_vecs;
|
||||
logical_operate->SetFunc(vec_intersection);
|
||||
ret = logical_operate->Process(component->AndKeys(), and_vecs, highlightWord, ves, key_in_doc);
|
||||
if (ret != 0) {
|
||||
log_debug("logical_operate error.");
|
||||
return -RT_GET_DOC_ERR;
|
||||
}
|
||||
|
||||
if ((and_vecs.size() == 0) && (component->AndKeys().size() != 0)) {
|
||||
log_debug("search result of and_keys is empty.");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if(component->AndKeys().size() != 0){
|
||||
if(component->Keys().size() != 0){
|
||||
doc_id_ver_vec = vec_intersection(and_vecs, doc_id_ver_vec);
|
||||
} else {
|
||||
doc_id_ver_vec.assign(and_vecs.begin(), and_vecs.end());
|
||||
}
|
||||
}
|
||||
log_debug("logical_operate end: %lld.", (long long int)GetSysTimeMicros());
|
||||
// key_complete
|
||||
vector<IndexInfo> complete_vecs;
|
||||
ret = logical_operate->ProcessComplete(complete_keys, complete_vecs, word_vec, ves, key_in_doc);
|
||||
if (ret != 0) {
|
||||
return -RT_GET_DOC_ERR;
|
||||
}
|
||||
|
||||
if ((complete_vecs.size() == 0) && (complete_keys.size() != 0)) {
|
||||
log_debug("search result of complete_keys is empty.");
|
||||
return 0;
|
||||
}
|
||||
|
||||
if(complete_keys.size() != 0){
|
||||
if(component->AndKeys().size() == 0 && component->Keys().size() == 0){
|
||||
doc_id_ver_vec.assign(complete_vecs.begin(), complete_vecs.end());
|
||||
} else {
|
||||
doc_id_ver_vec = vec_intersection(doc_id_ver_vec, complete_vecs);
|
||||
}
|
||||
}
|
||||
|
||||
// key_invert,多个字段的结果先求并集,最后一起求差集
|
||||
vector<IndexInfo> invert_vecs;
|
||||
logical_operate->SetFunc(vec_union);
|
||||
ret = logical_operate->Process(component->InvertKeys(), invert_vecs, highlightWord, ves, key_in_doc);
|
||||
if (ret != 0) {
|
||||
return -RT_GET_DOC_ERR;
|
||||
}
|
||||
doc_id_ver_vec = vec_difference(doc_id_ver_vec, invert_vecs);
|
||||
|
||||
if (doc_id_ver_vec.size() == 0){
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool bRet = doc_manager->GetDocContent(m_has_gis, doc_id_ver_vec, valid_docs, distances);
|
||||
if (false == bRet) {
|
||||
log_error("GetDocContent error.");
|
||||
return -RT_DTC_ERR;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int SearchTask::GetDocScore(map<string, double>& top_doc_score)
|
||||
{
|
||||
/***
|
||||
关键词搜索的策略是:
|
||||
1)如果主查询词(包括keys,and_keys,complete_keys等)不为空,但查询结果为空,则表示没有符合查询条件的结果
|
||||
2)如果主查询词(包括keys,and_keys,complete_keys等)不为空,查询结果也不为空,则表示有符合查询条件的结果,记为S
|
||||
3)若域字段为空,则直接返回S,若域字段不为空,则需将S与域搜索结果F进行AND运算
|
||||
4)如果主查询词(包括keys,and_keys,complete_keys等)为空,则直接返回域搜索结果F
|
||||
***/
|
||||
|
||||
map<string, vec> ves; // statistic word information in the latitude of documents
|
||||
vector<string> word_vec;
|
||||
map<string, uint32_t> key_in_doc; // how many documents contains key
|
||||
hash_double_map distances;
|
||||
set<string> valid_docs;
|
||||
int ret = GetValidDoc(ves, word_vec, key_in_doc, distances, valid_docs);
|
||||
if (ret != 0){
|
||||
log_error("GetValidDoc error.");
|
||||
return -RT_GET_DOC_ERR;
|
||||
}
|
||||
log_debug("GetValidDoc end: %lld. valid_docs size: %d.", (long long int)GetSysTimeMicros(), (int)valid_docs.size());
|
||||
|
||||
// BM25 algorithm
|
||||
uint32_t doc_cnt = DOC_CNT;
|
||||
double k1 = 1.2;
|
||||
double k2 = 200;
|
||||
double K = 1.65;
|
||||
string doc_id;
|
||||
string keyword;
|
||||
uint32_t word_freq = 0;
|
||||
uint32_t field = 0;
|
||||
|
||||
if(m_sort_type == SORT_RELEVANCE || m_sort_type == SORT_TIMESTAMP){
|
||||
if(m_has_gis){
|
||||
hash_double_map::iterator dis_iter = distances.begin();
|
||||
for(; dis_iter != distances.end(); dis_iter++){
|
||||
doc_id = dis_iter->first;
|
||||
double score = dis_iter->second;
|
||||
if ((component->Distance() > -0.0001 && component->Distance() < 0.0001) || (score + 1e-6 <= component->Distance())){
|
||||
skipList.InsertNode(score, doc_id.c_str());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
map<string, vec>::iterator ves_iter = ves.begin();
|
||||
for (; ves_iter != ves.end(); ves_iter++) {
|
||||
double score = 0;
|
||||
uint32_t key_docs = 0;
|
||||
|
||||
doc_id = ves_iter->first;
|
||||
vector<KeyInfo> &key_info = ves_iter->second;
|
||||
if(valid_docs.find(doc_id) == valid_docs.end()){
|
||||
continue;
|
||||
}
|
||||
|
||||
if (m_sort_type == SORT_TIMESTAMP) { //按照时间排序
|
||||
score = (double)key_info[0].created_time;
|
||||
skipList.InsertNode(score, doc_id.c_str());
|
||||
continue;
|
||||
}
|
||||
|
||||
set<string> word_set;
|
||||
map<string, vector<int> > pos_map;
|
||||
map<string, vector<int> > title_pos_map;
|
||||
for (uint32_t i = 0; i < key_info.size(); i++) {
|
||||
keyword = key_info[i].word;
|
||||
if (word_set.find(keyword) == word_set.end()) {
|
||||
word_set.insert(keyword);
|
||||
}
|
||||
word_freq = key_info[i].word_freq;
|
||||
field = key_info[i].field;
|
||||
if (field == LOCATE_ANY) {
|
||||
pos_map[keyword] = key_info[i].pos_vec;
|
||||
}
|
||||
if (field == LOCATE_TITLE) {
|
||||
title_pos_map[keyword] = key_info[i].pos_vec;
|
||||
}
|
||||
key_docs = key_in_doc[keyword];
|
||||
score += log((doc_cnt - key_docs + 0.5) / (key_docs + 0.5)) * ((k1 + 1)*word_freq) / (K + word_freq) * (k2 + 1) * 1 / (k2 + 1);
|
||||
}
|
||||
if (!complete_keys.empty()) { // 完全匹配
|
||||
if (word_set.size() != word_vec.size()) { // 文章中出现的词语数量与输入的不一致,则不满足完全匹配
|
||||
continue;
|
||||
}
|
||||
else { // 在标题和正文中都不连续出现,则不满足
|
||||
if (CheckWordContinus(word_vec, pos_map) == false && CheckWordContinus(word_vec, title_pos_map) == false) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
skipList.InsertNode(score, doc_id.c_str());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
set<string>::iterator set_iter = valid_docs.begin();
|
||||
for(; set_iter != valid_docs.end(); set_iter++){
|
||||
doc_id = *set_iter;
|
||||
double score = 0;
|
||||
|
||||
if (top_doc_score.find(doc_id) != top_doc_score.end()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (m_sort_type == SORT_FIELD_ASC || m_sort_type == SORT_FIELD_DESC){
|
||||
//if(doc_manager->CheckDocByExtraFilterKey(doc_id) == false){
|
||||
// continue;
|
||||
//}
|
||||
doc_manager->GetScoreMap(doc_id, m_sort_type, m_sort_field, m_sort_field_type, m_appid);
|
||||
} else {
|
||||
skipList.InsertNode(1, doc_id.c_str());
|
||||
}
|
||||
|
||||
if (m_has_gis) {
|
||||
if (distances.find(doc_id) == distances.end())
|
||||
continue;
|
||||
score = distances[doc_id];
|
||||
if ((component->Distance() > -0.0001 && component->Distance() < 0.0001) || (score <= component->Distance()))
|
||||
skipList.InsertNode(score, doc_id.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 范围查的时候如果不指定排序类型,需要在这里对skipList进行赋值
|
||||
if (!m_has_gis && ves.size() == 0 && skipList.GetSize() == 0 && m_sort_type == SORT_RELEVANCE) {
|
||||
set<string>::iterator iter = valid_docs.begin();
|
||||
for(; iter != valid_docs.end(); iter++){
|
||||
skipList.InsertNode(1, (*iter).c_str());
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void SearchTask::AppendHighLightWord(Json::Value& response)
|
||||
{
|
||||
int count = 0;
|
||||
set<string>::iterator iter = highlightWord.begin();
|
||||
for (; iter != highlightWord.end(); iter++) {
|
||||
if (count >= 10)
|
||||
break;
|
||||
count = count + 1;
|
||||
response["hlWord"].append((*iter).c_str());
|
||||
}
|
||||
return ;
|
||||
}
|
||||
|
||||
int SearchTask::DoJob(CTaskRequest *request) {
|
||||
int ret = 0;
|
||||
|
||||
// terminal_tag=1时单独处理
|
||||
if(component->TerminalTag() == 1){
|
||||
uint32_t count = 0;
|
||||
uint32_t N = 2;
|
||||
uint32_t limit_start = 0;
|
||||
vector<TerminalRes> candidate_doc;
|
||||
int try_times = 0;
|
||||
while(count < component->PageSize()){
|
||||
if(try_times++ > 10){
|
||||
log_debug("ProcessTerminal try_times is the max, return");
|
||||
break;
|
||||
}
|
||||
vector<TerminalRes> and_vecs;
|
||||
TerminalQryCond query_cond;
|
||||
query_cond.sort_type = m_sort_type;
|
||||
query_cond.sort_field = m_sort_field;
|
||||
query_cond.last_id = component->LastId();
|
||||
query_cond.last_score = component->LastScore();
|
||||
query_cond.limit_start = limit_start;
|
||||
query_cond.page_size = component->PageSize() * N;
|
||||
ret = logical_operate->ProcessTerminal(component->AndKeys(), query_cond, and_vecs);
|
||||
if(0 != ret){
|
||||
log_error("ProcessTerminal error.");
|
||||
return -RT_GET_DOC_ERR;
|
||||
}
|
||||
for(int i = 0; i < (int)and_vecs.size(); i++){
|
||||
string doc_id = and_vecs[i].doc_id;
|
||||
stringstream ss;
|
||||
ss << (int)and_vecs[i].score;
|
||||
string ss_key = ss.str();
|
||||
log_debug("last_score: %s, ss_key: %s, score: %lf", query_cond.last_score.c_str(), ss_key.c_str(), and_vecs[i].score);
|
||||
if(component->LastId() != "" && ss_key == query_cond.last_score){ // 翻页时过滤掉已经返回过的文档编号
|
||||
if(m_sort_type == SORT_FIELD_DESC && doc_id >= component->LastId()){
|
||||
continue;
|
||||
}
|
||||
if(m_sort_type == SORT_FIELD_ASC && doc_id <= component->LastId()){
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if(doc_manager->CheckDocByExtraFilterKey(doc_id) == true){
|
||||
count++;
|
||||
candidate_doc.push_back(and_vecs[i]);
|
||||
}
|
||||
}
|
||||
limit_start += component->PageSize() * N;
|
||||
N *= 2;
|
||||
}
|
||||
Json::FastWriter writer;
|
||||
Json::Value response;
|
||||
response["code"] = 0;
|
||||
int sequence = -1;
|
||||
int rank = 0;
|
||||
for (uint32_t i = 0; i < candidate_doc.size(); i++) {
|
||||
if(rank >= (int)component->PageSize()){
|
||||
break;
|
||||
}
|
||||
sequence++;
|
||||
rank++;
|
||||
TerminalRes tmp = candidate_doc[i];
|
||||
Json::Value doc_info;
|
||||
doc_info["doc_id"] = Json::Value(tmp.doc_id.c_str());
|
||||
doc_info["score"] = Json::Value(tmp.score);
|
||||
response["result"].append(doc_info);
|
||||
}
|
||||
response["type"] = 0;
|
||||
response["count"] = rank; // TODO 这里的count并不是实际的总数
|
||||
std::string outputConfig = writer.write(response);
|
||||
request->setResult(outputConfig);
|
||||
return 0;
|
||||
}
|
||||
|
||||
map<string, double> top_doc_score;
|
||||
if (component->TopSwitch() == 1) {
|
||||
ret = GetTopDocScore(top_doc_score);
|
||||
if (ret != 0) {
|
||||
return -RT_GET_DOC_ERR;
|
||||
}
|
||||
}
|
||||
ret = GetDocScore(top_doc_score);
|
||||
if (ret != 0) {
|
||||
return -RT_GET_DOC_ERR;
|
||||
}
|
||||
Json::FastWriter writer;
|
||||
Json::Value response;
|
||||
response["code"] = 0;
|
||||
int sequence = -1;
|
||||
int rank = 0;
|
||||
int page_size = component->PageSize();
|
||||
int limit_start = page_size * (component->PageIndex()-1);
|
||||
int limit_end = page_size * (component->PageIndex()-1) + page_size - 1;
|
||||
|
||||
log_debug("search result begin.");
|
||||
vector<PAIR> top_vec(top_doc_score.begin(), top_doc_score.end());
|
||||
sort(top_vec.begin(), top_vec.end(), CmpByValue());
|
||||
|
||||
for (uint32_t i = 0; i < top_vec.size(); i++) {
|
||||
sequence++;
|
||||
rank++;
|
||||
if(component->ReturnAll() == 0){
|
||||
if (sequence < limit_start || sequence > limit_end) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
pair<string, double> tmp = top_vec[i];
|
||||
Json::Value doc_info;
|
||||
doc_info["doc_id"] = Json::Value(tmp.first.c_str());
|
||||
doc_info["score"] = Json::Value(tmp.second);
|
||||
response["result"].append(doc_info);
|
||||
}
|
||||
|
||||
if((m_sort_type == SORT_FIELD_DESC || m_sort_type == SORT_FIELD_ASC) && skipList.GetSize() == 0){
|
||||
OrderOpCond order_op_cond;
|
||||
order_op_cond.last_id = component->LastId();
|
||||
order_op_cond.limit_start = limit_start;
|
||||
order_op_cond.count = page_size;
|
||||
order_op_cond.has_extra_filter = false;
|
||||
if(component->ExtraFilterKeys().size() != 0 || component->ExtraFilterAndKeys().size() != 0 || component->ExtraFilterInvertKeys().size() != 0){
|
||||
order_op_cond.has_extra_filter = true;
|
||||
}
|
||||
if(m_sort_field_type == FIELDTYPE_INT){
|
||||
rank += doc_manager->ScoreIntMap().size();
|
||||
COrderOp<int> orderOp(FIELDTYPE_INT, component->SearchAfter(), m_sort_type);
|
||||
orderOp.Process(doc_manager->ScoreIntMap(), atoi(component->LastScore().c_str()), order_op_cond, response, doc_manager);
|
||||
} else if(m_sort_field_type == FIELDTYPE_DOUBLE) {
|
||||
rank += doc_manager->ScoreDoubleMap().size();
|
||||
COrderOp<double> orderOp(FIELDTYPE_DOUBLE, component->SearchAfter(), m_sort_type);
|
||||
orderOp.Process(doc_manager->ScoreDoubleMap(), atof(component->LastScore().c_str()), order_op_cond, response, doc_manager);
|
||||
} else {
|
||||
rank += doc_manager->ScoreStrMap().size();
|
||||
COrderOp<string> orderOp(FIELDTYPE_STRING, component->SearchAfter(), m_sort_type);
|
||||
orderOp.Process(doc_manager->ScoreStrMap(), component->LastScore(), order_op_cond, response, doc_manager);
|
||||
}
|
||||
} else if (m_has_gis || m_sort_type == SORT_FIELD_ASC) {
|
||||
log_debug("m_has_gis, size:%d ", skipList.GetSize());
|
||||
SkipListNode *tmp = skipList.GetHeader()->level[0].forward;
|
||||
while (tmp->level[0].forward != NULL) {
|
||||
// 通过extra_filter_keys进行额外过滤(针对区分度不高的字段)
|
||||
if(doc_manager->CheckDocByExtraFilterKey(tmp->value) == false){
|
||||
log_debug("CheckDocByExtraFilterKey failed, %s", tmp->value);
|
||||
tmp = tmp->level[0].forward;
|
||||
continue;
|
||||
}
|
||||
sequence++;
|
||||
rank++;
|
||||
if(component->ReturnAll() == 0){
|
||||
if (sequence < limit_start || sequence > limit_end) {
|
||||
tmp = tmp->level[0].forward;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
Json::Value doc_info;
|
||||
doc_info["doc_id"] = Json::Value(tmp->value);
|
||||
doc_info["score"] = Json::Value(tmp->key);
|
||||
response["result"].append(doc_info);
|
||||
tmp = tmp->level[0].forward;
|
||||
}
|
||||
} else {
|
||||
SkipListNode *tmp = skipList.GetFooter()->backward;
|
||||
while(tmp->backward != NULL) {
|
||||
if(doc_manager->CheckDocByExtraFilterKey(tmp->value) == false){
|
||||
tmp = tmp->backward;
|
||||
continue;
|
||||
}
|
||||
sequence++;
|
||||
rank++;
|
||||
if (component->ReturnAll() == 0){
|
||||
if (sequence < limit_start || sequence > limit_end) {
|
||||
tmp = tmp->backward;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
Json::Value doc_info;
|
||||
doc_info["doc_id"] = Json::Value(tmp->value);
|
||||
doc_info["score"] = Json::Value(tmp->key);
|
||||
response["result"].append(doc_info);
|
||||
tmp = tmp->backward;
|
||||
}
|
||||
}
|
||||
|
||||
if(m_fields.size() > 0){
|
||||
doc_manager->AppendFieldsToRes(response, m_fields);
|
||||
}
|
||||
|
||||
if (rank > 0)
|
||||
AppendHighLightWord(response);
|
||||
if (m_has_gis) {
|
||||
response["type"] = 1;
|
||||
}
|
||||
else {
|
||||
response["type"] = 0;
|
||||
}
|
||||
response["count"] = rank;
|
||||
if(m_index_set_cnt != 0){
|
||||
response["count"] = m_index_set_cnt;
|
||||
}
|
||||
log_debug("search result end: %lld.", (long long int)GetSysTimeMicros());
|
||||
std::string outputConfig = writer.write(response);
|
||||
request->setResult(outputConfig);
|
||||
if (component->ReturnAll() == 0 && component->CacheSwitch() == 1 && component->PageIndex() == 1 && m_has_gis == 0
|
||||
&& rank > 0 && outputConfig.size() < MAX_VALUE_LEN && m_Primary_Data != "") {
|
||||
string m_Data_Cache = m_Primary_Data + "|" + component->DataAnd() + "|" + component->DataInvert() + "|" + component->DataComplete() + "|" +
|
||||
ToString(m_sort_type) + "|" + ToString(m_appid);
|
||||
unsigned data_size = m_Data_Cache.size();
|
||||
int ret = cachelist->add_list(m_Data_Cache.c_str(), outputConfig.c_str(), data_size, outputConfig.size());
|
||||
if (ret != 0) {
|
||||
log_error("add to cache_list error, ret: %d.", ret);
|
||||
}
|
||||
else {
|
||||
log_debug("add to cache_list: %s.", m_Data_Cache.c_str());
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int SearchTask::Process(CTaskRequest *request)
|
||||
@ -579,110 +55,49 @@ int SearchTask::Process(CTaskRequest *request)
|
||||
log_debug("SearchTask::Process begin: %lld.", (long long int)GetSysTimeMicros());
|
||||
common::CallerInfo caller_info = common::ProfilerMonitor::GetInstance().RegisterInfo(std::string("searchEngine.searchService.searchTask"));
|
||||
Json::Value recv_packet;
|
||||
string request_string = request->buildRequsetString();
|
||||
if (component->ParseJson(request_string.c_str(), request_string.length(), recv_packet) != 0) {
|
||||
std::string request_string = request->buildRequsetString();
|
||||
if (component_->ParseJson(request_string.c_str(), request_string.length(), recv_packet) != 0) {
|
||||
string str = GenReplyStr(PARAMETER_ERR);
|
||||
request->setResult(str);
|
||||
common::ProfilerMonitor::GetInstance().FunctionError(caller_info);
|
||||
common::ProfilerMonitor::GetInstance().RegisterInfoEnd(caller_info);
|
||||
return -RT_PARSE_JSON_ERR;
|
||||
}
|
||||
|
||||
m_Primary_Data = component->Data();
|
||||
m_appid = component->Appid();
|
||||
m_sort_type = component->SortType();
|
||||
m_sort_field = component->SortField();
|
||||
if(component->Fields().size() > 0){
|
||||
m_fields.assign(component->Fields().begin(), component->Fields().end());
|
||||
}
|
||||
component_->InitSwitch();
|
||||
|
||||
skipList.InitList();
|
||||
component->InitSwitch();
|
||||
log_debug("m_Data: %s", m_Primary_Data.c_str());
|
||||
|
||||
m_query_ = component->GetQuery();
|
||||
if(m_query_.isObject()){
|
||||
if(m_query_.isMember("match")){
|
||||
query_process_ = new MatchQueryProcess(m_appid, m_query_["match"], component);
|
||||
} else {
|
||||
log_error("query type error.");
|
||||
return -RT_PARSE_JSON_ERR;
|
||||
}
|
||||
query_process_->SetSkipList(skipList);
|
||||
Json::Value query = component_->GetQuery();
|
||||
if(query.isObject()){
|
||||
if(query.isMember(MATCH)){
|
||||
query_process_ = new MatchQueryProcess(query[MATCH]);
|
||||
}else if(query.isMember(TERM)){
|
||||
query_process_ = new TermQueryProcess(query[TERM]);
|
||||
}else if (query.isMember(GEODISTANCE)){
|
||||
query_process_ = new GeoDistanceQueryProcess(query[GEODISTANCE]);
|
||||
}else if (query.isMember(GEOSHAPE)){
|
||||
query_process_ = new GeoShapeQueryProcess(query[GEOSHAPE]);
|
||||
}else if (query.isMember(RANGE)){
|
||||
query_process_ = RangeQueryGenerator::Instance()->GetRangeQueryProcess(E_INDEX_READ_RANGE , query[RANGE]);
|
||||
}else if (query.isMember(BOOL)){
|
||||
query_process_ = new BoolQueryProcess(query[BOOL]);
|
||||
}else{
|
||||
log_error("no suit query process.");
|
||||
return -RT_PARSE_JSON_ERR;
|
||||
}
|
||||
|
||||
query_process_->SetRequest(request);
|
||||
int ret = query_process_->DoJob();
|
||||
if(ret != 0){
|
||||
log_error("query_process_ DoJob error, ret: %d", ret);
|
||||
return ret;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
query_process_->SetComponent(component_);
|
||||
query_process_->SetDocManager(doc_manager_);
|
||||
|
||||
string err_msg = "";
|
||||
int ret = component->GetQueryWord(m_has_gis, err_msg);
|
||||
if (ret != 0) {
|
||||
string str = GenReplyStr(PARAMETER_ERR, err_msg);
|
||||
request->setResult(str);
|
||||
common::ProfilerMonitor::GetInstance().FunctionError(caller_info);
|
||||
return ret;
|
||||
}
|
||||
if(component->TerminalTag() == 1 && component->TerminalTagValid() == false){
|
||||
log_error("TerminalTag is 1 and TerminalTagValid is false.");
|
||||
common::ProfilerMonitor::GetInstance().FunctionError(caller_info);
|
||||
return -RT_PARSE_JSON_ERR;
|
||||
}
|
||||
doc_manager = new DocManager(component);
|
||||
|
||||
log_debug("cache_switch: %u", component->CacheSwitch());
|
||||
if (component->ReturnAll() == 0 && component->CacheSwitch() == 1 && component->PageIndex() == 1 && m_Primary_Data != "" && m_has_gis == 0) {
|
||||
string m_Data_Cache = m_Primary_Data + "|" + component->DataAnd() + "|" + component->DataInvert() + "|" + component->DataComplete() + "|" +
|
||||
ToString(m_sort_type) + "|" + ToString(m_appid);
|
||||
uint8_t value[MAX_VALUE_LEN] = { 0 };
|
||||
unsigned vsize = 0;
|
||||
if (cachelist->in_list(m_Data_Cache.c_str(), m_Data_Cache.size(), value, vsize))
|
||||
{
|
||||
statmgr.GetItemU32(INDEX_SEARCH_HIT_CACHE)++;
|
||||
log_debug("hit cache.");
|
||||
value[vsize] = '\0';
|
||||
std::string outputConfig = (char *)value;
|
||||
request->setResult(outputConfig);
|
||||
int ret = query_process_->StartQuery();
|
||||
if(ret != 0){
|
||||
std::string str = GenReplyStr(PARAMETER_ERR);
|
||||
request->setResult(str);
|
||||
common::ProfilerMonitor::GetInstance().FunctionError(caller_info);
|
||||
common::ProfilerMonitor::GetInstance().RegisterInfoEnd(caller_info);
|
||||
return 0;
|
||||
log_error("query_process_ StartQuery error, ret: %d", ret);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
if (component->DataComplete() != "") {
|
||||
FieldInfo fieldInfo;
|
||||
string split_data = SplitManager::Instance()->split(component->DataComplete(), m_appid);
|
||||
log_debug("complete split_data: %s", split_data.c_str());
|
||||
vector<string> split_datas = splitEx(split_data, "|");
|
||||
for(size_t i = 0; i < split_datas.size(); i++) {
|
||||
fieldInfo.word = split_datas[i];
|
||||
complete_keys.push_back(fieldInfo);
|
||||
}
|
||||
}
|
||||
logical_operate = new LogicalOperate(m_appid, m_sort_type, m_has_gis, component->CacheSwitch());
|
||||
ret = DoJob(request);
|
||||
if (ret != 0) {
|
||||
string str = GenReplyStr(PARAMETER_ERR);
|
||||
request->setResult(str);
|
||||
common::ProfilerMonitor::GetInstance().FunctionError(caller_info);
|
||||
common::ProfilerMonitor::GetInstance().RegisterInfoEnd(caller_info);
|
||||
return ret;
|
||||
}
|
||||
common::ProfilerMonitor::GetInstance().RegisterInfoEnd(caller_info);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
SearchTask::~SearchTask() {
|
||||
if(component != NULL){
|
||||
delete component;
|
||||
}
|
||||
if(logical_operate != NULL){
|
||||
delete logical_operate;
|
||||
}
|
||||
if(doc_manager != NULL){
|
||||
delete doc_manager;
|
||||
}
|
||||
}
|
||||
}
|
@ -26,57 +26,28 @@
|
||||
#include "index_tbl_op.h"
|
||||
#include "task_request.h"
|
||||
#include "skiplist.h"
|
||||
#include "component.h"
|
||||
#include "logical_operate.h"
|
||||
#include "request_context.h"
|
||||
#include "valid_doc_filter.h"
|
||||
#include "doc_manager.h"
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
#include "task_request.h"
|
||||
#include "process/query_process.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
typedef vector<KeyInfo> vec;
|
||||
|
||||
class SearchTask : public ProcessTask
|
||||
{
|
||||
public:
|
||||
SearchTask();
|
||||
virtual int Process(CTaskRequest *request);
|
||||
virtual ~SearchTask();
|
||||
SearchTask();
|
||||
virtual ~SearchTask();
|
||||
|
||||
public:
|
||||
virtual int Process(CTaskRequest *request);
|
||||
|
||||
private:
|
||||
int DoJob(CTaskRequest *request);
|
||||
int GetTopDocIdSetByWord(FieldInfo fieldInfo, vector<TopDocInfo>& doc_info);
|
||||
int GetTopDocScore(map<string, double>& top_doc_score);
|
||||
int GetDocScore(map<string, double>& top_doc_score);
|
||||
int GetValidDoc(map<string, vec> &ves, vector<string> &word_vec, map<string, uint32_t> &key_in_doc, hash_double_map &distances, set<string> &valid_docs);
|
||||
void AppendHighLightWord(Json::Value& response);
|
||||
|
||||
private:
|
||||
Component *component;
|
||||
LogicalOperate *logical_operate;
|
||||
DocManager *doc_manager;
|
||||
vector<FieldInfo> complete_keys;
|
||||
|
||||
string m_Primary_Data;
|
||||
FIELDTYPE m_sort_field_type;
|
||||
uint32_t m_index_set_cnt;
|
||||
uint32_t m_appid;
|
||||
uint32_t m_sort_type;
|
||||
string m_sort_field;
|
||||
vector<string> m_fields;
|
||||
|
||||
uint32_t m_has_gis; //该appid是否包含有地理位置gis信息的查询
|
||||
set<string> highlightWord;
|
||||
SkipList skipList;
|
||||
QueryProcess* query_process_;
|
||||
Json::Value m_query_;
|
||||
RequestContext* component_;
|
||||
DocManager* doc_manager_;
|
||||
QueryProcess* query_process_;
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -28,7 +28,9 @@
|
||||
#include "comm.h"
|
||||
#include "search_conf.h"
|
||||
#include "index_tbl_op.h"
|
||||
#include "result_context.h"
|
||||
using namespace std;
|
||||
struct GeoPointContext;
|
||||
|
||||
vector<int> splitInt(const string& src, string separate_character);
|
||||
set<string> splitStr(const string& src, string separate_character);
|
||||
@ -57,17 +59,17 @@ set<string> sets_intersection(set<string> v1, set<string> v2); // 集合求交
|
||||
set<string> sets_union(set<string> v1, set<string> v2); // 集合求并集
|
||||
set<string> sets_difference(set<string> v1, set<string> v2); // 集合求差集
|
||||
double strToDouble(const string& str);
|
||||
bool GetGisDistance(uint32_t appid, const string& latLeft, const string& lngLeft, hash_double_map& distances, hash_string_map& doc_content);
|
||||
bool GetGisDistance(uint32_t appid, const GeoPointContext& geo_point, const hash_string_map& doc_content , hash_double_map& distances);
|
||||
void ConvertCharIntelligent(const string word, IntelligentInfo &info, int &len);
|
||||
void ConvertIntelligent(const vector<Content> &result, IntelligentInfo &info, bool &flag);
|
||||
bool GetGisCode(string lng, string lat, string ip, double distance, vector<string>& gisCode);
|
||||
bool GetGisCode(const vector<string>& lng_arr, const vector<string>& lat_arr, vector<string>& gisCode);
|
||||
uint32_t GetIpNum(string ip);
|
||||
int ShiftIntelligentInfo(IntelligentInfo &info, int len);
|
||||
bool GetSuggestDoc(FieldInfo& fieldInfo, uint32_t len, const IntelligentInfo &info, vector<IndexInfo> &doc_id_set);
|
||||
bool GetSuggestDoc(FieldInfo& fieldInfo, uint32_t len, const IntelligentInfo &info, vector<IndexInfo> &doc_id_set, uint32_t appid);
|
||||
bool GetSuggestDocWithoutCharacter(FieldInfo& fieldInfo, uint32_t len, const IntelligentInfo &info, vector<IndexInfo> &doc_id_set);
|
||||
int GetDocByShiftWord(FieldInfo fieldInfo, vector<IndexInfo> &doc_id_set, uint32_t appid, set<string>& highlightWord);
|
||||
int GetDocByShiftEnWord(FieldInfo fieldInfo, vector<IndexInfo> &doc_id_set, uint32_t appid, set<string>& highlightWord);
|
||||
int GetDocByShiftWord(FieldInfo fieldInfo, vector<IndexInfo> &doc_id_set, uint32_t appid);
|
||||
int GetDocByShiftEnWord(FieldInfo fieldInfo, vector<IndexInfo> &doc_id_set, uint32_t appid);
|
||||
uint64_t GetSysTimeMicros();
|
||||
string trim(string& str);
|
||||
string delPrefix(string& str);
|
||||
|
@ -0,0 +1,23 @@
|
||||
#include "geo_query_sort_operator.h"
|
||||
|
||||
GeoQuerySortOperator::GeoQuerySortOperator(RequestContext* request_cnt, DocManager* doc_manager)
|
||||
: SortOperatorBase(request_cnt , doc_manager)
|
||||
{}
|
||||
|
||||
GeoQuerySortOperator::~GeoQuerySortOperator()
|
||||
{}
|
||||
|
||||
void GeoQuerySortOperator::RelevanceSort()
|
||||
{
|
||||
log_debug("relevance score sort type");
|
||||
const std::vector<IndexInfo>& o_index_info_vet = ResultContext::Instance()->GetIndexInfos();
|
||||
std::set<std::string>::iterator valid_docs_iter = p_valid_docs_set_->begin();
|
||||
for(; valid_docs_iter != p_valid_docs_set_->end(); valid_docs_iter++){
|
||||
std::vector<IndexInfo>::const_iterator index_info_iter = o_index_info_vet.cbegin();
|
||||
for (; index_info_iter != o_index_info_vet.cend(); ++index_info_iter){
|
||||
if ((*valid_docs_iter) == (index_info_iter->doc_id)){
|
||||
scoredocid_set_.insert(ScoreDocIdNode(index_info_iter->distance , index_info_iter->doc_id));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,15 @@
|
||||
#ifndef GEO_QUERY_SORT_OPERATOR_H_
|
||||
#define GEO_QUERY_SORT_OPERATOR_H_
|
||||
|
||||
#include "sort_operator_base.h"
|
||||
|
||||
class GeoQuerySortOperator : public SortOperatorBase
|
||||
{
|
||||
public:
|
||||
GeoQuerySortOperator(RequestContext* request_cnt , DocManager* doc_manager);
|
||||
virtual~ GeoQuerySortOperator();
|
||||
|
||||
private:
|
||||
virtual void RelevanceSort();
|
||||
};
|
||||
#endif
|
101
src/search_local/index_read/sort_operator/sort_operator_base.cc
Normal file
101
src/search_local/index_read/sort_operator/sort_operator_base.cc
Normal file
@ -0,0 +1,101 @@
|
||||
#include "sort_operator_base.h"
|
||||
|
||||
SortOperatorBase::SortOperatorBase(RequestContext* request_cnt , DocManager* doc_manager)
|
||||
: component_(request_cnt)
|
||||
, doc_manager_(doc_manager)
|
||||
, p_valid_docs_set_(ResultContext::Instance()->GetValidDocs())
|
||||
, scoredocid_set_()
|
||||
, sort_field_type_()
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
SortOperatorBase::~SortOperatorBase()
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
std::set<ScoreDocIdNode>* SortOperatorBase::GetSortOperator(uint32_t ui_sort_type)
|
||||
{
|
||||
log_debug("GetSortOperator beginning...");
|
||||
switch (ui_sort_type)
|
||||
{
|
||||
case SORT_RELEVANCE:
|
||||
case SORT_GEO_DISTANCE:
|
||||
{
|
||||
RelevanceSort();
|
||||
}
|
||||
break;
|
||||
case DONT_SORT: {
|
||||
NoneSort();
|
||||
}
|
||||
break;
|
||||
case SORT_FIELD_ASC:
|
||||
case SORT_FIELD_DESC:
|
||||
{
|
||||
AssignFieldSort();
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return (&scoredocid_set_);
|
||||
}
|
||||
|
||||
void SortOperatorBase::RelevanceSort()
|
||||
{
|
||||
// 按照相关度得分,并以此排序
|
||||
log_debug("relevance score sort type");
|
||||
// 范围查的时候如果不指定排序类型,需要在这里对skipList进行赋值
|
||||
const DocKeyinfosMap& docid_keyinfovet_map = ResultContext::Instance()->GetDocKeyinfosMap();
|
||||
if (docid_keyinfovet_map.empty() && scoredocid_set_.empty()) {
|
||||
std::set<std::string>::iterator iter = p_valid_docs_set_->begin();
|
||||
for(; iter != p_valid_docs_set_->end(); iter++){
|
||||
scoredocid_set_.insert(ScoreDocIdNode(1,*iter));
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
std::map<std::string, KeyInfoVet>::const_iterator docid_keyinfovet_iter = docid_keyinfovet_map.cbegin();
|
||||
for (; docid_keyinfovet_iter != docid_keyinfovet_map.cend(); ++ docid_keyinfovet_iter){
|
||||
std::string doc_id = docid_keyinfovet_iter->first;
|
||||
const KeyInfoVet& key_info = docid_keyinfovet_iter->second;
|
||||
|
||||
if(p_valid_docs_set_->find(doc_id) == p_valid_docs_set_->end()){
|
||||
continue;
|
||||
}
|
||||
|
||||
double score = 0.0;
|
||||
for (uint32_t i = 0; i < key_info.size(); i++) {
|
||||
std::string keyword = key_info[i].word;
|
||||
uint32_t ui_word_freq = key_info[i].word_freq;
|
||||
uint32_t ui_doc_count = ResultContext::Instance()->GetKeywordDoccountMap(keyword);
|
||||
score += log((DOC_CNT - ui_doc_count + 0.5) / (ui_doc_count + 0.5)) * ((D_BM25_K1 + 1)*ui_word_freq) \
|
||||
/ (D_BM25_K + ui_word_freq) * (D_BM25_K2 + 1) * 1 / (D_BM25_K2 + 1);
|
||||
log_debug("loop score[%d]:%f", i , score);
|
||||
}
|
||||
scoredocid_set_.insert(ScoreDocIdNode(score , doc_id));
|
||||
}
|
||||
}
|
||||
|
||||
void SortOperatorBase::NoneSort()
|
||||
{
|
||||
log_debug("no sort type");
|
||||
std::set<std::string>::iterator valid_docs_iter = p_valid_docs_set_->begin();
|
||||
for(; valid_docs_iter != p_valid_docs_set_->end(); valid_docs_iter++){
|
||||
std::string doc_id = *valid_docs_iter;
|
||||
scoredocid_set_.insert(ScoreDocIdNode(1 , doc_id));
|
||||
}
|
||||
}
|
||||
|
||||
void SortOperatorBase::AssignFieldSort()
|
||||
{
|
||||
std::set<std::string>::iterator valid_docs_iter = p_valid_docs_set_->begin();
|
||||
for(; valid_docs_iter != p_valid_docs_set_->end(); valid_docs_iter++){
|
||||
std::string doc_id = *valid_docs_iter;
|
||||
doc_manager_->GetScoreMap(doc_id, component_->SortType()
|
||||
, component_->SortField(), sort_field_type_);
|
||||
}
|
||||
log_debug("assign field sort type , order option:%d" , (int)sort_field_type_);
|
||||
}
|
@ -0,0 +1,36 @@
|
||||
#ifndef SORT_OPERATOR_BASE_H_
|
||||
#define SORT_OPERATOR_BASE_H_
|
||||
|
||||
#include <set>
|
||||
#include "log.h"
|
||||
#include "../comm.h"
|
||||
#include "../result_context.h"
|
||||
#include "../request_context.h"
|
||||
#include "../doc_manager.h"
|
||||
|
||||
class SortOperatorBase
|
||||
{
|
||||
public:
|
||||
SortOperatorBase(RequestContext* request_cnt , DocManager* doc_manager);
|
||||
virtual ~SortOperatorBase();
|
||||
|
||||
public:
|
||||
std::set<ScoreDocIdNode>* GetSortOperator(uint32_t ui_sort_type);
|
||||
int GetSortFieldType() { return sort_field_type_;};
|
||||
|
||||
protected:
|
||||
virtual void RelevanceSort();
|
||||
virtual void NoneSort();
|
||||
virtual void AssignFieldSort();
|
||||
|
||||
protected:
|
||||
RequestContext* component_;
|
||||
DocManager* doc_manager_;
|
||||
ValidDocSet* p_valid_docs_set_;
|
||||
std::set<ScoreDocIdNode> scoredocid_set_;
|
||||
|
||||
private:
|
||||
FIELDTYPE sort_field_type_;
|
||||
};
|
||||
|
||||
#endif
|
32
src/search_local/index_read/test/timer_counter.h
Normal file
32
src/search_local/index_read/test/timer_counter.h
Normal file
@ -0,0 +1,32 @@
|
||||
#ifndef TIMER_COUNTER_H_
|
||||
#define TIMER_COUNTER_H_
|
||||
|
||||
#include <sys/time.h>
|
||||
|
||||
// Timer::GetInstance()->Start();
|
||||
// log_error("costtime:%f",Timer::GetInstance()->Stop());
|
||||
class Timer{
|
||||
public:
|
||||
Timer() {};
|
||||
~Timer(){};
|
||||
public:
|
||||
static Timer* GetInstance(){
|
||||
static Timer timer;
|
||||
return &timer;
|
||||
};
|
||||
public:
|
||||
void Start(){
|
||||
gettimeofday(&tm_start_ , NULL);
|
||||
};
|
||||
|
||||
float Stop(){
|
||||
timeval tm_stop;
|
||||
gettimeofday(&tm_stop , NULL);
|
||||
|
||||
return ((tm_stop.tv_sec - tm_start_.tv_sec)*1000000 \
|
||||
+ tm_stop.tv_usec - tm_start_.tv_usec) / 100000;
|
||||
};
|
||||
private:
|
||||
timeval tm_start_;
|
||||
};
|
||||
#endif
|
@ -147,6 +147,10 @@ std::vector<DocIdEntry<T> > CMaxHeap<T>::getNumbers(vector<DocIdEntry<T> >& arr,
|
||||
}
|
||||
}
|
||||
|
||||
if (max_heap_vec.empty()){
|
||||
return max_heap_vec;
|
||||
}
|
||||
|
||||
buildMaxHeap(max_heap_vec);
|
||||
for(int i = k; i < (int)arr.size(); ++i){
|
||||
// 出现比堆顶元素小且大于last_value的值, 置换堆顶元素, 并调整堆
|
||||
@ -171,6 +175,11 @@ std::vector<DocIdEntry<T> > CMaxHeap<T>::getNumbers(vector<DocIdEntry<T> >& arr,
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (max_heap_vec.empty()){
|
||||
return max_heap_vec;
|
||||
}
|
||||
|
||||
buildMaxHeap(max_heap_vec);
|
||||
for(int i = k; i < (int)arr.size(); ++i){
|
||||
// 出现比堆顶元素小的值, 置换堆顶元素, 并调整堆
|
||||
|
350
src/search_local/index_read/valid_doc_filter.cc
Normal file
350
src/search_local/index_read/valid_doc_filter.cc
Normal file
@ -0,0 +1,350 @@
|
||||
/*
|
||||
* =====================================================================================
|
||||
*
|
||||
* Filename: valid_doc_filter.h
|
||||
*
|
||||
* Description: logical operate class definition.
|
||||
*
|
||||
* Version: 1.0
|
||||
* Created: 09/08/2018
|
||||
* Revision: none
|
||||
* Compiler: gcc
|
||||
*
|
||||
* Author: zhulin, shzhulin3@jd.com
|
||||
* Modified by: chenyujie ,chenyujie28@jd.com
|
||||
* Company: JD.com, Inc.
|
||||
*
|
||||
* =====================================================================================
|
||||
*/
|
||||
|
||||
#include "valid_doc_filter.h"
|
||||
#include "search_util.h"
|
||||
#include "cachelist_unit.h"
|
||||
#include "data_manager.h"
|
||||
#include "json/reader.h"
|
||||
#include "json/writer.h"
|
||||
#include "index_tbl_op.h"
|
||||
#include "index_sync/sync_index_timer.h"
|
||||
#include "index_sync/sequence_search_index.h"
|
||||
#include "stem.h"
|
||||
#include "key_format.h"
|
||||
#include <sstream>
|
||||
#include <iomanip>
|
||||
|
||||
extern SyncIndexTimer* globalSyncIndexTimer;
|
||||
extern CCacheListUnit* indexcachelist;
|
||||
|
||||
ValidDocFilter::ValidDocFilter()
|
||||
: p_data_base_(NULL)
|
||||
{ }
|
||||
|
||||
ValidDocFilter::~ValidDocFilter()
|
||||
{ }
|
||||
|
||||
//汉拼无需memcomparable format
|
||||
int ValidDocFilter::HanPinTextInvertIndexSearch(const std::vector<FieldInfo>& keys
|
||||
, std::vector<IndexInfo>& index_info_vet){
|
||||
if (keys.empty()){
|
||||
return -RT_GET_FIELD_ERROR;
|
||||
}
|
||||
std::vector<FieldInfo>::const_iterator iter = keys.cbegin();
|
||||
for (; iter != keys.cend(); ++iter){
|
||||
std::vector<IndexInfo> doc_info;
|
||||
if ((iter->segment_tag) == SEGMENT_CHINESE) {
|
||||
int ret = GetDocByShiftWord(*iter, doc_info, p_data_base_->Appid());
|
||||
if (ret != 0) {
|
||||
index_info_vet.clear();
|
||||
return -RT_GET_DOC_ERR;
|
||||
}
|
||||
std::sort(doc_info.begin(), doc_info.end());
|
||||
for (size_t doc_info_idx = 0; doc_info_idx < doc_info.size(); doc_info_idx++){
|
||||
KeyInfo info;
|
||||
info.word_freq = 1;
|
||||
info.field = (iter->field);
|
||||
info.word = (iter->word);
|
||||
ResultContext::Instance()->SetDocKeyinfoMap(doc_info[doc_info_idx].doc_id , info);
|
||||
}
|
||||
} else if ((iter->segment_tag) == SEGMENT_ENGLISH) {
|
||||
int ret = GetDocByShiftEnWord(*iter, doc_info, p_data_base_->Appid());
|
||||
if (ret != 0) {
|
||||
index_info_vet.clear();
|
||||
return -RT_GET_DOC_ERR;
|
||||
}
|
||||
std::sort(doc_info.begin(), doc_info.end());
|
||||
for (size_t doc_info_idx = 0; doc_info_idx < doc_info.size(); doc_info_idx++){
|
||||
KeyInfo info;
|
||||
info.word_freq = 1;
|
||||
info.field = (iter->field);
|
||||
info.word = (iter->word);
|
||||
ResultContext::Instance()->SetDocKeyinfoMap(doc_info[doc_info_idx].doc_id , info);
|
||||
}
|
||||
}
|
||||
index_info_vet = Union(index_info_vet, doc_info);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ValidDocFilter::RangeQueryInvertIndexSearch(const std::vector<FieldInfo>& keys
|
||||
, std::vector<IndexInfo>& index_info_vet){
|
||||
if (keys.empty()){
|
||||
return -RT_GET_DOC_ERR;
|
||||
}
|
||||
std::vector<FieldInfo>::const_iterator iter = keys.cbegin();
|
||||
for (; iter != keys.cend(); ++iter){
|
||||
std::vector<IndexInfo> doc_info;
|
||||
log_debug("segment:%d , word:%s , field:%d, fieldtype:%d" ,
|
||||
iter->segment_tag ,iter->word.c_str() ,
|
||||
iter->field , iter->field_type);
|
||||
if (iter->word.empty()){
|
||||
std::stringstream ss;
|
||||
ss << p_data_base_->Appid();
|
||||
InvertIndexEntry startEntry(ss.str(), iter->field, iter->field_type , iter->start);
|
||||
InvertIndexEntry endEntry(ss.str(), iter->field, iter->field_type , iter->end);
|
||||
std::vector<InvertIndexEntry> resultEntry;
|
||||
globalSyncIndexTimer->GetSearchIndex()->GetRangeIndex(iter->range_type, startEntry, endEntry, resultEntry);
|
||||
std::vector<InvertIndexEntry>::iterator res_iter = resultEntry.begin();
|
||||
for (; res_iter != resultEntry.end(); res_iter ++) {
|
||||
IndexInfo info;
|
||||
info.doc_id = res_iter->_InvertIndexDocId;
|
||||
info.doc_version = res_iter->_InvertIndexDocVersion;
|
||||
doc_info.push_back(info);
|
||||
}
|
||||
log_debug("appid: %s, field: %d, count: %d", startEntry._InvertIndexAppid.c_str(), iter->field, (int)resultEntry.size());
|
||||
}
|
||||
index_info_vet = Union(index_info_vet, doc_info);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ValidDocFilter::TextInvertIndexSearch(const std::vector<FieldInfo>& keys, std::vector<IndexInfo>& index_info_vet){
|
||||
if (keys.empty()){
|
||||
return -RT_GET_FIELD_ERROR;
|
||||
}
|
||||
|
||||
std::vector<FieldInfo>::const_iterator iter = keys.cbegin();
|
||||
for (; iter != keys.cend(); ++iter){
|
||||
std::vector<IndexInfo> doc_info;
|
||||
int ret = GetDocIdSetByWord(*iter, doc_info);
|
||||
if (ret != 0){
|
||||
return -RT_GET_DOC_ERR;
|
||||
}
|
||||
if (doc_info.size() == 0)
|
||||
continue;
|
||||
if (!p_data_base_->GetHasGisFlag() || !isAllNumber(iter->word)){
|
||||
if (iter->field_type != FIELD_INDEX){
|
||||
ResultContext::Instance()->SetHighLightWordSet(iter->word);
|
||||
}
|
||||
}
|
||||
if(!p_data_base_->GetHasGisFlag() && (SORT_RELEVANCE == p_data_base_->SortType())){
|
||||
CalculateByWord(*iter, doc_info);
|
||||
}
|
||||
index_info_vet = Union(index_info_vet, doc_info);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ValidDocFilter::ProcessTerminal(const std::vector<std::vector<FieldInfo> >& and_keys, const TerminalQryCond& query_cond, std::vector<TerminalRes>& vecs){
|
||||
if(and_keys.size() != 1){
|
||||
return 0;
|
||||
}
|
||||
std::vector<FieldInfo> field_vec = and_keys[0];
|
||||
if(field_vec.size() != 1){
|
||||
return 0;
|
||||
}
|
||||
FieldInfo field_info = field_vec[0];
|
||||
if(field_info.segment_tag != SEGMENT_RANGE){
|
||||
return 0;
|
||||
}
|
||||
|
||||
std::stringstream ss;
|
||||
ss << p_data_base_->Appid();
|
||||
InvertIndexEntry beginEntry(ss.str(), field_info.field, field_info.field_type , field_info.start);
|
||||
InvertIndexEntry endEntry(ss.str(), field_info.field, field_info.field_type , field_info.end);
|
||||
std::vector<InvertIndexEntry> resultEntry;
|
||||
globalSyncIndexTimer->GetSearchIndex()->GetRangeIndexInTerminal(field_info.range_type, beginEntry, endEntry, query_cond, resultEntry);
|
||||
std::vector<InvertIndexEntry>::iterator iter = resultEntry.begin();
|
||||
for (; iter != resultEntry.end(); iter ++) {
|
||||
TerminalRes info;
|
||||
info.doc_id = (*iter)._InvertIndexDocId;
|
||||
info.score = (*iter)._InvertIndexKey;
|
||||
vecs.push_back(info);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void ValidDocFilter::CalculateByWord(FieldInfo fieldInfo, const std::vector<IndexInfo>& doc_info) {
|
||||
std::vector<IndexInfo>::const_iterator iter = doc_info.cbegin();
|
||||
for ( ; iter != doc_info.cend(); ++iter) {
|
||||
std::string pos_str = iter->pos;
|
||||
std::vector<int> pos_vec;
|
||||
if (pos_str != "" && pos_str.size() > 2) {
|
||||
pos_str = pos_str.substr(1, pos_str.size() - 2);
|
||||
pos_vec = splitInt(pos_str, ",");
|
||||
}
|
||||
KeyInfo info;
|
||||
info.word_freq = iter->word_freq;
|
||||
info.field = iter->field;
|
||||
info.word = fieldInfo.word;
|
||||
info.created_time = iter->created_time;
|
||||
info.pos_vec = pos_vec;
|
||||
ResultContext::Instance()->SetDocKeyinfoMap(iter->doc_id , info);
|
||||
}
|
||||
ResultContext::Instance()->SetWordDoccountMap(fieldInfo.word , doc_info.size());
|
||||
}
|
||||
|
||||
|
||||
bool ValidDocFilter::GetDocIndexCache(std::string word, uint32_t field, std::vector<IndexInfo>& doc_info) {
|
||||
log_debug("get doc index start");
|
||||
bool res = false;
|
||||
uint8_t value[MAX_VALUE_LEN] = { 0 };
|
||||
unsigned vsize = 0;
|
||||
std::string output = "";
|
||||
std::string indexCache = word + "|" + ToString(field);
|
||||
if (p_data_base_->CacheSwitch() == 1 && indexcachelist->in_list(indexCache.c_str(), indexCache.size(), value, vsize))
|
||||
{
|
||||
log_debug("hit index cache.");
|
||||
value[vsize] = '\0';
|
||||
output = (char *)value;
|
||||
res = true;
|
||||
}
|
||||
|
||||
if (res) {
|
||||
Json::Value packet;
|
||||
Json::Reader r(Json::Features::strictMode());
|
||||
int ret;
|
||||
ret = r.parse(output.c_str(), output.c_str() + output.size(), packet);
|
||||
if (0 == ret)
|
||||
{
|
||||
log_error("the err json string is : %s, errmsg : %s", output.c_str(), r.getFormattedErrorMessages().c_str());
|
||||
res = false;
|
||||
return res;
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < packet.size(); ++i) {
|
||||
IndexInfo info;
|
||||
Json::Value& index_cache = packet[i];
|
||||
if (index_cache.isMember("appid") && index_cache["appid"].isUInt() &&
|
||||
index_cache.isMember("id") && index_cache["id"].isString() &&
|
||||
index_cache.isMember("version") && index_cache["version"].isUInt() &&
|
||||
index_cache.isMember("field") && index_cache["field"].isUInt() &&
|
||||
index_cache.isMember("freq") && index_cache["freq"].isUInt() &&
|
||||
index_cache.isMember("time") && index_cache["time"].isUInt() &&
|
||||
index_cache.isMember("pos") && index_cache["pos"].isString() &&
|
||||
index_cache.isMember("extend") && index_cache["extend"].isString())
|
||||
{
|
||||
info.appid = index_cache["appid"].asUInt();
|
||||
info.doc_id = index_cache["id"].asString();
|
||||
info.doc_version = index_cache["version"].asUInt();
|
||||
info.field = index_cache["field"].asUInt();
|
||||
info.word_freq = index_cache["freq"].asUInt();
|
||||
info.created_time = index_cache["time"].asUInt();
|
||||
info.pos = index_cache["pos"].asString();
|
||||
info.extend = index_cache["extend"].asString();
|
||||
doc_info.push_back(info);
|
||||
}
|
||||
else {
|
||||
log_error("parse index_cache error, no appid");
|
||||
doc_info.clear();
|
||||
res = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
void ValidDocFilter::SetDocIndexCache(const std::vector<IndexInfo>& doc_info, std::string& indexJsonStr) {
|
||||
Json::Value indexJson;
|
||||
std::vector<IndexInfo>::const_iterator iter = doc_info.cbegin();
|
||||
for ( ; iter != doc_info.cend(); ++iter) {
|
||||
Json::Value json_tmp;
|
||||
json_tmp["appid"] = iter->appid;
|
||||
json_tmp["id"] = iter->doc_id;
|
||||
json_tmp["version"] = iter->doc_version;
|
||||
json_tmp["field"] = iter->field;
|
||||
json_tmp["freq"] = iter->word_freq;
|
||||
json_tmp["time"] = iter->created_time;
|
||||
json_tmp["pos"] = iter->pos;
|
||||
json_tmp["extend"] = iter->extend;
|
||||
indexJson.append(json_tmp);
|
||||
}
|
||||
|
||||
Json::FastWriter writer;
|
||||
indexJsonStr = writer.write(indexJson);
|
||||
}
|
||||
|
||||
int ValidDocFilter::GetDocIdSetByWord(FieldInfo fieldInfo, std::vector<IndexInfo> &doc_info) {
|
||||
bool bRet = false;
|
||||
if (DataManager::Instance()->IsSensitiveWord(fieldInfo.word)) {
|
||||
log_debug("%s is a sensitive word.", fieldInfo.word.c_str());
|
||||
return 0;
|
||||
}
|
||||
|
||||
std::stringstream ss_key;
|
||||
ss_key << p_data_base_->Appid();
|
||||
ss_key << "#00#";
|
||||
|
||||
if (FIELD_IP == fieldInfo.field_type) {
|
||||
uint32_t word_id = GetIpNum(fieldInfo.word);
|
||||
if (word_id == 0) { return 0; }
|
||||
std::stringstream stream_ip;
|
||||
stream_ip << word_id;
|
||||
fieldInfo.word = stream_ip.str();
|
||||
}
|
||||
|
||||
// 联合索引MemFormat在拼接的时候已经完成,此处无需再次编码
|
||||
if(FIELD_INDEX == fieldInfo.field_type){
|
||||
ss_key << fieldInfo.word;
|
||||
}else {
|
||||
KeyFormat::UnionKey o_keyinfo_vet;
|
||||
o_keyinfo_vet.push_back(std::make_pair(fieldInfo.field_type , fieldInfo.word));
|
||||
std::string s_format_key = KeyFormat::Encode(o_keyinfo_vet);
|
||||
ss_key << s_format_key;
|
||||
}
|
||||
|
||||
log_debug("appid [%u], key[%s]", p_data_base_->Appid(), ss_key.str().c_str());
|
||||
if (p_data_base_->GetHasGisFlag() && GetDocIndexCache(ss_key.str(), fieldInfo.field, doc_info)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
bRet = g_IndexInstance.GetDocInfo(p_data_base_->Appid(), ss_key.str(), fieldInfo.field, doc_info);
|
||||
if (false == bRet) {
|
||||
log_error("GetDocInfo error.");
|
||||
return -RT_DTC_ERR;
|
||||
}
|
||||
|
||||
if (p_data_base_->CacheSwitch() == 1 && p_data_base_->GetHasGisFlag() == 1
|
||||
&& doc_info.size() > 0 && doc_info.size() <= 1000) {
|
||||
std::string index_str;
|
||||
SetDocIndexCache(doc_info, index_str);
|
||||
if (index_str != "" && index_str.size() < MAX_VALUE_LEN) {
|
||||
std::string indexCache = ss_key.str() + "|" + ToString(fieldInfo.field);
|
||||
unsigned data_size = indexCache.size();
|
||||
int ret = indexcachelist->add_list(indexCache.c_str(), index_str.c_str(), data_size, index_str.size());
|
||||
if (ret != 0) {
|
||||
log_error("add to index_cache_list error, ret: %d.", ret);
|
||||
}
|
||||
else {
|
||||
log_debug("add to index_cache_list: %s.", indexCache.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
std::vector<IndexInfo> ValidDocFilter::Union(std::vector<IndexInfo>& first_indexinfo_vet, std::vector<IndexInfo>& second_indexinfo_vet){
|
||||
std::vector<IndexInfo> index_info_result;
|
||||
int i_max_size = first_indexinfo_vet.size() + second_indexinfo_vet.size();
|
||||
index_info_result.resize(i_max_size);
|
||||
|
||||
std::sort(first_indexinfo_vet.begin() , first_indexinfo_vet.end());
|
||||
std::sort(second_indexinfo_vet.begin() , second_indexinfo_vet.end());
|
||||
|
||||
std::vector<IndexInfo>::iterator iter = std::set_union(
|
||||
first_indexinfo_vet.begin(), first_indexinfo_vet.end(),
|
||||
second_indexinfo_vet.begin() , second_indexinfo_vet.end() ,
|
||||
index_info_result.begin());
|
||||
|
||||
index_info_result.resize(iter - index_info_result.begin());
|
||||
return index_info_result;
|
||||
}
|
72
src/search_local/index_read/valid_doc_filter.h
Normal file
72
src/search_local/index_read/valid_doc_filter.h
Normal file
@ -0,0 +1,72 @@
|
||||
/*
|
||||
* =====================================================================================
|
||||
*
|
||||
* Filename: valid_doc_filter.h
|
||||
*
|
||||
* Description: logical operate class definition.
|
||||
*
|
||||
* Version: 1.0
|
||||
* Created: 09/08/2018
|
||||
* Revision: none
|
||||
* Compiler: gcc
|
||||
*
|
||||
* Author: zhulin, shzhulin3@jd.com
|
||||
* Modified by: chenyujie ,chenyujie28@jd.com
|
||||
* Company: JD.com, Inc.
|
||||
*
|
||||
* =====================================================================================
|
||||
*/
|
||||
|
||||
#ifndef LOGICAL_OP_H
|
||||
#define LOGICAL_OP_H
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <functional>
|
||||
#include "comm.h"
|
||||
#include "singleton.h"
|
||||
#include "noncopyable.h"
|
||||
#include "request_context.h"
|
||||
#include "result_context.h"
|
||||
|
||||
class ValidDocFilter : private noncopyable{
|
||||
public:
|
||||
ValidDocFilter();
|
||||
virtual ~ValidDocFilter();
|
||||
|
||||
public:
|
||||
static ValidDocFilter* Instance(){
|
||||
return CSingleton<ValidDocFilter>::Instance();
|
||||
};
|
||||
|
||||
static void Destroy(){
|
||||
CSingleton<ValidDocFilter>::Destroy();
|
||||
};
|
||||
|
||||
public:
|
||||
void BindDataBasePointer(RequestContext* const p_data_base) { p_data_base_ = p_data_base; };
|
||||
|
||||
public:
|
||||
int HanPinTextInvertIndexSearch(const std::vector<FieldInfo>& keys
|
||||
, std::vector<IndexInfo>& index_info_vet );
|
||||
|
||||
int RangeQueryInvertIndexSearch(const std::vector<FieldInfo>& keys
|
||||
, std::vector<IndexInfo>& index_info_vet);
|
||||
|
||||
int TextInvertIndexSearch(const std::vector<FieldInfo>& keys
|
||||
, std::vector<IndexInfo>& index_info_vet);
|
||||
|
||||
int ProcessTerminal(const std::vector<std::vector<FieldInfo> >& and_keys, const TerminalQryCond& query_cond, std::vector<TerminalRes>& vecs);
|
||||
|
||||
private:
|
||||
void CalculateByWord(FieldInfo fieldInfo, const std::vector<IndexInfo> &doc_info);
|
||||
void SetDocIndexCache(const std::vector<IndexInfo> &doc_info, std::string& indexJsonStr);
|
||||
bool GetDocIndexCache(std::string word, uint32_t field, std::vector<IndexInfo> &doc_info);
|
||||
int GetDocIdSetByWord(FieldInfo fieldInfo, std::vector<IndexInfo> &doc_info);
|
||||
std::vector<IndexInfo> Union(std::vector<IndexInfo>& first_indexinfo_vet, std::vector<IndexInfo>& second_indexinfo_vet);
|
||||
|
||||
private:
|
||||
RequestContext* p_data_base_;
|
||||
};
|
||||
|
||||
#endif
|
@ -25,10 +25,12 @@
|
||||
#include <unistd.h>
|
||||
#include <netinet/in.h>
|
||||
#include <arpa/inet.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#include <bitset>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
|
||||
#include "db_process_rocks.h"
|
||||
@ -443,13 +445,21 @@ inline int RocksdbProcess::str2Value(
|
||||
break;
|
||||
|
||||
case DField::String:
|
||||
Value.str.len = Str.length();
|
||||
Value.str.ptr = const_cast<char *>(Str.data()); // 不重新new,要等这个value使用完后释放内存(如果Str是动态分配的)
|
||||
{
|
||||
char* p = (char*)calloc(Str.length() , sizeof(char));
|
||||
memcpy((void*)p , (void*)Str.data() , Str.length());
|
||||
Value.str.ptr = p;
|
||||
Value.str.len = Str.length();
|
||||
}
|
||||
break;
|
||||
|
||||
case DField::Binary:
|
||||
Value.bin.len = Str.length();
|
||||
Value.bin.ptr = const_cast<char *>(Str.data());
|
||||
{
|
||||
char* p = (char*)calloc(Str.length() , sizeof(char));
|
||||
memcpy((void*)p , (void*)Str.data() , Str.length());
|
||||
Value.bin.ptr = p;
|
||||
Value.bin.len = Str.length();
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
@ -600,6 +610,7 @@ int RocksdbProcess::condition_filter(
|
||||
|
||||
case DField::String:
|
||||
case DField::Binary:
|
||||
{
|
||||
matched = is_matched(rocksValue.c_str(), comparator, condValue.c_str(), (int)rocksValue.length(), (int)condValue.length(), false);
|
||||
if (!matched)
|
||||
{
|
||||
@ -607,6 +618,7 @@ int RocksdbProcess::condition_filter(
|
||||
condValue.c_str(), comparator);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
@ -662,6 +674,23 @@ template bool RocksdbProcess::is_matched<int64_t>(const int64_t lv, int comp, co
|
||||
template bool RocksdbProcess::is_matched<uint64_t>(const uint64_t lv, int comp, const uint64_t rv);
|
||||
template bool RocksdbProcess::is_matched<double>(const double lv, int comp, const double rv);
|
||||
|
||||
int RocksdbProcess::memcmp_ignore_case(
|
||||
const void* lv,
|
||||
const void* rv,
|
||||
int count)
|
||||
{
|
||||
int iret = 0;
|
||||
for (int i = 0; i < count; i++){
|
||||
char lv_buffer = tolower(((char*)lv)[i]);
|
||||
char rv_buffer = tolower(((char*)rv)[i]);
|
||||
iret = memcmp(&lv_buffer , &rv_buffer , sizeof(char));
|
||||
if (iret != 0){
|
||||
return iret;
|
||||
}
|
||||
}
|
||||
return iret;
|
||||
}
|
||||
|
||||
//template<>
|
||||
bool RocksdbProcess::is_matched(
|
||||
const char *lv,
|
||||
@ -686,37 +715,38 @@ bool RocksdbProcess::is_matched(
|
||||
{
|
||||
case 0:
|
||||
if (caseSensitive)
|
||||
return lLen == rLen && !strncmp(lv, rv, minLen);
|
||||
return lLen == rLen && !strncasecmp(lv, rv, minLen);
|
||||
return lLen == rLen && !memcmp(lv, rv, minLen);
|
||||
return lLen == rLen && !memcmp_ignore_case(lv, rv, minLen);
|
||||
case 1:
|
||||
if (lLen != rLen)
|
||||
return true;
|
||||
if (caseSensitive)
|
||||
return strncmp(lv, rv, minLen);
|
||||
return strncasecmp(lv, rv, minLen);
|
||||
return memcmp(lv, rv, minLen);
|
||||
return memcmp_ignore_case(lv, rv, minLen);
|
||||
case 2:
|
||||
if (caseSensitive)
|
||||
ret = strncmp(lv, rv, minLen);
|
||||
ret = memcmp(lv, rv, minLen);
|
||||
else
|
||||
ret = strncasecmp(lv, rv, minLen);
|
||||
ret = memcmp_ignore_case(lv, rv, minLen);
|
||||
return ret < 0 || (ret == 0 && lLen < rLen);
|
||||
case 3:
|
||||
if (caseSensitive)
|
||||
ret = strncmp(lv, rv, minLen);
|
||||
ret = memcmp(lv, rv, minLen);
|
||||
else
|
||||
ret = strncasecmp(lv, rv, minLen);
|
||||
ret = memcmp_ignore_case(lv, rv, minLen);
|
||||
log_error("iret:%d , len:%d ,rLen:%d", ret , lLen , rLen);
|
||||
return ret < 0 || (ret == 0 && lLen <= rLen);
|
||||
case 4:
|
||||
if (caseSensitive)
|
||||
ret = strncmp(lv, rv, minLen);
|
||||
ret = memcmp(lv, rv, minLen);
|
||||
else
|
||||
ret = strncasecmp(lv, rv, minLen);
|
||||
ret = memcmp_ignore_case(lv, rv, minLen);
|
||||
return ret > 0 || (ret == 0 && lLen > rLen);
|
||||
case 5:
|
||||
if (caseSensitive)
|
||||
ret = strncmp(lv, rv, minLen);
|
||||
ret = memcmp(lv, rv, minLen);
|
||||
else
|
||||
ret = strncasecmp(lv, rv, minLen);
|
||||
ret = memcmp_ignore_case(lv, rv, minLen);
|
||||
return ret > 0 || (ret == 0 && lLen >= rLen);
|
||||
default:
|
||||
log_error("unsupport comparator:%d", comparator);
|
||||
@ -2204,6 +2234,24 @@ int RocksdbProcess::process_direct_query(
|
||||
|
||||
std::vector<QueryCond> primaryKeyConds;
|
||||
ret = analyse_primary_key_conds(reqCxt, primaryKeyConds);
|
||||
|
||||
#if 0
|
||||
std::vector<QueryCond>::iterator iter = primaryKeyConds.begin();
|
||||
for (; iter != primaryKeyConds.end(); ++iter){
|
||||
std::vector<int> fieldTypes;
|
||||
fieldTypes.push_back(DField::Signed);
|
||||
std::vector<std::string> fieldValues;
|
||||
|
||||
int ipos = iter->sCondValue.find_last_of("#");
|
||||
std::string stemp = iter->sCondValue.substr(ipos + 1);
|
||||
key_format::Decode(stemp , fieldTypes , fieldValues);
|
||||
log_error("field index:%d , condopr:%d , condvalue:%s" ,
|
||||
iter->sFieldIndex ,
|
||||
iter->sCondOpr ,
|
||||
fieldValues[0].c_str());
|
||||
}
|
||||
#endif
|
||||
|
||||
if (ret != 0)
|
||||
{
|
||||
log_error("query condition incorrect in query context!");
|
||||
@ -2231,8 +2279,11 @@ int RocksdbProcess::process_direct_query(
|
||||
std::string value;
|
||||
RocksDBConn::RocksItr_t rocksItr;
|
||||
|
||||
bool forwardDirection = (primaryKeyConds[0].sCondOpr == (uint8_t)CondOpr::eEQ || primaryKeyConds[0].sCondOpr == (uint8_t)CondOpr::eGT || primaryKeyConds[0].sCondOpr == (uint8_t)CondOpr::eGE);
|
||||
bool forwardDirection = (primaryKeyConds[0].sCondOpr == (uint8_t)CondOpr::eEQ ||
|
||||
primaryKeyConds[0].sCondOpr == (uint8_t)CondOpr::eGT ||
|
||||
primaryKeyConds[0].sCondOpr == (uint8_t)CondOpr::eGE);
|
||||
bool backwardEqual = primaryKeyConds[0].sCondOpr == (uint8_t)CondOpr::eLE;
|
||||
log_debug("forwardDirection:%d , backwardEqual:%d", (int)forwardDirection , (int)backwardEqual);
|
||||
if (backwardEqual)
|
||||
{
|
||||
// if the query condtion is < || <=, use seek_for_prev to seek in the total_order_seek mode
|
||||
@ -2316,6 +2367,39 @@ int RocksdbProcess::process_direct_query(
|
||||
while (true)
|
||||
{
|
||||
ret = range_key_matched(fullKey, primaryKeyConds);
|
||||
|
||||
#if 0
|
||||
std::vector<std::string> rocksValues;
|
||||
std::vector<int> fieldTypes;
|
||||
fieldTypes.push_back(DField::String);
|
||||
fieldTypes.push_back(DField::String);
|
||||
fieldTypes.push_back(DField::Signed);
|
||||
fieldTypes.push_back(DField::Signed);
|
||||
fieldTypes.push_back(DField::Signed);
|
||||
|
||||
key_format::Decode(fullKey, fieldTypes, rocksValues);
|
||||
|
||||
for (int i = 0; i < rocksValues[0].length(); i++){
|
||||
log_error("No:%d, is %d \n" , i , (int)rocksValues[0][i]);
|
||||
}
|
||||
|
||||
int ipos = rocksValues[0].find_last_of("#");
|
||||
std::string stemp = rocksValues[0].substr(ipos + 1);
|
||||
std::vector<std::string> rocksValues001;
|
||||
|
||||
std::vector<int> fieldTypes001;
|
||||
fieldTypes001.push_back(DField::Signed);
|
||||
|
||||
key_format::Decode(stemp , fieldTypes001 , rocksValues001);
|
||||
log_error("primary value:%s", rocksValues001[0].c_str());
|
||||
|
||||
for (size_t i = 0; i < rocksValues.size(); i++)
|
||||
{
|
||||
|
||||
log_error("value:%s", rocksValues[i].c_str() );
|
||||
}
|
||||
#endif
|
||||
|
||||
if (ret == -1)
|
||||
{
|
||||
// prefix key not matched, reach to the end
|
||||
|
@ -269,6 +269,8 @@ class RocksdbProcess : public HelperProcessBase
|
||||
int64_t timeElapse);
|
||||
|
||||
void print_stat_info();
|
||||
|
||||
int memcmp_ignore_case(const void* lv, const void* rv, int count);
|
||||
};
|
||||
|
||||
#endif // __DB_PROCESS_ROCKS_H__
|
||||
|
@ -66,6 +66,7 @@ enum FieldType{
|
||||
};
|
||||
|
||||
enum SEGMENTTAG {
|
||||
SEGMENT_NONE = 0,
|
||||
SEGMENT_DEFAULT = 1,
|
||||
SEGMENT_NGRAM = 2,
|
||||
SEGMENT_CHINESE = 3,
|
||||
|
Loading…
Reference in New Issue
Block a user