Compare commits

...

34 Commits

Author SHA1 Message Date
chenyujie28
3736d4972b Add:timer_for_performance_test 2021-08-20 10:41:46 +08:00
chenyujie28
b1494d633a Mod:fix_range_search_count_incorrect 2021-08-18 18:01:17 +08:00
chenyujie28
b93e25166f Mod:deep_split_table_lastentry_bug 2021-08-06 11:06:04 +08:00
chenyujie28
6c9b1e64a7 Mod:fix_search_"10"_docsize_bug 2021-08-05 11:28:20 +08:00
chenyujie28
3847045aa5 Mod:add_SORT_GEO_DISTANCE_check 2021-08-04 11:33:22 +08:00
chenyujie28
cce43b936f Mod:field_in_appdefine_check_&&_empty_check 2021-08-04 11:31:36 +08:00
chenyujie28
2248c8747c Mod:fix_union_size_unfit 2021-08-03 15:52:06 +08:00
chenyujie28
9a9c901cc4 Mod:fix_docid_error_bug 2021-08-03 15:50:47 +08:00
chenyujie28
26aaa94920 Mod:add_sort_operator_and_CMakefile 2021-07-23 11:56:05 +08:00
chenyujie28
2825798e5c Mod:field_parse_support_more_type 2021-07-21 17:05:53 +08:00
chenyujie28
2bae879f75 Mod:fix_geo_score_no_set_bug 2021-07-21 15:58:22 +08:00
chenyujie28
ba68bc2117 Mod:fix_should_geo_query_with_or_query_bugs 2021-07-21 15:40:20 +08:00
chenyujie28
5c04c247ab Mod:fix_union_intersect_differ_vector_size_bug 2021-07-21 15:38:46 +08:00
chenyujie28
199e540a58 Mod:fix_max_heap_empty_check_bug 2021-07-20 19:46:44 +08:00
chenyujie28
6fa36f3a4c Mod:fix_geo_assignsortfield_and_require_fields_bug 2021-07-20 10:42:49 +08:00
chenyujie28
88e5846294 Mod:fix_double2str_enable_e_bugs 2021-07-19 14:58:41 +08:00
chenyujie28
008427a214 Mod:fix_range_query_compara_func_bugs 2021-07-16 15:41:59 +08:00
chenyujie28
101d872c09 Mod:fix_range_query_bugs 2021-07-13 14:51:58 +08:00
chenyujie28
bb2358103e Mod:fix_union_query_bugs 2021-07-09 18:11:12 +08:00
chenyujie28
97408057e8 Mod:fix_bool_query_bugs 2021-07-08 20:37:56 +08:00
chenyujie28
27b4d80a36 Mod:fix_geo_related_query_bugs 2021-07-07 14:40:55 +08:00
chenyujie28
f93694baca Mod:fix_read_getkey_bug 2021-07-06 18:45:28 +08:00
chenyujie28
6be870dfb6 Mod:fix_bug_parse_request_json_function 2021-07-05 18:36:57 +08:00
chenyujie28
77afde66a1 Mod:repair_complie_errors_and_warnings 2021-06-23 16:47:51 +08:00
chenyujie28
78562a2ae6 Mod:CMake_support_C++11 2021-06-23 11:11:35 +08:00
chenyujie28
5d35cff86a Mod:split_getvaliddoc_logic 2021-06-23 10:57:53 +08:00
chenyujie28
2942e539a7 Mod-Add:add_result_database_and_rechange_boollogic 2021-06-20 00:01:17 +08:00
chenyujie28
787aee4566 Add:memcomparableformat_for_read 2021-06-11 17:33:19 +08:00
chenyujie28
4ccbea999a Mod:remove_skiplist_fix_query_bugs 2021-06-10 19:30:38 +08:00
chenyujie28
149dc03bfc Mod:searchtask_new_instance_error 2021-06-04 20:02:57 +08:00
chenyujie28
1ecc06d0f8 Add:add_unified_index_query_function 2021-06-03 17:30:33 +08:00
chenyujie28
801f486365 Add:extra_field_filter_in_term_query 2021-06-02 17:56:22 +08:00
chenyujie28
a50af991c1 Mod:fix_complie_errors_no_test 2021-06-02 15:00:09 +08:00
chenyujie28
3b2b3784ef Add:query_process_reconstruction_for_test 2021-06-01 14:39:38 +08:00
66 changed files with 5733 additions and 5858 deletions

View File

@ -160,14 +160,14 @@ vector<string> GetArroundGeoHash(GeoPoint& circle_center, double distance, int p
return list;
}
vector<string> GetArroundGeoHash(double lng_max, double lng_min, double lat_max, double lat_min, int precision)
vector<string> GetArroundGeoHash(const EnclosingRectangle& oEnclosingRectangle, int precision)
{
vector<string> list;
GeoPoint top_left, bottom_right;
top_left.lat = lat_max;
top_left.lon = lng_min;
bottom_right.lat = lat_min;
bottom_right.lon = lng_max;
top_left.lat = oEnclosingRectangle.dlatMax;
top_left.lon = oEnclosingRectangle.dlngMin;
bottom_right.lat = oEnclosingRectangle.dlatMin;
bottom_right.lon = oEnclosingRectangle.dlngMax;
double min_lat, min_lon;
std::set <std::string> result;
getMinLatLng(precision, min_lat, min_lon);
@ -181,7 +181,7 @@ vector<string> GetArroundGeoHash(double lng_max, double lng_min, double lat_max,
list.push_back(geohash);
}
}
top_left.lon = lng_min;
top_left.lon = oEnclosingRectangle.dlngMin;
}
return list;
}

View File

@ -25,18 +25,47 @@
#include <stdlib.h>
using namespace std;
const double DOUBLE_EPS = 1e-10;
struct GeoPoint {
double lon;
double lat;
double lon;
double lat;
};
// Rectangle described by four longitude/latitude bounds.
// NOTE(review): the member names suggest max/min longitude and latitude in
// degrees; nothing in this struct enforces max >= min — confirm with callers.
struct EnclosingRectangle {
    double dlngMax;
    double dlngMin;
    double dlatMax;
    double dlatMin;

    // Default: all four bounds are zero (IsVaild() returns false for it).
    EnclosingRectangle()
        : dlngMax(0.0)
        , dlngMin(0.0)
        , dlatMax(0.0)
        , dlatMin(0.0)
    { }

    // Stores the four bounds as given; no ordering check is performed.
    EnclosingRectangle(double _dlngMax, double _dlngMin
            , double _dlatMax, double _dlatMin)
        : dlngMax(_dlngMax)
        , dlngMin(_dlngMin)
        , dlatMax(_dlatMax)
        , dlatMin(_dlatMin)
    { }

    // Returns true when both the longitude span and the latitude span differ
    // by at least DOUBLE_EPS, i.e. the rectangle is not collapsed on either
    // axis. Declared const so it can be called through the const reference
    // that GetArroundGeoHash(const EnclosingRectangle&, int) receives.
    // (The misspelled name is kept because callers depend on it.)
    bool IsVaild() const {
        const bool lngCollapsed = fabs(dlngMax - dlngMin) < DOUBLE_EPS;
        const bool latCollapsed = fabs(dlatMax - dlatMin) < DOUBLE_EPS;
        return !lngCollapsed && !latCollapsed;
    }
};
string encode(double lat, double lng, int precision);
vector<string> getArroundGeoHash(double lat, double lon, int precision);
GeoPoint GetTerminalGeo(GeoPoint& beg , // 初始的geo坐标
double distance,// 距离
double angle //角度
double distance,// 距离
double angle //角度
);
vector<string> GetArroundGeoHash(GeoPoint& circle_center, double distance, int precision);
vector<string> GetArroundGeoHash(double lng_max, double lng_min, double lat_max, double lat_min, int precision);
vector<string> GetArroundGeoHash(const EnclosingRectangle& oEnclosingRectangle, int precision);
#endif

21
src/comm/noncopyable.h Normal file
View File

@ -0,0 +1,21 @@
/* $Id: noncopyable.h 602 2009-01-08 02:27:44Z jackda $ */
#ifndef __TTC_NONCOPY_H__
#define __TTC_NONCOPY_H__
#include "namespace.h"
TTC_BEGIN_NAMESPACE
// Base class for types that must not be copied: the copy constructor and the
// copy assignment operator are declared private and given no body here, so a
// derived class gets neither an accessible copy constructor nor an accessible
// copy assignment. Construction and destruction are protected, so the class
// is only usable as a base.
class noncopyable
{
private:
    noncopyable(const noncopyable&);
    const noncopyable& operator=(const noncopyable&);

protected:
    noncopyable() {}
    ~noncopyable() {}
};
TTC_END_NAMESPACE
#endif

View File

@ -3,13 +3,14 @@ cmake_minimum_required(VERSION 2.6)
PROJECT(index_read)
EXECUTE_PROCESS(COMMAND git log -1 --pretty=format:%h . OUTPUT_VARIABLE version)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -lrt -Wall -O2")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x -g -lrt -Wall")
AUX_SOURCE_DIRECTORY(. main)
AUX_SOURCE_DIRECTORY(./index_sync index_sync)
AUX_SOURCE_DIRECTORY(./utils utils)
AUX_SOURCE_DIRECTORY(./query query)
AUX_SOURCE_DIRECTORY(./process process)
AUX_SOURCE_DIRECTORY(./sort_operator sort_operator)
LINK_DIRECTORIES(
${PROJECT_SOURCE_DIR}/../../comm
@ -17,7 +18,7 @@ ${PROJECT_SOURCE_DIR}/../../3rdlib/jsoncpp/lib
${PROJECT_SOURCE_DIR}/../../comm/stat
)
ADD_EXECUTABLE(index_read ${main} ${index_sync} ${utils} ${query} ${process})
ADD_EXECUTABLE(index_read ${main} ${index_sync} ${utils} ${query} ${process} ${sort_operator})
target_include_directories(index_read PUBLIC
../../3rdlib/jsoncpp/include

View File

@ -21,17 +21,27 @@
#include <string>
#include <stdint.h>
#include <vector>
#include <tr1/unordered_map>
#include <set>
#include <unordered_map>
#include <limits.h>
using namespace std;
#include <map>
#include <cmath>
#define DOC_CNT 10000
#define MAX_DOCID_LENGTH 32
const double D_BM25_K = 1.65;
const double D_BM25_K1 = 1.2;
const double D_BM25_K2 = 200;
const uint32_t MAX_SEARCH_LEN = 60;
const uint32_t SINGLE_WORD_LEN = 18;
const uint32_t MAX_VALUE_LEN = 51200;
typedef std::tr1::unordered_map<string, double> hash_double_map;
typedef std::tr1::unordered_map<string, string> hash_string_map;
const double DOUBLE_EPS_3 = 1e-3;
typedef std::unordered_map<std::string, double> hash_double_map;
typedef std::unordered_map<std::string, std::string> hash_string_map;
enum RetCode{
RT_PARSE_JSON_ERR = 10001,
@ -76,31 +86,32 @@ enum KEYLOCATE{
};
enum SORTTYPE {
SORT_RELEVANCE = 1, // 按相关性排序
SORT_TIMESTAMP = 2, // 按时间排序
SORT_RELEVANCE = 1, // 默认,按相关性排序
DONT_SORT = 3, //不排序
SORT_FIELD_ASC = 4, // 按字段升序
SORT_FIELD_DESC = 5, // 按字段降序
SORT_GEO_DISTANCE = 6 // 按距离升序
};
enum FieldType{
FIELD_INT = 1,
FIELD_STRING,
FIELD_TEXT,
FIELD_IP,
FIELD_LNG,
FIELD_LAT,
FIELD_GIS,
FIELD_DISTANCE,
FIELD_DOUBLE,
FIELD_LONG,
FIELD_INDEX = 11,
FIELD_LNG_ARRAY,
FIELD_LAT_ARRAY,
FIELD_WKT,
FIELD_INT = 1,
FIELD_STRING = 2,
FIELD_TEXT = 3,
FIELD_IP = 4,
FIELD_GEO_POINT = 5,
FIELD_LAT = 6,
FIELD_GIS = 7,
FIELD_DISTANCE = 8,
FIELD_DOUBLE = 9,
FIELD_LONG = 10,
FIELD_INDEX = 11,
FIELD_LNG_ARRAY = 12,
FIELD_LAT_ARRAY = 13,
FIELD_GEO_SHAPE = 14
};
enum SEGMENTTAG {
SEGMENT_NONE = 0,
SEGMENT_DEFAULT = 1,
SEGMENT_NGRAM = 2,
SEGMENT_CHINESE = 3,
@ -151,36 +162,37 @@ enum RANGTYPE {
struct Content {
uint32_t type;
string str;
std::string str;
};
struct Info {
string title;
string content;
string classify;
string keywords;
string url;
std::string title;
std::string content;
std::string classify;
std::string keywords;
std::string url;
};
struct KeyInfo {
string word;
std::string word;
uint32_t field;
uint32_t word_freq;
uint32_t created_time;
vector<int> pos_vec;
std::vector<int> pos_vec;
};
struct FieldInfo
{
string word;
std::string word;
uint32_t field;
uint32_t field_type;
uint32_t segment_tag;
uint32_t segment_feature;
uint32_t start;
uint32_t end;
double start;
double end;
uint32_t index_tag;
RANGTYPE range_type;
uint32_t query_type;
FieldInfo() {
field = 1;
field_type = 0;
@ -190,6 +202,7 @@ struct FieldInfo
end = 0;
range_type = RANGE_INIT;
index_tag = 0;
query_type = -1;
}
};
@ -200,14 +213,14 @@ struct AppFieldInfo {
uint16_t segment_tag;
uint16_t field_value;
uint16_t segment_feature;
string index_info;
std::string index_info;
};
struct ScoreInfo
{
double score;
FIELDTYPE type;
string str;
std::string str;
int i;
double d;
ScoreInfo(){
@ -225,8 +238,8 @@ struct CacheQueryInfo
uint32_t sort_type;
uint32_t page_index;
uint32_t page_size;
string last_score;
string last_id;
std::string last_score;
std::string last_id;
CacheQueryInfo(){
appid = 0;
sort_field = 0;
@ -241,17 +254,36 @@ enum KeyType
ORKEY,
ANDKEY,
INVERTKEY,
KEYTOTALNUM
};
// Pairs a score with a document id and orders such pairs by score, using the
// document id as tie-breaker when two scores are within DOUBLE_EPS_3.
// NOTE(review): tolerance-based comparison is not transitive in general, so
// this may not be a strict weak ordering for scores spaced just under the
// tolerance — confirm this is acceptable for the containers that use it.
struct ScoreDocIdNode {
    double d_score;
    std::string s_docid;

    ScoreDocIdNode(double score, const std::string& docid)
        : d_score(score)
        , s_docid(docid)
    { }

    bool operator<(const ScoreDocIdNode& score_docid_node) const {
        const double rhs_score = score_docid_node.d_score;
        const bool scores_tie = fabs(d_score - rhs_score) < DOUBLE_EPS_3;
        if (!scores_tie) {
            // Scores differ by at least the tolerance: order by score.
            return (d_score + DOUBLE_EPS_3) < rhs_score;
        }
        // Scores considered equal: fall back to lexicographic doc id order.
        return s_docid.compare(score_docid_node.s_docid) < 0;
    }
};
struct IndexInfo {
uint32_t appid;
string doc_id;
std::string doc_id;
uint32_t doc_version;
uint32_t field;
uint32_t word_freq;
uint32_t created_time;
string pos;
string extend;
std::string pos;
std::string extend;
double distance;
IndexInfo(){
appid = 0;
@ -259,6 +291,7 @@ struct IndexInfo {
field = 0;
word_freq = 0;
created_time = 0;
distance = 0.0;
}
bool operator<(const IndexInfo& src) const {
@ -271,29 +304,29 @@ struct IndexInfo {
struct ExtraFilterKey
{
string field_name;
string field_value;
std::string field_name;
std::string field_value;
uint16_t field_type;
};
struct TerminalQryCond{
uint32_t sort_type;
string sort_field;
string last_id;
string last_score;
std::string sort_field;
std::string last_id;
std::string last_score;
uint32_t limit_start;
uint32_t page_size;
};
struct OrderOpCond{
string last_id;
std::string last_id;
uint32_t limit_start;
uint32_t count;
bool has_extra_filter;
};
struct TerminalRes{
string doc_id;
std::string doc_id;
double score;
};
@ -306,4 +339,13 @@ enum QUERYRTPE{
QUERY_TYPE_GEO_SHAPE,
};
typedef std::set<std::string> ValidDocSet;
typedef std::set<std::string> HighLightWordSet;
typedef std::vector<KeyInfo> KeyInfoVet;
typedef std::map<std::string, KeyInfoVet> DocKeyinfosMap;
typedef std::map<std::string, uint32_t> KeywordDoccountMap;
#endif

View File

@ -1,791 +0,0 @@
/*
* =====================================================================================
*
* Filename: component.h
*
* Description: component class definition.
*
* Version: 1.0
* Created: 09/08/2019
* Revision: none
* Compiler: gcc
*
* Author: zhulin, shzhulin3@jd.com
* Company: JD.com, Inc.
*
* =====================================================================================
*/
#include "component.h"
#include "split_manager.h"
#include "db_manager.h"
#include "utf8_str.h"
#include "query/bool_query_parser.h"
#include "query/geo_distance_parser.h"
#include "query/range_query_parser.h"
#include "query/match_query_parser.h"
#include "query/term_query_parser.h"
#include "query/geo_shape_parser.h"
#include <sstream>
using namespace std;
// Builds a Component with request fields reset to their defaults and the
// default query / JDQ switch taken from the global search configuration.
Component::Component(){
    SGlobalConfig &global_cfg = SearchConf::Instance()->GetGlobalConfig();
    m_default_query = global_cfg.sDefaultQuery;
    m_jdq_switch = global_cfg.iJdqSwitch;
    m_page_index = 0;
    m_page_size = 0;
    m_return_all = 0;
    m_cache_switch = 0;
    m_top_switch = 0;
    m_snapshot_switch = 0;
    m_sort_type = SORT_RELEVANCE;
    m_appid = 10001;
    m_last_id = "";
    m_last_score = "";
    m_search_after = false;
    distance = 0;
    m_terminal_tag = 0;
    m_terminal_tag_valid = false;
    m_query_type = 0;
    // The parser objects are only allocated by GetQueryWord() when the request
    // carries a structured "query" object, but the destructor always deletes
    // them. They must therefore start out null, otherwise destroying a
    // Component that never reached that branch deletes indeterminate pointers.
    query_parser = NULL;
    query_parser_res = NULL;
}
// Releases the query parser and its result object.
Component::~Component(){
    // Deleting a null pointer is a no-op, so no explicit null test is needed.
    delete query_parser;
    delete query_parser_res;
}
int Component::ParseJson(const char *sz_json, int json_len, Json::Value &recv_packet)
{
Json::Reader r(Json::Features::strictMode());
int ret;
ret = r.parse(sz_json, sz_json + json_len, recv_packet);
if (0 == ret)
{
log_error("the err json string is : %s", sz_json);
log_error("parse json error , errmsg : %s", r.getFormattedErrorMessages().c_str());
return -RT_PARSE_JSON_ERR;
}
if (recv_packet.isMember("appid") && recv_packet["appid"].isUInt())
{
m_appid = recv_packet["appid"].asUInt();
}
else {
m_appid = 10001;
}
if(recv_packet.isMember("query")){
m_query = recv_packet["query"];
}
if (recv_packet.isMember("key") && recv_packet["key"].isString())
{
m_Data = recv_packet["key"].asString();
}
else {
m_Data = "";
}
if (recv_packet.isMember("key_and") && recv_packet["key_and"].isString())
{
m_Data_and = recv_packet["key_and"].asString();
}
else {
m_Data_and = "";
}
if (recv_packet.isMember("key_invert") && recv_packet["key_invert"].isString())
{
m_Data_invert = recv_packet["key_invert"].asString();
}
else {
m_Data_invert = "";
}
if (recv_packet.isMember("key_complete") && recv_packet["key_complete"].isString())
{
m_Data_complete = recv_packet["key_complete"].asString();
}
else {
m_Data_complete = "";
}
if (recv_packet.isMember("page_index") && recv_packet["page_index"].isString())
{
m_page_index = atoi(recv_packet["page_index"].asString().c_str());
}
else {
m_page_index = 1 ;
}
if (recv_packet.isMember("page_size") && recv_packet["page_size"].isString())
{
m_page_size = atoi(recv_packet["page_size"].asString().c_str());
}
else {
m_page_size = 10;
}
if(recv_packet.isMember("sort_type") && recv_packet["sort_type"].isString())
{
m_sort_type = atoi(recv_packet["sort_type"].asString().c_str());
}
else {
m_sort_type = SORT_RELEVANCE;
}
if(recv_packet.isMember("sort_field") && recv_packet["sort_field"].isString())
{
m_sort_field = recv_packet["sort_field"].asString();
}
else {
m_sort_field = "";
}
if (recv_packet.isMember("return_all") && recv_packet["return_all"].isString())
{
m_return_all = atoi(recv_packet["return_all"].asString().c_str());
}
else {
m_return_all = 0;
}
if(recv_packet.isMember("fields") && recv_packet["fields"].isString())
{
string fields = recv_packet["fields"].asString();
m_fields = splitEx(fields, ",");
}
if (recv_packet.isMember("terminal_tag") && recv_packet["terminal_tag"].isString())
{
m_terminal_tag = atoi(recv_packet["terminal_tag"].asString().c_str());
}
else {
m_terminal_tag = 0;
}
if(m_terminal_tag == 1){
if(m_Data_and == "" || m_Data != "" || m_Data_invert != ""){
log_error("terminal_tag is true, only key_and is available.");
return -RT_PARSE_JSON_ERR;
}
}
if(recv_packet.isMember("last_id") && recv_packet["last_id"].isString())
{
m_last_id = recv_packet["last_id"].asString();
}
else {
m_last_id = "";
}
bool score_flag = true;
if (recv_packet.isMember("last_score") && recv_packet["last_score"].isString())
{
m_last_score = recv_packet["last_score"].asString();
}
else {
score_flag = false;
m_last_score = "0";
}
if(m_last_id != "" && score_flag == true){
m_search_after = true;
}
if(m_search_after == true && m_sort_type != SORT_FIELD_DESC && m_sort_type != SORT_FIELD_ASC){
log_error("in search_after mode, sort_type must be SORT_FIELD_DESC or SORT_FIELD_ASC.");
return -RT_PARSE_JSON_ERR;
}
if ("" == m_Data && "" == m_Data_and && "" == m_Data_complete) {
m_Data = m_default_query;
}
log_debug("parse success, m_Data: %s, m_Data_and: %s, m_Data_invert: %s, m_page_index: %u, m_return_all: %u",
m_Data.c_str(), m_Data_and.c_str(), m_Data_invert.c_str(), m_page_index, m_return_all);
return 0;
}
// Loads the cache/top/snapshot switches for the current appid from the search
// configuration. The switches are left unchanged when the lookup fails.
void Component::InitSwitch()
{
    AppInfo app_info;
    const bool found = SearchConf::Instance()->GetAppInfo(m_appid, app_info);
    if (!found) {
        return;
    }
    m_cache_switch = app_info.cache_switch;
    m_top_switch = app_info.top_switch;
    m_snapshot_switch = app_info.snapshot_switch;
}
// Turns the parsed request into the OR / AND / INVERT key lists.
//
// When m_query is a JSON object, the first recognised member among "bool",
// "geo_distance", "range", "match", "term" and "geo_shape" selects the parser;
// its results are copied into the key lists, the geo members and the extra
// filter lists. Otherwise the legacy key / key_and / key_invert strings are
// parsed with GetFieldWords().
//
// m_has_gis: set from the parser result (structured path) or passed through to
//            GetFieldWords() (legacy path).
// err_msg:   receives the parser's error message when ParseContent() fails.
// Returns 0 on success, -RT_QUERY_TYPE_ERROR for an unrecognised query type,
// or the non-zero code returned by ParseContent().
int Component::GetQueryWord(uint32_t &m_has_gis, string &err_msg){
if(m_query.isObject()){
// Pick the parser by the first matching member name.
if(m_query.isMember("bool")){
m_query_type = QUERY_TYPE_BOOL;
query_parser = new BoolQueryParser(m_appid, m_query["bool"]);
} else if(m_query.isMember("geo_distance")){
m_query_type = QUERY_TYPE_GEO_DISTANCE;
query_parser = new GeoDistanceParser(m_appid, m_query["geo_distance"]);
} else if(m_query.isMember("range")){
m_query_type = QUERY_TYPE_RANGE;
query_parser = new RangeQueryParser(m_appid, m_query["range"]);
} else if(m_query.isMember("match")){
m_query_type = QUERY_TYPE_MATCH;
query_parser = new MatchQueryParser(m_appid, m_query["match"]);
} else if(m_query.isMember("term")){
m_query_type = QUERY_TYPE_TERM;
query_parser = new TermQueryParser(m_appid, m_query["term"]);
} else if(m_query.isMember("geo_shape")){
m_query_type = QUERY_TYPE_GEO_SHAPE;
query_parser = new GeoShapeParser(m_appid, m_query["geo_shape"]);
} else {
log_error("query type error!");
return -RT_QUERY_TYPE_ERROR;
}
// NOTE(review): both pointers are overwritten without releasing a previous
// value, so this presumably expects to be called once per Component — confirm.
query_parser_res = new QueryParserRes();
int ret = query_parser->ParseContent(query_parser_res);
if(ret != 0){
err_msg = query_parser_res->ErrMsg();
log_error("query_parser ParseContent error, ret: %d", ret);
return ret;
}
// NOTE(review): begin() and end() below come from separate accessor calls;
// this is only valid if the accessors return a reference to the same map —
// verify against QueryParserRes.
map<uint32_t, vector<FieldInfo> >::iterator field_key_map_iter = query_parser_res->FieldKeysMap().begin();
for(; field_key_map_iter != query_parser_res->FieldKeysMap().end(); field_key_map_iter++){
AddToFieldList(ANDKEY, field_key_map_iter->second);
}
map<uint32_t, vector<FieldInfo> >::iterator or_key_map_iter = query_parser_res->OrFieldKeysMap().begin();
for(; or_key_map_iter != query_parser_res->OrFieldKeysMap().end(); or_key_map_iter++){
AddToFieldList(ORKEY, or_key_map_iter->second);
}
map<uint32_t, vector<FieldInfo> >::iterator invert_key_map_iter = query_parser_res->InvertFieldKeysMap().begin();
for(; invert_key_map_iter != query_parser_res->InvertFieldKeysMap().end(); invert_key_map_iter++){
AddToFieldList(INVERTKEY, invert_key_map_iter->second);
}
// Geo parameters are only copied when the parser reports a geo condition.
m_has_gis = query_parser_res->HasGis();
if(m_has_gis){
latitude = query_parser_res->Latitude();
longitude = query_parser_res->Longitude();
distance = query_parser_res->Distance();
log_debug("lat: %s, lon: %s, distance: %f", latitude.c_str(), longitude.c_str(), distance);
}
// Replace the three extra-filter lists with the parser's versions.
extra_filter_keys.assign(query_parser_res->ExtraFilterKeys().begin(), query_parser_res->ExtraFilterKeys().end());
extra_filter_and_keys.assign(query_parser_res->ExtraFilterAndKeys().begin(), query_parser_res->ExtraFilterAndKeys().end());
extra_filter_invert_keys.assign(query_parser_res->ExtraFilterInvertKeys().begin(), query_parser_res->ExtraFilterInvertKeys().end());
} else {
// Legacy path: plain keyword strings taken from key / key_and / key_invert.
GetFieldWords(ORKEY, m_Data, m_appid, m_has_gis);
GetFieldWords(ANDKEY, m_Data_and, m_appid, m_has_gis);
GetFieldWords(INVERTKEY, m_Data_invert, m_appid, m_has_gis);
}
return 0;
}
void Component::GetFieldWords(int type, string dataStr, uint32_t appid, uint32_t &m_has_gis){
if (dataStr == "")
return ;
string latitude_tmp = "";
string longitude_tmp = "";
string gisip_tmp = "";
string field_Data = "";
string primary_Data = "";
vector<FieldInfo> joinFieldInfos;
int i = dataStr.find(":");
if (i == -1) {
primary_Data = dataStr;
} else {
int j = dataStr.substr(0, i).rfind(" ");
if (j == -1) {
field_Data = dataStr;
primary_Data = "";
} else {
primary_Data = dataStr.substr(0, j);
field_Data = dataStr.substr(j+1);
}
}
if (type == 0) {
m_Query_Word = primary_Data;
}
if (primary_Data.length() > MAX_SEARCH_LEN) { // 超长进行截断
primary_Data = primary_Data.substr(0, MAX_SEARCH_LEN);
}
string probably_key = "";
bool is_correct = false;
if(primary_Data != "" && primary_Data.length() <= SINGLE_WORD_LEN) // 判断输入的词语是否正确,如果超过一定长度,则认为是多个词组成
{
JudgeWord(appid, primary_Data, is_correct, probably_key);
m_probably_data = probably_key;
}
vector<FieldInfo> primaryInfo;
FieldInfo pInfo;
string split_data;
if (is_correct == true) {
pInfo.field = INT_MAX;
pInfo.word = primary_Data;
primaryInfo.push_back(pInfo);
DataManager::Instance()->GetSynonymByKey(primary_Data, primaryInfo);
}
else if (probably_key != "") {
pInfo.word = probably_key;
primaryInfo.push_back(pInfo);
DataManager::Instance()->GetSynonymByKey(probably_key, primaryInfo);
}
else if (primary_Data != ""){
split_data = SplitManager::Instance()->split(primary_Data, appid);
log_debug("split_data: %s", split_data.c_str());
vector<string> split_datas = splitEx(split_data, "|");
for(size_t i = 0; i < split_datas.size(); i++) //是否有重复的同义词存在?
{
pInfo.field = INT_MAX;
pInfo.word = split_datas[i];
primaryInfo.push_back(pInfo);
DataManager::Instance()->GetSynonymByKey(split_datas[i], primaryInfo);
}
}
AddToFieldList(type, primaryInfo);
vector<string> gisCode;
vector<string> vec = splitEx(field_Data, " ");
vector<string>::iterator iter;
map<uint32_t, vector<FieldInfo> > field_keys_map;
uint32_t range_query = 0;
vector<string> lng_arr;
vector<string> lat_arr;
for (iter = vec.begin(); iter < vec.end(); iter++)
{
vector<FieldInfo> fieldInfos;
if ((*iter)[0] == '\0')
continue;
vector<string> tmp = splitEx(*iter, ":");
if (tmp.size() != 2)
continue;
uint32_t segment_tag = 0;
FieldInfo fieldInfo;
string fieldname = tmp[0];
uint32_t field = DBManager::Instance()->GetWordField(segment_tag, appid, fieldname, fieldInfo);
if(field != 0 && fieldInfo.index_tag == 0){
ExtraFilterKey extra_filter_key;
extra_filter_key.field_name = fieldname;
extra_filter_key.field_value = tmp[1];
extra_filter_key.field_type = fieldInfo.field_type;
if(type == 0){
extra_filter_keys.push_back(extra_filter_key);
} else if (type == 1) {
extra_filter_and_keys.push_back(extra_filter_key);
} else if (type == 2) {
extra_filter_invert_keys.push_back(extra_filter_key);
}
continue;
}
if (field != 0 && segment_tag == 1)
{
string split_data = SplitManager::Instance()->split(tmp[1], appid);
log_debug("split_data: %s", split_data.c_str());
vector<string> split_datas = splitEx(split_data, "|");
for(size_t index = 0; index < split_datas.size(); index++)
{
FieldInfo info;
info.field = fieldInfo.field;
info.field_type = fieldInfo.field_type;
info.word = split_datas[index];
info.segment_tag = fieldInfo.segment_tag;
fieldInfos.push_back(info);
}
}
else if (field != 0 && segment_tag == 5) {
range_query++;
string str = tmp[1];
str.erase(0, str.find_first_not_of(" "));
str.erase(str.find_last_not_of(" ") + 1);
if (str.size() == 0) {
log_error("field[%s] content is null", fieldname.c_str());
continue;
}
if (str[0] == '[') { // 范围查询
int l = str.find(",");
if (l == -1 || (str[str.size() - 1] != ']' && str[str.size() - 1] != ')')) {
log_error("field[%s] content[%s] invalid", fieldname.c_str(), str.c_str());
continue;
}
istringstream iss(str.substr(1, l).c_str());
iss >> fieldInfo.start;
string end_str = str.substr(l + 1, str.size() - l - 2);
end_str.erase(0, end_str.find_first_not_of(" "));
istringstream end_iss(end_str);
end_iss >> fieldInfo.end;
if (str[str.size() - 1] == ']') {
fieldInfo.range_type = RANGE_GELE;
}
else {
if (end_str.size() == 0) {
fieldInfo.range_type = RANGE_GE;
}
else {
fieldInfo.range_type = RANGE_GELT;
}
}
fieldInfos.push_back(fieldInfo);
}
else if (str[0] == '(') {
int l = str.find(",");
if (l == -1 || (str[str.size() - 1] != ']' && str[str.size() - 1] != ')')) {
log_error("field[%s] content[%s] invalid", fieldname.c_str(), str.c_str());
continue;
}
string start_str = str.substr(1, l).c_str();
string end_str = str.substr(l + 1, str.size() - l - 2);
start_str.erase(0, start_str.find_first_not_of(" "));
end_str.erase(0, end_str.find_first_not_of(" "));
istringstream start_iss(start_str);
start_iss >> fieldInfo.start;
istringstream end_iss(end_str);
end_iss >> fieldInfo.end;
if (str[str.size() - 1] == ']') {
if (start_str.size() == 0) {
fieldInfo.range_type = RANGE_LE;
}
else {
fieldInfo.range_type = RANGE_GTLE;
}
}
else {
if (start_str.size() != 0 && end_str.size() != 0) {
fieldInfo.range_type = RANGE_GTLT;
}
else if (start_str.size() == 0 && end_str.size() != 0) {
fieldInfo.range_type = RANGE_LT;
}
else if (start_str.size() != 0 && end_str.size() == 0) {
fieldInfo.range_type = RANGE_GT;
}
else {
log_error("field[%s] content[%s] invalid", fieldname.c_str(), str.c_str());
continue;
}
}
fieldInfos.push_back(fieldInfo);
}
else {
fieldInfo.word = tmp[1];
fieldInfos.push_back(fieldInfo);
}
log_debug("range_type: %d, start: %u, end: %u, segment_tag: %d, word: %s", fieldInfo.range_type, fieldInfo.start, fieldInfo.end, fieldInfo.segment_tag, fieldInfo.word.c_str());
}
else if (field != 0)
{
fieldInfo.word = tmp[1];
fieldInfos.push_back(fieldInfo);
}
else if (field == 0)
{
if (fieldInfo.field_type == 5) {
longitude_tmp = tmp[1];
longitude = longitude_tmp;
} else if (fieldInfo.field_type == 6) {
latitude_tmp = tmp[1];
latitude = latitude_tmp;
} else if (fieldInfo.field_type == 8) {
distance = strToDouble(tmp[1]);
} else if (fieldInfo.field_type == 7) {
gisip_tmp = tmp[1];
gisip = gisip_tmp;
} else if (fieldInfo.field_type == FIELD_WKT) {
string str = tmp[1];
str = delPrefix(str);
vector<string> str_vec = splitEx(str, ",");
for(uint32_t str_vec_idx = 0; str_vec_idx < str_vec.size(); str_vec_idx++){
string wkt_str = trim(str_vec[str_vec_idx]);
vector<string> wkt_vec = splitEx(wkt_str, "-");
if(wkt_vec.size() == 2){
lng_arr.push_back(wkt_vec[0]);
lat_arr.push_back(wkt_vec[1]);
}
}
}
}
if (fieldInfos.size() != 0) {
field_keys_map.insert(make_pair(fieldInfo.field, fieldInfos));
}
}
double distance_tmp = 2; // 不指定distance时最多返回2km内的数据
if(distance > 1e-6 && distance_tmp > distance + 1e-6){ // distance大于0小于2时取distance的值
distance_tmp = distance;
}
GetGisCode(longitude_tmp, latitude_tmp, gisip_tmp, distance_tmp, gisCode);
log_debug("lng_arr size: %d, lat_arr size: %d", (int)lng_arr.size(), (int)lat_arr.size());
if (gisCode.size() == 0 && lng_arr.size() > 0){
GetGisCode(lng_arr, lat_arr, gisCode);
}
if(gisCode.size() > 0){
vector<FieldInfo> fieldInfos;
uint32_t segment_tag = 0;
FieldInfo fieldInfo;
uint32_t field = DBManager::Instance()->GetWordField(segment_tag, appid, "gis", fieldInfo);
if (field != 0 && segment_tag == 0) {
m_has_gis = 1;
for (size_t index = 0; index < gisCode.size(); index++) {
FieldInfo info;
info.field = fieldInfo.field;
info.field_type = fieldInfo.field_type;
info.segment_tag = fieldInfo.segment_tag;
info.word = gisCode[index];
fieldInfos.push_back(info);
}
}
if (fieldInfos.size() != 0) {
field_keys_map.insert(make_pair(fieldInfo.field, fieldInfos));
}
}
//如果key_and查询的field匹配到联合索引则将查询词拼接起来作为新的查询词
if(type == 1){
vector<string> union_key_vec;
DBManager::Instance()->GetUnionKeyField(appid, union_key_vec);
vector<string>::iterator union_key_iter = union_key_vec.begin();
for(; union_key_iter != union_key_vec.end(); union_key_iter++){
string union_key = *union_key_iter;
vector<int> union_field_vec = splitInt(union_key, ",");
vector<int>::iterator union_field_iter = union_field_vec.begin();
bool hit_union_key = true;
for(; union_field_iter != union_field_vec.end(); union_field_iter++){
if(field_keys_map.find(*union_field_iter) == field_keys_map.end()){
hit_union_key = false;
break;
}
}
if(hit_union_key == true){
vector<vector<string> > keys_vvec;
vector<FieldInfo> unionFieldInfos;
for(union_field_iter = union_field_vec.begin(); union_field_iter != union_field_vec.end(); union_field_iter++){
vector<FieldInfo> field_info_vec = field_keys_map.at(*union_field_iter);
vector<string> key_vec;
GetKeyFromFieldInfo(field_info_vec, key_vec);
keys_vvec.push_back(key_vec);
field_keys_map.erase(*union_field_iter); // 命中union_key的需要从field_keys_map中删除
}
vector<string> union_keys = Combination(keys_vvec);
for(int m = 0 ; m < (int)union_keys.size(); m++){
FieldInfo info;
info.field = 0;
info.field_type = FIELD_STRING;
info.segment_tag = 1;
info.word = union_keys[m];
unionFieldInfos.push_back(info);
}
AddToFieldList(type, unionFieldInfos);
log_debug("hit union key index.");
break;
}
}
map<uint32_t, vector<FieldInfo> >::iterator field_key_map_iter = field_keys_map.begin();
for(; field_key_map_iter != field_keys_map.end(); field_key_map_iter++){
AddToFieldList(type, field_key_map_iter->second);
}
} else {
map<uint32_t, vector<FieldInfo> >::iterator field_key_map_iter = field_keys_map.begin();
for(; field_key_map_iter != field_keys_map.end(); field_key_map_iter++){
AddToFieldList(type, field_key_map_iter->second);
}
}
if(type == 1){ // terminal_tag为1时key_and中必须只带有一个范围查询
if(m_terminal_tag == 1 && range_query == 1 && and_keys.size() == 1){
m_terminal_tag_valid = true;
}
}
return ;
}
// Appends fields as one entry of the key list selected by type:
// 0 -> keys, 1 -> and_keys, 2 -> invert_keys. Empty lists and any other
// type value are ignored.
void Component::AddToFieldList(int type, vector<FieldInfo>& fields)
{
    if (fields.empty()) {
        return;
    }
    switch (type) {
    case 0:
        keys.push_back(fields);
        break;
    case 1:
        and_keys.push_back(fields);
        break;
    case 2:
        invert_keys.push_back(fields);
        break;
    default:
        break;
    }
}
// ---- Key lists filled by AddToFieldList() ----

// Entries added with type 0.
const vector<vector<FieldInfo> >& Component::Keys() { return keys; }

// Entries added with type 1.
const vector<vector<FieldInfo> >& Component::AndKeys() { return and_keys; }

// Entries added with type 2.
const vector<vector<FieldInfo> >& Component::InvertKeys() { return invert_keys; }

// ---- Extra filter lists ----

const vector<ExtraFilterKey>& Component::ExtraFilterKeys() { return extra_filter_keys; }

const vector<ExtraFilterKey>& Component::ExtraFilterAndKeys() { return extra_filter_and_keys; }

const vector<ExtraFilterKey>& Component::ExtraFilterInvertKeys() { return extra_filter_invert_keys; }

// ---- Free-text query word and its suggested replacement ----

string Component::QueryWord() { return m_Query_Word; }

void Component::SetQueryWord(string query_word) { m_Query_Word = query_word; }

string Component::ProbablyData() { return m_probably_data; }

void Component::SetProbablyData(string probably_data) { m_probably_data = probably_data; }

// ---- Geo parameters ----

string Component::Latitude() { return latitude; }

string Component::Longitude() { return longitude; }

double Component::Distance() { return distance; }

// ---- Raw request values stored by ParseJson() ----

// Value of the "key" member (or the default query).
string Component::Data() { return m_Data; }

uint32_t Component::JdqSwitch() { return m_jdq_switch; }

uint32_t Component::Appid() { return m_appid; }

// Value of the "key_and" member.
string Component::DataAnd() { return m_Data_and; }

// Value of the "key_invert" member.
string Component::DataInvert() { return m_Data_invert; }

// Value of the "key_complete" member.
string Component::DataComplete() { return m_Data_complete; }

uint32_t Component::SortType() { return m_sort_type; }

uint32_t Component::PageIndex() { return m_page_index; }

uint32_t Component::PageSize() { return m_page_size; }

uint32_t Component::ReturnAll() { return m_return_all; }

// ---- Per-app switches loaded by InitSwitch() ----

uint32_t Component::CacheSwitch() { return m_cache_switch; }

uint32_t Component::TopSwitch() { return m_top_switch; }

uint32_t Component::SnapshotSwitch() { return m_snapshot_switch; }

// ---- Sorting and search-after state ----

string Component::SortField() { return m_sort_field; }

string Component::LastId() { return m_last_id; }

string Component::LastScore() { return m_last_score; }

bool Component::SearchAfter() { return m_search_after; }

// ---- Miscellaneous ----

// Mutable access to the field names parsed from the "fields" member.
vector<string>& Component::Fields() { return m_fields; }

uint32_t Component::TerminalTag() { return m_terminal_tag; }

bool Component::TerminalTagValid() { return m_terminal_tag_valid; }

// Mutable access to the JSON value stored from the "query" member.
Json::Value& Component::GetQuery() { return m_query; }
// Appends the word of every FieldInfo in field_info_vec to key_vec, in order.
// Existing contents of key_vec are kept.
void Component::GetKeyFromFieldInfo(const vector<FieldInfo>& field_info_vec, vector<string>& key_vec){
    for (size_t idx = 0; idx < field_info_vec.size(); ++idx) {
        key_vec.push_back(field_info_vec[idx].word);
    }
}
/*
** Builds every combination that takes exactly one element from each inner
** vector, joining the chosen elements with '_':
**   [[a],[b1,b2],[c1,c2,c3]]
**   -> [a_b1_c1,a_b1_c2,a_b1_c3,a_b2_c1,a_b2_c2,a_b2_c3]
** Elements of earlier vectors vary slowest. A single inner vector is returned
** unchanged; if any inner vector is empty the result is empty. An empty outer
** vector yields an empty result instead of indexing a non-existent element.
*/
vector<string> Component::Combination(vector<vector<string> > &dimensionalArr){
    if (dimensionalArr.empty()) {
        return vector<string>();
    }

    // Fold the dimensions left to right: "combined" always holds the joined
    // combinations of the dimensions processed so far.
    vector<string> combined = dimensionalArr[0];
    for (size_t dim = 1; dim < dimensionalArr.size(); ++dim) {
        const vector<string> &next = dimensionalArr[dim];
        vector<string> expanded;
        expanded.reserve(combined.size() * next.size());
        for (size_t i = 0; i < combined.size(); ++i) {
            for (size_t j = 0; j < next.size(); ++j) {
                expanded.push_back(combined[i] + "_" + next[j]);
            }
        }
        combined.swap(expanded);
    }
    return combined;
}

View File

@ -1,118 +0,0 @@
/*
* =====================================================================================
*
* Filename: component.h
*
* Description: component class definition.
*
* Version: 1.0
* Created: 09/08/2019
* Revision: none
* Compiler: gcc
*
* Author: zhulin, shzhulin3@jd.com
* Company: JD.com, Inc.
*
* =====================================================================================
*/
#ifndef __COMPONENT_H__
#define __COMPONENT_H__
#include "comm.h"
#include "json/json.h"
#include <string>
#include <vector>
#include "query/query_parser.h"
using namespace std;
// Holds the parsed state of one search request (query strings, paging and
// sort options, feature switches) and exposes it through accessor methods.
class Component
{
public:
Component();
~Component();
// Fills the key lists from the request; reports GIS usage through m_has_gis
// and an error description through err_msg.
// NOTE(review): meaning of the int return value is not visible here - confirm.
int GetQueryWord(uint32_t &m_has_gis, string &err_msg);
// Read-only views of the key lists stored in this object.
const vector<vector<FieldInfo> >& Keys();
const vector<vector<FieldInfo> >& AndKeys();
const vector<vector<FieldInfo> >& InvertKeys();
const vector<ExtraFilterKey>& ExtraFilterKeys();
const vector<ExtraFilterKey>& ExtraFilterAndKeys();
const vector<ExtraFilterKey>& ExtraFilterInvertKeys();
// Parses json_len bytes of sz_json into recv_packet.
// NOTE(review): return-code convention is not visible here - confirm.
int ParseJson(const char *sz_json, int json_len, Json::Value &recv_packet);
void InitSwitch();
string QueryWord();
void SetQueryWord(string query_word);
string ProbablyData();
void SetProbablyData(string probably_data);
// Plain accessors. String accessors return copies; Fields() and GetQuery()
// return mutable references to the underlying members.
string Latitude();
string Longitude();
double Distance();
string Data();
string DataAnd();
string DataInvert();
string DataComplete();
uint32_t JdqSwitch();
uint32_t Appid();
uint32_t SortType();
uint32_t PageIndex();
uint32_t PageSize();
uint32_t ReturnAll();
uint32_t CacheSwitch();
uint32_t TopSwitch();
uint32_t SnapshotSwitch();
string SortField();
string LastId();
string LastScore();
bool SearchAfter();
vector<string>& Fields();
uint32_t TerminalTag();
bool TerminalTagValid();
Json::Value& GetQuery();
void GetFieldWords(int type, string dataStr, uint32_t appid, uint32_t &m_has_gis);
void AddToFieldList(int type, vector<FieldInfo>& fields);
// Appends the `word` of every FieldInfo in field_info_vec to key_vec.
void GetKeyFromFieldInfo(const vector<FieldInfo>& field_info_vec, vector<string>& key_vec);
// Returns all "_"-joined combinations that take one element per inner vector.
vector<string> Combination(vector<vector<string> > &dimensionalArr);
private:
vector<vector<FieldInfo> > keys;
vector<vector<FieldInfo> > and_keys;
vector<vector<FieldInfo> > invert_keys;
vector<ExtraFilterKey> extra_filter_keys;
vector<ExtraFilterKey> extra_filter_and_keys;
vector<ExtraFilterKey> extra_filter_invert_keys;
string m_Query_Word;
string m_probably_data;
string latitude;
string longitude;
string gisip;
double distance;
string m_Data; // query word
string m_Data_and; // query word that results must contain
string m_Data_invert; // query word that results must not contain
string m_Data_complete; // complete keyword
uint32_t m_page_index;
uint32_t m_page_size;
uint32_t m_return_all;
uint32_t m_cache_switch;
uint32_t m_top_switch;
uint32_t m_snapshot_switch;
uint32_t m_sort_type;
uint32_t m_appid;
uint32_t m_query_type;
string m_sort_field;
string m_last_id;
string m_last_score;
bool m_search_after;
vector<string> m_fields;
string m_default_query;
uint32_t m_jdq_switch;
uint32_t m_terminal_tag;
bool m_terminal_tag_valid;
Json::Value m_query;
// NOTE(review): raw pointers; ownership and lifetime are not visible in this
// header - confirm against the constructor/destructor.
QueryParser* query_parser;
QueryParserRes* query_parser_res;
};
#endif

View File

@ -23,8 +23,8 @@
[
{
"ip": "127.0.0.1",
"bid": 2153,
"port": 20000,
"bid": 2113,
"port": 30311,
"weight": 1,
"status": 1
}

View File

@ -16,288 +16,329 @@
* =====================================================================================
*/
#include "component.h"
#include "request_context.h"
#include "doc_manager.h"
#include "log.h"
#include "search_util.h"
#include "db_manager.h"
#include "process/geo_distance_query_process.h"
#include <math.h>
#include <sstream>
// Stores the given Component pointer for later lookups; nothing else is
// initialised explicitly.
DocManager::DocManager(Component *c): component(c){
}
extern CIndexTableManager g_IndexInstance;
// Starts with empty score, version and content maps and stores the given
// RequestContext pointer in `component`.
DocManager::DocManager(RequestContext *c)
: score_str_map()
, score_int_map()
, score_double_map()
, valid_version_()
, doc_content_map_()
, component(c)
{ }
// Empty destructor body: the `component` pointer is not deleted here.
DocManager::~DocManager(){
}
bool DocManager::CheckDocByExtraFilterKey(string doc_id){
vector<ExtraFilterKey> extra_filter_vec = component->ExtraFilterKeys();
vector<ExtraFilterKey> extra_filter_and_vec = component->ExtraFilterAndKeys();
vector<ExtraFilterKey> extra_filter_invert_vec = component->ExtraFilterInvertKeys();
if(extra_filter_vec.size() == 0 && extra_filter_and_vec.size() == 0 && extra_filter_invert_vec.size() == 0){
return true;
} else {
vector<string> fields;
for(int i = 0; i < (int)extra_filter_vec.size(); i++){
fields.push_back(extra_filter_vec[i].field_name);
}
for(int i = 0; i < (int)extra_filter_and_vec.size(); i++){
fields.push_back(extra_filter_and_vec[i].field_name);
}
for(int i = 0; i < (int)extra_filter_invert_vec.size(); i++){
fields.push_back(extra_filter_invert_vec[i].field_name);
}
Json::Value value;
uint32_t doc_version = 0;
if(valid_version.find(doc_id) != valid_version.end()){
doc_version = valid_version[doc_id];
}
if(doc_content_map.find(doc_id) != doc_content_map.end()){
string extend = doc_content_map[doc_id];
Json::Reader r(Json::Features::strictMode());
int ret2 = r.parse(extend.c_str(), extend.c_str() + extend.length(), value);
if (0 == ret2){
log_error("the err json is %s, errmsg : %s", extend.c_str(), r.getFormattedErrorMessages().c_str());
return false;
}
} else {
bool bRet = g_IndexInstance.GetContentByField(component->Appid(), doc_id, doc_version, fields, value);
if(bRet == false){
log_error("get field content error, appid[%d] doc_id[%s].", component->Appid(), doc_id.c_str());
return true;
}
}
bool DocManager::CheckDocByExtraFilterKey(std::string doc_id){
std::vector<ExtraFilterKey> extra_filter_vec = component->ExtraFilterOrKeys();
std::vector<ExtraFilterKey> extra_filter_and_vec = component->ExtraFilterAndKeys();
std::vector<ExtraFilterKey> extra_filter_invert_vec = component->ExtraFilterInvertKeys();
if(extra_filter_vec.size() == 0 && extra_filter_and_vec.size() == 0 && extra_filter_invert_vec.size() == 0){
return true;
} else {
std::vector<std::string> fields;
for(int i = 0; i < (int)extra_filter_vec.size(); i++){
fields.push_back(extra_filter_vec[i].field_name);
}
for(int i = 0; i < (int)extra_filter_and_vec.size(); i++){
fields.push_back(extra_filter_and_vec[i].field_name);
}
for(int i = 0; i < (int)extra_filter_invert_vec.size(); i++){
fields.push_back(extra_filter_invert_vec[i].field_name);
}
Json::Value value;
uint32_t doc_version = 0;
if(valid_version_.find(doc_id) != valid_version_.end()){
doc_version = valid_version_[doc_id];
}
if(doc_content_map_.find(doc_id) != doc_content_map_.end()){
std::string extend = doc_content_map_[doc_id];
Json::Reader r(Json::Features::strictMode());
int ret2 = r.parse(extend.c_str(), extend.c_str() + extend.length(), value);
if (0 == ret2){
log_error("the err json is %s, errmsg : %s", extend.c_str(), r.getFormattedErrorMessages().c_str());
return false;
}
} else {
bool bRet = g_IndexInstance.GetContentByField(component->Appid(), doc_id, doc_version, fields, value);
if(bRet == false){
log_error("get field content error, appid[%d] doc_id[%s].", component->Appid(), doc_id.c_str());
return true;
}
}
bool key_or_valid = false;
CheckIfKeyValid(extra_filter_vec, value, true, key_or_valid);
if(extra_filter_vec.size() > 0 && key_or_valid == false){
return false;
}
bool key_or_valid = false;
CheckIfKeyValid(extra_filter_vec, value, true, key_or_valid);
if(extra_filter_vec.size() > 0 && key_or_valid == false){
return false;
}
bool key_and_valid = true;
CheckIfKeyValid(extra_filter_and_vec, value, false, key_and_valid);
if(key_and_valid == false){
return false;
}
bool key_and_valid = true;
CheckIfKeyValid(extra_filter_and_vec, value, false, key_and_valid);
if(key_and_valid == false){
return false;
}
bool key_invert_valid = false;
CheckIfKeyValid(extra_filter_invert_vec, value, true, key_invert_valid);
if(key_invert_valid == true){
return false;
}
bool key_invert_valid = false;
CheckIfKeyValid(extra_filter_invert_vec, value, true, key_invert_valid);
if(key_invert_valid == true){
return false;
}
return true;
}
return true;
}
}
void DocManager::CheckIfKeyValid(const vector<ExtraFilterKey>& extra_filter_vec, const Json::Value &value, bool flag, bool &key_valid){
for(int i = 0; i < (int)extra_filter_vec.size(); i++){
bool the_same = false;
string field_name = extra_filter_vec[i].field_name;
if(extra_filter_vec[i].field_type == FIELD_INT){
string query = extra_filter_vec[i].field_value;
vector<string> query_vec = splitEx(query, "|");
if(query_vec.size() > 1){
for(int i = 0 ; i < (int)query_vec.size(); i++){
if(atoi(query_vec[i].c_str()) == value[field_name.c_str()].asInt()){
the_same = true;
break;
}
}
} else {
the_same = (atoi(extra_filter_vec[i].field_value.c_str()) == value[field_name.c_str()].asInt());
}
} else if(extra_filter_vec[i].field_type == FIELD_DOUBLE){
double d_field_value = atof(extra_filter_vec[i].field_value.c_str());
double d_extend = value[field_name.c_str()].asDouble();
the_same = (fabs(d_field_value - d_extend) < 1e-15);
} else if(extra_filter_vec[i].field_type == FIELD_STRING){
string snapshot = value[field_name.c_str()].asString();
string query = extra_filter_vec[i].field_value;
set<string> snapshot_set = splitStr(snapshot, "|");
vector<string> query_vec = splitEx(query, "|");
for(int i = 0 ; i < (int)query_vec.size(); i++){
if(snapshot_set.find(query_vec[i]) != snapshot_set.end()){
the_same = true;
break;
}
}
}
if(the_same == flag){
key_valid = flag;
break;
}
}
// Scans extra_filter_vec and compares each filter against the matching field
// of `value`. As soon as a filter's match result equals `flag`, key_valid is
// set to `flag` and the scan stops; otherwise key_valid is left untouched.
//   FIELD_INT    - the filter value may hold several "|"-separated integers;
//                  any of them equal to the document's integer matches.
//   FIELD_DOUBLE - matches when the absolute difference is below 1e-15.
//   FIELD_STRING - both sides are split on "|"; any shared token matches.
// Filters of any other type never match.
void DocManager::CheckIfKeyValid(const std::vector<ExtraFilterKey>& extra_filter_vec, const Json::Value &value, bool flag, bool &key_valid){
    const size_t filter_count = extra_filter_vec.size();
    for(size_t idx = 0; idx < filter_count; ++idx){
        const ExtraFilterKey& filter = extra_filter_vec[idx];
        std::string field_name = filter.field_name;
        bool matched = false;

        if(filter.field_type == FIELD_INT){
            std::string query = filter.field_value;
            std::vector<std::string> candidates = splitEx(query, "|");
            const int doc_number = value[field_name.c_str()].asInt();
            if(candidates.size() > 1){
                for(size_t c = 0; c < candidates.size() && !matched; ++c){
                    matched = (atoi(candidates[c].c_str()) == doc_number);
                }
            } else {
                // Single value: compare the raw filter string directly.
                matched = (atoi(filter.field_value.c_str()) == doc_number);
            }
        } else if(filter.field_type == FIELD_DOUBLE){
            const double wanted = atof(filter.field_value.c_str());
            const double actual = value[field_name.c_str()].asDouble();
            matched = (fabs(wanted - actual) < 1e-15);
        } else if(filter.field_type == FIELD_STRING){
            std::string snapshot = value[field_name.c_str()].asString();
            std::string query = filter.field_value;
            std::set<std::string> doc_tokens = splitStr(snapshot, "|");
            std::vector<std::string> candidates = splitEx(query, "|");
            for(size_t c = 0; c < candidates.size() && !matched; ++c){
                matched = (doc_tokens.find(candidates[c]) != doc_tokens.end());
            }
        }

        if(matched == flag){
            key_valid = flag;
            return;
        }
    }
}
bool DocManager::GetDocContent(uint32_t m_has_gis, vector<IndexInfo> &doc_id_ver_vec, set<string> &valid_docs, hash_double_map &distances){
if (!m_has_gis && component->SnapshotSwitch() == 1 && doc_id_ver_vec.size() <= 1000) {
bool need_version = false;
if(component->Fields().size() > 0){
need_version = true;
}
bool bRet = g_IndexInstance.DocValid(component->Appid(), doc_id_ver_vec, valid_docs, need_version, valid_version, doc_content_map);
if (false == bRet) {
log_error("GetDocInfo by snapshot error.");
return false;
}
} else {
for(size_t i = 0 ; i < doc_id_ver_vec.size(); i++){
if(!m_has_gis){
valid_docs.insert(doc_id_ver_vec[i].doc_id);
}
if(doc_id_ver_vec[i].extend != ""){
doc_content_map.insert(make_pair(doc_id_ver_vec[i].doc_id, doc_id_ver_vec[i].extend));
}
}
}
bool DocManager::GetDocContent(){
const std::vector<IndexInfo>& o_index_info_vet = ResultContext::Instance()->GetIndexInfos();
if (component->SnapshotSwitch() == 1 && o_index_info_vet.size() <= 1000) {
bool need_version = false;
if(component->RequiredFields().size() > 0){
need_version = true;
}
bool bRet = g_IndexInstance.DocValid(component->Appid(), o_index_info_vet, need_version, valid_version_, doc_content_map_);
if (false == bRet) {
log_error("GetDocInfo by snapshot error.");
return false;
}
} else {
for(size_t i = 0 ; i < o_index_info_vet.size(); i++){
ResultContext::Instance()->SetValidDocs(o_index_info_vet[i].doc_id);
if(o_index_info_vet[i].extend != ""){
doc_content_map_.insert(std::make_pair(o_index_info_vet[i].doc_id, o_index_info_vet[i].extend));
}
}
}
log_debug("doc_id_ver_vec size: %d", (int)doc_id_ver_vec.size());
if (m_has_gis) {
if(doc_content_map.size() == 0){
g_IndexInstance.GetDocContent(component->Appid(), doc_id_ver_vec, doc_content_map);
}
GetGisDistance(component->Appid(), component->Latitude(), component->Longitude(), distances, doc_content_map);
}
return true;
log_debug("doc_id_ver_vec size: %d", (int)o_index_info_vet.size());
return true;
}
bool DocManager::AppendFieldsToRes(Json::Value &response, vector<string> &m_fields){
for(int i = 0; i < (int)response["result"].size(); i++){
Json::Value doc_info = response["result"][i];
bool DocManager::GetDocContent(
const GeoPointContext& geo_point ,
std::vector<IndexInfo>& index_infos)
{
std::vector<IndexInfo>::iterator iter = index_infos.begin();
for( ;iter != index_infos.end(); ++iter){
if((iter->extend) != ""){
doc_content_map_.insert(make_pair(iter->doc_id, iter->extend));
}
}
string doc_id = doc_info["doc_id"].asString();
if(doc_content_map.find(doc_id) != doc_content_map.end()){
string extend = doc_content_map[doc_id];
Json::Reader r(Json::Features::strictMode());
Json::Value recv_packet;
int ret2 = r.parse(extend.c_str(), extend.c_str() + extend.length(), recv_packet);
if (0 == ret2){
log_error("parse json error [%s], errmsg : %s", extend.c_str(), r.getFormattedErrorMessages().c_str());
return false;
}
if(doc_content_map_.empty()){
g_IndexInstance.GetDocContent(component->Appid(),index_infos , doc_content_map_);
}
Json::Value &value = response["result"][i];
for(int i = 0; i < (int)m_fields.size(); i++){
if (recv_packet.isMember(m_fields[i].c_str()))
{
if(recv_packet[m_fields[i].c_str()].isUInt()){
value[m_fields[i].c_str()] = recv_packet[m_fields[i].c_str()].asUInt();
} else if(recv_packet[m_fields[i].c_str()].isString()){
value[m_fields[i].c_str()] = recv_packet[m_fields[i].c_str()].asString();
} else if(recv_packet[m_fields[i].c_str()].isDouble()){
value[m_fields[i].c_str()] = recv_packet[m_fields[i].c_str()].asDouble();
} else {
log_error("field[%s] data type error.", m_fields[i].c_str());
}
} else {
log_error("appid[%u] field[%s] invalid.", component->Appid(), m_fields[i].c_str());
}
}
} else {
uint32_t doc_version = 0;
if(valid_version.find(doc_info["doc_id"].asString()) != valid_version.end()){
doc_version = valid_version[doc_info["doc_id"].asString()];
}
bool bRet = g_IndexInstance.GetContentByField(component->Appid(), doc_info["doc_id"].asString(), doc_version, m_fields, response["result"][i]);
if(bRet == false){
log_error("get field content error.");
return false;
}
}
}
return true;
hash_double_map docid_dis_map;
bool bret = GetGisDistance(component->Appid(), geo_point, doc_content_map_, docid_dis_map);
if (!bret){
return bret;
}
std::vector<IndexInfo> o_valid_index_infos;
hash_double_map::iterator docid_dis_iter = docid_dis_map.begin();
for ( ; docid_dis_iter != docid_dis_map.end(); ++docid_dis_iter){
iter = index_infos.begin();
for( ;iter != index_infos.end(); ++iter){
if ((docid_dis_iter->first) == (iter->doc_id)){
iter->distance = docid_dis_iter->second;
o_valid_index_infos.push_back(*iter);
}
}
}
index_infos.swap(o_valid_index_infos);
return bret;
}
bool DocManager::GetScoreMap(string doc_id, uint32_t m_sort_type, string m_sort_field, FIELDTYPE &m_sort_field_type, uint32_t appid){
if(doc_content_map.find(doc_id) != doc_content_map.end()){
uint32_t field_type = 0;
bool bRet = DBManager::Instance()->GetFieldType(appid, m_sort_field, field_type);
if(false == bRet){
log_error("appid[%d] field[%s] not find.", appid, m_sort_field.c_str());
return false;
}
string extend = doc_content_map[doc_id];
bool DocManager::AppendFieldsToRes(Json::Value &response, std::vector<std::string> &m_fields){
for(int i = 0; i < (int)response["result"].size(); i++){
Json::Value doc_info = response["result"][i];
if(field_type == FIELD_INT){
int len = strlen(m_sort_field.c_str()) + strlen("\":");
size_t pos1 = extend.find(m_sort_field);
size_t pos2 = extend.find_first_of(",", pos1);
if(pos2 == string::npos){
pos2 = extend.find_first_of("}", pos1);
}
if(pos1 != string::npos && pos2 != string::npos){
string field_str = extend.substr(pos1+len, pos2-pos1-len);
int field_int;
istringstream iss(field_str);
iss >> field_int;
m_sort_field_type = FIELDTYPE_INT;
score_int_map.insert(make_pair(doc_id, field_int));
} else {
m_sort_field_type = FIELDTYPE_INT;
score_int_map.insert(make_pair(doc_id, 0));
}
} else {
Json::Reader r(Json::Features::strictMode());
Json::Value recv_packet;
int ret2 = r.parse(extend.c_str(), extend.c_str() + extend.length(), recv_packet);
if (0 == ret2){
log_error("the err json is %s, errmsg : %s", extend.c_str(), r.getFormattedErrorMessages().c_str());
return false;
}
std::string doc_id = doc_info["doc_id"].asString();
if(doc_content_map_.find(doc_id) != doc_content_map_.end()){
std::string extend = doc_content_map_[doc_id];
Json::Reader r(Json::Features::strictMode());
Json::Value recv_packet;
int ret2 = r.parse(extend.c_str(), extend.c_str() + extend.length(), recv_packet);
if (0 == ret2){
log_error("parse json error [%s], errmsg : %s", extend.c_str(), r.getFormattedErrorMessages().c_str());
return false;
}
if(recv_packet.isMember(m_sort_field.c_str()))
{
if(recv_packet[m_sort_field.c_str()].isUInt()){
m_sort_field_type = FIELDTYPE_INT;
score_int_map.insert(make_pair(doc_id, recv_packet[m_sort_field.c_str()].asUInt()));
} else if(recv_packet[m_sort_field.c_str()].isString()){
m_sort_field_type = FIELDTYPE_STRING;
score_str_map.insert(make_pair(doc_id, recv_packet[m_sort_field.c_str()].asString()));
} else if(recv_packet[m_sort_field.c_str()].isDouble()){
m_sort_field_type = FIELDTYPE_DOUBLE;
score_double_map.insert(make_pair(doc_id, recv_packet[m_sort_field.c_str()].asDouble()));
} else {
log_error("sort_field[%s] data type error.", m_sort_field.c_str());
return false;
}
} else {
log_error("appid[%u] sort_field[%s] invalid.", component->Appid(), m_sort_field.c_str());
return false;
}
}
} else {
ScoreInfo score_info;
bool bRet = g_IndexInstance.GetScoreByField(component->Appid(), doc_id, m_sort_field, m_sort_type, score_info);
if(bRet == false){
log_error("get score by field error.");
return false;
}
m_sort_field_type = score_info.type;
if(score_info.type == FIELDTYPE_INT){
score_int_map.insert(make_pair(doc_id, score_info.i));
} else if(score_info.type == FIELDTYPE_STRING){
score_str_map.insert(make_pair(doc_id, score_info.str));
} else if(score_info.type == FIELDTYPE_DOUBLE){
score_double_map.insert(make_pair(doc_id, score_info.d));
}
}
return true;
Json::Value &value = response["result"][i];
for(int i = 0; i < (int)m_fields.size(); i++){
if (recv_packet.isMember(m_fields[i].c_str()))
{
if(recv_packet[m_fields[i].c_str()].isUInt()){
value[m_fields[i].c_str()] = recv_packet[m_fields[i].c_str()].asUInt();
} else if(recv_packet[m_fields[i].c_str()].isString()){
value[m_fields[i].c_str()] = recv_packet[m_fields[i].c_str()].asString();
} else if(recv_packet[m_fields[i].c_str()].isDouble()){
value[m_fields[i].c_str()] = recv_packet[m_fields[i].c_str()].asDouble();
} else if(recv_packet[m_fields[i].c_str()].isObject()
|| recv_packet[m_fields[i].c_str()].isArray()){
value[m_fields[i].c_str()] = recv_packet[m_fields[i].c_str()];
}else{
log_error("field[%s] data type error.", m_fields[i].c_str());
}
} else {
log_error("appid[%u] field[%s] invalid.", component->Appid(), m_fields[i].c_str());
}
}
} else {
uint32_t doc_version = 0;
if(valid_version_.find(doc_info["doc_id"].asString()) != valid_version_.end()){
doc_version = valid_version_[doc_info["doc_id"].asString()];
}
bool bRet = g_IndexInstance.GetContentByField(component->Appid(), doc_info["doc_id"].asString(), doc_version, m_fields, response["result"][i]);
if(bRet == false){
log_error("get field content error.");
return false;
}
}
}
return true;
}
map<string, string>& DocManager::ScoreStrMap(){
return score_str_map;
// Determines the sort-field value of one document and records it in the score
// map matching its type (score_int_map, score_str_map or score_double_map).
//
// doc_id            - document whose score is wanted.
// m_sort_type       - forwarded to GetScoreByField when no cached content exists.
// m_sort_field      - name of the field to sort by.
// m_sort_field_type - out: type of the value that was recorded.
// Returns false when the field type lookup, the JSON parse, or the index
// lookup fails, or when the field is missing / has an unsupported JSON type.
bool DocManager::GetScoreMap(std::string doc_id, uint32_t m_sort_type, std::string m_sort_field, FIELDTYPE &m_sort_field_type){
    if(doc_content_map_.find(doc_id) != doc_content_map_.end()){
        uint32_t field_type = 0;
        bool bRet = DBManager::Instance()->GetFieldType(component->Appid(), m_sort_field, field_type);
        if(false == bRet){
            log_error("appid[%d] field[%s] not find.", component->Appid(), m_sort_field.c_str());
            return false;
        }
        std::string extend = doc_content_map_[doc_id];

        if(field_type == FIELD_INT){
            // Fast path for integer fields: read the number straight out of
            // the serialized content instead of parsing the whole JSON.
            // The key is matched as "<field>": including the quotes, so a
            // longer key (e.g. "price_max" for "price") or the field name
            // occurring inside a value is not picked up by mistake.
            const std::string quoted_key = "\"" + m_sort_field + "\":";
            int field_int = 0;   // used when the key or its terminator is absent
            const size_t key_pos = extend.find(quoted_key);
            if(key_pos != std::string::npos){
                // Searching from the value start keeps value_end >= value_pos,
                // so the substr() length can never underflow.
                const size_t value_pos = key_pos + quoted_key.size();
                size_t value_end = extend.find_first_of(",", value_pos);
                if(value_end == std::string::npos){
                    value_end = extend.find_first_of("}", value_pos);
                }
                if(value_end != std::string::npos){
                    std::istringstream iss(extend.substr(value_pos, value_end - value_pos));
                    iss >> field_int;
                }
            }
            m_sort_field_type = FIELDTYPE_INT;
            score_int_map.insert(std::make_pair(doc_id, field_int));
        } else {
            Json::Reader r(Json::Features::strictMode());
            Json::Value recv_packet;
            int ret2 = r.parse(extend.c_str(), extend.c_str() + extend.length(), recv_packet);
            if (0 == ret2){
                log_error("the err json is %s, errmsg : %s", extend.c_str(), r.getFormattedErrorMessages().c_str());
                return false;
            }

            if(recv_packet.isMember(m_sort_field.c_str()))
            {
                if(recv_packet[m_sort_field.c_str()].isUInt()){
                    m_sort_field_type = FIELDTYPE_INT;
                    score_int_map.insert(std::make_pair(doc_id, recv_packet[m_sort_field.c_str()].asUInt()));
                } else if(recv_packet[m_sort_field.c_str()].isString()){
                    m_sort_field_type = FIELDTYPE_STRING;
                    score_str_map.insert(std::make_pair(doc_id, recv_packet[m_sort_field.c_str()].asString()));
                } else if(recv_packet[m_sort_field.c_str()].isDouble()){
                    m_sort_field_type = FIELDTYPE_DOUBLE;
                    score_double_map.insert(std::make_pair(doc_id, recv_packet[m_sort_field.c_str()].asDouble()));
                } else {
                    log_error("sort_field[%s] data type error.", m_sort_field.c_str());
                    return false;
                }
            } else {
                log_error("appid[%u] sort_field[%s] invalid.", component->Appid(), m_sort_field.c_str());
                return false;
            }
        }
    } else {
        // No cached content for this document: ask the index for the score.
        ScoreInfo score_info;
        bool bRet = g_IndexInstance.GetScoreByField(component->Appid(), doc_id, m_sort_field, m_sort_type, score_info);
        if(bRet == false){
            log_error("get score by field error.");
            return false;
        }
        m_sort_field_type = score_info.type;
        if(score_info.type == FIELDTYPE_INT){
            score_int_map.insert(std::make_pair(doc_id, score_info.i));
        } else if(score_info.type == FIELDTYPE_STRING){
            score_str_map.insert(std::make_pair(doc_id, score_info.str));
        } else if(score_info.type == FIELDTYPE_DOUBLE){
            score_double_map.insert(std::make_pair(doc_id, score_info.d));
        }
    }
    return true;
}
map<string, int>& DocManager::ScoreIntMap(){
return score_int_map;
// Returns a mutable reference to the doc_id -> string score map.
std::map<std::string, std::string>& DocManager::ScoreStrMap(){
return score_str_map;
}
map<string, double>& DocManager::ScoreDoubleMap(){
return score_double_map;
// Returns a mutable reference to the doc_id -> integer score map.
std::map<std::string, int>& DocManager::ScoreIntMap(){
return score_int_map;
}
map<string, uint32_t>& DocManager::ValidVersion(){
return valid_version;
// Returns a mutable reference to the doc_id -> double score map.
std::map<std::string, double>& DocManager::ScoreDoubleMap(){
return score_double_map;
}
// Returns a mutable reference to the doc_id -> version map (valid_version_).
std::map<std::string, uint32_t>& DocManager::ValidVersion(){
return valid_version_;
}

View File

@ -22,33 +22,36 @@
#include "json/json.h"
#include <map>
#include <set>
using namespace std;
class Component;
class RequestContext;
struct GeoPointContext;
class DocManager{
public:
DocManager(Component *c);
~DocManager();
DocManager(RequestContext *c);
~DocManager();
bool CheckDocByExtraFilterKey(string doc_id);
bool GetDocContent(uint32_t m_has_gis, vector<IndexInfo> &doc_id_ver_vec, set<string> &valid_docs, hash_double_map &distances);
bool AppendFieldsToRes(Json::Value &response, vector<string> &m_fields);
bool GetScoreMap(string doc_id, uint32_t m_sort_type, string m_sort_field, FIELDTYPE &m_sort_field_type, uint32_t appid);
map<string, string>& ScoreStrMap();
map<string, int>& ScoreIntMap();
map<string, double>& ScoreDoubleMap();
map<string, uint32_t>& ValidVersion();
bool CheckDocByExtraFilterKey(std::string doc_id);
bool GetDocContent();
bool GetDocContent(const GeoPointContext& geo_point , std::vector<IndexInfo>& index_infos);
bool AppendFieldsToRes(Json::Value &response, std::vector<std::string> &m_fields);
bool GetScoreMap(std::string doc_id, uint32_t m_sort_type, std::string m_sort_field, FIELDTYPE &m_sort_field_type);
std::map<std::string, std::string>& ScoreStrMap();
std::map<std::string, int>& ScoreIntMap();
std::map<std::string, double>& ScoreDoubleMap();
std::map<std::string, uint32_t>& ValidVersion();
private:
void CheckIfKeyValid(const vector<ExtraFilterKey>& extra_filter_vec, const Json::Value &value, bool flag, bool &key_valid);
void CheckIfKeyValid(const std::vector<ExtraFilterKey>& extra_filter_vec, const Json::Value &value, bool flag, bool &key_valid);
private:
map<string, string> score_str_map;
map<string, int> score_int_map;
map<string, double> score_double_map;
map<string, uint32_t> valid_version;
hash_string_map doc_content_map;
Component *component;
std::map<std::string, std::string> score_str_map;
std::map<std::string, int> score_int_map;
std::map<std::string, double> score_double_map;
std::map<std::string, uint32_t> valid_version_;
hash_string_map doc_content_map_;
RequestContext* component;
};
#endif

View File

@ -95,8 +95,8 @@ struct LimitCond
int sLimitStart;
int sLimitStep;
LimitCond(){
sLimitStart = 0;
sLimitStep = 10;
sLimitStart = -1;
sLimitStep = -1;
}
void reset() { sLimitStart = -1, sLimitStep = -1; }

View File

@ -21,6 +21,7 @@
#include "log.h"
#include "../index_tbl_op.h"
#include "rocksdb_direct_context.h"
#include "../key_format.h"
#include <algorithm>
#include <iomanip>
@ -33,12 +34,23 @@ const char *INDEX_SYMBOL = "00";
const char *MAX_BORDER_SYMBOL = "10";
const char *MIN_BORDER_SYMBOL = "00";
static string gen_dtc_key_string(string appid, string type, double key) {
stringstream ssKey;
ssKey << setw(20) << setfill('0') << (int)key;
stringstream ss;
ss << appid << "#" << type << "#" << ssKey.str();
return ss.str();
// Builds the lookup key "<appid>#<type>#<encoded key>" for a numeric key.
// The number is converted with std::to_string, wrapped together with key_type
// into a one-element KeyFormat::UnionKey, and encoded by KeyFormat::Encode.
static string gen_dtc_key_string(string appid, string type, uint32_t key_type, double key) {
    log_debug("fieldtype:%d , key:%f " , key_type , key);

    KeyFormat::UnionKey key_parts;
    key_parts.push_back(std::make_pair(key_type , std::to_string(key)));
    const std::string encoded_key = KeyFormat::Encode(key_parts);

#if 0
    // Disabled round-trip check: decode the key again and log each part.
    KeyFormat::UnionKey decoded_parts;
    decoded_parts.push_back(std::make_pair(key_type , ""));
    KeyFormat::Decode(encoded_key , decoded_parts);
    for (int i = 0; i < decoded_parts.size(); i++){
        log_error("decode string:%s", decoded_parts[i].second.c_str());
    }
#endif

    std::string dtc_key(appid);
    dtc_key += "#";
    dtc_key += type;
    dtc_key += "#";
    dtc_key += encoded_key;
    return dtc_key;
}
static string gen_dtc_key_string(string appid, string type, string key) {
@ -403,7 +415,7 @@ vector<string> split(const string& str, const string& delim) {
}
std::string getPath(const char *bind_addr){
string s = "/tmp/domain_socket/rocks_direct_20000.sock";
string s = "/tmp/domain_socket/rocks_direct_30311.sock";
return s;
}
@ -471,6 +483,8 @@ int SearchRocksDBIndex::getFieldIndex(const char *fieldName){
void SearchRocksDBIndex::GetRangeIndex(uint32_t range_type, InvertIndexEntry &startEntry, InvertIndexEntry &endEntry, std::vector<InvertIndexEntry>& resultEntry){
log_debug("range_type:%d", range_type);
if (range_type == RANGE_GELE) {
GetRangeIndexGELE(startEntry, endEntry, resultEntry);
}
@ -493,7 +507,7 @@ void SearchRocksDBIndex::GetRangeIndex(uint32_t range_type, InvertIndexEntry &st
GetRangeIndexGT(startEntry, resultEntry);
}
else if (range_type == RANGE_LT) {
GetRangeIndexLT(startEntry, resultEntry);
GetRangeIndexLT(endEntry, resultEntry);
}
}
@ -535,10 +549,6 @@ void SearchRocksDBIndex::setQueryCond(QueryCond& query_cond, int field_index, in
void SearchRocksDBIndex::GetRangeIndexGELE(InvertIndexEntry& begin_key, const InvertIndexEntry& end_key, std::vector<InvertIndexEntry>& entry){
DirectRequestContext direct_request_context;
stringstream ss;
if(getFieldIndex("field") == -1){
log_error("GetRangeIndexGELE get field Index error");
return;
@ -549,25 +559,31 @@ void SearchRocksDBIndex::GetRangeIndexGELE(InvertIndexEntry& begin_key, const In
return;
}
DirectRequestContext direct_request_context;
QueryCond query_cond1;
query_cond1.sFieldIndex = getFieldIndex("field");
query_cond1.sCondOpr = 0;
query_cond1.sCondOpr = eEQ;
stringstream ss;
ss << begin_key._InvertIndexField;
query_cond1.sCondValue = ss.str();
direct_request_context.sFieldConds.push_back(query_cond1);
QueryCond query_cond2;
query_cond2.sFieldIndex = getFieldIndex("key");
query_cond2.sCondOpr = 5;
query_cond2.sCondValue = gen_dtc_key_string(begin_key._InvertIndexAppid, INDEX_SYMBOL, begin_key._InvertIndexKey);
query_cond2.sCondOpr = eGE;
query_cond2.sCondValue = gen_dtc_key_string(begin_key._InvertIndexAppid, INDEX_SYMBOL
, begin_key._InvertIndex_key_type ,begin_key._InvertIndexKey);
direct_request_context.sFieldConds.push_back(query_cond2);
QueryCond query_cond3;
query_cond3.sFieldIndex = getFieldIndex("key");
query_cond3.sCondOpr = 3;
query_cond3.sCondValue = gen_dtc_key_string(end_key._InvertIndexAppid, INDEX_SYMBOL, end_key._InvertIndexKey);
query_cond3.sCondOpr = eLE;
query_cond3.sCondValue = gen_dtc_key_string(end_key._InvertIndexAppid, INDEX_SYMBOL
, end_key._InvertIndex_key_type , end_key._InvertIndexKey);
direct_request_context.sFieldConds.push_back(query_cond3);
setEntry(direct_request_context, entry);
@ -590,7 +606,7 @@ void SearchRocksDBIndex::GetRangeIndexGE(InvertIndexEntry& begin_key, std::vecto
QueryCond query_cond1;
query_cond1.sFieldIndex = getFieldIndex("field");
query_cond1.sCondOpr = 0;
query_cond1.sCondOpr = eEQ;
ss << begin_key._InvertIndexField;
query_cond1.sCondValue = ss.str();
direct_request_context.sFieldConds.push_back(query_cond1);
@ -598,13 +614,14 @@ void SearchRocksDBIndex::GetRangeIndexGE(InvertIndexEntry& begin_key, std::vecto
QueryCond query_cond2;
query_cond2.sFieldIndex = getFieldIndex("key");
query_cond2.sCondOpr = 5;
query_cond2.sCondValue = gen_dtc_key_string(begin_key._InvertIndexAppid, INDEX_SYMBOL, begin_key._InvertIndexKey);
query_cond2.sCondOpr = eGE;
query_cond2.sCondValue = gen_dtc_key_string(begin_key._InvertIndexAppid, INDEX_SYMBOL,
begin_key._InvertIndex_key_type , begin_key._InvertIndexKey);
direct_request_context.sFieldConds.push_back(query_cond2);
QueryCond query_cond3;
query_cond3.sFieldIndex = getFieldIndex("key");
query_cond3.sCondOpr = 2;
query_cond3.sCondOpr = eLT;
query_cond3.sCondValue = gen_dtc_key_string(begin_key._InvertIndexAppid, MAX_BORDER_SYMBOL, "");
direct_request_context.sFieldConds.push_back(query_cond3);
@ -628,20 +645,21 @@ void SearchRocksDBIndex::GetRangeIndexLE(InvertIndexEntry& end_key, std::vector<
QueryCond query_cond1;
query_cond1.sFieldIndex = getFieldIndex("field");
query_cond1.sCondOpr = 0;
query_cond1.sCondOpr = eEQ;
ss << end_key._InvertIndexField;
query_cond1.sCondValue = ss.str();
direct_request_context.sFieldConds.push_back(query_cond1);
QueryCond query_cond2;
query_cond2.sFieldIndex = getFieldIndex("key");
query_cond2.sCondOpr = 3;
query_cond2.sCondValue = gen_dtc_key_string(end_key._InvertIndexAppid, INDEX_SYMBOL, end_key._InvertIndexKey);
query_cond2.sCondOpr = eLE;
query_cond2.sCondValue = gen_dtc_key_string(end_key._InvertIndexAppid, INDEX_SYMBOL
, end_key._InvertIndex_key_type , end_key._InvertIndexKey);
direct_request_context.sFieldConds.push_back(query_cond2);
QueryCond query_cond3;
query_cond3.sFieldIndex = getFieldIndex("key");
query_cond3.sCondOpr = 4;
query_cond3.sCondOpr = eGT;
query_cond3.sCondValue = gen_dtc_key_string(end_key._InvertIndexAppid, MIN_BORDER_SYMBOL, "");
direct_request_context.sFieldConds.push_back(query_cond3);
@ -665,21 +683,23 @@ void SearchRocksDBIndex::GetRangeIndexGTLT(InvertIndexEntry& begin_key, const In
QueryCond query_cond1;
query_cond1.sFieldIndex = getFieldIndex("field");
query_cond1.sCondOpr = 0;
query_cond1.sCondOpr = eEQ;
ss << begin_key._InvertIndexField;
query_cond1.sCondValue = ss.str();
direct_request_context.sFieldConds.push_back(query_cond1);
QueryCond query_cond2;
query_cond2.sFieldIndex = getFieldIndex("key");
query_cond2.sCondOpr = 4;
query_cond2.sCondValue = gen_dtc_key_string(begin_key._InvertIndexAppid, INDEX_SYMBOL, begin_key._InvertIndexKey);
query_cond2.sCondOpr = eGT;
query_cond2.sCondValue = gen_dtc_key_string(begin_key._InvertIndexAppid, INDEX_SYMBOL
, begin_key._InvertIndex_key_type , begin_key._InvertIndexKey);
direct_request_context.sFieldConds.push_back(query_cond2);
QueryCond query_cond3;
query_cond3.sFieldIndex = getFieldIndex("key");
query_cond3.sCondOpr = 2;
query_cond3.sCondValue = gen_dtc_key_string(end_key._InvertIndexAppid, INDEX_SYMBOL, end_key._InvertIndexKey);
query_cond3.sCondOpr = eLT;
query_cond3.sCondValue = gen_dtc_key_string(end_key._InvertIndexAppid, INDEX_SYMBOL
, end_key._InvertIndex_key_type , end_key._InvertIndexKey);
direct_request_context.sFieldConds.push_back(query_cond3);
@ -703,21 +723,23 @@ void SearchRocksDBIndex::GetRangeIndexGTLE(InvertIndexEntry& begin_key, const In
QueryCond query_cond1;
query_cond1.sFieldIndex = getFieldIndex("field");
query_cond1.sCondOpr = 0;
query_cond1.sCondOpr = eEQ;
ss << begin_key._InvertIndexField;
query_cond1.sCondValue = ss.str();
direct_request_context.sFieldConds.push_back(query_cond1);
QueryCond query_cond2;
query_cond2.sFieldIndex = getFieldIndex("key");
query_cond2.sCondOpr = 4;
query_cond2.sCondValue = gen_dtc_key_string(begin_key._InvertIndexAppid, INDEX_SYMBOL, begin_key._InvertIndexKey);
query_cond2.sCondOpr = eGT;
query_cond2.sCondValue = gen_dtc_key_string(begin_key._InvertIndexAppid, INDEX_SYMBOL
, begin_key._InvertIndex_key_type , begin_key._InvertIndexKey);
direct_request_context.sFieldConds.push_back(query_cond2);
QueryCond query_cond3;
query_cond3.sFieldIndex = getFieldIndex("key");
query_cond3.sCondOpr = 3;
query_cond3.sCondValue = gen_dtc_key_string(end_key._InvertIndexAppid, INDEX_SYMBOL, end_key._InvertIndexKey);
query_cond3.sCondOpr = eLE;
query_cond3.sCondValue = gen_dtc_key_string(end_key._InvertIndexAppid, INDEX_SYMBOL
, end_key._InvertIndex_key_type , end_key._InvertIndexKey);
direct_request_context.sFieldConds.push_back(query_cond3);
setEntry(direct_request_context, entry);
@ -740,21 +762,23 @@ void SearchRocksDBIndex::GetRangeIndexGELT(InvertIndexEntry& begin_key, const In
QueryCond query_cond1;
query_cond1.sFieldIndex = getFieldIndex("field");
query_cond1.sCondOpr = 0;
query_cond1.sCondOpr = eEQ;
ss << begin_key._InvertIndexField;
query_cond1.sCondValue = ss.str();
direct_request_context.sFieldConds.push_back(query_cond1);
QueryCond query_cond2;
query_cond2.sFieldIndex = getFieldIndex("key");
query_cond2.sCondOpr = 5;
query_cond2.sCondValue = gen_dtc_key_string(begin_key._InvertIndexAppid, INDEX_SYMBOL, begin_key._InvertIndexKey);
query_cond2.sCondOpr = eGE;
query_cond2.sCondValue = gen_dtc_key_string(begin_key._InvertIndexAppid, INDEX_SYMBOL
, begin_key._InvertIndex_key_type , begin_key._InvertIndexKey);
direct_request_context.sFieldConds.push_back(query_cond2);
QueryCond query_cond3;
query_cond3.sFieldIndex = getFieldIndex("key");
query_cond3.sCondOpr = 2;
query_cond3.sCondValue = gen_dtc_key_string(end_key._InvertIndexAppid, INDEX_SYMBOL, end_key._InvertIndexKey);
query_cond3.sCondOpr = eLT;
query_cond3.sCondValue = gen_dtc_key_string(end_key._InvertIndexAppid, INDEX_SYMBOL
, end_key._InvertIndex_key_type , end_key._InvertIndexKey);
direct_request_context.sFieldConds.push_back(query_cond3);
setEntry(direct_request_context, entry);
@ -777,20 +801,21 @@ void SearchRocksDBIndex::GetRangeIndexGT(InvertIndexEntry& begin_key, std::vecto
QueryCond query_cond1;
query_cond1.sFieldIndex = getFieldIndex("field");
query_cond1.sCondOpr = 0;
query_cond1.sCondOpr = eEQ;
ss << begin_key._InvertIndexField;
query_cond1.sCondValue = ss.str();
direct_request_context.sFieldConds.push_back(query_cond1);
QueryCond query_cond2;
query_cond2.sFieldIndex = getFieldIndex("key");
query_cond2.sCondOpr = 4;
query_cond2.sCondValue = gen_dtc_key_string(begin_key._InvertIndexAppid, INDEX_SYMBOL, begin_key._InvertIndexKey);
query_cond2.sCondOpr = eGT;
query_cond2.sCondValue = gen_dtc_key_string(begin_key._InvertIndexAppid, INDEX_SYMBOL
, begin_key._InvertIndex_key_type , begin_key._InvertIndexKey);
direct_request_context.sFieldConds.push_back(query_cond2);
QueryCond query_cond3;
query_cond3.sFieldIndex = getFieldIndex("key");
query_cond3.sCondOpr = 2;
query_cond3.sCondOpr = eLT;
query_cond3.sCondValue = gen_dtc_key_string(begin_key._InvertIndexAppid, MAX_BORDER_SYMBOL, "");
direct_request_context.sFieldConds.push_back(query_cond3);
@ -814,21 +839,22 @@ void SearchRocksDBIndex::GetRangeIndexLT(InvertIndexEntry& end_key, std::vector<
QueryCond query_cond1;
query_cond1.sFieldIndex = getFieldIndex("field");
query_cond1.sCondOpr = 0;
query_cond1.sCondOpr = eEQ;
ss << end_key._InvertIndexField;
query_cond1.sCondValue = ss.str();
direct_request_context.sFieldConds.push_back(query_cond1);
QueryCond query_cond2;
query_cond2.sFieldIndex = getFieldIndex("key");
query_cond2.sCondOpr = 2;
query_cond2.sCondValue = gen_dtc_key_string(end_key._InvertIndexAppid, INDEX_SYMBOL, end_key._InvertIndexKey);
query_cond2.sCondOpr = eLT;
query_cond2.sCondValue = gen_dtc_key_string(end_key._InvertIndexAppid, INDEX_SYMBOL
, end_key._InvertIndex_key_type , end_key._InvertIndexKey);
direct_request_context.sFieldConds.push_back(query_cond2);
QueryCond query_cond3;
query_cond3.sFieldIndex = getFieldIndex("key");
query_cond3.sCondOpr = 4;
query_cond3.sCondValue = gen_dtc_key_string(end_key._InvertIndexAppid, MAX_BORDER_SYMBOL, "");
query_cond3.sCondOpr = eGT;
query_cond3.sCondValue = gen_dtc_key_string(end_key._InvertIndexAppid, MIN_BORDER_SYMBOL, "");
direct_request_context.sFieldConds.push_back(query_cond3);
setEntry(direct_request_context, entry);
@ -879,7 +905,7 @@ void SearchRocksDBIndex::GetRangeIndexInTerminal(RANGTYPE range_type, const Inve
QueryCond query_cond1;
query_cond1.sFieldIndex = getFieldIndex("field");
query_cond1.sCondOpr = 0;
query_cond1.sCondOpr = eEQ;
stringstream ss;
ss << end_key._InvertIndexField;
query_cond1.sCondValue = ss.str();
@ -889,14 +915,16 @@ void SearchRocksDBIndex::GetRangeIndexInTerminal(RANGTYPE range_type, const Inve
QueryCond query_cond2;
query_cond2.sFieldIndex = getFieldIndex("key");
query_cond2.sCondOpr = greater_type;
query_cond2.sCondValue = gen_dtc_key_string(end_key._InvertIndexAppid, begin_symbol, begin_key._InvertIndexKey);
query_cond2.sCondValue = gen_dtc_key_string(end_key._InvertIndexAppid, begin_symbol
, begin_key._InvertIndex_key_type , begin_key._InvertIndexKey);
direct_request_context.sFieldConds.push_back(query_cond2);
QueryCond query_cond3;
query_cond3.sFieldIndex = getFieldIndex("key");
query_cond3.sCondOpr = less_type;
query_cond3.sCondValue = gen_dtc_key_string(end_key._InvertIndexAppid, end_symbol, end_key._InvertIndexKey);
query_cond3.sCondValue = gen_dtc_key_string(end_key._InvertIndexAppid, end_symbol
, end_key._InvertIndex_key_type , end_key._InvertIndexKey);
direct_request_context.sFieldConds.push_back(query_cond3);
// The field values corresponding to key and doc_id are 0 and 1, respectively

View File

@ -55,13 +55,15 @@ struct InvertIndexEntry {
_IsValid = true;
}
InvertIndexEntry(std::string appid, int field, double key){
InvertIndexEntry(std::string appid, int field, uint32_t key_type, double key){
_InvertIndexAppid = appid;
_InvertIndexField = field;
_InvertIndex_key_type = key_type;
_InvertIndexKey = key;
}
InvertIndexEntry(const InvertIndexEntry& src) {
this->_InvertIndex_key_type = src._InvertIndex_key_type;
this->_InvertIndexKey = src._InvertIndexKey;
this->_InvertIndexDocId = src._InvertIndexDocId;
this->_InvertIndexAppid = src._InvertIndexAppid;
@ -72,6 +74,7 @@ struct InvertIndexEntry {
}
InvertIndexEntry& operator=(const InvertIndexEntry& src) {
this->_InvertIndex_key_type = src._InvertIndex_key_type;
this->_InvertIndexKey = src._InvertIndexKey;
this->_InvertIndexDocId = src._InvertIndexDocId;
this->_InvertIndexAppid = src._InvertIndexAppid;
@ -109,6 +112,7 @@ struct InvertIndexEntry {
return oss.str();
}
uint32_t _InvertIndex_key_type;
double _InvertIndexKey;
std::string _InvertIndexDocId;
std::string _InvertIndexAppid;

File diff suppressed because it is too large Load Diff

View File

@ -22,9 +22,11 @@
#include "dtcapi.h"
#include "chash.h"
#include "comm.h"
#include "search_conf.h"
#include "search_util.h"
#include "json/value.h"
#include "result_context.h"
#include <stdio.h>
#include <unistd.h>
#include <vector>
@ -52,18 +54,18 @@ class CIndexTableManager
public:
int InitServer(const SDTCHost &dtchost, string bindAddr);
int InitServer2(const SDTCHost &dtchost);
bool GetDocInfo(uint32_t appid, string word, uint32_t key_locate, vector<IndexInfo> &doc_info);
bool GetDocInfo(uint32_t appid, string word, uint32_t field_id, vector<IndexInfo> &doc_info);
int GetDocCnt(uint32_t appid);
bool get_snapshot_execute(int left, int right,uint32_t appid, vector<IndexInfo>& no_filter_docs, vector<DocVersionInfo>& docVersionInfo);
bool get_snapshot_execute(int left, int right,uint32_t appid, const vector<IndexInfo>& no_filter_docs, vector<DocVersionInfo>& docVersionInfo);
bool get_top_snapshot_execute(int left, int right, uint32_t appid, vector<TopDocInfo>& no_filter_docs, vector<DocVersionInfo>& docVersionInfo);
bool TopDocValid(uint32_t appid, vector<TopDocInfo>& no_filter_docs, vector<TopDocInfo>& doc_info);
bool DocValid(uint32_t appid, vector<IndexInfo>& vecs, set<string>& valid_set, bool need_version, map<string, uint32_t>& valid_version, hash_string_map& doc_content_map);
bool DocValid(uint32_t appid, const vector<IndexInfo>& vecs, bool need_version, map<string, uint32_t>& valid_version, hash_string_map& doc_content_map);
bool GetTopDocInfo(uint32_t appid, string word, vector<TopDocInfo>& doc_info);
bool GetDocContent(uint32_t appid, vector<IndexInfo> &doc_id_set, hash_string_map& doc_content);
bool GetSnapshotContent(int left, int right, uint32_t appid, vector<IndexInfo>& docs, hash_string_map& doc_content);
bool GetSuggestDoc(uint32_t appid, int index, uint32_t len, uint32_t field, const IntelligentInfo &info, vector<IndexInfo> &doc_id_set, set<string>& hlWord);
bool GetSuggestDocWithoutCharacter(uint32_t appid, int index, uint32_t len, uint32_t field, const IntelligentInfo &info, vector<IndexInfo> &doc_id_set, set<string>& hlWord);
bool GetDocContent(uint32_t appid, const std::vector<IndexInfo>& index_infos, hash_string_map& doc_content);
bool GetSnapshotContent(int left, int right, uint32_t appid , const std::vector<IndexInfo>& index_infos, hash_string_map& doc_content);
bool GetSuggestDoc(uint32_t appid, int index, uint32_t len, uint32_t field, const IntelligentInfo &info, vector<IndexInfo> &doc_id_set);
bool GetSuggestDocWithoutCharacter(uint32_t appid, int index, uint32_t len, uint32_t field, const IntelligentInfo &info, vector<IndexInfo> &doc_id_set);
bool GetScoreByField(uint32_t appid, string doc_id, string sort_field, uint32_t sort_type, ScoreInfo &score_info);
bool DocValid(uint32_t appid, string doc_id, bool &is_valid);
bool GetContentByField(uint32_t appid, string doc_id, uint32_t doc_version, const vector<string>& fields, Json::Value &value);

View File

@ -0,0 +1,213 @@
#include "key_format.h"
#include <iostream>
#include <utility>
#include "comm.h"
// Width in bytes of one encoded string segment: SEGMENT_SIZE - 1 payload bytes followed by one marker byte.
#define SEGMENT_SIZE 8
// NOTE(review): not referenced by the code visible in this file - confirm it is still needed.
const std::string SEG_SYMBOL = "|";
// Marker byte that closes a completely filled string segment; a padded final
// segment stores ENCODER_MARKER minus the number of padding bytes instead.
const char ENCODER_MARKER = 127;
// Most significant bit of a 64-bit word, used by the integer and double encoders.
const uint64_t signMask = 0x8000000000000000;
// Reinterprets a signed 64-bit value as unsigned and toggles its sign bit,
// so that negative inputs map below non-negative ones when compared as unsigned.
uint64_t encode_into_cmp_uint(int64_t src) {
    const uint64_t raw_bits = static_cast<uint64_t>(src);
    return raw_bits ^ signMask;
}
// Converts a 64-bit value from host byte order to network (big-endian) byte order.
//
// The bytes are laid out explicitly, most significant first, so the in-memory
// representation of the result is big-endian on any host. The previous
// htonl-based formula produced the same bytes on little-endian machines but
// swapped the two 32-bit halves on big-endian ones.
//
// @param val value in host byte order
// @return value whose memory representation is the big-endian form of val
uint64_t htonll(uint64_t val) {
    unsigned char be_bytes[sizeof(uint64_t)];
    for (size_t i = 0; i < sizeof(uint64_t); ++i) {
        // Byte i holds bits [63 - 8i, 56 - 8i] of val.
        be_bytes[i] = static_cast<unsigned char>(val >> (8 * (sizeof(uint64_t) - 1 - i)));
    }
    uint64_t result;
    memcpy(&result, be_bytes, sizeof(result));
    return result;
}
// Converts a 64-bit value from network (big-endian) byte order to host byte order.
//
// The bytes of val are read in memory order and folded most significant first,
// which is correct on any host. The previous ntohl-based formula swapped the
// two 32-bit halves on big-endian machines.
//
// @param val value whose memory representation is big-endian
// @return the same number in host byte order
uint64_t ntohll(uint64_t val)
{
    unsigned char be_bytes[sizeof(uint64_t)];
    memcpy(be_bytes, &val, sizeof(val));
    uint64_t host = 0;
    for (size_t i = 0; i < sizeof(uint64_t); ++i) {
        host = (host << 8) | be_bytes[i];
    }
    return host;
}
// Encodes every (field type, textual value) pair of oUnionKey and concatenates
// the pieces in order.
//
// Integer-like types (FIELD_INT, FIELD_LONG, FIELD_IP) are parsed with strtoll,
// FIELD_DOUBLE with strtod, and string-like types (FIELD_STRING, FIELD_TEXT,
// FIELD_GEO_POINT, FIELD_GEO_SHAPE) are encoded as raw bytes.
//
// @param oUnionKey ordered list of typed key components
// @return the concatenated encoding, or an empty string as soon as a component
//         has an unsupported type. Previously the buffer was cleared but the
//         loop went on, so later components were appended to the emptied key
//         and a truncated key was returned.
std::string KeyFormat::Encode(const UnionKey& oUnionKey)
{
    std::string sUnionKey;
    for (size_t i = 0; i < oUnionKey.size(); ++i)
    {
        const std::string& text = oUnionKey[i].second;
        switch (oUnionKey[i].first)
        {
        case FIELD_INT:
        case FIELD_LONG:
        case FIELD_IP:
            sUnionKey.append(EncodeBytes((int64_t)strtoll(text.c_str(), NULL, 10)));
            break;
        case FIELD_DOUBLE:
            sUnionKey.append(EncodeBytes(strtod(text.c_str(), NULL)));
            break;
        case FIELD_STRING:
        case FIELD_TEXT:
        case FIELD_GEO_POINT:
        case FIELD_GEO_SHAPE:
            sUnionKey.append(EncodeBytes(text));
            break;
        default:
            // Unsupported component type: the whole key is invalid.
            return std::string();
        }
    }
    return sUnionKey;
}
// Decodes sKey into the components described by oUnionKey.
//
// On entry each element of oUnionKey carries the expected field type in
// `.first`; on success `.second` of every element is overwritten with the
// decoded value rendered as text (std::to_string for numeric types).
//
// Every read is now bounds-checked: a key that is too short for the requested
// layout makes the function return false instead of throwing from substr,
// indexing past the end of sKey, or handing a short slice to the fixed-width
// decoders.
//
// NOTE(review): Encode() accepts FIELD_GEO_POINT / FIELD_GEO_SHAPE, but they
// are rejected here (default branch) - confirm this asymmetry is intended.
//
// @param sKey      encoded key bytes
// @param oUnionKey in: expected field types; out: decoded values
// @return true when all components were decoded; false when oUnionKey is
//         empty, a type is unsupported, or sKey is truncated. Elements decoded
//         before the failure keep their new values.
bool KeyFormat::Decode(const std::string& sKey, UnionKey& oUnionKey)
{
    if (oUnionKey.empty()){
        return false;
    }
    const size_t numeric_width = sizeof(uint64_t);
    const size_t segment_width = static_cast<size_t>(SEGMENT_SIZE);
    size_t pos = 0; // invariant: pos <= sKey.size()
    for (size_t i = 0; i < oUnionKey.size(); ++i)
    {
        switch (oUnionKey[i].first)
        {
        case FIELD_INT:
        case FIELD_LONG:
        case FIELD_IP:
            {
                if (sKey.size() - pos < numeric_width) {
                    return false;
                }
                int64_t lValue;
                DecodeBytes(sKey.substr(pos, numeric_width), lValue);
                pos += numeric_width;
                oUnionKey[i].second = std::to_string((long long)lValue);
            }
            break;
        case FIELD_DOUBLE:
            {
                if (sKey.size() - pos < numeric_width) {
                    return false;
                }
                double dValue;
                DecodeBytes(sKey.substr(pos, numeric_width), dValue);
                pos += numeric_width;
                oUnionKey[i].second = std::to_string((long double)dValue);
            }
            break;
        case FIELD_STRING:
        case FIELD_TEXT:
            {
                // Consume segments until one ends with a marker other than
                // ENCODER_MARKER, i.e. the padded final segment.
                size_t end = pos;
                do {
                    if (sKey.size() - end < segment_width) {
                        return false;
                    }
                    end += segment_width;
                } while (ENCODER_MARKER == sKey[end - 1]);
                DecodeBytes(sKey.substr(pos, end - pos), oUnionKey[i].second);
                pos = end;
            }
            break;
        default:
            return false;
        }
    }
    return true;
}
// Encodes a byte string as a sequence of SEGMENT_SIZE-byte segments.
//
// Each segment holds up to SEGMENT_SIZE - 1 payload bytes followed by one
// marker byte. A completely filled segment ends with ENCODER_MARKER and is
// followed by another segment; the last segment is zero-padded and ends with
// ENCODER_MARKER minus the number of padding bytes. An input whose length is a
// multiple of SEGMENT_SIZE - 1 (including the empty string) therefore gets a
// trailing all-padding segment.
std::string KeyFormat::EncodeBytes(const std::string & src)
{
    const size_t payload_size = SEGMENT_SIZE - 1;
    std::string encoded;
    size_t offset = 0;
    size_t remaining = src.length();
    for (;;) {
        const size_t chunk = remaining > payload_size ? payload_size : remaining;
        const size_t padding = payload_size - chunk;
        encoded.append(src, offset, chunk);
        offset += chunk;
        remaining -= chunk;
        if (padding == 0) {
            // Full segment: more data (or the terminating segment) follows.
            encoded.push_back(ENCODER_MARKER);
            continue;
        }
        encoded.append(padding, '\0');
        encoded.push_back(static_cast<char>(ENCODER_MARKER - padding));
        break;
    }
    return encoded;
}
// Encodes an int by widening it to int64_t and delegating to the int64_t overload.
std::string KeyFormat::EncodeBytes(int src)
{
    const int64_t widened = src;
    return EncodeBytes(widened);
}
// Encodes a signed 64-bit integer as 8 bytes: the sign bit is toggled via
// encode_into_cmp_uint() and the result is passed through htonll() before its
// bytes are copied into the returned string.
std::string KeyFormat::EncodeBytes(int64_t src)
{
    const uint64_t net_bytes = htonll(encode_into_cmp_uint(src));
    std::string dst(sizeof(net_bytes), '\0');
    memcpy(&dst[0], &net_bytes, sizeof(net_bytes));
    return dst;
}
// Encodes a double as 8 bytes. The raw bit pattern is copied into a 64-bit
// word; values with src >= 0 get the sign bit set, all other values have
// every bit inverted. The word is then encoded by the uint64_t overload.
std::string KeyFormat::EncodeBytes(double src)
{
    uint64_t bits;
    memcpy(&bits, &src, sizeof(double));
    bits = (src >= 0) ? (bits | signMask) : ~bits;
    return EncodeBytes(bits);
}
// Encodes an unsigned 64-bit integer as 8 bytes by passing it through
// htonll() and copying the resulting bytes into the returned string.
std::string KeyFormat::EncodeBytes(uint64_t src)
{
    const uint64_t net_bytes = htonll(src);
    std::string dst(sizeof(net_bytes), '\0');
    memcpy(&dst[0], &net_bytes, sizeof(net_bytes));
    return dst;
}
// Decodes the 8-byte form produced by EncodeBytes(int64_t).
//
// At most sizeof(uint64_t) bytes are read from src. If src is shorter, the
// missing trailing bytes are treated as zero; the previous code always copied
// 8 bytes and could read past the end of the string's buffer.
//
// @param src encoded bytes (8 expected)
// @param dst receives the decoded value
void KeyFormat::DecodeBytes(const std::string & src, int64_t& dst)
{
    uint64_t net_bytes = 0;
    const size_t available = src.size() < sizeof(net_bytes) ? src.size() : sizeof(net_bytes);
    memcpy(&net_bytes, src.data(), available);
    uint64_t host_bytes = ntohll(net_bytes);
    dst = int64_t(host_bytes ^ signMask);
}
// Decodes the segmented form produced by EncodeBytes(const std::string&).
//
// src is read in SEGMENT_SIZE-byte segments; the last byte of each segment is
// a marker from which the number of padding bytes is derived
// (ENCODER_MARKER - marker), and the remaining payload bytes are appended to
// the result.
//
// Hardening over the previous version, which trusted its input:
//  - a trailing partial segment is ignored instead of indexing past the end
//    of src;
//  - the padding count is clamped to [0, SEGMENT_SIZE - 1], so a corrupt
//    marker can no longer produce a negative (i.e. huge) substring length.
// Well-formed input decodes exactly as before.
//
// @param src encoded bytes, expected to be a whole number of segments
// @param dst receives the decoded string (empty when src has no full segment)
void KeyFormat::DecodeBytes(const std::string & src, std::string & dst)
{
    const int payload_size = SEGMENT_SIZE - 1;
    const size_t segment_width = static_cast<size_t>(SEGMENT_SIZE);
    std::string decoded;
    for (size_t i = 0; i + segment_width <= src.length(); i += segment_width) {
        int padding = ENCODER_MARKER - src[i + payload_size];
        if (padding < 0) {
            padding = 0;
        }
        if (padding > payload_size) {
            padding = payload_size;
        }
        decoded.append(src, i, static_cast<size_t>(payload_size - padding));
    }
    dst.swap(decoded);
}
// Decodes the 8-byte form produced by EncodeBytes(uint64_t).
//
// At most sizeof(uint64_t) bytes are read from src. If src is shorter, the
// missing trailing bytes are treated as zero; the previous code always copied
// 8 bytes and could read past the end of the string's buffer.
//
// @param src encoded bytes (8 expected)
// @param dst receives the decoded value
void KeyFormat::DecodeBytes(const std::string & src, uint64_t & dst)
{
    uint64_t net_bytes = 0;
    const size_t available = src.size() < sizeof(net_bytes) ? src.size() : sizeof(net_bytes);
    memcpy(&net_bytes, src.data(), available);
    dst = ntohll(net_bytes);
}
// Decodes the 8-byte form produced by EncodeBytes(double): the bytes are first
// decoded as a uint64_t; if the sign bit is set it is cleared, otherwise every
// bit is inverted, and the resulting pattern is copied into dst.
void KeyFormat::DecodeBytes(const std::string & src, double & dst)
{
    uint64_t bits;
    DecodeBytes(src, bits);
    const bool sign_bit_set = (bits & signMask) != 0;
    bits = sign_bit_set ? (bits & ~signMask) : ~bits;
    memcpy(&dst, &bits, sizeof(dst));
}

View File

@ -0,0 +1,33 @@
#ifndef __KEY_FORMAT_H__
#define __KEY_FORMAT_H__
#include <string>
#include <sstream>
#include <stdint.h>
#include <arpa/inet.h>
#include <inttypes.h>
#include <string.h>
#include <map>
#include <vector>
// Stateless helpers that convert typed key components to and from a byte
// string. All members are static; the class is used as a namespace.
class KeyFormat {
public:
    // Ordered list of (field type, textual value) pairs forming one composite key.
    typedef std::vector<std::pair<int, std::string> > UnionKey;

    // Whole-key conversion: Encode concatenates the encoding of each component;
    // Decode fills in the `.second` of each component from the encoded bytes.
    static std::string Encode(const UnionKey& oUnionKey);
    static bool Decode(const std::string& sKey, UnionKey& oUnionKey);

    // Per-value encoders.
    static std::string EncodeBytes(const std::string& src);
    static std::string EncodeBytes(int src);
    static std::string EncodeBytes(int64_t src);
    static std::string EncodeBytes(uint64_t src);
    static std::string EncodeBytes(double src);

    // Per-value decoders; the result is written to dst.
    static void DecodeBytes(const std::string& src, int64_t& dst);
    static void DecodeBytes(const std::string& src, std::string& dst);
    static void DecodeBytes(const std::string& src, uint64_t& dst);
    static void DecodeBytes(const std::string& src, double& dst);
};
#endif

View File

@ -1,341 +0,0 @@
/*
* =====================================================================================
*
* Filename: logical_operate.h
*
* Description: logical operate class definition.
*
* Version: 1.0
* Created: 09/08/2018
* Revision: none
* Compiler: gcc
*
* Author: zhulin, shzhulin3@jd.com
* Company: JD.com, Inc.
*
* =====================================================================================
*/
#include "logical_operate.h"
#include "search_util.h"
#include "cachelist_unit.h"
#include "data_manager.h"
#include "json/reader.h"
#include "json/writer.h"
#include "index_tbl_op.h"
#include "index_sync/sync_index_timer.h"
#include "index_sync/sequence_search_index.h"
#include "stem.h"
#include <sstream>
#include <iomanip>
using namespace std;
extern SyncIndexTimer *globalSyncIndexTimer;
extern CCacheListUnit *indexcachelist;
// Stores the per-request settings.
//
// m_func is now value-initialized to a null pointer. It used to be left
// uninitialized, so calling Process() with more than one key group before
// SetFunc() invoked an indeterminate pointer.
//
// @param appid        application id used to build lookup keys
// @param sort_type    sort mode (compared against SORT_RELEVANCE / SORT_TIMESTAMP)
// @param has_gis      non-zero for GIS queries
// @param cache_switch 1 enables the index cache
LogicalOperate::LogicalOperate(uint32_t appid, uint32_t sort_type, uint32_t has_gis, uint32_t cache_switch)
    : m_appid(appid)
    , m_sort_type(sort_type)
    , m_has_gis(has_gis)
    , m_cache_switch(cache_switch)
    , m_func()
{
}
// Nothing to release: the destructor body is empty.
LogicalOperate::~LogicalOperate()
{
}
// Installs the function Process() uses to combine the result of each key
// group with the accumulated result.
void LogicalOperate::SetFunc(logical_func func)
{
    this->m_func = func;
}
// Resolves every key group in `keys` to a document list and combines the groups.
//
// Within one group the document lists of all fields are merged with vec_union.
// The first group's list is assigned to `vecs`; every later group is combined
// with the running result through m_func (installed via SetFunc).
//
// @param keys          groups of field conditions
// @param vecs          out: resulting document list
// @param highlightWord out: words inserted for highlighting
// @param ves           out: per-document KeyInfo entries
// @param key_in_doc    out: filled by CalculateByWord
// @return 0 on success, -RT_GET_DOC_ERR when a lookup fails
int LogicalOperate::Process(const vector<vector<FieldInfo> >& keys, vector<IndexInfo>& vecs, set<string>& highlightWord, map<string, vec> &ves, map<string, uint32_t> &key_in_doc){
for (size_t index = 0; index < keys.size(); index++)
{
vector<IndexInfo> doc_id_vec;
vector<FieldInfo> fieldInfos = keys[index];
vector<FieldInfo>::iterator it;
for (it = fieldInfos.begin(); it != fieldInfos.end(); it++) {
vector<IndexInfo> doc_info;
// segment_tag 3: lookup through GetDocByShiftWord
if ((*it).segment_tag == 3) {
int ret = GetDocByShiftWord(*it, doc_info, m_appid, highlightWord);
if (ret != 0) {
doc_id_vec.clear();
return -RT_GET_DOC_ERR;
}
sort(doc_info.begin(), doc_info.end());
// every matched document gets a KeyInfo with a fixed frequency of 1
for (size_t doc_info_idx = 0; doc_info_idx < doc_info.size(); doc_info_idx++){
KeyInfo info;
info.word_freq = 1;
info.field = (*it).field;
info.word = (*it).word;
ves[doc_info[doc_info_idx].doc_id].push_back(info);
}
// segment_tag 4: same handling, lookup through GetDocByShiftEnWord
} else if ((*it).segment_tag == 4) {
int ret = GetDocByShiftEnWord(*it, doc_info, m_appid, highlightWord);
if (ret != 0) {
doc_id_vec.clear();
return -RT_GET_DOC_ERR;
}
sort(doc_info.begin(), doc_info.end());
for (size_t doc_info_idx = 0; doc_info_idx < doc_info.size(); doc_info_idx++){
KeyInfo info;
info.word_freq = 1;
info.field = (*it).field;
info.word = (*it).word;
ves[doc_info[doc_info_idx].doc_id].push_back(info);
}
} else if ((*it).segment_tag == 5 && (*it).word == "") { // range query
stringstream ss;
ss << m_appid;
InvertIndexEntry startEntry(ss.str(), (*it).field, (double)(*it).start);
InvertIndexEntry endEntry(ss.str(), (*it).field, (double)(*it).end);
std::vector<InvertIndexEntry> resultEntry;
globalSyncIndexTimer->GetSearchIndex()->GetRangeIndex((*it).range_type, startEntry, endEntry, resultEntry);
std::vector<InvertIndexEntry>::iterator iter = resultEntry.begin();
// only doc id and version are carried over from the range result
for (; iter != resultEntry.end(); iter ++) {
IndexInfo info;
info.doc_id = (*iter)._InvertIndexDocId;
info.doc_version = (*iter)._InvertIndexDocVersion;
doc_info.push_back(info);
}
log_debug("appid: %s, field: %d, count: %d", startEntry._InvertIndexAppid.c_str(), (*it).field, (int)resultEntry.size());
} else {
int ret = GetDocIdSetByWord(*it, doc_info);
if (ret != 0){
return -RT_GET_DOC_ERR;
}
// no documents for this word: skip highlighting, scoring and the union below
if (doc_info.size() == 0)
continue;
if (!m_has_gis || !isAllNumber((*it).word))
highlightWord.insert((*it).word);
if(!m_has_gis && (m_sort_type == SORT_RELEVANCE || m_sort_type == SORT_TIMESTAMP)){
CalculateByWord(*it, doc_info, ves, key_in_doc);
}
}
doc_id_vec = vec_union(doc_id_vec, doc_info);
}
if(index == 0){ // the first group is assigned to vecs directly; later groups are combined with the running result
vecs.assign(doc_id_vec.begin(), doc_id_vec.end());
} else {
vecs = m_func(vecs, doc_id_vec);
}
}
return 0;
}
int LogicalOperate::ProcessTerminal(const vector<vector<FieldInfo> >& and_keys, const TerminalQryCond& query_cond, vector<TerminalRes>& vecs){
if(and_keys.size() != 1){
return 0;
}
vector<FieldInfo> field_vec = and_keys[0];
if(field_vec.size() != 1){
return 0;
}
FieldInfo field_info = field_vec[0];
if(field_info.segment_tag != SEGMENT_RANGE){
return 0;
}
stringstream ss;
ss << m_appid;
InvertIndexEntry beginEntry(ss.str(), field_info.field, (double)field_info.start);
InvertIndexEntry endEntry(ss.str(), field_info.field, (double)field_info.end);
std::vector<InvertIndexEntry> resultEntry;
globalSyncIndexTimer->GetSearchIndex()->GetRangeIndexInTerminal(field_info.range_type, beginEntry, endEntry, query_cond, resultEntry);
std::vector<InvertIndexEntry>::iterator iter = resultEntry.begin();
for (; iter != resultEntry.end(); iter ++) {
TerminalRes info;
info.doc_id = (*iter)._InvertIndexDocId;
info.score = (*iter)._InvertIndexKey;
vecs.push_back(info);
}
return 0;
}
int LogicalOperate::ProcessComplete(const vector<FieldInfo>& complete_keys, vector<IndexInfo>& complete_vecs, vector<string>& word_vec, map<string, vec> &ves, map<string, uint32_t> &key_in_doc){
vector<FieldInfo>::const_iterator iter;
for (iter = complete_keys.begin(); iter != complete_keys.end(); iter++) {
vector<IndexInfo> doc_info;
int ret = GetDocIdSetByWord(*iter, doc_info);
if (ret != 0) {
return -RT_GET_DOC_ERR;
}
word_vec.push_back((*iter).word);
if(m_sort_type == SORT_RELEVANCE || m_sort_type == SORT_TIMESTAMP){
CalculateByWord(*iter, doc_info, ves, key_in_doc);
}
if(iter == complete_keys.begin()){
complete_vecs.assign(doc_info.begin(), doc_info.end());
} else {
complete_vecs = vec_intersection(complete_vecs, doc_info);
}
}
return 0;
}
// Records, for every document in doc_info, a KeyInfo describing the match of
// fieldInfo.word (frequency, field, creation time, positions) under that
// document's id in ves, and stores the number of matching documents for the
// word in key_in_doc.
//
// The position string is only parsed when it is longer than two characters:
// its first and last character are dropped and the remainder is split on ",".
void LogicalOperate::CalculateByWord(FieldInfo fieldInfo, const vector<IndexInfo> &doc_info, map<string, vec> &ves, map<string, uint32_t> &key_in_doc) {
    for (vector<IndexInfo>::const_iterator doc = doc_info.begin(); doc != doc_info.end(); ++doc) {
        const string doc_id = doc->doc_id;
        const uint32_t word_freq = doc->word_freq;
        const uint32_t field = doc->field;
        const uint32_t created_time = doc->created_time;

        string pos_str = doc->pos;
        vector<int> pos_vec;
        if (pos_str.size() > 2) {
            pos_str = pos_str.substr(1, pos_str.size() - 2);
            pos_vec = splitInt(pos_str, ",");
        }

        KeyInfo info;
        info.word_freq = word_freq;
        info.field = field;
        info.word = fieldInfo.word;
        info.created_time = created_time;
        info.pos_vec = pos_vec;
        ves[doc_id].push_back(info);
    }
    key_in_doc[fieldInfo.word] = doc_info.size();
}
// Looks up the cached posting list for (word, field) and parses it into doc_info.
//
// The cache is only consulted when m_cache_switch == 1. The cached value is a
// JSON array whose elements must each carry appid/id/version/field/freq/time/pos
// with the expected types.
//
// Fix: the cached value is NUL-terminated in place, which wrote one byte past
// `value` whenever the cache reported vsize == MAX_VALUE_LEN. Such an entry is
// now rejected (treated as a miss) before the terminator is written.
//
// @param word     cache key prefix
// @param field    field id, appended to the key after a '|'
// @param doc_info out: parsed entries are appended; cleared if an element is malformed
// @return true when the cache was hit and every element parsed; false otherwise
bool LogicalOperate::GetDocIndexCache(string word, uint32_t field, vector<IndexInfo> &doc_info) {
    log_debug("get doc index start");
    uint8_t value[MAX_VALUE_LEN] = { 0 };
    unsigned vsize = 0;
    string indexCache = word + "|" + ToString(field);
    if (m_cache_switch != 1 || !indexcachelist->in_list(indexCache.c_str(), indexCache.size(), value, vsize)) {
        return false;
    }
    log_debug("hit index cache.");
    if (vsize >= MAX_VALUE_LEN) {
        // No room left for the terminator; value[vsize] would be out of bounds.
        log_error("index cache value too large, size: %u", vsize);
        return false;
    }
    value[vsize] = '\0';
    string output = (char *)value;

    Json::Value packet;
    Json::Reader r(Json::Features::strictMode());
    if (!r.parse(output.c_str(), output.c_str() + output.size(), packet)) {
        log_error("the err json string is : %s, errmsg : %s", output.c_str(), r.getFormattedErrorMessages().c_str());
        return false;
    }

    for (uint32_t i = 0; i < packet.size(); ++i) {
        Json::Value& index_cache = packet[i];
        const bool well_formed =
            index_cache.isMember("appid") && index_cache["appid"].isUInt() &&
            index_cache.isMember("id") && index_cache["id"].isString() &&
            index_cache.isMember("version") && index_cache["version"].isUInt() &&
            index_cache.isMember("field") && index_cache["field"].isUInt() &&
            index_cache.isMember("freq") && index_cache["freq"].isUInt() &&
            index_cache.isMember("time") && index_cache["time"].isUInt() &&
            index_cache.isMember("pos") && index_cache["pos"].isString();
        if (!well_formed) {
            log_error("parse index_cache error, no appid");
            doc_info.clear();
            return false;
        }
        IndexInfo info;
        info.appid = index_cache["appid"].asUInt();
        info.doc_id = index_cache["id"].asString();
        info.doc_version = index_cache["version"].asUInt();
        info.field = index_cache["field"].asUInt();
        info.word_freq = index_cache["freq"].asUInt();
        info.created_time = index_cache["time"].asUInt();
        info.pos = index_cache["pos"].asString();
        doc_info.push_back(info);
    }
    return true;
}
void LogicalOperate::SetDocIndexCache(const vector<IndexInfo> &doc_info, string& indexJsonStr) {
Json::Value indexJson;
Json::FastWriter writer;
for (size_t i = 0; i < doc_info.size(); i++) {
Json::Value json_tmp;
json_tmp["appid"] = doc_info[i].appid;
json_tmp["id"] = doc_info[i].doc_id;
json_tmp["version"] = doc_info[i].doc_version;
json_tmp["field"] = doc_info[i].field;
json_tmp["freq"] = doc_info[i].word_freq;
json_tmp["time"] = doc_info[i].created_time;
json_tmp["pos"] = doc_info[i].pos;
indexJson.append(json_tmp);
}
indexJsonStr = writer.write(indexJson);
}
// Fetches the documents indexed under fieldInfo.word for fieldInfo.field.
//
// The lookup key is "<appid>#00#<suffix>", where the suffix depends on the field:
//   - segment_tag 5: the word left-padded with '0' to width 20
//   - INT / DOUBLE / LONG fields: the word as is
//   - IP fields: the numeric value from GetIpNum (0 means "no result")
//   - words containing '_' (union index): the word as is
//   - everything else: the stemmed word
// Sensitive words yield no result. For GIS queries the index cache is tried
// first and, after a successful lookup of 1..1000 documents, refreshed.
//
// @return 0 on success (doc_info may be empty), -RT_DTC_ERR when GetDocInfo fails
int LogicalOperate::GetDocIdSetByWord(FieldInfo fieldInfo, vector<IndexInfo> &doc_info) {
    if (DataManager::Instance()->IsSensitiveWord(fieldInfo.word)) {
        log_debug("%s is a sensitive word.", fieldInfo.word.c_str());
        return 0;
    }

    stringstream ss_key;
    ss_key << m_appid << "#00#";
    const bool is_numeric_field = (fieldInfo.field_type == FIELD_INT
                                   || fieldInfo.field_type == FIELD_DOUBLE
                                   || fieldInfo.field_type == FIELD_LONG);
    if (fieldInfo.segment_tag == 5) {
        stringstream padded;
        padded << setw(20) << setfill('0') << fieldInfo.word;
        ss_key << padded.str();
    } else if (is_numeric_field) {
        ss_key << fieldInfo.word;
    } else if (fieldInfo.field_type == FIELD_IP) {
        uint32_t word_id = GetIpNum(fieldInfo.word);
        if (0 == word_id) {
            return 0;
        }
        ss_key << word_id;
    } else if (fieldInfo.word.find("_") != string::npos) { // union index
        ss_key << fieldInfo.word;
    } else {
        string word_new = stem(fieldInfo.word);
        ss_key << word_new;
    }
    const string lookup_key = ss_key.str();
    log_debug("appid [%u], key[%s]", m_appid, lookup_key.c_str());

    if (m_has_gis && GetDocIndexCache(lookup_key, fieldInfo.field, doc_info)) {
        return 0;
    }

    if (!g_IndexInstance.GetDocInfo(m_appid, lookup_key, fieldInfo.field, doc_info)) {
        log_error("GetDocInfo error.");
        return -RT_DTC_ERR;
    }

    // Only small GIS result sets are written back to the cache.
    if (m_cache_switch != 1 || m_has_gis != 1 || doc_info.size() == 0 || doc_info.size() > 1000) {
        return 0;
    }
    string index_str;
    SetDocIndexCache(doc_info, index_str);
    if (index_str != "" && index_str.size() < MAX_VALUE_LEN) {
        string indexCache = lookup_key + "|" + ToString(fieldInfo.field);
        unsigned data_size = indexCache.size();
        int ret = indexcachelist->add_list(indexCache.c_str(), index_str.c_str(), data_size, index_str.size());
        if (ret == 0) {
            log_debug("add to index_cache_list: %s.", indexCache.c_str());
        } else {
            log_error("add to index_cache_list error, ret: %d.", ret);
        }
    }
    return 0;
}

View File

@ -1,54 +0,0 @@
/*
* =====================================================================================
*
* Filename: logical_operate.h
*
* Description: logical operate class definition.
*
* Version: 1.0
* Created: 09/08/2018
* Revision: none
* Compiler: gcc
*
* Author: zhulin, shzhulin3@jd.com
* Company: JD.com, Inc.
*
* =====================================================================================
*/
#ifndef LOGICAL_OP_H
#define LOGICAL_OP_H
#include "component.h"
#include <map>
#include <set>
using namespace std;
// Per-document list of keyword matches.
typedef vector<KeyInfo> vec;
// Signature of the function used to combine two document lists.
typedef vector<IndexInfo> (*logical_func)(vector<IndexInfo> &a, vector<IndexInfo> &b);

// Resolves query keys to document lists and combines them with a pluggable
// logical function (see SetFunc).
class LogicalOperate
{
public:
    LogicalOperate(uint32_t appid, uint32_t sort_type, uint32_t has_gis, uint32_t cache_switch);
    ~LogicalOperate();

    // Installs the function used by Process() to combine key groups.
    void SetFunc(logical_func func);

    // Query entry points.
    int Process(const vector<vector<FieldInfo> >& keys, vector<IndexInfo>& vecs, set<string>& highlightWord, map<string, vec> &ves, map<string, uint32_t> &key_in_doc);
    int ProcessComplete(const vector<FieldInfo>& complete_keys, vector<IndexInfo>& complete_vecs, vector<string>& word_vec, map<string, vec> &ves, map<string, uint32_t> &key_in_doc);
    int ProcessTerminal(const vector<vector<FieldInfo> >& and_keys, const TerminalQryCond& query_cond, vector<TerminalRes>& vecs);

    // Per-word helpers.
    void CalculateByWord(FieldInfo fieldInfo, const vector<IndexInfo> &doc_info, map<string, vec> &ves, map<string, uint32_t> &key_in_doc);
    int GetDocIdSetByWord(FieldInfo fieldInfo, vector<IndexInfo> &doc_info);

    // Index cache serialization and lookup.
    void SetDocIndexCache(const vector<IndexInfo> &doc_info, string& indexJsonStr);
    bool GetDocIndexCache(string word, uint32_t field, vector<IndexInfo> &doc_info);

private:
    uint32_t m_appid;
    uint32_t m_sort_type;
    uint32_t m_has_gis;
    uint32_t m_cache_switch;
    logical_func m_func;
};
#endif

View File

@ -58,7 +58,7 @@ private:
template<typename T>
void COrderOp<T>::Process(const std::map<std::string, T>& score_map, T last_value, OrderOpCond order_op_cond, Json::Value& response, DocManager *doc_manager)
{
DocIdEntry<T> last_entry;
DocIdEntry<T> last_entry(order_op_cond.last_id , last_value , _OrderFieldType, _SortType);
for (typename std::map<std::string, T>::const_iterator it = score_map.begin(); it != score_map.end(); it++) {
DocIdEntry<T> doc_entry(it->first, it->second, _OrderFieldType, _SortType);
if(order_op_cond.has_extra_filter){
@ -66,9 +66,6 @@ void COrderOp<T>::Process(const std::map<std::string, T>& score_map, T last_valu
} else {
_ScoreVec.push_back(doc_entry);
}
if(it->second == last_value && it->first == order_op_cond.last_id){
last_entry = doc_entry;
}
}
if (_SearchAfter) {

View File

@ -0,0 +1,363 @@
#include "bool_query_process.h"
#include "geo_distance_query_process.h"
#include "geo_shape_query_process.h"
#include "match_query_process.h"
#include "term_query_process.h"
#include "range_query_process.h"
#include "../key_format.h"
// Forwards the request JSON to QueryProcess and starts with no sub-processes,
// an empty query-type bitset and the AND-logic flag cleared.
BoolQueryProcess::BoolQueryProcess(const Json::Value& value)
    : QueryProcess(value),
      query_process_map_(),
      query_bitset_(),
      has_and_logic_(false)
{
}
// Deletes every non-null QueryProcess stored in query_process_map_ and nulls
// the stored pointer afterwards.
BoolQueryProcess::~BoolQueryProcess()
{
    typedef std::map<int , QueryProcess*>::iterator ProcessIter;
    for (ProcessIter entry = query_process_map_.begin(); entry != query_process_map_.end(); ++entry) {
        if (NULL == entry->second) {
            continue;
        }
        delete entry->second;
        entry->second = NULL;
    }
}
// Rewrites the component's AND keys to use union (composite) indexes where possible.
//
// Steps:
//  1. Index the current AND-key groups by the field id of their first FieldInfo.
//  2. Fetch the app's union key definitions; each one is a comma-separated list
//     of field ids.
//  3. For every union key whose fields are all present, combine the per-field
//     keys into union keys (Combination) and build one FieldInfo per combined
//     key; the consumed fields are removed from the map.
//  4. If at least one union key was hit, replace the AND keys with the union
//     groups followed by the groups that were not consumed.
void BoolQueryProcess::HandleUnifiedIndex(){
std::vector<std::vector<FieldInfo> >& and_keys = component_->AndKeys();
std::map<uint32_t , std::vector<FieldInfo> > fieldid_fieldinfos_map;
std::vector<std::vector<FieldInfo> >::iterator iter = and_keys.begin();
// NOTE(review): (*iter)[0] assumes no AND-key group is empty - confirm with the producer.
// NOTE(review): std::map::insert keeps the first group per field id; a later group
// with the same field id is not stored and is therefore not re-added once AndKeys()
// is cleared below - confirm this cannot happen or is intended.
for (; iter != and_keys.end(); ++iter){
fieldid_fieldinfos_map.insert(std::make_pair(((*iter)[0]).field , *iter));
}
std::vector<std::vector<FieldInfo> > union_field_infos;
std::vector<std::string> union_key_vec;
DBManager::Instance()->GetUnionKeyField(component_->Appid() , union_key_vec);
std::vector<std::string>::iterator union_key_iter = union_key_vec.begin();
for(; union_key_iter != union_key_vec.end(); union_key_iter++){
std::string union_key = *union_key_iter;
std::vector<int> union_field_vec = splitInt(union_key, ",");
std::vector<int>::iterator union_field_iter = union_field_vec.begin();
// a union key is hit only if every one of its fields is still in the map
bool hit_union_key = true;
for(; union_field_iter != union_field_vec.end(); union_field_iter++){
if(fieldid_fieldinfos_map.find(*union_field_iter) == fieldid_fieldinfos_map.end()){
hit_union_key = false;
break;
}
}
if(hit_union_key == true){
log_debug("hit union key combination");
std::vector<std::vector<string> > keys_vvec;
std::vector<FieldInfo> unionFieldInfos;
bool b_has_range = false;
// NOTE(review): at() throws if the same field id appears twice in one union key,
// because the first occurrence erases the entry - confirm definitions are duplicate-free.
for(union_field_iter = union_field_vec.begin(); union_field_iter != union_field_vec.end(); union_field_iter++){
std::vector<FieldInfo> field_info_vec = fieldid_fieldinfos_map.at(*union_field_iter);
std::vector<std::string> key_vec;
GetKeyFromFieldInfo(field_info_vec, key_vec , b_has_range);
keys_vvec.push_back(key_vec);
fieldid_fieldinfos_map.erase(*union_field_iter); // fields that hit the union key must be removed from fieldid_fieldinfos_map
}
log_debug("has range query flag:%d" , (int)b_has_range);
std::vector<std::string> union_keys = Combination(keys_vvec);
for(int m = 0 ; m < (int)union_keys.size(); m++){
FieldInfo info;
info.field = 0;
info.field_type = FIELD_INDEX;
// use the first query type that is set in query_bitset_
for (uint32_t ui_query_type = E_INDEX_READ_PRE_TERM ;
ui_query_type < E_INDEX_READ_TOTAL_NUM ;
++ui_query_type){
if (query_bitset_.test(ui_query_type)){
info.query_type = ui_query_type;
break;
}
}
info.segment_tag = (b_has_range ? SEGMENT_RANGE : SEGMENT_DEFAULT);
info.word = union_keys[m];
log_debug("union key[%d]:%s" , m, info.word.c_str());
unionFieldInfos.push_back(info);
}
union_field_infos.push_back(unionFieldInfos);
}
}
// rebuild the AND keys: union groups first, then the groups that were not consumed
if (!union_field_infos.empty()){
log_debug("replace andkey database");
component_->AndKeys().clear();
std::vector<std::vector<FieldInfo> >::iterator field_info_vet_iter = union_field_infos.begin();
for (; field_info_vet_iter != union_field_infos.end();++field_info_vet_iter){
component_->AddToFieldList(ANDKEY, *field_info_vet_iter);
}
std::map<uint32_t, std::vector<FieldInfo> >::iterator field_key_map_iter = fieldid_fieldinfos_map.begin();
for(; field_key_map_iter != fieldid_fieldinfos_map.end(); field_key_map_iter++){
component_->AddToFieldList(ANDKEY, field_key_map_iter->second);
}
}
}
// Parses the three bool sections in order: MUST (AND keys), SHOULD (OR keys),
// MUST_NOT (invert keys). Union-index rewriting runs right after MUST and
// before the other sections. The first failing section's code is returned.
int BoolQueryProcess::ParseContent(){
    int status = 0;

    const bool has_must = parse_value_.isMember(MUST);
    if (has_must){
        has_and_logic_ = true;
        log_debug("must parse:%s" , parse_value_[MUST].toStyledString().c_str() );
        status = ParseRequest(parse_value_[MUST] , ANDKEY);
        if (0 != status){
            return status;
        }
    }

    HandleUnifiedIndex();

    const bool has_should = parse_value_.isMember(SHOULD);
    if (has_should){
        log_debug("should parse:%s" , parse_value_[SHOULD].toStyledString().c_str() );
        status = ParseRequest(parse_value_[SHOULD] , ORKEY);
        if (0 != status){
            return status;
        }
    }

    const bool has_must_not = parse_value_.isMember(MUST_NOT);
    if (has_must_not){
        log_debug("must not parse:%s" , parse_value_[MUST_NOT].toStyledString().c_str() );
        status = ParseRequest(parse_value_[MUST_NOT] , INVERTKEY);
        if (0 != status){
            return status;
        }
    }

    return status;
}
// Per-logic-type parsing is not used by the bool query itself; this only logs
// and reports success.
int BoolQueryProcess::ParseContent(int logic_type){
    (void)logic_type;
    log_info("BoolQueryProcess no need parse content by logictype");
    return 0;
}
// Per-logic-type document lookup is not used by the bool query itself; this
// only logs and reports success.
int BoolQueryProcess::GetValidDoc(int logic_type , const std::vector<FieldInfo>& keys){
    (void)logic_type;
    (void)keys;
    log_info("BoolQueryProcess no need get valid doc by logictype");
    return 0;
}
int BoolQueryProcess::GetValidDoc(){
if (query_bitset_.test(E_INDEX_READ_PRE_TERM) && query_bitset_.test(E_INDEX_READ_TERM)){
return query_process_map_[E_INDEX_READ_PRE_TERM]->GetValidDoc();
}
for (uint32_t ui_key_type = ORKEY; ui_key_type < KEYTOTALNUM; ++ui_key_type){
std::vector<std::vector<FieldInfo> >::const_iterator filedinfo_vet_iter = \
component_->GetFieldList(ui_key_type).cbegin();
for (;filedinfo_vet_iter != component_->GetFieldList(ui_key_type).cend();
++ filedinfo_vet_iter){
if (filedinfo_vet_iter->empty()){
continue;
}
uint32_t query_type = (*filedinfo_vet_iter)[FIRST_TEST_INDEX].query_type;
if (!query_bitset_.test(query_type)){
log_error("get valid doc query type:%d , logic type:%d" , \
query_type , ui_key_type);
return -1;
}
query_process_map_[query_type]->GetValidDoc(ui_key_type , *filedinfo_vet_iter);
}
}
return 0;
}
// Delegates scoring to the first eligible sub-query, scanning query types in
// ascending order. A geo sub-query (distance or shape) is eligible only when
// the bool query has a MUST section, no sort field is set and no range
// sub-query is present. Returns -1 when nothing is eligible.
int BoolQueryProcess::GetScore(){
    uint32_t candidate = E_INDEX_READ_PRE_TERM;
    while (candidate < E_INDEX_READ_TOTAL_NUM){
        if (query_bitset_.test(candidate)){
            const bool is_geo = (E_INDEX_READ_GEO_DISTANCE == candidate)
                || (E_INDEX_READ_GEO_SHAPE == candidate);
            const bool geo_may_score = is_geo
                && has_and_logic_
                && component_->SortField().empty()
                && !query_bitset_.test(E_INDEX_READ_RANGE);
            if (!is_geo || geo_may_score){
                return query_process_map_[candidate]->GetScore();
            }
        }
        ++candidate;
    }
    return -1;
}
// Copies the response of the first eligible sub-query into response_ and
// returns it. Eligibility follows the same rule as GetScore: a geo sub-query
// counts only with a MUST section, no sort field and no range sub-query.
// When nothing is eligible, response_ is returned unchanged.
const Json::Value& BoolQueryProcess::SetResponse(){
    for (uint32_t probe = E_INDEX_READ_PRE_TERM; probe < E_INDEX_READ_TOTAL_NUM; ++probe){
        if (!query_bitset_.test(probe)){
            continue;
        }
        const bool geo_query = (E_INDEX_READ_GEO_DISTANCE == probe)
            || (E_INDEX_READ_GEO_SHAPE == probe);
        if (geo_query){
            const bool geo_may_respond = has_and_logic_
                && component_->SortField().empty()
                && !query_bitset_.test(E_INDEX_READ_RANGE);
            if (!geo_may_respond){
                continue;
            }
        }
        response_ = query_process_map_[probe]->SetResponse();
        break;
    }
    return response_;
}
// Parses one bool section (must / should / must_not).
//
// request    - either a single clause object or an array of clause objects;
//              each member name of a clause is a query keyword and its value
//              the keyword's body. Any other JSON type is accepted and ignored.
// logic_type - key list the parsed fields are added to (ANDKEY, ORKEY, ...).
//
// Returns 0 on success, -RT_PARSE_CONTENT_ERROR if a clause is malformed or a
// sub-query fails to initialise.
int BoolQueryProcess::ParseRequest(
    const Json::Value& request,
    int logic_type)
{
    // Gather the clause objects first so both shapes share one parsing loop.
    std::vector<const Json::Value*> clauses;
    if(request.isArray()){
        log_debug("array parse");
        for(int i = 0; i < (int)request.size(); i++){
            const Json::Value& clause = request[i];
            if (clause.isNull()){
                continue;   // a null element has no members; nothing to parse
            }
            if (!clause.isObject()){
                // getMemberNames() requires an object (or null) value; reject
                // anything else instead of tripping jsoncpp's precondition.
                log_error("bool clause element must be a json object!");
                return -RT_PARSE_CONTENT_ERROR;
            }
            clauses.push_back(&clause);
        }
    } else if (request.isObject()) {
        log_debug("object parse");
        clauses.push_back(&request);
    }

    for (size_t idx = 0; idx < clauses.size(); ++idx){
        const Json::Value& clause = *clauses[idx];
        Json::Value::Members search_member = clause.getMemberNames();
        Json::Value::Members::iterator iter = search_member.begin();
        for (; iter != search_member.end(); ++iter){
            int iret = InitQueryProcess(logic_type , *iter , clause[*iter]);
            if(iret != 0){
                log_error("InitQueryProcess error!");
                return -RT_PARSE_CONTENT_ERROR;
            }
        }
    }
    return 0;
}
// Routes one clause of a bool section to the sub-query processor of its
// keyword, creating that processor on first use.
//
// type        - logic type (ANDKEY / ORKEY / INVERTKEY) forwarded to the
//               sub-query's ParseContent.
// query_key   - clause keyword: term, match, range, geo_distance or geo_shape.
// parse_value - JSON body of the clause.
//
// Returns the sub-query's ParseContent result (0 on success), or
// -RT_PARSE_CONTENT_ERROR for an unsupported keyword or a processor that could
// not be created.
int BoolQueryProcess::InitQueryProcess(
    uint32_t type,
    const std::string& query_key,
    const Json::Value& parse_value)
{
    log_debug("InitQueryProcess start");

    // Step 1: map the keyword to a query type.
    int query_type = -1;
    if(0 == query_key.compare(TERM)){
        query_type = E_INDEX_READ_TERM;
    } else if(0 == query_key.compare(MATCH)){
        query_type = E_INDEX_READ_MATCH;
    } else if(0 == query_key.compare(RANGE)){
        // The component's terminal tag selects the pre-term flavour of range.
        if (component_->TerminalTag()){
            query_type = E_INDEX_READ_PRE_TERM;
        }else{
            query_type = E_INDEX_READ_RANGE;
        }
    } else if(0 == query_key.compare(GEODISTANCE)){
        query_type = E_INDEX_READ_GEO_DISTANCE;
    } else if(0 == query_key.compare(GEOSHAPE)){
        query_type = E_INDEX_READ_GEO_SHAPE;
    } else {
        log_error("BoolQueryParser only support term/match/range/geo_distance/geoshape!");
        return -RT_PARSE_CONTENT_ERROR;
    }

    // Step 2: create the processor for this type if it does not exist yet.
    std::map<int , QueryProcess*>::iterator slot = query_process_map_.find(query_type);
    if (slot == query_process_map_.end()){
        QueryProcess* created = NULL;
        if (E_INDEX_READ_TERM == query_type){
            created = new TermQueryProcess(parse_value);
            log_debug("bool query term process init");
        } else if (E_INDEX_READ_MATCH == query_type){
            created = new MatchQueryProcess(parse_value);
            log_debug("bool query match process init");
        } else if (E_INDEX_READ_GEO_DISTANCE == query_type){
            created = new GeoDistanceQueryProcess(parse_value);
            log_debug("bool query geo distance process init");
        } else if (E_INDEX_READ_GEO_SHAPE == query_type){
            created = new GeoShapeQueryProcess(parse_value);
            log_debug("bool query geo shape process init");
        } else {
            created = RangeQueryGenerator::Instance()->GetRangeQueryProcess(query_type , parse_value);
            log_debug("bool query range process init");
        }
        if (NULL == created){
            // Never store a null processor: it would be dereferenced below.
            log_error("create query process failed, query type:%d" , query_type);
            return -RT_PARSE_CONTENT_ERROR;
        }
        slot = query_process_map_.insert(std::make_pair(query_type , created)).first;
    }
    QueryProcess* process = slot->second;

    // Step 3: wire the shared context into the processor on first use.
    if (!query_bitset_.test(query_type)){
        query_bitset_.set(query_type);
        process->SetRequest(request_);
        process->SetComponent(component_);
        process->SetDocManager(doc_manager_);
        log_debug("query bitset has type:%d" , query_type);
    }

    // Step 4: parse this clause and report the outcome. The result used to be
    // dropped, which hid field-validation failures from ParseRequest.
    log_debug("current query type:%d , parse value:%s" , query_type , parse_value.toStyledString().c_str());
    process->SetParseJsonValue(parse_value);
    const int parse_ret = process->ParseContent(type);
    if (parse_ret != 0){
        log_error("sub query ParseContent failed, query type:%d , ret:%d" , query_type , parse_ret);
    }
    return parse_ret;
}
void BoolQueryProcess::GetKeyFromFieldInfo(const std::vector<FieldInfo>& field_info_vec, std::vector<std::string>& key_vec, bool& b_has_range){
std::vector<FieldInfo>::const_iterator iter = field_info_vec.cbegin();
for(; iter != field_info_vec.cend(); iter++){
KeyFormat::UnionKey o_keyinfo_vet;
std::string s_format_key = "";
if (E_INDEX_READ_RANGE == iter->query_type ||
E_INDEX_READ_PRE_TERM == iter->query_type){
b_has_range = true;
o_keyinfo_vet.push_back(std::make_pair(iter->field_type , std::to_string(iter->start)));
s_format_key = KeyFormat::Encode(o_keyinfo_vet);
key_vec.push_back(s_format_key);
o_keyinfo_vet.clear();
o_keyinfo_vet.push_back(std::make_pair(iter->field_type , std::to_string(iter->end)));
s_format_key = KeyFormat::Encode(o_keyinfo_vet);
key_vec.push_back(s_format_key);
}else{
o_keyinfo_vet.push_back(std::make_pair(iter->field_type , iter->word));
s_format_key = KeyFormat::Encode(o_keyinfo_vet);
key_vec.push_back(s_format_key);
log_debug("field type:%d , word:%s" , iter->field_type , iter->word.c_str());
}
}
}
/*
** Builds every combination that takes exactly one element from each inner
** vector, in row-major order (the last dimension varies fastest).
** The chosen elements are concatenated directly, with no separator:
**   [[a],[b1,b2],[c1,c2,c3]]
**   -> [a+b1+c1, a+b1+c2, a+b1+c3, a+b2+c1, a+b2+c2, a+b2+c3]
** An empty outer vector, or any empty inner vector, yields an empty result.
*/
std::vector<std::string> BoolQueryProcess::Combination(
    std::vector<std::vector<std::string> >& dimensionalArr)
{
    if (dimensionalArr.empty()){
        // No dimensions at all: nothing to combine (indexing [0] would be UB).
        return std::vector<std::string>();
    }

    // Fold the dimensions left to right, extending each partial combination
    // with every element of the next dimension.
    std::vector<std::string> combined = dimensionalArr[0];
    for (size_t dim = 1; dim < dimensionalArr.size(); ++dim){
        const std::vector<std::string>& choices = dimensionalArr[dim];
        std::vector<std::string> extended;
        extended.reserve(combined.size() * choices.size());
        for (size_t i = 0; i < combined.size(); ++i){
            for (size_t j = 0; j < choices.size(); ++j){
                extended.push_back(combined[i] + choices[j]);
            }
        }
        combined.swap(extended);
    }
    return combined;
}

View File

@ -0,0 +1,39 @@
#ifndef BOOL_QUERY_PROCESS_H_
#define BOOL_QUERY_PROCESS_H_
#include "query_process.h"
#include <bitset>
#include <algorithm>
class QueryProcess;
class PreTerminal;
class GeoDistanceQueryProcess;
// Composite query: parses the must / should / must_not sections of a bool
// query and delegates each clause to a per-type sub-query processor.
class BoolQueryProcess : public QueryProcess{
public:
    BoolQueryProcess(const Json::Value& value);
    virtual ~BoolQueryProcess();

    // The sub-query processors in query_process_map_ are owned through raw
    // pointers and deleted in the destructor; a copy would delete them twice.
    BoolQueryProcess(const BoolQueryProcess&) = delete;
    BoolQueryProcess& operator=(const BoolQueryProcess&) = delete;

private:
    // The two logic-type overloads are no-ops for the bool query itself.
    virtual int ParseContent(int logic_type);
    virtual int GetValidDoc(int logic_type , const std::vector<FieldInfo>& keys);
    virtual int ParseContent();
    virtual int GetValidDoc();
    virtual int GetScore();
    virtual const Json::Value& SetResponse();

private:
    // Parses one section (a clause object or an array of clause objects).
    int ParseRequest(const Json::Value& request, int logic_type);
    // Creates / reuses the sub-query for query_key and lets it parse parse_value.
    int InitQueryProcess(uint32_t type , const std::string& query_key, const Json::Value& parse_value);
    // Rewrites AND keys into union-index keys where a configured union key matches.
    void HandleUnifiedIndex();
    // Appends the encoded keys of field_info_vec to key_vec; flags range entries.
    void GetKeyFromFieldInfo(const std::vector<FieldInfo>& field_info_vec, std::vector<std::string>& key_vec , bool& b_has_range);
    // Concatenated cartesian product over the inner vectors.
    std::vector<std::string> Combination(std::vector<std::vector<std::string> >& dimensionalArr);

private:
    std::map<int , QueryProcess*> query_process_map_;   // query type -> owned sub-query processor
    std::bitset<E_INDEX_READ_TOTAL_NUM> query_bitset_;  // query types initialised so far
    bool has_and_logic_;                                // set once a MUST section is parsed
};
#endif

View File

@ -0,0 +1,124 @@
#include "geo_distance_query_process.h"
#include "../sort_operator/geo_query_sort_operator.h"
#include "../valid_doc_filter.h"
// Stores the geo_distance clause and marks the response with "type" = 1.
GeoDistanceQueryProcess::GeoDistanceQueryProcess(const Json::Value& value)
    : QueryProcess(value),
      logictype_geopoint_map_()
{
    response_["type"] = 1;
}
// Nothing to release here.
GeoDistanceQueryProcess::~GeoDistanceQueryProcess()
{
}
int GeoDistanceQueryProcess::ParseContent(){
return ParseContent(ANDKEY);
}
int GeoDistanceQueryProcess::ParseContent(int logic_type)
{
std::string s_geo_distance_fieldname("");
GeoPointContext o_geo_point;
Json::Value::Members member = parse_value_.getMemberNames();
Json::Value::Members::iterator iter = member.begin();
for(; iter != member.end(); ++iter){
Json::Value geo_value = parse_value_[*iter];
if (DISTANCE == (*iter)){
if (geo_value.isString()){
o_geo_point.SetDistance(atof(geo_value.asString().c_str()));
} else {
log_error("GeoDistanceParser distance should be string, the unit is km.");
return -RT_PARSE_CONTENT_ERROR;
}
} else {
s_geo_distance_fieldname = (*iter);
o_geo_point(geo_value);
}
}
logictype_geopoint_map_.insert(std::make_pair(logic_type , o_geo_point));
GeoPoint geo;
geo.lon = atof(o_geo_point.sLongtitude.c_str());
geo.lat = atof(o_geo_point.sLatitude.c_str());
double d_distance = o_geo_point.d_distance;
log_debug("geo lng:%f ,lat:%f , dis:%f" , geo.lon , geo.lat , d_distance);
std::vector<std::string> gisCode = GetArroundGeoHash(geo, d_distance, GEO_PRECISION);
if(!gisCode.empty()){
uint32_t segment_tag = SEGMENT_NONE;
FieldInfo fieldInfo;
fieldInfo.query_type = E_INDEX_READ_GEO_DISTANCE;
uint32_t uiRet = DBManager::Instance()->GetWordField(segment_tag, component_->Appid()
, s_geo_distance_fieldname , fieldInfo);
if (0 == uiRet){
log_error("field_name:[%s] error ,not in the app_field_define", s_geo_distance_fieldname.c_str());
return -RT_PARSE_CONTENT_ERROR;
}
std::vector<FieldInfo> fieldInfos;
if (uiRet != 0 && SEGMENT_NONE == segment_tag) {
component_->SetHasGisFlag(true);
for (size_t index = 0; index < gisCode.size(); index++) {
fieldInfo.word = gisCode[index];
log_debug("geo point:%s", fieldInfo.word.c_str());
fieldInfos.push_back(fieldInfo);
}
}
component_->AddToFieldList(logic_type, fieldInfos);
}
return 0;
}
// Stand-alone entry point: looks up documents for the first AND key group.
// Returns -RT_GET_FIELD_ERROR when no AND key group exists.
int GeoDistanceQueryProcess::GetValidDoc()
{
    const bool has_and_keys = !component_->GetFieldList(ANDKEY).empty();
    if (has_and_keys){
        return GetValidDoc(ANDKEY , component_->GetFieldList(ANDKEY)[FIRST_TEST_INDEX]);
    }
    return -RT_GET_FIELD_ERROR;
}
// Runs the inverted-index search for the given geohash keys, loads the doc
// content against the geo point stored for this logic type, then publishes
// the index infos to the result context.
// Returns the search error code, -RT_DTC_ERR if loading fails, 0 on success.
int GeoDistanceQueryProcess::GetValidDoc(
    int logic_type,
    const std::vector<FieldInfo>& keys)
{
    log_debug("geo related query GetValidDoc beginning...");

    std::vector<IndexInfo> hits;
    const int search_ret = ValidDocFilter::Instance()->TextInvertIndexSearch(keys, hits);
    if (0 != search_ret){
        return search_ret;
    }

    // operator[] creates a default geo point if none was parsed for this type.
    if (!doc_manager_->GetDocContent(logictype_geopoint_map_[logic_type] , hits)){
        log_error("GetDocContent error.");
        return -RT_DTC_ERR;
    }

    ResultContext::Instance()->SetIndexInfos(logic_type , hits);
    return 0;
}
int GeoDistanceQueryProcess::GetScore()
{
log_debug("geo related query GetScore beginning...");
sort_operator_base_ = new GeoQuerySortOperator(component_ , doc_manager_);
p_scoredocid_set_ = sort_operator_base_->GetSortOperator((uint32_t)component_->SortType());
return 0;
}
// Picks the output ordering for geo results:
//  - field sort (asc/desc) with an empty score set -> order by the sort field;
//  - SORT_FIELD_DESC or DONT_SORT                  -> descending walk;
//  - anything else                                 -> ascending walk.
void GeoDistanceQueryProcess::SortScore(int& i_sequence , int& i_rank)
{
    log_debug("geo related query SortScore beginning...");

    const bool is_field_desc = (SORT_FIELD_DESC == component_->SortType());
    const bool is_field_sort = is_field_desc || (SORT_FIELD_ASC == component_->SortType());

    if (is_field_sort && p_scoredocid_set_->empty()){
        SortByCOrderOp(i_rank);
        return;
    }
    if (SORT_FIELD_DESC == component_->SortType()
        || DONT_SORT == component_->SortType()){
        // descending and "don't sort" share the same handling
        DescSort(i_sequence , i_rank);
        return;
    }
    // default when nothing else applies: ascending, nearest first
    AscSort(i_sequence , i_rank);
}

View File

@ -0,0 +1,115 @@
/*
* =====================================================================================
*
* Filename: geo_distance_query_process.h
*
* Description: geo_distance_query_process class definition.
*
* Version: 1.0
* Created: 17/05/2021
* Revision: none
* Compiler: gcc
*
* Author: chenyujie, chenyujie28@jd.com
* Company: JD.com, Inc.
*
* =====================================================================================
*/
#ifndef GEO_DISTANCE_QUERY_PROCESS_H_
#define GEO_DISTANCE_QUERY_PROCESS_H_
#include "query_process.h"
#include "geohash.h"
// Distance used by GeoPointContext when the query supplies none
// (the geo_distance parser's error message states the unit is km).
const double DEFAULT_DISTANCE = 2.0;
// Precision argument passed to GetArroundGeoHash by the geo query processors.
const int GEO_PRECISION = 6;
// A query point (latitude / longitude kept as the strings received) plus a
// search distance. The point can be parsed from three JSON shapes:
//   "lat,lng"                                  (string)
//   ["lat", "lng"]                             (array of strings)
//   {"latitude": "..", "longitude": ".."}      (object of strings)
struct GeoPointContext
{
    std::string sLatitude;
    std::string sLongtitude;
    double d_distance;

    GeoPointContext()
        : sLatitude("")
        , sLongtitude("")
        , d_distance(DEFAULT_DISTANCE)
    {}

    // d_distance is initialised here as well; it used to be left indeterminate
    // and was then copied around with the struct.
    GeoPointContext(const Json::Value& oJsonValue)
        : sLatitude("")
        , sLongtitude("")
        , d_distance(DEFAULT_DISTANCE)
    {
        ParseJson(oJsonValue);
    }

    GeoPointContext(const std::string& sLat, const std::string& sLng
        , double dDis = DEFAULT_DISTANCE)
        : sLatitude(sLat)
        , sLongtitude(sLng)
        , d_distance(dDis)
    { }

    // Re-parses the point from JSON; the distance is left as it is.
    void operator()(const Json::Value& oJsonValue)
    {
        ParseJson(oJsonValue);
    }

    void SetDistance(const double& dDis){
        d_distance = dDis;
    }

    // True when both coordinates are non-empty (their content is not validated).
    bool IsGeoPointFormat() const{
        return ((!sLatitude.empty()) && (!sLongtitude.empty()));
    }

    void Clear(){
        sLatitude.clear();
        sLongtitude.clear();
    }

private:
    void ParseJson(const Json::Value& oJsonValue){
        if (oJsonValue.isString()){
            const std::string sValue = oJsonValue.asString();
            const std::size_t iPos = sValue.find(',');
            if (std::string::npos == iPos){
                // Without a comma, substr(0, npos) and substr(npos + 1) both
                // return the whole string, so latitude and longitude used to
                // end up identical. Treat the value as "no point" instead.
                sLatitude.clear();
                sLongtitude.clear();
            } else {
                sLatitude = sValue.substr(0, iPos);
                sLongtitude = sValue.substr(iPos + 1);
            }
        }
        if (oJsonValue.isArray()){
            if (oJsonValue[0].isString()){
                sLatitude = oJsonValue[0].asString();
            }
            if (oJsonValue[1].isString()){
                sLongtitude = oJsonValue[1].asString();
            }
        }
        if (oJsonValue.isObject()){
            if (oJsonValue["latitude"].isString()){
                sLatitude = oJsonValue["latitude"].asString();
            }
            if (oJsonValue["longitude"].isString()){
                sLongtitude = oJsonValue["longitude"].asString();
            }
        }
    }
};
// Query processor for geo_distance clauses: turns a point and distance into
// geohash keys, looks up the matching documents and orders them.
class GeoDistanceQueryProcess : public QueryProcess {
public:
    GeoDistanceQueryProcess(const Json::Value& value);
    virtual ~GeoDistanceQueryProcess();

    // Overloads taking a logic type are used when a bool query drives this
    // processor; the parameterless ones operate on the AND keys.
    virtual int ParseContent(int logic_type);
    virtual int GetValidDoc(int logic_type, const std::vector<FieldInfo>& keys);
    virtual int ParseContent();
    virtual int GetValidDoc();
    virtual int GetScore();
    virtual void SortScore(int& i_sequence , int& i_rank);

private:
    // logic type -> query point parsed for that logic type
    std::map<int , GeoPointContext> logictype_geopoint_map_;
};
#endif

View File

@ -0,0 +1,55 @@
#include "geo_shape_query_process.h"
// All state lives in the geo-distance base class.
GeoShapeQueryProcess::GeoShapeQueryProcess(const Json::Value& value)
    : GeoDistanceQueryProcess(value)
{
}
// Nothing to release here.
GeoShapeQueryProcess::~GeoShapeQueryProcess()
{
}
int GeoShapeQueryProcess::ParseContent(int logic_type){
Json::Value::Members member = parse_value_.getMemberNames();
Json::Value::Members::iterator iter = member.begin();
if(iter == member.end()){ // 一个geo_shape下只对应一个字段
return -RT_PARSE_CONTENT_ERROR;
}
std::string fieldname = *iter;
Json::Value field_value = parse_value_[fieldname];
GeoShapeContext o_geo_shape;
if(field_value.isMember(POINTS)){
o_geo_shape(field_value[POINTS]);
} else {
return -RT_PARSE_CONTENT_ERROR;
}
if (o_geo_shape.IsGeoShapeFormat()){
std::vector<std::string> gisCode = GetArroundGeoHash(o_geo_shape.GetMinEnclosRect(), GEO_PRECISION);
if(!gisCode.empty()){
vector<FieldInfo> fieldInfos;
uint32_t segment_tag = SEGMENT_NONE;
FieldInfo fieldInfo;
fieldInfo.query_type = E_INDEX_READ_GEO_SHAPE;
uint32_t uiRet = DBManager::Instance()->GetWordField(segment_tag, component_->Appid()
, fieldname, fieldInfo);
if (0 == uiRet){
log_error("field_name:[%s] error ,not in the app_field_define", fieldname.c_str());
return -RT_PARSE_CONTENT_ERROR;
}
if (uiRet != 0 && SEGMENT_NONE == segment_tag) {
component_->SetHasGisFlag(true);
for (size_t index = 0; index < gisCode.size(); index++) {
fieldInfo.word = gisCode[index];
log_debug("geo shape point:%s", fieldInfo.word.c_str());
fieldInfos.push_back(fieldInfo);
}
}
if (!fieldInfos.empty()) {
component_->AddToFieldList(logic_type, fieldInfos);
}
}
}
return 0;
}

View File

@ -0,0 +1,77 @@
#ifndef GEO_SHAPE_QUERY_PROCESS_H_
#define GEO_SHAPE_QUERY_PROCESS_H_
#include "query_process.h"
#include "geo_distance_query_process.h"
// JSON member name, inside a geo_shape field's value, that holds the shape's points.
const char* const POINTS ="points";
struct GeoShapeContext
{
std::vector<GeoPointContext> oGeoShapeVet;
void operator()(const Json::Value& oJsonValue)
{
if (oJsonValue.isString()){
std::string sValue = oJsonValue.asString();
sValue = delPrefix(sValue);
std::vector<std::string> oValueVet = splitEx(sValue, ",");
for(uint32_t str_vec_idx = 0; str_vec_idx < oValueVet.size(); str_vec_idx++){
std::string wkt_str = trim(oValueVet[str_vec_idx]);
std::vector<std::string> wkt_vec = splitEx(wkt_str, " ");
if(wkt_vec.size() == 2){
oGeoShapeVet.push_back(GeoPointContext(wkt_vec[1], wkt_vec[0]));
}
}
}
if (oJsonValue.isArray()){
for(int i = 0; i < (int)oJsonValue.size(); i++){
GeoPointContext o_geo_point(oJsonValue[i]);
oGeoShapeVet.push_back(o_geo_point);
}
}
}
EnclosingRectangle GetMinEnclosRect(){
std::vector<double> oLatVet;
std::vector<double> oLngVet;
for (size_t i = 0; i < oGeoShapeVet.size(); ++i){
oLatVet.push_back(atof(oGeoShapeVet[i].sLatitude.c_str()));
oLngVet.push_back(atof(oGeoShapeVet[i].sLongtitude.c_str()));
}
if (oLatVet.empty() || oLngVet.empty()){
return EnclosingRectangle();
}
std::sort(oLatVet.begin(), oLatVet.end());
std::sort(oLngVet.begin(), oLngVet.end());
return EnclosingRectangle(*(oLngVet.end() - 1), *(oLngVet.begin())
, *(oLatVet.end() - 1), *(oLatVet.begin()));
}
bool IsGeoShapeFormat(){
bool bRet = !oGeoShapeVet.empty();
for (size_t i = 0; i < oGeoShapeVet.size(); i++)
{
bRet &= oGeoShapeVet[i].IsGeoPointFormat();
}
return bRet;
}
void Clear(){
oGeoShapeVet.clear();
}
};
// Query processor for geo_shape clauses. Everything except parsing is
// inherited from the geo-distance processor.
class GeoShapeQueryProcess : public GeoDistanceQueryProcess {
public:
    GeoShapeQueryProcess(const Json::Value& value);
    virtual ~GeoShapeQueryProcess();

    virtual int ParseContent(int logic_type);
};
#endif

View File

@ -1,329 +1,80 @@
#include "match_query_process.h"
#include "math.h"
#include "../order_op.h"
#include "../valid_doc_filter.h"
#define DOC_CNT 10000
MatchQueryProcess::MatchQueryProcess(uint32_t appid, Json::Value& value, Component* component)
:QueryProcess(appid, value, component){
appid_ = component_->Appid();
sort_type_ = component_->SortType();
sort_field_ = component_->SortField();
has_gis_ = false;
}
MatchQueryProcess::MatchQueryProcess(const Json::Value& value)
: QueryProcess(value)
{ }
MatchQueryProcess::~MatchQueryProcess(){
}
MatchQueryProcess::~MatchQueryProcess()
{ }
int MatchQueryProcess::ParseContent(){
return ParseContent(ORKEY);
}
int MatchQueryProcess::ParseContent(uint32_t type){
int MatchQueryProcess::ParseContent(int logic_type){
vector<FieldInfo> fieldInfos;
Json::Value::Members member = value_.getMemberNames();
Json::Value::Members member = parse_value_.getMemberNames();
Json::Value::Members::iterator iter = member.begin();
string fieldname;
std::string fieldname;
Json::Value field_value;
if(iter != member.end()){ // 一个match下只对应一个字段
fieldname = *iter;
field_value = value_[fieldname];
field_value = parse_value_[fieldname];
} else {
log_error("MatchQueryProcess error, value is null");
return -RT_PARSE_CONTENT_ERROR;
}
uint32_t segment_tag = 0;
FieldInfo fieldInfo;
uint32_t field = DBManager::Instance()->GetWordField(segment_tag, appid_, fieldname, fieldInfo);
if (field != 0 && segment_tag == 1)
{
string split_data = SplitManager::Instance()->split(field_value.asString(), appid_);
fieldInfo.query_type = E_INDEX_READ_MATCH;
uint32_t segment_tag = SEGMENT_NONE;
uint32_t uiRet = DBManager::Instance()->GetWordField(segment_tag, component_->Appid()
, fieldname, fieldInfo);
if (uiRet != 0 && SEGMENT_DEFAULT == segment_tag){
std::string split_data = SplitManager::Instance()->split(field_value.asString(), component_->Appid());
log_debug("split_data: %s", split_data.c_str());
vector<string> split_datas = splitEx(split_data, "|");
for(size_t index = 0; index < split_datas.size(); index++)
{
FieldInfo info;
info.field = fieldInfo.field;
info.field_type = fieldInfo.field_type;
info.word = split_datas[index];
info.segment_tag = fieldInfo.segment_tag;
fieldInfos.push_back(info);
std::vector<std::string> split_datas = splitEx(split_data, "|");
for(size_t index = 0; index < split_datas.size(); index++){
fieldInfo.word = split_datas[index];
fieldInfos.push_back(fieldInfo);
}
}
else if (field != 0)
{
else if (uiRet != 0){
fieldInfo.word = field_value.asString();
fieldInfos.push_back(fieldInfo);
}else{
log_error("field_name:[%s] error ,not in the app_field_define or segmentTag error", fieldname.c_str());
return -RT_PARSE_CONTENT_ERROR;
}
component_->AddToFieldList(type, fieldInfos);
component_->AddToFieldList(logic_type, fieldInfos);
return 0;
}
int MatchQueryProcess::GetValidDoc(){
doc_manager_ = new DocManager(component_);
logical_operate_ = new LogicalOperate(appid_, sort_type_, has_gis_, component_->CacheSwitch());
for (size_t index = 0; index < component_->Keys().size(); index++)
{
vector<IndexInfo> doc_id_vec;
vector<FieldInfo> fieldInfos = component_->Keys()[index];
vector<FieldInfo>::iterator it;
for (it = fieldInfos.begin(); it != fieldInfos.end(); it++) {
vector<IndexInfo> doc_info;
if ((*it).segment_tag == 3) {
int ret = GetDocByShiftWord(*it, doc_info, appid_, highlightWord_);
if (ret != 0) {
doc_id_vec.clear();
return -RT_GET_DOC_ERR;
}
sort(doc_info.begin(), doc_info.end());
for (size_t doc_info_idx = 0; doc_info_idx < doc_info.size(); doc_info_idx++){
KeyInfo info;
info.word_freq = 1;
info.field = (*it).field;
info.word = (*it).word;
doc_info_map_[doc_info[doc_info_idx].doc_id].push_back(info);
}
} else if ((*it).segment_tag == 4) {
int ret = GetDocByShiftEnWord(*it, doc_info, appid_, highlightWord_);
if (ret != 0) {
doc_id_vec.clear();
return -RT_GET_DOC_ERR;
}
sort(doc_info.begin(), doc_info.end());
for (size_t doc_info_idx = 0; doc_info_idx < doc_info.size(); doc_info_idx++){
KeyInfo info;
info.word_freq = 1;
info.field = (*it).field;
info.word = (*it).word;
doc_info_map_[doc_info[doc_info_idx].doc_id].push_back(info);
}
} else {
int ret = logical_operate_->GetDocIdSetByWord(*it, doc_info);
if (ret != 0){
return -RT_GET_DOC_ERR;
}
if (doc_info.size() == 0)
continue;
if (!isAllNumber((*it).word))
highlightWord_.insert((*it).word);
if(sort_type_ == SORT_RELEVANCE){
logical_operate_->CalculateByWord(*it, doc_info, doc_info_map_, key_in_doc_);
}
}
doc_id_vec = vec_union(doc_id_vec, doc_info);
}
if(index == 0){ // 第一个直接赋值给vecs后续的依次与前面的进行逻辑运算
doc_vec_.assign(doc_id_vec.begin(), doc_id_vec.end());
} else {
doc_vec_ = vec_union(doc_vec_, doc_id_vec);
}
}
bool bRet = doc_manager_->GetDocContent(has_gis_, doc_vec_, valid_docs_, distances_);
if (false == bRet) {
log_error("GetDocContent error.");
return -RT_DTC_ERR;
if (component_->GetFieldList(ORKEY).empty()){
return -RT_GET_FIELD_ERROR;
}
return GetValidDoc(ORKEY , component_->GetFieldList(ORKEY)[FIRST_TEST_INDEX]);
}
int MatchQueryProcess::GetValidDoc(int logic_type, const std::vector<FieldInfo>& keys){
std::vector<IndexInfo> index_info_vet;
int iret = -1;
uint32_t segment_tag = keys[FIRST_SPLIT_WORD_INDEX].segment_tag;
if(SEGMENT_CHINESE == segment_tag
|| SEGMENT_ENGLISH == segment_tag){
iret = ValidDocFilter::Instance()->HanPinTextInvertIndexSearch(keys , index_info_vet);
}else{
iret = ValidDocFilter::Instance()->TextInvertIndexSearch(keys , index_info_vet);
}
if (iret != 0) { return iret; }
ResultContext::Instance()->SetIndexInfos(logic_type , index_info_vet);
return 0;
}
// Scores the valid documents and feeds them to the sort structures.
// For SORT_RELEVANCE / SORT_TIMESTAMP each valid document in doc_info_map_ gets
// a score summed over its key infos and is inserted into skipList_.
// For field sorts the score maps are filled through doc_manager_; for any other
// sort type every valid document is inserted into skipList_ with score 1.
// Always returns 0.
int MatchQueryProcess::GetScoreAndSort(){
// BM25 algorithm
uint32_t doc_cnt = DOC_CNT;
double k1 = 1.2;
double k2 = 200;
double K = 1.65;
string doc_id;
string keyword;
uint32_t word_freq = 0;
uint32_t field = 0;
if(sort_type_ == SORT_RELEVANCE || sort_type_ == SORT_TIMESTAMP){
map<string, vec>::iterator ves_iter = doc_info_map_.begin();
for (; ves_iter != doc_info_map_.end(); ves_iter++) {
double score = 0;
uint32_t key_docs = 0;
doc_id = ves_iter->first;
vector<KeyInfo> &key_info = ves_iter->second;
// Only documents that passed validation are scored.
if(valid_docs_.find(doc_id) == valid_docs_.end()){
continue;
}
// word_set / pos_map / title_pos_map are filled below but only read by the
// commented-out exact-match check further down.
set<string> word_set;
map<string, vector<int> > pos_map;
map<string, vector<int> > title_pos_map;
for (uint32_t i = 0; i < key_info.size(); i++) {
keyword = key_info[i].word;
if (word_set.find(keyword) == word_set.end()) {
word_set.insert(keyword);
}
word_freq = key_info[i].word_freq;
field = key_info[i].field;
if (field == LOCATE_ANY) {
pos_map[keyword] = key_info[i].pos_vec;
}
if (field == LOCATE_TITLE) {
title_pos_map[keyword] = key_info[i].pos_vec;
}
// key_in_doc_[keyword] is used as the number of documents containing the
// keyword; doc_cnt is the fixed DOC_CNT constant, not a live count.
key_docs = key_in_doc_[keyword];
score += log((doc_cnt - key_docs + 0.5) / (key_docs + 0.5)) * ((k1 + 1)*word_freq) / (K + word_freq) * (k2 + 1) * 1 / (k2 + 1);
}
/*if (!complete_keys.empty()) { // exact match
if (word_set.size() != word_vec.size()) { // number of words found in the document differs from the input: not an exact match
continue;
}
else { // not contiguous in either the title or the body: not a match
if (CheckWordContinus(word_vec, pos_map) == false && CheckWordContinus(word_vec, title_pos_map) == false) {
continue;
}
}
}*/
skipList_.InsertNode(score, doc_id.c_str());
}
} else {
set<string>::iterator set_iter = valid_docs_.begin();
for(; set_iter != valid_docs_.end(); set_iter++){
doc_id = *set_iter;
if (sort_type_ == SORT_FIELD_ASC || sort_type_ == SORT_FIELD_DESC){
doc_manager_->GetScoreMap(doc_id, sort_type_, sort_field_, sort_field_type_, appid_);
} else {
skipList_.InsertNode(1, doc_id.c_str());
}
}
}
return 0;
}
// Builds the JSON response and hands it to request_.
// Three output paths:
//  1. field sort with an empty skip list -> COrderOp over the typed score map;
//  2. gis query or ascending field sort  -> forward walk of the skip list;
//  3. otherwise                          -> backward walk of the skip list.
// In paths 2 and 3, documents rejected by the extra-filter check are skipped,
// and unless ReturnAll is set only positions inside
// [limit_start, limit_end] (inclusive) are emitted.
void MatchQueryProcess::TaskEnd(){
Json::FastWriter writer;
Json::Value response;
response["code"] = 0;
// sequence: zero-based position among documents that passed filtering;
// rank: number of counted documents, reported as "count".
int sequence = -1;
int rank = 0;
int page_size = component_->PageSize();
int limit_start = page_size * (component_->PageIndex()-1);
int limit_end = page_size * (component_->PageIndex()-1) + page_size - 1;
log_debug("search result begin.");
if((sort_type_ == SORT_FIELD_DESC || sort_type_ == SORT_FIELD_ASC) && skipList_.GetSize() == 0){
OrderOpCond order_op_cond;
order_op_cond.last_id = component_->LastId();
order_op_cond.limit_start = limit_start;
order_op_cond.count = page_size;
order_op_cond.has_extra_filter = false;
if(component_->ExtraFilterKeys().size() != 0 || component_->ExtraFilterAndKeys().size() != 0 || component_->ExtraFilterInvertKeys().size() != 0){
order_op_cond.has_extra_filter = true;
}
// Dispatch on the sort field's type; rank is the size of the whole score map.
if(sort_field_type_ == FIELDTYPE_INT){
rank += doc_manager_->ScoreIntMap().size();
COrderOp<int> orderOp(FIELDTYPE_INT, component_->SearchAfter(), sort_type_);
orderOp.Process(doc_manager_->ScoreIntMap(), atoi(component_->LastScore().c_str()), order_op_cond, response, doc_manager_);
} else if(sort_field_type_ == FIELDTYPE_DOUBLE) {
rank += doc_manager_->ScoreDoubleMap().size();
COrderOp<double> orderOp(FIELDTYPE_DOUBLE, component_->SearchAfter(), sort_type_);
orderOp.Process(doc_manager_->ScoreDoubleMap(), atof(component_->LastScore().c_str()), order_op_cond, response, doc_manager_);
} else {
rank += doc_manager_->ScoreStrMap().size();
COrderOp<string> orderOp(FIELDTYPE_STRING, component_->SearchAfter(), sort_type_);
orderOp.Process(doc_manager_->ScoreStrMap(), component_->LastScore(), order_op_cond, response, doc_manager_);
}
} else if (has_gis_ || sort_type_ == SORT_FIELD_ASC) {
log_debug("m_has_gis or SORT_FIELD_ASC, size:%d ", skipList_.GetSize());
// Forward walk starting at the node after the header; the loop stops at the
// node whose forward pointer is NULL without emitting it.
SkipListNode *tmp = skipList_.GetHeader()->level[0].forward;
while (tmp->level[0].forward != NULL) {
// Extra filtering through extra_filter_keys, meant for fields with low selectivity.
if(doc_manager_->CheckDocByExtraFilterKey(tmp->value) == false){
log_debug("CheckDocByExtraFilterKey failed, %s", tmp->value);
tmp = tmp->level[0].forward;
continue;
}
sequence++;
rank++;
if(component_->ReturnAll() == 0){
if (sequence < limit_start || sequence > limit_end) {
tmp = tmp->level[0].forward;
continue;
}
}
Json::Value doc_info;
doc_info["doc_id"] = Json::Value(tmp->value);
doc_info["score"] = Json::Value(tmp->key);
response["result"].append(doc_info);
tmp = tmp->level[0].forward;
}
} else {
// Backward walk starting at the node before the footer; the loop stops at the
// node whose backward pointer is NULL without emitting it.
SkipListNode *tmp = skipList_.GetFooter()->backward;
while(tmp->backward != NULL) {
if(doc_manager_->CheckDocByExtraFilterKey(tmp->value) == false){
tmp = tmp->backward;
continue;
}
sequence++;
rank++;
if (component_->ReturnAll() == 0){
if (sequence < limit_start || sequence > limit_end) {
tmp = tmp->backward;
continue;
}
}
Json::Value doc_info;
doc_info["doc_id"] = Json::Value(tmp->value);
doc_info["score"] = Json::Value(tmp->key);
response["result"].append(doc_info);
tmp = tmp->backward;
}
}
if(component_->Fields().size() > 0){
doc_manager_->AppendFieldsToRes(response, component_->Fields());
}
if (rank > 0)
AppendHighLightWord(response);
// "type" is 1 for gis queries and 0 otherwise.
if (has_gis_) {
response["type"] = 1;
}
else {
response["type"] = 0;
}
response["count"] = rank;
/*if(m_index_set_cnt != 0){
response["count"] = m_index_set_cnt;
}*/
log_debug("search result end: %lld.", (long long int)GetSysTimeMicros());
std::string outputConfig = writer.write(response);
request_->setResult(outputConfig);
/*if (component_->ReturnAll() == 0 && component_->CacheSwitch() == 1 && component_->PageIndex() == 1 && has_gis_ == 0
&& rank > 0 && outputConfig.size() < MAX_VALUE_LEN) {
string m_Data_Cache = m_Primary_Data + "|" + component_->DataAnd() + "|" + component_->DataInvert() + "|" + component_->DataComplete() + "|" +
ToString(sort_type_) + "|" + ToString(appid_);
unsigned data_size = m_Data_Cache.size();
int ret = cachelist->add_list(m_Data_Cache.c_str(), outputConfig.c_str(), data_size, outputConfig.size());
if (ret != 0) {
log_error("add to cache_list error, ret: %d.", ret);
}
else {
log_debug("add to cache_list: %s.", m_Data_Cache.c_str());
}
}*/
}
// Copies highlight words from highlightWord_ into response["hlWord"],
// stopping after the first ten entries in set order.
void MatchQueryProcess::AppendHighLightWord(Json::Value& response)
{
    const int kMaxHighlightWords = 10;
    int appended = 0;
    for (set<string>::iterator word = highlightWord_.begin();
         word != highlightWord_.end() && appended < kMaxHighlightWords;
         ++word, ++appended) {
        response["hlWord"].append(word->c_str());
    }
}

View File

@ -23,28 +23,16 @@
class MatchQueryProcess: public QueryProcess{
public:
MatchQueryProcess(uint32_t appid, Json::Value& value, Component* component);
~MatchQueryProcess();
int ParseContent();
int GetValidDoc();
int GetScoreAndSort();
void TaskEnd();
MatchQueryProcess(const Json::Value& value);
virtual~ MatchQueryProcess();
int ParseContent(uint32_t type);
void AppendHighLightWord(Json::Value& response);
public:
virtual int ParseContent(int logic_type);
virtual int GetValidDoc(int logic_type, const std::vector<FieldInfo>& keys);
private:
set<string> highlightWord_;
map<string, vec> doc_info_map_;
map<string, uint32_t> key_in_doc_;
vector<IndexInfo> doc_vec_;
hash_double_map distances_;
set<string> valid_docs_;
uint32_t appid_;
uint32_t sort_type_;
string sort_field_;
bool has_gis_;
FIELDTYPE sort_field_type_;
virtual int ParseContent();
virtual int GetValidDoc();
};
#endif

View File

@ -1,50 +1,191 @@
#include "query_process.h"
#include <assert.h>
#include "../valid_doc_filter.h"
#include "../order_op.h"
QueryProcess::QueryProcess(uint32_t appid, Json::Value& value, Component* component)
:component_(component),
appid_(appid),
value_(value)
{
// Builds a query process for the given JSON clause, which is copied into
// parse_value_. Every collaborator pointer starts out as NULL.
QueryProcess::QueryProcess(const Json::Value& value)
: component_(NULL)
, doc_manager_(NULL)
, request_(NULL)
, sort_operator_base_(NULL)
, p_scoredocid_set_(NULL)
, parse_value_(value)
, response_()
{ }
// Disposes of sort_operator_base_ through the project's DELETE macro
// (presumably delete + reset -- confirm against the macro definition) and
// clears the shared ResultContext singleton.
QueryProcess::~QueryProcess()
{
DELETE(sort_operator_base_);
ResultContext::Instance()->Clear();
}
QueryProcess::~QueryProcess(){
// Drives the whole query pipeline in order: ParseContent, GetValidDoc,
// CheckValidDoc, GetScore, then SetResponse and serialisation of response_
// into the request.
// The component, doc manager and request must have been set beforehand
// (asserted). Returns 0 on success, otherwise the non-zero result of the
// first stage that failed; later stages are not run in that case.
int QueryProcess::StartQuery(){
    assert(component_ != NULL);
    assert(doc_manager_ != NULL);
    assert(request_ != NULL);

    int stage_result = ParseContent();
    if (stage_result != 0) {
        return stage_result;
    }

    stage_result = GetValidDoc();
    if (stage_result != 0) {
        return stage_result;
    }

    stage_result = CheckValidDoc();
    if (stage_result != 0) {
        return stage_result;
    }

    stage_result = GetScore();
    if (stage_result != 0) {
        return stage_result;
    }

    SetResponse();
    Json::FastWriter writer;
    std::string serialized = writer.write(response_);
    request_->setResult(serialized);
    return stage_result;
}
int QueryProcess::DoJob(){
TaskBegin();
ParseContent();
GetValidDoc();
GetScoreAndSort();
TaskEnd();
// Asks the doc manager to load the document content.
// Returns 0 when GetDocContent() succeeds, -RT_DTC_ERR otherwise.
int QueryProcess::CheckValidDoc(){
    log_debug("query base CheckValidDoc beginning...");
    if (doc_manager_->GetDocContent()) {
        return 0;
    }
    log_error("GetDocContent error.");
    return -RT_DTC_ERR;
}
void QueryProcess::SetSkipList(SkipList& skipList){
skipList_ = skipList;
int QueryProcess::GetScore()
{
log_debug("query base GetScore beginning...");
sort_operator_base_ = new SortOperatorBase(component_ , doc_manager_);
p_scoredocid_set_ = sort_operator_base_->GetSortOperator((uint32_t)component_->SortType());
return 0;
}
void QueryProcess::SetRequest(CTaskRequest* request){
request_ = request;
// Picks the ordering strategy for the response:
//  - field sort (asc or desc) with an empty score set -> SortByCOrderOp
//  - ascending field sort                              -> AscSort
//  - everything else                                   -> DescSort
// Default when unspecified is descending: higher score, newer time and larger
// doc id first (geo-location queries excepted).
void QueryProcess::SortScore(int& i_sequence , int& i_rank)
{
    log_debug("query base sortscore beginning...");

    const bool is_field_asc = (SORT_FIELD_ASC == component_->SortType());
    const bool is_field_sort = (SORT_FIELD_DESC == component_->SortType()) || is_field_asc;

    if (is_field_sort && p_scoredocid_set_->empty()) {
        SortByCOrderOp(i_rank);
        return;
    }
    if (is_field_asc) {
        AscSort(i_sequence, i_rank);
        return;
    }
    DescSort(i_sequence, i_rank);
}
void QueryProcess::TaskBegin(){
// Fills response_ with code, type, the sorted result list, the requested
// fields, highlight words (only when at least one doc matched) and the total
// count, then returns a reference to it.
const Json::Value& QueryProcess::SetResponse()
{
    log_debug("search result begin.");
    response_["code"] = 0;
    response_["type"] = 0;

    // The cursor starts at -1 so that the first accepted doc gets sequence 0.
    int page_cursor = -1;
    int matched_total = 0;
    SortScore(page_cursor , matched_total);

    const bool wants_fields = !component_->RequiredFields().empty();
    if (wants_fields) {
        doc_manager_->AppendFieldsToRes(response_, component_->RequiredFields());
    }

    if (matched_total > 0) {
        AppendHighLightWord();
    }

    response_["count"] = matched_total;
    log_debug("search result end: %lld.", (long long int)GetSysTimeMicros());
    return response_;
}
int QueryProcess::ParseContent(){
return 0;
void QueryProcess::SortByCOrderOp(int& i_rank)
{
log_debug("query base SortByCOrderOp beginning...");
OrderOpCond order_op_cond;
order_op_cond.last_id = component_->LastId();
order_op_cond.limit_start = component_->PageSize() * (component_->PageIndex()-1);
order_op_cond.count = component_->PageSize();
order_op_cond.has_extra_filter = false;
if(component_->ExtraFilterOrKeys().size() != 0 || component_->ExtraFilterAndKeys().size() != 0
|| component_->ExtraFilterInvertKeys().size() != 0){
order_op_cond.has_extra_filter = true;
}
if(FIELDTYPE_INT == sort_operator_base_->GetSortFieldType()){
i_rank += doc_manager_->ScoreIntMap().size();
COrderOp<int> orderOp(FIELDTYPE_INT, component_->SearchAfter(), component_->SortType());
orderOp.Process(doc_manager_->ScoreIntMap(), atoi(component_->LastScore().c_str()), order_op_cond, response_, doc_manager_);
} else if(FIELDTYPE_DOUBLE == sort_operator_base_->GetSortFieldType()) {
i_rank += doc_manager_->ScoreDoubleMap().size();
COrderOp<double> orderOp(FIELDTYPE_DOUBLE, component_->SearchAfter(), component_->SortType());
orderOp.Process(doc_manager_->ScoreDoubleMap(), atof(component_->LastScore().c_str()), order_op_cond, response_, doc_manager_);
} else {
i_rank += doc_manager_->ScoreStrMap().size();
COrderOp<std::string> orderOp(FIELDTYPE_STRING, component_->SearchAfter(), component_->SortType());
orderOp.Process(doc_manager_->ScoreStrMap(), component_->LastScore(), order_op_cond, response_, doc_manager_);
}
}
int QueryProcess::GetValidDoc(){
return 0;
void QueryProcess::AscSort(int& i_sequence , int& i_rank)
{
log_debug("ascsort, result size:%d ", (uint32_t)p_scoredocid_set_->size());
int i_limit_start = component_->PageSize() * (component_->PageIndex() - 1);
int i_limit_end = component_->PageSize() * component_->PageIndex() - 1;
std::set<ScoreDocIdNode>::iterator iter = p_scoredocid_set_->begin();
for( ;iter != p_scoredocid_set_->end(); ++iter){
// 通过extra_filter_keys进行额外过滤针对区分度不高的字段
if(doc_manager_->CheckDocByExtraFilterKey(iter->s_docid) == false){
log_debug("CheckDocByExtraFilterKey failed, %s", iter->s_docid.c_str());
continue;
}
i_sequence ++;
i_rank ++;
if(component_->ReturnAll() == 0){
if (i_sequence < i_limit_start || i_sequence > i_limit_end) {
continue;
}
}
Json::Value doc_info;
doc_info["doc_id"] = Json::Value(iter->s_docid);
doc_info["score"] = Json::Value(iter->d_score);
response_["result"].append(doc_info);
}
}
int QueryProcess::GetScoreAndSort(){
return 0;
void QueryProcess::DescSort(int& i_sequence , int& i_rank)
{
log_debug("descsort, result size:%d ", (uint32_t)p_scoredocid_set_->size());
int i_limit_start = component_->PageSize() * (component_->PageIndex() - 1);
int i_limit_end = component_->PageSize() * component_->PageIndex() - 1;
log_debug("limit_start:%d , limit_end:%d", i_limit_start, i_limit_end);
std::set<ScoreDocIdNode>::reverse_iterator riter = p_scoredocid_set_->rbegin();
for( ;riter != p_scoredocid_set_->rend(); ++riter){
if(doc_manager_->CheckDocByExtraFilterKey(riter->s_docid) == false){
continue;
}
i_sequence++;
i_rank++;
if (component_->ReturnAll() == 0){
if (i_sequence < i_limit_start || i_sequence > i_limit_end) {
continue;
}
}
Json::Value doc_info;
doc_info["doc_id"] = Json::Value(riter->s_docid);
doc_info["score"] = Json::Value(riter->d_score);
response_["result"].append(doc_info);
}
}
void QueryProcess::TaskEnd(){
}
void QueryProcess::AppendHighLightWord()
{
int count = 0;
const HighLightWordSet& highlight_word_set = ResultContext::Instance()->GetHighLightWordSet();
std::set<std::string>::const_iterator iter = highlight_word_set.cbegin();
for (; iter != highlight_word_set.cend(); iter++) {
if (count >= 10)
break;
++count;
response_["hlWord"].append((*iter).c_str());
}
}

View File

@ -19,38 +19,82 @@
#ifndef __QUERY_PROCESS_H__
#define __QUERY_PROCESS_H__
#include "../component.h"
#include "../logical_operate.h"
#include <iostream>
#include <sstream>
#include "../request_context.h"
#include "../result_context.h"
#include "../valid_doc_filter.h"
#include "../doc_manager.h"
#include "../comm.h"
#include "../db_manager.h"
#include "../split_manager.h"
#include "../comm.h"
#include "../sort_operator/sort_operator_base.h"
#include "skiplist.h"
#include "task_request.h"
const char* const BOOL ="bool";
const char* const MUST ="must";
const char* const SHOULD ="should";
const char* const MUST_NOT ="must_not";
const char* const TERM ="term";
const char* const MATCH ="match";
const char* const RANGE ="range";
const char* const GEODISTANCE ="geo_distance";
const char* const DISTANCE = "distance";
const char* const GEOSHAPE ="geo_polygon";
#define FIRST_TEST_INDEX 0
#define FIRST_SPLIT_WORD_INDEX 0
// Query process kinds. The enumerators are listed in priority order because
// the BOOL query depends on that order (per the original note; the consumer is
// not visible here), so presumably they should not be reordered.
enum E_INDEX_READ_QUERY_PROCESS{
E_INDEX_READ_PRE_TERM,
E_INDEX_READ_RANGE,
E_INDEX_READ_GEO_DISTANCE,
E_INDEX_READ_GEO_SHAPE,
E_INDEX_READ_MATCH,
E_INDEX_READ_TERM,
// Last enumerator: its value equals the number of kinds listed above.
E_INDEX_READ_TOTAL_NUM
};
class QueryProcess{
public:
QueryProcess(uint32_t appid, Json::Value& value, Component* component);
~QueryProcess();
int DoJob();
void SetSkipList(SkipList& skipList);
void SetRequest(CTaskRequest* request);
QueryProcess(const Json::Value& value);
virtual~ QueryProcess();
public:
int StartQuery();
void SetRequest(CTaskRequest* const request) { request_ = request; };
void SetParseJsonValue(const Json::Value& value) { parse_value_ = value; };
void SetComponent(RequestContext* const component) { component_ = component;};
void SetDocManager(DocManager* const doc_manager) { doc_manager_ = doc_manager;};
public:
virtual int ParseContent(int logic_type) = 0;
virtual int GetValidDoc(int logic_type , const std::vector<FieldInfo>& keys) = 0;
virtual int ParseContent() = 0;
virtual int GetValidDoc() = 0;
virtual int GetScore();
virtual void SortScore(int& i_sequence , int& i_rank);
virtual const Json::Value& SetResponse();
protected:
void TaskBegin();
virtual int ParseContent();
virtual int GetValidDoc();
virtual int GetScoreAndSort();
virtual void TaskEnd();
void SortByCOrderOp(int& i_rank);
void AscSort(int& i_sequence , int& i_rank);
void DescSort(int& i_sequence , int& i_rank);
void AppendHighLightWord();
int CheckValidDoc();
protected:
Component* component_;
LogicalOperate* logical_operate_;
RequestContext* component_;
DocManager* doc_manager_;
uint32_t appid_;
Json::Value value_;
SkipList skipList_;
CTaskRequest* request_;
SortOperatorBase* sort_operator_base_;
std::set<ScoreDocIdNode>* p_scoredocid_set_;
Json::Value parse_value_;
Json::Value response_;
};
#endif

View File

@ -0,0 +1,205 @@
#include "range_query_process.h"
#include "../valid_doc_filter.h"
// Passes the JSON clause on to QueryProcess and stores the query type that
// ParseContent(int) later writes into the parsed FieldInfo.
RangeQueryProcess::RangeQueryProcess(const Json::Value& value, uint32_t ui_query_type)
: QueryProcess(value)
, ui_query_type_(ui_query_type)
{ }
// Nothing is released here.
RangeQueryProcess::~RangeQueryProcess()
{ }
// Parameterless entry point: parses the clause under the ORKEY logic type and
// returns whatever ParseContent(int) returns.
int RangeQueryProcess::ParseContent(){
return ParseContent(ORKEY);
}
int RangeQueryProcess::ParseContent(int logic_type)
{
std::vector<FieldInfo> fieldInfos;
Json::Value::Members member = parse_value_.getMemberNames();
Json::Value::Members::iterator iter = member.begin();
if(iter != member.end()){ // 一个range下只对应一个字段
std::string fieldname = *iter;
uint32_t segment_tag = 0;
FieldInfo fieldInfo;
fieldInfo.query_type = ui_query_type_;
uint32_t uiRet = DBManager::Instance()->GetWordField(segment_tag, component_->Appid()
, fieldname, fieldInfo);
if (0 == uiRet){
return -RT_GET_FIELD_ERROR;
}
Json::Value field_value = parse_value_[fieldname];
if(field_value.isObject()){
Json::Value start;
Json::Value end;
RANGTYPE ui_range_type;
if(field_value.isMember(GTE)){
start = field_value[GTE];
if(field_value.isMember(LTE)){
end = field_value[LTE];
ui_range_type = RANGE_GELE;
} else if(field_value.isMember(LT)){
end = field_value[LT];
ui_range_type = RANGE_GELT;
} else {
ui_range_type = RANGE_GE;
}
} else if(field_value.isMember(GT)){
start = field_value[GT];
if(field_value.isMember(LTE)){
end = field_value[LTE];
ui_range_type = RANGE_GTLE;
} else if(field_value.isMember(LT)){
end = field_value[LT];
ui_range_type = RANGE_GTLT;
} else {
ui_range_type = RANGE_GT;
}
} else if(field_value.isMember(LTE)){
end = field_value[LTE];
ui_range_type = RANGE_LE;
} else if(field_value.isMember(LT)){
end = field_value[LT];
ui_range_type = RANGE_LT;
}
fieldInfo.range_type = ui_range_type;
log_debug("range_type:%d", ui_range_type);
if(start.isInt()){
fieldInfo.start = start.asInt();
} else if (start.isDouble()){
fieldInfo.start = start.asDouble();
} else {
log_error("range query lower value only support int/double");
}
if (end.isInt()){
fieldInfo.end = end.asInt();
}else if (end.isDouble()){
fieldInfo.end = end.asDouble();
} else {
log_error("range query upper limit value only support int/double");
}
log_debug("start:%f , end:%f" , fieldInfo.start , fieldInfo.end);
fieldInfos.push_back(fieldInfo);
}
if (!fieldInfos.empty()) {
component_->AddToFieldList(logic_type, fieldInfos);
}
} else {
log_error("RangeQueryParser error, value is null");
return -RT_PARSE_CONTENT_ERROR;
}
return 0;
}
// Parameterless entry point: runs the range search on the first ORKEY field
// list of the component.
// Returns -RT_GET_FIELD_ERROR when no ORKEY field list has been registered.
int RangeQueryProcess::GetValidDoc(){
    if (!component_->GetFieldList(ORKEY).empty()) {
        return GetValidDoc(ORKEY , component_->GetFieldList(ORKEY)[FIRST_TEST_INDEX]);
    }
    return -RT_GET_FIELD_ERROR;
}
// Runs the range inverted-index search for `keys` and, on success, stores the
// hits in the ResultContext under logic_type.
// Returns 0 on success, otherwise the non-zero search result (nothing is
// stored in that case).
int RangeQueryProcess::GetValidDoc(int logic_type, const std::vector<FieldInfo>& keys)
{
    std::vector<IndexInfo> matched_docs;
    const int search_result = ValidDocFilter::Instance()->RangeQueryInvertIndexSearch(keys, matched_docs);
    if (0 == search_result) {
        ResultContext::Instance()->SetIndexInfos(logic_type , matched_docs);
    }
    return search_result;
}
// Forwards both arguments to RangeQueryProcess and starts with an empty
// candidate list.
PreTerminal::PreTerminal(const Json::Value& value, uint32_t ui_query_type)
: RangeQueryProcess(value , ui_query_type)
, candidate_doc_()
{}
// Nothing is released here.
PreTerminal::~PreTerminal()
{}
// No-op override: both arguments are ignored and 0 is returned. Candidate
// collection for this class happens in the parameterless GetValidDoc().
int PreTerminal::GetValidDoc(int logic_type, const std::vector<FieldInfo>& keys){
return 0;
}
int PreTerminal::GetValidDoc(){
uint32_t count = 0;
uint32_t N = 2;
uint32_t limit_start = 0;
int try_times = 0;
while(count < component_->PageSize()){
if(try_times++ > 10){
log_debug("ProcessTerminal try_times is the max, return");
break;
}
vector<TerminalRes> and_vecs;
TerminalQryCond query_cond;
query_cond.sort_type = component_->SortType();
query_cond.sort_field = component_->SortField();
query_cond.last_id = component_->LastId();
query_cond.last_score = component_->LastScore();
query_cond.limit_start = limit_start;
query_cond.page_size = component_->PageSize() * N;
int ret = ValidDocFilter::Instance()->ProcessTerminal(component_->AndKeys(), query_cond, and_vecs);
if(0 != ret){
log_error("ProcessTerminal error.");
return -RT_GET_DOC_ERR;
}
for(int i = 0; i < (int)and_vecs.size(); i++){
std::string doc_id = and_vecs[i].doc_id;
std::stringstream ss;
ss << (int)and_vecs[i].score;
std::string ss_key = ss.str();
log_debug("last_score: %s, ss_key: %s, score: %lf", query_cond.last_score.c_str(), ss_key.c_str(), and_vecs[i].score);
if(component_->LastId() != "" && ss_key == query_cond.last_score){ // 翻页时过滤掉已经返回过的文档编号
if(component_->SortType() == SORT_FIELD_DESC && doc_id >= component_->LastId()){
continue;
}
if(component_->SortType() == SORT_FIELD_ASC && doc_id <= component_->LastId()){
continue;
}
}
if(doc_manager_->CheckDocByExtraFilterKey(doc_id) == true){
count++;
candidate_doc_.push_back(and_vecs[i]);
}
}
limit_start += component_->PageSize() * N;
N *= 2;
}
return 0;
}
// Scoring is skipped for this query kind: only logs and returns 0, so no sort
// operator is created here.
int PreTerminal::GetScore(){
log_info("RangeQueryPreTerminal do not need get score");
return 0;
}
// Builds the response straight from candidate_doc_: at most PageSize entries
// are appended to response_["result"], in collection order.
// "count" is the number of entries emitted here, not the real total number of
// matches (open TODO from the original code).
const Json::Value& PreTerminal::SetResponse(){
    response_["code"] = 0;

    int rank = 0;
    for (uint32_t i = 0;
         i < candidate_doc_.size() && rank < (int)component_->PageSize();
         ++i, ++rank) {
        // Bind by reference: copying every candidate just to read two fields
        // is unnecessary.
        const TerminalRes& candidate = candidate_doc_[i];
        Json::Value doc_info;
        doc_info["doc_id"] = Json::Value(candidate.doc_id.c_str());
        doc_info["score"] = Json::Value(candidate.score);
        response_["result"].append(doc_info);
    }

    response_["type"] = 0;
    response_["count"] = rank; // TODO: this count is not the actual total
    return response_;
}

View File

@ -0,0 +1,81 @@
#ifndef RANGE_QUERY_PROCESS_H_
#define RANGE_QUERY_PROCESS_H_
#include "singleton.h"
#include "noncopyable.h"
#include "query_process.h"
const char* const GTE ="gte";
const char* const GT ="gt";
const char* const LTE ="lte";
const char* const LT ="lt";
// Query process for a "range" clause (gte / gt / lte / lt bounds on one field).
class RangeQueryProcess: public QueryProcess{
public:
// ui_query_type is written into the query_type of the parsed FieldInfo.
RangeQueryProcess(const Json::Value& value , uint32_t ui_query_type);
virtual~ RangeQueryProcess();
public:
// Parses the clause and registers the field under the given logic type.
virtual int ParseContent(int logic_type);
// Runs the range search for `keys` and stores the hits under logic_type.
virtual int GetValidDoc(int logic_type, const std::vector<FieldInfo>& keys);
// Stand-alone variant: uses the first ORKEY field list of the component.
virtual int GetValidDoc();
protected:
// Stand-alone variant: parses under the ORKEY logic type.
virtual int ParseContent();
private:
uint32_t ui_query_type_;
};
// RangeQueryProcess variant that gathers its candidates through
// ValidDocFilter::ProcessTerminal and builds the response directly from them,
// without a scoring step.
class PreTerminal : public RangeQueryProcess{
public:
PreTerminal(const Json::Value& value, uint32_t ui_query_type);
virtual~ PreTerminal();
public:
// No-op override; always returns 0.
virtual int GetValidDoc(int logic_type, const std::vector<FieldInfo>& keys);
// Fills candidate_doc_ window by window.
virtual int GetValidDoc();
// Only logs; no score is computed.
virtual int GetScore();
// Emits at most PageSize candidates into the response.
virtual const Json::Value& SetResponse();
private:
// Candidates accepted by GetValidDoc(), in the order they were fetched.
std::vector<TerminalRes> candidate_doc_;
};
// Singleton factory for the range-style query processes.
class RangeQueryGenerator : private noncopyable{
public:
    RangeQueryGenerator() { }
    virtual~ RangeQueryGenerator() { }

public:
    static RangeQueryGenerator* Instance(){
        return CSingleton<RangeQueryGenerator>::Instance();
    }

    static void Destroy(){
        CSingleton<RangeQueryGenerator>::Destroy();
    }

public:
    // Creates the query process that matches iType, or returns NULL for any
    // other type. The returned object is heap allocated; releasing it is the
    // caller's responsibility.
    QueryProcess* GetRangeQueryProcess(int iType , const Json::Value& parse_value){
        if (E_INDEX_READ_RANGE == iType) {
            return new RangeQueryProcess(parse_value , E_INDEX_READ_RANGE);
        }
        if (E_INDEX_READ_PRE_TERM == iType) {
            return new PreTerminal(parse_value , E_INDEX_READ_PRE_TERM);
        }
        return NULL;
    }
};
#endif

View File

@ -0,0 +1,69 @@
#include "term_query_process.h"
// Passes the JSON clause on to QueryProcess; there is no state of its own.
TermQueryProcess::TermQueryProcess(const Json::Value& value)
: QueryProcess(value)
{}
// Nothing is released here.
TermQueryProcess::~TermQueryProcess(){
}
// Parameterless entry point: parses the clause under the ORKEY logic type and
// returns whatever ParseContent(int) returns.
int TermQueryProcess::ParseContent(){
return ParseContent(ORKEY);
}
int TermQueryProcess::ParseContent(int logic_type){
std::vector<FieldInfo> field_info_vec;
Json::Value::Members member = parse_value_.getMemberNames();
Json::Value::Members::iterator iter = member.begin();
std::string field_name;
Json::Value field_value;
if(iter != member.end()){ // 一个term下只对应一个字段
field_name = *iter;
field_value = parse_value_[field_name];
} else {
log_error("TermQueryProcess error, value is null");
return -RT_PARSE_CONTENT_ERROR;
}
uint32_t segment_tag = 0;
FieldInfo field_info;
field_info.query_type = E_INDEX_READ_TERM;
uint32_t uiRet = DBManager::Instance()->GetWordField(segment_tag, component_->Appid()
, field_name, field_info);
if(uiRet != 0 && field_info.index_tag == 0){
ExtraFilterKey extra_filter_key;
extra_filter_key.field_name = field_name;
extra_filter_key.field_value = field_value.asString();
extra_filter_key.field_type = field_info.field_type;
component_->AddToExtraFieldList(logic_type , extra_filter_key);
} else if(uiRet != 0){
field_info.word = field_value.asString();
field_info_vec.push_back(field_info);
} else {
log_error("field_name:%s error, not in the app_field_define", field_name.c_str());
return -RT_PARSE_CONTENT_ERROR;
}
component_->AddToFieldList(logic_type, field_info_vec);
return 0;
}
// Parameterless entry point: runs the term search on the first ORKEY field
// list of the component.
// Returns -RT_GET_FIELD_ERROR when no ORKEY field list has been registered.
int TermQueryProcess::GetValidDoc(){
    if (!component_->GetFieldList(ORKEY).empty()) {
        return GetValidDoc(ORKEY , component_->GetFieldList(ORKEY)[FIRST_TEST_INDEX]);
    }
    return -RT_GET_FIELD_ERROR;
}
int TermQueryProcess::GetValidDoc(int logic_type, const std::vector<FieldInfo>& keys){
log_debug("term query GetValidDoc beginning...");
if (0 == keys[FIRST_SPLIT_WORD_INDEX].index_tag){
return -RT_GET_FIELD_ERROR;
}
std::vector<IndexInfo> index_info_vet;
int iret = ValidDocFilter::Instance()->TextInvertIndexSearch(keys, index_info_vet);
if (iret != 0) { return iret; }
ResultContext::Instance()->SetIndexInfos(logic_type , index_info_vet);
return 0;
}

View File

@ -0,0 +1,19 @@
#ifndef TERM_QUERY_PROCESS_H_
#define TERM_QUERY_PROCESS_H_
#include "query_process.h"
// Query process for a "term" clause (one value looked up on one field).
class TermQueryProcess : public QueryProcess{
public:
TermQueryProcess(const Json::Value& value);
virtual ~TermQueryProcess();
public:
// Parses the clause and registers it under the given logic type.
virtual int ParseContent(int logic_type);
// Runs the text index search for `keys` and stores the hits under logic_type.
virtual int GetValidDoc(int logic_type, const std::vector<FieldInfo>& keys);
private:
// Stand-alone variants; both work on the ORKEY logic type.
virtual int ParseContent();
virtual int GetValidDoc();
};
#endif

View File

@ -1,121 +0,0 @@
#include "bool_query_parser.h"
#include "../db_manager.h"
#include "../split_manager.h"
#include "range_query_parser.h"
#include "term_query_parser.h"
#include "match_query_parser.h"
#include "geo_distance_parser.h"
const char* const NAME ="bool";
const char* const MUST ="must";
const char* const SHOULD ="should";
const char* const MUST_NOT ="must_not";
const char* const TERM ="term";
const char* const MATCH ="match";
const char* const RANGE ="range";
const char* const GEODISTANCE ="geo_distance";
// Stores the app id and a copy of the bool clause. The sub-parser pointers are
// set to NULL explicitly: the destructor deletes them, and they used to be
// left uninitialised when the matching clause kind never occurred.
BoolQueryParser::BoolQueryParser(uint32_t a, Json::Value& v)
    : appid(a)
    , value(v)
    , range_query_parser(NULL)
    , term_query_parser(NULL)
    , match_query_parser(NULL)
    , geo_query_parser(NULL)
{
}

// Releases whichever sub-parsers are still held (delete on NULL is a no-op).
BoolQueryParser::~BoolQueryParser(){
    delete range_query_parser;
    delete term_query_parser;
    delete match_query_parser;
    delete geo_query_parser;
}

// Dispatches one clause to the parser matching its kind, checked in the order
// term, match, range, geo_distance.
//
// value             clause object holding exactly one of the supported keys
// type              logic type passed on to the term/match/range parsers
// query_parser_res  receives the parse result, or the error message
//
// Returns the sub-parser's result, or -RT_PARSE_CONTENT_ERROR when the clause
// holds none of the supported keys.
int BoolQueryParser::DoJobByType(Json::Value& value, uint32_t type, QueryParserRes* query_parser_res){
    // Each branch drops the parser left by a previous clause of the same kind
    // before creating a new one; overwriting the pointer used to leak it.
    if (value.isMember(TERM)) {
        delete term_query_parser;
        term_query_parser = NULL;
        term_query_parser = new TermQueryParser(appid, value[TERM]);
        return term_query_parser->ParseContent(query_parser_res, type);
    }
    if (value.isMember(MATCH)) {
        delete match_query_parser;
        match_query_parser = NULL;
        match_query_parser = new MatchQueryParser(appid, value[MATCH]);
        return match_query_parser->ParseContent(query_parser_res, type);
    }
    if (value.isMember(RANGE)) {
        delete range_query_parser;
        range_query_parser = NULL;
        range_query_parser = new RangeQueryParser(appid, value[RANGE]);
        return range_query_parser->ParseContent(query_parser_res, type);
    }
    if (value.isMember(GEODISTANCE)) {
        delete geo_query_parser;
        geo_query_parser = NULL;
        geo_query_parser = new GeoDistanceParser(appid, value[GEODISTANCE]);
        return geo_query_parser->ParseContent(query_parser_res);
    }

    string err_msg = "BoolQueryParser only support term/match/range/geo_distance!";
    // Pass the message as an argument, never as the format string.
    log_error("%s", err_msg.c_str());
    query_parser_res->ErrMsg() = err_msg;
    return -RT_PARSE_CONTENT_ERROR;
}
// Parses the bool clause. "must", "should" and "must_not" are handled in that
// order and mapped to ANDKEY, ORKEY and INVERTKEY respectively. Each section
// may be a single clause object or an array of clause objects; a section of
// any other JSON type is ignored.
//
// Returns 0 on success, -RT_PARSE_CONTENT_ERROR as soon as one clause fails.
int BoolQueryParser::ParseContent(QueryParserRes* query_parser_res){
    struct Section {
        const char* key;
        int logic_type;
    };
    const Section sections[] = {
        { MUST,     ANDKEY    },
        { SHOULD,   ORKEY     },
        { MUST_NOT, INVERTKEY },
    };
    const size_t section_count = sizeof(sections) / sizeof(sections[0]);

    for (size_t s = 0; s < section_count; ++s) {
        const Section& section = sections[s];
        if (!value.isMember(section.key)) {
            continue;
        }

        Json::Value clauses = value[section.key];
        if (clauses.isArray()) {
            for (int i = 0; i < (int)clauses.size(); i++) {
                if (DoJobByType(clauses[i], section.logic_type, query_parser_res) != 0) {
                    log_error("DoJobByType error!");
                    return -RT_PARSE_CONTENT_ERROR;
                }
            }
        } else if (clauses.isObject()) {
            if (DoJobByType(clauses, section.logic_type, query_parser_res) != 0) {
                log_error("DoJobByType error!");
                return -RT_PARSE_CONTENT_ERROR;
            }
        }
    }
    return 0;
}

View File

@ -1,48 +0,0 @@
/*
* =====================================================================================
*
* Filename: bool_query_parser.h
*
* Description: bool_query_parser class definition.
*
* Version: 1.0
* Created: 05/03/2021
* Revision: none
* Compiler: gcc
*
* Author: zhulin, shzhulin3@jd.com
* Company: JD.com, Inc.
*
* =====================================================================================
*/
#ifndef __BOOL_QUERY_PARSER_H__
#define __BOOL_QUERY_PARSER_H__
#include "query_parser.h"
#include "json/json.h"
class RangeQueryParser;
class TermQueryParser;
class MatchQueryParser;
class GeoDistanceParser;
// Parser for a "bool" clause: walks the must / should / must_not sections and
// hands each contained clause to the matching sub-parser.
class BoolQueryParser : public QueryParser
{
public:
BoolQueryParser(uint32_t a, Json::Value& v);
~BoolQueryParser();
// Parses the whole bool clause into query_parser_res.
int ParseContent(QueryParserRes* query_parser_res);
private:
// Parses one clause; `type` is the logic type of the enclosing section.
int DoJobByType(Json::Value& value, uint32_t type, QueryParserRes* query_parser_res);
private:
uint32_t appid;
Json::Value value;
// Sub-parsers allocated by DoJobByType() and deleted in the destructor.
RangeQueryParser* range_query_parser;
TermQueryParser* term_query_parser;
MatchQueryParser* match_query_parser;
GeoDistanceParser* geo_query_parser;
};
#endif

View File

@ -1,140 +0,0 @@
#include "geo_distance_parser.h"
#include "../db_manager.h"
#include <sstream>
const char* const DISTANCE ="distance";
const int GEO_PRECISION = 6;
const int DEFAULT_DISTANCE = 2;
// Stores the app id and a copy of the geo_distance clause.
// geo is value-initialised and distance starts at 0, so neither member holds
// an indeterminate value before ParseContent() has run.
GeoDistanceParser::GeoDistanceParser(uint32_t a, Json::Value& v)
    : appid(a)
    , value(v)
    , geo()
    , distance(0)
{
}

// Nothing is released here.
GeoDistanceParser::~GeoDistanceParser(){
}
// Splits `src` on `separate_character` and converts every piece with atof().
//
// - The separator may be longer than one character (e.g. ",,").
// - A piece that is exactly one space is skipped.
// - An empty piece between two separators yields 0.0; an empty piece after
//   the last separator is skipped.
// - An empty separator does not split at all: the whole string is treated as
//   a single piece (this case used to loop forever).
//
// Returns the converted values in input order.
std::vector<double> splitDouble(const std::string& src, std::string separate_character)
{
    std::vector<double> values;

    if (separate_character.empty()) {
        if (!src.empty() && src != " ") {
            values.push_back(atof(src.c_str()));
        }
        return values;
    }

    // Positions stay in size_type and are compared with npos; squeezing the
    // result of find() into an int and testing for -1 is not portable.
    const std::string::size_type separator_length = separate_character.size();
    std::string::size_type piece_begin = 0;
    std::string::size_type separator_pos = src.find(separate_character, piece_begin);
    while (separator_pos != std::string::npos) {
        const std::string piece = src.substr(piece_begin, separator_pos - piece_begin);
        if (piece != " ") {
            values.push_back(atof(piece.c_str()));
        }
        piece_begin = separator_pos + separator_length;
        separator_pos = src.find(separate_character, piece_begin);
    }

    // Whatever follows the last separator is the final piece.
    const std::string tail = src.substr(piece_begin);
    if (!tail.empty() && tail != " ") {
        values.push_back(atof(tail.c_str()));
    }
    return values;
}
// Logs err_msg at error level and stores it in the parser result.
void SetErrMsg(QueryParserRes* query_parser_res, string err_msg){
    // Pass the message as an argument, never as the format string: a '%' in
    // the text would otherwise be read as a conversion specifier.
    log_error("%s", err_msg.c_str());
    query_parser_res->ErrMsg() = err_msg;
}
// Parses a geo_distance clause:
//   { "distance": "<km as string>", "<field>": <point> }
// where <point> is one of
//   "lat,lon"                  (string)
//   [lon, lat]                 (array, both elements must be doubles)
//   {"lat": ..., "lon": ...}   (object, both members must be doubles)
//
// A distance of 0, or a missing distance, falls back to DEFAULT_DISTANCE.
// When geohash codes are produced around the point, the result receives one
// FieldInfo per code (only if the field is known and segment_tag is 0), plus
// latitude, longitude and distance.
//
// Returns 0 on success, -RT_PARSE_CONTENT_ERROR on any format error; the
// error text is stored in query_parser_res.
int GeoDistanceParser::ParseContent(QueryParserRes* query_parser_res){
    double distance = 0;
    string fieldname;
    Json::Value::Members member = value.getMemberNames();
    Json::Value::Members::iterator iter = member.begin();
    for (; iter != member.end(); iter++) {
        Json::Value geo_value = value[*iter];
        if (DISTANCE == *iter) {
            if (!geo_value.isString()) {
                SetErrMsg(query_parser_res, "GeoDistanceParser distance should be string, the unit is km.");
                return -RT_PARSE_CONTENT_ERROR;
            }
            distance = atof(geo_value.asString().c_str());
            continue;
        }

        fieldname = *iter;
        if (geo_value.isString()) {
            string geo_str = geo_value.asString();
            vector<double> res = splitDouble(geo_str, ",");
            if (res.size() >= 2) {
                geo.lat = res[0];
                geo.lon = res[1];
            } else {
                SetErrMsg(query_parser_res, "GeoDistanceParser format error.");
                return -RT_PARSE_CONTENT_ERROR;
            }
        } else if (geo_value.isArray()) {
            // Both coordinates must be usable. A non-double element used to be
            // skipped silently, leaving that coordinate unset, while the
            // object form below rejects the same input.
            if (geo_value.size() >= 2 && geo_value[0].isDouble() && geo_value[1].isDouble()) {
                geo.lon = geo_value[0].asDouble();
                geo.lat = geo_value[1].asDouble();
            } else {
                SetErrMsg(query_parser_res, "GeoDistanceParser format error.");
                return -RT_PARSE_CONTENT_ERROR;
            }
        } else if (geo_value.isObject()) {
            if (geo_value.isMember("lat") && geo_value["lat"].isDouble()) {
                geo.lat = geo_value["lat"].asDouble();
            } else {
                SetErrMsg(query_parser_res, "GeoDistanceParser lat format error.");
                return -RT_PARSE_CONTENT_ERROR;
            }
            if (geo_value.isMember("lon") && geo_value["lon"].isDouble()) {
                geo.lon = geo_value["lon"].asDouble();
            } else {
                SetErrMsg(query_parser_res, "GeoDistanceParser lon format error.");
                return -RT_PARSE_CONTENT_ERROR;
            }
        } else {
            SetErrMsg(query_parser_res, "GeoDistanceParser error, value is not string/array/object.");
            return -RT_PARSE_CONTENT_ERROR;
        }
    }

    if (distance == 0) {
        distance = DEFAULT_DISTANCE;
    }

    vector<string> gisCode = GetArroundGeoHash(geo, distance, GEO_PRECISION);
    if (gisCode.size() > 0) {
        vector<FieldInfo> fieldInfos;
        uint32_t segment_tag = 0;
        FieldInfo fieldInfo;
        uint32_t field = DBManager::Instance()->GetWordField(segment_tag, appid, fieldname, fieldInfo);
        if (field != 0 && segment_tag == 0) {
            query_parser_res->HasGis() = 1;
            for (size_t index = 0; index < gisCode.size(); index++) {
                FieldInfo info;
                info.field = fieldInfo.field;
                info.field_type = fieldInfo.field_type;
                info.segment_tag = fieldInfo.segment_tag;
                info.word = gisCode[index];
                fieldInfos.push_back(info);
            }
        }
        if (fieldInfos.size() != 0) {
            query_parser_res->FieldKeysMap().insert(make_pair(fieldInfo.field, fieldInfos));
        }

        stringstream sslat;
        stringstream sslon;
        sslat << geo.lat;
        query_parser_res->Latitude() = sslat.str();
        sslon << geo.lon;
        query_parser_res->Longitude() = sslon.str();
        query_parser_res->Distance() = distance;
    }
    return 0;
}

View File

@ -1,39 +0,0 @@
/*
* =====================================================================================
*
* Filename: geo_distance_parser.h
*
* Description: geo_distance_parser class definition.
*
* Version: 1.0
* Created: 20/04/2021
* Revision: none
* Compiler: gcc
*
* Author: zhulin, shzhulin3@jd.com
* Company: JD.com, Inc.
*
* =====================================================================================
*/
#ifndef __GEO_DISTANCE_PARSER_H__
#define __GEO_DISTANCE_PARSER_H__
#include "query_parser.h"
#include "json/json.h"
#include "geohash.h"
// Parser for a "geo_distance" clause: a centre point on one field plus a
// radius in km.
class GeoDistanceParser : public QueryParser
{
public:
GeoDistanceParser(uint32_t a, Json::Value& v);
~GeoDistanceParser();
// Parses the clause into query_parser_res; returns 0 or -RT_PARSE_CONTENT_ERROR.
int ParseContent(QueryParserRes* query_parser_res);
private:
uint32_t appid;
Json::Value value;
// Centre point filled in by ParseContent().
GeoPoint geo;
// NOTE(review): ParseContent() works on a local variable of the same name;
// this member does not appear to be written in the visible code -- confirm.
double distance;
};
#endif

View File

@ -1,141 +0,0 @@
#include "geo_shape_parser.h"
#include "../db_manager.h"
#include <sstream>
const char* const POINTS ="points";
const int GEO_PRECISION = 6;
// Stores the app id and the geo shape clause that ParseContent() reads later.
GeoShapeParser::GeoShapeParser(uint32_t a, Json::Value& v)
:appid(a),value(v)
{
}
// Nothing is released here.
GeoShapeParser::~GeoShapeParser(){
}
vector<double> GeoShapeParser::splitDouble(const string& src, string separate_character)
{
vector<double> strs;
//分割字符串的长度,这样就可以支持如“,,”多字符串的分隔符
int separate_characterLen = separate_character.size();
int lastPosition = 0, index = -1;
string str;
double pos = 0;
while (-1 != (index = src.find(separate_character, lastPosition)))
{
if (src.substr(lastPosition, index - lastPosition) != " ") {
str = src.substr(lastPosition, index - lastPosition);
pos = atof(str.c_str());
strs.push_back(pos);
}
lastPosition = index + separate_characterLen;
}
string lastString = src.substr(lastPosition);//截取最后一个分隔符后的内容
if (!lastString.empty() && lastString != " "){
pos = atof(lastString.c_str());
strs.push_back(pos);//如果最后一个分隔符后还有内容就入队
}
return strs;
}
// Logs err_msg at error level and stores it in the parser result.
void GeoShapeParser::SetErrMsg(QueryParserRes* query_parser_res, string err_msg){
    // Pass the message as an argument, never as the format string: a '%' in
    // the text would otherwise be read as a conversion specifier.
    log_error("%s", err_msg.c_str());
    query_parser_res->ErrMsg() = err_msg;
}
// Parses a geo_shape clause of the form { <field>: { "points": [ p0, p1, ... ] } }.
//
// Each point may be given as
//   - a string  "lat,lon",
//   - an array  [lon, lat],
//   - an object {"lat": .., "lon": ..}.
// The min/max latitude and longitude over all points are passed to
// GetArroundGeoHash(); every geohash it returns becomes one FieldInfo that is
// stored in query_parser_res->FieldKeysMap() under the field's id, and HasGis()
// is set to 1. Keys are only produced when GetWordField() resolves the field
// (non-zero id) and reports segment_tag == 0.
//
// Returns 0 on success (including "nothing to add") and -RT_PARSE_CONTENT_ERROR,
// with ErrMsg() filled in, when the clause is malformed.
int GeoShapeParser::ParseContent(QueryParserRes* query_parser_res){
    Json::Value::Members member = value.getMemberNames();
    Json::Value::Members::iterator iter = member.begin();
    if(iter == member.end()){ // a geo_shape clause carries exactly one field
        SetErrMsg(query_parser_res, "GeoShapeParser format error, content is null.");
        return -RT_PARSE_CONTENT_ERROR;
    }

    set<double> lat_arr;
    set<double> lon_arr;
    string fieldname = *iter;
    Json::Value field_value = value[fieldname];
    if(!field_value.isMember(POINTS)){
        SetErrMsg(query_parser_res, "GeoShapeParser error, no points content provide.");
        return -RT_PARSE_CONTENT_ERROR;
    }
    Json::Value points = field_value[POINTS];
    if(!points.isArray()){
        SetErrMsg(query_parser_res, "GeoShapeParser error, points is not a array.");
        return -RT_PARSE_CONTENT_ERROR;
    }

    for(int i = 0; i < (int)points.size(); i++){
        double lat = 0.0;
        double lon = 0.0;
        Json::Value geo_value = points[i];
        if(geo_value.isString()){
            // String form: "lat,lon".
            string geo_str = geo_value.asString();
            vector<double> res = splitDouble(geo_str, ",");
            if(res.size() >= 2){
                lat = res[0];
                lon = res[1];
            } else {
                SetErrMsg(query_parser_res, "GeoShapeParser format error.");
                return -RT_PARSE_CONTENT_ERROR;
            }
        } else if (geo_value.isArray()){
            // Array form: [lon, lat]. Both entries must pass isDouble(); previously a
            // failing entry left lat/lon uninitialised and the indeterminate
            // value was still inserted into the bounding sets below.
            if(geo_value.size() >= 2 && geo_value[0].isDouble() && geo_value[1].isDouble()){
                lon = geo_value[0].asDouble();
                lat = geo_value[1].asDouble();
            } else {
                SetErrMsg(query_parser_res, "GeoShapeParser format error.");
                return -RT_PARSE_CONTENT_ERROR;
            }
        } else if (geo_value.isObject()){
            if(geo_value.isMember("lat") && geo_value["lat"].isDouble()){
                lat = geo_value["lat"].asDouble();
            } else {
                SetErrMsg(query_parser_res, "GeoShapeParser lat format error.");
                return -RT_PARSE_CONTENT_ERROR;
            }
            if(geo_value.isMember("lon") && geo_value["lon"].isDouble()){
                lon = geo_value["lon"].asDouble();
            } else {
                SetErrMsg(query_parser_res, "GeoShapeParser lon format error.");
                return -RT_PARSE_CONTENT_ERROR;
            }
        } else {
            SetErrMsg(query_parser_res, "GeoShapeParser error, value is not string/array/object.");
            return -RT_PARSE_CONTENT_ERROR;
        }
        lat_arr.insert(lat);
        lon_arr.insert(lon);
    }

    // An empty points array is not an error; it simply produces no keys.
    if(lon_arr.empty() || lat_arr.empty()){
        return 0;
    }

    // std::set is ordered, so rbegin()/begin() are the max/min of each axis.
    vector<string> gisCode = GetArroundGeoHash(*lon_arr.rbegin(), *lon_arr.begin(), *lat_arr.rbegin(), *lat_arr.begin(), GEO_PRECISION);
    if(gisCode.empty()){
        return 0;
    }

    vector<FieldInfo> fieldInfos;
    uint32_t segment_tag = 0;
    FieldInfo fieldInfo;
    uint32_t field = DBManager::Instance()->GetWordField(segment_tag, appid, fieldname, fieldInfo);
    if (field != 0 && segment_tag == 0) {
        query_parser_res->HasGis() = 1;
        for (size_t index = 0; index < gisCode.size(); index++) {
            FieldInfo info;
            info.field = fieldInfo.field;
            info.field_type = fieldInfo.field_type;
            info.segment_tag = fieldInfo.segment_tag;
            info.word = gisCode[index];
            fieldInfos.push_back(info);
        }
    }
    if (fieldInfos.size() != 0) {
        query_parser_res->FieldKeysMap().insert(make_pair(fieldInfo.field, fieldInfos));
    }
    return 0;
}

View File

@ -1,43 +0,0 @@
/*
* =====================================================================================
*
* Filename: geo_shape_parser.h
*
* Description: geo_shape_parser class definition.
*
* Version: 1.0
* Created: 08/05/2021
* Revision: none
* Compiler: gcc
*
* Author: zhulin, shzhulin3@jd.com
* Company: JD.com, Inc.
*
* =====================================================================================
*/
#ifndef __GEO_SHAPE_PARSER_H__
#define __GEO_SHAPE_PARSER_H__
#include "query_parser.h"
#include "json/json.h"
#include "geohash.h"
// Parser for a geo_shape query clause; plugs into the QueryParser interface.
class GeoShapeParser : public QueryParser
{
public:
// a: application id, v: JSON clause to parse.
GeoShapeParser(uint32_t a, Json::Value& v);
~GeoShapeParser();
// Implements QueryParser::ParseContent for this clause type.
int ParseContent(QueryParserRes* query_parser_res);
private:
// Logs err_msg and records it on query_parser_res.
void SetErrMsg(QueryParserRes* query_parser_res, string err_msg);
// Splits src on separate_character and returns the pieces as doubles.
vector<double> splitDouble(const string& src, string separate_character);
private:
uint32_t appid;      // application id
Json::Value value;   // JSON clause, held by value (a copy, not a reference)
// NOTE(review): geo and distance have no visible use in the matching .cpp and are
// not initialised by the constructor there -- confirm whether they are still needed.
GeoPoint geo;
double distance;
};
#endif

View File

@ -1,66 +0,0 @@
#include "match_query_parser.h"
#include "../db_manager.h"
#include "../split_manager.h"
// Stores the application id and a copy of the JSON clause.
MatchQueryParser::MatchQueryParser(uint32_t a, Json::Value& v)
:appid(a),value(v)
{
}
// Nothing to release explicitly.
MatchQueryParser::~MatchQueryParser(){
}
// QueryParser entry point: parses the clause with the default logic type ORKEY.
int MatchQueryParser::ParseContent(QueryParserRes* query_parser_res){
return ParseContent(query_parser_res, ORKEY);
}
// Parses a match clause of the form { <field>: <text> } and stores the resulting
// keys in the map selected by `type`:
//   ORKEY     -> OrFieldKeysMap()
//   ANDKEY    -> FieldKeysMap()
//   INVERTKEY -> InvertFieldKeysMap()
// Any other `type` stores nothing.
//
// When GetWordField() reports segment_tag == 1 the text is run through
// SplitManager and each "|"-separated term becomes its own FieldInfo; otherwise
// the whole text is used as a single word. A field GetWordField() does not
// resolve (id 0) produces no keys and is not an error.
//
// Returns 0 on success, -RT_PARSE_CONTENT_ERROR when the clause is empty or the
// field value is an array/object (which cannot be read as text).
int MatchQueryParser::ParseContent(QueryParserRes* query_parser_res, uint32_t type){
    Json::Value::Members member = value.getMemberNames();
    Json::Value::Members::iterator iter = member.begin();
    if(iter == member.end()){ // a match clause carries exactly one field
        // The message used to say "TermQueryParser" (copy/paste), which pointed
        // log readers at the wrong parser.
        log_error("MatchQueryParser error, value is null");
        return -RT_PARSE_CONTENT_ERROR;
    }
    string fieldname = *iter;
    Json::Value field_value = value[fieldname];

    uint32_t segment_tag = 0;
    FieldInfo fieldInfo;
    uint32_t field = DBManager::Instance()->GetWordField(segment_tag, appid, fieldname, fieldInfo);
    if (field == 0) {
        return 0;
    }

    // asString() is not defined for arrays and objects; reject them as a parse
    // error instead of letting the JSON library fail inside asString().
    if (field_value.isArray() || field_value.isObject()) {
        log_error("MatchQueryParser error, value of field[%s] is not a scalar", fieldname.c_str());
        return -RT_PARSE_CONTENT_ERROR;
    }

    vector<FieldInfo> fieldInfos;
    if (segment_tag == 1)
    {
        string split_data = SplitManager::Instance()->split(field_value.asString(), appid);
        log_debug("split_data: %s", split_data.c_str());
        vector<string> split_datas = splitEx(split_data, "|");
        for(size_t index = 0; index < split_datas.size(); index++)
        {
            FieldInfo info;
            info.field = fieldInfo.field;
            info.field_type = fieldInfo.field_type;
            info.word = split_datas[index];
            info.segment_tag = fieldInfo.segment_tag;
            fieldInfos.push_back(info);
        }
    }
    else
    {
        fieldInfo.word = field_value.asString();
        fieldInfos.push_back(fieldInfo);
    }

    if(fieldInfos.size() != 0){
        if(type == ORKEY){
            query_parser_res->OrFieldKeysMap().insert(make_pair(fieldInfo.field, fieldInfos));
        } else if(type == ANDKEY){
            query_parser_res->FieldKeysMap().insert(make_pair(fieldInfo.field, fieldInfos));
        } else if(type == INVERTKEY){
            query_parser_res->InvertFieldKeysMap().insert(make_pair(fieldInfo.field, fieldInfos));
        }
    }
    return 0;
}

View File

@ -1,37 +0,0 @@
/*
* =====================================================================================
*
* Filename: match_query_parser.h
*
* Description: match_query_parser class definition.
*
* Version: 1.0
* Created: 20/04/2021
* Revision: none
* Compiler: gcc
*
* Author: zhulin, shzhulin3@jd.com
* Company: JD.com, Inc.
*
* =====================================================================================
*/
#ifndef __MATCH_QUERY_PARSER_H__
#define __MATCH_QUERY_PARSER_H__
#include "query_parser.h"
#include "json/json.h"
// Parser for a match query clause; plugs into the QueryParser interface.
class MatchQueryParser : public QueryParser
{
public:
// a: application id, v: JSON clause to parse.
MatchQueryParser(uint32_t a, Json::Value& v);
~MatchQueryParser();
// Implements QueryParser::ParseContent.
int ParseContent(QueryParserRes* query_parser_res);
// Same as above, but the caller chooses the logic type (ORKEY / ANDKEY / INVERTKEY)
// that decides which key map of query_parser_res receives the result.
int ParseContent(QueryParserRes* query_parser_res, uint32_t type);
private:
uint32_t appid;      // application id
Json::Value value;   // JSON clause, held by value (a copy, not a reference)
};
#endif

View File

@ -1,82 +0,0 @@
/*
* =====================================================================================
*
* Filename: query_parser.h
*
* Description: query_parser class definition.
*
* Version: 1.0
* Created: 19/04/2021
* Revision: none
* Compiler: gcc
*
* Author: zhulin, shzhulin3@jd.com
* Company: JD.com, Inc.
*
* =====================================================================================
*/
#ifndef __QUERY_PARSER_H__
#define __QUERY_PARSER_H__
#include "../comm.h"
#include <map>
// Accumulates everything the query parsers extract from a request: index keys
// grouped by logic type, extra filter keys, geo information and an error message.
// Every accessor returns a mutable reference so parsers can fill the fields in place.
class QueryParserRes{
public:
    // distance is initialised as well: it is handed out by reference through
    // Distance(), so reading it before any parser wrote it used to yield an
    // indeterminate value.
    QueryParserRes()
        : m_has_gis(0)
        , distance(0)
    {
    }
    // Keys keyed by field id; written by the parsers for logic type ANDKEY
    // (and by the geo_shape parser).
    map<uint32_t, vector<FieldInfo> >& FieldKeysMap(){
        return field_keys_map;
    }
    // Keys keyed by field id; written for logic type ORKEY.
    map<uint32_t, vector<FieldInfo> >& OrFieldKeysMap(){
        return or_field_keys_map;
    }
    // Keys keyed by field id; written for logic type INVERTKEY.
    map<uint32_t, vector<FieldInfo> >& InvertFieldKeysMap(){
        return invert_field_keys_map;
    }
    // Extra filter keys; written for logic type ORKEY.
    vector<ExtraFilterKey>& ExtraFilterKeys(){
        return extra_filter_keys;
    }
    // Extra filter keys; written for logic type ANDKEY.
    vector<ExtraFilterKey>& ExtraFilterAndKeys(){
        return extra_filter_and_keys;
    }
    // Extra filter keys; written for logic type INVERTKEY.
    vector<ExtraFilterKey>& ExtraFilterInvertKeys(){
        return extra_filter_invert_keys;
    }
    // Non-zero once a geo clause has contributed keys (starts at 0).
    uint32_t& HasGis(){
        return m_has_gis;
    }
    string& Latitude(){
        return latitude;
    }
    string& Longitude(){
        return longitude;
    }
    // Starts at 0; unit is not defined in this header.
    double& Distance(){
        return distance;
    }
    // Human-readable description of the last parse error, empty when none was set.
    string& ErrMsg(){
        return err_msg;
    }
private:
    uint32_t m_has_gis;
    string latitude;
    string longitude;
    double distance;
    string err_msg;
    map<uint32_t, vector<FieldInfo> > field_keys_map;
    map<uint32_t, vector<FieldInfo> > or_field_keys_map;
    map<uint32_t, vector<FieldInfo> > invert_field_keys_map;
    vector<ExtraFilterKey> extra_filter_keys;
    vector<ExtraFilterKey> extra_filter_and_keys;
    vector<ExtraFilterKey> extra_filter_invert_keys;
};
// Abstract interface implemented by every query-clause parser.
class QueryParser{
public:
// Parses the clause held by the concrete parser into query_parser_res.
// The concrete parsers return 0 on success and a negative error code otherwise.
virtual int ParseContent(QueryParserRes* query_parser_res) = 0;
// Virtual so that parsers can be destroyed through a QueryParser pointer.
virtual ~QueryParser() {};
};
#endif

View File

@ -1,95 +0,0 @@
#include "range_query_parser.h"
#include "../db_manager.h"
// JSON member names recognised inside a range clause.
const char* const GTE ="gte";   // lower bound, inclusive
const char* const GT ="gt";     // lower bound, exclusive
const char* const LTE ="lte";   // upper bound, inclusive
const char* const LT ="lt";     // upper bound, exclusive
// Stores the application id and a copy of the JSON clause.
RangeQueryParser::RangeQueryParser(uint32_t a, Json::Value& v)
:appid(a),value(v)
{
}
// Nothing to release explicitly.
RangeQueryParser::~RangeQueryParser(){
}
// QueryParser entry point: parses the clause with the default logic type ORKEY.
int RangeQueryParser::ParseContent(QueryParserRes* query_parser_res){
return ParseContent(query_parser_res, ORKEY);
}
// Parses a range clause of the form { <field>: { "gte"|"gt": a, "lte"|"lt": b } }
// and stores one FieldInfo (start, end, range_type) in the map selected by `type`:
//   ORKEY     -> OrFieldKeysMap()
//   ANDKEY    -> FieldKeysMap()
//   INVERTKEY -> InvertFieldKeysMap()
//
// "gte" takes precedence over "gt" and "lte" over "lt" when both are present.
// A missing bound is stored as 0; range_type tells the consumer which bounds apply.
// Only integer bounds are accepted.
//
// Returns 0 on success (including "nothing to add") and -RT_PARSE_CONTENT_ERROR
// when the clause is empty or a bound is not an integer.
int RangeQueryParser::ParseContent(QueryParserRes* query_parser_res, uint32_t type){
    Json::Value::Members member = value.getMemberNames();
    Json::Value::Members::iterator iter = member.begin();
    if(iter == member.end()){ // a range clause carries exactly one field
        log_error("RangeQueryParser error, value is null");
        return -RT_PARSE_CONTENT_ERROR;
    }

    vector<FieldInfo> fieldInfos;
    string fieldname = *iter;
    uint32_t segment_tag = 0;
    FieldInfo fieldInfo;
    // Keep the lookup result: 0 means the field could not be resolved. It used to be
    // ignored, so an unresolved field was still inserted under whatever
    // fieldInfo.field happened to hold, unlike the term/match/geo_shape parsers.
    uint32_t field = DBManager::Instance()->GetWordField(segment_tag, appid, fieldname, fieldInfo);
    Json::Value field_value = value[fieldname];
    if(field_value.isObject()){
        Json::Value start;
        Json::Value end;
        if(field_value.isMember(GTE)){
            start = field_value[GTE];
            if(field_value.isMember(LTE)){
                end = field_value[LTE];
                fieldInfo.range_type = RANGE_GELE;
            } else if(field_value.isMember(LT)){
                end = field_value[LT];
                fieldInfo.range_type = RANGE_GELT;
            } else {
                fieldInfo.range_type = RANGE_GE;
            }
        } else if(field_value.isMember(GT)){
            start = field_value[GT];
            if(field_value.isMember(LTE)){
                end = field_value[LTE];
                fieldInfo.range_type = RANGE_GTLE;
            } else if(field_value.isMember(LT)){
                end = field_value[LT];
                fieldInfo.range_type = RANGE_GTLT;
            } else {
                fieldInfo.range_type = RANGE_GT;
            }
        } else if(field_value.isMember(LTE)){
            end = field_value[LTE];
            fieldInfo.range_type = RANGE_LE;
        } else if(field_value.isMember(LT)){
            end = field_value[LT];
            fieldInfo.range_type = RANGE_LT;
        }

        // Bounds are validated even for an unresolved field so that malformed
        // requests keep being reported exactly as before.
        if(!start.isInt() && !start.isNull()){
            log_error("range query only support integer");
            return -RT_PARSE_CONTENT_ERROR;
        }
        if(!end.isInt() && !end.isNull()){
            log_error("range query only support integer");
            return -RT_PARSE_CONTENT_ERROR;
        }
        if(field != 0 && (start.isInt() || end.isInt())){
            fieldInfo.start = start.isInt() ? start.asInt() : 0;
            fieldInfo.end = end.isInt() ? end.asInt() : 0;
            fieldInfos.push_back(fieldInfo);
        }
    }

    if(fieldInfos.size() != 0){
        if(type == ORKEY){
            query_parser_res->OrFieldKeysMap().insert(make_pair(fieldInfo.field, fieldInfos));
        } else if(type == ANDKEY){
            query_parser_res->FieldKeysMap().insert(make_pair(fieldInfo.field, fieldInfos));
        } else if(type == INVERTKEY){
            query_parser_res->InvertFieldKeysMap().insert(make_pair(fieldInfo.field, fieldInfos));
        }
    }
    return 0;
}

View File

@ -1,37 +0,0 @@
/*
* =====================================================================================
*
* Filename: range_query_parser.h
*
* Description: range_query_parser class definition.
*
* Version: 1.0
* Created: 19/04/2021
* Revision: none
* Compiler: gcc
*
* Author: zhulin, shzhulin3@jd.com
* Company: JD.com, Inc.
*
* =====================================================================================
*/
#ifndef __RANGE_QUERY_PARSER_H__
#define __RANGE_QUERY_PARSER_H__
#include "query_parser.h"
#include "json/json.h"
// Parser for a range query clause; plugs into the QueryParser interface.
class RangeQueryParser : public QueryParser
{
public:
// a: application id, v: JSON clause to parse.
RangeQueryParser(uint32_t a, Json::Value& v);
~RangeQueryParser();
// Implements QueryParser::ParseContent.
int ParseContent(QueryParserRes* query_parser_res);
// Same as above, but the caller chooses the logic type (ORKEY / ANDKEY / INVERTKEY)
// that decides which key map of query_parser_res receives the result.
int ParseContent(QueryParserRes* query_parser_res, uint32_t type);
private:
uint32_t appid;      // application id
Json::Value value;   // JSON clause, held by value (a copy, not a reference)
};
#endif

View File

@ -1,64 +0,0 @@
#include "term_query_parser.h"
#include "../db_manager.h"
// Stores the application id and a copy of the JSON clause.
TermQueryParser::TermQueryParser(uint32_t a, Json::Value& v)
:appid(a),value(v)
{
}
// Nothing to release explicitly.
TermQueryParser::~TermQueryParser(){
}
// QueryParser entry point: parses the clause with the default logic type ORKEY.
int TermQueryParser::ParseContent(QueryParserRes* query_parser_res){
return ParseContent(query_parser_res, ORKEY);
}
// Parses a term clause of the form { <field>: <value> }.
//
// A resolved field whose index_tag is 0 becomes an ExtraFilterKey; any other
// resolved field becomes a single FieldInfo keyed by its field id. `type`
// (ORKEY / ANDKEY / INVERTKEY) selects which container of query_parser_res
// receives the entry; other values store nothing. A field GetWordField() does
// not resolve (id 0) adds nothing.
//
// Returns 0 on success, -RT_PARSE_CONTENT_ERROR when the clause has no member.
int TermQueryParser::ParseContent(QueryParserRes* query_parser_res, uint32_t type){
    Json::Value::Members names = value.getMemberNames();
    Json::Value::Members::iterator first = names.begin();
    if(first == names.end()){ // a term clause carries exactly one field
        log_error("TermQueryParser error, value is null");
        return -RT_PARSE_CONTENT_ERROR;
    }
    string fieldname = *first;
    Json::Value json_value = value[fieldname];
    string field_value = json_value.asString();

    uint32_t segment_tag = 0;
    FieldInfo fieldInfo;
    uint32_t field = DBManager::Instance()->GetWordField(segment_tag, appid, fieldname, fieldInfo);
    if(field == 0){
        return 0;
    }

    if(fieldInfo.index_tag == 0){
        // index_tag == 0: the term is recorded as an extra filter key.
        ExtraFilterKey filter_key;
        filter_key.field_name = fieldname;
        filter_key.field_value = field_value;
        filter_key.field_type = fieldInfo.field_type;
        if(type == ORKEY){
            query_parser_res->ExtraFilterKeys().push_back(filter_key);
        } else if (type == ANDKEY) {
            query_parser_res->ExtraFilterAndKeys().push_back(filter_key);
        } else if (type == INVERTKEY) {
            query_parser_res->ExtraFilterInvertKeys().push_back(filter_key);
        }
        return 0;
    }

    // Any other index_tag: the term is recorded as a single field key.
    fieldInfo.word = field_value;
    vector<FieldInfo> keys(1, fieldInfo);
    if(type == ORKEY){
        query_parser_res->OrFieldKeysMap().insert(make_pair(fieldInfo.field, keys));
    } else if(type == ANDKEY) {
        query_parser_res->FieldKeysMap().insert(make_pair(fieldInfo.field, keys));
    } else if(type == INVERTKEY){
        query_parser_res->InvertFieldKeysMap().insert(make_pair(fieldInfo.field, keys));
    }
    return 0;
}

View File

@ -1,37 +0,0 @@
/*
* =====================================================================================
*
* Filename: term_query_parser.h
*
* Description: term_query_parser class definition.
*
* Version: 1.0
* Created: 20/04/2021
* Revision: none
* Compiler: gcc
*
* Author: zhulin, shzhulin3@jd.com
* Company: JD.com, Inc.
*
* =====================================================================================
*/
#ifndef __TERM_QUERY_PARSER_H__
#define __TERM_QUERY_PARSER_H__
#include "query_parser.h"
#include "json/json.h"
// Parser for a term query clause; plugs into the QueryParser interface.
class TermQueryParser : public QueryParser
{
public:
// a: application id, v: JSON clause to parse.
TermQueryParser(uint32_t a, Json::Value& v);
~TermQueryParser();
// Implements QueryParser::ParseContent.
int ParseContent(QueryParserRes* query_parser_res);
// Same as above, but the caller chooses the logic type (ORKEY / ANDKEY / INVERTKEY)
// that decides which container of query_parser_res receives the result.
int ParseContent(QueryParserRes* query_parser_res, uint32_t type);
private:
uint32_t appid;      // application id
Json::Value value;   // JSON clause, held by value (a copy, not a reference)
};
#endif

View File

@ -0,0 +1,288 @@
/*
* =====================================================================================
*
* Filename: component.h
*
* Description: component class definition.
*
* Version: 1.0
* Created: 09/08/2019
* Revision: none
* Compiler: gcc
*
* Author: zhulin, shzhulin3@jd.com
* Company: JD.com, Inc.
*
* =====================================================================================
*/
#include "request_context.h"
#include "split_manager.h"
#include "db_manager.h"
#include "utf8_str.h"
#include <sstream>
// Builds an empty request context. ParseJson() overwrites most of the values
// below; the defaults here only matter until a request has been parsed.
RequestContext::RequestContext()
: or_keys_()
, and_keys_()
, invert_keys_()
, extra_filter_or_keys_()
, extra_filter_and_keys_()
, extra_filter_invert_keys_()
, page_index_(0)
, page_size_(0)
, cache_switch_(0)
, snapshot_switch_(0)
, sort_type_(SORT_RELEVANCE)
, appid_(10001)
, return_all_(0)
, sort_field_("")
, last_id_("")
, last_score_("")
, search_after_(false)
, required_fields_()
, preterminal_tag_(0)
, query_value_()
, has_gis_(false)
{ }
// Nothing to release explicitly.
RequestContext::~RequestContext(){
}
int RequestContext::ParseJson(const char *sz_json, int json_len, Json::Value &recv_packet)
{
Json::Reader r(Json::Features::strictMode());
int ret;
ret = r.parse(sz_json, sz_json + json_len, recv_packet);
if (0 == ret)
{
log_error("the err json string is : %s", sz_json);
log_error("parse json error , errmsg : %s", r.getFormattedErrorMessages().c_str());
return -RT_PARSE_JSON_ERR;
}
if (recv_packet.isMember("appid")){
appid_ = ParseJsonReturnInt("appid" , recv_packet);
}
else {
appid_ = 10001;
}
if(recv_packet.isMember("query")){
query_value_ = recv_packet["query"];
}
if (recv_packet.isMember("page_index")){
page_index_ = ParseJsonReturnInt("page_index" , recv_packet);
}
else {
page_index_ = 1 ;
}
if (recv_packet.isMember("page_size")){
page_size_ = ParseJsonReturnInt("page_size" , recv_packet);
}
else {
page_size_ = 10;
}
if(recv_packet.isMember("sort_type")){
sort_type_ = ParseJsonReturnInt("sort_type" , recv_packet);
}
else {
sort_type_ = SORT_RELEVANCE;
}
if(recv_packet.isMember("sort_field") && recv_packet["sort_field"].isString()){
sort_field_ = recv_packet["sort_field"].asString();
}
else {
sort_field_ = "";
}
if (recv_packet.isMember("return_all")){
return_all_ = ParseJsonReturnInt("return_all" , recv_packet);
}
else {
return_all_ = 0;
}
if(recv_packet.isMember("fields") && recv_packet["fields"].isString()){
std::string fields = recv_packet["fields"].asString();
required_fields_ = splitEx(fields, ",");
}
if (recv_packet.isMember("terminal_tag")){
preterminal_tag_ = ParseJsonReturnInt("terminal_tag" , recv_packet);
}
else {
preterminal_tag_ = 0;
}
if(recv_packet.isMember("last_id") && recv_packet["last_id"].isString()){
last_id_ = recv_packet["last_id"].asString();
}
else {
last_id_ = "";
}
bool score_flag = true;
if (recv_packet.isMember("last_score") && recv_packet["last_score"].isString()){
last_score_ = recv_packet["last_score"].asString();
}
else {
score_flag = false;
last_score_ = "0";
}
if(last_id_ != "" && score_flag == true){
search_after_ = true;
}
if(search_after_ == true && sort_type_ != SORT_FIELD_DESC && sort_type_ != SORT_FIELD_ASC){
log_error("in search_after mode, sort_type must be SORT_FIELD_DESC or SORT_FIELD_ASC.");
return -RT_PARSE_JSON_ERR;
}
log_debug("sort_type:%d , sort_field:%s", sort_type_ , sort_field_.c_str());
return 0;
}
// Reads member `field` of `json_value` as an unsigned 32-bit number.
// Accepts a numeric string (converted with atoi), a signed integer or an
// unsigned integer; any other type (or a missing member) yields 0.
uint32_t RequestContext::ParseJsonReturnInt(
    const std::string& field,
    const Json::Value& json_value)
{
    // Look the member up once instead of once per type check.
    const Json::Value& member = json_value[field];
    if (member.isString()){
        return static_cast<uint32_t>(atoi(member.asString().c_str()));
    }
    if (member.isInt()){
        return static_cast<uint32_t>(member.asInt());
    }
    // Values that only fit an unsigned int (above INT_MAX) fail isInt() and
    // used to be silently turned into 0.
    if (member.isUInt()){
        return static_cast<uint32_t>(member.asUInt());
    }
    return 0;
}
// Loads the per-application cache and snapshot switches from SearchConf.
// When no configuration is found for appid_ both switches keep their current values.
void RequestContext::InitSwitch()
{
    AppInfo app_info;
    if (!SearchConf::Instance()->GetAppInfo(appid_, app_info)){
        return;
    }
    cache_switch_ = app_info.cache_switch;
    snapshot_switch_ = app_info.snapshot_switch;
}
// Appends one group of field keys to the list that belongs to logic type `type`
// (ORKEY / ANDKEY / INVERTKEY). Empty groups and unknown types are ignored.
void RequestContext::AddToFieldList(int type, vector<FieldInfo>& fields)
{
    if (fields.empty()) {
        return;
    }
    if (ORKEY == type) {
        or_keys_.push_back(fields);
        return;
    }
    if (ANDKEY == type) {
        and_keys_.push_back(fields);
        return;
    }
    if (INVERTKEY == type) {
        invert_keys_.push_back(fields);
    }
}
// Returns the key groups collected for `logic_type` (ORKEY / ANDKEY / INVERTKEY).
// An unknown type is logged and answered with a shared, empty list.
const std::vector<std::vector<FieldInfo> >& RequestContext::GetFieldList(int logic_type){
    if (logic_type == ORKEY){
        return or_keys_;
    }
    if (logic_type == ANDKEY){
        return and_keys_;
    }
    if (logic_type == INVERTKEY){
        return invert_keys_;
    }
    log_error("get field list with error type, please check");
    // Function-local static so the returned reference stays valid after the call.
    static std::vector<std::vector<FieldInfo> > no_field_groups;
    return no_field_groups;
}
// Appends one extra filter key to the list that belongs to logic type `type`
// (ORKEY / ANDKEY / INVERTKEY). Unknown types are ignored.
void RequestContext::AddToExtraFieldList(int type , const ExtraFilterKey& extra_field){
    if (type == ORKEY){
        extra_filter_or_keys_.push_back(extra_field);
        return;
    }
    if (type == ANDKEY){
        extra_filter_and_keys_.push_back(extra_field);
        return;
    }
    if (type == INVERTKEY){
        extra_filter_invert_keys_.push_back(extra_field);
    }
}
// Key groups collected for logic type ORKEY (read-only).
const std::vector<std::vector<FieldInfo> >& RequestContext::OrKeys(){
return or_keys_;
}
// Key groups collected for logic type ANDKEY. Unlike its siblings this accessor
// returns a mutable reference, so callers can modify the list in place.
std::vector<std::vector<FieldInfo> >& RequestContext::AndKeys(){
return and_keys_;
}
// Key groups collected for logic type INVERTKEY (read-only).
const std::vector<std::vector<FieldInfo> >& RequestContext::InvertKeys(){
return invert_keys_;
}
// Extra filter keys collected for logic type ORKEY (read-only).
const std::vector<ExtraFilterKey>& RequestContext::ExtraFilterOrKeys(){
return extra_filter_or_keys_;
}
// Extra filter keys collected for logic type ANDKEY (read-only).
const std::vector<ExtraFilterKey>& RequestContext::ExtraFilterAndKeys(){
return extra_filter_and_keys_;
}
// Extra filter keys collected for logic type INVERTKEY (read-only).
const std::vector<ExtraFilterKey>& RequestContext::ExtraFilterInvertKeys(){
return extra_filter_invert_keys_;
}
// Application id of the request ("appid", default 10001).
uint32_t RequestContext::Appid(){
return appid_;
}
// Requested sort type ("sort_type", default SORT_RELEVANCE).
uint32_t RequestContext::SortType(){
return sort_type_;
}
// Requested page index ("page_index", default 1 after ParseJson).
uint32_t RequestContext::PageIndex(){
return page_index_;
}
// Requested page size ("page_size", default 10 after ParseJson).
uint32_t RequestContext::PageSize(){
return page_size_;
}
// Value of "return_all" (default 0).
uint32_t RequestContext::ReturnAll(){
return return_all_;
}
// Cache switch loaded by InitSwitch() (0 until then).
uint32_t RequestContext::CacheSwitch(){
return cache_switch_;
}
// Snapshot switch loaded by InitSwitch() (0 until then).
uint32_t RequestContext::SnapshotSwitch(){
return snapshot_switch_;
}
// Value of "sort_field" (empty when absent); returned by value.
string RequestContext::SortField(){
return sort_field_;
}
// Value of "last_id" (empty when absent); returned by value.
string RequestContext::LastId(){
return last_id_;
}
// Value of "last_score" ("0" when absent after ParseJson); returned by value.
string RequestContext::LastScore(){
return last_score_;
}
// True when the request supplied both last_id and last_score (search_after mode).
bool RequestContext::SearchAfter(){
return search_after_;
}
// Field names parsed from the comma separated "fields" member; mutable reference.
vector<string>& RequestContext::RequiredFields(){
return required_fields_;
}
// Value of "terminal_tag" (default 0).
uint32_t RequestContext::TerminalTag(){
return preterminal_tag_;
}
// The raw "query" member of the request; mutable reference.
Json::Value& RequestContext::GetQuery(){
return query_value_;
}

View File

@ -0,0 +1,97 @@
/*
* =====================================================================================
*
* Filename: component.h
*
* Description: component class definition.
*
* Version: 1.0
* Created: 09/08/2019
* Revision: none
* Compiler: gcc
*
* Author: zhulin, shzhulin3@jd.com
* Modified by: chenyujie ,chenyujie28@jd.com
* Company: JD.com, Inc.
*
* =====================================================================================
*/
#ifndef __COMPONENT_H__
#define __COMPONENT_H__
#include "comm.h"
#include "json/json.h"
#include <string>
#include <vector>
// Holds everything known about one search request: the paging/sorting options
// read from the request JSON, the per-application switches, and the key groups
// collected for each logic type (ORKEY / ANDKEY / INVERTKEY).
class RequestContext
{
public:
RequestContext();
~RequestContext();
public:
// Parses the raw request body into recv_packet and fills the option members.
// Returns 0 on success and a negative error code otherwise.
int ParseJson(const char* sz_json, int json_len, Json::Value &recv_packet);
// Loads the cache/snapshot switches configured for the current appid.
void InitSwitch();
// Key groups and extra filter keys per logic type. Only AndKeys() hands out a
// mutable reference.
const std::vector<std::vector<FieldInfo> >& OrKeys();
std::vector<std::vector<FieldInfo> >& AndKeys();
const std::vector<std::vector<FieldInfo> >& InvertKeys();
const std::vector<ExtraFilterKey>& ExtraFilterOrKeys();
const std::vector<ExtraFilterKey>& ExtraFilterAndKeys();
const std::vector<ExtraFilterKey>& ExtraFilterInvertKeys();
// Plain getters for the parsed request options.
uint32_t Appid();
uint32_t SortType();
uint32_t PageIndex();
uint32_t PageSize();
uint32_t ReturnAll();
uint32_t CacheSwitch();
// NOTE(review): there is no matching data member for TopSwitch() in this class
// and no definition next to the other accessors -- confirm it is defined
// somewhere or drop the declaration.
uint32_t TopSwitch();
uint32_t SnapshotSwitch();
std::string SortField();
std::string LastId();
std::string LastScore();
bool SearchAfter();
std::vector<std::string>& RequiredFields();
uint32_t TerminalTag();
Json::Value& GetQuery();
// Append to / read from the per-logic-type lists; type is ORKEY / ANDKEY / INVERTKEY.
void AddToFieldList(int type, std::vector<FieldInfo>& fields);
const std::vector<std::vector<FieldInfo> >& GetFieldList(int logic_type);
void AddToExtraFieldList(int type , const ExtraFilterKey& extra_field);
// Flag recording whether the request contains a geo clause.
void SetHasGisFlag(bool bFlag) { has_gis_ = bFlag; };
bool GetHasGisFlag() { return has_gis_; };
private:
// Reads member `field` of json_value as an unsigned number (0 when not numeric).
uint32_t ParseJsonReturnInt(const std::string& field, const Json::Value& json_value);
private:
std::vector<std::vector<FieldInfo> > or_keys_;
std::vector<std::vector<FieldInfo> > and_keys_;
std::vector<std::vector<FieldInfo> > invert_keys_;
std::vector<ExtraFilterKey> extra_filter_or_keys_;
std::vector<ExtraFilterKey> extra_filter_and_keys_;
std::vector<ExtraFilterKey> extra_filter_invert_keys_;
uint32_t page_index_;
uint32_t page_size_;
uint32_t cache_switch_;
uint32_t snapshot_switch_;
uint32_t sort_type_;
uint32_t appid_;
uint32_t return_all_;
std::string sort_field_;
std::string last_id_;
std::string last_score_;
bool search_after_;
std::vector<std::string> required_fields_;
uint32_t preterminal_tag_;   // filled from the "terminal_tag" request member
Json::Value query_value_;    // copy of the request's "query" member
bool has_gis_;
};
#endif

View File

@ -0,0 +1,111 @@
#include "result_context.h"
#include <algorithm>
// Starts with every result container empty.
ResultContext::ResultContext()
: index_info_vet_()
, valid_docs_set_()
, highlight_word_set_()
, docid_keyinfovet_map_()
, key_doccount_map_()
{ }
// Nothing to release explicitly.
ResultContext::~ResultContext(){
}
// Empties every result container so the context can be reused.
void ResultContext::Clear(){
index_info_vet_.clear();
valid_docs_set_.clear();
highlight_word_set_.clear();
docid_keyinfovet_map_.clear();
key_doccount_map_.clear();
}
// Adds one word to the highlight set.
void ResultContext::SetHighLightWordSet(const std::string& highlight_word){
highlight_word_set_.insert(highlight_word);
}
// Appends key_info to the list kept for document s_doc_id (created on first use).
void ResultContext::SetDocKeyinfoMap(const std::string& s_doc_id, const KeyInfo& key_info){
docid_keyinfovet_map_[s_doc_id].push_back(key_info);
}
// Records the document count of a word via insert(); presumably an existing
// entry for the same word is kept unchanged -- depends on the KeywordDoccountMap type.
void ResultContext::SetWordDoccountMap(const std::string& s_word, uint32_t ui_doc_count){
key_doccount_map_.insert(std::make_pair(s_word , ui_doc_count));
}
// Adds one document id to the valid-document set.
void ResultContext::SetValidDocs(const std::string& valid_docid){
valid_docs_set_.insert(valid_docid);
}
// Folds index_info_vet into the accumulated result using the set operation that
// matches logic_type: ORKEY -> union, ANDKEY -> intersection,
// INVERTKEY -> difference. Unknown types are ignored.
void ResultContext::SetIndexInfos(int logic_type , std::vector<IndexInfo>& index_info_vet){
    if (logic_type == ORKEY){
        SetOrIndexInfos(index_info_vet);
        return;
    }
    if (logic_type == ANDKEY){
        SetAndIndexInfos(index_info_vet);
        return;
    }
    if (logic_type == INVERTKEY){
        SetInvertIndexInfos(index_info_vet);
    }
}
void ResultContext::SetOrIndexInfos(std::vector<IndexInfo>& or_index_info_vet){
if (index_info_vet_.empty()){
index_info_vet_ = or_index_info_vet;
}else{
std::vector<IndexInfo> index_info_result;
int i_max_size = index_info_vet_.size() + or_index_info_vet.size();
index_info_result.resize(i_max_size);
std::sort(index_info_vet_.begin() , index_info_vet_.end());
std::sort(or_index_info_vet.begin() , or_index_info_vet.end());
std::vector<IndexInfo>::iterator iter = std::set_union(
index_info_vet_.begin(),index_info_vet_.end(),
or_index_info_vet.begin() ,or_index_info_vet.end(),
index_info_result.begin());
index_info_result.resize(iter - index_info_result.begin());
index_info_vet_.swap(index_info_result);
}
}
void ResultContext::SetAndIndexInfos(std::vector<IndexInfo>& and_index_info_vet){
if (index_info_vet_.empty()){
index_info_vet_ = and_index_info_vet;
}else{
std::vector<IndexInfo> index_info_result;
int i_min_size = (index_info_vet_.size() <= and_index_info_vet.size() ? index_info_vet_.size() : and_index_info_vet.size());
index_info_result.resize(i_min_size);
std::sort(index_info_vet_.begin() , index_info_vet_.end());
std::sort(and_index_info_vet.begin() , and_index_info_vet.end());
std::vector<IndexInfo>::iterator iter = std::set_intersection(
index_info_vet_.begin(), index_info_vet_.end(),
and_index_info_vet.begin() , and_index_info_vet.end(),
index_info_result.begin());
index_info_result.resize(iter - index_info_result.begin());
index_info_vet_.swap(index_info_result);
}
}
void ResultContext::SetInvertIndexInfos(std::vector<IndexInfo>& invert_index_info_vet){
if (index_info_vet_.empty()){
index_info_vet_ = invert_index_info_vet;
}else{
std::vector<IndexInfo> index_info_result;
int i_max_size = index_info_vet_.size() + invert_index_info_vet.size();
index_info_result.resize(i_max_size);
std::sort(index_info_vet_.begin() , index_info_vet_.end());
std::sort(invert_index_info_vet.begin() , invert_index_info_vet.end());
std::vector<IndexInfo>::iterator iter = std::set_difference(
index_info_vet_.begin(), index_info_vet_.end(),
invert_index_info_vet.begin() , invert_index_info_vet.end() ,
index_info_result.begin());
index_info_result.resize(iter - index_info_result.begin());
index_info_vet_.swap(index_info_result);
}
}

View File

@ -0,0 +1,52 @@
#ifndef SYSTEM_STATUS_H_
#define SYSTEM_STATUS_H_
#include "comm.h"
#include "singleton.h"
#include "noncopyable.h"
class ResultContext: private noncopyable{
public:
ResultContext();
virtual ~ResultContext();
public:
static ResultContext* Instance(){
return CSingleton<ResultContext>::Instance();
};
static void Destroy(){
CSingleton<ResultContext>::Destroy();
};
public:
void Clear();
void SetHighLightWordSet(const std::string& highlight_word);
const HighLightWordSet& GetHighLightWordSet() const {return highlight_word_set_;};
void SetDocKeyinfoMap(const std::string& s_doc_id, const KeyInfo& key_info);
const DocKeyinfosMap& GetDocKeyinfosMap() const { return docid_keyinfovet_map_;};
void SetWordDoccountMap(const std::string& s_word, uint32_t ui_doc_count);
uint32_t GetKeywordDoccountMap(const std::string& s_word) { return key_doccount_map_[s_word];};
void SetIndexInfos(int logic_type , std::vector<IndexInfo>& index_info_vet);
const std::vector<IndexInfo>& GetIndexInfos() const { return index_info_vet_;};
void SetValidDocs(const std::string& valid_docid);
ValidDocSet* GetValidDocs() { return &valid_docs_set_;};
private:
void SetOrIndexInfos(std::vector<IndexInfo>& or_index_info_vet);
void SetAndIndexInfos(std::vector<IndexInfo>& and_index_info_vet);
void SetInvertIndexInfos(std::vector<IndexInfo>& invert_index_info_vet);
private:
std::vector<IndexInfo> index_info_vet_;
ValidDocSet valid_docs_set_;
HighLightWordSet highlight_word_set_;
DocKeyinfosMap docid_keyinfovet_map_;
KeywordDoccountMap key_doccount_map_;
};
#endif

View File

@ -11,567 +11,43 @@
* Compiler: gcc
*
* Author: zhulin, shzhulin3@jd.com
* Modified by: chenyujie ,chenyujie28@jd.com
* Company: JD.com, Inc.
*
* =====================================================================================
*/
#include "split_manager.h"
#include "search_util.h"
#include "search_task.h"
#include "json/reader.h"
#include "json/writer.h"
#include "timemanager.h"
#include "cpa_md5.h"
#include "data_manager.h"
#include "stem.h"
#include "result_cache.h"
#include "cachelist_unit.h"
#include <netinet/in.h>
#include <algorithm>
#include <set>
#include <sstream>
#include <fstream>
#include <math.h>
#include "stat_index.h"
#include "db_manager.h"
#include "utf8_str.h"
#include "split_manager.h"
#include "monitor.h"
#include "index_sync/sync_index_timer.h"
#include "index_sync/sequence_search_index.h"
#include "order_op.h"
#include "index_tbl_op.h"
#include "valid_doc_filter.h"
#include "process/geo_distance_query_process.h"
#include "process/geo_shape_query_process.h"
#include "process/match_query_process.h"
using namespace std;
// NOTE(review): DOC_CNT has no visible use in this part of the file -- presumably a document-count cap; confirm.
#define DOC_CNT 10000
// (document id, score) pair compared by CmpByValue.
typedef pair<string, double> PAIR;
// Globals defined in another translation unit.
extern CCacheListUnit *cachelist;
extern SyncIndexTimer *globalSyncIndexTimer;
// Orders (id, score) pairs by descending score. Scores closer than 1e-6 are
// treated as equal and the tie is broken by descending id, which keeps the
// order deterministic.
struct CmpByValue {
    // const-qualified so the comparator also works where it is invoked through
    // a const object (e.g. inside ordered containers); it has no state to modify.
    bool operator()(const PAIR& lhs, const PAIR& rhs) const {
        if(fabs(lhs.second - rhs.second) < 0.000001){
            return lhs.first.compare(rhs.first) > 0;
        }
        return lhs.second > rhs.second;
    }
};
#include "process/range_query_process.h"
#include "process/term_query_process.h"
#include "process/bool_query_process.h"
// NOTE(review): this span comes from a compare view and interleaves two versions
// of the constructor: an assignment-style body (m_index_set_cnt / m_has_gis /
// component) followed by a member-initializer-list version (ProcessTask,
// component_, doc_manager_, query_process_). It is not compilable as shown;
// keep only one of the two versions.
SearchTask::SearchTask()
{
m_index_set_cnt = 0;
m_has_gis = 0;
component = new Component();
: ProcessTask()
, component_(new RequestContext())
, doc_manager_(new DocManager(component_))
, query_process_(NULL)
{
ValidDocFilter::Instance()->BindDataBasePointer(component_);
}
// NOTE(review): this span comes from a compare view and interleaves the body of
// GetTopDocIdSetByWord() with the body of ~SearchTask() (the deletes of
// component_, doc_manager_ and query_process_). Braces do not balance, so it is
// not compilable as shown; separate the two functions before editing either.
int SearchTask::GetTopDocIdSetByWord(FieldInfo fieldInfo, vector<TopDocInfo>& doc_info) {
if (DataManager::Instance()->IsSensitiveWord(fieldInfo.word)) {
log_debug("%s is a sensitive word.", fieldInfo.word.c_str());
return 0;
SearchTask::~SearchTask() {
if(component_ != NULL){
delete component_;
}
string word_new = stem(fieldInfo.word);
bool bRet = false;
vector<TopDocInfo> no_filter_docs;
bRet = g_IndexInstance.GetTopDocInfo(m_appid, word_new, no_filter_docs);
if (false == bRet) {
log_error("GetTopDocInfo error.");
return -RT_DTC_ERR;
if(doc_manager_ != NULL){
delete doc_manager_;
}
if (0 == no_filter_docs.size())
return 0;
if (component->SnapshotSwitch() == 1) {
bRet = g_IndexInstance.TopDocValid(m_appid, no_filter_docs, doc_info);
if (false == bRet) {
log_error("GetTopDocInfo by snapshot error.");
return -RT_DTC_ERR;
}
if (query_process_ != NULL){
delete query_process_;
}
else {
for (size_t i = 0; i < no_filter_docs.size(); i++)
{
TopDocInfo info = no_filter_docs[i];
doc_info.push_back(info);
}
}
return 0;
}
// Collects the top documents of every key in component->Keys() and writes one
// score per document id into top_doc_score:
//   DONT_SORT       -> 1
//   SORT_TIMESTAMP  -> the document's created_time
//   anything else   -> the document's weight
// Returns 0 on success, -RT_GET_DOC_ERR as soon as one key lookup fails.
int SearchTask::GetTopDocScore(map<string, double>& top_doc_score)
{
    vector<TopDocInfo> doc_info;
    for (size_t group = 0; group < component->Keys().size(); group++) {
        vector<FieldInfo> group_keys = component->Keys()[group];
        for (size_t k = 0; k < group_keys.size(); k++) {
            if (GetTopDocIdSetByWord(group_keys[k], doc_info) != 0) {
                return -RT_GET_DOC_ERR;
            }
        }
    }

    for (size_t i = 0; i < doc_info.size(); i++)
    {
        const TopDocInfo& doc = doc_info[i];
        double score;
        if (m_sort_type == DONT_SORT) {
            score = 1;
        } else if (m_sort_type == SORT_TIMESTAMP) {
            score = (double)doc.created_time;
        } else {
            score = (double)doc.weight;
        }
        top_doc_score[doc.doc_id] = score;
    }
    return 0;
}
// Computes the set of documents that satisfy the request's key lists and loads
// their content:
//   1. union over the OR keys (component->Keys()),
//   2. intersection over the AND keys, intersected with (1) when OR keys exist,
//   3. intersection with the complete-match keys (complete_keys),
//   4. minus the union over the invert keys,
//   5. doc_manager->GetDocContent() on what is left, filling valid_docs/distances.
// An empty result for a non-empty OR / AND / complete key list ends the search
// early with 0 and leaves valid_docs untouched.
// Returns 0 on success, -RT_GET_DOC_ERR when a logical operation fails and
// -RT_DTC_ERR when the document content cannot be fetched.
int SearchTask::GetValidDoc(map<string, vec> &ves, vector<string> &word_vec, map<string, uint32_t> &key_in_doc, hash_double_map &distances, set<string> &valid_docs){
vector<IndexInfo> doc_id_ver_vec; // final result after all intersections, unions and differences
// key_or
vector<IndexInfo> or_vecs;
logical_operate->SetFunc(vec_union);
int ret = logical_operate->Process(component->Keys(), or_vecs, highlightWord, ves, key_in_doc);
if (ret != 0) {
log_debug("logical_operate error.");
return -RT_GET_DOC_ERR;
}
doc_id_ver_vec.assign(or_vecs.begin(), or_vecs.end());
if ((doc_id_ver_vec.size() == 0) && (component->Keys().size() != 0)) {
log_debug("search result of keys is empty.");
return 0;
}
log_debug("logical_operate begin: %lld.", (long long int)GetSysTimeMicros());
// key_and
vector<IndexInfo> and_vecs;
logical_operate->SetFunc(vec_intersection);
ret = logical_operate->Process(component->AndKeys(), and_vecs, highlightWord, ves, key_in_doc);
if (ret != 0) {
log_debug("logical_operate error.");
return -RT_GET_DOC_ERR;
}
if ((and_vecs.size() == 0) && (component->AndKeys().size() != 0)) {
log_debug("search result of and_keys is empty.");
return 0;
}
if(component->AndKeys().size() != 0){
if(component->Keys().size() != 0){
doc_id_ver_vec = vec_intersection(and_vecs, doc_id_ver_vec);
} else {
doc_id_ver_vec.assign(and_vecs.begin(), and_vecs.end());
}
}
log_debug("logical_operate end: %lld.", (long long int)GetSysTimeMicros());
// key_complete
vector<IndexInfo> complete_vecs;
ret = logical_operate->ProcessComplete(complete_keys, complete_vecs, word_vec, ves, key_in_doc);
if (ret != 0) {
return -RT_GET_DOC_ERR;
}
if ((complete_vecs.size() == 0) && (complete_keys.size() != 0)) {
log_debug("search result of complete_keys is empty.");
return 0;
}
if(complete_keys.size() != 0){
if(component->AndKeys().size() == 0 && component->Keys().size() == 0){
doc_id_ver_vec.assign(complete_vecs.begin(), complete_vecs.end());
} else {
doc_id_ver_vec = vec_intersection(doc_id_ver_vec, complete_vecs);
}
}
// key_invert: the results of all invert fields are unioned first, then subtracted in one step
vector<IndexInfo> invert_vecs;
logical_operate->SetFunc(vec_union);
ret = logical_operate->Process(component->InvertKeys(), invert_vecs, highlightWord, ves, key_in_doc);
if (ret != 0) {
return -RT_GET_DOC_ERR;
}
doc_id_ver_vec = vec_difference(doc_id_ver_vec, invert_vecs);
if (doc_id_ver_vec.size() == 0){
return 0;
}
bool bRet = doc_manager->GetDocContent(m_has_gis, doc_id_ver_vec, valid_docs, distances);
if (false == bRet) {
log_error("GetDocContent error.");
return -RT_DTC_ERR;
}
return 0;
}
// Scores every valid document and inserts it into skipList (or, for field
// sorting, hands it to doc_manager->GetScoreMap()).
//
// top_doc_score is only read here: in the non-relevance / non-timestamp branch,
// documents that already have an entry in it are skipped.
//
// Returns 0 on success, -RT_GET_DOC_ERR when GetValidDoc() fails.
int SearchTask::GetDocScore(map<string, double>& top_doc_score)
{
/***
Overview (the original step list lost part of its text; this summary is
reconstructed from the code below):
1. GetValidDoc() evaluates keys, and_keys, complete_keys etc. and yields the valid documents.
2. Depending on m_sort_type and m_has_gis, each valid document is given a score.
3. Scored documents are inserted into skipList.
***/
map<string, vec> ves; // statistic word information in the latitude of documents
vector<string> word_vec;
map<string, uint32_t> key_in_doc; // how many documents contains key
hash_double_map distances;
set<string> valid_docs;
int ret = GetValidDoc(ves, word_vec, key_in_doc, distances, valid_docs);
if (ret != 0){
log_error("GetValidDoc error.");
return -RT_GET_DOC_ERR;
}
log_debug("GetValidDoc end: %lld. valid_docs size: %d.", (long long int)GetSysTimeMicros(), (int)valid_docs.size());
// BM25 algorithm
uint32_t doc_cnt = DOC_CNT;
double k1 = 1.2;
double k2 = 200;
double K = 1.65;
string doc_id;
string keyword;
uint32_t word_freq = 0;
uint32_t field = 0;
if(m_sort_type == SORT_RELEVANCE || m_sort_type == SORT_TIMESTAMP){
if(m_has_gis){
// GIS query: the score is the distance taken from `distances`.
hash_double_map::iterator dis_iter = distances.begin();
for(; dis_iter != distances.end(); dis_iter++){
doc_id = dis_iter->first;
double score = dis_iter->second;
// Keep the document when the requested distance is (approximately) zero
// or the document lies within the requested distance.
// NOTE(review): a zero distance presumably means "no limit" - confirm.
if ((component->Distance() > -0.0001 && component->Distance() < 0.0001) || (score + 1e-6 <= component->Distance())){
skipList.InsertNode(score, doc_id.c_str());
}
}
} else {
map<string, vec>::iterator ves_iter = ves.begin();
for (; ves_iter != ves.end(); ves_iter++) {
double score = 0;
uint32_t key_docs = 0;
doc_id = ves_iter->first;
vector<KeyInfo> &key_info = ves_iter->second;
// Only documents that survived GetValidDoc() are scored.
if(valid_docs.find(doc_id) == valid_docs.end()){
continue;
}
if (m_sort_type == SORT_TIMESTAMP) { // sort by creation time
// NOTE(review): reads key_info[0] without checking that key_info is non-empty.
score = (double)key_info[0].created_time;
skipList.InsertNode(score, doc_id.c_str());
continue;
}
set<string> word_set;
map<string, vector<int> > pos_map;
map<string, vector<int> > title_pos_map;
for (uint32_t i = 0; i < key_info.size(); i++) {
keyword = key_info[i].word;
if (word_set.find(keyword) == word_set.end()) {
word_set.insert(keyword);
}
word_freq = key_info[i].word_freq;
field = key_info[i].field;
// Remember word positions per location so contiguity can be checked below.
if (field == LOCATE_ANY) {
pos_map[keyword] = key_info[i].pos_vec;
}
if (field == LOCATE_TITLE) {
title_pos_map[keyword] = key_info[i].pos_vec;
}
key_docs = key_in_doc[keyword];
// Accumulate this keyword's contribution to the document score.
// NOTE(review): doc_cnt - key_docs is unsigned arithmetic and wraps if key_docs > doc_cnt.
score += log((doc_cnt - key_docs + 0.5) / (key_docs + 0.5)) * ((k1 + 1)*word_freq) / (K + word_freq) * (k2 + 1) * 1 / (k2 + 1);
}
if (!complete_keys.empty()) { // exact (complete) match requested
if (word_set.size() != word_vec.size()) { // the number of distinct words found in the document differs from the input, so it is not a complete match
continue;
}
else { // the words are contiguous in neither the title nor the body, so the document is rejected
if (CheckWordContinus(word_vec, pos_map) == false && CheckWordContinus(word_vec, title_pos_map) == false) {
continue;
}
}
}
skipList.InsertNode(score, doc_id.c_str());
}
}
} else {
set<string>::iterator set_iter = valid_docs.begin();
for(; set_iter != valid_docs.end(); set_iter++){
doc_id = *set_iter;
double score = 0;
// Documents already present in top_doc_score are not scored again here.
if (top_doc_score.find(doc_id) != top_doc_score.end()) {
continue;
}
if (m_sort_type == SORT_FIELD_ASC || m_sort_type == SORT_FIELD_DESC){
//if(doc_manager->CheckDocByExtraFilterKey(doc_id) == false){
// continue;
//}
doc_manager->GetScoreMap(doc_id, m_sort_type, m_sort_field, m_sort_field_type, m_appid);
} else {
skipList.InsertNode(1, doc_id.c_str());
}
if (m_has_gis) {
if (distances.find(doc_id) == distances.end())
continue;
score = distances[doc_id];
// NOTE(review): in the non-field-sort case the document was already inserted
// with score 1 above, so this is a second InsertNode call for the same doc_id.
if ((component->Distance() > -0.0001 && component->Distance() < 0.0001) || (score <= component->Distance()))
skipList.InsertNode(score, doc_id.c_str());
}
}
}
// For range queries with no explicit sort type, skipList has to be filled here
if (!m_has_gis && ves.size() == 0 && skipList.GetSize() == 0 && m_sort_type == SORT_RELEVANCE) {
set<string>::iterator iter = valid_docs.begin();
for(; iter != valid_docs.end(); iter++){
skipList.InsertNode(1, (*iter).c_str());
}
}
return 0;
}
// Appends at most ten entries of highlightWord, in the set's iteration order,
// to the "hlWord" array of the JSON response.
void SearchTask::AppendHighLightWord(Json::Value& response)
{
    const int max_words = 10;
    int appended = 0;
    for (set<string>::iterator word = highlightWord.begin();
         word != highlightWord.end() && appended < max_words;
         ++word, ++appended) {
        response["hlWord"].append(word->c_str());
    }
}
// Executes the search and writes the JSON reply into `request`.
//
// Two paths:
//   * component->TerminalTag() == 1: paged range lookup through
//     logical_operate->ProcessTerminal(); replies and returns early.
//   * otherwise: optional top-document scoring, GetDocScore(), then the page
//     [limit_start, limit_end] is emitted from top_doc_score followed by either
//     the field-sort maps of doc_manager or a walk over skipList. On the first
//     page the reply may also be stored in cachelist.
//
// Returns 0 on success and -RT_GET_DOC_ERR when a lookup / scoring step fails.
int SearchTask::DoJob(CTaskRequest *request) {
int ret = 0;
// terminal_tag == 1 is handled separately
if(component->TerminalTag() == 1){
uint32_t count = 0;
uint32_t N = 2;
uint32_t limit_start = 0;
vector<TerminalRes> candidate_doc;
int try_times = 0;
// Keep fetching larger windows until a full page of documents passed the
// extra filter, giving up after a bounded number of attempts.
while(count < component->PageSize()){
if(try_times++ > 10){
log_debug("ProcessTerminal try_times is the max, return");
break;
}
vector<TerminalRes> and_vecs;
TerminalQryCond query_cond;
query_cond.sort_type = m_sort_type;
query_cond.sort_field = m_sort_field;
query_cond.last_id = component->LastId();
query_cond.last_score = component->LastScore();
query_cond.limit_start = limit_start;
query_cond.page_size = component->PageSize() * N;
ret = logical_operate->ProcessTerminal(component->AndKeys(), query_cond, and_vecs);
if(0 != ret){
log_error("ProcessTerminal error.");
return -RT_GET_DOC_ERR;
}
for(int i = 0; i < (int)and_vecs.size(); i++){
string doc_id = and_vecs[i].doc_id;
// The score is truncated to int and rendered as text so it can be
// compared with the textual last_score of the previous page.
stringstream ss;
ss << (int)and_vecs[i].score;
string ss_key = ss.str();
log_debug("last_score: %s, ss_key: %s, score: %lf", query_cond.last_score.c_str(), ss_key.c_str(), and_vecs[i].score);
if(component->LastId() != "" && ss_key == query_cond.last_score){ // when paging, skip doc ids that were already returned
if(m_sort_type == SORT_FIELD_DESC && doc_id >= component->LastId()){
continue;
}
if(m_sort_type == SORT_FIELD_ASC && doc_id <= component->LastId()){
continue;
}
}
if(doc_manager->CheckDocByExtraFilterKey(doc_id) == true){
count++;
candidate_doc.push_back(and_vecs[i]);
}
}
// Advance past the window just read and double the next window size.
limit_start += component->PageSize() * N;
N *= 2;
}
Json::FastWriter writer;
Json::Value response;
response["code"] = 0;
int sequence = -1;
int rank = 0;
// Emit at most one page of the collected candidates.
for (uint32_t i = 0; i < candidate_doc.size(); i++) {
if(rank >= (int)component->PageSize()){
break;
}
sequence++;
rank++;
TerminalRes tmp = candidate_doc[i];
Json::Value doc_info;
doc_info["doc_id"] = Json::Value(tmp.doc_id.c_str());
doc_info["score"] = Json::Value(tmp.score);
response["result"].append(doc_info);
}
response["type"] = 0;
response["count"] = rank; // TODO: this count is not the real total
std::string outputConfig = writer.write(response);
request->setResult(outputConfig);
return 0;
}
map<string, double> top_doc_score;
if (component->TopSwitch() == 1) {
ret = GetTopDocScore(top_doc_score);
if (ret != 0) {
return -RT_GET_DOC_ERR;
}
}
ret = GetDocScore(top_doc_score);
if (ret != 0) {
return -RT_GET_DOC_ERR;
}
Json::FastWriter writer;
Json::Value response;
response["code"] = 0;
// `sequence` is the zero-based position of the current hit across all result
// sources; `rank` counts every hit seen and becomes the reported count.
int sequence = -1;
int rank = 0;
int page_size = component->PageSize();
int limit_start = page_size * (component->PageIndex()-1);
int limit_end = page_size * (component->PageIndex()-1) + page_size - 1;
log_debug("search result begin.");
// Top documents are emitted first, ordered by CmpByValue.
vector<PAIR> top_vec(top_doc_score.begin(), top_doc_score.end());
sort(top_vec.begin(), top_vec.end(), CmpByValue());
for (uint32_t i = 0; i < top_vec.size(); i++) {
sequence++;
rank++;
if(component->ReturnAll() == 0){
if (sequence < limit_start || sequence > limit_end) {
continue;
}
}
pair<string, double> tmp = top_vec[i];
Json::Value doc_info;
doc_info["doc_id"] = Json::Value(tmp.first.c_str());
doc_info["score"] = Json::Value(tmp.second);
response["result"].append(doc_info);
}
if((m_sort_type == SORT_FIELD_DESC || m_sort_type == SORT_FIELD_ASC) && skipList.GetSize() == 0){
// Field sort with an empty skipList: the scores live in doc_manager's typed
// score maps and COrderOp writes the page into `response`.
OrderOpCond order_op_cond;
order_op_cond.last_id = component->LastId();
order_op_cond.limit_start = limit_start;
order_op_cond.count = page_size;
order_op_cond.has_extra_filter = false;
if(component->ExtraFilterKeys().size() != 0 || component->ExtraFilterAndKeys().size() != 0 || component->ExtraFilterInvertKeys().size() != 0){
order_op_cond.has_extra_filter = true;
}
if(m_sort_field_type == FIELDTYPE_INT){
rank += doc_manager->ScoreIntMap().size();
COrderOp<int> orderOp(FIELDTYPE_INT, component->SearchAfter(), m_sort_type);
orderOp.Process(doc_manager->ScoreIntMap(), atoi(component->LastScore().c_str()), order_op_cond, response, doc_manager);
} else if(m_sort_field_type == FIELDTYPE_DOUBLE) {
rank += doc_manager->ScoreDoubleMap().size();
COrderOp<double> orderOp(FIELDTYPE_DOUBLE, component->SearchAfter(), m_sort_type);
orderOp.Process(doc_manager->ScoreDoubleMap(), atof(component->LastScore().c_str()), order_op_cond, response, doc_manager);
} else {
rank += doc_manager->ScoreStrMap().size();
COrderOp<string> orderOp(FIELDTYPE_STRING, component->SearchAfter(), m_sort_type);
orderOp.Process(doc_manager->ScoreStrMap(), component->LastScore(), order_op_cond, response, doc_manager);
}
} else if (m_has_gis || m_sort_type == SORT_FIELD_ASC) {
// Walk skipList forwards, starting after the header node.
// NOTE(review): the loop stops at the node whose forward pointer is NULL,
// presumably a trailing sentinel - confirm against the SkipList implementation.
log_debug("m_has_gis, size:%d ", skipList.GetSize());
SkipListNode *tmp = skipList.GetHeader()->level[0].forward;
while (tmp->level[0].forward != NULL) {
// extra filtering via extra_filter_keys (meant for fields with low selectivity)
if(doc_manager->CheckDocByExtraFilterKey(tmp->value) == false){
log_debug("CheckDocByExtraFilterKey failed, %s", tmp->value);
tmp = tmp->level[0].forward;
continue;
}
sequence++;
rank++;
if(component->ReturnAll() == 0){
if (sequence < limit_start || sequence > limit_end) {
tmp = tmp->level[0].forward;
continue;
}
}
Json::Value doc_info;
doc_info["doc_id"] = Json::Value(tmp->value);
doc_info["score"] = Json::Value(tmp->key);
response["result"].append(doc_info);
tmp = tmp->level[0].forward;
}
} else {
// Walk skipList backwards, starting before the footer node.
SkipListNode *tmp = skipList.GetFooter()->backward;
while(tmp->backward != NULL) {
if(doc_manager->CheckDocByExtraFilterKey(tmp->value) == false){
tmp = tmp->backward;
continue;
}
sequence++;
rank++;
if (component->ReturnAll() == 0){
if (sequence < limit_start || sequence > limit_end) {
tmp = tmp->backward;
continue;
}
}
Json::Value doc_info;
doc_info["doc_id"] = Json::Value(tmp->value);
doc_info["score"] = Json::Value(tmp->key);
response["result"].append(doc_info);
tmp = tmp->backward;
}
}
if(m_fields.size() > 0){
doc_manager->AppendFieldsToRes(response, m_fields);
}
if (rank > 0)
AppendHighLightWord(response);
if (m_has_gis) {
response["type"] = 1;
}
else {
response["type"] = 0;
}
response["count"] = rank;
// A non-zero m_index_set_cnt overrides the locally counted total.
if(m_index_set_cnt != 0){
response["count"] = m_index_set_cnt;
}
log_debug("search result end: %lld.", (long long int)GetSysTimeMicros());
std::string outputConfig = writer.write(response);
request->setResult(outputConfig);
// Cache the reply only for a non-empty, non-GIS first page that fits the cache
// value size limit; the key is built from the query parts, sort type and appid.
if (component->ReturnAll() == 0 && component->CacheSwitch() == 1 && component->PageIndex() == 1 && m_has_gis == 0
&& rank > 0 && outputConfig.size() < MAX_VALUE_LEN && m_Primary_Data != "") {
string m_Data_Cache = m_Primary_Data + "|" + component->DataAnd() + "|" + component->DataInvert() + "|" + component->DataComplete() + "|" +
ToString(m_sort_type) + "|" + ToString(m_appid);
unsigned data_size = m_Data_Cache.size();
int ret = cachelist->add_list(m_Data_Cache.c_str(), outputConfig.c_str(), data_size, outputConfig.size());
if (ret != 0) {
log_error("add to cache_list error, ret: %d.", ret);
}
else {
log_debug("add to cache_list: %s.", m_Data_Cache.c_str());
}
}
return 0;
}
int SearchTask::Process(CTaskRequest *request)
@ -579,110 +55,49 @@ int SearchTask::Process(CTaskRequest *request)
log_debug("SearchTask::Process begin: %lld.", (long long int)GetSysTimeMicros());
common::CallerInfo caller_info = common::ProfilerMonitor::GetInstance().RegisterInfo(std::string("searchEngine.searchService.searchTask"));
Json::Value recv_packet;
string request_string = request->buildRequsetString();
if (component->ParseJson(request_string.c_str(), request_string.length(), recv_packet) != 0) {
std::string request_string = request->buildRequsetString();
if (component_->ParseJson(request_string.c_str(), request_string.length(), recv_packet) != 0) {
string str = GenReplyStr(PARAMETER_ERR);
request->setResult(str);
common::ProfilerMonitor::GetInstance().FunctionError(caller_info);
common::ProfilerMonitor::GetInstance().RegisterInfoEnd(caller_info);
return -RT_PARSE_JSON_ERR;
}
m_Primary_Data = component->Data();
m_appid = component->Appid();
m_sort_type = component->SortType();
m_sort_field = component->SortField();
if(component->Fields().size() > 0){
m_fields.assign(component->Fields().begin(), component->Fields().end());
}
component_->InitSwitch();
skipList.InitList();
component->InitSwitch();
log_debug("m_Data: %s", m_Primary_Data.c_str());
m_query_ = component->GetQuery();
if(m_query_.isObject()){
if(m_query_.isMember("match")){
query_process_ = new MatchQueryProcess(m_appid, m_query_["match"], component);
} else {
log_error("query type error.");
return -RT_PARSE_JSON_ERR;
}
query_process_->SetSkipList(skipList);
Json::Value query = component_->GetQuery();
if(query.isObject()){
if(query.isMember(MATCH)){
query_process_ = new MatchQueryProcess(query[MATCH]);
}else if(query.isMember(TERM)){
query_process_ = new TermQueryProcess(query[TERM]);
}else if (query.isMember(GEODISTANCE)){
query_process_ = new GeoDistanceQueryProcess(query[GEODISTANCE]);
}else if (query.isMember(GEOSHAPE)){
query_process_ = new GeoShapeQueryProcess(query[GEOSHAPE]);
}else if (query.isMember(RANGE)){
query_process_ = RangeQueryGenerator::Instance()->GetRangeQueryProcess(E_INDEX_READ_RANGE , query[RANGE]);
}else if (query.isMember(BOOL)){
query_process_ = new BoolQueryProcess(query[BOOL]);
}else{
log_error("no suit query process.");
return -RT_PARSE_JSON_ERR;
}
query_process_->SetRequest(request);
int ret = query_process_->DoJob();
if(ret != 0){
log_error("query_process_ DoJob error, ret: %d", ret);
return ret;
}
return 0;
}
query_process_->SetComponent(component_);
query_process_->SetDocManager(doc_manager_);
string err_msg = "";
int ret = component->GetQueryWord(m_has_gis, err_msg);
if (ret != 0) {
string str = GenReplyStr(PARAMETER_ERR, err_msg);
request->setResult(str);
common::ProfilerMonitor::GetInstance().FunctionError(caller_info);
return ret;
}
if(component->TerminalTag() == 1 && component->TerminalTagValid() == false){
log_error("TerminalTag is 1 and TerminalTagValid is false.");
common::ProfilerMonitor::GetInstance().FunctionError(caller_info);
return -RT_PARSE_JSON_ERR;
}
doc_manager = new DocManager(component);
log_debug("cache_switch: %u", component->CacheSwitch());
if (component->ReturnAll() == 0 && component->CacheSwitch() == 1 && component->PageIndex() == 1 && m_Primary_Data != "" && m_has_gis == 0) {
string m_Data_Cache = m_Primary_Data + "|" + component->DataAnd() + "|" + component->DataInvert() + "|" + component->DataComplete() + "|" +
ToString(m_sort_type) + "|" + ToString(m_appid);
uint8_t value[MAX_VALUE_LEN] = { 0 };
unsigned vsize = 0;
if (cachelist->in_list(m_Data_Cache.c_str(), m_Data_Cache.size(), value, vsize))
{
statmgr.GetItemU32(INDEX_SEARCH_HIT_CACHE)++;
log_debug("hit cache.");
value[vsize] = '\0';
std::string outputConfig = (char *)value;
request->setResult(outputConfig);
int ret = query_process_->StartQuery();
if(ret != 0){
std::string str = GenReplyStr(PARAMETER_ERR);
request->setResult(str);
common::ProfilerMonitor::GetInstance().FunctionError(caller_info);
common::ProfilerMonitor::GetInstance().RegisterInfoEnd(caller_info);
return 0;
log_error("query_process_ StartQuery error, ret: %d", ret);
return ret;
}
}
if (component->DataComplete() != "") {
FieldInfo fieldInfo;
string split_data = SplitManager::Instance()->split(component->DataComplete(), m_appid);
log_debug("complete split_data: %s", split_data.c_str());
vector<string> split_datas = splitEx(split_data, "|");
for(size_t i = 0; i < split_datas.size(); i++) {
fieldInfo.word = split_datas[i];
complete_keys.push_back(fieldInfo);
}
}
logical_operate = new LogicalOperate(m_appid, m_sort_type, m_has_gis, component->CacheSwitch());
ret = DoJob(request);
if (ret != 0) {
string str = GenReplyStr(PARAMETER_ERR);
request->setResult(str);
common::ProfilerMonitor::GetInstance().FunctionError(caller_info);
common::ProfilerMonitor::GetInstance().RegisterInfoEnd(caller_info);
return ret;
}
common::ProfilerMonitor::GetInstance().RegisterInfoEnd(caller_info);
return 0;
}
// Releases the helper objects owned through component, logical_operate and
// doc_manager.
SearchTask::~SearchTask() {
    // `delete` on a null pointer is a no-op, so explicit null guards are not needed.
    delete component;
    delete logical_operate;
    delete doc_manager;
}
}

View File

@ -26,57 +26,28 @@
#include "index_tbl_op.h"
#include "task_request.h"
#include "skiplist.h"
#include "component.h"
#include "logical_operate.h"
#include "request_context.h"
#include "valid_doc_filter.h"
#include "doc_manager.h"
#include <string>
#include <map>
#include <vector>
#include "task_request.h"
#include "process/query_process.h"
using namespace std;
typedef vector<KeyInfo> vec;
// Task that handles one search request: Process() is the entry point declared
// here as an override point of ProcessTask.
// NOTE(review): this listing declares the constructor, destructor, Process()
// and query_process_ twice; it looks like both sides of a diff are shown
// together - confirm against the real header before relying on it.
class SearchTask : public ProcessTask
{
public:
SearchTask();
virtual int Process(CTaskRequest *request);
virtual ~SearchTask();
SearchTask();
virtual ~SearchTask();
public:
// Handles `request`; the reply is delivered through the request object.
virtual int Process(CTaskRequest *request);
private:
// Runs the search and writes the JSON reply into `request`.
int DoJob(CTaskRequest *request);
// Appends the top documents found for one word to doc_info.
int GetTopDocIdSetByWord(FieldInfo fieldInfo, vector<TopDocInfo>& doc_info);
// Fills top_doc_score with one score per top document id.
int GetTopDocScore(map<string, double>& top_doc_score);
// Scores the valid documents; top_doc_score is consulted to skip documents.
int GetDocScore(map<string, double>& top_doc_score);
// Resolves the valid documents for the query's key groups.
int GetValidDoc(map<string, vec> &ves, vector<string> &word_vec, map<string, uint32_t> &key_in_doc, hash_double_map &distances, set<string> &valid_docs);
// Adds up to ten highlight words to the response.
void AppendHighLightWord(Json::Value& response);
private:
Component *component;
LogicalOperate *logical_operate;
DocManager *doc_manager;
vector<FieldInfo> complete_keys;
string m_Primary_Data;
FIELDTYPE m_sort_field_type;
uint32_t m_index_set_cnt;
uint32_t m_appid;
uint32_t m_sort_type;
string m_sort_field;
vector<string> m_fields;
uint32_t m_has_gis; // whether the query for this appid involves geographic (GIS) information
set<string> highlightWord;
SkipList skipList;
QueryProcess* query_process_;
Json::Value m_query_;
RequestContext* component_;
DocManager* doc_manager_;
QueryProcess* query_process_;
};
#endif

File diff suppressed because it is too large Load Diff

View File

@ -28,7 +28,9 @@
#include "comm.h"
#include "search_conf.h"
#include "index_tbl_op.h"
#include "result_context.h"
using namespace std;
struct GeoPointContext;
vector<int> splitInt(const string& src, string separate_character);
set<string> splitStr(const string& src, string separate_character);
@ -57,17 +59,17 @@ set<string> sets_intersection(set<string> v1, set<string> v2); // 集合求交
set<string> sets_union(set<string> v1, set<string> v2); // 集合求并集
set<string> sets_difference(set<string> v1, set<string> v2); // 集合求差集
double strToDouble(const string& str);
bool GetGisDistance(uint32_t appid, const string& latLeft, const string& lngLeft, hash_double_map& distances, hash_string_map& doc_content);
bool GetGisDistance(uint32_t appid, const GeoPointContext& geo_point, const hash_string_map& doc_content , hash_double_map& distances);
void ConvertCharIntelligent(const string word, IntelligentInfo &info, int &len);
void ConvertIntelligent(const vector<Content> &result, IntelligentInfo &info, bool &flag);
bool GetGisCode(string lng, string lat, string ip, double distance, vector<string>& gisCode);
bool GetGisCode(const vector<string>& lng_arr, const vector<string>& lat_arr, vector<string>& gisCode);
uint32_t GetIpNum(string ip);
int ShiftIntelligentInfo(IntelligentInfo &info, int len);
bool GetSuggestDoc(FieldInfo& fieldInfo, uint32_t len, const IntelligentInfo &info, vector<IndexInfo> &doc_id_set);
bool GetSuggestDoc(FieldInfo& fieldInfo, uint32_t len, const IntelligentInfo &info, vector<IndexInfo> &doc_id_set, uint32_t appid);
bool GetSuggestDocWithoutCharacter(FieldInfo& fieldInfo, uint32_t len, const IntelligentInfo &info, vector<IndexInfo> &doc_id_set);
int GetDocByShiftWord(FieldInfo fieldInfo, vector<IndexInfo> &doc_id_set, uint32_t appid, set<string>& highlightWord);
int GetDocByShiftEnWord(FieldInfo fieldInfo, vector<IndexInfo> &doc_id_set, uint32_t appid, set<string>& highlightWord);
int GetDocByShiftWord(FieldInfo fieldInfo, vector<IndexInfo> &doc_id_set, uint32_t appid);
int GetDocByShiftEnWord(FieldInfo fieldInfo, vector<IndexInfo> &doc_id_set, uint32_t appid);
uint64_t GetSysTimeMicros();
string trim(string& str);
string delPrefix(string& str);

View File

@ -0,0 +1,23 @@
#include "geo_query_sort_operator.h"
// Forwards the request context and document manager to SortOperatorBase;
// this class adds no state of its own.
GeoQuerySortOperator::GeoQuerySortOperator(RequestContext* request_cnt,
                                           DocManager* doc_manager)
    : SortOperatorBase(request_cnt, doc_manager)
{
}
// Nothing to release here.
GeoQuerySortOperator::~GeoQuerySortOperator()
{
}
void GeoQuerySortOperator::RelevanceSort()
{
log_debug("relevance score sort type");
const std::vector<IndexInfo>& o_index_info_vet = ResultContext::Instance()->GetIndexInfos();
std::set<std::string>::iterator valid_docs_iter = p_valid_docs_set_->begin();
for(; valid_docs_iter != p_valid_docs_set_->end(); valid_docs_iter++){
std::vector<IndexInfo>::const_iterator index_info_iter = o_index_info_vet.cbegin();
for (; index_info_iter != o_index_info_vet.cend(); ++index_info_iter){
if ((*valid_docs_iter) == (index_info_iter->doc_id)){
scoredocid_set_.insert(ScoreDocIdNode(index_info_iter->distance , index_info_iter->doc_id));
}
}
}
}

View File

@ -0,0 +1,15 @@
#ifndef GEO_QUERY_SORT_OPERATOR_H_
#define GEO_QUERY_SORT_OPERATOR_H_
#include "sort_operator_base.h"
// Sort operator for geo queries; replaces only the relevance step of
// SortOperatorBase.
class GeoQuerySortOperator : public SortOperatorBase
{
public:
    GeoQuerySortOperator(RequestContext* request_cnt, DocManager* doc_manager);
    virtual ~GeoQuerySortOperator();

private:
    // Overrides SortOperatorBase::RelevanceSort().
    virtual void RelevanceSort();
};
#endif

View File

@ -0,0 +1,101 @@
#include "sort_operator_base.h"
// Stores the collaborators and takes the valid-document set from
// ResultContext; the score set starts empty and the sort field type is
// value-initialised.
SortOperatorBase::SortOperatorBase(RequestContext* request_cnt, DocManager* doc_manager)
    : component_(request_cnt),
      doc_manager_(doc_manager),
      p_valid_docs_set_(ResultContext::Instance()->GetValidDocs()),
      scoredocid_set_(),
      sort_field_type_()
{}
// The destructor deletes none of the stored pointers.
SortOperatorBase::~SortOperatorBase() {}
// Runs the sort step selected by ui_sort_type and returns a pointer to the
// internal score set.
//   SORT_RELEVANCE / SORT_GEO_DISTANCE -> RelevanceSort()
//   DONT_SORT                          -> NoneSort()
//   SORT_FIELD_ASC / SORT_FIELD_DESC   -> AssignFieldSort()
// Any other value runs no sort step; the set is returned as it is.
std::set<ScoreDocIdNode>* SortOperatorBase::GetSortOperator(uint32_t ui_sort_type)
{
    log_debug("GetSortOperator beginning...");

    const bool by_relevance = (ui_sort_type == SORT_RELEVANCE) || (ui_sort_type == SORT_GEO_DISTANCE);
    const bool by_field = (ui_sort_type == SORT_FIELD_ASC) || (ui_sort_type == SORT_FIELD_DESC);

    if (by_relevance) {
        RelevanceSort();
    } else if (ui_sort_type == DONT_SORT) {
        NoneSort();
    } else if (by_field) {
        AssignFieldSort();
    }
    return &scoredocid_set_;
}
void SortOperatorBase::RelevanceSort()
{
// 按照相关度得分,并以此排序
log_debug("relevance score sort type");
// 范围查的时候如果不指定排序类型需要在这里对skipList进行赋值
const DocKeyinfosMap& docid_keyinfovet_map = ResultContext::Instance()->GetDocKeyinfosMap();
if (docid_keyinfovet_map.empty() && scoredocid_set_.empty()) {
std::set<std::string>::iterator iter = p_valid_docs_set_->begin();
for(; iter != p_valid_docs_set_->end(); iter++){
scoredocid_set_.insert(ScoreDocIdNode(1,*iter));
}
return;
}
std::map<std::string, KeyInfoVet>::const_iterator docid_keyinfovet_iter = docid_keyinfovet_map.cbegin();
for (; docid_keyinfovet_iter != docid_keyinfovet_map.cend(); ++ docid_keyinfovet_iter){
std::string doc_id = docid_keyinfovet_iter->first;
const KeyInfoVet& key_info = docid_keyinfovet_iter->second;
if(p_valid_docs_set_->find(doc_id) == p_valid_docs_set_->end()){
continue;
}
double score = 0.0;
for (uint32_t i = 0; i < key_info.size(); i++) {
std::string keyword = key_info[i].word;
uint32_t ui_word_freq = key_info[i].word_freq;
uint32_t ui_doc_count = ResultContext::Instance()->GetKeywordDoccountMap(keyword);
score += log((DOC_CNT - ui_doc_count + 0.5) / (ui_doc_count + 0.5)) * ((D_BM25_K1 + 1)*ui_word_freq) \
/ (D_BM25_K + ui_word_freq) * (D_BM25_K2 + 1) * 1 / (D_BM25_K2 + 1);
log_debug("loop score[%d]:%f", i , score);
}
scoredocid_set_.insert(ScoreDocIdNode(score , doc_id));
}
}
void SortOperatorBase::NoneSort()
{
log_debug("no sort type");
std::set<std::string>::iterator valid_docs_iter = p_valid_docs_set_->begin();
for(; valid_docs_iter != p_valid_docs_set_->end(); valid_docs_iter++){
std::string doc_id = *valid_docs_iter;
scoredocid_set_.insert(ScoreDocIdNode(1 , doc_id));
}
}
void SortOperatorBase::AssignFieldSort()
{
std::set<std::string>::iterator valid_docs_iter = p_valid_docs_set_->begin();
for(; valid_docs_iter != p_valid_docs_set_->end(); valid_docs_iter++){
std::string doc_id = *valid_docs_iter;
doc_manager_->GetScoreMap(doc_id, component_->SortType()
, component_->SortField(), sort_field_type_);
}
log_debug("assign field sort type , order option:%d" , (int)sort_field_type_);
}

View File

@ -0,0 +1,36 @@
#ifndef SORT_OPERATOR_BASE_H_
#define SORT_OPERATOR_BASE_H_
#include <set>
#include "log.h"
#include "../comm.h"
#include "../result_context.h"
#include "../request_context.h"
#include "../doc_manager.h"
// Base class of the sort operators. GetSortOperator() picks one of the
// virtual sort steps by sort type and exposes the resulting score set;
// subclasses may override individual steps.
class SortOperatorBase
{
public:
    SortOperatorBase(RequestContext* request_cnt, DocManager* doc_manager);
    virtual ~SortOperatorBase();

    // Runs the sort step for ui_sort_type and returns the internal score set.
    std::set<ScoreDocIdNode>* GetSortOperator(uint32_t ui_sort_type);

    // Field type of the sort field, as an int.
    int GetSortFieldType() { return sort_field_type_; }

protected:
    virtual void RelevanceSort();
    virtual void NoneSort();
    virtual void AssignFieldSort();

    RequestContext* component_;                 // request being served
    DocManager* doc_manager_;                   // used by AssignFieldSort()
    ValidDocSet* p_valid_docs_set_;             // taken from ResultContext in the constructor
    std::set<ScoreDocIdNode> scoredocid_set_;   // (score, doc_id) nodes produced by the sort steps

private:
    FIELDTYPE sort_field_type_;
};
#endif

View File

@ -0,0 +1,32 @@
#ifndef TIMER_COUNTER_H_
#define TIMER_COUNTER_H_
#include <sys/time.h>
// Timer::GetInstance()->Start();
// log_error("costtime:%f",Timer::GetInstance()->Stop());
// Minimal stopwatch on top of gettimeofday(), intended for ad-hoc performance
// measurements. GetInstance() hands out one shared instance; Start() records
// the current time and Stop() reports the time elapsed since then.
class Timer{
public:
    // Zero-initialise the start stamp so that Stop() never reads indeterminate
    // data when it is called before Start().
    Timer() : tm_start_() {}
    ~Timer() {}
public:
    // Returns the process-wide shared timer.
    static Timer* GetInstance(){
        static Timer timer;
        return &timer;
    }
public:
    // Records "now" as the start of the measured interval.
    void Start(){
        gettimeofday(&tm_start_ , NULL);
    }
    // Returns the time elapsed since the last Start(), in the same unit as
    // before (microseconds divided by 100000).
    //
    // The division is done in floating point: the previous all-integer
    // expression truncated the quotient before it was converted to float, so
    // the fractional part was always lost.
    //
    // NOTE(review): dividing microseconds by 100000 yields units of 0.1 s; the
    // divisor is kept unchanged - confirm whether seconds (1000000) or
    // milliseconds (1000) was intended.
    float Stop(){
        timeval tm_stop;
        gettimeofday(&tm_stop , NULL);
        const double elapsed_us = (tm_stop.tv_sec - tm_start_.tv_sec) * 1000000.0
                + static_cast<double>(tm_stop.tv_usec - tm_start_.tv_usec);
        return static_cast<float>(elapsed_us / 100000.0);
    }
private:
    timeval tm_start_;
};
#endif

View File

@ -147,6 +147,10 @@ std::vector<DocIdEntry<T> > CMaxHeap<T>::getNumbers(vector<DocIdEntry<T> >& arr,
}
}
if (max_heap_vec.empty()){
return max_heap_vec;
}
buildMaxHeap(max_heap_vec);
for(int i = k; i < (int)arr.size(); ++i){
// 出现比堆顶元素小且大于last_value的值, 置换堆顶元素, 并调整堆
@ -171,6 +175,11 @@ std::vector<DocIdEntry<T> > CMaxHeap<T>::getNumbers(vector<DocIdEntry<T> >& arr,
break;
}
}
if (max_heap_vec.empty()){
return max_heap_vec;
}
buildMaxHeap(max_heap_vec);
for(int i = k; i < (int)arr.size(); ++i){
// 出现比堆顶元素小的值, 置换堆顶元素, 并调整堆

View File

@ -0,0 +1,350 @@
/*
* =====================================================================================
*
* Filename: valid_doc_filter.h
*
* Description: logical operate class definition.
*
* Version: 1.0
* Created: 09/08/2018
* Revision: none
* Compiler: gcc
*
* Author: zhulin, shzhulin3@jd.com
* Modified by: chenyujie ,chenyujie28@jd.com
* Company: JD.com, Inc.
*
* =====================================================================================
*/
#include "valid_doc_filter.h"
#include "search_util.h"
#include "cachelist_unit.h"
#include "data_manager.h"
#include "json/reader.h"
#include "json/writer.h"
#include "index_tbl_op.h"
#include "index_sync/sync_index_timer.h"
#include "index_sync/sequence_search_index.h"
#include "stem.h"
#include "key_format.h"
#include <sstream>
#include <iomanip>
extern SyncIndexTimer* globalSyncIndexTimer;
extern CCacheListUnit* indexcachelist;
// Starts without a data source: p_data_base_ is null after construction.
ValidDocFilter::ValidDocFilter()
    : p_data_base_(NULL)
{
}
// p_data_base_ is not deleted here.
ValidDocFilter::~ValidDocFilter()
{
}
//汉拼无需memcomparable format
int ValidDocFilter::HanPinTextInvertIndexSearch(const std::vector<FieldInfo>& keys
, std::vector<IndexInfo>& index_info_vet){
if (keys.empty()){
return -RT_GET_FIELD_ERROR;
}
std::vector<FieldInfo>::const_iterator iter = keys.cbegin();
for (; iter != keys.cend(); ++iter){
std::vector<IndexInfo> doc_info;
if ((iter->segment_tag) == SEGMENT_CHINESE) {
int ret = GetDocByShiftWord(*iter, doc_info, p_data_base_->Appid());
if (ret != 0) {
index_info_vet.clear();
return -RT_GET_DOC_ERR;
}
std::sort(doc_info.begin(), doc_info.end());
for (size_t doc_info_idx = 0; doc_info_idx < doc_info.size(); doc_info_idx++){
KeyInfo info;
info.word_freq = 1;
info.field = (iter->field);
info.word = (iter->word);
ResultContext::Instance()->SetDocKeyinfoMap(doc_info[doc_info_idx].doc_id , info);
}
} else if ((iter->segment_tag) == SEGMENT_ENGLISH) {
int ret = GetDocByShiftEnWord(*iter, doc_info, p_data_base_->Appid());
if (ret != 0) {
index_info_vet.clear();
return -RT_GET_DOC_ERR;
}
std::sort(doc_info.begin(), doc_info.end());
for (size_t doc_info_idx = 0; doc_info_idx < doc_info.size(); doc_info_idx++){
KeyInfo info;
info.word_freq = 1;
info.field = (iter->field);
info.word = (iter->word);
ResultContext::Instance()->SetDocKeyinfoMap(doc_info[doc_info_idx].doc_id , info);
}
}
index_info_vet = Union(index_info_vet, doc_info);
}
return 0;
}
int ValidDocFilter::RangeQueryInvertIndexSearch(const std::vector<FieldInfo>& keys
, std::vector<IndexInfo>& index_info_vet){
if (keys.empty()){
return -RT_GET_DOC_ERR;
}
std::vector<FieldInfo>::const_iterator iter = keys.cbegin();
for (; iter != keys.cend(); ++iter){
std::vector<IndexInfo> doc_info;
log_debug("segment:%d , word:%s , field:%d, fieldtype:%d" ,
iter->segment_tag ,iter->word.c_str() ,
iter->field , iter->field_type);
if (iter->word.empty()){
std::stringstream ss;
ss << p_data_base_->Appid();
InvertIndexEntry startEntry(ss.str(), iter->field, iter->field_type , iter->start);
InvertIndexEntry endEntry(ss.str(), iter->field, iter->field_type , iter->end);
std::vector<InvertIndexEntry> resultEntry;
globalSyncIndexTimer->GetSearchIndex()->GetRangeIndex(iter->range_type, startEntry, endEntry, resultEntry);
std::vector<InvertIndexEntry>::iterator res_iter = resultEntry.begin();
for (; res_iter != resultEntry.end(); res_iter ++) {
IndexInfo info;
info.doc_id = res_iter->_InvertIndexDocId;
info.doc_version = res_iter->_InvertIndexDocVersion;
doc_info.push_back(info);
}
log_debug("appid: %s, field: %d, count: %d", startEntry._InvertIndexAppid.c_str(), iter->field, (int)resultEntry.size());
}
index_info_vet = Union(index_info_vet, doc_info);
}
return 0;
}
int ValidDocFilter::TextInvertIndexSearch(const std::vector<FieldInfo>& keys, std::vector<IndexInfo>& index_info_vet){
if (keys.empty()){
return -RT_GET_FIELD_ERROR;
}
std::vector<FieldInfo>::const_iterator iter = keys.cbegin();
for (; iter != keys.cend(); ++iter){
std::vector<IndexInfo> doc_info;
int ret = GetDocIdSetByWord(*iter, doc_info);
if (ret != 0){
return -RT_GET_DOC_ERR;
}
if (doc_info.size() == 0)
continue;
if (!p_data_base_->GetHasGisFlag() || !isAllNumber(iter->word)){
if (iter->field_type != FIELD_INDEX){
ResultContext::Instance()->SetHighLightWordSet(iter->word);
}
}
if(!p_data_base_->GetHasGisFlag() && (SORT_RELEVANCE == p_data_base_->SortType())){
CalculateByWord(*iter, doc_info);
}
index_info_vet = Union(index_info_vet, doc_info);
}
return 0;
}
int ValidDocFilter::ProcessTerminal(const std::vector<std::vector<FieldInfo> >& and_keys, const TerminalQryCond& query_cond, std::vector<TerminalRes>& vecs){
if(and_keys.size() != 1){
return 0;
}
std::vector<FieldInfo> field_vec = and_keys[0];
if(field_vec.size() != 1){
return 0;
}
FieldInfo field_info = field_vec[0];
if(field_info.segment_tag != SEGMENT_RANGE){
return 0;
}
std::stringstream ss;
ss << p_data_base_->Appid();
InvertIndexEntry beginEntry(ss.str(), field_info.field, field_info.field_type , field_info.start);
InvertIndexEntry endEntry(ss.str(), field_info.field, field_info.field_type , field_info.end);
std::vector<InvertIndexEntry> resultEntry;
globalSyncIndexTimer->GetSearchIndex()->GetRangeIndexInTerminal(field_info.range_type, beginEntry, endEntry, query_cond, resultEntry);
std::vector<InvertIndexEntry>::iterator iter = resultEntry.begin();
for (; iter != resultEntry.end(); iter ++) {
TerminalRes info;
info.doc_id = (*iter)._InvertIndexDocId;
info.score = (*iter)._InvertIndexKey;
vecs.push_back(info);
}
return 0;
}
// Records, for every document in `doc_info`, the per-word data kept in
// ResultContext (frequency, field, creation time, positions), and finally
// the number of documents that contain `fieldInfo.word`.
void ValidDocFilter::CalculateByWord(FieldInfo fieldInfo, const std::vector<IndexInfo>& doc_info) {
    for (const IndexInfo& doc : doc_info) {
        // Position strings longer than two characters have their first and
        // last character stripped before the comma-separated split; shorter
        // strings yield no positions.
        std::vector<int> positions;
        std::string pos_text = doc.pos;
        if (pos_text.size() > 2) {
            pos_text = pos_text.substr(1, pos_text.size() - 2);
            positions = splitInt(pos_text, ",");
        }

        KeyInfo key_info;
        key_info.word = fieldInfo.word;
        key_info.field = doc.field;
        key_info.word_freq = doc.word_freq;
        key_info.created_time = doc.created_time;
        key_info.pos_vec = positions;
        ResultContext::Instance()->SetDocKeyinfoMap(doc.doc_id , key_info);
    }

    ResultContext::Instance()->SetWordDoccountMap(fieldInfo.word , doc_info.size());
}
bool ValidDocFilter::GetDocIndexCache(std::string word, uint32_t field, std::vector<IndexInfo>& doc_info) {
log_debug("get doc index start");
bool res = false;
uint8_t value[MAX_VALUE_LEN] = { 0 };
unsigned vsize = 0;
std::string output = "";
std::string indexCache = word + "|" + ToString(field);
if (p_data_base_->CacheSwitch() == 1 && indexcachelist->in_list(indexCache.c_str(), indexCache.size(), value, vsize))
{
log_debug("hit index cache.");
value[vsize] = '\0';
output = (char *)value;
res = true;
}
if (res) {
Json::Value packet;
Json::Reader r(Json::Features::strictMode());
int ret;
ret = r.parse(output.c_str(), output.c_str() + output.size(), packet);
if (0 == ret)
{
log_error("the err json string is : %s, errmsg : %s", output.c_str(), r.getFormattedErrorMessages().c_str());
res = false;
return res;
}
for (uint32_t i = 0; i < packet.size(); ++i) {
IndexInfo info;
Json::Value& index_cache = packet[i];
if (index_cache.isMember("appid") && index_cache["appid"].isUInt() &&
index_cache.isMember("id") && index_cache["id"].isString() &&
index_cache.isMember("version") && index_cache["version"].isUInt() &&
index_cache.isMember("field") && index_cache["field"].isUInt() &&
index_cache.isMember("freq") && index_cache["freq"].isUInt() &&
index_cache.isMember("time") && index_cache["time"].isUInt() &&
index_cache.isMember("pos") && index_cache["pos"].isString() &&
index_cache.isMember("extend") && index_cache["extend"].isString())
{
info.appid = index_cache["appid"].asUInt();
info.doc_id = index_cache["id"].asString();
info.doc_version = index_cache["version"].asUInt();
info.field = index_cache["field"].asUInt();
info.word_freq = index_cache["freq"].asUInt();
info.created_time = index_cache["time"].asUInt();
info.pos = index_cache["pos"].asString();
info.extend = index_cache["extend"].asString();
doc_info.push_back(info);
}
else {
log_error("parse index_cache error, no appid");
doc_info.clear();
res = false;
break;
}
}
}
return res;
}
void ValidDocFilter::SetDocIndexCache(const std::vector<IndexInfo>& doc_info, std::string& indexJsonStr) {
Json::Value indexJson;
std::vector<IndexInfo>::const_iterator iter = doc_info.cbegin();
for ( ; iter != doc_info.cend(); ++iter) {
Json::Value json_tmp;
json_tmp["appid"] = iter->appid;
json_tmp["id"] = iter->doc_id;
json_tmp["version"] = iter->doc_version;
json_tmp["field"] = iter->field;
json_tmp["freq"] = iter->word_freq;
json_tmp["time"] = iter->created_time;
json_tmp["pos"] = iter->pos;
json_tmp["extend"] = iter->extend;
indexJson.append(json_tmp);
}
Json::FastWriter writer;
indexJsonStr = writer.write(indexJson);
}
// Fetches the documents indexed under `fieldInfo.word` into `doc_info`.
//
// The lookup key is "<appid>#00#<encoded word>". For GIS requests the index
// cache is consulted first, and a freshly fetched result of 1..1000 documents
// is written back to the cache when caching is switched on.
//
// Returns 0 on success, including the cases where nothing is looked up
// (sensitive word, IP word that converts to 0), and -RT_DTC_ERR when the
// index query fails.
int ValidDocFilter::GetDocIdSetByWord(FieldInfo fieldInfo, std::vector<IndexInfo> &doc_info) {
    if (DataManager::Instance()->IsSensitiveWord(fieldInfo.word)) {
        log_debug("%s is a sensitive word.", fieldInfo.word.c_str());
        return 0;
    }

    std::stringstream key_stream;
    key_stream << p_data_base_->Appid() << "#00#";

    // IP words are replaced by the decimal text of their numeric form.
    if (FIELD_IP == fieldInfo.field_type) {
        const uint32_t ip_num = GetIpNum(fieldInfo.word);
        if (ip_num == 0) {
            return 0;
        }
        std::stringstream ip_stream;
        ip_stream << ip_num;
        fieldInfo.word = ip_stream.str();
    }

    if (FIELD_INDEX != fieldInfo.field_type) {
        KeyFormat::UnionKey key_parts;
        key_parts.push_back(std::make_pair(fieldInfo.field_type , fieldInfo.word));
        key_stream << KeyFormat::Encode(key_parts);
    } else {
        // A union-index word was already encoded when it was assembled, so it
        // must not be encoded a second time here.
        key_stream << fieldInfo.word;
    }

    const std::string index_key = key_stream.str();
    log_debug("appid [%u], key[%s]", p_data_base_->Appid(), index_key.c_str());

    if (p_data_base_->GetHasGisFlag() && GetDocIndexCache(index_key, fieldInfo.field, doc_info)) {
        return 0;
    }

    if (!g_IndexInstance.GetDocInfo(p_data_base_->Appid(), index_key, fieldInfo.field, doc_info)) {
        log_error("GetDocInfo error.");
        return -RT_DTC_ERR;
    }

    // Only GIS results of 1..1000 documents are cached, and only when the
    // cache switch is on.
    if (p_data_base_->CacheSwitch() != 1 || p_data_base_->GetHasGisFlag() != 1
        || doc_info.empty() || doc_info.size() > 1000) {
        return 0;
    }

    std::string cache_value;
    SetDocIndexCache(doc_info, cache_value);
    if (cache_value.empty() || cache_value.size() >= MAX_VALUE_LEN) {
        return 0;
    }

    const std::string cache_key = index_key + "|" + ToString(fieldInfo.field);
    unsigned cache_key_len = cache_key.size();
    const int add_ret = indexcachelist->add_list(cache_key.c_str(), cache_value.c_str(), cache_key_len, cache_value.size());
    if (add_ret == 0) {
        log_debug("add to index_cache_list: %s.", cache_key.c_str());
    } else {
        log_error("add to index_cache_list error, ret: %d.", add_ret);
    }
    return 0;
}
std::vector<IndexInfo> ValidDocFilter::Union(std::vector<IndexInfo>& first_indexinfo_vet, std::vector<IndexInfo>& second_indexinfo_vet){
std::vector<IndexInfo> index_info_result;
int i_max_size = first_indexinfo_vet.size() + second_indexinfo_vet.size();
index_info_result.resize(i_max_size);
std::sort(first_indexinfo_vet.begin() , first_indexinfo_vet.end());
std::sort(second_indexinfo_vet.begin() , second_indexinfo_vet.end());
std::vector<IndexInfo>::iterator iter = std::set_union(
first_indexinfo_vet.begin(), first_indexinfo_vet.end(),
second_indexinfo_vet.begin() , second_indexinfo_vet.end() ,
index_info_result.begin());
index_info_result.resize(iter - index_info_result.begin());
return index_info_result;
}

View File

@ -0,0 +1,72 @@
/*
* =====================================================================================
*
* Filename: valid_doc_filter.h
*
* Description: ValidDocFilter class definition (inverted-index document lookup).
*
* Version: 1.0
* Created: 09/08/2018
* Revision: none
* Compiler: gcc
*
* Author: zhulin, shzhulin3@jd.com
* Modified by: chenyujie ,chenyujie28@jd.com
* Company: JD.com, Inc.
*
* =====================================================================================
*/
#ifndef LOGICAL_OP_H
#define LOGICAL_OP_H
#include <map>
#include <set>
#include <functional>
#include "comm.h"
#include "singleton.h"
#include "noncopyable.h"
#include "request_context.h"
#include "result_context.h"
// Looks up documents in the inverted index for the request bound through
// BindDataBasePointer(). Accessed as a process-wide singleton via Instance().
class ValidDocFilter : private noncopyable{
public:
    ValidDocFilter();
    virtual ~ValidDocFilter();

    // Returns the instance managed by CSingleton<ValidDocFilter>.
    static ValidDocFilter* Instance(){
        return CSingleton<ValidDocFilter>::Instance();
    }

    // Destroys the instance managed by CSingleton<ValidDocFilter>.
    static void Destroy(){
        CSingleton<ValidDocFilter>::Destroy();
    }

    // Stores the request context that the search methods read from.
    void BindDataBasePointer(RequestContext* const p_data_base) { p_data_base_ = p_data_base; }

    // NOTE(review): the definition is not visible next to this declaration;
    // presumably the Chinese/pinyin counterpart of TextInvertIndexSearch.
    int HanPinTextInvertIndexSearch(const std::vector<FieldInfo>& keys
        , std::vector<IndexInfo>& index_info_vet );

    // Merges the documents matched by the range conditions in `keys` into
    // `index_info_vet`.
    int RangeQueryInvertIndexSearch(const std::vector<FieldInfo>& keys
        , std::vector<IndexInfo>& index_info_vet);

    // Merges the documents matched by the words in `keys` into
    // `index_info_vet`.
    int TextInvertIndexSearch(const std::vector<FieldInfo>& keys
        , std::vector<IndexInfo>& index_info_vet);

    // Runs a terminal range query and appends the hits to `vecs`.
    int ProcessTerminal(const std::vector<std::vector<FieldInfo> >& and_keys, const TerminalQryCond& query_cond, std::vector<TerminalRes>& vecs);

private:
    // Records per-word data for the given documents in ResultContext.
    void CalculateByWord(FieldInfo fieldInfo, const std::vector<IndexInfo> &doc_info);

    // Serialises `doc_info` into the JSON text kept in the index cache.
    void SetDocIndexCache(const std::vector<IndexInfo> &doc_info, std::string& indexJsonStr);

    // Loads the cached document list for (`word`, `field`); false on a miss.
    bool GetDocIndexCache(std::string word, uint32_t field, std::vector<IndexInfo> &doc_info);

    // Fetches the documents indexed under `fieldInfo.word`.
    int GetDocIdSetByWord(FieldInfo fieldInfo, std::vector<IndexInfo> &doc_info);

    // Returns the sorted set union of both lists; sorts the arguments in place.
    std::vector<IndexInfo> Union(std::vector<IndexInfo>& first_indexinfo_vet, std::vector<IndexInfo>& second_indexinfo_vet);

    // Request context set by BindDataBasePointer().
    RequestContext* p_data_base_;
};
#endif

View File

@ -25,10 +25,12 @@
#include <unistd.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <ctype.h>
#include <bitset>
#include <map>
#include <string>
#include <vector>
#include <algorithm>
#include "db_process_rocks.h"
@ -443,13 +445,21 @@ inline int RocksdbProcess::str2Value(
break;
case DField::String:
Value.str.len = Str.length();
Value.str.ptr = const_cast<char *>(Str.data()); // 不重新new要等这个value使用完后释放内存(如果Str是动态分配的)
{
char* p = (char*)calloc(Str.length() , sizeof(char));
memcpy((void*)p , (void*)Str.data() , Str.length());
Value.str.ptr = p;
Value.str.len = Str.length();
}
break;
case DField::Binary:
Value.bin.len = Str.length();
Value.bin.ptr = const_cast<char *>(Str.data());
{
char* p = (char*)calloc(Str.length() , sizeof(char));
memcpy((void*)p , (void*)Str.data() , Str.length());
Value.bin.ptr = p;
Value.bin.len = Str.length();
}
break;
default:
@ -600,6 +610,7 @@ int RocksdbProcess::condition_filter(
case DField::String:
case DField::Binary:
{
matched = is_matched(rocksValue.c_str(), comparator, condValue.c_str(), (int)rocksValue.length(), (int)condValue.length(), false);
if (!matched)
{
@ -607,6 +618,7 @@ int RocksdbProcess::condition_filter(
condValue.c_str(), comparator);
return 1;
}
}
break;
default:
@ -662,6 +674,23 @@ template bool RocksdbProcess::is_matched<int64_t>(const int64_t lv, int comp, co
template bool RocksdbProcess::is_matched<uint64_t>(const uint64_t lv, int comp, const uint64_t rv);
template bool RocksdbProcess::is_matched<double>(const double lv, int comp, const double rv);
// Compares the first `count` bytes of `lv` and `rv`, ignoring ASCII case.
//
// Returns a negative value, zero, or a positive value when `lv` is
// respectively less than, equal to, or greater than `rv`; bytes are ordered
// as unsigned values after tolower(). A `count` of zero or less yields 0.
int RocksdbProcess::memcmp_ignore_case(
    const void* lv,
    const void* rv,
    int count)
{
    // tolower() is only defined for values representable as unsigned char
    // (or EOF), so the bytes are read as unsigned char rather than plain
    // char, which may be negative for bytes >= 0x80.
    const unsigned char* lhs = static_cast<const unsigned char*>(lv);
    const unsigned char* rhs = static_cast<const unsigned char*>(rv);
    for (int i = 0; i < count; i++){
        const int l_lower = tolower(lhs[i]);
        const int r_lower = tolower(rhs[i]);
        if (l_lower != r_lower){
            return l_lower < r_lower ? -1 : 1;
        }
    }
    return 0;
}
//template<>
bool RocksdbProcess::is_matched(
const char *lv,
@ -686,37 +715,38 @@ bool RocksdbProcess::is_matched(
{
case 0:
if (caseSensitive)
return lLen == rLen && !strncmp(lv, rv, minLen);
return lLen == rLen && !strncasecmp(lv, rv, minLen);
return lLen == rLen && !memcmp(lv, rv, minLen);
return lLen == rLen && !memcmp_ignore_case(lv, rv, minLen);
case 1:
if (lLen != rLen)
return true;
if (caseSensitive)
return strncmp(lv, rv, minLen);
return strncasecmp(lv, rv, minLen);
return memcmp(lv, rv, minLen);
return memcmp_ignore_case(lv, rv, minLen);
case 2:
if (caseSensitive)
ret = strncmp(lv, rv, minLen);
ret = memcmp(lv, rv, minLen);
else
ret = strncasecmp(lv, rv, minLen);
ret = memcmp_ignore_case(lv, rv, minLen);
return ret < 0 || (ret == 0 && lLen < rLen);
case 3:
if (caseSensitive)
ret = strncmp(lv, rv, minLen);
ret = memcmp(lv, rv, minLen);
else
ret = strncasecmp(lv, rv, minLen);
ret = memcmp_ignore_case(lv, rv, minLen);
log_error("iret:%d , len:%d ,rLen:%d", ret , lLen , rLen);
return ret < 0 || (ret == 0 && lLen <= rLen);
case 4:
if (caseSensitive)
ret = strncmp(lv, rv, minLen);
ret = memcmp(lv, rv, minLen);
else
ret = strncasecmp(lv, rv, minLen);
ret = memcmp_ignore_case(lv, rv, minLen);
return ret > 0 || (ret == 0 && lLen > rLen);
case 5:
if (caseSensitive)
ret = strncmp(lv, rv, minLen);
ret = memcmp(lv, rv, minLen);
else
ret = strncasecmp(lv, rv, minLen);
ret = memcmp_ignore_case(lv, rv, minLen);
return ret > 0 || (ret == 0 && lLen >= rLen);
default:
log_error("unsupport comparator:%d", comparator);
@ -2204,6 +2234,24 @@ int RocksdbProcess::process_direct_query(
std::vector<QueryCond> primaryKeyConds;
ret = analyse_primary_key_conds(reqCxt, primaryKeyConds);
#if 0
std::vector<QueryCond>::iterator iter = primaryKeyConds.begin();
for (; iter != primaryKeyConds.end(); ++iter){
std::vector<int> fieldTypes;
fieldTypes.push_back(DField::Signed);
std::vector<std::string> fieldValues;
int ipos = iter->sCondValue.find_last_of("#");
std::string stemp = iter->sCondValue.substr(ipos + 1);
key_format::Decode(stemp , fieldTypes , fieldValues);
log_error("field index:%d , condopr:%d , condvalue:%s" ,
iter->sFieldIndex ,
iter->sCondOpr ,
fieldValues[0].c_str());
}
#endif
if (ret != 0)
{
log_error("query condition incorrect in query context!");
@ -2231,8 +2279,11 @@ int RocksdbProcess::process_direct_query(
std::string value;
RocksDBConn::RocksItr_t rocksItr;
bool forwardDirection = (primaryKeyConds[0].sCondOpr == (uint8_t)CondOpr::eEQ || primaryKeyConds[0].sCondOpr == (uint8_t)CondOpr::eGT || primaryKeyConds[0].sCondOpr == (uint8_t)CondOpr::eGE);
bool forwardDirection = (primaryKeyConds[0].sCondOpr == (uint8_t)CondOpr::eEQ ||
primaryKeyConds[0].sCondOpr == (uint8_t)CondOpr::eGT ||
primaryKeyConds[0].sCondOpr == (uint8_t)CondOpr::eGE);
bool backwardEqual = primaryKeyConds[0].sCondOpr == (uint8_t)CondOpr::eLE;
log_debug("forwardDirection:%d , backwardEqual:%d", (int)forwardDirection , (int)backwardEqual);
if (backwardEqual)
{
// if the query condition is < || <=, use seek_for_prev to seek in the total_order_seek mode
@ -2316,6 +2367,39 @@ int RocksdbProcess::process_direct_query(
while (true)
{
ret = range_key_matched(fullKey, primaryKeyConds);
#if 0
std::vector<std::string> rocksValues;
std::vector<int> fieldTypes;
fieldTypes.push_back(DField::String);
fieldTypes.push_back(DField::String);
fieldTypes.push_back(DField::Signed);
fieldTypes.push_back(DField::Signed);
fieldTypes.push_back(DField::Signed);
key_format::Decode(fullKey, fieldTypes, rocksValues);
for (int i = 0; i < rocksValues[0].length(); i++){
log_error("No:%d is %d \n" , i , (int)rocksValues[0][i]);
}
int ipos = rocksValues[0].find_last_of("#");
std::string stemp = rocksValues[0].substr(ipos + 1);
std::vector<std::string> rocksValues001;
std::vector<int> fieldTypes001;
fieldTypes001.push_back(DField::Signed);
key_format::Decode(stemp , fieldTypes001 , rocksValues001);
log_error("primary value:%s", rocksValues001[0].c_str());
for (size_t i = 0; i < rocksValues.size(); i++)
{
log_error("value:%s", rocksValues[i].c_str() );
}
#endif
if (ret == -1)
{
// prefix key not matched, reach to the end

View File

@ -269,6 +269,8 @@ class RocksdbProcess : public HelperProcessBase
int64_t timeElapse);
void print_stat_info();
int memcmp_ignore_case(const void* lv, const void* rv, int count);
};
#endif // __DB_PROCESS_ROCKS_H__

View File

@ -66,6 +66,7 @@ enum FieldType{
};
enum SEGMENTTAG {
SEGMENT_NONE = 0,
SEGMENT_DEFAULT = 1,
SEGMENT_NGRAM = 2,
SEGMENT_CHINESE = 3,