diff --git a/src/search_local/index_storage/cache/Makefile b/src/search_local/index_storage/cache/Makefile
new file mode 100644
index 0000000..16fe139
--- /dev/null
+++ b/src/search_local/index_storage/cache/Makefile
@@ -0,0 +1,39 @@
+LIB_PATH = ../../..
+include ../Make.conf
+VPATH :=../stat
+
+####################compile#################
+CFLAGS += -D_GLIBCXX_USE_CXX11_ABI=0
+CFLAGS += -I./ -I../common -I../api/c_api -I../stat -I../watchdog -I../helper
+LIBPATH := -L. -L../common -L../watchdog -L../stat -L../../../3rdlib/attr_api -L../api
+DB%.o tmp.DB%.o:CFLAGS += $(MYSQLINC)
+ifneq ($(findstring x86_64,$(PLATFORM)),)
+BITS=64
+else
+BITS=32
+endif
+
+LIBDTCAPI := -L../api -lpthread -Wl,-rpath,\$$ORIGIN/../lib/ -Wl,-rpath,\$$ORIGIN -Wl,-rpath,\$$ORIGIN/../api/ -Wl,-rpath,\$$ORIGIN/../ -z origin
+
+target = libdtcd.a dtcd
+target_external = ../api/libdtc.a ../stat/libstat.a ../common/libcommon.a
+
+# Every target other than libdtcd.a must wait for libdtcd.a to be built.
+# "filter-out" is the GNU make function; the former "filterout" is not one,
+# so it expanded to nothing and this rule never named a target.
+$(filter-out libdtcd.a,$(target)): libdtcd.a
+
+filelist := feature hash ng_info node_set node_index fence_unit buffer_bypass buffer_pool pt_malloc sys_malloc raw_data raw_data_process buffer_process buffer_flush buffer_unit empty_filter black_hole logger task_pendlist lru_bit hb_log admin_process hb_feature container_dtcd col_expand t_tree tree_data tree_data_process expire_time main hb_process task_control
+libdtcd_objs:= $(sort $(filelist:%=%.o))
+
+#dtcd
+dtcd: CFLAGS += -export-dynamic
+dtcd: LDFLAGS += -Wl,--version-script,dtcd.export.lst
+dtcd_objs:= main.o task_control.o stat_client.o expire_time.o hb_process.o
+dtcd_libs:= -lstat -lwatchdog -ldtcd -lcommon -lattr_api_$(BITS) -lpthread -ldl $(Z_LIB) -rdynamic
+
+#####################install############
+target_install = dtcd
+install_dir = ../../bin
+
+include ../Make.rules
diff --git a/src/search_local/index_storage/cache/admin_process.cc b/src/search_local/index_storage/cache/admin_process.cc
new file mode 100644
index 0000000..754f9bc
--- /dev/null
+++ b/src/search_local/index_storage/cache/admin_process.cc
@@ -0,0 +1,1243 @@
+/*
+ * =====================================================================================
+ *
+ * Filename: admin_process.cc
+ *
+ * Description: cache initialization & task request method
+ *
+ * Version: 1.0
+ * Created: 09/08/2020 10:02:05 PM
+ * Revision: none
+ * Compiler: gcc
+ *
+ * Author: Norton, yangshuang68@jd.com
+ * Company: JD.com, Inc.
+ *
+ * =====================================================================================
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "packet.h"
+#include "log.h"
+#include "buffer_process.h"
+#include "buffer_flush.h"
+#include "mysql_error.h"
+#include "sys_malloc.h"
+#include "data_chunk.h"
+#include "raw_data_process.h"
+#include "admin_tdef.h"
+#include "key_route.h"
+#include "table_def_manager.h"
+
+DTC_USING_NAMESPACE
+
+extern DTCTableDefinition *gTableDef[];
+extern int targetNewHash;
+extern int hashChanging;
+extern KeyRoute *keyRoute;
+extern DbConfig *dbConfig;
+
+extern int collect_load_config(DbConfig *dbconfig);
+#if __WORDSIZE == 64
+#define UINT64FMT_T "%lu"
+#else
+#define UINT64FMT_T "%llu"
+#endif
+
+/*
+ * Entry point for admin commands: routes Task to the handler that matches
+ * Task.requestInfo.admin_code().
+ *
+ * - QueryServerInfo, LogoutHB and GetUpdateKey are rejected with -EBADRQC
+ *   while hbFeature is still NULL.
+ * - MigrateDB / MigrateDBSwitch are refused while update_mode() or
+ *   is_mem_dirty() is true; otherwise BUFFER_PROCESS_NEXT is returned.
+ * - An unknown code sets -EBADRQC on Task, logs a notice and then falls out
+ *   of the switch, so the function returns BUFFER_PROCESS_OK in that case.
+ */
+BufferResult BufferProcess::buffer_process_admin(TaskRequest &Task)
+{
+    log_debug("BufferProcess::buffer_process_admin admin_code is %d ", Task.requestInfo.admin_code());
+    if (Task.requestInfo.admin_code() == DRequest::ServerAdminCmd::QueryServerInfo || Task.requestInfo.admin_code() == DRequest::ServerAdminCmd::LogoutHB || Task.requestInfo.admin_code() == DRequest::ServerAdminCmd::GetUpdateKey)
+    {
+        if (hbFeature == NULL)
+        { // the hot-backup feature has not been started yet
+            Task.set_error(-EBADRQC, CACHE_SVC, "hot-backup not active yet");
+            return BUFFER_PROCESS_ERROR;
+        }
+    }
+
+    switch (Task.requestInfo.admin_code())
+    {
+    case DRequest::ServerAdminCmd::QueryServerInfo:
+        return buffer_query_serverinfo(Task);
+
+    case DRequest::ServerAdminCmd::RegisterHB:
+        return buffer_register_hb(Task);
+
+    case DRequest::ServerAdminCmd::LogoutHB:
+        return buffer_logout_hb(Task);
+
+    case DRequest::ServerAdminCmd::GetKeyList:
+        return buffer_get_key_list(Task);
+
+    case DRequest::ServerAdminCmd::GetUpdateKey:
+        return buffer_get_update_key(Task);
+
+    case DRequest::ServerAdminCmd::GetRawData:
+        return buffer_get_raw_data(Task);
+
+    case DRequest::ServerAdminCmd::ReplaceRawData:
+        return buffer_replace_raw_data(Task);
+
+    case DRequest::ServerAdminCmd::AdjustLRU:
+        return buffer_adjust_lru(Task);
+
+    case DRequest::ServerAdminCmd::VerifyHBT:
+        return buffer_verify_hbt(Task);
+
+    case DRequest::ServerAdminCmd::GetHBTime:
+        return buffer_get_hbt(Task);
+
+    case DRequest::ServerAdminCmd::NodeHandleChange:
+        return buffer_nodehandlechange(Task);
+
+    case DRequest::ServerAdminCmd::Migrate:
+        return buffer_migrate(Task);
+
+    case DRequest::ServerAdminCmd::ClearCache:
+        return buffer_clear_cache(Task);
+
+    case DRequest::ServerAdminCmd::MigrateDB:
+    case DRequest::ServerAdminCmd::MigrateDBSwitch:
+        if (update_mode() || is_mem_dirty())
+        {
+            log_error("try to migrate when cache is async");
+            Task.set_error(-EC_SERVER_ERROR, "cache process", "try to migrate when cache is async");
+            return BUFFER_PROCESS_ERROR;
+        }
+        return BUFFER_PROCESS_NEXT;
+
+    case DRequest::ServerAdminCmd::ColExpandStatus:
+        return buffer_check_expand_status(Task);
+
+    case DRequest::ServerAdminCmd::col_expand:
+        return buffer_column_expand(Task);
+
+    case DRequest::ServerAdminCmd::ColExpandDone:
+        return buffer_column_expand_done(Task);
+
+    case DRequest::ServerAdminCmd::ColExpandKey:
+        return buffer_column_expand_key(Task);
+
+    default:
+        Task.set_error(-EBADRQC, CACHE_SVC, "invalid admin cmd code from client");
+        log_notice("invalid admin cmd code[%d] from client", Task.requestInfo.admin_code());
+        break;
+    }
+
+    return BUFFER_PROCESS_OK;
+}
+
+/*
+ * Checks whether a column expansion described by the table.conf carried in
+ * field 3 of the request row may start.
+ *
+ * Returns BUFFER_PROCESS_ERROR (with an error set on Task) when the cache is
+ * in update mode or dirty, when the table.conf cannot be parsed, when
+ * Cache.check_expand_status() reports -1 (an expansion is already running)
+ * or -2 (feature not enabled). Otherwise returns BUFFER_PROCESS_OK.
+ *
+ * The parsed table definition is only used for comparison here, so it is
+ * released with DEC_DELETE on every path, including success.
+ */
+BufferResult BufferProcess::buffer_check_expand_status(TaskRequest &Task)
+{
+    if (update_mode() || is_mem_dirty())
+    {
+        Task.set_error(-EC_SERVER_ERROR, "cache process", "try to column expand when cache is async");
+        log_error("try to column expand when cache is async");
+        return BUFFER_PROCESS_ERROR;
+    }
+
+    int ret = 0;
+    // get table.conf
+    RowValue stRow(Task.table_definition());
+    Task.update_row(stRow);
+    log_debug("value[len: %d]", stRow[3].bin.len);
+    DTCTableDefinition *t;
+    // parse table.conf to tabledef
+    // release t by DEC_DELETE, not delete
+    if (stRow[3].bin.ptr == NULL ||
+        (t = TableDefinitionManager::Instance()->load_buffered_table(stRow[3].bin.ptr)) == NULL)
+    {
+        log_error("expand column with illegal table.conf");
+        Task.set_error(-EC_SERVER_ERROR, "cache process", "table.conf illegal");
+        return BUFFER_PROCESS_ERROR;
+    }
+    if ((ret = Cache.check_expand_status()) == -1)
+    {
+        // check tabledef
+        if (t->is_same_table(TableDefinitionManager::Instance()->get_new_table_def()))
+        {
+            log_notice("expand same column while expanding, canceled");
+            Task.set_error(-EC_ERR_COL_EXPAND_DUPLICATE, "cache process", "expand same column while expanding, canceled");
+        }
+        else
+        {
+            log_error("new expanding task while expand, canceled");
+            Task.set_error(-EC_ERR_COL_EXPANDING, "cache process", "new expanding task while expand, canceled");
+        }
+        // release t
+        DEC_DELETE(t);
+        return BUFFER_PROCESS_ERROR;
+    }
+    else if (ret == -2)
+    {
+        log_error("column expand not enabled");
+        Task.set_error(-EC_SERVER_ERROR, "cache process", "column expand not enabled");
+        DEC_DELETE(t);
+        return BUFFER_PROCESS_ERROR;
+    }
+
+    // t was only needed for the comparison above; nothing keeps a reference
+    // to it, so drop ours instead of leaking it on the success path.
+    DEC_DELETE(t);
+    log_debug("buffer_check_expand_status ok");
+    return BUFFER_PROCESS_OK;
+}
+
+/*
+ * Starts a column expansion using the table.conf carried in field 3 of the
+ * request row.
+ *
+ * The request is rejected (BUFFER_PROCESS_ERROR, parsed definition released)
+ * when: the table.conf cannot be parsed; an expansion is already running or
+ * the feature is disabled; the new definition equals the current one; the
+ * table index held by TableDefinitionManager differs from the one in shared
+ * memory; or Cache.try_col_expand() fails.
+ *
+ * On success the parsed definition is handed to
+ * TableDefinitionManager::set_new_table_def(), the new table.conf is saved,
+ * Cache.col_expand() is called and, in nodb mode, a hot-backup log entry is
+ * written.
+ */
+BufferResult BufferProcess::buffer_column_expand(TaskRequest &Task)
+{
+    int ret = 0;
+    // get table.conf
+    RowValue stRow(Task.table_definition());
+    Task.update_row(stRow);
+    log_debug("value[len: %d]", stRow[3].bin.len);
+    DTCTableDefinition *t;
+    // parse table.conf to tabledef
+    // release t by DEC_DELETE, not delete
+    if (stRow[3].bin.ptr == NULL ||
+        (t = TableDefinitionManager::Instance()->load_buffered_table(stRow[3].bin.ptr)) == NULL)
+    {
+        log_error("expand column with illegal table.conf");
+        Task.set_error(-EC_SERVER_ERROR, "cache process", "table.conf illegal");
+        return BUFFER_PROCESS_ERROR;
+    }
+    // check expanding
+    if ((ret = Cache.check_expand_status()) == -1)
+    {
+        // check tabledef
+        if (t->is_same_table(TableDefinitionManager::Instance()->get_new_table_def()))
+        {
+            log_notice("expand same column while expanding, canceled");
+            Task.set_error(-EC_ERR_COL_EXPAND_DUPLICATE, "cache process", "expand same column while expanding, canceled");
+        }
+        else
+        {
+            log_error("new expanding task while expand, canceled");
+            Task.set_error(-EC_ERR_COL_EXPANDING, "cache process", "new expanding task while expand, canceled");
+        }
+        // release t
+        DEC_DELETE(t);
+        return BUFFER_PROCESS_ERROR;
+    }
+    else if (ret == -2)
+    {
+        log_error("column expand not enabled");
+        Task.set_error(-EC_SERVER_ERROR, "cache process", "column expand not enabled");
+        DEC_DELETE(t);
+        return BUFFER_PROCESS_ERROR;
+    }
+    if (t->is_same_table(TableDefinitionManager::Instance()->get_cur_table_def()))
+    {
+        log_notice("expand same column, canceled");
+        Task.set_error(-EC_ERR_COL_EXPAND_DUPLICATE, "cache process", "expand same column, canceled");
+        DEC_DELETE(t);
+        return BUFFER_PROCESS_ERROR;
+    }
+    // if ok
+    if (TableDefinitionManager::Instance()->get_cur_table_idx() != Cache.shm_table_idx())
+    {
+        log_error("tabledefmanager's idx and shm's are different, need restart");
+        Task.set_error(-EC_SERVER_ERROR, "cache process", "tabledefmanager's idx and shm's are different");
+        DEC_DELETE(t);
+        return BUFFER_PROCESS_ERROR;
+    }
+    // set new table for tabledefmanger
+    // copy table.conf to shm
+    if ((ret = Cache.try_col_expand(stRow[3].bin.ptr, stRow[3].bin.len)) != 0)
+    {
+        log_error("try col expand error, ret: %d", ret);
+        Task.set_error(-EC_SERVER_ERROR, "cache process", "try col expand error");
+        DEC_DELETE(t);
+        return BUFFER_PROCESS_ERROR;
+    }
+    // from here on t is handed to the table definition manager
+    TableDefinitionManager::Instance()->set_new_table_def(t, (Cache.shm_table_idx() + 1));
+    TableDefinitionManager::Instance()->renew_table_file_def(stRow[3].bin.ptr, stRow[3].bin.len);
+    TableDefinitionManager::Instance()->save_db_config();
+    Cache.col_expand(stRow[3].bin.ptr, stRow[3].bin.len);
+    // hotbackup for nodb mode
+    if (nodbMode)
+        write_hb_log(_DTC_HB_COL_EXPAND_, stRow[3].bin.ptr, stRow[3].bin.len, DTCHotBackup::SYNC_COLEXPAND_CMD);
+    log_debug("buffer_column_expand ok");
+    return BUFFER_PROCESS_OK;
+}
+
+/*
+ * Finishes a column expansion. The table.conf carried in field 3 of the
+ * request row must match the definition of the expansion in progress.
+ *
+ * Returns BUFFER_PROCESS_ERROR when the table.conf cannot be parsed, when no
+ * expansion is running (check_expand_status() == 0 or -2), when the supplied
+ * definition differs from the pending one, or (only when not in nodb mode)
+ * when reloading the DbConfig / collect_load_config() fails.
+ *
+ * On success the pending definition becomes the current one, the new
+ * table.conf is saved, DTCColExpand::expand_done() is called and either a
+ * hot-backup log entry is written (nodb mode) or a ReloadConfig task is sent
+ * to the helper (otherwise).
+ *
+ * The parsed definition t is used for comparison only and is released with
+ * DEC_DELETE on every path.
+ */
+BufferResult BufferProcess::buffer_column_expand_done(TaskRequest &Task)
+{
+    int ret = 0;
+    // get table.conf
+    RowValue stRow(Task.table_definition());
+    Task.update_row(stRow);
+    log_debug("value[len: %d]", stRow[3].bin.len);
+    DTCTableDefinition *t;
+    // parse table.conf to tabledef
+    // release t by DEC_DELETE, not delete
+    if (stRow[3].bin.ptr == NULL ||
+        (t = TableDefinitionManager::Instance()->load_buffered_table(stRow[3].bin.ptr)) == NULL)
+    {
+        log_error("expand column with illegal table.conf");
+        Task.set_error(-EC_SERVER_ERROR, "cache process", "table.conf illegal");
+        return BUFFER_PROCESS_ERROR;
+    }
+    if ((ret = Cache.check_expand_status()) == -2)
+    {
+        log_error("expand done when not expand task begin or feature not enabled");
+        Task.set_error(-EC_SERVER_ERROR, "cache process", "expand done when not expand task begin");
+        DEC_DELETE(t);
+        return BUFFER_PROCESS_ERROR;
+    }
+    else if (ret == 0)
+    {
+        // check tabledef
+        if (t->is_same_table(TableDefinitionManager::Instance()->get_cur_table_def()))
+        {
+            log_notice("expand done same column while expanding not start, canceled");
+            Task.set_error(-EC_ERR_COL_EXPAND_DONE_DUPLICATE, "cache process", "expand same column while expanding not start, canceled");
+        }
+        else
+        {
+            log_error("new expand done task while expanding not start, canceled");
+            Task.set_error(-EC_ERR_COL_EXPAND_DONE_DISTINCT, "cache process", "new expanding task while expanding not start, canceled");
+        }
+        DEC_DELETE(t);
+        return BUFFER_PROCESS_ERROR;
+    }
+    else
+    {
+        // check tabledef
+        if (!t->is_same_table(TableDefinitionManager::Instance()->get_new_table_def()))
+        {
+            log_error("new expand done task while expanding, canceled");
+            Task.set_error(-EC_ERR_COL_EXPAND_DONE_DISTINCT, "cache process", "new expanding task done while expanding, canceled");
+            DEC_DELETE(t);
+            return BUFFER_PROCESS_ERROR;
+        }
+    }
+    // t is not referenced past this point and nobody else holds it
+    DEC_DELETE(t);
+
+    // with a backing data source (not nodb mode), reload the config for the helper
+    if (!nodbMode)
+    {
+        char *buf = stRow[3].bin.ptr;
+        char *bufLocal = (char *)MALLOC(strlen(buf) + 1);
+        if (bufLocal == NULL)
+        {
+            log_error("no mem to copy table.conf for reload, canceled");
+            Task.set_error(-ENOMEM, "cache process", "malloc error");
+            return BUFFER_PROCESS_ERROR;
+        }
+        memset(bufLocal, 0, strlen(buf) + 1);
+        strcpy(bufLocal, buf);
+        DbConfig *dbconfig = DbConfig::load_buffered(bufLocal);
+        FREE(bufLocal);
+        if (!dbconfig)
+        {
+            log_error("reload dbconfig for collect failed, canceled");
+            Task.set_error(-EC_ERR_COL_EXPAND_DONE_DISTINCT, "cache process", "reload dbconfig for collect failed, canceled");
+            return BUFFER_PROCESS_ERROR;
+        }
+        // NOTE(review): dbconfig is not released here on failure; confirm
+        // whether collect_load_config() takes ownership of it.
+        if (collect_load_config(dbconfig))
+        {
+            log_error("reload config to collect failed, canceled");
+            Task.set_error(-EC_ERR_COL_EXPAND_DONE_DISTINCT, "cache process", "reload config to collect failed, canceled");
+            return BUFFER_PROCESS_ERROR;
+        }
+    }
+
+    TableDefinitionManager::Instance()->renew_cur_table_def();
+    TableDefinitionManager::Instance()->save_new_table_conf();
+    DTCColExpand::Instance()->expand_done();
+    // hotbackup for nodb mode
+    if (nodbMode)
+        write_hb_log(_DTC_HB_COL_EXPAND_DONE_, stRow[3].bin.ptr, stRow[3].bin.len, DTCHotBackup::SYNC_COLEXPAND_CMD);
+    log_debug("buffer_column_expand_done ok");
+
+    // with a backing data source, tell the work helper to reload its config
+    if (!nodbMode)
+    {
+        TaskRequest *pTask = new TaskRequest(TableDefinitionManager::Instance()->get_cur_table_def());
+        if (NULL == pTask)
+        {
+            log_error("cannot notify work helper reload config, new task error, possible memory exhausted!");
+        }
+        else
+        {
+            // normal progress message, so it is not logged at error level
+            log_notice("notify work helper reload config start!");
+            pTask->set_request_type(TaskTypeHelperReloadConfig);
+            pTask->set_request_code(DRequest::ReloadConfig);
+            pTask->push_reply_dispatcher(&cacheReply);
+            output.task_notify(pTask);
+        }
+    }
+    return BUFFER_PROCESS_OK;
+}
+
+/*
+ * Expands the cached node of a single key while a column expansion is in
+ * progress.
+ *
+ * Returns BUFFER_PROCESS_ERROR when no expansion is running, when the request
+ * condition does not carry field id 2 as its first field, or when
+ * pstDataProcess->expand_node() fails (-4 is reported as
+ * EC_ERR_COL_EXPAND_NO_MEM). A key that is not in the cache is not an error:
+ * BUFFER_PROCESS_OK is returned without doing anything.
+ */
+BufferResult BufferProcess::buffer_column_expand_key(TaskRequest &Task)
+{
+    if (Cache.check_expand_status() != -1)
+    {
+        log_error("expand one when not expand task begin or feature not enabled");
+        Task.set_error(-EC_ERR_COL_NOT_EXPANDING, "cache process", "expand one when not expand task begin");
+        return BUFFER_PROCESS_ERROR;
+    }
+    int iRet = 0;
+
+    const DTCFieldValue *condition = Task.request_condition();
+    const DTCValue *key;
+
+    // TODO this may need fix, as we do not check whether this field is key
+    if (!condition || condition->num_fields() < 1 || condition->field_id(0) != 2)
+    {
+        Task.set_error(-EC_ERR_COL_NO_KEY, "cache process", "no key value append for col expand");
+        log_error("no key value append for col expand");
+        return BUFFER_PROCESS_ERROR;
+    }
+    key = condition->field_value(0);
+    Node stNode = Cache.cache_find_auto_chose_hash(key->bin.ptr);
+    if (!stNode)
+    {
+        log_notice("key not exist for col expand");
+        return BUFFER_PROCESS_OK;
+    }
+
+    iRet = pstDataProcess->expand_node(Task, &stNode);
+    if (iRet == -4)
+    {
+        Task.set_error(-EC_ERR_COL_EXPAND_NO_MEM, "cache process", pstDataProcess->get_err_msg());
+        log_error("no mem to expand for key, %s", pstDataProcess->get_err_msg());
+        return BUFFER_PROCESS_ERROR;
+    }
+    else if (iRet != 0)
+    {
+        Task.set_error(-EC_SERVER_ERROR, "cache process", pstDataProcess->get_err_msg());
+        log_error("expand key error: %s", pstDataProcess->get_err_msg());
+        return BUFFER_PROCESS_ERROR;
+    }
+    // hotbackup for nodb mode
+    if (nodbMode)
+        write_hb_log(key->bin.ptr, NULL, 0, DTCHotBackup::SYNC_COLEXPAND);
+
+    log_debug("buffer_column_expand_key ok");
+    return BUFFER_PROCESS_OK;
+}
+
+/*
+ * Registers a hot-backup peer.
+ *
+ * Creates and initialises hbFeature on first use (retrying Init() once after
+ * Cache.try_purge_size() when it reports -ENOMEM) and adds it to the cache as
+ * the HOT_BACKUP feature. Sets master_uptime to the current time if it is
+ * still 0, switches hbLogSwitch on, copies both uptimes into
+ * Task.versionInfo and hands the task to dispatch_hot_back_task().
+ *
+ * Returns BUFFER_PROCESS_PUSH_HB on success, BUFFER_PROCESS_ERROR when the
+ * feature cannot be created, initialised or added.
+ */
+BufferResult BufferProcess::buffer_register_hb(TaskRequest &Task)
+{
+    if (hbFeature == NULL)
+    { // the hot-backup feature has not been activated in shared memory yet
+        NEW(HBFeature, hbFeature);
+        if (hbFeature == NULL)
+        {
+            log_error("new hot-backup feature error: %m");
+            Task.set_error(-EC_SERVER_ERROR, "buffer_register_hb", "new hot-backup feature fail");
+            return BUFFER_PROCESS_ERROR;
+        }
+        int iRet = hbFeature->Init(time(NULL));
+        if (iRet == -ENOMEM)
+        {
+            Node stNode;
+            if (Cache.try_purge_size(1, stNode) == 0)
+                iRet = hbFeature->Init(time(NULL));
+        }
+        // NOTE(review): on the two failure returns below hbFeature stays
+        // non-NULL, so a later call skips this whole block; confirm that
+        // is intended.
+        if (iRet != 0)
+        {
+            log_error("init hot-backup feature error: %d", iRet);
+            Task.set_error(-EC_SERVER_ERROR, "buffer_register_hb", "init hot-backup feature fail");
+            return BUFFER_PROCESS_ERROR;
+        }
+        iRet = Cache.add_feature(HOT_BACKUP, hbFeature->Handle());
+        if (iRet != 0)
+        {
+            log_error("add hot-backup feature error: %d", iRet);
+            Task.set_error(-EC_SERVER_ERROR, "buffer_register_hb", "add hot-backup feature fail");
+            return BUFFER_PROCESS_ERROR;
+        }
+    }
+    if (hbFeature->master_uptime() == 0)
+        hbFeature->master_uptime() = time(NULL);
+
+    // turn on logging of changed keys
+    hbLogSwitch = true;
+
+    int64_t hb_timestamp = hbFeature->master_uptime();
+    Task.versionInfo.set_master_hb_timestamp(hb_timestamp);
+    Task.versionInfo.set_slave_hb_timestamp(hbFeature->slave_uptime());
+
+    Task.set_request_type(TaskTypeRegisterHbLog);
+    dispatch_hot_back_task(&Task);
+    return BUFFER_PROCESS_PUSH_HB;
+}
+
+/*
+ * Hot-backup logout. Currently a no-op that always returns BUFFER_PROCESS_OK.
+ */
+BufferResult BufferProcess::buffer_logout_hb(TaskRequest &Task)
+{
+    // TODO: no logout scenario has been identified yet
+    return BUFFER_PROCESS_OK;
+}
+
+/*
+ * Walks the cache nodes with ids in [limit_start, limit_start + limit_count)
+ * and appends one row per node (field 2 = packed key, field 3 = raw data) to
+ * the task result.
+ *
+ * When not in nodb mode and dbConfig->dstype is 2 (rocksdb), the task is
+ * instead rewritten into a Replicate read and BUFFER_PROCESS_NEXT is
+ * returned. When limit_start is beyond Cache.max_node_id() the task fails
+ * with -EC_FULL_SYNC_COMPLETE.
+ */
+BufferResult BufferProcess::buffer_get_key_list(TaskRequest &Task)
+{
+
+    uint32_t lst, lcnt;
+    lst = Task.requestInfo.limit_start();
+    lcnt = Task.requestInfo.limit_count();
+
+    log_debug("buffer_get_key_list start, Limit[%u %u]", lst, lcnt);
+
+    // if the storage is Rocksdb, do replicate through it directly in full sync stage,
+    // just dispath the task to helper unit
+    if ( !nodbMode && dbConfig->dstype == 2/* rocksdb */ )
+    {
+        log_info("proc local replicate!");
+        Task.set_request_code(DRequest::Replicate);
+        // Task.SetRequestType(TaskTypeHelperReplicate);
+        Task.set_request_type(TaskTypeRead);
+
+        // due to the hotback has a different table definition with the normal query, so
+        // need to switch table definition during query the storage
+        DTCTableDefinition* repTab = Task.table_definition();
+
+        Task.set_table_definition(TableDefinitionManager::Instance()->get_cur_table_def());
+        Task.set_replicate_table(repTab);
+
+        return BUFFER_PROCESS_NEXT;
+    }
+
+    // every node has already been traversed
+    if (lst > Cache.max_node_id())
+    {
+        Task.set_error(-EC_FULL_SYNC_COMPLETE, "buffer_get_key_list", "node id is overflow");
+        return BUFFER_PROCESS_ERROR;
+    }
+
+    Task.prepare_result_no_limit();
+
+    RowValue r(Task.table_definition());
+    RawData rawdata(&g_stSysMalloc, 1);
+
+    // Count offsets instead of comparing against lst + lcnt: that sum wraps
+    // in 32-bit arithmetic for a large limit_count and the loop would then
+    // never run.
+    for (uint32_t off = 0; off < lcnt; ++off)
+    {
+        unsigned i = lst + off;
+        if (i < lst) // node id wrapped around
+            break;
+        if (i < Cache.min_valid_node_id())
+            continue;
+        if (i > Cache.max_node_id())
+            break;
+
+        // look up the node for this id
+        Node node = I_SEARCH(i);
+        if (!node)
+            continue;
+        if (node.not_in_lru_list())
+            continue;
+        if (Cache.is_time_marker(node))
+            continue;
+
+        // decode the key
+        DataChunk *keyptr = M_POINTER(DataChunk, node.vd_handle());
+
+        // send the packed key
+        r[2] = TableDefinitionManager::Instance()->get_cur_table_def()->packed_key(keyptr->Key());
+
+        // decode the value
+        if (pstDataProcess->get_all_rows(&node, &rawdata))
+        {
+            rawdata.Destroy();
+            continue;
+        }
+
+        r[3].Set((char *)(rawdata.get_addr()), (int)(rawdata.data_size()));
+
+        Task.append_row(&r);
+
+        rawdata.Destroy();
+    }
+
+    return BUFFER_PROCESS_OK;
+}
+
+/*
+ * hot backup拉取更新key或者lru变更,如果没有则挂起请求,直到
+ * 1. 超时
+ * 2. 
有更新key, 或者LRU变更 + */ +BufferResult BufferProcess::buffer_get_update_key(TaskRequest &Task) +{ + log_debug("buffer_get_update_key start"); + Task.set_request_type(TaskTypeReadHbLog); + dispatch_hot_back_task(&Task); + return BUFFER_PROCESS_PUSH_HB; +} + +BufferResult BufferProcess::buffer_get_raw_data(TaskRequest &Task) +{ + int iRet; + + const DTCFieldValue *condition = Task.request_condition(); + const DTCValue *key; + + log_debug("buffer_get_raw_data start "); + + RowValue stRow(Task.table_definition()); //一行数据 + RawData stNodeData(&g_stSysMalloc, 1); + + Task.prepare_result_no_limit(); + + for (int i = 0; i < condition->num_fields(); i++) + { + key = condition->field_value(i); + stRow[1].u64 = DTCHotBackup::HAS_VALUE; //表示附加value字段 + stRow[2].Set(key->bin.ptr, key->bin.len); + + Node stNode = Cache.cache_find_auto_chose_hash(key->bin.ptr); + if (!stNode) + { //master没有该key的数据 + stRow[1].u64 = DTCHotBackup::KEY_NOEXIST; + stRow[3].Set(0); + Task.append_row(&stRow); + continue; + } + else + { + + iRet = pstDataProcess->get_all_rows(&stNode, &stNodeData); + if (iRet != 0) + { + log_error("get raw-data failed"); + Task.set_error_dup(-EIO, CACHE_SVC, pstDataProcess->get_err_msg()); + return BUFFER_PROCESS_ERROR; + } + stRow[3].Set((char *)(stNodeData.get_addr()), (int)(stNodeData.data_size())); + } + + Task.append_row(&stRow); //当前行添加到task中 + stNodeData.Destroy(); + } + + return BUFFER_PROCESS_OK; +} + +BufferResult BufferProcess::buffer_replace_raw_data(TaskRequest &Task) +{ + log_debug("buffer_replace_raw_data start "); + + int iRet; + + const DTCFieldValue *condition = Task.request_condition(); + const DTCValue *key; + + RowValue stRow(Task.table_definition()); //一行数据 + RawData stNodeData(&g_stSysMalloc, 1); + if (condition->num_fields() < 1) + { + log_debug("%s", "replace raw data need key"); + Task.set_error_dup(-EC_KEY_NEEDED, CACHE_SVC, pstDataProcess->get_err_msg()); + return BUFFER_PROCESS_ERROR; + } + + key = condition->field_value(0); + 
stRow[2].Set(key->bin.ptr, key->bin.len); + Task.update_row(stRow); //获取数据 + + log_debug("value[len: %d]", stRow[3].bin.len); + + //调整备机的空节点过滤 + if (stRow[1].u64 & DTCHotBackup::EMPTY_NODE && m_pstEmptyNodeFilter) + { + m_pstEmptyNodeFilter->SET(*(unsigned int *)(key->bin.ptr)); + } + + //key在master不存在, 或者是空节点,purge cache. + if (stRow[1].u64 & DTCHotBackup::KEY_NOEXIST || stRow[1].u64 & DTCHotBackup::EMPTY_NODE) + { + log_debug("purge slave data"); + Node stNode = Cache.cache_find_auto_chose_hash(key->bin.ptr); + int rows = Cache.node_rows_count(stNode); + log_debug("migrate replay ,row %d", rows); + Cache.inc_total_row(0LL - rows); + Cache.cache_purge(key->bin.ptr); + return BUFFER_PROCESS_OK; + } + + // 解析成raw data + ALLOC_HANDLE_T hData = g_stSysMalloc.Malloc(stRow[3].bin.len); + if (hData == INVALID_HANDLE) + { + log_error("malloc error: %m"); + Task.set_error(-ENOMEM, CACHE_SVC, "malloc error"); + return BUFFER_PROCESS_ERROR; + } + + memcpy(g_stSysMalloc.handle_to_ptr(hData), stRow[3].bin.ptr, stRow[3].bin.len); + + if ((iRet = stNodeData.Attach(hData, 0, tableDef->key_format())) != 0) + { + log_error("parse raw-data error: %d, %s", iRet, stNodeData.get_err_msg()); + Task.set_error(-EC_BAD_RAW_DATA, CACHE_SVC, "bad raw data"); + return BUFFER_PROCESS_ERROR; + } + + // 检查packed key是否匹配 + DTCValue packed_key = TableDefinitionManager::Instance()->get_cur_table_def()->packed_key(stNodeData.Key()); + if (packed_key.bin.len != key->bin.len || memcmp(packed_key.bin.ptr, key->bin.ptr, key->bin.len)) + { + log_error("packed key miss match, key size=%d, packed key size=%d", key->bin.len, packed_key.bin.len); + log_error("packed key miss match, packed_key %s,key %s", packed_key.bin.ptr, key->bin.ptr); + Task.set_error(-EC_BAD_RAW_DATA, CACHE_SVC, "packed key miss match"); + return BUFFER_PROCESS_ERROR; + } + + // 查找分配node节点 + unsigned int uiNodeID; + Node stNode = Cache.cache_find_auto_chose_hash(key->bin.ptr); + + if (!stNode) + { + for (int i = 0; i < 2; i++) + { + 
stNode = Cache.cache_allocate(key->bin.ptr); + if (!(!stNode)) + break; + if (Cache.try_purge_size(1, stNode) != 0) + break; + } + if (!stNode) + { + log_error("alloc cache node error"); + Task.set_error(-EIO, CACHE_SVC, "alloc cache node error"); + return BUFFER_PROCESS_ERROR; + } + stNode.vd_handle() = INVALID_HANDLE; + } + else + { + Cache.remove_from_lru(stNode); + Cache.insert2_clean_lru(stNode); + } + + uiNodeID = stNode.node_id(); + + // 替换数据 + iRet = pstDataProcess->replace_data(&stNode, &stNodeData); + if (iRet != 0) + { + if (nodbMode) + { + /* FIXME: no backup db, can't purge data, no recover solution yet */ + log_error("cache replace raw data error: %d, %s", iRet, pstDataProcess->get_err_msg()); + Task.set_error(-EIO, CACHE_SVC, "ReplaceRawData() error"); + return BUFFER_PROCESS_ERROR; + } + else + { + log_error("cache replace raw data error: %d, %s. purge node: %u", iRet, pstDataProcess->get_err_msg(), uiNodeID); + Cache.purge_node_everything(key->bin.ptr, stNode); + return BUFFER_PROCESS_OK; + } + } + + Cache.inc_total_row(pstDataProcess->rows_inc()); + + log_debug("buffer_replace_raw_data success! 
"); + + return BUFFER_PROCESS_OK; +} + +BufferResult BufferProcess::buffer_adjust_lru(TaskRequest &Task) +{ + + const DTCFieldValue *condition = Task.request_condition(); + const DTCValue *key; + + log_debug("buffer_adjust_lru start "); + + RowValue stRow(Task.table_definition()); //一行数据 + + for (int i = 0; i < condition->num_fields(); i++) + { + key = condition->field_value(i); + + Node stNode; + int newhash, oldhash; + if (hashChanging) + { + if (targetNewHash) + { + oldhash = 0; + newhash = 1; + } + else + { + oldhash = 1; + newhash = 0; + } + + stNode = Cache.cache_find(key->bin.ptr, oldhash); + if (!stNode) + { + stNode = Cache.cache_find(key->bin.ptr, newhash); + } + else + { + Cache.move_to_new_hash(key->bin.ptr, stNode); + } + } + else + { + if (targetNewHash) + { + stNode = Cache.cache_find(key->bin.ptr, 1); + } + else + { + stNode = Cache.cache_find(key->bin.ptr, 0); + } + } + if (!stNode) + { + // continue; + Task.set_error(-EC_KEY_NOTEXIST, CACHE_SVC, "key not exist"); + return BUFFER_PROCESS_ERROR; + } + Cache.remove_from_lru(stNode); + Cache.insert2_clean_lru(stNode); + } + + return BUFFER_PROCESS_OK; +} + +BufferResult BufferProcess::buffer_verify_hbt(TaskRequest &Task) +{ + log_debug("buffer_verify_hbt start "); + + if (hbFeature == NULL) + { // 共享内存还没有激活热备特性 + NEW(HBFeature, hbFeature); + if (hbFeature == NULL) + { + log_error("new hot-backup feature error: %m"); + Task.set_error(-EC_SERVER_ERROR, "buffer_register_hb", "new hot-backup feature fail"); + return BUFFER_PROCESS_ERROR; + } + int iRet = hbFeature->Init(0); + if (iRet == -ENOMEM) + { + Node stNode; + if (Cache.try_purge_size(1, stNode) == 0) + iRet = hbFeature->Init(0); + } + if (iRet != 0) + { + log_error("init hot-backup feature error: %d", iRet); + Task.set_error(-EC_SERVER_ERROR, "buffer_register_hb", "init hot-backup feature fail"); + return BUFFER_PROCESS_ERROR; + } + iRet = Cache.add_feature(HOT_BACKUP, hbFeature->Handle()); + if (iRet != 0) + { + log_error("add hot-backup feature 
error: %d", iRet); + Task.set_error(-EC_SERVER_ERROR, "buffer_register_hb", "add hot-backup feature fail"); + return BUFFER_PROCESS_ERROR; + } + } + + int64_t master_timestamp = Task.versionInfo.master_hb_timestamp(); + if (hbFeature->slave_uptime() == 0) + { + hbFeature->slave_uptime() = master_timestamp; + } + else if (hbFeature->slave_uptime() != master_timestamp) + { + log_error("hot backup timestamp incorrect, master[%lld], this slave[%lld]", (long long)master_timestamp, (long long)(hbFeature->slave_uptime())); + Task.set_error(-EC_ERR_SYNC_STAGE, "buffer_verify_hbt", "verify hot backup timestamp fail"); + return BUFFER_PROCESS_ERROR; + } + + return BUFFER_PROCESS_OK; +} + +BufferResult BufferProcess::buffer_get_hbt(TaskRequest &Task) +{ + log_debug("buffer_get_hbt start "); + + if (hbFeature == NULL) + { // 共享内存还没有激活热备特性 + Task.versionInfo.set_master_hb_timestamp(0); + Task.versionInfo.set_slave_hb_timestamp(0); + } + else + { + Task.versionInfo.set_master_hb_timestamp(hbFeature->master_uptime()); + Task.versionInfo.set_slave_hb_timestamp(hbFeature->slave_uptime()); + } + + log_debug("master-up-time: %lld, slave-up-time: %lld", (long long)(Task.versionInfo.master_hb_timestamp()), (long long)(Task.versionInfo.slave_hb_timestamp())); + + return BUFFER_PROCESS_OK; +} + +BufferResult BufferProcess::buffer_query_serverinfo(TaskRequest &Task) +{ + log_debug("buffer_query_serverinfo start"); + Task.set_request_type(TaskTypeQueryHbLogInfo); + dispatch_hot_back_task(&Task); + return BUFFER_PROCESS_PUSH_HB; +} + +/* finished in one cache process cycle */ +BufferResult BufferProcess::buffer_nodehandlechange(TaskRequest &Task) +{ + log_debug("buffer_nodehandlechange start "); + + const DTCFieldValue *condition = Task.request_condition(); + const DTCValue *key = condition->field_value(0); + Node node; + MEM_HANDLE_T node_handle; + RawData node_raw_data(DTCBinMalloc::Instance(), 0); + /* no need of private raw data, just for copy */ + char *private_buff = NULL; + int 
buff_len; + MEM_HANDLE_T new_node_handle; + + if (condition->num_fields() < 1) + { + log_debug("%s", "nodehandlechange need key"); + Task.set_error_dup(-EC_KEY_NEEDED, CACHE_SVC, pstDataProcess->get_err_msg()); + return BUFFER_PROCESS_ERROR; + } + + /* packed key -> node id -> node handle -> node raw data -> private buff*/ + int newhash, oldhash; + if (hashChanging) + { + if (targetNewHash) + { + oldhash = 0; + newhash = 1; + } + else + { + oldhash = 1; + newhash = 0; + } + node = Cache.cache_find(key->bin.ptr, oldhash); + if (!node) + { + node = Cache.cache_find(key->bin.ptr, newhash); + } + else + { + Cache.move_to_new_hash(key->bin.ptr, node); + } + } + else + { + if (targetNewHash) + { + node = Cache.cache_find(key->bin.ptr, 1); + } + else + { + node = Cache.cache_find(key->bin.ptr, 0); + } + } + + if (!node) + { + log_debug("%s", "key not exist for defragmentation"); + Task.set_error(-ER_KEY_NOT_FOUND, CACHE_SVC, "node not found"); + return BUFFER_PROCESS_ERROR; + } + + node_handle = node.vd_handle(); + if (node_handle == INVALID_HANDLE) + { + Task.set_error(-EC_BAD_RAW_DATA, CACHE_SVC, "chunk not exist"); + return BUFFER_PROCESS_ERROR; + } + + node_raw_data.Attach(node_handle, tableDef->key_fields() - 1, tableDef->key_format()); + + if ((private_buff = (char *)MALLOC(node_raw_data.data_size())) == NULL) + { + log_error("no mem"); + Task.set_error(-ENOMEM, CACHE_SVC, "malloc error"); + return BUFFER_PROCESS_ERROR; + } + + memcpy(private_buff, node_raw_data.get_addr(), node_raw_data.data_size()); + buff_len = node_raw_data.data_size(); + if (node_raw_data.Destroy()) + { + log_error("node raw data detroy error"); + Task.set_error(-ENOMEM, CACHE_SVC, "free error"); + FREE_IF(private_buff); + return BUFFER_PROCESS_ERROR; + } + log_debug("old node handle: " UINT64FMT_T ", raw data size %d", node_handle, buff_len); + + /* new chunk */ + /* new node handle -> new node handle ptr <- node raw data ptr*/ + new_node_handle = DTCBinMalloc::Instance()->Malloc(buff_len); + 
log_debug("new node handle: " UINT64FMT_T, new_node_handle); + + if (new_node_handle == INVALID_HANDLE) + { + log_error("malloc error: %m"); + Task.set_error(-ENOMEM, CACHE_SVC, "malloc error"); + FREE_IF(private_buff); + return BUFFER_PROCESS_ERROR; + } + + memcpy(DTCBinMalloc::Instance()->handle_to_ptr(new_node_handle), private_buff, buff_len); + + /* free node raw data, set node handle */ + node.vd_handle() = new_node_handle; + FREE_IF(private_buff); + + log_debug("buffer_nodehandlechange success! "); + return BUFFER_PROCESS_OK; +} + +BufferResult BufferProcess::buffer_migrate(TaskRequest &Task) +{ + if (keyRoute == 0) + { + log_error("not support migrate cmd @ bypass mode"); + Task.set_error(-EC_SERVER_ERROR, "buffer_migrate", "Not Support @ Bypass Mode"); + return BUFFER_PROCESS_ERROR; + } + int iRet; + + const DTCFieldValue *ui = Task.request_operation(); + const DTCValue key = TableDefinitionManager::Instance()->get_cur_table_def()->packed_key(Task.packed_key()); + if (key.bin.ptr == 0 || key.bin.len <= 0) + { + Task.set_error(-EC_KEY_NEEDED, "buffer_migrate", "need set migrate key"); + return BUFFER_PROCESS_ERROR; + } + + log_debug("cache_cache_migrate start "); + + RowValue stRow(Task.table_definition()); //一行数据 + RawData stNodeData(&g_stSysMalloc, 1); + + Node stNode = Cache.cache_find_auto_chose_hash(key.bin.ptr); + + //如果有updateInfo则说明请求从DTC过来 + int flag = 0; + if (ui && ui->field_value(0)) + { + flag = ui->field_value(0)->s64; + } + if ((flag & 0xFF) == DTCMigrate::FROM_SERVER) + { + log_debug("this migrate cmd is from DTC"); + RowValue stRow(Task.table_definition()); //一行数据 + RawData stNodeData(&g_stSysMalloc, 1); + stRow[2].Set(key.bin.ptr, key.bin.len); + Task.update_row(stRow); //获取数据 + + log_debug("value[len: %d]", stRow[3].bin.len); + + //key在master不存在, 或者是空节点,purge cache. 
+ if (stRow[1].u64 & DTCHotBackup::KEY_NOEXIST || stRow[1].u64 & DTCHotBackup::EMPTY_NODE) + { + log_debug("purge slave data"); + Cache.cache_purge(key.bin.ptr); + return BUFFER_PROCESS_OK; + } + + // 解析成raw data + ALLOC_HANDLE_T hData = g_stSysMalloc.Malloc(stRow[3].bin.len); + if (hData == INVALID_HANDLE) + { + log_error("malloc error: %m"); + Task.set_error(-ENOMEM, CACHE_SVC, "malloc error"); + return BUFFER_PROCESS_ERROR; + } + + memcpy(g_stSysMalloc.handle_to_ptr(hData), stRow[3].bin.ptr, stRow[3].bin.len); + + if ((iRet = stNodeData.Attach(hData, 0, tableDef->key_format())) != 0) + { + log_error("parse raw-data error: %d, %s", iRet, stNodeData.get_err_msg()); + Task.set_error(-EC_BAD_RAW_DATA, CACHE_SVC, "bad raw data"); + return BUFFER_PROCESS_ERROR; + } + + // 检查packed key是否匹配 + DTCValue packed_key = TableDefinitionManager::Instance()->get_cur_table_def()->packed_key(stNodeData.Key()); + if (packed_key.bin.len != key.bin.len || memcmp(packed_key.bin.ptr, key.bin.ptr, key.bin.len)) + { + log_error("packed key miss match, key size=%d, packed key size=%d", + key.bin.len, packed_key.bin.len); + + Task.set_error(-EC_BAD_RAW_DATA, CACHE_SVC, "packed key miss match"); + return BUFFER_PROCESS_ERROR; + } + + // 查找分配node节点 + unsigned int uiNodeID; + + if (!stNode) + { + for (int i = 0; i < 2; i++) + { + stNode = Cache.cache_allocate(key.bin.ptr); + if (!(!stNode)) + break; + if (Cache.try_purge_size(1, stNode) != 0) + break; + } + if (!stNode) + { + log_error("alloc cache node error"); + Task.set_error(-EIO, CACHE_SVC, "alloc cache node error"); + return BUFFER_PROCESS_ERROR; + } + stNode.vd_handle() = INVALID_HANDLE; + } + else + { + Cache.remove_from_lru(stNode); + Cache.insert2_clean_lru(stNode); + } + if ((flag >> 8) & 0xFF) //如果为脏节点 + { + + Cache.remove_from_lru(stNode); + Cache.insert2_dirty_lru(stNode); + } + + uiNodeID = stNode.node_id(); + + // 替换数据 + iRet = pstDataProcess->replace_data(&stNode, &stNodeData); + if (iRet != 0) + { + if (nodbMode) + { + /* 
FIXME: no backup db, can't purge data, no recover solution yet */ + log_error("cache replace raw data error: %d, %s", iRet, pstDataProcess->get_err_msg()); + Task.set_error(-EIO, CACHE_SVC, "ReplaceRawData() error"); + return BUFFER_PROCESS_ERROR; + } + else + { + log_error("cache replace raw data error: %d, %s. purge node: %u", + iRet, pstDataProcess->get_err_msg(), uiNodeID); + Cache.purge_node_everything(key.bin.ptr, stNode); + return BUFFER_PROCESS_OK; + } + } + if (write_hb_log(key.bin.ptr, stNode, DTCHotBackup::SYNC_UPDATE)) + { + log_crit("buffer_migrate: log update key failed"); + } + Cache.inc_total_row(pstDataProcess->rows_inc()); + + Task.prepare_result_no_limit(); + + return BUFFER_PROCESS_OK; + } + + log_debug("this migrate cmd is from api"); + //请求从工具过来,我们需要构造请求发给其他dtc + + if (!stNode) + { + Task.set_error(-EC_KEY_NOTEXIST, "buffer_migrate", "this key not found in cache"); + return BUFFER_PROCESS_ERROR; + } + //获取该节点的raw-data,构建replace请求给后端helper + iRet = pstDataProcess->get_all_rows(&stNode, &stNodeData); + if (iRet != 0) + { + log_error("get raw-data failed"); + Task.set_error_dup(-EIO, CACHE_SVC, pstDataProcess->get_err_msg()); + return BUFFER_PROCESS_ERROR; + } + + DTCFieldValue *uitmp = new DTCFieldValue(4); + if (uitmp == NULL) + { + Task.set_error(-EIO, CACHE_SVC, "migrate:new DTCFieldValue error"); + return BUFFER_PROCESS_ERROR; + } + //id0 {"type", DField::Unsigned, 4, DTCValue::Make(0), 0} + //type的最后一个字节用来表示请求来着其他dtc还是api + //倒数第二个字节表示节点是否为脏 + uitmp->add_value(0, DField::Set, DField::Unsigned, DTCValue::Make(DTCMigrate::FROM_SERVER | (stNode.is_dirty() << 8))); + + //id1 {"flag", DField::Unsigned, 1, DTCValue::Make(0), 0}, + uitmp->add_value(1, DField::Set, DField::Unsigned, DTCValue::Make(DTCHotBackup::HAS_VALUE)); + //id2 {"key", DField::Binary, 255, DTCValue::Make(0), 0}, + + //id3 {"value", DField::Binary, MAXPACKETSIZE, DTCValue::Make(0), 0}, + + FREE_IF(Task.migratebuf); + Task.migratebuf = (char *)calloc(1, stNodeData.data_size()); + 
if (Task.migratebuf == NULL) + { + log_error("create buffer failed"); + Task.set_error(-EIO, CACHE_SVC, "migrate:get raw data,create buffer failed"); + return BUFFER_PROCESS_ERROR; + } + memcpy(Task.migratebuf, (char *)(stNodeData.get_addr()), (int)(stNodeData.data_size())); + uitmp->add_value(3, DField::Set, DField::Binary, + DTCValue::Make(Task.migratebuf, stNodeData.data_size())); + Task.set_request_operation(uitmp); + keyRoute->key_migrating(stNodeData.Key()); + + return BUFFER_PROCESS_REMOTE; +} + +BufferResult BufferProcess::buffer_clear_cache(TaskRequest &Task) +{ + if (updateMode != MODE_SYNC) + { + log_error("try to clear cache for async mode, abort..."); + Task.set_error(-EC_SERVER_ERROR, "buffer_clear_cache", "can not clear cache for aync mode, abort"); + return BUFFER_PROCESS_ERROR; + } + // clean and rebuild + int64_t mu = 0, su = 0; + if (hbFeature != NULL) + { + mu = hbFeature->master_uptime(); + su = hbFeature->slave_uptime(); + } + // table.conf in shm is set in clear_create + int ret = Cache.clear_create(); + if (ret < 0) + { + log_error("clear and create cache error: %s", Cache.Error()); + if (ret == -1) + { + log_error("fault error, exit..."); + exit(-1); + } + if (ret == -2) + { + log_error("error, abort..."); + Task.set_error(-EC_SERVER_ERROR, "buffer_clear_cache", "clear cache error, abort"); + return BUFFER_PROCESS_ERROR; + } + } + pstDataProcess->change_mallocator(DTCBinMalloc::Instance()); + // setup hotbackup + if (hbFeature != NULL) + { + hbFeature->Detach(); + // no need consider no enough mem, as mem is just cleared + hbFeature->Init(0); + int iRet = Cache.add_feature(HOT_BACKUP, hbFeature->Handle()); + if (iRet != 0) + { + log_error("add hot-backup feature error: %d", iRet); + exit(-1); + } + hbFeature->master_uptime() = mu; + hbFeature->slave_uptime() = su; + } + // hotbackup + char buf[16]; + memset(buf, 0, sizeof(buf)); + Node node; + if (write_hb_log(buf, node, DTCHotBackup::SYNC_CLEAR)) + log_error("hb: log clear cache error"); + 
+ return BUFFER_PROCESS_OK; +} diff --git a/src/search_local/index_storage/cache/black_hole.cc b/src/search_local/index_storage/cache/black_hole.cc new file mode 100644 index 0000000..ceaed19 --- /dev/null +++ b/src/search_local/index_storage/cache/black_hole.cc @@ -0,0 +1,50 @@ +/* + * ===================================================================================== + * + * Filename: black_hole.cc + * + * Description: + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. + * + * ===================================================================================== + */ +#include + +BlackHole::~BlackHole(void) +{ +} + +void BlackHole::task_notify(TaskRequest *cur) +{ +#if 0 + switch(cur->request_code()){ + case DRequest::Get: + break; + + case DRequest::Insert: // TableDef->has_auto_increment() must be false + cur->resultInfo.set_affected_rows(1); + break; + + case DRequest::Update: + case DRequest::Delete: + case DRequest::Purge: + case DRequest::Replace: + default: + cur->resultInfo.set_affected_rows(1); + break; + } + +#else + // preset affected_rows==0 is obsoleted + // use BlackHole flag instead + cur->mark_as_black_hole(); +#endif + cur->reply_notify(); +} diff --git a/src/search_local/index_storage/cache/black_hole.h b/src/search_local/index_storage/cache/black_hole.h new file mode 100644 index 0000000..adecb79 --- /dev/null +++ b/src/search_local/index_storage/cache/black_hole.h @@ -0,0 +1,30 @@ +/* + * ===================================================================================== + * + * Filename: black_hole.h + * + * Description: + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. 
+ * + * ===================================================================================== + */ +#include + +class BlackHole : public TaskDispatcher +{ +public: + BlackHole(PollThread *o) : TaskDispatcher(o), output(o){}; + virtual ~BlackHole(void); + void bind_dispatcher(TaskDispatcher *p) { output.bind_dispatcher(p); } + +private: + RequestOutput output; + virtual void task_notify(TaskRequest *); +}; diff --git a/src/search_local/index_storage/cache/buffer_bypass.cc b/src/search_local/index_storage/cache/buffer_bypass.cc new file mode 100644 index 0000000..0328932 --- /dev/null +++ b/src/search_local/index_storage/cache/buffer_bypass.cc @@ -0,0 +1,61 @@ +/* + * ===================================================================================== + * + * Filename: buffer_bypass.cc + * + * Description: + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. + * + * ===================================================================================== + */ +#include + +class ReplyBypass : public ReplyDispatcher +{ +public: + ReplyBypass(void) {} + virtual ~ReplyBypass(void); + virtual void reply_notify(TaskRequest *task); +}; + +ReplyBypass::~ReplyBypass(void) {} + +void ReplyBypass::reply_notify(TaskRequest *task) +{ + if (task->result) + task->pass_all_result(task->result); + task->reply_notify(); +} + +static ReplyBypass replyBypass; + +BufferBypass::~BufferBypass(void) +{ +} + +void BufferBypass::task_notify(TaskRequest *cur) +{ + if (cur->is_batch_request()) + { + cur->reply_notify(); + return; + } + + if (cur->count_only() && (cur->requestInfo.limit_start() || cur->requestInfo.limit_count())) + { + cur->set_error(-EC_BAD_COMMAND, "BufferBypass", "There's nothing to limit because no fields required"); + cur->reply_notify(); + return; + } + + cur->mark_as_pass_thru(); + cur->push_reply_dispatcher(&replyBypass); + output.task_notify(cur); +} diff --git 
a/src/search_local/index_storage/cache/buffer_bypass.h b/src/search_local/index_storage/cache/buffer_bypass.h new file mode 100644 index 0000000..5d3ab8a --- /dev/null +++ b/src/search_local/index_storage/cache/buffer_bypass.h @@ -0,0 +1,30 @@ +/* + * ===================================================================================== + * + * Filename: buffer_bypass.h + * + * Description: + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. + * + * ===================================================================================== + */ +#include + +class BufferBypass : public TaskDispatcher +{ +public: + BufferBypass(PollThread *o) : TaskDispatcher(o), output(o){}; + virtual ~BufferBypass(void); + void bind_dispatcher(TaskDispatcher *p) { output.bind_dispatcher(p); } + +private: + RequestOutput output; + virtual void task_notify(TaskRequest *); +}; diff --git a/src/search_local/index_storage/cache/buffer_def.h b/src/search_local/index_storage/cache/buffer_def.h new file mode 100644 index 0000000..3df4122 --- /dev/null +++ b/src/search_local/index_storage/cache/buffer_def.h @@ -0,0 +1,46 @@ +/* + * ===================================================================================== + * + * Filename: buffer_def.h + * + * Description: + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. 
+ * + * ===================================================================================== + */ +#ifndef __CACHE_DEF_H +#define __CACHE_DEF_H + +#define E_OK 0 //success +#define E_FAIL -1 //fail +#define KEY_LEN_LEN sizeof(char) //"key长"字段长度 +#define MAX_KEY_LEN 256 //key最大长度,由"key长"字段长度所能表示的最大数字决定 +#define ERR_MSG_LEN 1024 +#define MAX_PURGE_NUM 1000 //每次purge的节点数上限 +#define CACHE_SVC "dtc" //cache服务名 +//#define VERSION "1.0.3" //版本信息 + +#define STRNCPY(dest, src, len) \ + { \ + memset(dest, 0x00, len); \ + strncpy(dest, src, len - 1); \ + } +#define SNPRINTF(dest, len, fmt, args...) \ + { \ + memset(dest, 0x00, len); \ + snprintf(dest, len - 1, fmt, ##args); \ + } +#define MSGNCPY(dest, len, fmt, args...) \ + { \ + memset(dest, 0x00, len); \ + snprintf(dest, len - 1, "[%s][%d]%s: " fmt "\n", __FILE__, __LINE__, __FUNCTION__, ##args); \ + } + +#endif diff --git a/src/search_local/index_storage/cache/buffer_flush.cc b/src/search_local/index_storage/cache/buffer_flush.cc new file mode 100644 index 0000000..3731df0 --- /dev/null +++ b/src/search_local/index_storage/cache/buffer_flush.cc @@ -0,0 +1,442 @@ +/* + * ===================================================================================== + * + * Filename: buffer_flush.cc + * + * Description: + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. 
+ * + * ===================================================================================== + */ +#include "buffer_flush.h" +#include "buffer_process.h" +#include "global.h" + +DTCFlushRequest::DTCFlushRequest(BufferProcess *o, const char *key) : owner(o), + numReq(0), + badReq(0), + wait(NULL) +{ +} + +DTCFlushRequest::~DTCFlushRequest() +{ + if (wait) + { + wait->reply_notify(); + wait = NULL; + } +} + +class DropDataReply : public ReplyDispatcher +{ +public: + DropDataReply() {} + virtual void reply_notify(TaskRequest *cur); +}; + +void DropDataReply::reply_notify(TaskRequest *cur) +{ + DTCFlushRequest *req = cur->OwnerInfo(); + if (req == NULL) + delete cur; + else + req->complete_row(cur, cur->owner_index()); +} + +static DropDataReply dropReply; + +int DTCFlushRequest::flush_row(const RowValue &row) +{ + TaskRequest *pTask = new TaskRequest; + if (pTask == NULL) + { + log_error("cannot flush row, new task error, possible memory exhausted\n"); + return -1; + } + + if (pTask->Copy(row) < 0) + { + log_error("cannot flush row, from: %s error: %s \n", + pTask->resultInfo.error_from(), + pTask->resultInfo.error_message()); + return -1; + } + pTask->set_request_type(TaskTypeCommit); + pTask->push_reply_dispatcher(&dropReply); + pTask->set_owner_info(this, numReq, NULL); + owner->inc_async_flush_stat(); + //TaskTypeCommit never expired + //pTask->set_expire_time(3600*1000/*ms*/); + numReq++; + owner->push_flush_queue(pTask); + return 0; +} + +void DTCFlushRequest::complete_row(TaskRequest *req, int index) +{ + delete req; + numReq--; + if (numReq == 0) + { + if (wait) + { + wait->reply_notify(); + wait = NULL; + } + owner->complete_flush_request(this); + } +} + +MARKER_STAMP BufferProcess::calculate_current_marker() +{ + time_t now; + + time(&now); + return now - (now % markerInterval); +} + +void BufferProcess::set_drop_count(int c) +{ + // Cache.set_drop_count(c); +} + +void BufferProcess::get_dirty_stat() +{ + uint64_t ullMaxNode; + uint64_t ullMaxRow; + const 
double rate = 0.9; + + if (DTCBinMalloc::Instance()->user_alloc_size() >= DTCBinMalloc::Instance()->total_size() * rate) + { + ullMaxNode = Cache.total_used_node(); + ullMaxRow = Cache.total_used_row(); + } + else + { + if (DTCBinMalloc::Instance()->user_alloc_size() > 0) + { + double enlarge = DTCBinMalloc::Instance()->total_size() * rate / DTCBinMalloc::Instance()->user_alloc_size(); + ullMaxNode = (uint64_t)(Cache.total_used_node() * enlarge); + ullMaxRow = (uint64_t)(Cache.total_used_row() * enlarge); + } + else + { + ullMaxNode = 0; + ullMaxRow = 0; + } + } +} + +void BufferProcess::set_flush_parameter( + int intvl, + int mreq, + int mintime, + int maxtime) +{ + // require v4 cache + if (Cache.get_cache_info()->version < 4) + return; + + /* + if(intvl < 60) + intvl = 60; + else if(intvl > 43200) + intvl = 43200; + */ + + /* marker time interval changed to 1sec */ + intvl = 1; + markerInterval = intvl; + + /* 1. make sure at least one time marker exist + * 2. init first marker time and last marker time + * */ + Node stTimeNode = Cache.first_time_marker(); + if (!stTimeNode) + Cache.insert_time_marker(calculate_current_marker()); + Cache.first_time_marker_time(); + Cache.last_time_marker_time(); + + if (mreq <= 0) + mreq = 1; + if (mreq > 10000) + mreq = 10000; + + if (mintime < 10) + mintime = 10; + if (maxtime <= mintime) + maxtime = mintime * 2; + + maxFlushReq = mreq; + minDirtyTime = mintime; + maxDirtyTime = maxtime; + + //get_dirty_stat(); + + /*attach timer only if async mode or sync mode but mem dirty*/ + if (updateMode == MODE_ASYNC || + (updateMode == MODE_SYNC && mem_dirty == true)) + { + /* check for expired dirty node every second */ + flushTimer = owner->get_timer_list(1); + attach_timer(flushTimer); + } +} + +int BufferProcess::commit_flush_request(DTCFlushRequest *req, TaskRequest *callbackTask) +{ + req->wait = callbackTask; + + if (req->numReq == 0) + delete req; + else + nFlushReq++; + + statCurrFlushReq = nFlushReq; + return 0; +} + +void 
BufferProcess::complete_flush_request(DTCFlushRequest *req) +{ + delete req; + nFlushReq--; + statCurrFlushReq = nFlushReq; + + calculate_flush_speed(0); + + if (nFlushReq < mFlushReq) + flush_next_node(); +} + +void BufferProcess::timer_notify(void) +{ + log_debug("flush timer event..."); + int ret = 0; + + MARKER_STAMP cur = calculate_current_marker(); + if (Cache.first_time_marker_time() != cur) + Cache.insert_time_marker(cur); + + calculate_flush_speed(1); + + /* flush next node return + * 1: no dirty node exist, sync dtc, should not attach timer again + * 0: one flush request created, nFlushReq inc in flush_next_node, notinue + * others: on flush request created due to some reason, should break for another flush timer event, otherwise may be + * block here, eg. no dirty node exist, and in async mode + * */ + while (nFlushReq < mFlushReq) + { + ret = flush_next_node(); + if (ret == 0) + { + continue; + } + else + { + break; + } + } + + /*SYNC + mem_dirty/ASYNC need to reattach flush timer*/ + if ((updateMode == MODE_SYNC && mem_dirty == true) || updateMode == MODE_ASYNC) + attach_timer(flushTimer); +} + +int BufferProcess::oldest_dirty_node_alarm() +{ + Node stHead = Cache.dirty_lru_head(); + Node stNode = stHead.Prev(); + + if (Cache.is_time_marker(stNode)) + { + stNode = stNode.Prev(); + if (Cache.is_time_marker(stNode) || stNode == stHead) + { + return 0; + } + else + { + return 1; + } + } + else if (stNode == stHead) + { + return 0; + } + else + { + return 1; + } +} + +/*flush speed(nFlushReq) only depend on oldest dirty node existing time*/ +void BufferProcess::calculate_flush_speed(int is_flush_timer) +{ + delete_tail_time_markers(); + + // time base + int m, v; + unsigned int t1 = Cache.first_time_marker_time(); + unsigned int t2 = Cache.last_time_marker_time(); + //initialized t1 and t2, so no need of test for this + v = t1 - t2; + + //if start with sync and mem dirty, flush as fast as we can + if (updateMode == MODE_SYNC) + { + if (mem_dirty == false) 
+ { + mFlushReq = 0; + } + else + { + mFlushReq = maxFlushReq; + } + goto __stat; + } + + //alarm if oldest dirty node exist too much time, flush at fastest speed + if (v >= maxDirtyTime) + { + mFlushReq = maxFlushReq; + if (oldest_dirty_node_alarm() && is_flush_timer) + { + log_notice("oldest dirty node exist time > max dirty time"); + } + } + else if (v >= minDirtyTime) + { + m = 1 + (v - minDirtyTime) * (maxFlushReq - 1) / (maxDirtyTime - minDirtyTime); + if (m > mFlushReq) + mFlushReq = m; + } + else + { + mFlushReq = 0; + } + +__stat: + if (mFlushReq > maxFlushReq) + mFlushReq = maxFlushReq; + + statMaxFlushReq = mFlushReq; + statOldestDirtyTime = v; +} + +/* return -1: encount the only time marker + * return 1: no dirty node exist, clear mem dirty + * return 2: no dirty node exist, in async mode + * return -2: no flush request created + * return 0: one flush request created + * */ +int BufferProcess::flush_next_node(void) +{ + unsigned int uiFlushRowsCnt = 0; + MARKER_STAMP stamp; + static MARKER_STAMP last_rm_stamp; + + Node stHead = Cache.dirty_lru_head(); + Node stNode = stHead; + Node stPreNode = stNode.Prev(); + + /*case 1: delete continues time marker, until + * encount a normal node/head node, go next + * encount the only time marker*/ + while (1) + { + stNode = stPreNode; + stPreNode = stNode.Prev(); + + if (!Cache.is_time_marker(stNode)) + break; + + if (Cache.first_time_marker_time() == stNode.Time()) + { + if (updateMode == MODE_SYNC && mem_dirty == true) + { + /* delete this time marker, flush all dirty node */ + Cache.remove_time_marker(stNode); + stNode = stPreNode; + stPreNode = stNode.Prev(); + while (stNode != stHead) + { + buffer_flush_data_timer(stNode, uiFlushRowsCnt); + stNode = stPreNode; + stPreNode = stNode.Prev(); + } + + disable_timer(); + mem_dirty = false; + log_notice("mem clean now for sync cache"); + return 1; + } + return -1; + } + + stamp = stNode.Time(); + if (stamp > last_rm_stamp) + { + last_rm_stamp = stamp; + } + + 
log_debug("remove time marker in dirty lru, time %u", stNode.Time()); + Cache.remove_time_marker(stNode); + } + + /*case 2: this the head node, clear mem dirty if nessary, return, should not happen*/ + if (stNode == stHead) + { + if (updateMode == MODE_SYNC && mem_dirty == true) + { + disable_timer(); + mem_dirty = false; + log_notice("mem clean now for sync cache"); + return 1; + } + else + { + return 2; + } + } + + /*case 3: this a normal node, flush it. + * return -2 if no flush request added to cache process + * */ + int iRet = buffer_flush_data_timer(stNode, uiFlushRowsCnt); + if (iRet == -1 || iRet == -2 || iRet == -3 || iRet == 1) + { + return -2; + } + + return 0; +} + +void BufferProcess::delete_tail_time_markers() +{ + Node stHead = Cache.dirty_lru_head(); + Node stNode = stHead; + Node stPreNode = stNode.Prev(); + + while (1) + { + stNode = stPreNode; + stPreNode = stNode.Prev(); + + if (stNode == stHead || Cache.first_time_marker_time() == stNode.Time()) + break; + + if (Cache.is_time_marker(stNode) && Cache.is_time_marker(stPreNode)) + Cache.remove_time_marker(stNode); + else + break; + } +} diff --git a/src/search_local/index_storage/cache/buffer_flush.h b/src/search_local/index_storage/cache/buffer_flush.h new file mode 100644 index 0000000..a917a39 --- /dev/null +++ b/src/search_local/index_storage/cache/buffer_flush.h @@ -0,0 +1,49 @@ +/* + * ===================================================================================== + * + * Filename: buffer_flush.h + * + * Description: + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. 
 *
 * =====================================================================================
 */
#ifndef __H_CACHE_FLUSH_H__
#define __H_CACHE_FLUSH_H__

#include "timer_list.h"
#include "lqueue.h"
#include "task_request.h"
#include "buffer_process.h"
#include "log.h"

class BufferProcess;

/*
 * One flush operation made of zero or more per-row tasks.
 *
 * Rows are queued with flush_row(); each queued row increments numReq and
 * each completed row (complete_row) decrements it. The optional `wait` task
 * is replied to when the request is destroyed or when the last row completes.
 */
class DTCFlushRequest
{
private:
    BufferProcess *owner; // processor whose flush queue receives the row tasks
    int numReq;           // number of row tasks still outstanding
    int badReq;           // initialised to 0 by the constructor
    TaskRequest *wait;    // task to reply to on completion; may be NULL

public:
    friend class BufferProcess;
    DTCFlushRequest(BufferProcess *, const char *key);
    ~DTCFlushRequest(void);

    // Table definition of the owning processor.
    const DTCTableDefinition *table_definition(void) const { return owner->table_definition(); }

    // Queue one row for flushing; returns 0 on success, -1 on failure.
    int flush_row(const RowValue &);
    // Called when a row task created by flush_row() has completed.
    void complete_row(TaskRequest *req, int index);
    // Number of row tasks still outstanding.
    int Count(void) const { return numReq; }
};

#endif
// diff --git a/src/search_local/index_storage/cache/buffer_pool.cc b/src/search_local/index_storage/cache/buffer_pool.cc new file mode 100644 index 0000000..ed05856 --- /dev/null +++ b/src/search_local/index_storage/cache/buffer_pool.cc @@ -0,0 +1,1561 @@
/*
 * =====================================================================================
 *
 * Filename: buffer_pool.cc
 *
 * Description:
 *
 * Version: 1.0
 * Created: 09/08/2020 10:02:05 PM
 * Revision: none
 * Compiler: gcc
 *
 * Author: Norton, yangshuang68@jd.com
 * Company: JD.com, Inc.
+ * + * ===================================================================================== + */ +#include +#include +#include +#include +#include + +#include "pt_malloc.h" +#include "namespace.h" +#include "buffer_pool.h" +#include "data_chunk.h" +#include "empty_filter.h" +#include "task_request.h" +#include "dtc_global.h" +#include "relative_hour_calculator.h" +#include "table_def_manager.h" + +extern DTCTableDefinition *gTableDef[]; +extern int hashChanging; +extern int targetNewHash; + +DTC_USING_NAMESPACE + +DTCBufferPool::DTCBufferPool(PurgeNodeNotifier *pn) : _purge_notifier(pn) +{ + memset(&_cacheInfo, 0x00, sizeof(CacheInfo)); + + _hash = 0; + _ngInfo = 0; + _feature = 0; + _nodeIndex = 0; + _colExpand = 0; + + memset(_errmsg, 0, sizeof(_errmsg)); + _need_set_integrity = 0; + _need_purge_node_count = 0; + + _delay_purge_timerlist = NULL; + firstMarkerTime = lastMarkerTime = 0; + emptyLimit = 0; + disableTryPurge = 0; + survival_hour = statmgr.get_sample(DATA_SURVIVAL_HOUR_STAT); +} + +DTCBufferPool::~DTCBufferPool() +{ + _hash->Destroy(); + _ngInfo->Destroy(); + _feature->Destroy(); + _nodeIndex->Destroy(); + + /* 运行到这里,说明程序是正常stop的,设置共享内存完整性标记 */ + if (_need_set_integrity) + { + log_notice("Share Memory Integrity... 
ok"); + DTCBinMalloc::Instance()->set_share_memory_integrity(1); + } +} + +/* 检查lru链表是否cross-link了,一旦发生这种情况,没法处理了 :( */ +static inline int check_cross_linked_lru(Node node) +{ + Node v = node.Prev(); + + if (v == node) + { + log_crit("BUG: cross-linked lru list"); + return -1; + } + return 0; +} + +/* 验证cacheInfo合法性, 避免出现意外 */ +int DTCBufferPool::verify_cache_info(CacheInfo *info) +{ + if (INVALID_HANDLE != 0UL) + { + snprintf(_errmsg, sizeof(_errmsg), "PANIC: invalid handle must be 0UL"); + return -1; + } + + if (INVALID_NODE_ID != (NODE_ID_T)(-1)) + { + snprintf(_errmsg, sizeof(_errmsg), + "PANIC: invalid node id must be %u, but it is %u now", + (NODE_ID_T)(-1), INVALID_NODE_ID); + return -1; + } + + if (info->version != 4) + { + snprintf(_errmsg, sizeof(_errmsg), + "only support cache version >= 4"); + return -1; + } + + /* 系统可工作的最小内存 */ + /* 1. emptyFilter = 0 Min=64M */ + /* 2. emptyFilter = 1 Min=256M, 初步按照1.5G用户来计算 */ + + if (info->emptyFilter) + { + if (info->ipcMemSize < (256UL << 20)) + { + snprintf(_errmsg, sizeof(_errmsg), + "Empty-Node Filter function need min 256M mem"); + return -1; + } + } + + if (info->ipcMemSize < (64UL << 20)) + { + snprintf(_errmsg, sizeof(_errmsg), + "too small mem size, need min 64M mem"); + return -1; + } + + /* size check on 32bits platform*/ + if (sizeof(long) == 4) + { + if (info->ipcMemSize >= UINT_MAX) + { + snprintf(_errmsg, sizeof(_errmsg), + "cache size " UINT64FMT "exceed 4G, Please upgrade to 64 bit version", + info->ipcMemSize); + return -1; + } + } + + /* support max 64G memory size*/ + if (info->ipcMemSize > (64ULL << 30)) + { + snprintf(_errmsg, sizeof(_errmsg), + "cache size exceed 64G, unsupported"); + return -1; + } + + return 0; +} + +int DTCBufferPool::check_expand_status() +{ + if (!_colExpand) + { + snprintf(_errmsg, sizeof(_errmsg), "column expand not support"); + return -2; + } + if (_colExpand->is_expanding()) + { + snprintf(_errmsg, sizeof(_errmsg), "column expanding, please later"); + return -1; + } 
+ return 0; +} + +unsigned char DTCBufferPool::shm_table_idx() +{ + if (!_colExpand) + { + // column not supported, always return 0 + return 0; + } + + return _colExpand->cur_table_idx(); +} + +bool DTCBufferPool::col_expand(const char *table, int len) +{ + if (!_colExpand->expand(table, len)) + { + snprintf(_errmsg, sizeof(_errmsg), "column expand error"); + return false; + } + + return true; +} + +int DTCBufferPool::try_col_expand(const char *table, int len) +{ + return _colExpand->try_expand(table, len); +} + +bool DTCBufferPool::reload_table() +{ + if (!_colExpand) + { + return true; + } + if (!_colExpand->reload_table()) + { + snprintf(_errmsg, sizeof(_errmsg), "reload table error"); + return false; + } + return true; +} + +int DTCBufferPool::cache_open(CacheInfo *info) +{ +TRY_CACHE_INIT_AGAIN: + if (info->readOnly == 0) + { + if (verify_cache_info(info) != 0) + return -1; + memcpy((char *)&_cacheInfo, info, sizeof(CacheInfo)); + } + else + { + memset((char *)&_cacheInfo, 0, sizeof(CacheInfo)); + _cacheInfo.readOnly = 1; + _cacheInfo.keySize = info->keySize; + _cacheInfo.ipcMemKey = info->ipcMemKey; + } + + //初始化统计对象 + statCacheSize = statmgr.get_item_u32(DTC_CACHE_SIZE); + statCacheKey = statmgr.get_item_u32(DTC_CACHE_KEY); + statCacheVersion = statmgr.get_item(DTC_CACHE_VERSION); + statUpdateMode = statmgr.get_item_u32(DTC_UPDATE_MODE); + statEmptyFilter = statmgr.get_item_u32(DTC_EMPTY_FILTER); + statHashSize = statmgr.get_item_u32(DTC_BUCKET_TOTAL); + statFreeBucket = statmgr.get_item_u32(DTC_FREE_BUCKET); + statDirtyEldest = statmgr.get_item_u32(DTC_DIRTY_ELDEST); + statDirtyAge = statmgr.get_item_u32(DTC_DIRTY_AGE); + statTryPurgeCount = statmgr.get_sample(TRY_PURGE_COUNT); + statPurgeForCreateUpdateCount = statmgr.get_sample(PURGE_CREATE_UPDATE_STAT); + statTryPurgeNodes = statmgr.get_item_u32(TRY_PURGE_NODES); + statLastPurgeNodeModTime = statmgr.get_item_u32(LAST_PURGE_NODE_MOD_TIME); + statDataExistTime = statmgr.get_item_u32(DATA_EXIST_TIME); + + 
//打开共享内存 + if (_shm.Open(_cacheInfo.ipcMemKey) > 0) + { + //共享内存已存在 + + if (_cacheInfo.createOnly) + { + snprintf(_errmsg, sizeof(_errmsg), "m_shm already exist"); + return -1; + } + + if (_cacheInfo.readOnly == 0 && _shm.Lock() != 0) + { + snprintf(_errmsg, sizeof(_errmsg), "m_shm.Lock() failed"); + return -1; + } + + if (_shm.Attach(_cacheInfo.readOnly) == NULL) + { + snprintf(_errmsg, sizeof(_errmsg), "m_shm.Attach() failed"); + return -1; + } + + //底层分配器 + if (DTCBinMalloc::Instance()->Attach(_shm.Ptr(), _shm.Size()) != 0) + { + snprintf(_errmsg, + sizeof(_errmsg), + "binmalloc attach failed: %s", + M_ERROR()); + return -1; + } + + //内存版本检测, 目前因为底层分配器的缘故,只支持version >= 4的版本 + _cacheInfo.version = DTCBinMalloc::Instance()->detect_version(); + if (_cacheInfo.version != 4) + { + snprintf(_errmsg, sizeof(_errmsg), "unsupport version, %d", _cacheInfo.version); + return -1; + } + + /* 检查共享内存完整性,通过*/ + if (DTCBinMalloc::Instance()->share_memory_integrity()) + { + log_notice("Share Memory Integrity Check.... ok"); + /* + * 设置共享内存不完整标记 + * + * 这样可以在程序coredump引起内存混乱时,再次重启后dtc能发现内存已经写乱了。 + */ + if (_cacheInfo.readOnly == 0) + { + _need_set_integrity = 1; + DTCBinMalloc::Instance()->set_share_memory_integrity(0); + } + } + /* 不通过 */ + else + { + log_warning("Share Memory Integrity Check... 
failed"); + + if (_cacheInfo.autoDeleteDirtyShm) + { + if (_cacheInfo.readOnly == 1) + { + log_error("ReadOnly Share Memory is Confuse"); + return -1; + } + + /* 删除共享内存,重新启动cache初始化流程 */ + if (_shm.Delete() < 0) + { + log_error("Auto Delete Share Memory failed: %m"); + return -1; + } + + log_notice("Auto Delete Share Memory Success, Try Rebuild"); + + _shm.Unlock(); + + DTCBinMalloc::Destroy(); + + /* 重新初始化 */ + goto TRY_CACHE_INIT_AGAIN; + } + } + } + + //共享内存不存在,需要创建 + else + { + //只读,失败 + if (_cacheInfo.readOnly) + { + snprintf(_errmsg, sizeof(_errmsg), "readonly m_shm non-exists"); + return -1; + } + + //创建 + if (_shm.Create(_cacheInfo.ipcMemKey, _cacheInfo.ipcMemSize) <= 0) + { + if (errno == EACCES || errno == EEXIST) + snprintf(_errmsg, sizeof(_errmsg), "m_shm exists but unwritable"); + else + snprintf(_errmsg, sizeof(_errmsg), "create m_shm failed: %m"); + return -1; + } + + if (_shm.Lock() != 0) + { + snprintf(_errmsg, sizeof(_errmsg), "m_shm.Lock() failed"); + return -1; + } + + if (_shm.Attach() == NULL) + { + snprintf(_errmsg, sizeof(_errmsg), "m_shm.Attach() failed"); + return -1; + } + + //底层分配器初始化 + if (DTCBinMalloc::Instance()->Init(_shm.Ptr(), _shm.Size()) != 0) + { + snprintf(_errmsg, + sizeof(_errmsg), + "binmalloc init failed: %s", + M_ERROR()); + return -1; + } + + /* + * 设置共享内存不完整标记 + */ + _need_set_integrity = 1; + DTCBinMalloc::Instance()->set_share_memory_integrity(0); + } + + /* statistic */ + statCacheSize = _cacheInfo.ipcMemSize; + statCacheKey = _cacheInfo.ipcMemKey; + statCacheVersion = _cacheInfo.version; + statUpdateMode = _cacheInfo.syncUpdate; + statEmptyFilter = _cacheInfo.emptyFilter; + /*set minchunksize*/ + DTCBinMalloc::Instance()->set_min_chunk_size(DTCGlobal::_min_chunk_size); + + //attention: invoke app_storage_open() must after DTCBinMalloc init() or attach(). 
+ return app_storage_open(); +} + +int DTCBufferPool::app_storage_open() +{ + APP_STORAGE_T *storage = M_POINTER(APP_STORAGE_T, DTCBinMalloc::Instance()->get_reserve_zone()); + if (!storage) + { + snprintf(_errmsg, + sizeof(_errmsg), + "get reserve zone from binmalloc failed: %s", + M_ERROR()); + + return -1; + } + + return dtc_mem_open(storage); +} + +int DTCBufferPool::dtc_mem_open(APP_STORAGE_T *storage) +{ + if (storage->need_format()) + { + log_debug("starting init dtc mem"); + return dtc_mem_init(storage); + } + + return dtc_mem_attach(storage); +} + +/* hash size = 1% total memory size */ +/* return hash bucket num*/ + +uint32_t DTCBufferPool::hash_bucket_num(uint64_t size) +{ + int h = (uint32_t)(size / 100 - 16) / sizeof(NODE_ID_T); + h = (h / 9) * 9; + return h; +} + +int DTCBufferPool::dtc_mem_init(APP_STORAGE_T *storage) +{ + _feature = Feature::Instance(); + if (!_feature || _feature->Init(MIN_FEATURES)) + { + snprintf(_errmsg, sizeof(_errmsg), + "init feature failed, %s", + _feature->Error()); + return -1; + } + + if (storage->Format(_feature->Handle())) + { + snprintf(_errmsg, sizeof(_errmsg), "format storage failed"); + return -1; + } + + /* Node-Index*/ + _nodeIndex = NodeIndex::Instance(); + if (!_nodeIndex || _nodeIndex->Init(_cacheInfo.ipcMemSize)) + { + snprintf(_errmsg, sizeof(_errmsg), + "init node-index failed, %s", + _nodeIndex->Error()); + return -1; + } + + /* Hash-Bucket */ + _hash = DTCHash::Instance(); + if (!_hash || _hash->Init(hash_bucket_num(_cacheInfo.ipcMemSize), _cacheInfo.keySize)) + { + snprintf(_errmsg, sizeof(_errmsg), + "init hash-bucket failed, %s", + _hash->Error()); + return -1; + } + statHashSize = _hash->hash_size(); + statFreeBucket = _hash->free_bucket(); + + /* NS-Info */ + _ngInfo = NGInfo::Instance(); + if (!_ngInfo || _ngInfo->Init()) + { + snprintf(_errmsg, sizeof(_errmsg), + "init ns-info failed, %s", + _ngInfo->Error()); + return -1; + } + + /* insert features*/ + if (_feature->add_feature(NODE_INDEX, 
_nodeIndex->Handle())) + { + snprintf(_errmsg, sizeof(_errmsg), + "add node-index feature failed, %s", + _feature->Error()); + return -1; + } + + if (_feature->add_feature(HASH_BUCKET, _hash->Handle())) + { + snprintf(_errmsg, sizeof(_errmsg), + "add hash-bucket feature failed, %s", + _feature->Error()); + return -1; + } + + if (_feature->add_feature(NODE_GROUP, _ngInfo->Handle())) + { + snprintf(_errmsg, sizeof(_errmsg), + "add node-group feature failed, %s", + _feature->Error()); + return -1; + } + + /* Empty-Node Filter*/ + if (_cacheInfo.emptyFilter) + { + EmptyNodeFilter *p = EmptyNodeFilter::Instance(); + if (!p || p->Init()) + { + snprintf(_errmsg, sizeof(_errmsg), + "start Empty-Node Filter failed, %s", + p->Error()); + return -1; + } + + if (_feature->add_feature(EMPTY_FILTER, p->Handle())) + { + snprintf(_errmsg, sizeof(_errmsg), + "add empty-filter feature failed, %s", + _feature->Error()); + return -1; + } + } + + // column expand + _colExpand = DTCColExpand::Instance(); + if (!_colExpand || _colExpand->Init()) + { + snprintf(_errmsg, sizeof(_errmsg), "init column expand failed, %s", _colExpand->Error()); + return -1; + } + if (_feature->add_feature(COL_EXPAND, _colExpand->Handle())) + { + snprintf(_errmsg, sizeof(_errmsg), "add column expand feature failed, %s", _feature->Error()); + return -1; + } + + statDirtyEldest = 0; + statDirtyAge = 0; + + return 0; +} + +int DTCBufferPool::dtc_mem_attach(APP_STORAGE_T *storage) +{ + + _feature = Feature::Instance(); + if (!_feature || _feature->Attach(storage->as_extend_info)) + { + snprintf(_errmsg, sizeof(_errmsg), "%s", _feature->Error()); + return -1; + } + + /*hash-bucket*/ + FEATURE_INFO_T *p = _feature->get_feature_by_id(HASH_BUCKET); + if (!p) + { + snprintf(_errmsg, sizeof(_errmsg), "not found hash-bucket feature"); + return -1; + } + _hash = DTCHash::Instance(); + if (!_hash || _hash->Attach(p->fi_handle)) + { + snprintf(_errmsg, sizeof(_errmsg), "%s", _hash->Error()); + return -1; + } + statHashSize 
= _hash->hash_size(); + statFreeBucket = _hash->free_bucket(); + + /*node-index*/ + p = _feature->get_feature_by_id(NODE_INDEX); + if (!p) + { + snprintf(_errmsg, sizeof(_errmsg), "not found node-index feature"); + return -1; + } + _nodeIndex = NodeIndex::Instance(); + if (!_nodeIndex || _nodeIndex->Attach(p->fi_handle)) + { + snprintf(_errmsg, sizeof(_errmsg), "%s", _nodeIndex->Error()); + return -1; + } + + /*ns-info*/ + p = _feature->get_feature_by_id(NODE_GROUP); + if (!p) + { + snprintf(_errmsg, sizeof(_errmsg), "not found ns-info feature"); + return -1; + } + _ngInfo = NGInfo::Instance(); + if (!_ngInfo || _ngInfo->Attach(p->fi_handle)) + { + snprintf(_errmsg, sizeof(_errmsg), "%s", _ngInfo->Error()); + return -1; + } + + Node stLastTime = last_time_marker(); + Node stFirstTime = first_time_marker(); + if (!(!stLastTime) && !(!stFirstTime)) + { + statDirtyEldest = stLastTime.Time(); + statDirtyAge = stFirstTime.Time() - stLastTime.Time(); + } + + //TODO tableinfo + // column expand + p = _feature->get_feature_by_id(COL_EXPAND); + if (p) + { + _colExpand = DTCColExpand::Instance(); + if (!_colExpand || _colExpand->Attach(p->fi_handle, _cacheInfo.forceUpdateTableConf)) + { + // if _colExpand if null + snprintf(_errmsg, sizeof(_errmsg), "%s", _colExpand->Error()); + return -1; + } + } + else + { + log_error("column expand feature not enable, do not support column expand"); + _colExpand = NULL; + } + return 0; +} + +// Sync the empty node statstics +int DTCBufferPool::init_empty_node_list(void) +{ + if (_ngInfo->empty_startup_mode() == NGInfo::ATTACHED) + { + // iterate through empty lru list + // re-counting the total empty lru statstics + + // empty lru header + int count = 0; + Node header = _ngInfo->empty_node_head(); + Node pos; + + for (pos = header.Prev(); pos != header; pos = pos.Prev()) + { + /* check whether cross-linked */ + if (check_cross_linked_lru(pos) < 0) + break; + count++; + } + _ngInfo->inc_empty_node(count); + log_info("found %u empty nodes 
inside empty lru list", count); + } + return 0; +} + +// migrate empty node from clean list to empty list +int DTCBufferPool::upgrade_empty_node_list(void) +{ + if (_ngInfo->empty_startup_mode() != NGInfo::CREATED) + { + int count = 0; + Node header = _ngInfo->clean_node_head(); + Node next; + + for (Node pos = header.Prev(); pos != header; pos = next) + { + /* check whether cross-linked */ + if (check_cross_linked_lru(pos) < 0) + break; + next = pos.Prev(); + + if (node_rows_count(pos) == 0) + { + _ngInfo->remove_from_lru(pos); + _ngInfo->insert2_empty_lru(pos); + count++; + } + } + _ngInfo->inc_empty_node(count); + log_info("found %u empty nodes inside clean lru list, move to empty lru", count); + } + + return 0; +} + +// migrate empty node from empty list to clean list +int DTCBufferPool::merge_empty_node_list(void) +{ + if (_ngInfo->empty_startup_mode() != NGInfo::CREATED) + { + int count = 0; + Node header = _ngInfo->empty_node_head(); + Node next; + + for (Node pos = header.Prev(); pos != header; pos = next) + { + /* check whether cross-linked */ + if (check_cross_linked_lru(pos) < 0) + break; + next = pos.Prev(); + + _ngInfo->remove_from_lru(pos); + _ngInfo->insert2_clean_lru(pos); + count++; + } + log_info("found %u empty nodes, move to clean lru", count); + } + + return 0; +} + +// prune all empty nodes +int DTCBufferPool::prune_empty_node_list(void) +{ + if (_ngInfo->empty_startup_mode() == NGInfo::ATTACHED) + { + int count = 0; + Node header = _ngInfo->empty_node_head(); + Node next; + + for (Node pos = header.Prev(); pos != header; pos = next) + { + /* check whether cross-linked */ + if (check_cross_linked_lru(pos) < 0) + break; + next = pos.Prev(); + + count++; + purge_node_everything(pos); + } + + log_info("fullmode: total %u empty nodes purged", count); + } + + return 0; +} + +int DTCBufferPool::shrink_empty_node_list(void) +{ + if (emptyLimit && _ngInfo->empty_count() > emptyLimit) + { + //bug fix recalc empty + int togo = _ngInfo->empty_count() - 
emptyLimit; + int count = 0; + Node header = _ngInfo->empty_node_head(); + Node next; + + for (Node pos = header.Prev(); count < togo && pos != header; pos = next) + { + /* check whether cross-linked */ + if (check_cross_linked_lru(pos) < 0) + break; + + next = pos.Prev(); + + purge_node_everything(pos); + _ngInfo->inc_empty_node(-1); + count++; + } + log_info("shrink empty lru, %u empty nodes purged", count); + } + + return 0; +} + +int DTCBufferPool::purge_single_empty_node(void) +{ + Node header = _ngInfo->empty_node_head(); + Node pos = header.Prev(); + + if (pos != header) + { + /* check whether cross-linked */ + if (check_cross_linked_lru(pos) < 0) + return -1; + + log_debug("empty node execeed limit, purge node %u", pos.node_id()); + purge_node_everything(pos); + _ngInfo->inc_empty_node(-1); + } + + return 0; +} + +/* insert node to hash bucket*/ +int DTCBufferPool::Insert2Hash(const char *key, Node node) +{ + HASH_ID_T hashslot; + + if (targetNewHash) + { + hashslot = _hash->new_hash_slot(key); + } + else + { + hashslot = _hash->hash_slot(key); + } + + if (_hash->hash2_node(hashslot) == INVALID_NODE_ID) + { + _hash->inc_free_bucket(-1); + --statFreeBucket; + } + + _hash->inc_node_cnt(1); + + node.next_node_id() = _hash->hash2_node(hashslot); + _hash->hash2_node(hashslot) = node.node_id(); + + return 0; +} + +int DTCBufferPool::remove_from_hash_base(const char *key, Node remove_node, int newhash) +{ + HASH_ID_T hash_slot; + + if (newhash) + { + hash_slot = _hash->new_hash_slot(key); + } + else + { + hash_slot = _hash->hash_slot(key); + } + + NODE_ID_T node_id = _hash->hash2_node(hash_slot); + + /* hash miss */ + if (node_id == INVALID_NODE_ID) + return 0; + + /* found in hash head */ + if (node_id == remove_node.node_id()) + { + _hash->hash2_node(hash_slot) = remove_node.next_node_id(); + + // stat + if (_hash->hash2_node(hash_slot) == INVALID_NODE_ID) + { + _hash->inc_free_bucket(1); + ++statFreeBucket; + } + + _hash->inc_node_cnt(-1); + return 0; + } + + 
Node prev = I_SEARCH(node_id); + Node next = I_SEARCH(prev.next_node_id()); + + while (!(!next) && next.node_id() != remove_node.node_id()) + { + prev = next; + next = I_SEARCH(next.next_node_id()); + } + + /* found */ + if (!(!next)) + { + prev.next_node_id() = next.next_node_id(); + _hash->inc_node_cnt(-1); + } + else + { + log_error("remove_from_hash failed, node-id [%d] not found in slot %u ", + remove_node.node_id(), hash_slot); + return -1; + } + + return 0; +} + +int DTCBufferPool::remove_from_hash(const char *key, Node remove_node) +{ + if (hashChanging) + { + remove_from_hash_base(key, remove_node, 1); + remove_from_hash_base(key, remove_node, 0); + } + else + { + if (targetNewHash) + remove_from_hash_base(key, remove_node, 1); + else + remove_from_hash_base(key, remove_node, 0); + } + + return 0; +} + +int DTCBufferPool::move_to_new_hash(const char *key, Node node) +{ + remove_from_hash(key, node); + Insert2Hash(key, node); + return 0; +} + +inline int DTCBufferPool::key_cmp(const char *key, const char *other) +{ + int len = _cacheInfo.keySize == 0 ? 
(*(unsigned char *)key + 1) : _cacheInfo.keySize; + + return memcmp(key, other, len); +} + +Node DTCBufferPool::cache_find_auto_chose_hash(const char *key) +{ + int oldhash = 0; + int newhash = 1; + Node stNode; + + if (hashChanging) + { + if (targetNewHash) + { + oldhash = 0; + newhash = 1; + } + else + { + oldhash = 1; + newhash = 0; + } + + stNode = cache_find(key, oldhash); + if (!stNode) + { + stNode = cache_find(key, newhash); + } + else + { + move_to_new_hash(key, stNode); + } + } + else + { + if (targetNewHash) + { + stNode = cache_find(key, 1); + } + else + { + stNode = cache_find(key, 0); + } + } + return stNode; +} + +Node DTCBufferPool::cache_find(const char *key, int newhash) +{ + HASH_ID_T hash_slot; + + if (newhash) + { + hash_slot = _hash->new_hash_slot(key); + } + else + { + hash_slot = _hash->hash_slot(key); + } + + NODE_ID_T node_id = _hash->hash2_node(hash_slot); + + /* not found */ + if (node_id == INVALID_NODE_ID) + return Node(); + + Node iter = I_SEARCH(node_id); + while (!(!iter)) + { + if (iter.vd_handle() == INVALID_HANDLE) + { + log_warning("node[%u]'s handle is invalid", iter.node_id()); + Node node = iter; + iter = I_SEARCH(iter.next_node_id()); + purge_node(key, node); + continue; + } + + DataChunk *data_chunk = M_POINTER(DataChunk, iter.vd_handle()); + if (NULL == data_chunk) + { + log_warning("node[%u]'s handle is invalid", iter.node_id()); + Node node = iter; + iter = I_SEARCH(iter.next_node_id()); + purge_node(key, node); + continue; + } + + if (NULL == data_chunk->Key()) + { + log_warning("node[%u]'s handle is invalid, decode key failed", iter.node_id()); + Node node = iter; + iter = I_SEARCH(iter.next_node_id()); + purge_node(key, node); + continue; + } + + /* EQ */ + if (key_cmp(key, data_chunk->Key()) == 0) + { + log_debug("found node[%u]", iter.node_id()); + return iter; + } + + iter = I_SEARCH(iter.next_node_id()); + } + + /* not found*/ + return Node(); +} + +unsigned int DTCBufferPool::first_time_marker_time(void) +{ + if 
(firstMarkerTime == 0) + { + Node marker = first_time_marker(); + firstMarkerTime = !marker ? 0 : marker.Time(); + } + return firstMarkerTime; +} + +unsigned int DTCBufferPool::last_time_marker_time(void) +{ + if (lastMarkerTime == 0) + { + Node marker = last_time_marker(); + lastMarkerTime = !marker ? 0 : marker.Time(); + } + return lastMarkerTime; +} + +/* insert a time-marker to dirty lru list*/ +int DTCBufferPool::insert_time_marker(unsigned int t) +{ + Node tm_node = _ngInfo->allocate_node(); + if (!tm_node) + { + log_debug("no mem allocate timemarker, purge 10 clean node"); + /* prepurge clean node for cache is full */ + pre_purge_nodes(10, Node()); + tm_node = _ngInfo->allocate_node(); + if (!tm_node) + { + log_crit("can not allocate time marker for dirty lru"); + return -1; + } + } + + log_debug("insert time marker in dirty lru, time %u", t); + tm_node.next_node_id() = TIME_MARKER_NEXT_NODE_ID; + tm_node.vd_handle() = t; + + _ngInfo->insert2_dirty_lru(tm_node); + + //stat + firstMarkerTime = t; + + /*in case lastMarkerTime not set*/ + if (lastMarkerTime == 0) + last_time_marker_time(); + + statDirtyAge = firstMarkerTime - lastMarkerTime; + statDirtyEldest = lastMarkerTime; + + return 0; +} + +/* -1: not a time marker + * -2: this the only time marker + + */ +int DTCBufferPool::remove_time_marker(Node node) +{ + /* is not timermarker node */ + if (!is_time_marker(node)) + return -1; + + _ngInfo->remove_from_lru(node); + _ngInfo->release_node(node); + + //stat + Node stLastTime = last_time_marker(); + if (!stLastTime) + { + lastMarkerTime = firstMarkerTime; + } + else + { + lastMarkerTime = stLastTime.Time(); + } + + statDirtyAge = firstMarkerTime - lastMarkerTime; + statDirtyEldest = lastMarkerTime; + return 0; +} + +/* prev <- dirtyhead */ +Node DTCBufferPool::last_time_marker() const +{ + Node pos, dirtyHeader = _ngInfo->dirty_node_head(); + NODE_LIST_FOR_EACH_RVS(pos, dirtyHeader) + { + if (pos.next_node_id() == TIME_MARKER_NEXT_NODE_ID) + return pos; + } 
+ + return Node(); +} + +/* dirtyhead -> next */ +Node DTCBufferPool::first_time_marker() const +{ + Node pos, dirtyHeader = _ngInfo->dirty_node_head(); + + NODE_LIST_FOR_EACH(pos, dirtyHeader) + { + if (pos.next_node_id() == TIME_MARKER_NEXT_NODE_ID) + return pos; + } + + return Node(); +} + +/* dirty lru list head */ +Node DTCBufferPool::dirty_lru_head() const +{ + return _ngInfo->dirty_node_head(); +} + +/* clean lru list head */ +Node DTCBufferPool::clean_lru_head() const +{ + return _ngInfo->clean_node_head(); +} + +/* empty lru list head */ +Node DTCBufferPool::empty_lru_head() const +{ + return _ngInfo->empty_node_head(); +} + +int DTCBufferPool::is_time_marker(Node node) const +{ + return node.next_node_id() == TIME_MARKER_NEXT_NODE_ID; +} + +int DTCBufferPool::try_purge_size(size_t size, Node reserve, unsigned max_purge_count) +{ + log_debug("start try_purge_size"); + + if (disableTryPurge) + { + static int alert_count = 0; + if (!alert_count++) + { + log_alert("memory overflow, auto purge disabled"); + } + return -1; + } + /*if have pre purge, purge node and continue*/ + /* prepurge should not purge reserved node in try_purge_size */ + pre_purge_nodes(DTCGlobal::_pre_purge_nodes, reserve); + + unsigned real_try_purge_count = 0; + + /* clean lru header */ + Node clean_header = clean_lru_head(); + + Node pos = clean_header.Prev(); + + for (unsigned iter = 0; iter < max_purge_count && !(!pos) && pos != clean_header; ++iter) + { + Node purge_node = pos; + + if (total_used_node() < 10) + break; + + /* check whether cross-linked */ + if (check_cross_linked_lru(pos) < 0) + break; + + pos = pos.Prev(); + + if (purge_node == reserve) + { + continue; + } + + if (purge_node.vd_handle() == INVALID_HANDLE) + { + log_warning("node[%u]'s handle is invalid", purge_node.node_id()); + continue; + } + + /* ask for data-chunk's size */ + DataChunk *data_chunk = M_POINTER(DataChunk, purge_node.vd_handle()); + if (NULL == data_chunk) + { + log_warning("node[%u] handle is 
invalid, attach DataChunk failed", purge_node.node_id()); + continue; + } + + unsigned combine_size = data_chunk->ask_for_destroy_size(DTCBinMalloc::Instance()); + log_debug("need_size=%u, combine-size=%u, node-size=%u", + (unsigned)size, combine_size, data_chunk->node_size()); + + if (combine_size >= size) + { + /* stat total rows */ + inc_total_row(0LL - node_rows_count(purge_node)); + check_and_purge_node_everything(purge_node); + _need_purge_node_count = iter; + log_debug("try purge size for create or update: %d", iter + 1); + statPurgeForCreateUpdateCount.push(iter + 1); + ++statTryPurgeNodes; + return 0; + } + + ++real_try_purge_count; + } + + _need_purge_node_count = real_try_purge_count; + return -1; +} + +int DTCBufferPool::purge_node(const char *key, Node purge_node) +{ + /* HB */ + if (_purge_notifier) + _purge_notifier->purge_node_notify(key, purge_node); + + /*1. Remove from hash */ + remove_from_hash(key, purge_node); + + /*2. Remove from LRU */ + _ngInfo->remove_from_lru(purge_node); + + /*3. Release node, it can auto remove from nodeIndex */ + _ngInfo->release_node(purge_node); + + return 0; +} + +int DTCBufferPool::purge_node_everything(Node node) +{ + /* invalid node attribute */ + if (!(!node) && node.vd_handle() != INVALID_HANDLE) + { + DataChunk *data_chunk = M_POINTER(DataChunk, node.vd_handle()); + if (NULL == data_chunk || NULL == data_chunk->Key()) + { + log_error("node[%u]'s handle is invalid, can't attach and decode key", node.node_id()); + //TODO + return -1; + } + uint32_t dwCreatetime = data_chunk->create_time(); + uint32_t dwPurgeHour = RELATIVE_HOUR_CALCULATOR->get_relative_hour(); + log_debug("lru purge node,node[%u]'s createhour is %u, purgeHour is %u", node.node_id(), dwCreatetime, dwPurgeHour); + survival_hour.push((dwPurgeHour - dwCreatetime)); + + char key[256] = {0}; + /* decode key */ + memcpy(key, data_chunk->Key(), _cacheInfo.keySize > 0 ? 
_cacheInfo.keySize : *(unsigned char *)(data_chunk->Key()) + 1); + + /* destroy data-chunk */ + data_chunk->Destroy(DTCBinMalloc::Instance()); + + return purge_node(key, node); + } + + return 0; +} + +uint32_t DTCBufferPool::get_cmodtime(Node *node) +{ + // how init + RawData *_raw_data = new RawData(DTCBinMalloc::Instance(), 1); + uint32_t lastcmod = 0; + uint32_t lastcmod_thisrow = 0; + int iRet = _raw_data->Attach(node->vd_handle()); + if (iRet != 0) + { + log_error("raw-data attach[handle:" UINT64FMT "] error: %d,%s", + node->vd_handle(), iRet, _raw_data->get_err_msg()); + return (0); + } + + unsigned int uiTotalRows = _raw_data->total_rows(); + for (unsigned int i = 0; i < uiTotalRows; i++) //查找 + { + if ((iRet = _raw_data->get_lastcmod(lastcmod_thisrow)) != 0) + { + log_error("raw-data decode row error: %d,%s", iRet, _raw_data->get_err_msg()); + return (0); + } + if (lastcmod_thisrow > lastcmod) + lastcmod = lastcmod_thisrow; + } + return lastcmod; +} + +//check if node's timestamp max than setting +//and purge_node_everything +int DTCBufferPool::check_and_purge_node_everything(Node node) +{ + int dataExistTime = statDataExistTime; + if (dateExpireAlertTime) + { + struct timeval tm; + gettimeofday(&tm, NULL); + unsigned int lastnodecmodtime = get_cmodtime(&node); + if (lastnodecmodtime > statLastPurgeNodeModTime) + { + statLastPurgeNodeModTime = lastnodecmodtime; + dataExistTime = (unsigned int)tm.tv_sec - statLastPurgeNodeModTime; + statDataExistTime = dataExistTime; + } + if (statDataExistTime < dateExpireAlertTime) + { + static int alert_count = 0; + if (!alert_count++) + { + log_alert("DataExistTime:%u is little than setting:%u", dataExistTime, dateExpireAlertTime); + } + } + log_debug("dateExpireAlertTime:%d ,lastnodecmodtime:%d,timenow:%u", dateExpireAlertTime, lastnodecmodtime, (uint32_t)tm.tv_sec); + } + + return purge_node_everything(node); +} +int DTCBufferPool::purge_node_everything(const char *key, Node node) +{ + DataChunk *data_chunk = NULL; + 
if (!(!node) && node.vd_handle() != INVALID_HANDLE) + { + data_chunk = M_POINTER(DataChunk, node.vd_handle()); + if (NULL == data_chunk) + { + log_error("node[%u]'s handle is invalid, can't attach data-chunk", node.node_id()); + return -1; + } + uint32_t dwCreatetime = data_chunk->create_time(); + uint32_t dwPurgeHour = RELATIVE_HOUR_CALCULATOR->get_relative_hour(); + log_debug(" purge node, node[%u]'s createhour is %u, purgeHour is %u", node.node_id(), dwCreatetime, dwPurgeHour); + survival_hour.push((dwPurgeHour - dwCreatetime)); + /* destroy data-chunk */ + data_chunk->Destroy(DTCBinMalloc::Instance()); + } + + if (!(!node)) + return purge_node(key, node); + return 0; +} + +/* allocate a new node by key */ +Node DTCBufferPool::cache_allocate(const char *key) +{ + Node allocate_node = _ngInfo->allocate_node(); + + /* allocate failed */ + if (!allocate_node) + return allocate_node; + + /*1. Insert to hash bucket */ + Insert2Hash(key, allocate_node); + + /*2. Insert to clean Lru list*/ + _ngInfo->insert2_clean_lru(allocate_node); + + return allocate_node; +} + +extern int useNewHash; + +/* purge key{data-chunk, hash, lru, node...} */ +int DTCBufferPool::cache_purge(const char *key) +{ + Node purge_node; + + if (hashChanging) + { + purge_node = cache_find(key, 0); + if (!purge_node) + { + purge_node = cache_find(key, 1); + if (!purge_node) + { + return 0; + } + else + { + if (purge_node_everything(key, purge_node) < 0) + return -1; + } + } + else + { + if (purge_node_everything(key, purge_node) < 0) + return -1; + } + } + else + { + if (targetNewHash) + { + purge_node = cache_find(key, 1); + if (!purge_node) + return 0; + else + { + if (purge_node_everything(key, purge_node) < 0) + return -1; + } + } + else + { + purge_node = cache_find(key, 0); + if (!purge_node) + return 0; + else + { + if (purge_node_everything(key, purge_node) < 0) + return -1; + } + } + } + + return 0; +} + +void DTCBufferPool::delay_purge_notify(const unsigned count) +{ + if 
(_need_purge_node_count == 0) + return; + else + statTryPurgeCount.push(_need_purge_node_count); + + unsigned purge_count = count < _need_purge_node_count ? count : _need_purge_node_count; + unsigned real_purge_count = 0; + + log_debug("delay_purge_notify: total=%u, now=%u", _need_purge_node_count, purge_count); + + /* clean lru header */ + Node clean_header = clean_lru_head(); + Node pos = clean_header.Prev(); + + while (purge_count-- > 0 && !(!pos) && pos != clean_header) + { + Node purge_node = pos; + check_cross_linked_lru(pos); + pos = pos.Prev(); + + /* stat total rows */ + inc_total_row(0LL - node_rows_count(purge_node)); + + check_and_purge_node_everything(purge_node); + + ++statTryPurgeNodes; + ++real_purge_count; + } + + _need_purge_node_count -= real_purge_count; + + /* 如果没有请求,重新调度delay purge任务 */ + if (_need_purge_node_count > 0) + attach_timer(_delay_purge_timerlist); + + return; +} + +int DTCBufferPool::pre_purge_nodes(int purge_count, Node reserve) +{ + int realpurged = 0; + + if (purge_count <= 0) + return 0; + else + statTryPurgeCount.push(purge_count); + + /* clean lru header */ + Node clean_header = clean_lru_head(); + Node pos = clean_header.Prev(); + + while (purge_count-- > 0 && !(!pos) && pos != clean_header) + { + Node purge_node = pos; + check_cross_linked_lru(pos); + pos = pos.Prev(); + + if (reserve == purge_node) + continue; + + /* stat total rows */ + inc_total_row(0LL - node_rows_count(purge_node)); + check_and_purge_node_everything(purge_node); + ++statTryPurgeNodes; + realpurged++; + } + return realpurged; + ; +} + +int DTCBufferPool::purge_by_time(unsigned int oldesttime) +{ + return 0; +} + +int DTCBufferPool::clear_create() +{ + if (_cacheInfo.readOnly == 1) + { + snprintf(_errmsg, sizeof(_errmsg), "cache readonly, can not clear cache"); + return -2; + } + _hash->Destroy(); + _ngInfo->Destroy(); + _feature->Destroy(); + _nodeIndex->Destroy(); + DTCBinMalloc::Instance()->Destroy(); + if (_shm.Delete() < 0) + { + snprintf(_errmsg, 
sizeof(_errmsg), "delete shm memory error"); + return -1; + } + log_notice("delete shm memory ok when clear cache"); + + if (_shm.Create(_cacheInfo.ipcMemKey, _cacheInfo.ipcMemSize) <= 0) + { + snprintf(_errmsg, sizeof(_errmsg), "create shm memory error"); + return -1; + } + if (_shm.Attach() == NULL) + { + snprintf(_errmsg, sizeof(_errmsg), "attach shm memory error"); + return -1; + } + + if (DTCBinMalloc::Instance()->Init(_shm.Ptr(), _shm.Size()) != 0) + { + snprintf(_errmsg, sizeof(_errmsg), + "binmalloc init failed: %s", M_ERROR()); + return -1; + } + DTCBinMalloc::Instance()->set_min_chunk_size(DTCGlobal::_min_chunk_size); + return app_storage_open(); +} + +void DTCBufferPool::start_delay_purge_task(TimerList *timer) +{ + log_info("start delay-purge task"); + _delay_purge_timerlist = timer; + attach_timer(_delay_purge_timerlist); + + return; +} +void DTCBufferPool::timer_notify(void) +{ + log_debug("sched delay-purge task"); + + delay_purge_notify(); +} diff --git a/src/search_local/index_storage/cache/buffer_pool.h b/src/search_local/index_storage/cache/buffer_pool.h new file mode 100644 index 0000000..3ea613a --- /dev/null +++ b/src/search_local/index_storage/cache/buffer_pool.h @@ -0,0 +1,272 @@ +/* + * ===================================================================================== + * + * Filename: buffer_pool.h + * + * Description: + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. 
+ * + * ===================================================================================== + */ +#ifndef __DTC_CACHE_POOL_H +#define __DTC_CACHE_POOL_H + +#include +#include "stat_dtc.h" +#include "namespace.h" +#include "pt_malloc.h" +#include "shmem.h" +#include "global.h" +#include "node_list.h" +#include "node_index.h" +#include "node_set.h" +#include "feature.h" +#include "ng_info.h" +#include "hash.h" +#include "col_expand.h" +#include "node.h" +#include "timer_list.h" +#include "data_chunk.h" + +DTC_BEGIN_NAMESPACE + +/* time-marker node in dirty lru list */ +//#define TIME_MARKER_NEXT_NODE_ID INVALID_NODE_ID +#define TIME_MARKER_NEXT_NODE_ID (INVALID_NODE_ID-1)

/* Basic parameters of a cache */
typedef struct _CacheInfo
{
    int ipcMemKey;                          // shared-memory key
    uint64_t ipcMemSize;                    // shared-memory size
    unsigned short keySize;                 // key size
    unsigned char version;                  // memory version number
    unsigned char syncUpdate : 1;           // synchronous / asynchronous update mode
    unsigned char readOnly : 1;             // open in read-only mode
    unsigned char createOnly : 1;           // for use by mem_tool
    unsigned char emptyFilter : 1;          // whether the empty-node filter is enabled
    unsigned char autoDeleteDirtyShm : 1;   // whether shared memory found to be inconsistent is deleted and rebuilt automatically
    unsigned char forceUpdateTableConf : 1; // whether table.conf is forced over the configuration stored in shared memory

    /*
     * Set key size, cache size and version.
     * The remaining members are not touched by this function.
     */
    inline void Init(int keyFormat, unsigned long cacheSize, unsigned int createVersion)
    {
        version = createVersion;
        ipcMemSize = cacheSize;
        keySize = keyFormat;
    }

} CacheInfo;

/* Callback interface invoked by the buffer pool for every node it purges. */
class PurgeNodeNotifier
{
public:
    PurgeNodeNotifier() {}
    virtual ~PurgeNodeNotifier() {}

    /* Called with the key and the node that is about to be purged. */
    virtual void purge_node_notify(const char *key, Node node) = 0;
};

+class BufferProcess; +class RawDataProcess; +class TreeDataProcess; +class DTCBufferPool : private TimerObject +{ + protected: + PurgeNodeNotifier *_purge_notifier; + SharedMemory _shm; //共享内存管理器 + CacheInfo _cacheInfo; //cache基本信息 + + DTCHash *_hash; // hash桶 + NGInfo *_ngInfo; // node管理 + Feature *_feature; // 特性抽象 + NodeIndex *_nodeIndex; // NodeID转换 + //CTableInfo *_tableInfo; // Table信息 + DTCColExpand *_colExpand; + + char _errmsg[256]; + 
        int _need_set_integrity;

        /* number of nodes waiting to be purged */
        unsigned _need_purge_node_count;

        TimerList * _delay_purge_timerlist;
        unsigned firstMarkerTime;
        unsigned lastMarkerTime;
        int emptyLimit; /* upper bound for empty nodes; 0 disables the empty-node list (see insert2_empty_lru) */
        /**********for purge alert*******/
        int disableTryPurge;
        // raise an alert when the last-update time of automatically purged data
        // is earlier than (current time - DataExpireAlertTime)
        int dateExpireAlertTime;


    protected:
        /* for statistic*/
        StatItemU32 statCacheSize;
        StatItemU32 statCacheKey;
        StatItemU32 statCacheVersion;
        StatItemU32 statUpdateMode;
        StatItemU32 statEmptyFilter;
        StatItemU32 statHashSize;
        StatItemU32 statFreeBucket;
        StatItemU32 statDirtyEldest;
        StatItemU32 statDirtyAge;
        StatSample statTryPurgeCount;
        StatItemU32 statTryPurgeNodes;
        StatItemU32 statLastPurgeNodeModTime;// largest lastcmod of the most recently purged node (when it holds several rows)
        StatItemU32 statDataExistTime;// current time minus statLastPurgeNodeModTime
        StatSample survival_hour;
        StatSample statPurgeForCreateUpdateCount;
    private:
        int app_storage_open();
        int dtc_mem_open(APP_STORAGE_T *);
        int dtc_mem_attach(APP_STORAGE_T *);
        int dtc_mem_init(APP_STORAGE_T *);
        int verify_cache_info(CacheInfo *);
        unsigned int hash_bucket_num(uint64_t);

        int remove_from_hash_base(const char *key, Node node, int newhash);
        int remove_from_hash(const char *key, Node node);
        int move_to_new_hash(const char *key, Node node);
        int Insert2Hash(const char *key, Node node);

        int purge_node(const char *key, Node purge_node);
        int purge_node_everything(const char* key, Node purge_node);

        /* purge alert*/
        int check_and_purge_node_everything(Node purge_node);
        uint32_t get_cmodtime(Node* purge_node);

        uint32_t get_expire_time(Node *node, uint32_t &expire);

        /* lru list op: thin forwards to the node-group info */
        int insert2_dirty_lru(Node node) {return _ngInfo->insert2_dirty_lru(node);}
        int insert2_clean_lru(Node node) {return _ngInfo->insert2_clean_lru(node);}
        /* with emptyLimit == 0 an empty node goes to the clean list instead */
        int insert2_empty_lru(Node node) {
            return emptyLimit ? 
                _ngInfo->insert2_empty_lru(node) :
                _ngInfo->insert2_clean_lru(node) ;

        }
        int remove_from_lru(Node node) {return _ngInfo->remove_from_lru(node);}
        int key_cmp(const char *key, const char *other);

        /* node|row count statistic for async flush.*/
        void inc_dirty_node(int v){ _ngInfo->inc_dirty_node(v);}
        void inc_dirty_row(int v) { _ngInfo->inc_dirty_row(v); }
        /* the empty-node counter is only maintained while emptyLimit is non-zero */
        void dec_empty_node(void) { if(emptyLimit) _ngInfo->inc_empty_node(-1); }
        /* count one more empty node and purge a single one once the limit is exceeded */
        void inc_empty_node(void) {
            if(emptyLimit) {
                _ngInfo->inc_empty_node(1);
                if(_ngInfo->empty_count() > emptyLimit) {
                    purge_single_empty_node();
                }
            }
        }

        const unsigned int total_dirty_node() const {return _ngInfo->total_dirty_node();}

        const uint64_t total_dirty_row() const {return _ngInfo->total_dirty_row();}
        const uint64_t total_used_row() const {return _ngInfo->total_used_row();}

        /* periodically schedules the delay-purge task */
        virtual void timer_notify(void);

    public:
        DTCBufferPool(PurgeNodeNotifier *o = NULL);
        ~DTCBufferPool();

        int check_expand_status();
        unsigned char shm_table_idx();
        bool col_expand(const char *table, int len);
        int try_col_expand(const char *table, int len);
        bool reload_table();

        int cache_open(CacheInfo *);
        /* negative limits are clamped to 0 */
        void set_empty_node_limit(int v) { emptyLimit = v<0?0:v; }
        int init_empty_node_list(void);
        int upgrade_empty_node_list(void);
        int merge_empty_node_list(void);
        int prune_empty_node_list(void);
        int shrink_empty_node_list(void);
        int purge_single_empty_node(void);

        Node cache_find(const char *key, int newhash);
        Node cache_find_auto_chose_hash(const char *key);
        int cache_purge(const char *key);
        int purge_node_everything(Node purge_node);
        Node cache_allocate(const char *key);
        int try_purge_size(size_t size, Node purge_node, unsigned count=2500);
        void disable_try_purge(void) { disableTryPurge = 1; }
        /* negative alert times are clamped to 0 */
        void set_date_expire_alert_time(int time){dateExpireAlertTime = time<0?0:time;};

        /* purge a fixed number of nodes */
        void delay_purge_notify(const unsigned count=50);
        int 
pre_purge_nodes(int purge_cnt, Node reserve);
        int purge_by_time(unsigned int oldesttime);
        void start_delay_purge_task(TimerList *);

        int insert_time_marker(unsigned int);
        int remove_time_marker(Node node);
        int is_time_marker(Node node) const;
        Node first_time_marker() const;
        Node last_time_marker() const;
        unsigned int first_time_marker_time();
        unsigned int last_time_marker_time();

        Node dirty_lru_head() const;
        Node clean_lru_head() const;
        Node empty_lru_head() const;
        int dirty_lru_empty()const{return NODE_LIST_EMPTY(dirty_lru_head());}

        const CacheInfo* get_cache_info() const { return &_cacheInfo;}
        const char *Error(void) const { return _errmsg; }

        /* returns a null pointer when no feature table is attached */
        FEATURE_INFO_T* query_feature_by_id(const uint32_t id)
        {
            return _feature ? _feature->get_feature_by_id(id):(FEATURE_INFO_T *)(0);
        }

        /* returns -1 when no feature table is attached */
        int add_feature(const uint32_t id, const MEM_HANDLE_T v)
        {
            if(_feature == NULL)
                return -1;
            return _feature->add_feature(id, v);
        }

        int clear_create();

        uint32_t max_node_id(void) const{
            return _ngInfo->max_node_id();
        }

        NODE_ID_T min_valid_node_id(void) const {
            return _ngInfo->min_valid_node_id();
        }

        const unsigned int total_used_node() const {return _ngInfo->total_used_node();}
        void inc_total_row(int v) { _ngInfo->inc_total_row(v); }

        /*
         * Number of rows stored in `node`.
         * Returns 0 for an empty node, a node whose data handle is
         * INVALID_HANDLE, or a handle that does not map to a chunk.
         */
        static int32_t node_rows_count(Node node) {
            if(!node || node.vd_handle() == INVALID_HANDLE)
                return 0;

            DataChunk *chunk = ((DataChunk*)(DTCBinMalloc::Instance()->handle_to_ptr(node.vd_handle())));
            if(!chunk) return 0;

            return chunk->total_rows();
        }

        /* these classes use the private list/counter helpers above */
        friend class BufferProcess;
        friend class RawDataProcess;
        friend class TreeDataProcess;
};

DTC_END_NAMESPACE

#endif

diff --git a/src/search_local/index_storage/cache/buffer_process.cc b/src/search_local/index_storage/cache/buffer_process.cc
new file mode 100644
index 0000000..b6e8042
--- /dev/null
+++ b/src/search_local/index_storage/cache/buffer_process.cc
@@ -0,0 +1,2433 @@
/*
 * 
=====================================================================================
 *
 *       Filename:  buffer_process.cc
 *
 *    Description:
 *
 *        Version:  1.0
 *        Created:  09/08/2020 10:02:05 PM
 *       Revision:  none
 *       Compiler:  gcc
 *
 *         Author:  Norton, yangshuang68@jd.com
 *        Company:  JD.com, Inc.
 *
 * =====================================================================================
 */
// NOTE(review): the header names of the six #include lines below were lost when
// this text was extracted (presumably angle-bracket system headers) -- restore
// them from the upstream file.
#include
#include
#include
#include
#include
#include

#include "packet.h"
#include "log.h"
#include "buffer_process.h"
#include "buffer_flush.h"
#include "mysql_error.h"
#include "sys_malloc.h"
#include "data_chunk.h"
#include "raw_data_process.h"
#include "key_route.h"
#include "buffer_remoteLog.h"
#include "hotback_task.h"
#include "tree_data_process.h"
DTC_USING_NAMESPACE;

extern DTCTableDefinition *gTableDef[];
extern KeyRoute *keyRoute;
extern int hashChanging;
extern int targetNewHash;
extern DTCConfig *gConfig;

/*
 * Function     : transation_find_node
 * Description  : look up the node for the task's key and record the result in
 *                ptrKey, m_stNode, nodeStat, keyDirty, oldRows and nodeEmpty
 * Input        : Task   request whose packed key / int key is looked up
 * Return       : NODESTAT_EMPTY   the empty-node filter has the key set
 *                NODESTAT_MISSING no node found in the hash(es) consulted
 *                NODESTAT_PRESENT node found
 */
inline int BufferProcess::transation_find_node(TaskRequest &Task)
{
    // already cleared/zero-ed
    ptrKey = Task.packed_key();
    if (m_pstEmptyNodeFilter != NULL && m_pstEmptyNodeFilter->ISSET(Task.int_key()))
    {
        //Cache.cache_purge(ptrKey);
        m_stNode = Node();
        return nodeStat = NODESTAT_EMPTY;
    }

    int newhash, oldhash;
    if (hashChanging)
    {
        // while the hash is changing, look in the old hash first, then the new one
        if (targetNewHash)
        {
            oldhash = 0;
            newhash = 1;
        }
        else
        {
            oldhash = 1;
            newhash = 0;
        }

        m_stNode = Cache.cache_find(ptrKey, oldhash);
        if (!m_stNode)
        {
            m_stNode = Cache.cache_find(ptrKey, newhash);
            if (!m_stNode)
                return nodeStat = NODESTAT_MISSING;
        }
        else
        {
            // found in the old hash: migrate it
            Cache.move_to_new_hash(ptrKey, m_stNode);
        }
    }
    else
    {
        // no change in progress: only the target hash is consulted
        if (targetNewHash)
        {
            m_stNode = Cache.cache_find(ptrKey, 1);
            if (!m_stNode)
                return nodeStat = NODESTAT_MISSING;
        }
        else
        {
            m_stNode = Cache.cache_find(ptrKey, 0);
            if (!m_stNode)
                return nodeStat = NODESTAT_MISSING;
        }
    }

    keyDirty = m_stNode.is_dirty();
    oldRows = Cache.node_rows_count(m_stNode);
    
// prepare to decrease empty node count + nodeEmpty = keyDirty == 0 && oldRows == 0; + return nodeStat = NODESTAT_PRESENT; +} + +inline void BufferProcess::transation_update_lru(bool async, int level) +{ + if (!keyDirty) + { + // clear node empty here, because the lru is adjusted + // it's not a fresh node in EmptyButInCleanList state + if (async == true) + { + m_stNode.set_dirty(); + Cache.inc_dirty_node(1); + Cache.remove_from_lru(m_stNode); + Cache.insert2_dirty_lru(m_stNode); + if (nodeEmpty != 0) + { + // empty to non-empty + Cache.dec_empty_node(); + nodeEmpty = 0; + } + lruUpdate = LRU_NONE; + } + else + { + lruUpdate = level; + } + } +} + +void BufferProcess::transation_end(void) +{ + int newRows = 0; + if (!!m_stNode && !keyDirty && !m_stNode.is_dirty()) + { + newRows = Cache.node_rows_count(m_stNode); + int nodeEmpty1 = newRows == 0; + + if (lruUpdate > noLRU || nodeEmpty1 != nodeEmpty) + { + if (newRows == 0) + { + Cache.remove_from_lru(m_stNode); + Cache.insert2_empty_lru(m_stNode); + if (nodeEmpty == 0) + { + // non-empty to empty + Cache.inc_empty_node(); + nodeEmpty = 1; + } + //Cache.DumpEmptyNodeList(); + } + else + { + Cache.remove_from_lru(m_stNode); + Cache.insert2_clean_lru(m_stNode); + if (nodeEmpty != 0) + { + // empty to non-empty + Cache.dec_empty_node(); + nodeEmpty = 0; + } + } + } + } + + CacheTransation::Free(); +} + +int BufferProcess::write_lru_hb_log(const char *key) +{ + log_debug("write_lru_hb_log begin"); + if (!hbLogSwitch) + { + return 0; + } + log_debug("write_lru_hb_log new task"); + TaskRequest *pTask = new TaskRequest; + if (pTask == NULL) + { + log_error("cannot write_hb_log row, new task error, possible memory exhausted\n"); + return -1; + } + + pTask->set_request_type(TaskTypeWriteLruHbLog); + HotBackTask &hotbacktask = pTask->get_hot_back_task(); + hotbacktask.set_type(DTCHotBackup::SYNC_LRU); + hotbacktask.set_flag(DTCHotBackup::NON_VALUE); + hotbacktask.set_value(NULL, 0); + DTCValue packeKey = 
tableDef->packed_key(key); + hotbacktask.set_packed_key(packeKey.bin.ptr, packeKey.bin.len); + log_debug(" packed key len:%d, key len:%d, key :%s", packeKey.bin.len, *(unsigned char *)packeKey.bin.ptr, packeKey.bin.ptr + 1); + dispatch_hot_back_task(pTask); + return 0; +} + +int BufferProcess::write_hb_log(const char *key, char *pstChunk, unsigned int uiNodeSize, int iType) +{ + if (!hbLogSwitch) + { + return 0; + } + TaskRequest *pTask = new TaskRequest; + if (pTask == NULL) + { + log_error("cannot write_hb_log row, new task error, possible memory exhausted\n"); + return -1; + } + + pTask->set_request_type(TaskTypeWriteHbLog); + + HotBackTask &hotbacktask = pTask->get_hot_back_task(); + hotbacktask.set_type(iType); + DTCValue packeKey; + if (iType == DTCHotBackup::SYNC_COLEXPAND_CMD) + packeKey.Set(key); + else + packeKey = tableDef->packed_key(key); + hotbacktask.set_packed_key(packeKey.bin.ptr, packeKey.bin.len); + log_debug(" packed key len:%d, key len:%d, key :%s", packeKey.bin.len, *(unsigned char *)packeKey.bin.ptr, packeKey.bin.ptr + 1); + if (uiNodeSize > 0 && (iType == DTCHotBackup::SYNC_COLEXPAND_CMD || uiNodeSize <= 100)) + { + hotbacktask.set_flag(DTCHotBackup::HAS_VALUE); + hotbacktask.set_value(pstChunk, uiNodeSize); + dispatch_hot_back_task(pTask); + } + else + { + hotbacktask.set_flag(DTCHotBackup::NON_VALUE); + hotbacktask.set_value(NULL, 0); + dispatch_hot_back_task(pTask); + } + + return 0; +} + +int BufferProcess::write_hb_log(const char *key, Node &stNode, int iType) +{ + if (!hbLogSwitch) + { + return 0; + } + + unsigned int uiNodeSize = 0; + DataChunk *pstChunk = NULL; + + if (!(!stNode) && stNode.vd_handle() != INVALID_HANDLE) + { + pstChunk = (DataChunk *)DTCBinMalloc::Instance()->handle_to_ptr(stNode.vd_handle()); + uiNodeSize = pstChunk->node_size(); + } + return write_hb_log(key, (char *)pstChunk, uiNodeSize, iType); +} + +inline int BufferProcess::write_hb_log(TaskRequest &Task, Node &stNode, int iType) +{ + return 
write_hb_log(Task.packed_key(), stNode, iType);
}

/*
 * Function     : purge_node_notify
 * Description  : PurgeNodeNotifier callback; receives the key and the node.
 *                If the node is the one held by the current transaction,
 *                the empty counter is fixed up and m_stNode is reset.
 *                A SYNC_PURGE hot-backup record is written for the key.
 */
void BufferProcess::purge_node_notify(const char *key, Node node)
{
    if (!node)
        return;

    if (node == m_stNode)
    {
        if (nodeEmpty)
        {
            // purge an empty node! decrease empty counter
            Cache.dec_empty_node();
            nodeEmpty = 0;
        }
        m_stNode = Node::Empty();
    }

    if (write_hb_log(key, node, DTCHotBackup::SYNC_PURGE))
    {
        log_crit("hb: log purge key failed");
    }
}

/*
 * Constructor: stores the table definition and update mode, registers this
 * object as the cache pool's purge notifier (Cache(this)), sets defaults,
 * looks up the statistics items and reads the expire limits from the config.
 */
BufferProcess::BufferProcess(PollThread *p, DTCTableDefinition *tdef, EUpdateMode um)
    : TaskDispatcher(p),
      // output(p, this),
      output(p),
      remoteoutput(p),
      hblogoutput(p),
      cacheReply(this),
      tableDef(tdef),
      Cache(this),
      nodbMode(false),
      fullMode(false),
      m_bReplaceEmpty(false),
      noLRU(0),
      asyncServer(um),
      updateMode(MODE_SYNC),
      insertMode(MODE_SYNC),
      mem_dirty(false),
      insertOrder(INSERT_ORDER_LAST),
      nodeSizeLimit(0),
      nodeRowsLimit(0),
      nodeEmptyLimit(0),

      flushReply(this),
      flushTimer(NULL),
      nFlushReq(0),
      mFlushReq(0),
      maxFlushReq(1),
      markerInterval(300),
      minDirtyTime(3600),
      maxDirtyTime(43200),

      m_pstEmptyNodeFilter(NULL),
      //Hot Backup
      hbLogSwitch(false),
      hbFeature(NULL),
      //Hot Backup
      //BlackList
      blacklist(0),
      blacklist_timer(0)

//BlackList
{
    // cacheInfo.Init() only fills three fields, so clear everything first
    memset((char *)&cacheInfo, 0, sizeof(cacheInfo));

    statGetCount = statmgr.get_item_u32(DTC_GET_COUNT);
    statGetHits = statmgr.get_item_u32(DTC_GET_HITS);
    statInsertCount = statmgr.get_item_u32(DTC_INSERT_COUNT);
    statInsertHits = statmgr.get_item_u32(DTC_INSERT_HITS);
    statUpdateCount = statmgr.get_item_u32(DTC_UPDATE_COUNT);
    statUpdateHits = statmgr.get_item_u32(DTC_UPDATE_HITS);
    statDeleteCount = statmgr.get_item_u32(DTC_DELETE_COUNT);
    statDeleteHits = statmgr.get_item_u32(DTC_DELETE_HITS);
    statPurgeCount = statmgr.get_item_u32(DTC_PURGE_COUNT);

    statDropCount = statmgr.get_item_u32(DTC_DROP_COUNT);
    statDropRows = statmgr.get_item_u32(DTC_DROP_ROWS);
    statFlushCount = 
statmgr.get_item_u32(DTC_FLUSH_COUNT); + statFlushRows = statmgr.get_item_u32(DTC_FLUSH_ROWS); + //statIncSyncStep = statmgr.get_sample(HBP_INC_SYNC_STEP); + + statMaxFlushReq = statmgr.get_item_u32(DTC_MAX_FLUSH_REQ); + statCurrFlushReq = statmgr.get_item_u32(DTC_CURR_FLUSH_REQ); + + statOldestDirtyTime = statmgr.get_item_u32(DTC_OLDEST_DIRTY_TIME); + statAsyncFlushCount = statmgr.get_item_u32(DTC_ASYNC_FLUSH_COUNT); + + statExpireCount = statmgr.get_item_u32(DTC_KEY_EXPIRE_USER_COUNT); + statBufferProcessExpireCount = statmgr.get_item_u32(CACHE_EXPIRE_REQ); + + maxExpireCount = gConfig->get_int_val("cache", "MaxExpireCount", 100); + maxExpireTime = gConfig->get_int_val("cache", "MaxExpireTime", 3600 * 24 * 30); +} + +BufferProcess::~BufferProcess() +{ + if (m_pstEmptyNodeFilter != NULL) + delete m_pstEmptyNodeFilter; +} + +int BufferProcess::set_insert_order(int o) +{ + if (nodbMode == true && o == INSERT_ORDER_PURGE) + { + log_error("NoDB server don't support TABLE_DEFINE.ServerOrderInsert = purge"); + return -1; + } + + if (cacheInfo.syncUpdate == 0 && o == INSERT_ORDER_PURGE) + { + log_error("AsyncUpdate server don't support TABLE_DEFINE.ServerOrderInsert = purge"); + return -1; + } + insertOrder = o; + if (pstDataProcess) + pstDataProcess->set_insert_order(o); + return 0; +} + +int BufferProcess::enable_no_db_mode(void) +{ + if (insertOrder == INSERT_ORDER_PURGE) + { + log_error("NoDB server don't support TABLE_DEFINE.ServerOrderInsert = purge"); + return -1; + } + if (tableDef->has_auto_increment()) + { + log_error("NoDB server don't support auto_increment field"); + return -1; + } + nodbMode = true; + fullMode = true; + return 0; +} + +int BufferProcess::disable_lru_update(int level) +{ + if (level > LRU_WRITE) + level = LRU_WRITE; + if (level < 0) + level = 0; + noLRU = level; + return 0; +} + +int BufferProcess::disable_async_log(int disable) +{ + noAsyncLog = !!disable; + return 0; +} + +int BufferProcess::buffer_set_size(unsigned long cacheSize, 
unsigned int createVersion)
{
    cacheInfo.Init(tableDef->key_format(), cacheSize, createVersion);
    return 0;
}

/*
 * Function     : cache_open
 * Description  : open the cache
 * Input        : iIpcKey                    shared-memory ipc key
 *                iEnableEmptyFilter         non-zero enables the empty-node filter flag
 *                iEnableAutoDeleteDirtyShm  non-zero enables the auto-delete-dirty-shm flag
 * Output       :
 * Return       : 0 on success, -1 on failure
 */
int BufferProcess::cache_open(int iIpcKey, int iEnableEmptyFilter, int iEnableAutoDeleteDirtyShm)
{
    cacheInfo.keySize = tableDef->key_format();
    cacheInfo.ipcMemKey = iIpcKey;
    cacheInfo.syncUpdate = !asyncServer;
    cacheInfo.emptyFilter = iEnableEmptyFilter ? 1 : 0;
    cacheInfo.autoDeleteDirtyShm = iEnableAutoDeleteDirtyShm ? 1 : 0;
    cacheInfo.forceUpdateTableConf = gConfig->get_int_val("cache", "ForceUpdateTableConf", 0);

    log_debug("cache_info: \n\tshmkey[%d] \n\tshmsize[" UINT64FMT "] \n\tkeysize[%u]"
              "\n\tversion[%u] \n\tsyncUpdate[%u] \n\treadonly[%u]"
              "\n\tcreateonly[%u] \n\tempytfilter[%u] \n\tautodeletedirtysharememory[%u]",
              cacheInfo.ipcMemKey, cacheInfo.ipcMemSize, cacheInfo.keySize,
              cacheInfo.version, cacheInfo.syncUpdate, cacheInfo.readOnly,
              cacheInfo.createOnly, cacheInfo.emptyFilter, cacheInfo.autoDeleteDirtyShm);

    if (Cache.cache_open(&cacheInfo))
    {
        log_error("%s", Cache.Error());
        return -1;
    }

    log_info("Current cache memory format is V%d\n", cacheInfo.version);

    // 1 when the dirty LRU list is empty, i.e. the memory holds no dirty nodes
    int iMemSyncUpdate = Cache.dirty_lru_empty() ? 1 : 0;
    /*
     * 1. sync dtc + dirty mem, SYNC + mem_dirty
     * 2. sync dtc + clean mem, SYNC + !mem_dirty
     * 3. 
async dtc + dirty mem/clean mem: ASYNC
     *    disable ASYNC <--> FLUSH switch, so FLUSH never happen forever
     *    updateMode == asyncServer
     * */
    // high half: server mode (asyncServer), low half: 1 when the memory is clean
    switch (asyncServer * 0x10000 + iMemSyncUpdate)
    {
    case 0x00000: // sync dtcd + async mem
        mem_dirty = true;
        updateMode = MODE_SYNC;
        break;
    case 0x00001: // sync dtcd + sync mem
        updateMode = MODE_SYNC;
        break;
    case 0x10000: // async dtcd + async mem
        updateMode = MODE_ASYNC;
        break;
    case 0x10001: // async dtcd + sync mem
        updateMode = MODE_ASYNC;
        break;
    default:
        updateMode = cacheInfo.syncUpdate ? MODE_SYNC : MODE_ASYNC;
    }

    // inserts are made asynchronous only for tables without an auto-increment field
    if (tableDef->has_auto_increment() == 0 && updateMode == MODE_ASYNC)
        insertMode = MODE_ASYNC;

    log_info("Cache Update Mode: %s",
             updateMode == MODE_SYNC ? "SYNC" : updateMode == MODE_ASYNC ? "ASYNC" : updateMode == MODE_FLUSH ? "FLUSH" : "");

    // empty-node filter: attach it when the cache carries that feature
    const FEATURE_INFO_T *pstFeature;
    pstFeature = Cache.query_feature_by_id(EMPTY_FILTER);
    if (pstFeature != NULL)
    {
        NEW(EmptyNodeFilter, m_pstEmptyNodeFilter);
        if (m_pstEmptyNodeFilter == NULL)
        {
            log_error("new %s error: %m", "EmptyNodeFilter");
            return -1;
        }
        if (m_pstEmptyNodeFilter->Attach(pstFeature->fi_handle) != 0)
        {
            log_error("EmptyNodeFilter attach error: %s", m_pstEmptyNodeFilter->Error());
            return -1;
        }
    }

    // pick the data processor: tree layout when the table has index fields,
    // raw layout otherwise
    Mallocator *pstMalloc = DTCBinMalloc::Instance();
    UpdateMode stUpdateMod = {asyncServer, updateMode, insertMode, insertOrder};
    if (tableDef->index_fields() > 0)
    {
        log_debug("tree index enable, index field num[%d]", tableDef->index_fields());
        pstDataProcess = new TreeDataProcess(pstMalloc, tableDef, &Cache, &stUpdateMod);
        if (pstDataProcess == NULL)
        {
            log_error("create TreeDataProcess error: %m");
            return -1;
        }
    }
    else
    {
        log_debug("%s", "use raw-data mode");
        pstDataProcess = new RawDataProcess(pstMalloc, tableDef, &Cache, &stUpdateMod);
        if (pstDataProcess == NULL)
        {
            log_error("create RawDataProcess error: %m");
            return -1;
        }
        
((RawDataProcess *)pstDataProcess)->set_limit_node_size(nodeSizeLimit);
    }

    if (updateMode == MODE_SYNC)
    {
        noAsyncLog = 1;
    }

    // hot-backup feature: attach it when the cache carries that feature
    pstFeature = Cache.query_feature_by_id(HOT_BACKUP);
    if (pstFeature != NULL)
    {
        NEW(HBFeature, hbFeature);
        if (hbFeature == NULL)
        {
            log_error("new hot-backup feature error: %m");
            return -1;
        }
        if (hbFeature->Attach(pstFeature->fi_handle) != 0)
        {
            log_error("hot-backup feature attach error: %s", hbFeature->Error());
            return -1;
        }

        if (hbFeature->master_uptime() != 0)
        {
            // turn on the changed-key log
            hbLogSwitch = true;
        }
    }
    //Hot Backup

    //DelayPurge
    Cache.start_delay_purge_task(owner->get_timer_list_by_m_seconds(10 /*10 ms*/));

    //Blacklist
    blacklist_timer = owner->get_timer_list(10 * 60); /* 10 min sched*/

    NEW(BlackListUnit(blacklist_timer), blacklist);
    if (NULL == blacklist || blacklist->init_blacklist(100000, tableDef->key_format()))
    {
        log_error("init blacklist failed");
        return -1;
    }

    blacklist->start_blacklist_expired_task();
    //Blacklist

    // key expiry is set up only when the table has an expire-time field,
    // and is accepted only in NoDB mode
    if (tableDef->expire_time_field_id() != -1)
    {
        if (nodbMode)
        {
            key_expire_timer = owner->get_timer_list_by_m_seconds(1000 /* 1s */);
            NEW(ExpireTime(key_expire_timer, &Cache, pstDataProcess,
                           tableDef, maxExpireCount),
                key_expire);
            if (key_expire == NULL)
            {
                log_error("init key expire time failed");
                return -1;
            }
            key_expire->start_key_expired_task();
        }
        else
        {
            log_error("db mode do not support expire time");
            return -1;
        }
    }

    //Empty Node list
    if (fullMode == true)
    {
        // nodb Mode has not empty nodes,
        nodeEmptyLimit = 0;
        // prune all present empty nodes
        Cache.prune_empty_node_list();
    }
    else if (nodeEmptyLimit)
    {
        // Enable Empty Node Limitation
        Cache.set_empty_node_limit(nodeEmptyLimit);
        // re-counting empty node count
        Cache.init_empty_node_list();
        // upgrade from old memory
        Cache.upgrade_empty_node_list();
        // shrinking empty list
        Cache.shrink_empty_node_list();
    }
    
else + { + // move all empty node to clean list + Cache.merge_empty_node_list(); + } + REMOTE_LOG->set_remote_log_mode(tableDef, nodbMode, insertMode, updateMode); + //Empty Node list + return 0; +} + +bool BufferProcess::InsertEmptyNode(void) +{ + for (int i = 0; i < 2; i++) + { + m_stNode = Cache.cache_allocate(ptrKey); + if (!(!m_stNode)) + break; + + if (Cache.try_purge_size(1, m_stNode) != 0) + break; + } + if (!m_stNode) + { + log_debug("alloc cache node error"); + return false; + } + m_stNode.vd_handle() = INVALID_HANDLE; + // new node created, it's EmptyButInCleanList + nodeEmpty = 0; // means it's not in empty before transation + return true; +} + +BufferResult BufferProcess::InsertDefaultRow(TaskRequest &Task) +{ + int iRet; + log_debug("%s", "insert default start!"); + + if (!m_stNode) + { + //发现空节点 + if (InsertEmptyNode() == false) + { + log_warning("alloc cache node error"); + Task.set_error(-EIO, CACHE_SVC, "alloc cache node error"); + return BUFFER_PROCESS_ERROR; + } + if (m_pstEmptyNodeFilter) + m_pstEmptyNodeFilter->CLR(Task.int_key()); + } + else + { + uint32_t uiTotalRows = ((DataChunk *)(DTCBinMalloc::Instance()->handle_to_ptr(m_stNode.vd_handle())))->total_rows(); + if (uiTotalRows != 0) + return BUFFER_PROCESS_OK; + } + + RowValue stRowValue(Task.table_definition()); + stRowValue.default_value(); + + RawData stDataRows(&g_stSysMalloc, 1); + iRet = stDataRows.Init(ptrKey); + if (iRet != 0) + { + log_warning("raw data init error: %d, %s", iRet, stDataRows.get_err_msg()); + Task.set_error(-ENOMEM, CACHE_SVC, "new raw-data error"); + Cache.purge_node_everything(ptrKey, m_stNode); + return BUFFER_PROCESS_ERROR; + } + stDataRows.insert_row(stRowValue, false, false); + iRet = pstDataProcess->replace_data(&m_stNode, &stDataRows); + if (iRet != 0) + { + log_debug("replace data error: %d, %s", iRet, stDataRows.get_err_msg()); + Task.set_error(-ENOMEM, CACHE_SVC, "replace data error"); + /*标记加入黑名单*/ + Task.push_black_list_size(stDataRows.data_size()); + 
Cache.purge_node_everything(ptrKey, m_stNode);
        return BUFFER_PROCESS_ERROR;
    }

    // a successful replace must have attached data to the node
    if (m_stNode.vd_handle() == INVALID_HANDLE)
    {
        log_error("BUG: node[%u] vdhandle=0", m_stNode.node_id());
        Cache.purge_node(Task.packed_key(), m_stNode);
    }

    return BUFFER_PROCESS_OK;
}

/*
 * Function     : buffer_get_data
 * Description  : handle a GET request
 * Input        : Task   request information
 * Output       : Task   response information
 * Return       : BUFFER_PROCESS_NEXT  node missing and not in full-cache mode
 *                BUFFER_PROCESS_OK    answered from the cache (possibly 0 rows)
 *                BUFFER_PROCESS_ERROR reading the node failed
 *                (in NoDB mode whatever check_and_expire() returns, unless it
 *                is BUFFER_PROCESS_NEXT)
 */
BufferResult BufferProcess::buffer_get_data(TaskRequest &Task)
{
    int iRet;

    log_debug("buffer_get_data start ");
    transation_find_node(Task);

    switch (nodeStat)
    {
    case NODESTAT_MISSING:
        if (fullMode == false)
        {
            if (Task.flag_no_cache() != 0)
                Task.mark_as_pass_thru();
            return BUFFER_PROCESS_NEXT;
        }
        --statGetHits; // FullCache Missing treat as miss
        // FullCache Mode: treat as empty & fallthrough
        // (intentional fall through: the decrement above cancels the increment below)
    case NODESTAT_EMPTY:
        ++statGetHits;
        // empty node found: build the (0-row) result directly
        log_debug("found Empty-Node[%u], response directed", Task.int_key());
        Task.prepare_result();
        Task.set_total_rows(0);
        Task.set_result_hit_flag(HIT_SUCCESS);
        return BUFFER_PROCESS_OK;
    }

    if (nodbMode)
    {
        BufferResult cacheRet = check_and_expire(Task);
        if (cacheRet != BUFFER_PROCESS_NEXT)
            return cacheRet;
    }
    ++statGetHits;
    log_debug("[%s:%d]cache hit ", __FILE__, __LINE__);

    transation_update_lru(false, LRU_READ);
    iRet = pstDataProcess->get_data(Task, &m_stNode);
    if (iRet != 0)
    {
        log_error("get_data() failed");
        Task.set_error_dup(-EIO, CACHE_SVC, pstDataProcess->get_err_msg());
        return BUFFER_PROCESS_ERROR;
    }
    log_debug(" noLRU:%d,LRU_READ:%d", noLRU, LRU_READ);
    // Hot Backup
    if (noLRU < LRU_READ && write_lru_hb_log(Task.packed_key()))
    {
        // to keep the error from spreading, still answer the client with success
        log_crit("hb: log lru key failed");
    }
    // Hot Backup
    Task.set_result_hit_flag(HIT_SUCCESS);
    return BUFFER_PROCESS_OK;
}

/*
 * Function     : buffer_batch_get_data
 * Description  : handle a batch GET request
 * Input        : Task   request information
 * Output       : Task   response information
 * Return       : BUFFER_PROCESS_OK, or BUFFER_PROCESS_ERROR when reading a node fails
 */
BufferResult BufferProcess::buffer_batch_get_data(TaskRequest &Task)
{
    int index;
    int iRet;

    log_debug("buffer_batch_get_data start ");

    Task.prepare_result_no_limit();
    // one find/end transaction per key; the loop stops when set_batch_cursor()
    // returns a negative value
    for (index = 0; Task.set_batch_cursor(index) >= 0; index++)
    {
        ++statGetCount;
        Task.set_result_hit_flag(HIT_INIT);
        transation_find_node(Task);
        switch (nodeStat)
        {
        case NODESTAT_EMPTY:
            ++statGetHits;
            Task.done_batch_cursor(index);
            log_debug("[%s:%d]cache empty ", __FILE__, __LINE__);
            break;
        case NODESTAT_MISSING:
            // a missing key is only marked done in full-cache mode
            if (fullMode)
                Task.done_batch_cursor(index);
            log_debug("[%s:%d]cache miss ", __FILE__, __LINE__);
            break;
        case NODESTAT_PRESENT:
            ++statGetHits;
            log_debug("[%s:%d]cache hit ", __FILE__, __LINE__);

            transation_update_lru(false, LRU_BATCH);
            iRet = pstDataProcess->get_data(Task, &m_stNode);
            if (iRet != 0)
            {
                log_error("get_data() failed");
                Task.set_error_dup(-EIO, CACHE_SVC, pstDataProcess->get_err_msg());
                // NOTE(review): this return skips transation_end() for the
                // current key -- confirm the caller closes the transaction
                return BUFFER_PROCESS_ERROR;
            }
            Task.done_batch_cursor(index);

            // Hot Backup
            if (noLRU < LRU_BATCH && write_lru_hb_log(Task.packed_key()))
            {
                // to keep the error from spreading, still answer the client with success
                log_crit("hb: log lru key failed");
            }
            break;
        }
        transation_end();
    }
    // Hot Backup
    return BUFFER_PROCESS_OK;
}

/*
 * Function     : buffer_get_rb
 * Description  : handle the helper's GET read-back task by appending the
 *                task's own result set to its response
 * Input        : Task   request information
 * Output       : Task   response information
 * Return       : BUFFER_PROCESS_OK, or BUFFER_PROCESS_ERROR when append_result() fails
 */
BufferResult BufferProcess::buffer_get_rb(TaskRequest &Task)
{
    log_debug("buffer_get_rb start ");

    Task.prepare_result();
    int iRet = Task.append_result(Task.result);
    if (iRet < 0)
    {
        log_notice("task append_result error: %d", iRet);
        Task.set_error(iRet, CACHE_SVC, "append_result() error");
        return BUFFER_PROCESS_ERROR;
    }
    log_debug("buffer_get_rb success");

    return BUFFER_PROCESS_OK;
}

/* after the helper's GET has returned, update the in-memory data */
BufferResult BufferProcess::buffer_replace_result(TaskRequest &Task)
{
    int iRet;
    int oldRows = 0;

    log_debug("cache replace all start!");

    
transation_find_node(Task);

    // when the database returned 0 rows:
    // 1. set the filter bit 2. answer with a 0-row result directly
    if (m_pstEmptyNodeFilter != NULL)
    {
        if ((Task.result == NULL || Task.result->total_rows() == 0))
        {
            log_debug("SET Empty-Node[%u]", Task.int_key());
            m_pstEmptyNodeFilter->SET(Task.int_key());
            Cache.cache_purge(ptrKey);
            return BUFFER_PROCESS_OK;
        }
        else
        {
            m_pstEmptyNodeFilter->CLR(Task.int_key());
        }
    }

    if (!m_stNode)
    {
        // failing to allocate a node is not reported as an error here
        if (InsertEmptyNode() == false)
            return BUFFER_PROCESS_OK;
    }
    else
    {
        oldRows = Cache.node_rows_count(m_stNode);
    }

    // remember the id for the log messages below
    unsigned int uiNodeID = m_stNode.node_id();
    iRet = pstDataProcess->replace_data(Task, &m_stNode);
    if (iRet != 0 || m_stNode.vd_handle() == INVALID_HANDLE)
    {
        if (nodbMode == true)
        {
            /* UNREACHABLE */
            log_info("cache replace data error: %d. node: %u", iRet, uiNodeID);
            Task.set_error_dup(-EIO, CACHE_SVC, pstDataProcess->get_err_msg());
            return BUFFER_PROCESS_ERROR;
        }
        log_debug("cache replace data error: %d. purge node: %u", iRet, uiNodeID);
        Cache.purge_node_everything(ptrKey, m_stNode);
        // NOTE(review): the sibling failure paths in buffer_update_rows /
        // buffer_replace_rows adjust the total-row counter (inc_total_row);
        // confirm that the dirty-row counter is really the one meant here
        Cache.inc_dirty_row(0 - oldRows);
        return BUFFER_PROCESS_OK;
    }
    Cache.inc_total_row(pstDataProcess->rows_inc());

    transation_update_lru(false, LRU_READ);
    if (oldRows != 0 || Cache.node_rows_count(m_stNode) != 0)
    {
        // Hot Backup
        if (noLRU < LRU_READ && write_lru_hb_log(Task.packed_key()))
        {
            // to keep the error from spreading, still answer the client with success
            log_crit("hb: log lru key failed");
        }
        // Hot Backup
    }

    log_debug("buffer_replace_result success! 
"); + + return BUFFER_PROCESS_OK; +} + +/* + * Function : buffer_flush_data + * Description : 处理flush请求 + * Input : Task 请求信息 + * Output : Task 返回信息 + * Return : 成功返回0,失败返回-1 + */ +BufferResult BufferProcess::buffer_flush_data_before_delete(TaskRequest &Task) +{ + log_debug("%s", "flush start!"); + transation_find_node(Task); + if (!m_stNode || !(m_stNode.is_dirty())) + { + log_debug("node is null or node is clean,return BUFFER_PROCESS_OK"); + return BUFFER_PROCESS_OK; + } + unsigned int uiFlushRowsCnt; + + Node node = m_stNode; + int iRet = 0; + + /*init*/ + keyDirty = m_stNode.is_dirty(); + + DTCFlushRequest *flushReq = new DTCFlushRequest(this, ptrKey); + if (flushReq == NULL) + { + log_error("new DTCFlushRequest error: %m"); + return BUFFER_PROCESS_ERROR; + } + + iRet = pstDataProcess->flush_data(flushReq, &m_stNode, uiFlushRowsCnt); + if (iRet != 0) + { + log_error("flush_data error:%d", iRet); + return BUFFER_PROCESS_ERROR; + } + if (uiFlushRowsCnt == 0) + { + delete flushReq; + if (keyDirty) + Cache.inc_dirty_node(-1); + m_stNode.clr_dirty(); + Cache.remove_from_lru(m_stNode); + Cache.insert2_clean_lru(m_stNode); + return BUFFER_PROCESS_OK; + } + else + { + commit_flush_request(flushReq, NULL); + Cache.inc_dirty_row(pstDataProcess->dirty_rows_inc()); + if (keyDirty) + Cache.inc_dirty_node(-1); + m_stNode.clr_dirty(); + Cache.remove_from_lru(m_stNode); + Cache.insert2_clean_lru(m_stNode); + ++statFlushCount; + statFlushRows += uiFlushRowsCnt; + return BUFFER_PROCESS_OK; + } +} + +/* + * Function : buffer_flush_data + * Description : 处理flush请求 + * Input : Task 请求信息 + * Output : Task 返回信息 + * Return : 成功返回0,失败返回-1 + */ +BufferResult BufferProcess::buffer_flush_data(TaskRequest &Task) +{ + + log_debug("%s", "flush start!"); + transation_find_node(Task); + if (!m_stNode || !(m_stNode.is_dirty())) + return BUFFER_PROCESS_OK; + + unsigned int uiFlushRowsCnt; + + BufferResult iRet = buffer_flush_data(m_stNode, &Task, uiFlushRowsCnt); + if (iRet == BUFFER_PROCESS_OK) 
+ { + ++statFlushCount; + statFlushRows += uiFlushRowsCnt; + } + return (iRet); +} + +/*called by flush next node*/ +int BufferProcess::buffer_flush_data_timer(Node &stNode, unsigned int &uiFlushRowsCnt) +{ + int iRet, err = 0; + + /*init*/ + transation_begin(NULL); + keyDirty = stNode.is_dirty(); + ptrKey = ((DataChunk *)(DTCBinMalloc::Instance()->handle_to_ptr(stNode.vd_handle())))->Key(); + + DTCFlushRequest *flushReq = new DTCFlushRequest(this, ptrKey); + if (flushReq == NULL) + { + log_error("new DTCFlushRequest error: %m"); + err = -1; + goto __out; + } + + iRet = pstDataProcess->flush_data(flushReq, &stNode, uiFlushRowsCnt); + + if (uiFlushRowsCnt == 0) + { + delete flushReq; + if (iRet < 0) + { + err = -2; + goto __out; + } + else + { + if (keyDirty) + Cache.inc_dirty_node(-1); + stNode.clr_dirty(); + Cache.remove_from_lru(stNode); + Cache.insert2_clean_lru(stNode); + err = 1; + goto __out; + } + } + else + { + commit_flush_request(flushReq, NULL); + Cache.inc_dirty_row(pstDataProcess->dirty_rows_inc()); + if (iRet == 0) + { + if (keyDirty) + Cache.inc_dirty_node(-1); + stNode.clr_dirty(); + Cache.remove_from_lru(stNode); + Cache.insert2_clean_lru(stNode); + err = 2; + goto __out; + } + else + { + err = -5; + goto __out; + } + } + +__out: + /*clear init*/ + CacheTransation::Free(); + return err; +} +/* + * Function : buffer_flush_data + * Description : 处理flush请求 + * Input : Task 请求信息 + * Output : Task 返回信息 + * Return : 成功返回0,失败返回-1 + */ +BufferResult BufferProcess::buffer_flush_data(Node &stNode, TaskRequest *pstTask, unsigned int &uiFlushRowsCnt) +{ + int iRet; + + /*could called by flush timer event, no transationFindNode called there, can't trust KeyDirty, recal it*/ + keyDirty = stNode.is_dirty(); + + log_debug("%s", "flush node start!"); + + int flushCnt = 0; + DTCFlushRequest *flushReq = NULL; + if (!nodbMode) + { + flushReq = new DTCFlushRequest(this, ptrKey); + if (flushReq == NULL) + { + log_error("new DTCFlushRequest error: %m"); + if (pstTask != 
NULL)
                pstTask->set_error(-ENOMEM, CACHE_SVC, "new DTCFlushRequest error");
            return BUFFER_PROCESS_ERROR;
        }
    }

    // in NoDB mode flushReq stays NULL and is passed through as such
    iRet = pstDataProcess->flush_data(flushReq, &stNode, uiFlushRowsCnt);

    if (flushReq)
    {
        // numReq is read before the request is handed to commit_flush_request()
        flushCnt = flushReq->numReq;
        commit_flush_request(flushReq, pstTask);
        if (iRet != 0)
        {
            log_error("flush_data() failed while flush data");
            if (pstTask != NULL)
                pstTask->set_error_dup(-EIO, CACHE_SVC, pstDataProcess->get_err_msg());

            return BUFFER_PROCESS_ERROR;
        }
    }

    Cache.inc_dirty_row(pstDataProcess->dirty_rows_inc());

    if (keyDirty)
        Cache.inc_dirty_node(-1);

    stNode.clr_dirty();
    // with keyDirty cleared, transation_update_lru() records the LRU level
    keyDirty = 0;
    transation_update_lru(false, LRU_ALWAYS);

    log_debug("buffer_flush_data success");
    // PENDING tells the caller that flush requests were issued
    if (flushCnt == 0)
        return BUFFER_PROCESS_OK;
    else
        return BUFFER_PROCESS_PENDING;
}

/*
 * Function     : buffer_purge_data
 * Description  : handle a purge request
 * Input        : Task   request information
 * Output       : Task   response information
 * Return       : BUFFER_PROCESS_OK for an empty or missing key; for a present
 *                node the flush result (when a flush was needed) or
 *                BUFFER_PROCESS_OK
 */
BufferResult BufferProcess::buffer_purge_data(TaskRequest &Task)
{
    transation_find_node(Task);

    switch (nodeStat)
    {
    case NODESTAT_EMPTY:
        // NODESTAT_EMPTY is only produced when the filter exists
        // (see transation_find_node), so the pointer is valid here
        m_pstEmptyNodeFilter->CLR(Task.int_key());
        return BUFFER_PROCESS_OK;

    case NODESTAT_MISSING:
        return BUFFER_PROCESS_OK;

    case NODESTAT_PRESENT:
        break;
    }

    BufferResult iRet = BUFFER_PROCESS_OK;
    if (updateMode && m_stNode.is_dirty())
    {
        unsigned int uiFlushRowsCnt;
        iRet = buffer_flush_data(m_stNode, &Task, uiFlushRowsCnt);
        // anything but PENDING ends the purge here
        if (iRet != BUFFER_PROCESS_PENDING)
            return iRet;
    }

    ++statDropCount;
    statDropRows += ((DataChunk *)(DTCBinMalloc::Instance()->handle_to_ptr(m_stNode.vd_handle())))->total_rows();
    Cache.inc_total_row(0LL - ((DataChunk *)(DTCBinMalloc::Instance()->handle_to_ptr(m_stNode.vd_handle())))->total_rows());

    // read the id first: it is only needed for the log message if the purge fails
    unsigned int uiNodeID = m_stNode.node_id();
    if (Cache.cache_purge(ptrKey) != 0)
    {
        log_error("PANIC: purge node[id=%u] fail", uiNodeID);
    }

    return iRet;
}

/*
 * Function     : buffer_update_rows
 * 
Description  : handle the helper's update task
 * Input        : Task   request information
 * Output       : Task   response information
 * Return       : BUFFER_PROCESS_OK on success (a failed synchronous update that
 *                purged the node also returns OK), BUFFER_PROCESS_ERROR otherwise
 */
BufferResult BufferProcess::buffer_update_rows(TaskRequest &Task, bool async, bool setrows)
{
    int iRet;

    log_debug("cache update data start! ");

    // with m_bReplaceEmpty set, make sure there is a (default) row to update
    if (m_bReplaceEmpty == true)
    {
        BufferResult ret = InsertDefaultRow(Task);
        if (ret != BUFFER_PROCESS_OK)
            return (ret);
    }

    // row count before the update, needed if the node has to be purged
    int rows = Cache.node_rows_count(m_stNode);
    iRet = pstDataProcess->update_data(Task, &m_stNode, pstLogRows, async, setrows);
    if (iRet != 0)
    {
        if (async == false && !Task.flag_black_hole())
        {
            // synchronous update: drop the node and still return OK
            Cache.purge_node_everything(ptrKey, m_stNode);
            Cache.inc_total_row(0LL - rows);
            return BUFFER_PROCESS_OK;
        }
        log_warning("update_data() failed: %d,%s", iRet, pstDataProcess->get_err_msg());
        Task.set_error_dup(-EIO, CACHE_SVC, pstDataProcess->get_err_msg());
        transation_update_lru(async, LRU_ALWAYS);
        goto ERR_RETURN;
    }
    /*if update volatile field,node won't be dirty*/
    transation_update_lru((Task.resultInfo.affected_rows() > 0 &&
                           (Task.request_operation() && Task.request_operation()->has_type_commit()) //has core field modified
                           )
                              ? 
async + : false, + LRU_WRITE); + + Cache.inc_dirty_row(pstDataProcess->dirty_rows_inc()); + + // Hot Backup + if (nodeStat != NODESTAT_PRESENT || + (Task.request_operation() && Task.request_operation()->has_type_commit())) + { + // only write log if some non-volatile field got updated + // or cache miss and m_bReplaceEmpty is set (equiv insert(default)+update) + if (write_hb_log(Task, m_stNode, DTCHotBackup::SYNC_UPDATE)) + { + //为避免错误扩大, 给客户端成功响应 + log_crit("hb: log update key failed"); + } + } + // Hot Bakcup + + return BUFFER_PROCESS_OK; + +ERR_RETURN: + return BUFFER_PROCESS_ERROR; +} + +/* buffer_replace_rows don't allow empty stNode */ +BufferResult BufferProcess::buffer_replace_rows(TaskRequest &Task, bool async, bool setrows) +{ + int iRet; + + log_debug("cache replace rows start!"); + + int rows = Cache.node_rows_count(m_stNode); + iRet = pstDataProcess->replace_rows(Task, &m_stNode, pstLogRows, async, setrows); + if (iRet != 0) + { + if (keyDirty == false && !Task.flag_black_hole()) + { + Cache.purge_node_everything(ptrKey, m_stNode); + Cache.inc_total_row(0LL - rows); + } + + /* 如果是同步replace命令,返回成功*/ + if (async == false && !Task.flag_black_hole()) + return BUFFER_PROCESS_OK; + + log_error("cache replace rows error: %d,%s", iRet, pstDataProcess->get_err_msg()); + Task.set_error(-EIO, CACHE_SVC, "replace_data() error"); + return BUFFER_PROCESS_ERROR; + } + Cache.inc_total_row(pstDataProcess->rows_inc()); + Cache.inc_dirty_row(pstDataProcess->dirty_rows_inc()); + + BufferResult ret = BUFFER_PROCESS_OK; + + transation_update_lru(async, LRU_WRITE); + + // Hot Backup + if (write_hb_log(Task, m_stNode, DTCHotBackup::SYNC_UPDATE)) + // if(hbLog.write_update_key(Task.packed_key(), DTCHotBackup::SYNC_UPDATE)) + { + //为避免错误扩大, 给客户端成功响应 + log_crit("hb: log update key failed"); + } + // Hot Bakcup + + log_debug("buffer_replace_rows success! 
"); + + if (m_stNode.vd_handle() == INVALID_HANDLE) + { + log_error("BUG: node[%u] vdhandle=0", m_stNode.node_id()); + Cache.purge_node(Task.packed_key(), m_stNode); + Cache.inc_total_row(0LL - rows); + } + + return ret; +} + +/* + * Function : buffer_insert_row + * Description : 处理Helper的insert task + * Input : Task 请求信息 + * Output : Task 返回信息 + * Return : 成功返回0,失败返回-1 + */ +BufferResult BufferProcess::buffer_insert_row(TaskRequest &Task, bool async, bool setrows) +{ + int iRet; + bool emptyFlag = false; + + if (!m_stNode) + { + emptyFlag = true; + if (InsertEmptyNode() == false) + { + if (async == true || Task.flag_black_hole()) + { + Task.set_error(-EIO, CACHE_SVC, "allocate_node Error while insert row"); + return BUFFER_PROCESS_ERROR; + } + return BUFFER_PROCESS_OK; + } + + RawData stDataRows(&g_stSysMalloc, 1); + //iRet = stDataRows.Init(0, Task.table_definition()->key_format(), ptrKey); + iRet = stDataRows.Init(ptrKey); + if (iRet != 0) + { + log_warning("raw data init error: %d, %s", iRet, stDataRows.get_err_msg()); + Task.set_error(-ENOMEM, CACHE_SVC, "new raw-data error"); + Cache.purge_node_everything(ptrKey, m_stNode); + return BUFFER_PROCESS_ERROR; + } + iRet = pstDataProcess->replace_data(&m_stNode, &stDataRows); + if (iRet != 0) + { + log_warning("raw data init error: %d, %s", iRet, stDataRows.get_err_msg()); + Task.set_error(-ENOMEM, CACHE_SVC, "new raw-data error"); + Cache.purge_node_everything(ptrKey, m_stNode); + return BUFFER_PROCESS_ERROR; + } + + if (m_pstEmptyNodeFilter) + m_pstEmptyNodeFilter->CLR(Task.int_key()); + } + + int oldRows = Cache.node_rows_count(m_stNode); + iRet = pstDataProcess->append_data(Task, &m_stNode, pstLogRows, async, setrows); + if (iRet == -1062) + { + Task.set_error(-ER_DUP_ENTRY, CACHE_SVC, "duplicate unique key detected"); + return BUFFER_PROCESS_ERROR; + } + else if (iRet != 0) + { + if ((async == false && !Task.flag_black_hole()) || emptyFlag) + { + log_debug("append_data() failed, purge now [%d %s]", iRet, 
pstDataProcess->get_err_msg()); + Cache.inc_total_row(0LL - oldRows); + Cache.purge_node_everything(ptrKey, m_stNode); + return BUFFER_PROCESS_OK; + } + else + { + log_error("append_data() failed while update data"); + Task.set_error_dup(-EIO, CACHE_SVC, pstDataProcess->get_err_msg()); + return BUFFER_PROCESS_ERROR; + } + } + transation_update_lru(async, LRU_WRITE); + + Cache.inc_total_row(pstDataProcess->rows_inc()); + if (async == true) + Cache.inc_dirty_row(pstDataProcess->dirty_rows_inc()); + + // Hot Backup + if (write_hb_log(Task, m_stNode, DTCHotBackup::SYNC_INSERT)) + // if(hbLog.write_update_key(Task.packed_key(), DTCHotBackup::SYNC_INSERT)) + { + //为避免错误扩大, 给客户端成功响应 + log_crit("hb: log update key failed"); + } + // Hot Bakcup + + log_debug("buffer_insert_row success"); + return BUFFER_PROCESS_OK; +} + +/* + * Function : buffer_delete_rows + * Description : 处理del请求 + * Input : Task 请求信息 + * Output : Task 返回信息 + * Return : 成功返回0,失败返回-1 + */ +BufferResult BufferProcess::buffer_delete_rows(TaskRequest &Task) +{ + int iRet; + + log_debug("buffer_delete_rows start! 
"); + + uint32_t oldRows = Cache.node_rows_count(m_stNode); + + int all_row_delete = Task.all_rows(); + + if (Task.all_rows() != 0) + { //如果没有del条件则删除整个节点 + empty: + if (lossyMode || Task.flag_black_hole()) + { + Task.resultInfo.set_affected_rows(oldRows); + } + + /*row cnt statistic dec by 1*/ + Cache.inc_total_row(0LL - oldRows); + + /*dirty node cnt staticstic dec by 1*/ + if (keyDirty) + { + Cache.inc_dirty_node(-1); + } + + /* dirty row cnt statistic dec, if count dirty row error, let statistic wrong with it*/ + if (all_row_delete) + { + int old_dirty_rows = pstDataProcess->dirty_rows_in_node(Task, &m_stNode); + if (old_dirty_rows > 0) + Cache.inc_dirty_row(old_dirty_rows); + } + else + { + Cache.inc_dirty_row(pstDataProcess->dirty_rows_inc()); + } + + Cache.purge_node_everything(ptrKey, m_stNode); + if (m_pstEmptyNodeFilter) + m_pstEmptyNodeFilter->SET(Task.int_key()); + + // Hot Backup + Node stEmpytNode; + if (write_hb_log(Task, stEmpytNode, DTCHotBackup::SYNC_PURGE)) + // if(hbLog.write_update_key(Task.packed_key(), DTCHotBackup::SYNC_UPDATE)) + { + //为避免错误扩大, 给客户端成功响应 + log_crit("hb: log update key failed"); + } + // Hot Bakcup + + return BUFFER_PROCESS_OK; + } + + /*delete error handle is too simple, statistic can not trust if error happen here*/ + iRet = pstDataProcess->delete_data(Task, &m_stNode, pstLogRows); + if (iRet != 0) + { + log_error("delete_data() failed: %d,%s", iRet, pstDataProcess->get_err_msg()); + Task.set_error_dup(-EIO, CACHE_SVC, pstDataProcess->get_err_msg()); + if (!keyDirty) + { + Cache.inc_total_row(0LL - oldRows); + Cache.purge_node_everything(ptrKey, m_stNode); + } + return BUFFER_PROCESS_ERROR; + } + + /* Delete to empty */ + uint32_t uiTotalRows = ((DataChunk *)(DTCBinMalloc::Instance()->handle_to_ptr(m_stNode.vd_handle())))->total_rows(); + if (uiTotalRows == 0) + goto empty; + + Cache.inc_dirty_row(pstDataProcess->dirty_rows_inc()); + Cache.inc_total_row(pstDataProcess->rows_inc()); + + transation_update_lru(false, 
LRU_WRITE); + + // Hot Backup + if (write_hb_log(Task, m_stNode, DTCHotBackup::SYNC_DELETE)) + { + //为避免错误扩大, 给客户端成功响应 + log_crit("hb: log update key failed"); + } + // Hot Bakcup + + return BUFFER_PROCESS_OK; +} + +BufferResult BufferProcess::check_allowed_insert(TaskRequest &Task) +{ + int rows = Cache.node_rows_count(m_stNode); + // single rows checker + if (tableDef->key_as_uniq_field() && rows != 0) + { + Task.set_error(-ER_DUP_ENTRY, CACHE_SVC, "duplicate unique key detected"); + return BUFFER_PROCESS_ERROR; + } + if (nodeRowsLimit > 0 && rows >= nodeRowsLimit) + { + /* check weather allowed execute insert operation*/ + Task.set_error(-EC_NOT_ALLOWED_INSERT, __FUNCTION__, + "rows exceed limit, not allowed insert any more data"); + return BUFFER_PROCESS_ERROR; + } + return BUFFER_PROCESS_OK; +} + +BufferResult BufferProcess::buffer_sync_insert_precheck(TaskRequest &Task) +{ + log_debug("%s", "buffer_sync_insert begin"); + if (m_bReplaceEmpty == true) + { // 这种模式下,不支持insert操作 + Task.set_error(-EC_BAD_COMMAND, CACHE_SVC, "insert cmd from client, not support under replace mode"); + log_notice("insert cmd from client, not support under replace mode"); + return BUFFER_PROCESS_ERROR; + } + + if (tableDef->key_as_uniq_field() || nodeRowsLimit > 0) + { + transation_find_node(Task); + + // single rows checker + if (nodeStat == NODESTAT_PRESENT && check_allowed_insert(Task) == BUFFER_PROCESS_ERROR) + return BUFFER_PROCESS_ERROR; + } + + return BUFFER_PROCESS_NEXT; +} + +BufferResult BufferProcess::buffer_sync_insert(TaskRequest &Task) +{ + log_debug("%s", "buffer_sync_insert begin"); + if (m_bReplaceEmpty == true) + { // 这种模式下,不支持insert操作 + Task.set_error(-EC_BAD_COMMAND, CACHE_SVC, "insert cmd from client, not support under replace mode"); + log_notice("insert cmd from client, not support under replace mode"); + return BUFFER_PROCESS_ERROR; + } + + if (Task.resultInfo.insert_id() > 0) + Task.update_packed_key(Task.resultInfo.insert_id()); // 如果自增量字段是key,则会更新key + + 
transation_find_node(Task); + + // Missing is NO-OP, otherwise insert it + switch (nodeStat) + { + case NODESTAT_MISSING: + return BUFFER_PROCESS_OK; + + case NODESTAT_EMPTY: + case NODESTAT_PRESENT: + if (lossyMode) + { + Task.set_error(0, NULL, NULL); + Task.resultInfo.set_affected_rows(0); + } + break; + } + + return buffer_insert_row(Task, false /* async */, lossyMode /* setrows */); +} + +BufferResult BufferProcess::buffer_sync_update(TaskRequest &Task) +{ + bool setrows = lossyMode; + log_debug("%s", "buffer_sync_update begin"); + // NOOP sync update + if (Task.request_operation() == NULL) + { + // no field need to update + return BUFFER_PROCESS_OK; //如果helper更新的纪录数为0则直接返回 + } + else if (setrows == false && Task.resultInfo.affected_rows() == 0) + { + if (Task.request_operation()->has_type_commit() == 0) + { + // pure volatile update, ignore upstream affected-rows + setrows = true; + } + else if (Task.request_condition() && Task.request_condition()->has_type_timestamp()) + { + // update base timestamp fields, ignore upstream affected-rows + setrows = true; + } + else + { + log_debug("%s", "helper's affected rows is zero"); + return BUFFER_PROCESS_OK; //如果helper更新的纪录数为0则直接返回 + } + } + + transation_find_node(Task); + + // Missing or Empty is NO-OP except EmptyAsDefault logical + switch (nodeStat) + { + case NODESTAT_MISSING: + return BUFFER_PROCESS_OK; + + case NODESTAT_EMPTY: + if (m_bReplaceEmpty == true) + break; + if (lossyMode) + { + Task.set_error(0, NULL, NULL); + Task.resultInfo.set_affected_rows(0); + } + return BUFFER_PROCESS_OK; + + case NODESTAT_PRESENT: + if (lossyMode) + { + Task.set_error(0, NULL, NULL); + Task.resultInfo.set_affected_rows(0); + } + break; + } + + return buffer_update_rows(Task, false /*Async*/, setrows); +} + +BufferResult BufferProcess::buffer_sync_replace(TaskRequest &Task) +{ + const int setrows = lossyMode; + log_debug("%s", "buffer_sync_replace begin"); + // NOOP sync update + if (lossyMode == false && 
Task.resultInfo.affected_rows() == 0) + { + log_debug("%s", "helper's affected rows is zero"); + return BUFFER_PROCESS_OK; //如果helper更新的纪录数为0则直接返回 + } + + transation_find_node(Task); + + // missing node is NO-OP, empty node insert it, otherwise replace it + switch (nodeStat) + { + case NODESTAT_MISSING: + return BUFFER_PROCESS_OK; + + case NODESTAT_EMPTY: + if (lossyMode) + { + Task.set_error(0, NULL, NULL); + Task.resultInfo.set_affected_rows(0); + } + return buffer_insert_row(Task, false, setrows); + + case NODESTAT_PRESENT: + if (lossyMode) + { + Task.set_error(0, NULL, NULL); + Task.resultInfo.set_affected_rows(0); + } + break; + } + + return buffer_replace_rows(Task, false, lossyMode); +} + +BufferResult BufferProcess::buffer_sync_delete(TaskRequest &Task) +{ + log_debug("%s", "buffer_sync_delete begin"); + // didn't check zero affected_rows + transation_find_node(Task); + + // missing and empty is NO-OP, otherwise delete it + switch (nodeStat) + { + case NODESTAT_MISSING: + return BUFFER_PROCESS_OK; + case NODESTAT_EMPTY: + if (lossyMode) + { + Task.set_error(0, NULL, NULL); + Task.resultInfo.set_affected_rows(0); + } + return BUFFER_PROCESS_OK; + + case NODESTAT_PRESENT: + break; + } + + return buffer_delete_rows(Task); +} + +BufferResult BufferProcess::buffer_nodb_insert(TaskRequest &Task) +{ + BufferResult iRet; + log_debug("%s", "buffer_asyn_prepare_insert begin"); + if (m_bReplaceEmpty == true) + { // 这种模式下,不支持insert操作 + Task.set_error(-EC_BAD_COMMAND, CACHE_SVC, "insert cmd from client, not support under replace mode"); + log_notice("insert cmd from client, not support under replace mode"); + return BUFFER_PROCESS_ERROR; + } + + transation_find_node(Task); + if (nodeStat == NODESTAT_PRESENT) + { + iRet = check_and_expire(Task); + if (iRet == BUFFER_PROCESS_ERROR) + { + return iRet; + } + else if (iRet == BUFFER_PROCESS_OK) + { + nodeStat = NODESTAT_MISSING; + m_stNode = Node(); + } + } + if (nodeStat == NODESTAT_PRESENT && check_allowed_insert(Task) == 
BUFFER_PROCESS_ERROR) + return BUFFER_PROCESS_ERROR; + + // update key expire time + if (Task.request_operation() && Task.update_key_expire_time(maxExpireTime) != 0) + { + Task.set_error(-EC_BAD_INVALID_FIELD, CACHE_SVC, "key expire time illegal"); + return BUFFER_PROCESS_ERROR; + } + + return buffer_insert_row(Task, false /* async */, true /* setrows */); +} + +BufferResult BufferProcess::buffer_nodb_update(TaskRequest &Task) +{ + log_debug("%s", "buffer_fullmode_prepare_update begin"); + + transation_find_node(Task); + + // missing & empty is NO-OP, except EmptyAsDefault logical + switch (nodeStat) + { + case NODESTAT_MISSING: + case NODESTAT_EMPTY: + if (m_bReplaceEmpty == true) + break; + return BUFFER_PROCESS_OK; + + case NODESTAT_PRESENT: + break; + } + + BufferResult cacheRet = check_and_expire(Task); + if (cacheRet != BUFFER_PROCESS_NEXT) + return cacheRet; + // update key expire time + if (Task.request_operation() && Task.update_key_expire_time(maxExpireTime) != 0) + { + Task.set_error(-EC_BAD_INVALID_FIELD, CACHE_SVC, "key expire time illegal"); + return BUFFER_PROCESS_ERROR; + } + + return buffer_update_rows(Task, false /*Async*/, true /*setrows*/); +} + +BufferResult BufferProcess::buffer_nodb_replace(TaskRequest &Task) +{ + log_debug("%s", "buffer_asyn_prepare_replace begin"); + transation_find_node(Task); + + // update key expire time + if (Task.request_operation() && Task.update_key_expire_time(maxExpireTime) != 0) + { + Task.set_error(-EC_BAD_INVALID_FIELD, CACHE_SVC, "key expire time illegal"); + return BUFFER_PROCESS_ERROR; + } + // missing & empty insert it, otherwise replace it + switch (nodeStat) + { + case NODESTAT_EMPTY: + case NODESTAT_MISSING: + return buffer_insert_row(Task, false, true /* setrows */); + + case NODESTAT_PRESENT: + break; + } + + BufferResult cacheRet = check_and_expire(Task); + if (cacheRet == BUFFER_PROCESS_ERROR) + { + return cacheRet; + } + else if (cacheRet == BUFFER_PROCESS_OK) + { + nodeStat = NODESTAT_MISSING; + 
m_stNode = Node(); + return buffer_insert_row(Task, false, true /* setrows */); + } + + return buffer_replace_rows(Task, false, true); +} + +BufferResult BufferProcess::buffer_nodb_delete(TaskRequest &Task) +{ + log_debug("%s", "buffer_fullmode_delete begin"); + transation_find_node(Task); + + // missing & empty is NO-OP + switch (nodeStat) + { + case NODESTAT_MISSING: + case NODESTAT_EMPTY: + return BUFFER_PROCESS_OK; + + case NODESTAT_PRESENT: + break; + } + + return buffer_delete_rows(Task); +} + +BufferResult BufferProcess::buffer_async_insert(TaskRequest &Task) +{ + log_debug("%s", "buffer_async_insert begin"); + if (m_bReplaceEmpty == true) + { // 这种模式下,不支持insert操作 + Task.set_error(-EC_BAD_COMMAND, CACHE_SVC, "insert cmd from client, not support under replace mode"); + log_notice("insert cmd from client, not support under replace mode"); + return BUFFER_PROCESS_ERROR; + } + + transation_find_node(Task); + switch (nodeStat) + { + case NODESTAT_MISSING: + if (fullMode == false) + return BUFFER_PROCESS_NEXT; + if (updateMode == MODE_FLUSH) + return BUFFER_PROCESS_NEXT; + break; + + case NODESTAT_EMPTY: + if (updateMode == MODE_FLUSH) + return BUFFER_PROCESS_NEXT; + break; + + case NODESTAT_PRESENT: + if (check_allowed_insert(Task) == BUFFER_PROCESS_ERROR) + return BUFFER_PROCESS_ERROR; + if (updateMode == MODE_FLUSH && !(m_stNode.is_dirty())) + return BUFFER_PROCESS_NEXT; + break; + } + + log_debug("%s", "buffer_async_insert data begin"); + //对insert 操作命中数据进行采样 + ++statInsertHits; + + return buffer_insert_row(Task, true /* async */, true /* setrows */); +} + +BufferResult BufferProcess::buffer_async_update(TaskRequest &Task) +{ + log_debug("%s", "buffer_asyn_update begin"); + + transation_find_node(Task); + switch (nodeStat) + { + case NODESTAT_MISSING: + if (fullMode == false) + return BUFFER_PROCESS_NEXT; + // FALLTHROUGH + case NODESTAT_EMPTY: + if (m_bReplaceEmpty == true) + { + if (updateMode == MODE_FLUSH) + return BUFFER_PROCESS_NEXT; + break; + } + 
return BUFFER_PROCESS_OK; + + case NODESTAT_PRESENT: + if (updateMode == MODE_FLUSH && !(m_stNode.is_dirty())) + return BUFFER_PROCESS_NEXT; + break; + } + + log_debug("%s", "buffer_async_update update data begin"); + //对update 操作命中数据进行采样 + ++statUpdateHits; + + return buffer_update_rows(Task, true /*Async*/, true /*setrows*/); +} + +BufferResult BufferProcess::buffer_async_replace(TaskRequest &Task) +{ + log_debug("%s", "buffer_asyn_prepare_replace begin"); + transation_find_node(Task); + + switch (nodeStat) + { + case NODESTAT_MISSING: + if (fullMode == false) + return BUFFER_PROCESS_NEXT; + if (updateMode == MODE_FLUSH) + return BUFFER_PROCESS_NEXT; + if (tableDef->key_as_uniq_field() == false) + return BUFFER_PROCESS_NEXT; + return buffer_insert_row(Task, true, true); + + case NODESTAT_EMPTY: + if (updateMode == MODE_FLUSH) + return BUFFER_PROCESS_NEXT; + return buffer_insert_row(Task, true, true); + + case NODESTAT_PRESENT: + if (updateMode == MODE_FLUSH && !(m_stNode.is_dirty())) + return BUFFER_PROCESS_NEXT; + break; + } + + return buffer_replace_rows(Task, true, true); +} + +/* + * Function : buffer_process_request + * Description : 处理incoming task + * Input : Task 请求信息 + * Output : Task 返回信息 + * Return : 0 成功 + * : -1 失败 + */ +BufferResult BufferProcess::buffer_process_request(TaskRequest &Task) +{ + Task.renew_timestamp(); + szErrMsg[0] = 0; + + Task.field_type(0); + + /* 取命令字 */ + int iCmd = Task.request_code(); + log_debug("BufferProcess::buffer_process_request cmd is %d ", iCmd); + switch (iCmd) + { + case DRequest::Get: + Task.set_result_hit_flag(HIT_INIT); //set hit flag init status + if (Task.count_only() && (Task.requestInfo.limit_start() || Task.requestInfo.limit_count())) + { + Task.set_error(-EC_BAD_COMMAND, CACHE_SVC, "There's nothing to limit because no fields required"); + return BUFFER_PROCESS_ERROR; + } + + /* 如果命中黑名单,则purge掉当前节点,走PassThru模式 */ + if (blacklist->in_blacklist(Task.packed_key())) + { + /* + * 理论上是在黑名单的节点是不可能在cache中的 + * 
为了防止异常,预purge。 + */ + log_debug("blacklist hit, passthough to datasource"); + buffer_purge_data(Task); + Task.mark_as_pass_thru(); + return BUFFER_PROCESS_NEXT; + } + + log_debug("blacklist miss, normal process"); + + ++statGetCount; + + return buffer_get_data(Task); + + case DRequest::Insert: + ++statInsertCount; + + if (updateMode == MODE_ASYNC && insertMode != MODE_SYNC) + return buffer_async_insert(Task); + + //标示task将提交给helper + return buffer_sync_insert_precheck(Task); + + case DRequest::Update: + ++statUpdateCount; + + if (updateMode) + return buffer_async_update(Task); + + //标示task将提交给helper + return BUFFER_PROCESS_NEXT; + + //如果clinet 上送Delete 操作,删除Cache中数据,同时提交Helper + //现阶段异步Cache暂时不支持Delete操作 + case DRequest::Delete: + if (updateMode != MODE_SYNC) + { + if (Task.request_condition() && Task.request_condition()->has_type_rw()) + { + Task.set_error(-EC_BAD_ASYNC_CMD, CACHE_SVC, "Delete base non ReadOnly fields"); + return BUFFER_PROCESS_ERROR; + } + //异步delete前先flush + BufferResult iRet = BUFFER_PROCESS_OK; + iRet = buffer_flush_data_before_delete(Task); + if (iRet == BUFFER_PROCESS_ERROR) + return iRet; + } + + //对于delete操作,直接提交DB,不改变原有逻辑 + ++statDeleteCount; + + //标示task将提交给helper + return BUFFER_PROCESS_NEXT; + + case DRequest::Purge: + //删除指定key在cache中的数据 + ++statPurgeCount; + return buffer_purge_data(Task); + + case DRequest::Flush: + if (updateMode) + //flush指定key在cache中的数据 + return buffer_flush_data(Task); + else + return BUFFER_PROCESS_OK; + + case DRequest::Replace: //如果是淘汰的数据,不作处理 + ++statUpdateCount; + + // 限制key字段作为唯一字段才能使用replace命令 + if (!(Task.table_definition()->key_part_of_uniq_field()) || Task.table_definition()->has_auto_increment()) + { + Task.set_error(-EC_BAD_COMMAND, CACHE_SVC, "replace cmd require key fields part of uniq-fields and no auto-increment field"); + return BUFFER_PROCESS_ERROR; + } + + if (updateMode) + return buffer_async_replace(Task); + + //标示task将提交给helper + return BUFFER_PROCESS_NEXT; + + case DRequest::SvrAdmin: + 
return buffer_process_admin(Task); + + default: + Task.set_error(-EC_BAD_COMMAND, CACHE_SVC, "invalid cmd from client"); + log_notice("invalid cmd[%d] from client", iCmd); + break; + } //end of switch + + return BUFFER_PROCESS_ERROR; +} + +/* + * Function : buffer_process_batch + * Description : 处理incoming batch task + * Input : Task 请求信息 + * Output : Task 返回信息 + * Return : 0 成功 + * : -1 失败 + */ +BufferResult BufferProcess::buffer_process_batch(TaskRequest &Task) +{ + Task.renew_timestamp(); + szErrMsg[0] = 0; + + /* 取命令字 */ + int iCmd = Task.request_code(); + if (nodeEmptyLimit) + { + int bsize = Task.get_batch_size(); + if (bsize * 10 > nodeEmptyLimit) + { + Task.set_error(-EC_TOO_MANY_KEY_VALUE, __FUNCTION__, "batch count exceed LimitEmptyNodes/10"); + return BUFFER_PROCESS_ERROR; + } + } + switch (iCmd) + { + case DRequest::Get: + return buffer_batch_get_data(Task); + + default: // unknwon command treat as OK, fallback to split mode + break; + } //end of switch + + return BUFFER_PROCESS_OK; +} + +/* + * Function : buffer_process_reply + * Description : 处理task from helper reply + * Input : Task 请求信息 + * Output : Task 返回信息 + * Return : 0 成功 + * : -1 失败 + */ + +BufferResult BufferProcess::buffer_process_reply(TaskRequest &Task) +{ + Task.renew_timestamp(); + szErrMsg[0] = '\0'; + int iLimit = 0; + + int iCmd = Task.request_code(); + switch (iCmd) + { + case DRequest::Get: //一定是cache miss,全部replace入cache + if (Task.flag_pass_thru()) + { + if (Task.result) + Task.pass_all_result(Task.result); + return BUFFER_PROCESS_OK; + } + + // ATTN: if failed, node always purged + if (Task.result && + ((nodeSizeLimit > 0 && Task.result->data_len() >= nodeSizeLimit) || + (nodeRowsLimit > 0 && Task.result->total_rows() >= nodeRowsLimit))) + { + log_error("key[%d] rows[%d] size[%d] exceed limit", + Task.int_key(), Task.result->total_rows(), Task.result->data_len()); + iLimit = 1; + } + + /* don't add empty node if Task back from blackhole */ + if (!iLimit && 
!Task.flag_black_hole()) + buffer_replace_result(Task); + + return buffer_get_rb(Task); + + case DRequest::Insert: //没有回读则必定是multirow,新数据附在原有数据后面 + if (Task.flag_black_hole()) + return buffer_nodb_insert(Task); + + if (insertOrder == INSERT_ORDER_PURGE) + { + buffer_purge_data(Task); + return BUFFER_PROCESS_OK; + } + return buffer_sync_insert(Task); + + case DRequest::Update: + if (Task.flag_black_hole()) + return buffer_nodb_update(Task); + + if (insertOrder == INSERT_ORDER_PURGE && Task.resultInfo.affected_rows() > 0) + { + buffer_purge_data(Task); + return BUFFER_PROCESS_OK; + } + + return buffer_sync_update(Task); + + case DRequest::Delete: + if (Task.flag_black_hole()) + return buffer_nodb_delete(Task); + return buffer_sync_delete(Task); + + case DRequest::Replace: + if (Task.flag_black_hole()) + return buffer_nodb_replace(Task); + return buffer_sync_replace(Task); + + case DRequest::SvrAdmin: + if (Task.requestInfo.admin_code() == DRequest::ServerAdminCmd::Migrate) + { + const DTCFieldValue *condition = Task.request_condition(); + const DTCValue *key = condition->field_value(0); + Node stNode = Cache.cache_find_auto_chose_hash(key->bin.ptr); + int rows = Cache.node_rows_count(stNode); + log_debug("migrate replay ,row %d", rows); + Cache.inc_total_row(0LL - rows); + Cache.purge_node_everything(key->bin.ptr, stNode); + log_debug("should purgenode everything"); + keyRoute->key_migrated(key->bin.ptr); + delete (Task.request_operation()); + Task.set_request_operation(NULL); + return BUFFER_PROCESS_OK; + } + if (Task.requestInfo.admin_code() == DRequest::ServerAdminCmd::MigrateDB || + Task.requestInfo.admin_code() == DRequest::ServerAdminCmd::MigrateDBSwitch) + { + return BUFFER_PROCESS_OK; + } + else + { + Task.set_error(-EC_BAD_COMMAND, CACHE_SVC, "invalid cmd from helper"); + } + + case DRequest::Replicate: + // 处理主从同步 + return buffer_process_replicate(Task); + + default: + Task.set_error(-EC_BAD_COMMAND, CACHE_SVC, "invalid cmd from helper"); + } //end of 
switch + + return BUFFER_PROCESS_ERROR; +} + +/* + * Function : buffer_process_fullmode + * Description : 处理incoming task + * Input : Task 请求信息 + * Output : Task 返回信息 + * Return : 0 成功 + * : -1 失败 + */ +BufferResult BufferProcess::buffer_process_nodb(TaskRequest &Task) +{ + // nodb mode always blackhole-d + Task.mark_as_black_hole(); + Task.renew_timestamp(); + szErrMsg[0] = 0; + + /* 取命令字 */ + int iCmd = Task.request_code(); + switch (iCmd) + { + case DRequest::Get: + if (Task.count_only() && (Task.requestInfo.limit_start() || Task.requestInfo.limit_count())) + { + Task.set_error(-EC_BAD_COMMAND, CACHE_SVC, "There's nothing to limit because no fields required"); + return BUFFER_PROCESS_ERROR; + } + ++statGetCount; + Task.set_result_hit_flag(HIT_INIT); + return buffer_get_data(Task); + + case DRequest::Insert: + ++statInsertCount; + return buffer_nodb_insert(Task); + + case DRequest::Update: + ++statUpdateCount; + return buffer_nodb_update(Task); + + case DRequest::Delete: + ++statDeleteCount; + return buffer_nodb_delete(Task); + + case DRequest::Purge: + //删除指定key在cache中的数据 + ++statPurgeCount; + return buffer_purge_data(Task); + + case DRequest::Flush: + return BUFFER_PROCESS_OK; + + case DRequest::Replace: //如果是淘汰的数据,不作处理 + ++statUpdateCount; + // 限制key字段作为唯一字段才能使用replace命令 + if (!(Task.table_definition()->key_part_of_uniq_field()) || Task.table_definition()->has_auto_increment()) + { + Task.set_error(-EC_BAD_COMMAND, CACHE_SVC, "replace cmd require key fields part of uniq-fields and no auto-increment field"); + return BUFFER_PROCESS_ERROR; + } + return buffer_nodb_replace(Task); + + case DRequest::SvrAdmin: + return buffer_process_admin(Task); + + default: + Task.set_error(-EC_BAD_COMMAND, CACHE_SVC, "invalid cmd from client"); + log_notice("invalid cmd[%d] from client", iCmd); + break; + } //end of switch + + return BUFFER_PROCESS_ERROR; +} + +/* + * 当DTC后端使用诸如Rocksdb之类的单机内嵌式持久引擎时,主从同步需要从存储侧拉取全量 + * 数据,这里处理从存储引擎侧的返回值并返回给hotback主从同步端,注意:不对当前cache + * 做任何更改 + * 
*/ +BufferResult BufferProcess::buffer_process_replicate(TaskRequest &Task) +{ + int iRet; + + log_info("do cache process replicate start!"); + + // switch back the tabledef + Task.set_request_code(DRequest::SvrAdmin); + + // 数据库回来的记录如果是0行,则表示全量同步结束 + if ((Task.result == NULL || Task.result->total_rows() == 0)) + { + log_info("full replicate stage finished! key:[%u]", Task.int_key()); + Task.set_table_definition(Task.get_replicate_table()); + Task.set_error(-EC_FULL_SYNC_COMPLETE, "buffer_process_replicate", "full sync finished!"); + return BUFFER_PROCESS_ERROR; + } + + // 处理返回值 + RowValue row(Task.get_replicate_table()); + RawData rawdata(&g_stSysMalloc, 1); + + Task.prepare_result_no_limit(); + + if (Task.result != NULL) + { + ResultSet *pstResultSet = Task.result; + for (int i = 0; i < pstResultSet->total_rows(); i++) + { + RowValue *pstRow = pstResultSet->_fetch_row(); + if (pstRow == NULL) + { + log_info("%s!", "call FetchRow func error"); + rawdata.Destroy(); + // hotback can not handle error exception now, just continue + log_error("replicate: get data from storage failed!"); + continue; + } + + // 设置key + Task.set_request_key(pstRow->field_value(0)); + Task.build_packed_key(); + row[2] = (*pstRow)[0]; + + // only bring back the key list + Task.append_row(&row); + + rawdata.Destroy(); + } + } + + log_info("do cache process replicate finished! 
"); + Task.set_table_definition(Task.get_replicate_table()); + + return BUFFER_PROCESS_OK; +} + +BufferResult BufferProcess::buffer_flush_reply(TaskRequest &Task) +{ + szErrMsg[0] = '\0'; + + int iCmd = Task.request_code(); + switch (iCmd) + { + case DRequest::Replace: //如果是淘汰的数据,不作处理 + return BUFFER_PROCESS_OK; + default: + Task.set_error(-EC_BAD_COMMAND, CACHE_SVC, "invalid cmd from helper"); + } //end of switch + + return BUFFER_PROCESS_ERROR; +} + +BufferResult BufferProcess::buffer_process_error(TaskRequest &Task) +{ + // execute timeout + szErrMsg[0] = '\0'; + + switch (Task.request_code()) + { + case DRequest::Insert: + if (lossyMode == true && Task.result_code() == -ER_DUP_ENTRY) + { + // upstream is un-trusted + Task.renew_timestamp(); + return buffer_sync_insert(Task); + } + // FALLTHROUGH + case DRequest::Delete: + switch (Task.result_code()) + { + case -EC_UPSTREAM_ERROR: + case -CR_SERVER_LOST: + if (updateMode == MODE_SYNC) + { + log_notice("SQL execute result unknown, purge data"); + buffer_purge_data(Task); + } + else + { + log_crit("SQL execute result unknown, data may be corrupted"); + } + break; + } + break; + + case DRequest::Update: + switch (Task.result_code()) + { + case -ER_DUP_ENTRY: + if (lossyMode == true) + { + // upstream is un-trusted + Task.renew_timestamp(); + return buffer_sync_update(Task); + } + // FALLTHROUGH + case -EC_UPSTREAM_ERROR: + case -CR_SERVER_LOST: + if (updateMode == MODE_SYNC) + { + log_notice("SQL execute result unknown, purge data"); + buffer_purge_data(Task); + } + // must be cache miss + break; + } + break; + } + + return BUFFER_PROCESS_ERROR; +} + +BufferResult BufferProcess::check_and_expire(TaskRequest &Task) +{ + uint32_t expire, now; + int iRet = pstDataProcess->get_expire_time(Task.table_definition(), &m_stNode, expire); + if (iRet != 0) + { + log_error("get_expire_time failed"); + Task.set_error_dup(-EIO, CACHE_SVC, pstDataProcess->get_err_msg()); + return BUFFER_PROCESS_ERROR; + } + if (expire != 0 && 
expire <= (now = time(NULL))) + { + //expired + ++statExpireCount; + log_debug("key: %u expired, purge current key when update, expire time: %d, current time: %d", + Task.int_key(), expire, now); + if (Task.request_code() == DRequest::Get) + { + Task.prepare_result(); + Task.set_total_rows(0); + } + Cache.inc_total_row(0LL - Cache.node_rows_count(m_stNode)); + if (Cache.cache_purge(ptrKey) != 0) + log_error("PANIC: purge node[id=%u] fail", m_stNode.node_id()); + return BUFFER_PROCESS_OK; + } + + return BUFFER_PROCESS_NEXT; +} diff --git a/src/search_local/index_storage/cache/buffer_process.h b/src/search_local/index_storage/cache/buffer_process.h new file mode 100644 index 0000000..ba89e88 --- /dev/null +++ b/src/search_local/index_storage/cache/buffer_process.h @@ -0,0 +1,520 @@ +/* + * ===================================================================================== + * + * Filename: buffer_process.h + * + * Description: + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. 
+ *
+ * =====================================================================================
+ */
+#ifndef __CACHE_RPOCESS
+#define __CACHE_RPOCESS
+
+// NOTE(review): the two #include lines below carry no header name in this copy of the file,
+// and several base-class / member types further down (ReplyDispatcher, TaskDispatcher,
+// RequestOutput) look like templates without an argument list. Presumably angle-bracket
+// text was lost somewhere -- confirm against the upstream source.
+#include
+#include
+
+#include "protocol.h"
+#include "value.h"
+#include "field.h"
+#include "section.h"
+#include "table_def.h"
+#include "task_request.h"
+#include "list.h"
+#include "fence.h"
+#include "buffer_pool.h"
+#include "poll_thread.h"
+#include "dbconfig.h"
+#include "lqueue.h"
+#include "stat_dtc.h"
+#include "data_process.h"
+#include "empty_filter.h"
+#include "namespace.h"
+#include "task_pendlist.h"
+#include "data_chunk.h"
+#include "hb_log.h"
+#include "lru_bit.h"
+#include "hb_feature.h"
+#include "blacklist_unit.h"
+#include "expire_time.h"
+
+DTC_BEGIN_NAMESPACE
+
+class DTCFlushRequest;
+class BufferProcess;
+class DTCTableDefinition;
+class TaskPendingList;
+// Outcome of one cache-processing step. BufferProcess::task_notify() switches on this value
+// to decide whether the task is answered, forwarded or left waiting.
+enum BufferResult {
+    BUFFER_PROCESS_ERROR =-1,   // processing failed
+    BUFFER_PROCESS_OK =0,       // processing succeeded
+    BUFFER_PROCESS_NEXT =1,     // hand the task over to the next unit (helper)
+    BUFFER_PROCESS_PENDING =2,  // the task has to wait (flush request pending)
+    BUFFER_PROCESS_REMOTE =3 ,  // migrate command: pass the task to the remote dtc
+    BUFFER_PROCESS_PUSH_HB = 4  // the task goes to the hot-backup thread
+};
+typedef unsigned int MARKER_STAMP;
+
+// Reply dispatcher that forwards a reply to BufferProcess::reply_notify() of its owner.
+class BufferReplyNotify: public ReplyDispatcher {
+    private:
+        BufferProcess *owner;   // not owned by this object
+    public:
+        BufferReplyNotify(BufferProcess *o) :
+            owner(o)
+        {}
+        virtual ~BufferReplyNotify(){}
+        virtual void reply_notify(TaskRequest *);
+};
+
+// Reply dispatcher installed by BufferProcess::push_flush_queue() for flush tasks.
+class FlushReplyNotify: public ReplyDispatcher {
+    private:
+        BufferProcess *owner;   // not owned by this object
+    public:
+        FlushReplyNotify(BufferProcess *o) :
+            owner(o)
+        {}
+        virtual ~FlushReplyNotify(){}
+        virtual void reply_notify(TaskRequest *);
+};
+
+// Reply dispatcher installed by BufferProcess::dispatch_hot_back_task() for hot-backup tasks.
+class HotBackReplay : public ReplyDispatcher
+{
+    public:
+        HotBackReplay() {}
+        virtual ~HotBackReplay() {}
+        virtual void reply_notify(TaskRequest *task);
+};
+
+// NOTE(review): presumably the LRU update levels compared against BufferProcess::noLRU and
+// passed as `type` to transation_update_lru() -- confirm against the implementation.
+enum {
+    LRU_NONE=0,
+    LRU_BATCH,
+    LRU_READ,
+    LRU_WRITE,
+    LRU_ALWAYS=999,
+};
+
+// NOTE(review): presumably the values stored in CacheTransation::nodeStat -- confirm.
+enum {
+    NODESTAT_MISSING,
+    NODESTAT_EMPTY,
+    NODESTAT_PRESENT
+};
+
+// Per-task working state. BufferProcess derives from this struct and resets it through
+// Init() (called by transation_begin()) before handling a task.
+struct CacheTransation {
+    TaskRequest *curTask;
+    const char *ptrKey;
+    Node m_stNode;
+    int oldRows;
+    uint8_t nodeStat;
+    uint8_t keyDirty;
+    uint8_t nodeEmpty;
+    uint8_t lruUpdate;
+    int logtype; // OLD ASYNC TRANSATION LOG
+    RawData *fstLogRows; // OLD ASYNC TRANSATION LOG
+    RawData *pstLogRows; // OLD ASYNC TRANSATION LOG
+
+    // Zero every field, then remember the task being processed.
+    // NOTE(review): memset() also overwrites m_stNode; this is only valid if Node is
+    // trivially copyable -- confirm.
+    void Init(TaskRequest *task) {
+        memset(this, 0, sizeof(CacheTransation));
+        curTask = task;
+    }
+
+    // Release fstLogRows and reset every field except curTask.
+    void Free(void) {
+        if(fstLogRows) delete fstLogRows;
+        fstLogRows = NULL;
+        pstLogRows = NULL;   // only the pointer is cleared; nothing is deleted through it
+        logtype = 0;
+
+        ptrKey = NULL;
+        m_stNode = Node::Empty();
+        nodeStat = 0;
+        keyDirty = 0;
+        oldRows = 0;
+        nodeEmpty = 0;
+        lruUpdate = 0;
+        //curTask = NULL;
+    }
+};
+
+// Cache processing unit: receives tasks through task_notify(), works on the cache and either
+// replies or hands the task to one of its outputs (see the BufferResult values above).
+class BufferProcess :
+    public TaskDispatcher,
+    private TimerObject,
+    public PurgeNodeNotifier,
+    public CacheTransation
+{
+    protected: // base members
+        // cache chain control
+        RequestOutput output;
+        RequestOutput remoteoutput; // passes requests to the remote dtc; used by the migrate command
+        RequestOutput hblogoutput; // hblog task output
+        BufferReplyNotify cacheReply;
+
+        // table info
+        DTCTableDefinition *tableDef;
+        // cache memory management
+        DTCBufferPool Cache;
+        DataProcess* pstDataProcess;
+        CacheInfo cacheInfo;
+
+        // no backup db
+        bool nodbMode;
+        // full cache
+        bool fullMode;
+        bool lossyMode;
+        // treat empty key as default value, flat bitmap emulation
+        bool m_bReplaceEmpty;
+        // lru update level
+        int noLRU;
+        // working mode
+        EUpdateMode asyncServer;
+        EUpdateMode updateMode;
+        EUpdateMode insertMode;
+        /*indicate mem dirty when start with sync dtc*/
+        bool mem_dirty;
+        // server side sorting
+        unsigned char insertOrder;
+
+        // cache protection
+        int nodeSizeLimit; //node size limit
+        int nodeRowsLimit; //node rows limit
+        int nodeEmptyLimit; //empty nodes limit
+
+        // generated error message
+        char szErrMsg[256];
+
+        int maxExpireCount;
+        int maxExpireTime;
+
+
+    protected: // stat subsystem
+        StatItemU32 statGetCount;
+        StatItemU32 statGetHits;
+        StatItemU32 statInsertCount;
+        StatItemU32 statInsertHits;
+        StatItemU32 statUpdateCount;
+        StatItemU32 statUpdateHits;
+        StatItemU32 statDeleteCount;
+        StatItemU32 statDeleteHits;
+        StatItemU32 statPurgeCount;
+
+        StatItemU32 statDropCount;
+        StatItemU32 statDropRows;
+        StatItemU32 statFlushCount;
+        StatItemU32 statFlushRows;
+        StatSample statIncSyncStep;
+
+        StatItemU32 statMaxFlushReq;
+        StatItemU32 statCurrFlushReq;
+        StatItemU32 statOldestDirtyTime;
+        StatItemU32 statAsyncFlushCount;
+
+        StatItemU32 statExpireCount;
+        StatItemU32 statBufferProcessExpireCount;   // bumped by task_notify() for tasks that already expired
+    protected: // async flush members
+        FlushReplyNotify flushReply;
+        TimerList *flushTimer;
+        volatile int nFlushReq; // current pending node
+        volatile int mFlushReq; // pending node limit
+        volatile unsigned short maxFlushReq; // max speed
+        volatile unsigned short markerInterval;
+        volatile int minDirtyTime;
+        volatile int maxDirtyTime;
+        // async log writer
+        int noAsyncLog;
+
+
+    protected:
+        // empty-node filter
+        EmptyNodeFilter *m_pstEmptyNodeFilter;
+
+    protected:
+        // Hot Backup
+        // record updated keys
+        bool hbLogSwitch;
+        // record LRU changes
+
+        HBFeature* hbFeature;
+        // Hot Backup
+
+    protected:
+        // BlackList
+        BlackListUnit *blacklist;
+        TimerList *blacklist_timer;
+        // BlackList
+
+        ExpireTime *key_expire;
+        TimerList *key_expire_timer;
+        HotBackReplay hotbackReply;
+    private:
+        // level 1 processing
+        // GET entrance
+        BufferResult buffer_get_data (TaskRequest &Task);
+        // GET batch entrance
+        BufferResult buffer_batch_get_data (TaskRequest &Task);
+        // GET response, DB --> cache
+        BufferResult buffer_replace_result (TaskRequest &Task);
+        // GET response, DB --> client
+        BufferResult buffer_get_rb (TaskRequest &Task);
+
+        // implementation some admin/purge/flush function
+        BufferResult buffer_process_admin(TaskRequest &Task);
+        BufferResult buffer_purge_data (TaskRequest &Task);
+        BufferResult buffer_flush_data(TaskRequest &Task);
+        BufferResult buffer_flush_data_before_delete(TaskRequest &Task);
+        int buffer_flush_data_timer(Node& stNode, unsigned int& uiFlushRowsCnt);
+        BufferResult buffer_flush_data(Node& stNode, TaskRequest* pstTask, unsigned int& uiFlushRowsCnt);
+
+        // sync mode operation, called by reply
+        BufferResult buffer_sync_insert_precheck (TaskRequest& task);
+        BufferResult buffer_sync_insert (TaskRequest& task);
+        BufferResult buffer_sync_update (TaskRequest& task);
+        BufferResult buffer_sync_replace (TaskRequest& task);
+        BufferResult buffer_sync_delete (TaskRequest& task);
+
+        // async mode operation, called by entrance
+        BufferResult buffer_async_insert (TaskRequest& task);
+        BufferResult buffer_async_update (TaskRequest& task);
+        BufferResult buffer_async_replace (TaskRequest& task);
+
+        // fullcache mode operation, called by entrance
+        BufferResult buffer_nodb_insert (TaskRequest& task);
+        BufferResult buffer_nodb_update (TaskRequest& task);
+        BufferResult buffer_nodb_replace (TaskRequest& task);
+        BufferResult buffer_nodb_delete (TaskRequest& task);
+
+        // level 2 operation
+        // level 2: INSERT with async compatible, create node & clear empty filter
+        BufferResult buffer_insert_row (TaskRequest &Task, bool async, bool setrows);
+        // level 2: UPDATE with async compatible, accept empty node only if EmptyAsDefault
+        BufferResult buffer_update_rows (TaskRequest &Task, bool async, bool setrows);
+        // level 2: REPLACE with async compatible, don't allow empty node
+        BufferResult buffer_replace_rows (TaskRequest &Task, bool async, bool setrows);
+        // level 2: DELETE has no async mode, don't allow empty node
+        BufferResult buffer_delete_rows (TaskRequest &Task);
+
+        // very low level
+        // insert the default value into cache memory for an empty node
+        // auto clear empty filter
+        BufferResult InsertDefaultRow(TaskRequest &Task);
+        bool InsertEmptyNode(void);
+
+        // hot-backup operations
+        BufferResult buffer_register_hb(TaskRequest &Task);
+        BufferResult buffer_logout_hb(TaskRequest &Task);
+        BufferResult buffer_get_key_list(TaskRequest &Task);
+        BufferResult buffer_get_update_key(TaskRequest &Task);
+        BufferResult buffer_get_raw_data(TaskRequest &Task);
+        BufferResult buffer_replace_raw_data(TaskRequest &Task);
+        BufferResult buffer_adjust_lru(TaskRequest &Task);
+        BufferResult buffer_verify_hbt(TaskRequest &Task);
+        BufferResult buffer_get_hbt(TaskRequest &Task);
+
+        // memory defragmentation operations
+        BufferResult buffer_nodehandlechange(TaskRequest &Task);
+
+        // column expand related
+        BufferResult buffer_check_expand_status(TaskRequest &Task);
+        BufferResult buffer_column_expand(TaskRequest &Task);
+        BufferResult buffer_column_expand_done(TaskRequest &Task);
+        BufferResult buffer_column_expand_key(TaskRequest &Task);
+
+        // migration operations
+        BufferResult buffer_migrate(TaskRequest &Task);
+
+        // clear cache(only support nodb mode)
+        BufferResult buffer_clear_cache(TaskRequest &Task);
+
+        /* we can still purge clean node if hit ratio is ok */
+        BufferResult cache_purgeforhit(TaskRequest &Task);
+
+        // row-count limit
+        BufferResult check_allowed_insert(TaskRequest &Task);
+
+        BufferResult buffer_query_serverinfo(TaskRequest &Task);
+
+        // master/slave replication operations
+        BufferResult buffer_process_replicate(TaskRequest &Task);
+
+        // hot-backup log
+        int write_hb_log(const char* key, char *pstChunk, unsigned int uiNodeSize, int iType);
+        int write_hb_log(const char* key, Node& stNode, int iType);
+        int write_hb_log(TaskRequest &Task, Node& stNode, int iType);
+        int write_lru_hb_log(const char* key);
+    public:
+        virtual void purge_node_notify(const char *key, Node node);
+        /* inc flush task stat(created by flush dirty node function) */
+        void inc_async_flush_stat() { statAsyncFlushCount++; }
+
+    private:
+        // entry point for incoming tasks
+        virtual void task_notify(TaskRequest *);
+        // entry point for replies; BufferReplyNotify (a friend) forwards to it
+        void reply_notify(TaskRequest *);
+
+        // flush internal
+        virtual void timer_notify(void);
+        int flush_next_node(void);
+        void delete_tail_time_markers();
+        void get_dirty_stat();
+        void calculate_flush_speed(int is_flush_timer);
+        MARKER_STAMP calculate_current_marker();
+
+        // copy constructor and copy assignment are declared private, so objects
+        // cannot be copied from outside the class
+        BufferProcess (const BufferProcess& robj);
+        BufferProcess& operator= (const BufferProcess& robj);
+
+    public:
+        BufferProcess (PollThread *, DTCTableDefinition *, EUpdateMode async);
+        ~BufferProcess (void);
+
+        const DTCTableDefinition *table_definition(void) const { return tableDef; }
+        // Returns szErrMsg, or "unknown error" while szErrMsg is still empty.
+        const char *last_error_message(void) const { return szErrMsg[0] ? szErrMsg : "unknown error"; }
+
+        void set_limit_node_size(int node_size) {
+            nodeSizeLimit = node_size;
+        }
+
+        /* 0 = no limit; negative values are stored as 0 */
+        void set_limit_node_rows(int rows) {
+            nodeRowsLimit = rows < 0 ? 0 : rows;
+            return;
+        }
+
+        /*
+         * 0 = no limit (negative values are stored as 0 as well),
+         * 1-999: invalid, use 1000 instead
+         * 1000-1G: max empty node count
+         * >1G: invalid, no limit
+         */
+        void set_limit_empty_nodes(int nodes) {
+            nodeEmptyLimit = nodes <= 0 ? 0 :
+                nodes < 1000 ? 1000 :
+                nodes > (1<<30) ? 0 :
+                nodes;
+            return;
+        }
+
+        void disable_auto_purge(void) {
+            Cache.disable_try_purge();
+        }
+
+        void set_date_expire_alert_time(int time) {
+            Cache.set_date_expire_alert_time(time);
+        }
+
+        /*************************************************
+          Description: set the cache memory size and version
+          Input:       cacheSize      size of the shared memory
+                       createVersion  cache memory version number
+          Output:
+          Return:      0 on success, any other value on error
+         *************************************************/
+        int buffer_set_size(unsigned long cacheSize, unsigned int createVersion);
+
+        /*************************************************
+          Description: open the shared memory and initialize it
+          Input:       iIpcKey  key of the shared memory
+          Output:
+          Return:      0 on success, any other value on error
+         *************************************************/
+        int cache_open(int iIpcKey, int iEnableEmptyFilter, int iEnableAutoDeleteDirtyShm);
+
+        int update_mode(void) const { return updateMode; }
+        int enable_no_db_mode(void);
+        // any non-zero v switches lossy mode on
+        void enable_lossy_data_source(int v) { lossyMode = v == 0 ? false : true; }
+        int disable_lru_update(int);
+        int disable_async_log(int);
+
+        /*************************************************
+          Description: process a task request
+          Input:       Task  the task request
+          Output:
+          Return:      BUFFER_PROCESS_OK on success, BUFFER_PROCESS_NEXT to hand the task to the
+                       helper, BUFFER_PROCESS_PENDING when a flush request has to be waited for,
+                       BUFFER_PROCESS_ERROR on error
+         *************************************************/
+        BufferResult buffer_process_request(TaskRequest &Task);
+
+        /*************************************************
+          Description: process the helper's reply
+          Input:       Task  the task request
+          Output:
+          Return:      BUFFER_PROCESS_OK on success, BUFFER_PROCESS_ERROR on error
+         *************************************************/
+        BufferResult buffer_process_reply(TaskRequest &Task);
+
+        /*************************************************
+          Description: process the helper's reply
+                       NOTE(review): this text looks copied from buffer_process_reply();
+                       task_notify() calls this function for batch requests.
+          Input:       Task  the task request
+          Output:
+          Return:      BUFFER_PROCESS_OK on success, BUFFER_PROCESS_ERROR on error
+         *************************************************/
+        BufferResult buffer_process_batch(TaskRequest &Task);
+
+        /*************************************************
+          Description: process the helper's reply
+                       NOTE(review): this text looks copied from buffer_process_reply();
+                       task_notify() calls this function when nodbMode is set.
+          Input:       Task  the task request
+          Output:
+          Return:      BUFFER_PROCESS_OK on success, BUFFER_PROCESS_ERROR on error
+         *************************************************/
+        BufferResult buffer_process_nodb(TaskRequest &Task);
+
+        /*************************************************
+          Description: process the helper's reply to a flush request
+          Input:       Task  the task request
+          Output:
+          Return:      BUFFER_PROCESS_OK on success, BUFFER_PROCESS_ERROR on error
+         *************************************************/
+        BufferResult buffer_flush_reply(TaskRequest &Task);
+
+        /*************************************************
+          Description: handle a task that ended with an error
+          Input:       Task  the task request
+          Output:
+          Return:      BUFFER_PROCESS_OK on success, BUFFER_PROCESS_ERROR on error
+         *************************************************/
+        BufferResult buffer_process_error(TaskRequest &Task);
+
+        void print_row(const RowValue *r);
+        int set_insert_order(int o);
+        void set_replace_empty(bool v) { m_bReplaceEmpty = v; }
+
+        // stage relate
+        void bind_dispatcher(TaskDispatcher *p) { output.bind_dispatcher(p); }
+        void bind_dispatcher_remote(TaskDispatcher *p) { remoteoutput.bind_dispatcher(p); }
+        void bind_hb_log_dispatcher(TaskDispatcher *p) { hblogoutput.bind_dispatcher(p); }
+
+        // flush api
+        void set_flush_parameter(int, int, int, int);
+        void set_drop_count(int); // to be removed
+        int commit_flush_request(DTCFlushRequest *, TaskRequest*);
+        void complete_flush_request(DTCFlushRequest *);
+        // register flushReply as the reply dispatcher of p, then queue p on the main output
+        void push_flush_queue(TaskRequest *p) { p->push_reply_dispatcher(&flushReply); output.indirect_notify(p); }
+        inline bool is_mem_dirty() {return mem_dirty;}
+        int oldest_dirty_node_alarm();
+
+        // expire
+        BufferResult check_and_expire(TaskRequest &Task);
+
+
+        friend class TaskPendingList;
+        friend class BufferReplyNotify;
+
+public:
+        // transation implementation
+        inline void transation_begin(TaskRequest *task) { CacheTransation::Init(task); }
+        void transation_end(void);
+        inline int transation_find_node(TaskRequest &task);
+        inline void transation_update_lru(bool async, int type);
+        // register hotbackReply as the reply dispatcher of task, then send it to the hblog output
+        void dispatch_hot_back_task(TaskRequest *task)
+        {
+
+            task->push_reply_dispatcher(&hotbackReply);
+            hblogoutput.task_notify(task);
+        }
+};
+
+
+DTC_END_NAMESPACE
+
+#endif
diff --git a/src/search_local/index_storage/cache/buffer_reader.cc b/src/search_local/index_storage/cache/buffer_reader.cc
new file mode 100644
index 0000000..d63baaa
--- /dev/null
+++ b/src/search_local/index_storage/cache/buffer_reader.cc
@@ -0,0 +1,183 @@
+/*
+ * =====================================================================================
+ *
+ * Filename: buffer_reader.cc
+ *
+ * Description:
+ *
+ * Version: 1.0
+ * Created: 09/08/2020 10:02:05 PM
+ * Revision: none
+ * Compiler: gcc
+ *
+ * Author: Norton, yangshuang68@jd.com
+ * Company: JD.com, Inc.
+ * + * ===================================================================================== + */ +#include +#include +#include + +#include "task_pkey.h" +#include "buffer_reader.h" +#include "log.h" +#include "sys_malloc.h" + +BufferReader::BufferReader(void) : DTCBufferPool(NULL) +{ + pstItem = NULL; + pstDataProcess = NULL; + iInDirtyLRU = 1; + notFetch = 1; +} + +BufferReader::~BufferReader(void) +{ + if (pstItem != NULL) + delete pstItem; + pstItem = NULL; +} + +int BufferReader::cache_open(int shmKey, int keySize, DTCTableDefinition *pstTab) +{ + int iRet; + + CacheInfo stInfo; + memset(&stInfo, 0, sizeof(stInfo)); + stInfo.ipcMemKey = shmKey; + stInfo.keySize = keySize; + stInfo.readOnly = 1; + + iRet = DTCBufferPool::cache_open(&stInfo); + if (iRet != E_OK) + return -1; + + pstItem = new RawData(&g_stSysMalloc, 1); + if (pstItem == NULL) + { + snprintf(error_message, sizeof(error_message), "new RawData error: %m"); + return -1; + } + + UpdateMode stUpdateMod; + stUpdateMod.m_iAsyncServer = MODE_SYNC; + stUpdateMod.m_iUpdateMode = MODE_SYNC; + stUpdateMod.m_iInsertMode = MODE_SYNC; + stUpdateMod.m_uchInsertOrder = 0; + + if (pstTab->index_fields() > 0) + { +#if HAS_TREE_DATA + pstDataProcess = new TreeDataProcess(DTCBinMalloc::Instance(), pstTab, this, &stUpdateMod); +#else + log_error("tree index not supported, index field num[%d]", pstTab->index_fields()); + return -1; +#endif + } + else + pstDataProcess = new RawDataProcess(DTCBinMalloc::Instance(), pstTab, this, &stUpdateMod); + if (pstDataProcess == NULL) + { + log_error("create %s error: %m", pstTab->index_fields() > 0 ? 
"TreeDataProcess" : "RawDataProcess"); + return -1; + } + + return 0; +} + +int BufferReader::begin_read() +{ + stDirtyHead = dirty_lru_head(); + stClrHead = clean_lru_head(); + if (!dirty_lru_empty()) + { + iInDirtyLRU = 1; + stCurNode = stDirtyHead; + } + else + { + iInDirtyLRU = 0; + stCurNode = stClrHead; + } + return 0; +} + +int BufferReader::fetch_node() +{ + + pstItem->Destroy(); + if (!stCurNode) + { + snprintf(error_message, sizeof(error_message), "begin read first!"); + return -1; + } + if (end()) + { + snprintf(error_message, sizeof(error_message), "reach end of cache"); + return -2; + } + notFetch = 0; + + curRowIdx = 0; + if (iInDirtyLRU) + { + while (stCurNode != stDirtyHead && is_time_marker(stCurNode)) + stCurNode = stCurNode.Next(); + if (stCurNode != stDirtyHead && !is_time_marker(stCurNode)) + { + if (pstDataProcess->get_all_rows(&stCurNode, pstItem) != 0) + { + snprintf(error_message, sizeof(error_message), "get node's data error"); + return -3; + } + return (0); + } + + iInDirtyLRU = 0; + stCurNode = stClrHead.Next(); + } + + stCurNode = stCurNode.Next(); + if (stCurNode != stClrHead) + { + if (pstDataProcess->get_all_rows(&stCurNode, pstItem) != 0) + { + snprintf(error_message, sizeof(error_message), "get node's data error"); + return -3; + } + } + else + { + snprintf(error_message, sizeof(error_message), "reach end of cache"); + return -2; + } + + return (0); +} + +int BufferReader::num_rows() +{ + if (pstItem == NULL) + return (-1); + + return pstItem->total_rows(); +} + +int BufferReader::read_row(RowValue &row) +{ + while (notFetch || curRowIdx >= (int)pstItem->total_rows()) + { + if (fetch_node() != 0) + return -1; + } + + TaskPackedKey::unpack_key(row.table_definition(), pstItem->Key(), row.field_value(0)); + + if (pstItem->decode_row(row, uchRowFlags, 0) != 0) + return -2; + + curRowIdx++; + + return 0; +} diff --git a/src/search_local/index_storage/cache/buffer_reader.h b/src/search_local/index_storage/cache/buffer_reader.h new file 
mode 100644
index 0000000..56587b4
--- /dev/null
+++ b/src/search_local/index_storage/cache/buffer_reader.h
@@ -0,0 +1,63 @@
+/*
+ * =====================================================================================
+ *
+ * Filename: buffer_reader.h
+ *
+ * Description:
+ *
+ * Version: 1.0
+ * Created: 09/08/2020 10:02:05 PM
+ * Revision: none
+ * Compiler: gcc
+ *
+ * Author: Norton, yangshuang68@jd.com
+ * Company: JD.com, Inc.
+ *
+ * =====================================================================================
+ */
+#ifndef __CACHE_READER_H
+#define __CACHE_READER_H
+
+#include "reader_interface.h"
+#include "buffer_pool.h"
+#include "table_def.h"
+#include "raw_data_process.h"
+
+// Row reader over a cache: cache_open() attaches to it, begin_read() positions the cursor
+// and read_row() hands out one row per call.
+class BufferReader : public ReaderInterface, public DTCBufferPool
+{
+private:
+    Node stClrHead;              // head of the clean LRU list, captured by begin_read()
+    Node stDirtyHead;            // head of the dirty LRU list, captured by begin_read()
+    int iInDirtyLRU;             // non-zero while the cursor is still on the dirty list
+    Node stCurNode;              // cursor: node the reader is currently positioned on
+    unsigned char uchRowFlags;   // flags of the row decoded last by read_row()
+    RawData *pstItem;            // buffer holding the rows of the fetched node
+    DataProcess *pstDataProcess; // loads a node's rows into pstItem
+    int notFetch;                // 1 until fetch_node() gets past its readiness checks
+    int curRowIdx;               // number of rows already handed out from pstItem
+    char error_message[200];     // text of the last failure, see err_msg()
+
+public:
+    BufferReader(void);
+    ~BufferReader(void);
+
+    int cache_open(int shmKey, int keySize, DTCTableDefinition *pstTab);
+
+    const char *err_msg() { return error_message; }
+    int begin_read();
+    int read_row(RowValue &row);
+    int end();
+    // key_flags() and key_flag_dirty() both report the dirty state of the current node
+    int key_flags(void) const { return stCurNode.is_dirty(); }
+    int key_flag_dirty(void) const { return stCurNode.is_dirty(); }
+    int row_flags(void) const { return uchRowFlags; }
+    // non-zero when the last decoded row carries the OPER_DIRTY bit
+    int row_flag_dirty(void) const { return uchRowFlags & OPER_DIRTY; }
+    int fetch_node();
+    int num_rows();
+};
+
+// True once the dirty list is behind us, at least one fetch has happened and the cursor is
+// back on the head of the clean list.
+inline int BufferReader::end()
+{
+    return (iInDirtyLRU == 0) && (notFetch == 0) && (stCurNode == stClrHead);
+}
+
+#endif
diff --git a/src/search_local/index_storage/cache/buffer_remoteLog.h b/src/search_local/index_storage/cache/buffer_remoteLog.h
new file mode 100644
index 0000000..2623c8d
--- /dev/null
+++ b/src/search_local/index_storage/cache/buffer_remoteLog.h
@@ -0,0 +1,421 @@
+/*
+ *
===================================================================================== + * + * Filename: buffer_remoteLog.h + * + * Description: + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. + * + * ===================================================================================== + */ +#ifndef _CACHE_REMOTE_LOG_ +#define _CACHE_REMOTE_LOG_ + +#include "value.h" +#include +#include "singleton.h" +#include +#include +#include "table_def.h" +#include "protocol.h" +#include "log.h" +#include +#define REMOTELOG_OP_FLOW_TYPE 1 +extern void remote_log(int type, const char *key, int op_type, int op_result, char *content, long op_time, int cmd, int magic, int contentlen); +enum E_TASK_PROCESS_STAGE +{ + TASK_NOTIFY_STAGE = 0, + TASK_REPLY_STAGE = 1 +}; + +class CacheRemoteLog +{ +public: + CacheRemoteLog() : m_curtask(NULL), m_IsNoDbMode(false), m_TableDef(0), m_UpdateMode(MODE_SYNC), m_InsertMode(MODE_SYNC), m_RemotePort(0), m_OpLog(false) + { + } + ~CacheRemoteLog() + { + } + void set_remote_port(int iPort) + { + m_RemotePort = iPort; + } + void set_op_log_on() + { + m_OpLog = true; + } + void set_remote_log_mode(DTCTableDefinition *tableDef, bool isNoDbMode, EUpdateMode insertMode, EUpdateMode updateMode) + { + this->m_TableDef = tableDef; + this->m_IsNoDbMode = isNoDbMode; + this->m_UpdateMode = updateMode; + this->m_InsertMode = insertMode; + } + + void write_remote_log(uint64_t ddwOptime, TaskRequest *curtask, E_TASK_PROCESS_STAGE stage) + { + if (0 == m_RemotePort) + { + return; + } + if (!m_OpLog) + return; + this->m_curtask = curtask; + if (NULL == m_curtask) + { + return; + } + + if ((DRequest::Get == m_curtask->request_code()) || (DRequest::SvrAdmin == m_curtask->request_code())) + { + + return; + } + + if ((DRequest::Replace == m_curtask->request_code()) && (m_UpdateMode == MODE_ASYNC || m_InsertMode == MODE_ASYNC)) + { + return; + } + + if 
(NULL == m_TableDef) + { + return; + } + if (DRequest::Purge == m_curtask->request_code()) + { + std::string strPurgeContent = "purge Node"; + remote_log(REMOTELOG_OP_FLOW_TYPE, extract_key().c_str(), DRequest::Purge, + 0, const_cast(strPurgeContent.c_str()), ddwOptime, 0, 0, strPurgeContent.length()); + return; + } + + if (!m_IsNoDbMode) + { + + if (TASK_NOTIFY_STAGE == stage) + { + + if ((m_UpdateMode == MODE_SYNC) && ((m_InsertMode == MODE_SYNC))) + { + + return; + } + write_task_notify_stage_log(ddwOptime); + return; + } + if (TASK_REPLY_STAGE == stage) + { + + write_task_reply_stage_log(ddwOptime); + return; + } + } + else + { + + write_no_db_op_log(ddwOptime); + } + } + +private: + void write_task_notify_stage_log(uint64_t ddwOptime) + { + std::stringstream oss; + oss << "task notify stage, Async mode , "; + if ((DRequest::Update == m_curtask->request_code()) && (m_UpdateMode == MODE_ASYNC)) + { + oss << extract_update_content(); + remote_log(REMOTELOG_OP_FLOW_TYPE, extract_key().c_str(), m_curtask->request_code(), + m_curtask->result_code(), const_cast(oss.str().c_str()), ddwOptime, 0, 0, oss.str().length()); + } + if ((DRequest::Insert == m_curtask->request_code()) && (m_InsertMode == MODE_ASYNC)) + { + oss << extract_insert_content(); + remote_log(REMOTELOG_OP_FLOW_TYPE, extract_key().c_str(), m_curtask->request_code(), + m_curtask->result_code(), const_cast(oss.str().c_str()), ddwOptime, 0, 0, oss.str().length()); + } + if ((DRequest::Replace == m_curtask->request_code()) && (m_UpdateMode == MODE_ASYNC)) + { + oss << extract_replace_content(); + remote_log(REMOTELOG_OP_FLOW_TYPE, extract_key().c_str(), m_curtask->request_code(), + m_curtask->result_code(), const_cast(oss.str().c_str()), ddwOptime, 0, 0, oss.str().length()); + } + } + + void write_task_reply_stage_log(uint64_t ddwOptime) + { + std::stringstream oss; + oss << "task reply stage, "; + oss << get_op_content(); + remote_log(REMOTELOG_OP_FLOW_TYPE, extract_key().c_str(), 
m_curtask->request_code(), + m_curtask->result_code(), const_cast(oss.str().c_str()), ddwOptime, 0, 0, oss.str().length()); + } + + void write_no_db_op_log(uint64_t ddwOptime) + { + std::string strContent = get_no_db_op_content(); + remote_log(REMOTELOG_OP_FLOW_TYPE, extract_key().c_str(), m_curtask->request_code(), + m_curtask->result_code(), const_cast(strContent.c_str()), ddwOptime, 0, 0, strContent.length()); + } + std::string get_no_db_op_content() + { + + std::stringstream oss; + oss << "NoDb Op,content: " << get_op_content(); + return oss.str(); + } + + std::string get_op_content() + { + if (DRequest::Update == m_curtask->request_code()) + { + return extract_update_content(); + } + else if (DRequest::Insert == m_curtask->request_code()) + { + return extract_insert_content(); + } + else if (DRequest::Delete == m_curtask->request_code()) + { + return extract_delete_content(); + } + else if (DRequest::Replace == m_curtask->request_code()) + { + return extract_replace_content(); + } + else + { + return ""; + } + } + + void filter_quotation(char *ptr, int len) + { + if ((NULL == ptr) || (len <= 0)) + { + return; + } + + for (int iCharLoop = 0; iCharLoop < len; iCharLoop++) + { + if ('\"' == ptr[iCharLoop]) + { + ptr[iCharLoop] = '|'; + } + } + } + std::string hex_to_string(char *ptr, int len) + { + if ((NULL == ptr) || (len <= 0)) + { + return ""; + } + std::string str; + while (len--) + { + char szTemp[16] = {0}; + memset(szTemp, 0, 16); + snprintf(szTemp, sizeof(szTemp), "%02x", *ptr++); + str += szTemp; + } + return str; + } + + std::string value_to_str(const DTCValue *value, int fieldType) + { + if (NULL == value) + { + return ""; + } + std::stringstream oss; + switch (fieldType) + { + case DField::Signed: + { + oss << value->s64; + break; + } + + case DField::Unsigned: + { + oss << value->u64; + break; + } + + case DField::String: + { + filter_quotation(value->str.ptr, value->str.len); + oss << value->str.ptr; + break; + } + case DField::Binary: + { + return 
hex_to_string(value->str.ptr, value->str.len); + } + case DField::Float: + { + oss << value->flt; + break; + } + default: + { + return ""; + } + } + return oss.str(); + } + + std::string extract_key() + { + if (NULL == m_curtask) + { + return ""; + } + return value_to_str(m_curtask->request_key(), m_TableDef->field_type(0)); + } + + std::string extract_condition_content(const DTCFieldValue *condition) + { + if (NULL == condition) + { + return ""; + } + std::stringstream oss; + oss << "where conditon:["; + for (int j = 0; j < condition->num_fields(); j++) + { + if (m_TableDef->is_volatile(j)) + { + return ""; + } + uint8_t op = condition->field_operation(j); + if (op >= DField::TotalComparison) + { + continue; + } + + static const char *const compStr[] = {"EQ", "NE", "LT", "LE", "GT", "GE"}; + oss << m_TableDef->field_name(condition->field_id(j)) << " "; + oss << compStr[op] << " "; + oss << value_to_str(condition->field_value(j), condition->field_type(j)); + oss << ";"; + } + oss << "]"; + return oss.str(); + } + + std::string extract_update_content(const DTCFieldValue *updateInfo) + { + if (NULL == updateInfo) + { + return ""; + } + std::stringstream oss; + oss << "update content:["; + for (int i = 0; i < updateInfo->num_fields(); i++) + { + const int fid = updateInfo->field_id(i); + + if (m_TableDef->is_volatile(fid)) + { + continue; + } + + switch (updateInfo->field_operation(i)) + { + case DField::Set: + { + oss << m_TableDef->field_name(fid) << ":" << value_to_str(updateInfo->field_value(i), updateInfo->field_type(i)) << ";"; + break; + } + case DField::Add: + { + oss << m_TableDef->field_name(fid) << ":" << m_TableDef->field_name(fid) << "+" << value_to_str(updateInfo->field_value(i), updateInfo->field_type(i)) << ";"; + break; + } + default: + { + break; + } + } + } + oss << "]"; + return oss.str(); + } + + std::string extract_insert_content() + { + if (NULL == m_curtask) + { + return ""; + } + std::stringstream oss; + + if (m_curtask->request_operation()) + 
{ + oss << "insert content: ["; + const DTCFieldValue *updateInfo = m_curtask->request_operation(); + for (int i = 0; i < updateInfo->num_fields(); ++i) + { + int fid = updateInfo->field_id(i); + if (m_TableDef->is_volatile(fid)) + { + continue; + } + oss << m_TableDef->field_name(fid) << ":" << value_to_str(updateInfo->field_value(i), updateInfo->field_type(i)) << ";"; + } + oss << "]"; + } + return oss.str(); + } + + std::string extract_update_content() + { + if (NULL == m_curtask) + { + return ""; + } + std::stringstream oss; + oss << extract_update_content(m_curtask->request_operation()); + oss << extract_condition_content(m_curtask->request_condition()); + return oss.str(); + } + + std::string extract_delete_content() + { + if (NULL == m_curtask) + { + return ""; + } + return extract_condition_content(m_curtask->request_condition()); + } + + std::string extract_replace_content() + { + if (NULL == m_curtask) + { + return ""; + } + return extract_update_content(m_curtask->request_operation()); + } + +private: + TaskRequest *m_curtask; + bool m_IsNoDbMode; + DTCTableDefinition *m_TableDef; + EUpdateMode m_UpdateMode; + EUpdateMode m_InsertMode; + int m_RemotePort; /*如果端口没有设置正确,写日志函数就啥都不用做了*/ + bool m_OpLog; +}; + +#define REMOTE_LOG Singleton::Instance() +#endif diff --git a/src/search_local/index_storage/cache/buffer_unit.cc b/src/search_local/index_storage/cache/buffer_unit.cc new file mode 100644 index 0000000..a1ff2b9 --- /dev/null +++ b/src/search_local/index_storage/cache/buffer_unit.cc @@ -0,0 +1,241 @@ +/* + * ===================================================================================== + * + * Filename: buffer_unit.cc + * + * Description: + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. 
+ * + * ===================================================================================== + */ +#include "log.h" +#include "buffer_process.h" +#include +#include "buffer_remoteLog.h" + +void BufferReplyNotify::reply_notify(TaskRequest *cur) +{ + owner->reply_notify(cur); +} + +void BufferProcess::reply_notify(TaskRequest *cur) +{ + if (DRequest::ReloadConfig == cur->request_code() && TaskTypeHelperReloadConfig == cur->request_type()) + { + /*only delete task */ + log_debug("reload config task reply ,just delete task"); + delete cur; + return; + } + + transation_begin(cur); + + if (cur->result_code() < 0) + { + buffer_process_error(*cur); + } + else if (cur->result_code() > 0) + { + log_notice("result_code() > 0: from %s msg %s", cur->resultInfo.error_from(), cur->resultInfo.error_message()); + } + if (cur->result_code() >= 0 && buffer_process_reply(*cur) != BUFFER_PROCESS_OK) + { + if (cur->result_code() >= 0) + cur->set_error(-EC_SERVER_ERROR, "buffer_process_reply", last_error_message()); + } + + if (!cur->flag_black_hole()) + { + /* 如果cache操作有失败,则加入黑名单*/ + unsigned blacksize = cur->pop_black_list_size(); + if (blacksize > 0) + { + log_debug("add to blacklist, key=%d size=%u", cur->int_key(), blacksize); + blacklist->add_blacklist(cur->packed_key(), blacksize); + } + } + + REMOTE_LOG->write_remote_log(owner->get_now_time() / 1000000, cur, TASK_REPLY_STAGE); + cur->reply_notify(); + + transation_end(); + + /* 启动匀速淘汰(deplay purge) */ + Cache.delay_purge_notify(); +} + +void HotBackReplay::reply_notify(TaskRequest *cur) +{ + log_debug("reply_notify, request type %d", cur->request_type()); + int iRet = cur->result_code(); + if (0 != iRet) + { + if ((-ETIMEDOUT == iRet) || (-EC_INC_SYNC_STAGE == iRet) || (-EC_FULL_SYNC_STAGE == iRet)) + { + log_debug("hotback task , normal fail: from %s msg %s, request type %d", cur->resultInfo.error_from(), cur->resultInfo.error_message(), cur->request_type()); + } + else + { + log_error("hotback task fail: from %s msg %s, 
request type %d", cur->resultInfo.error_from(), cur->resultInfo.error_message(), cur->request_type()); + } + } + + if ((TaskTypeWriteHbLog == cur->request_type()) || (TaskTypeWriteLruHbLog == cur->request_type())) + { + /*only delete task */ + log_debug("write hotback task reply ,just delete task"); + delete cur; + return; + } + log_debug("read hotback task ,reply to client"); + cur->reply_notify(); +} + +void FlushReplyNotify::reply_notify(TaskRequest *cur) +{ + owner->transation_begin(cur); + if (cur->result_code() < 0) + { + owner->buffer_process_error(*cur); + } + else if (cur->result_code() > 0) + { + log_notice("result_code() > 0: from %s msg %s", cur->resultInfo.error_from(), cur->resultInfo.error_message()); + } + if (cur->result_code() >= 0 && owner->buffer_flush_reply(*cur) != BUFFER_PROCESS_OK) + { + if (cur->result_code() >= 0) + cur->set_error(-EC_SERVER_ERROR, "buffer_flush_reply", owner->last_error_message()); + } + REMOTE_LOG->write_remote_log(owner->owner->get_now_time() / 1000000, cur, TASK_REPLY_STAGE); + cur->reply_notify(); + owner->transation_end(); +} + +void BufferProcess::task_notify(TaskRequest *cur) +{ + tableDef = TableDefinitionManager::Instance()->get_cur_table_def(); + uint64_t now_unix_time = GET_TIMESTAMP() / 1000; + if (cur->is_expired(now_unix_time)) + { + log_debug("task time out, throw it for availability, now is [%lld] expire is [%lld]", (long long)now_unix_time, (long long)cur->get_expire_time()); + statBufferProcessExpireCount++; + cur->set_error(-EC_TASK_TIMEOUT, "buffer_process", "task time out"); + cur->reply_notify(); + return; + } + + unsigned blacksize = 0; + transation_begin(cur); + + if (cur->result_code() < 0) + { + cur->mark_as_hit(); /* mark as hit if result done */ + cur->reply_notify(); + } + else if (cur->is_batch_request()) + { + switch (buffer_process_batch(*cur)) + { + default: + cur->set_error(-EC_SERVER_ERROR, "buffer_process", last_error_message()); + cur->mark_as_hit(); /* mark as hit if result done */ + 
cur->reply_notify(); + break; + + case BUFFER_PROCESS_OK: + cur->mark_as_hit(); /* mark as hit if result done */ + cur->reply_notify(); + break; + + case BUFFER_PROCESS_ERROR: + if (cur->result_code() >= 0) + cur->set_error(-EC_SERVER_ERROR, "buffer_process", last_error_message()); + cur->mark_as_hit(); /* mark as hit if result done */ + cur->reply_notify(); + break; + } + } + else if (nodbMode == false) + { + BufferResult result = buffer_process_request(*cur); + REMOTE_LOG->write_remote_log(owner->get_now_time() / 1000000, cur, TASK_NOTIFY_STAGE); + switch (result) + { + default: + if (!cur->flag_black_hole()) + { + /* 如果cache操作有失败,则加入黑名单*/ + blacksize = cur->pop_black_list_size(); + if (blacksize > 0) + { + log_debug("add to blacklist, key=%d size=%u", cur->int_key(), blacksize); + blacklist->add_blacklist(cur->packed_key(), blacksize); + } + } + case BUFFER_PROCESS_ERROR: + if (cur->result_code() >= 0) + cur->set_error(-EC_SERVER_ERROR, "buffer_process", last_error_message()); + + case BUFFER_PROCESS_OK: + cur->mark_as_hit(); /* mark as hit if result done */ + cur->reply_notify(); + break; + case BUFFER_PROCESS_NEXT: + log_debug("push task to next-unit"); + cur->push_reply_dispatcher(&cacheReply); + output.task_notify(cur); + break; + case BUFFER_PROCESS_PENDING: + break; + case BUFFER_PROCESS_REMOTE: //migrate 命令,给远端dtc + cur->push_reply_dispatcher(&cacheReply); + remoteoutput.task_notify(cur); + break; + case BUFFER_PROCESS_PUSH_HB: + { + log_debug("push task to hotback thread"); + break; + } + } + } + else + { + BufferResult result = buffer_process_nodb(*cur); + REMOTE_LOG->write_remote_log(owner->get_now_time() / 1000000, cur, TASK_NOTIFY_STAGE); + switch (result) + { + default: + case BUFFER_PROCESS_ERROR: + if (cur->result_code() >= 0) + cur->set_error(-EC_SERVER_ERROR, "buffer_process", last_error_message()); + + case BUFFER_PROCESS_NEXT: + case BUFFER_PROCESS_OK: + cur->mark_as_hit(); /* mark as hit if result done */ + cur->reply_notify(); + break; + 
case BUFFER_PROCESS_PENDING: + break; + case BUFFER_PROCESS_REMOTE: //migrate 命令,给远端dtc + cur->push_reply_dispatcher(&cacheReply); + remoteoutput.task_notify(cur); + break; + case BUFFER_PROCESS_PUSH_HB: + { + log_debug("push task to hotback thread"); + break; + } + } + } + transation_end(); + /* 启动匀速淘汰(deplay purge) */ + Cache.delay_purge_notify(); +} diff --git a/src/search_local/index_storage/cache/buffer_writer.cc b/src/search_local/index_storage/cache/buffer_writer.cc new file mode 100644 index 0000000..5460128 --- /dev/null +++ b/src/search_local/index_storage/cache/buffer_writer.cc @@ -0,0 +1,187 @@ +/* + * ===================================================================================== + * + * Filename: buffer_writer.cc + * + * Description: + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. + * + * ===================================================================================== + */ +#include +#include +#include + +#include "task_pkey.h" +#include "buffer_writer.h" +#include "pt_malloc.h" +#include "sys_malloc.h" +#include "log.h" + +BufferWriter::BufferWriter(void) : DTCBufferPool(NULL) +{ + pstItem = NULL; + iRowIdx = 0; + iIsFull = 0; + memset(achPackKey, 0, sizeof(achPackKey)); +} + +BufferWriter::~BufferWriter(void) +{ + if (pstItem != NULL) + delete pstItem; + pstItem = NULL; +} + +int BufferWriter::cache_open(CacheInfo *pstInfo, DTCTableDefinition *pstTab) +{ + int iRet; + + iRet = DTCBufferPool::cache_open(pstInfo); + if (iRet != E_OK) + { + log_error("cache open error: %d, %s", iRet, Error()); + return -1; + } + + pstItem = new RawData(&g_stSysMalloc, 1); + if (pstItem == NULL) + { + snprintf(szErrMsg, sizeof(szErrMsg), "new RawData error: %m"); + return -2; + } + + UpdateMode stUpdateMod; + stUpdateMod.m_iAsyncServer = pstInfo->syncUpdate ? MODE_SYNC : MODE_ASYNC; + stUpdateMod.m_iUpdateMode = pstInfo->syncUpdate ? 
MODE_SYNC : MODE_ASYNC; + stUpdateMod.m_iInsertMode = pstInfo->syncUpdate ? MODE_SYNC : MODE_ASYNC; + stUpdateMod.m_uchInsertOrder = 0; + + if (pstTab->index_fields() > 0) + { +#if HAS_TREE_DATA + pstDataProcess = new TreeDataProcess(DTCBinMalloc::Instance(), pstTab, this, &stUpdateMod); +#else + log_error("tree index not supported, index field num[%d]", pstTab->index_fields()); + return -1; +#endif + } + else + pstDataProcess = new RawDataProcess(DTCBinMalloc::Instance(), pstTab, this, &stUpdateMod); + if (pstDataProcess == NULL) + { + log_error("create %s error: %m", pstTab->index_fields() > 0 ? "TreeDataProcess" : "RawDataProcess"); + return -3; + } + + return 0; +} + +int BufferWriter::begin_write() +{ + iRowIdx = 0; + + return 0; +} + +int BufferWriter::full() +{ + return (iIsFull); +} + +int BufferWriter::AllocNode(const RowValue &row) +{ + int iRet; + + iRet = TaskPackedKey::build_packed_key(row.table_definition(), row.field_value(0), sizeof(achPackKey), achPackKey); + if (iRet != 0) + { + snprintf(szErrMsg, sizeof(szErrMsg), "build packed key error: %d", iRet); + return -1; + } + + stCurNode = cache_allocate(achPackKey); + if (!stCurNode) + { + snprintf(szErrMsg, sizeof(szErrMsg), "cache alloc node error"); + iIsFull = 1; + return -2; + } + + iRet = pstItem->Init(row.table_definition()->key_fields() - 1, row.table_definition()->key_format(), achPackKey, 0); + if (iRet != 0) + { + snprintf(szErrMsg, sizeof(szErrMsg), "raw data init error: %s", pstItem->get_err_msg()); + cache_purge(achPackKey); + return -3; + } + + return 0; +} + +int BufferWriter::write_row(const RowValue &row) +{ + int iRet; + + if (iRowIdx == 0) + { + if (AllocNode(row) != 0) + return -1; + } + + iRet = pstItem->insert_row(row, false, false); + if (iRet != 0) + { + snprintf(szErrMsg, sizeof(szErrMsg), "insert row error: %s", pstItem->get_err_msg()); + cache_purge(achPackKey); + return -2; + } + + iRowIdx++; + return 0; +} + +int BufferWriter::commit_node() +{ + int iRet; + + if (iRowIdx < 
1) + return 0; + + const MemHead *pstHead = DTCBinMalloc::Instance()->get_head_info(); + if (pstHead->m_hTop + pstItem->data_size() + MINSIZE >= pstHead->m_tSize) + { + iIsFull = 1; + cache_purge(achPackKey); + return -1; + } + + iRet = pstDataProcess->replace_data(&stCurNode, pstItem); + if (iRet != 0) + { + snprintf(szErrMsg, sizeof(szErrMsg), "write data into cache error"); + cache_purge(achPackKey); + return -2; + } + + iRowIdx = 0; + memset(achPackKey, 0, sizeof(achPackKey)); + pstItem->Destroy(); + return 0; +} + +int BufferWriter::rollback_node() +{ + pstItem->Destroy(); + cache_purge(achPackKey); + memset(achPackKey, 0, sizeof(achPackKey)); + + return 0; +} diff --git a/src/search_local/index_storage/cache/buffer_writer.h b/src/search_local/index_storage/cache/buffer_writer.h new file mode 100644 index 0000000..05493fa --- /dev/null +++ b/src/search_local/index_storage/cache/buffer_writer.h @@ -0,0 +1,54 @@ +/* + * ===================================================================================== + * + * Filename: buffer_writer.h + * + * Description: + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. 
+ *
+ * =====================================================================================
+ */
+#ifndef __CACHE_WRITER_H
+#define __CACHE_WRITER_H
+
+#include "buffer_pool.h"
+#include "table_def.h"
+#include "writer_interface.h"
+#include "raw_data_process.h"
+
+class BufferWriter : public WriterInterface, public DTCBufferPool /* stages rows per key and commits them into the buffer pool */
+{
+private:
+    RawData *pstItem;            /* staging buffer for the rows of the current node; created in cache_open() */
+    DataProcess *pstDataProcess; /* RawDataProcess or TreeDataProcess chosen in cache_open(); used by commit_node() */
+    int iIsFull;                 /* set to 1 when node allocation fails or commit_node() finds too little memory */
+    int iRowIdx;                 /* rows written to the current node; 0 means the next write_row() allocates a node */
+    Node stCurNode;              /* node returned by cache_allocate() for the current key */
+    char achPackKey[MAX_KEY_LEN + 1]; /* packed key of the current node */
+    char szErrMsg[200];          /* last error text, returned by err_msg() */
+
+protected:
+    int AllocNode(const RowValue &row); /* 0 on success, negative on failure (see szErrMsg) */
+
+public:
+    BufferWriter(void);
+    ~BufferWriter(void);
+
+    int cache_open(CacheInfo *pstInfo, DTCTableDefinition *pstTab); /* must succeed before any write; 0 on success */
+
+    const char *err_msg() { return szErrMsg; }
+    int begin_write();                  /* resets the row counter */
+    int full();                         /* returns iIsFull */
+    int write_row(const RowValue &row); /* stages one row; 0 on success */
+    int commit_node();                  /* writes the staged rows into the cache; 0 on success */
+    int rollback_node();                /* discards the staged rows and purges the node */
+};
+
+#endif
diff --git a/src/search_local/index_storage/cache/col_expand.cc b/src/search_local/index_storage/cache/col_expand.cc
new file mode 100644
index 0000000..c20a0db
--- /dev/null
+++ b/src/search_local/index_storage/cache/col_expand.cc
@@ -0,0 +1,176 @@
+/*
+ * =====================================================================================
+ *
+ * Filename: col_expand.cc
+ *
+ * Description: DTCColExpand, keeps the current (and, while expanding, the new) table.conf text in shared memory.
+ *
+ * Version: 1.0
+ * Created: 09/08/2020 10:02:05 PM
+ * Revision: none
+ * Compiler: gcc
+ *
+ * Author: Norton, yangshuang68@jd.com
+ * Company: JD.com, Inc.
+ * + * ===================================================================================== + */ +#include +#include + +#include "col_expand.h" +#include "table_def_manager.h" + +DTC_USING_NAMESPACE + +extern DTCConfig *gConfig; + +DTCColExpand::DTCColExpand() : _colExpand(NULL) +{ + memset(_errmsg, 0, sizeof(_errmsg)); +} + +DTCColExpand::~DTCColExpand() +{ +} + +int DTCColExpand::Init() +{ + // alloc mem + size_t size = sizeof(COL_EXPAND_T); + MEM_HANDLE_T v = M_CALLOC(size); + if (INVALID_HANDLE == v) + { + snprintf(_errmsg, sizeof(_errmsg), "init column expand failed, %s", M_ERROR()); + return -1; + } + _colExpand = M_POINTER(COL_EXPAND_T, v); + _colExpand->expanding = false; + _colExpand->curTable = 0; + memset(_colExpand->tableBuff, 0, sizeof(_colExpand->tableBuff)); + // copy file's table.conf to shm + if (strlen(TableDefinitionManager::Instance()->table_file_buffer()) > COL_EXPAND_BUFF_SIZE) + { + snprintf(_errmsg, sizeof(_errmsg), "table buf size bigger than %d", COL_EXPAND_BUFF_SIZE); + return -1; + } + strcpy(_colExpand->tableBuff[_colExpand->curTable % COL_EXPAND_BUFF_NUM], + TableDefinitionManager::Instance()->table_file_buffer()); + // use file's tabledef + DTCTableDefinition *t = TableDefinitionManager::Instance()->table_file_table_def(); + TableDefinitionManager::Instance()->set_cur_table_def(t, _colExpand->curTable % COL_EXPAND_BUFF_NUM); + log_debug("init col expand with curTable: %d, tableBuff: %s", _colExpand->curTable, _colExpand->tableBuff[_colExpand->curTable % COL_EXPAND_BUFF_NUM]); + return 0; +} + +int DTCColExpand::reload_table() +{ + if (TableDefinitionManager::Instance()->get_cur_table_idx() == _colExpand->curTable) + return 0; + + DTCTableDefinition *t = TableDefinitionManager::Instance()->load_buffered_table(_colExpand->tableBuff[_colExpand->curTable % COL_EXPAND_BUFF_NUM]); + if (!t) + { + log_error("load shm table.conf error, buf: %s", _colExpand->tableBuff[_colExpand->curTable % COL_EXPAND_BUFF_NUM]); + return -1; + } + 
TableDefinitionManager::Instance()->set_cur_table_def(t, _colExpand->curTable); + return 0; +} + +int DTCColExpand::Attach(MEM_HANDLE_T handle, int forceFlag) +{ + if (INVALID_HANDLE == handle) + { + log_crit("attch col expand error, handle = 0"); + return -1; + } + _colExpand = M_POINTER(COL_EXPAND_T, handle); + // 1) force update shm mem, 2)replace shm mem by dumped mem + if (forceFlag) + { + log_debug("force use table.conf, not use shm conf"); + if (strlen(TableDefinitionManager::Instance()->table_file_buffer()) > COL_EXPAND_BUFF_SIZE) + { + log_error("table.conf to long while force update shm"); + return -1; + } + if (_colExpand->expanding) + { + log_error("col expanding, can't force update table.conf, delete shm and try again"); + return -1; + } + strcpy(_colExpand->tableBuff[_colExpand->curTable % COL_EXPAND_BUFF_NUM], + TableDefinitionManager::Instance()->table_file_buffer()); + DTCTableDefinition *t = TableDefinitionManager::Instance()->table_file_table_def(); + TableDefinitionManager::Instance()->set_cur_table_def(t, _colExpand->curTable); + return 0; + } + // parse shm table.conf + DTCTableDefinition *t, *tt = NULL; + t = TableDefinitionManager::Instance()->load_buffered_table(_colExpand->tableBuff[_colExpand->curTable % COL_EXPAND_BUFF_NUM]); + if (!t) + { + log_error("load shm table.conf error, buf: %s", _colExpand->tableBuff[_colExpand->curTable % COL_EXPAND_BUFF_NUM]); + return -1; + } + if (_colExpand->expanding) + { + tt = TableDefinitionManager::Instance()->load_buffered_table(_colExpand->tableBuff[(_colExpand->curTable + 1) % COL_EXPAND_BUFF_NUM]); + if (!tt) + { + log_error("load shm col expand new table.conf error, buf: %s", _colExpand->tableBuff[(_colExpand->curTable + 1) % COL_EXPAND_BUFF_NUM]); + return -1; + } + } + // compare + // if not same + // log_error + if (!t->is_same_table(TableDefinitionManager::Instance()->table_file_table_def())) + { // same with hash_equal + DTCTableDefinition *tt = 
TableDefinitionManager::Instance()->table_file_table_def(); + log_error("table.conf is not same to shm's"); + log_error("shm table, name: %s, hash: %s", t->table_name(), t->table_hash()); + log_error("file table, name: %s, hash: %s", tt->table_name(), tt->table_hash()); + } + else + { + log_debug("table.conf is same to shm's"); + } + // use shm's + TableDefinitionManager::Instance()->set_cur_table_def(t, _colExpand->curTable); + if (_colExpand->expanding) + TableDefinitionManager::Instance()->set_new_table_def(tt, _colExpand->curTable + 1); + return 0; +} + +bool DTCColExpand::is_expanding() +{ + return _colExpand->expanding; +} + +bool DTCColExpand::expand(const char *table, int len) +{ + _colExpand->expanding = true; + memcpy(_colExpand->tableBuff[(_colExpand->curTable + 1) % COL_EXPAND_BUFF_NUM], table, len); + return true; +} + +int DTCColExpand::try_expand(const char *table, int len) +{ + if (_colExpand->expanding || len > COL_EXPAND_BUFF_SIZE || _colExpand->curTable > 255) + return -1; + return 0; +} + +bool DTCColExpand::expand_done() +{ + ++_colExpand->curTable; + _colExpand->expanding = false; + return true; +} + +int DTCColExpand::cur_table_idx() +{ + return _colExpand->curTable; +} diff --git a/src/search_local/index_storage/cache/col_expand.h b/src/search_local/index_storage/cache/col_expand.h new file mode 100644 index 0000000..d739a5e --- /dev/null +++ b/src/search_local/index_storage/cache/col_expand.h @@ -0,0 +1,67 @@ +/* + * ===================================================================================== + * + * Filename: col_expand.h + * + * Description: + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. 
+ * + * ===================================================================================== + */ +#ifndef __DTC_COL_EXPAND_H_ +#define __DTC_COL_EXPAND_H_ + +#include "namespace.h" +#include "global.h" +#include "singleton.h" + +DTC_BEGIN_NAMESPACE + +#define COL_EXPAND_BUFF_SIZE (1024 * 1024) +#define COL_EXPAND_BUFF_NUM 2 + +struct _col_expand +{ + bool expanding; + unsigned char curTable; + char tableBuff[COL_EXPAND_BUFF_NUM][COL_EXPAND_BUFF_SIZE]; +}; +typedef struct _col_expand COL_EXPAND_T; + +class DTCColExpand +{ +public: + DTCColExpand(); + ~DTCColExpand(); + + static DTCColExpand *Instance() { return Singleton::Instance(); } + static void Destroy() { Singleton::Destroy(); } + + int Init(); + int Attach(MEM_HANDLE_T handle, int forceFlag); + + bool is_expanding(); + bool expand(const char *table, int len); + int try_expand(const char *table, int len); + bool expand_done(); + int cur_table_idx(); + int reload_table(); + + const MEM_HANDLE_T Handle() const { return M_HANDLE(_colExpand); } + const char *Error() const { return _errmsg; } + +private: + COL_EXPAND_T *_colExpand; + char _errmsg[256]; +}; + +DTC_END_NAMESPACE + +#endif diff --git a/src/search_local/index_storage/cache/container_dtcd.cc b/src/search_local/index_storage/cache/container_dtcd.cc new file mode 100644 index 0000000..c708919 --- /dev/null +++ b/src/search_local/index_storage/cache/container_dtcd.cc @@ -0,0 +1,202 @@ +/* + * ===================================================================================== + * + * Filename: container_dtcd.cc + * + * Description: + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. 
+ * + * ===================================================================================== + */ +#include +#include + +#include "compiler.h" +#include "container.h" +#include "version.h" +#include "table_def.h" +#include "buffer_error.h" +#include "listener_pool.h" +#include "request_threading.h" +#include "task_multiplexer.h" +#include "../api/c_api/dtc_int.h" +#include "proxy_listen_pool.h" +#include "table_def_manager.h" + +class DTCTaskExecutor : public IDTCTaskExecutor, public ThreadingOutputDispatcher +{ +public: + virtual NCResultInternal *task_execute(NCRequest &rq, const DTCValue *kptr); +}; + +NCResultInternal *DTCTaskExecutor::task_execute(NCRequest &rq, const DTCValue *kptr) +{ + NCResultInternal *res = new NCResultInternal(rq.tdef); + if (res->Copy(rq, kptr) < 0) + return res; + res->set_owner_info(this, 0, NULL); + switch (ThreadingOutputDispatcher::execute((TaskRequest *)res)) + { + case 0: // OK + res->process_internal_result(res->Timestamp()); + break; + case -1: // no side effect + res->set_error(-EC_REQUEST_ABORTED, "API::sending", "Server Shutdown"); + break; + case -2: + default: // result unknown, leak res by purpose + //new NCResult(-EC_REQUEST_ABORTED, "API::recving", "Server Shutdown"); + log_error("(-EC_REQUEST_ABORTED, API::sending, Server Shutdown"); + break; + } + return res; +} + +class DTCInstance : public IDTCService +{ +public: + AgentListenPool *ports; + DTCTaskExecutor *executor; + int mypid; + +public: + DTCInstance(); + virtual ~DTCInstance(); + + virtual const char *query_version_string(void); + virtual const char *query_service_type(void); + virtual const char *query_instance_name(void); + + virtual DTCTableDefinition *query_table_definition(void); + virtual DTCTableDefinition *query_admin_table_definition(void); + virtual IDTCTaskExecutor *query_task_executor(void); + virtual int match_listening_ports(const char *, const char * = NULL); + + int IsOK(void) const + { + return this != NULL && + ports != NULL && + executor != 
NULL && + getpid() == mypid; + } +}; + +extern ListenerPool *listener; +DTCInstance::DTCInstance(void) +{ + ports = NULL; + executor = NULL; + mypid = getpid(); +} + +DTCInstance::~DTCInstance(void) +{ +} + +const char *DTCInstance::query_version_string(void) +{ + return version_detail; +} + +const char *DTCInstance::query_service_type(void) +{ + return "dtcd"; +} + +const char *DTCInstance::query_instance_name(void) +{ + return TableDefinitionManager::Instance()->get_cur_table_def()->table_name(); +} + +DTCTableDefinition *DTCInstance::query_table_definition(void) +{ + return TableDefinitionManager::Instance()->get_cur_table_def(); +} + +DTCTableDefinition *DTCInstance::query_admin_table_definition(void) +{ + return TableDefinitionManager::Instance()->get_hot_backup_table_def(); +} + +IDTCTaskExecutor *DTCInstance::query_task_executor(void) +{ + return executor; +} + +int DTCInstance::match_listening_ports(const char *host, const char *port) +{ + return ports->Match(host, port); +} + +struct nocase +{ + bool operator()(const char *const &a, const char *const &b) const + { + return strcasecmp(a, b) < 0; + } +}; +typedef std::map instmap_t; +static instmap_t instMap; + +extern "C" __EXPORT + IInternalService * + _QueryInternalService(const char *name, const char *instance) +{ + instmap_t::iterator i; + + if (!name || !instance) + return NULL; + + if (strcasecmp(name, "dtcd") != 0) + return NULL; + + /* not found */ + i = instMap.find(instance); + if (i == instMap.end()) + return NULL; + + DTCInstance &inst = i->second; + if (inst.IsOK() == 0) + return NULL; + + return &inst; +} + +void InitTaskExecutor(const char *name, AgentListenPool *listener, TaskDispatcher *output) +{ + if (NCResultInternal::verify_class() == 0) + { + log_error("Inconsistent class NCResultInternal detected, internal API disabled"); + return; + } + // this may cause memory leak, but this is small + char *tablename = (char *)malloc(strlen(name) + 1); + memset(tablename, 0, strlen(name) + 1); + 
strncpy(tablename, name, strlen(name)); + + DTCInstance &inst = instMap[tablename]; + inst.ports = listener; + DTCTaskExecutor *executor = new DTCTaskExecutor(); + TaskMultiplexer *batcher = new TaskMultiplexer(output->owner_thread()); + batcher->bind_dispatcher(output); + executor->bind_dispatcher(batcher); + inst.executor = executor; + log_info("Internal Task Executor initialized"); +} + +void StopTaskExecutor(void) +{ + instmap_t::iterator i; + for (i = instMap.begin(); i != instMap.end(); i++) + { + if (i->second.executor) + i->second.executor->Stop(); + } +} diff --git a/src/search_local/index_storage/cache/data_chunk.h b/src/search_local/index_storage/cache/data_chunk.h new file mode 100644 index 0000000..ef14ab3 --- /dev/null +++ b/src/search_local/index_storage/cache/data_chunk.h @@ -0,0 +1,331 @@ +/* + * ===================================================================================== + * + * Filename: data_chunk.h + * + * Description: packaging data chunk method. + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. 
+ * + * ===================================================================================== + */ + +#ifndef DATA_CHUNK_H +#define DATA_CHUNK_H + +#include +#include "raw_data.h" +#include "tree_data.h" + +class DataChunk +{ +protected: + unsigned char m_uchDataType; // 数据chunk的类型 + +public: + /************************************************* + Description: 计算基本结构大小 + Input: + Output: + Return: 内存大小 + *************************************************/ + ALLOC_SIZE_T base_size() + { + if (m_uchDataType == DATA_TYPE_RAW) + return (sizeof(RawFormat)); + else + return (sizeof(RootData)); + } + + /************************************************* + Description: index key + Input: + Output: + Return: key + *************************************************/ + char *index_key() + { + char *indexKey = (char *)this; + return indexKey + sizeof(unsigned char) * 2 + sizeof(uint32_t) * 2; + } + + /************************************************* + Description: 获取格式化后的key + Input: + Output: + Return: key指针 + *************************************************/ + const char *Key() const + { + if ((m_uchDataType & 0x7f) == DATA_TYPE_RAW) + { + RawFormat *pstRaw = (RawFormat *)this; + return pstRaw->m_achKey; + } + else if ((m_uchDataType & 0x7f) == DATA_TYPE_TREE_ROOT) + { + RootData *pstRoot = (RootData *)this; + return pstRoot->m_achKey; + } + return NULL; + } + + /************************************************* + Description: 获取格式化后的key + Input: + Output: + Return: key指针 + *************************************************/ + char *Key() + { + if ((m_uchDataType & 0x7f) == DATA_TYPE_RAW) + { + RawFormat *pstRaw = (RawFormat *)this; + return pstRaw->m_achKey; + } + else if ((m_uchDataType & 0x7f) == DATA_TYPE_TREE_ROOT) + { + RootData *pstRoot = (RootData *)this; + return pstRoot->m_achKey; + } + return NULL; + } + + /************************************************* + Description: 保存key + Input: key key的实际值 + Output: + Return: + 
*************************************************/ + +#define SET_KEY_FUNC(type, key) \ + void set_key(type key) \ + { \ + if (m_uchDataType == DATA_TYPE_RAW) \ + { \ + RawFormat *pstRaw = (RawFormat *)this; \ + *(type *)(void *)pstRaw->m_achKey = key; \ + } \ + else \ + { \ + RootData *pstRoot = (RootData *)this; \ + *(type *)(void *)pstRoot->m_achKey = key; \ + } \ + } + + SET_KEY_FUNC(int32_t, iKey) + SET_KEY_FUNC(uint32_t, uiKey) + SET_KEY_FUNC(int64_t, llKey) + SET_KEY_FUNC(uint64_t, ullKey) + + /************************************************* + Description: 保存字符串key + Input: key key的实际值 + iLen key的长度 + Output: + Return: + *************************************************/ + void set_key(const char *pchKey, int iLen) + { + if (m_uchDataType == DATA_TYPE_RAW) + { + RawFormat *pstRaw = (RawFormat *)this; + *(unsigned char *)pstRaw->m_achKey = iLen; + memcpy(pstRaw->m_achKey + 1, pchKey, iLen); + } + else + { + RootData *pstRoot = (RootData *)this; + *(unsigned char *)pstRoot->m_achKey = iLen; + memcpy(pstRoot->m_achKey + 1, pchKey, iLen); + } + } + + /************************************************* + Description: 保存格式化好的字符串key + Input: key key的实际值, 要求key[0]是长度 + Output: + Return: + *************************************************/ + void set_key(const char *pchKey) + { + if (m_uchDataType == DATA_TYPE_RAW) + { + RawFormat *pstRaw = (RawFormat *)this; + memcpy(pstRaw->m_achKey, pchKey, *(unsigned char *)pchKey); + } + else + { + RootData *pstRoot = (RootData *)this; + memcpy(pstRoot->m_achKey, pchKey, *(unsigned char *)pchKey); + } + } + + /************************************************* + Description: 查询字符串key大小 + Input: + Output: + Return: key大小 + *************************************************/ + int str_key_size() + { + if (m_uchDataType == DATA_TYPE_RAW) + { + RawFormat *pstRaw = (RawFormat *)this; + return *(unsigned char *)pstRaw->m_achKey; + } + else + { + RootData *pstRoot = (RootData *)this; + return *(unsigned char *)pstRoot->m_achKey; + } + } 
+ + /************************************************* + Description: 查询二进制key大小 + Input: + Output: + Return: key大小 + *************************************************/ + int bin_key_size() { return str_key_size(); } + + unsigned int head_size() + { + if (m_uchDataType == DATA_TYPE_RAW) + return sizeof(RawFormat); + else + return sizeof(RootData); + } + + /************************************************* + Description: 查询数据头大小,如果是CRawData的chunk,data_size()是不包括Row的长度,仅包括头部信息以及key + Input: + Output: + Return: 内存大小 + *************************************************/ + unsigned int data_size(int iKeySize) + { + int iKeyLen = iKeySize ? iKeySize : 1 + str_key_size(); + return head_size() + iKeyLen; + } + + unsigned int node_size() + { + if (m_uchDataType == DATA_TYPE_RAW) + { + RawFormat *pstRaw = (RawFormat *)this; + return pstRaw->m_uiDataSize; + } + else + { + return 0; // unknow + } + } + + unsigned int create_time() + { + if (m_uchDataType == DATA_TYPE_RAW) + { + RawFormat *pstRaw = (RawFormat *)this; + return pstRaw->m_CreateHour; + } + else + { + return 0; // unknow + } + } + unsigned last_access_time() + { + if (m_uchDataType == DATA_TYPE_RAW) + { + RawFormat *pstRaw = (RawFormat *)this; + return pstRaw->m_LastAccessHour; + } + else + { + return 0; // unknow + } + } + unsigned int last_update_time() + { + if (m_uchDataType == DATA_TYPE_RAW) + { + RawFormat *pstRaw = (RawFormat *)this; + return pstRaw->m_LastUpdateHour; + } + else + { + return 0; // unknow + } + } + + uint32_t total_rows() + { + if (m_uchDataType == DATA_TYPE_RAW) + { + RawFormat *pstRaw = (RawFormat *)this; + return pstRaw->m_uiRowCnt; + } + else + { + RootData *pstRoot = (RootData *)this; + return pstRoot->m_uiRowCnt; + } + } + + /************************************************* + Description: 销毁内存并释放内存 + Input: + Output: + Return: 0为成功,非0失败 + *************************************************/ + int Destroy(Mallocator *pstMalloc) + { + MEM_HANDLE_T hHandle = pstMalloc->ptr_to_handle(this); + 
if (m_uchDataType == DATA_TYPE_RAW) + { + return pstMalloc->Free(hHandle); + } + else if (m_uchDataType == DATA_TYPE_TREE_ROOT) + { + TreeData stTree(pstMalloc); + int iRet = stTree.Attach(hHandle); + if (iRet != 0) + { + return (iRet); + } + return stTree.Destroy(); + } + return (-1); + } + + /* 查询如果destroy这块内存,能释放多少空间出来 (包括合并)*/ + unsigned ask_for_destroy_size(Mallocator *pstMalloc) + { + MEM_HANDLE_T hHandle = pstMalloc->ptr_to_handle(this); + + if (m_uchDataType == DATA_TYPE_RAW) + { + return pstMalloc->ask_for_destroy_size(hHandle); + } + else if (m_uchDataType == DATA_TYPE_TREE_ROOT) + { + TreeData stTree(pstMalloc); + if (stTree.Attach(hHandle)) + return 0; + + return stTree.ask_for_destroy_size(); + } + + log_debug("ask_for_destroy_size failed"); + return 0; + } +}; + +#endif diff --git a/src/search_local/index_storage/cache/data_process.h b/src/search_local/index_storage/cache/data_process.h new file mode 100644 index 0000000..0a630da --- /dev/null +++ b/src/search_local/index_storage/cache/data_process.h @@ -0,0 +1,205 @@ +/* + * ===================================================================================== + * + * Filename: data_process.h + * + * Description: data processing interface(abstract class) definition. + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. 
+ *
+ * =====================================================================================
+ */
+
+#ifndef DATA_PROCESS_H
+#define DATA_PROCESS_H
+
+#include "buffer_def.h"
+#include "protocol.h"
+#include "value.h"
+#include "field.h"
+#include "section.h"
+#include "table_def.h"
+#include "task_request.h"
+#include "stat_dtc.h"
+#include "raw_data.h"
+#include "node.h"
+
+#include "namespace.h"
+DTC_BEGIN_NAMESPACE
+
+enum EUpdateMode
+{
+    MODE_SYNC = 0,
+    MODE_ASYNC,
+    MODE_FLUSH
+};
+
+typedef struct
+{
+    EUpdateMode m_iAsyncServer;
+    EUpdateMode m_iUpdateMode;
+    EUpdateMode m_iInsertMode;
+    unsigned char m_uchInsertOrder;
+} UpdateMode;
+
+class DTCFlushRequest;
+class DataProcess
+{
+public:
+    DataProcess() {}
+    virtual ~DataProcess() {}
+
+    virtual const char *get_err_msg() = 0;
+    virtual void set_insert_mode(EUpdateMode iMode) = 0;
+    virtual void set_insert_order(int iOrder) = 0;
+
+    /*************************************************
+    Description: number of rows added by the current operation (may be negative)
+    Input:
+    Output:
+    Return: row count
+    *************************************************/
+    virtual int64_t rows_inc() = 0;
+
+    /*************************************************
+    Description: number of dirty rows added by the current operation (may be negative)
+    Input:
+    Output:
+    Return: row count
+    *************************************************/
+    virtual int64_t dirty_rows_inc() = 0;
+
+    /*************************************************
+    Description: fetch all data stored in a node
+    Input: pstNode   the node
+    Output: pstRows  structure that receives the data
+    Return: 0 on success, non-zero on failure
+    *************************************************/
+    virtual int get_all_rows(Node *pstNode, RawData *pstRows) = 0;
+
+    /*************************************************
+    Description: replace the data in the cache with the data in pstRawData
+    Input: pstRawData   the new data
+           pstNode      the node
+    Output:
+    Return: 0 on success, non-zero on failure
+    *************************************************/
+    virtual int replace_data(Node *pstNode, RawData *pstRawData) = 0;
+
+    /*************************************************
+    Description: delete data as described by the task request
+    Input: stTask    the task request
+           pstNode   the node
+    Output: pstAffectedRows  receives the deleted data (nothing is saved when NULL)
+    Return: 0 on success, non-zero on failure
+    *************************************************/
+    virtual int delete_data(TaskRequest &stTask, Node *pstNode, RawData *pstAffectedRows) = 0;
+
+    /*************************************************
+    Description: query data as described by the task request
+    Input: stTask    the task request
+           pstNode   the node
+    Output: stTask   receives the data that was found
+    Return: 0 on success, non-zero on failure
+    *************************************************/
+    virtual int get_data(TaskRequest &stTask, Node *pstNode) = 0;
+
+    /*************************************************
+    Description: append one row as described by the task request
+    Input: stTask    the task request
+           pstNode   the node
+           isDirty   whether the row is dirty data
+    Output: pstAffectedRows  receives the affected rows (nothing is saved when NULL); the original note said "deleted data" - TODO confirm
+    Return: 0 on success, non-zero on failure
+    *************************************************/
+    virtual int append_data(TaskRequest &stTask, Node *pstNode, RawData *pstAffectedRows, bool isDirty, bool uniq) = 0;
+
+    /*************************************************
+    Description: replace the data in the cache with the data of the task
+    Input: stTask    the task request
+           pstNode   the node
+    Output:
+    Return: 0 on success, non-zero on failure
+    *************************************************/
+    virtual int replace_data(TaskRequest &stTask, Node *pstNode) = 0;
+
+    /*************************************************
+    Description: replace the data in the cache with the data of the task
+    Input: stTask    the task request
+           pstNode   the node
+           async     whether the operation is asynchronous
+    Output: pstAffectedRows  receives the data after the update (nothing is saved when NULL)
+    Return: 0 on success, non-zero on failure
+    *************************************************/
+    virtual int replace_rows(TaskRequest &stTask, Node *pstNode, RawData *pstAffectedRows, bool async, bool setrows = false) = 0;
+
+    /*************************************************
+    Description: update the cached data as described by the task request
+    Input: stTask    the task request
+           pstNode   the node
+           async     whether the operation is asynchronous
+    Output: pstAffectedRows  receives the data after the update (nothing is saved when NULL)
+    Return: 0 on success, non-zero on failure
+    *************************************************/
+    virtual int update_data(TaskRequest &stTask, Node *pstNode, RawData *pstAffectedRows, bool async, bool setrows = false) = 0;
+
+    /*************************************************
+    Description: turn the dirty data of a node into one or more flush requests
+    Input: pstNode   the node
+    Output: pstFlushReq     receives the flush requests
+            uiFlushRowsCnt  number of rows flushed
+    Return: 0 on success, non-zero on failure
+    *************************************************/
+    virtual int flush_data(DTCFlushRequest *pstFlushReq, Node *pstNode, unsigned int &uiFlushRowsCnt) = 0;
+
+    /*************************************************
+    Description: delete the data from the cache; dirty data produces flush requests
+    Input: pstNode   the node
+    Output: pstFlushReq     receives the flush requests
+            uiFlushRowsCnt  number of rows flushed
+    Return: 0 on success, non-zero on failure
+    *************************************************/
+    virtual int purge_data(DTCFlushRequest *pstFlushReq, Node *pstNode, unsigned int &uiFlushRowsCnt) = 0;
+
+    /*************************************************
+    Description: get cache expire time
+    Input: pstNode node
+    Output:
+    Return:
+    *************************************************/
+    virtual int get_expire_time(DTCTableDefinition *t, Node *pstNode, uint32_t &expire) = 0;
+
+    /*************************************************
+    Description:
+    Input:
+    Output:
+    Return:
+    *************************************************/
+    virtual int expand_node(TaskRequest &stTask, Node *pstNode) = 0;
+
+    /*************************************************
+    Description:
+    Input:
+    Output:
+    Return:
+    *************************************************/
+    virtual void change_mallocator(Mallocator *pstMalloc) = 0;
+
+    /*************************************************
+    Description:
+    Input:
+    Output:
+    Return:
+    *************************************************/
+    virtual int dirty_rows_in_node(TaskRequest &stTask, Node *node) = 0;
+};
+
+DTC_END_NAMESPACE
+
+#endif
diff --git a/src/search_local/index_storage/cache/defragment.h b/src/search_local/index_storage/cache/defragment.h
new file mode 100644
index 0000000..5bd20a3
--- /dev/null
+++ b/src/search_local/index_storage/cache/defragment.h
@@ -0,0 +1,77 @@
+/*
+ * 
===================================================================================== + * + * Filename: defragment.h + * + * Description: memory clear up. + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. + * + * ===================================================================================== + */ +#include "pt_malloc.h" +#include "dtc_api.h" + +class Defragment +{ +public: + Defragment() + { + _mem = NULL; + _pstChunk = NULL; + _keysize = -1; + _s = NULL; + _error_count = 0; + _skip_count = 0; + _ok_count = 0; + _bulk_per_ten_microscoends = 1; + } + + ~Defragment() + { + } + int Attach(const char *key, int keysize, int step); + char *get_key_by_handle(INTER_HANDLE_T handle, int *len); + int proccess(INTER_HANDLE_T handle); + int dump_mem(bool verbose = false); + int dump_mem_new(const char *filename, uint64_t &memsize); + int defrag_mem(int level, DTC::Server *s); + int defrag_mem_new(int level, DTC::Server *s, const char *filename, uint64_t memsize); + int proccess_handle(const char *filename, DTC::Server *s); + void frequency_limit(void); + +private: + DTCBinMalloc *_mem; + MallocChunk *_pstChunk; + int _keysize; + DTC::Server *_s; + + //stat + uint64_t _error_count; + uint64_t _skip_count; + uint64_t _ok_count; + int _bulk_per_ten_microscoends; +}; + +#define SEARCH 0 +#define MATCH 1 +class DefragMemAlgo +{ +public: + DefragMemAlgo(int level, Defragment *master); + ~DefragMemAlgo(); + int Push(INTER_HANDLE_T handle, int used); + +private: + int _status; + INTER_HANDLE_T *_queue; + Defragment *_master; + int _count; + int _level; +}; diff --git a/src/search_local/index_storage/cache/dtcd.export.lst b/src/search_local/index_storage/cache/dtcd.export.lst new file mode 100644 index 0000000..6ba9888 --- /dev/null +++ b/src/search_local/index_storage/cache/dtcd.export.lst @@ -0,0 +1,3 @@ +{ + global: _QueryInternalService; +}; diff --git 
a/src/search_local/index_storage/cache/empty_filter.cc b/src/search_local/index_storage/cache/empty_filter.cc new file mode 100644 index 0000000..a188c94 --- /dev/null +++ b/src/search_local/index_storage/cache/empty_filter.cc @@ -0,0 +1,128 @@ +/* + * ===================================================================================== + * + * Filename: empty_filter.cc + * + * Description: + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. + * + * ===================================================================================== + */ +#include +#include +#include +#include "pt_malloc.h" +#include "empty_filter.h" +#include "bitsop.h" + +EmptyNodeFilter::EmptyNodeFilter() : _enf(0) +{ + memset(_errmsg, 0x0, sizeof(_errmsg)); +} + +EmptyNodeFilter::~EmptyNodeFilter() +{ +} + +int EmptyNodeFilter::ISSET(uint32_t key) +{ + uint32_t bitoff = Offset(key); + uint32_t tableid = Index(key); + + if (_enf->enf_tables[tableid].t_size < bitoff / CHAR_BIT + 1) + return 0; + + return ISSET_B(bitoff, M_POINTER(void, _enf->enf_tables[tableid].t_handle)); +} + +void EmptyNodeFilter::SET(uint32_t key) +{ + uint32_t bitoff = Offset(key); + uint32_t tableid = Index(key); + + if (_enf->enf_tables[tableid].t_size < bitoff / CHAR_BIT + 1) + { + /* 按step的整数倍来increase table*/ + int incbyte = bitoff / CHAR_BIT + 1 - _enf->enf_tables[tableid].t_size; + int how = (incbyte + _enf->enf_step - 1) / _enf->enf_step; + size_t size = _enf->enf_tables[tableid].t_size + how * _enf->enf_step; + + _enf->enf_tables[tableid].t_handle = M_REALLOC(_enf->enf_tables[tableid].t_handle, size); + if (_enf->enf_tables[tableid].t_handle == INVALID_HANDLE) + { + /* realloc 失败后,不会重试*/ + return; + } + + _enf->enf_tables[tableid].t_size = size; + } + + return SET_B(bitoff, M_POINTER(void, _enf->enf_tables[tableid].t_handle)); +} + +void EmptyNodeFilter::CLR(uint32_t key) +{ + uint32_t bitoff = 
Offset(key); + uint32_t tableid = Index(key); + + if (_enf->enf_tables[tableid].t_size < bitoff / CHAR_BIT + 1) + /* 超出表范围,return*/ + return; + + return CLR_B(bitoff, M_POINTER(void, _enf->enf_tables[tableid].t_handle)); +} + +int EmptyNodeFilter::Init(uint32_t total, uint32_t step, uint32_t mod) +{ + mod = mod ? mod : DF_ENF_MOD; + step = step ? step : DF_ENF_STEP; + total = total ? total : DF_ENF_TOTAL; + + /* allocate header */ + uint32_t size = sizeof(ENF_T); + size += sizeof(ENF_TABLE_T) * mod; + + MEM_HANDLE_T v = M_CALLOC(size); + if (INVALID_HANDLE == v) + { + snprintf(_errmsg, sizeof(_errmsg), + "calloc %u bytes mem failed, %s", size, M_ERROR()); + return -1; + } + + _enf = M_POINTER(ENF_T, v); + + _enf->enf_total = total; + _enf->enf_step = step; + _enf->enf_mod = mod; + + return 0; +} + +int EmptyNodeFilter::Attach(MEM_HANDLE_T v) +{ + if (INVALID_HANDLE == v) + { + snprintf(_errmsg, sizeof(_errmsg), + "attach Empty-Node Filter failed, memory handle = 0"); + return -1; + } + + _enf = M_POINTER(ENF_T, v); + return 0; +} + +int EmptyNodeFilter::Detach(void) +{ + _enf = 0; + _errmsg[0] = 0; + + return 0; +} diff --git a/src/search_local/index_storage/cache/empty_filter.h b/src/search_local/index_storage/cache/empty_filter.h new file mode 100644 index 0000000..6c3dfbb --- /dev/null +++ b/src/search_local/index_storage/cache/empty_filter.h @@ -0,0 +1,82 @@ +/* + * ===================================================================================== + * + * Filename: empty_filter.h + * + * Description: + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. 
+ *
+ * =====================================================================================
+ */
+#ifndef __DTC_EMPTY_FILTER_H
+#define __DTC_EMPTY_FILTER_H
+
+#include "namespace.h"
+#include "singleton.h"
+#include "global.h"
+
+DTC_BEGIN_NAMESPACE
+
+/* Defaults used by EmptyNodeFilter::Init() when an argument is 0. */
+#define DF_ENF_TOTAL 0 /* 0 = unlimited */
+#define DF_ENF_STEP 512 /* byte */
+#define DF_ENF_MOD 30000
+
+/* One bitmap table: its memory handle and its current size (compared against
+ * byte counts in the .cc file). */
+struct _enf_table
+{
+    MEM_HANDLE_T t_handle;
+    uint32_t t_size;
+};
+typedef struct _enf_table ENF_TABLE_T;
+
+/* Filter header, immediately followed in memory by enf_mod table slots. */
+struct _empty_node_filter
+{
+    uint32_t enf_total; // total memory occupied
+    uint32_t enf_step; // table growth step
+    uint32_t enf_mod; // table-sharding modulus
+
+    ENF_TABLE_T enf_tables[0]; // bitmap tables
+};
+typedef struct _empty_node_filter ENF_T;
+
+/* Bitmap keyed by a uint32_t: key % enf_mod selects the table and
+ * key / enf_mod is the bit offset inside that table. */
+class EmptyNodeFilter
+{
+public:
+    void SET(uint32_t key);
+    void CLR(uint32_t key);
+    int ISSET(uint32_t key);
+
+public:
+    /* 0 = use default value */
+    int Init(uint32_t total = 0, uint32_t step = 0, uint32_t mod = 0);
+    int Attach(MEM_HANDLE_T);
+    int Detach(void);
+
+public:
+    EmptyNodeFilter();
+    ~EmptyNodeFilter();
+    /* NOTE(review): the template argument of Singleton is missing in this copy. */
+    static EmptyNodeFilter *Instance() { return Singleton::Instance(); }
+    static void Destroy() { Singleton::Destroy(); }
+    const char *Error() const { return _errmsg; }
+    const MEM_HANDLE_T Handle() const { return M_HANDLE(_enf); }
+
+private:
+    /* compute the table id; _enf must be set (Init/Attach) before use */
+    uint32_t Index(uint32_t key) { return key % _enf->enf_mod; }
+    /* compute the bit offset inside the table */
+    uint32_t Offset(uint32_t key) { return key / _enf->enf_mod; }
+
+private:
+    ENF_T *_enf;
+    char _errmsg[256];
+};
+
+DTC_END_NAMESPACE
+
+#endif
diff --git a/src/search_local/index_storage/cache/expire_time.cc b/src/search_local/index_storage/cache/expire_time.cc
new file mode 100644
index 0000000..d6ddb23
--- /dev/null
+++ b/src/search_local/index_storage/cache/expire_time.cc
@@ -0,0 +1,106 @@
+/*
+ * =====================================================================================
+ *
+ * Filename: expire_time.cc
+ *
+ * Description:
+ *
+ * Version: 1.0
+ * Created: 09/08/2020 10:02:05 PM
+ * Revision: none
+ * Compiler: gcc
+ *
+ * 
Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. + * + * ===================================================================================== + */ +#include "expire_time.h" +#include +#include + +DTC_USING_NAMESPACE + +ExpireTime::ExpireTime(TimerList *t, DTCBufferPool *c, DataProcess *p, DTCTableDefinition *td, int e) : timer(t), + cache(c), + process(p), + tableDef(td), + maxExpire(e) +{ + statExpireCount = statmgr.get_item_u32(DTC_KEY_EXPIRE_DTC_COUNT); + statGetCount = statmgr.get_item_u32(DTC_GET_COUNT); + statInsertCount = statmgr.get_item_u32(DTC_INSERT_COUNT); + statUpdateCount = statmgr.get_item_u32(DTC_UPDATE_COUNT); + statDeleteCount = statmgr.get_item_u32(DTC_DELETE_COUNT); + statPurgeCount = statmgr.get_item_u32(DTC_PURGE_COUNT); +} + +ExpireTime::~ExpireTime() +{ +} + +void ExpireTime::start_key_expired_task(void) +{ + log_info("start key expired task"); + attach_timer(timer); + return; +} + +int ExpireTime::try_expire_count() +{ + int num1 = maxExpire - (statGetCount.get() + statInsertCount.get() + + statUpdateCount.get() + statDeleteCount.get() + + statPurgeCount.get()) / + 10; + int num2 = cache->total_used_node(); + return num1 < num2 ? 
num1 : num2; +} + +void ExpireTime::timer_notify(void) +{ + log_debug("sched key expire task"); + int start = cache->min_valid_node_id(), end = cache->max_node_id(); + int count, interval = end - start, node_id; + int i, j, k = 0; + struct timeval tv; + + gettimeofday(&tv, NULL); + log_debug("tv.tv_usec: %ld", tv.tv_usec); + srandom(tv.tv_usec); + count = try_expire_count(); + log_debug("try_expire_count: %d", count); + for (i = 0, j = 0; i < count && j < count * 3; ++j) + { + Node node; + node_id = random() % interval + start; + node = I_SEARCH(node_id); + uint32_t expire = 0; + if (!!node && !node.not_in_lru_list() && !cache->is_time_marker(node)) + { + // read expire time + // if expired + // purge + ++i; + if (process->get_expire_time(tableDef, &node, expire) != 0) + { + log_error("get expire time error for node: %d", node.node_id()); + continue; + } + log_debug("node id: %d, expire: %d, current: %ld", node.node_id(), expire, tv.tv_sec); + if (expire != 0 && expire < tv.tv_sec) + { + log_debug("expire time timer purge node: %d, %d", node.node_id(), ++k); + cache->inc_total_row(0LL - cache->node_rows_count(node)); + if (cache->purge_node_everything(node) != 0) + { + log_error("purge node error, node: %d", node.node_id()); + } + ++statExpireCount; + } + } + } + log_debug("expire time found %d real node, %d", i, k); + + attach_timer(timer); + return; +} diff --git a/src/search_local/index_storage/cache/expire_time.h b/src/search_local/index_storage/cache/expire_time.h new file mode 100644 index 0000000..9f0ea56 --- /dev/null +++ b/src/search_local/index_storage/cache/expire_time.h @@ -0,0 +1,59 @@ +/* + * ===================================================================================== + * + * Filename: expire_time.h + * + * Description: + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. 
+ *
+ * =====================================================================================
+ */
+#ifndef __DTC_EXPIRE_TIME_H
+#define __DTC_EXPIRE_TIME_H
+
+#include "namespace.h"
+#include "timer_list.h"
+#include "log.h"
+#include "stat_dtc.h"
+#include "buffer_pool.h"
+#include "data_process.h"
+#include "raw_data_process.h"
+
+DTC_BEGIN_NAMESPACE
+
+class TimerObject;
+
+/* Timer-driven task that samples cache nodes and purges the expired ones.
+ * Inherits TimerObject privately; timer_notify() is the timer callback. */
+class ExpireTime : private TimerObject
+{
+public:
+    /* t: timer list to attach to, c: buffer pool to scan, p: data processor
+     * used to read expire times, td: table definition passed to it,
+     * e: stored as maxExpire, the starting value in try_expire_count(). */
+    ExpireTime(TimerList *t, DTCBufferPool *c, DataProcess *p, DTCTableDefinition *td, int e);
+    virtual ~ExpireTime(void);
+    virtual void timer_notify(void);
+    /* Attach to the timer list for the first time. */
+    void start_key_expired_task(void);
+    /* Number of nodes to examine in the next run. */
+    int try_expire_count();
+
+private:
+    TimerList *timer;
+    DTCBufferPool *cache;
+    DataProcess *process;
+    DTCTableDefinition *tableDef;
+
+    /* statistics counters looked up from statmgr in the constructor */
+    StatItemU32 statExpireCount;
+    StatItemU32 statGetCount;
+    StatItemU32 statInsertCount;
+    StatItemU32 statUpdateCount;
+    StatItemU32 statDeleteCount;
+    StatItemU32 statPurgeCount;
+
+    int maxExpire;
+};
+
+DTC_END_NAMESPACE
+
+#endif
diff --git a/src/search_local/index_storage/cache/feature.cc b/src/search_local/index_storage/cache/feature.cc
new file mode 100644
index 0000000..b8de534
--- /dev/null
+++ b/src/search_local/index_storage/cache/feature.cc
@@ -0,0 +1,169 @@
+/*
+ * =====================================================================================
+ *
+ * Filename: feature.cc
+ *
+ * Description: feature description character definition.
+ *
+ * Version: 1.0
+ * Created: 09/08/2020 10:02:05 PM
+ * Revision: none
+ * Compiler: gcc
+ *
+ * Author: Norton, yangshuang68@jd.com
+ * Company: JD.com, Inc. 
+ * + * ===================================================================================== + */ + +#include +#include +#include "singleton.h" +#include "feature.h" +#include "global.h" + +DTC_USING_NAMESPACE + +Feature *Feature::Instance() +{ + return Singleton::Instance(); +} + +void Feature::Destroy() +{ + return Singleton::Destroy(); +} + +Feature::Feature() : _baseInfo(NULL) +{ + memset(_errmsg, 0, sizeof(_errmsg)); +} + +Feature::~Feature() +{ +} +/* feature id -> feature. 拷贝输入feature 到 找到feature + */ +int Feature::modify_feature(FEATURE_INFO_T *fi) +{ + if (!fi) + return -1; + + FEATURE_INFO_T *p = get_feature_by_id(fi->fi_id); + if (!p) + { + snprintf(_errmsg, sizeof(_errmsg), "not found feature[%d]", fi->fi_id); + return -2; + } + + *p = *fi; + return 0; +} +/* feature id -> feature. 清空这个feature + */ +int Feature::delete_feature(FEATURE_INFO_T *fi) +{ + if (!fi) + return -1; + + FEATURE_INFO_T *p = get_feature_by_id(fi->fi_id); + if (!p) + { + snprintf(_errmsg, sizeof(_errmsg), "not found feature[%d]", fi->fi_id); + return -2; + } + + //delete feature + p->fi_id = 0; + p->fi_attr = 0; + p->fi_handle = INVALID_HANDLE; + + return 0; +} +/* 找一个空闲feature, 赋值 + */ +int Feature::add_feature(const uint32_t id, const MEM_HANDLE_T v, const uint32_t attr) +{ + if (INVALID_HANDLE == v) + { + snprintf(_errmsg, sizeof(_errmsg), "handle is invalid"); + return -1; + } + + //find freespace + FEATURE_INFO_T *p = get_feature_by_id(0); + if (!p) + { + snprintf(_errmsg, sizeof(_errmsg), "have no free space to add a new feature"); + return -2; + } + + p->fi_id = id; + p->fi_attr = attr; + p->fi_handle = v; + + return 0; +} +/* feature id -> feature. + * 1. feature id == 0: 则表示找一个空闲feature. + * 2. 
否则根据feature id 找对应的feature + */ +FEATURE_INFO_T *Feature::get_feature_by_id(const uint32_t fd) +{ + if (!_baseInfo || _baseInfo->bi_total == 0) + { + goto EXIT; + } + + for (uint32_t i = 0; i < _baseInfo->bi_total; i++) + { + if (_baseInfo->bi_features[i].fi_id == fd) + { + return (&(_baseInfo->bi_features[i])); + } + } + +EXIT: + return (FEATURE_INFO_T *)(0); +} +/* 1. 创建num个空feature + * 2. 初始化头信息(baseInfo) + */ +int Feature::Init(const uint32_t num) +{ + size_t size = sizeof(FEATURE_INFO_T); + size *= num; + size += sizeof(BASE_INFO_T); + + MEM_HANDLE_T v = M_CALLOC(size); + if (INVALID_HANDLE == v) + { + snprintf(_errmsg, sizeof(_errmsg), "init features failed, %s", M_ERROR()); + return -1; + } + + _baseInfo = M_POINTER(BASE_INFO_T, v); + _baseInfo->bi_total = num; + + return 0; +} +/* feature已经存在,第一个feature的内存句柄。直接初始化头信息指向 + */ +int Feature::Attach(MEM_HANDLE_T handle) +{ + if (INVALID_HANDLE == handle) + { + + snprintf(_errmsg, sizeof(_errmsg), "attach features failed, memory handle=0"); + return -1; + } + + _baseInfo = M_POINTER(BASE_INFO_T, handle); + return 0; +} + +int Feature::Detach(void) +{ + _baseInfo = NULL; + return 0; +} diff --git a/src/search_local/index_storage/cache/feature.h b/src/search_local/index_storage/cache/feature.h new file mode 100644 index 0000000..81284b8 --- /dev/null +++ b/src/search_local/index_storage/cache/feature.h @@ -0,0 +1,87 @@ +/* + * ===================================================================================== + * + * Filename: feature.h + * + * Description: feature description character definition. + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. 
+ *
+ * =====================================================================================
+ */
+
+#ifndef __DTC_FEATURE_H
+#define __DTC_FEATURE_H
+
+#include "namespace.h"
+#include "global.h"
+
+DTC_BEGIN_NAMESPACE
+
+// feature type
+enum feature_id
+{
+    NODE_GROUP = 10, //DTC begin feature id
+    NODE_INDEX,
+    HASH_BUCKET,
+    TABLE_INFO,
+    EMPTY_FILTER,
+    HOT_BACKUP,
+    COL_EXPAND,
+};
+typedef enum feature_id FEATURE_ID_T;
+
+/* One slot of the feature table. fi_id == 0 marks a free slot. */
+struct feature_info
+{
+    uint32_t fi_id; // feature id
+    uint32_t fi_attr; // feature attribute
+    MEM_HANDLE_T fi_handle; // feature handler
+};
+typedef struct feature_info FEATURE_INFO_T;
+
+/* Table header, immediately followed in memory by bi_total slots. */
+struct base_info
+{
+    uint32_t bi_total; // total features
+    FEATURE_INFO_T bi_features[0];
+};
+typedef struct base_info BASE_INFO_T;
+
+/* Fixed-size table that maps a feature id to a memory handle and attribute. */
+class Feature
+{
+public:
+    static Feature *Instance();
+    static void Destroy();
+
+    MEM_HANDLE_T Handle() const { return M_HANDLE(_baseInfo); }
+    const char *Error() const { return _errmsg; }
+
+    /* Each returns 0 on success and a negative value on failure (see Error()). */
+    int modify_feature(FEATURE_INFO_T *fi);
+    int delete_feature(FEATURE_INFO_T *fi);
+    int add_feature(const uint32_t id, const MEM_HANDLE_T v, const uint32_t attr = 0);
+    /* Returns NULL when no slot holds the id; id 0 finds a free slot. */
+    FEATURE_INFO_T *get_feature_by_id(const uint32_t id);
+
+    // allocate the backing memory and format it
+    int Init(const uint32_t num = MIN_FEATURES);
+    // bind to already existing backing memory
+    int Attach(MEM_HANDLE_T handle);
+    // detach from the backing memory
+    int Detach(void);
+
+public:
+    Feature();
+    ~Feature();
+
+private:
+    BASE_INFO_T *_baseInfo;
+    char _errmsg[256];
+};
+
+DTC_END_NAMESPACE
+
+#endif
diff --git a/src/search_local/index_storage/cache/fence.h b/src/search_local/index_storage/cache/fence.h
new file mode 100644
index 0000000..c8daea9
--- /dev/null
+++ b/src/search_local/index_storage/cache/fence.h
@@ -0,0 +1,45 @@
+/*
+ * =====================================================================================
+ *
+ * Filename: fence.h
+ *
+ * Description: fence class definition.
+ *
+ * Version: 1.0
+ * Created: 09/08/2020 10:02:05 PM
+ * Revision: none
+ * Compiler: gcc
+ *
+ * Company: JD.com, Inc. 
+ *
+ * =====================================================================================
+ */
+#ifndef __BARRIER_H__
+#define __BARRIER_H__
+
+/* NOTE(review): the two bare #include lines below have lost their header names
+ * in this copy, and the template arguments of ListObject / LinkQueue are
+ * missing as well. */
+#include
+#include
+
+class TaskRequest;
+class BarrierUnit;
+class Barrier;
+
+/* A per-key queue: a list node (so it can sit in a BarrierUnit hash slot or
+ * free list) that is also a queue of the requests pushed for that key. */
+class Barrier : public ListObject,
+                public LinkQueue
+{
+public:
+    friend class BarrierUnit;
+
+    /* a: allocator handed to the queue base class; the key starts at 0 */
+    inline Barrier(LinkQueue::allocator *a = NULL) : LinkQueue(a), key(0)
+    {
+    }
+    inline ~Barrier(){};
+
+    inline unsigned long Key() const { return key; }
+    inline void set_key(unsigned long k) { key = k; }
+
+private:
+    unsigned long key;
+};
+
+#endif
diff --git a/src/search_local/index_storage/cache/fence_unit.cc b/src/search_local/index_storage/cache/fence_unit.cc
new file mode 100644
index 0000000..8471a25
--- /dev/null
+++ b/src/search_local/index_storage/cache/fence_unit.cc
@@ -0,0 +1,207 @@
+/*
+ * =====================================================================================
+ *
+ * Filename: barrier_unit.cc
+ *
+ * Description: barrier unit class definition.
+ *
+ * Version: 1.0
+ * Created: 09/08/2020 10:02:05 PM
+ * Revision: none
+ * Compiler: gcc
+ *
+ * Author: Norton, yangshuang68@jd.com
+ * Company: JD.com, Inc. 
+ * + * ===================================================================================== + */ +#include + +#include +#include +#include + +#include "log.h" + +//------------------------------------------------------------------------- +BarrierUnit::BarrierUnit(PollThread *o, int max, int maxkeycount, E_BARRIER_UNIT_PLACE place) : TaskDispatcher(o), + count(0), + maxBarrier(max), + maxKeyCount(maxkeycount), + output(o) +{ + freeList.InitList(); + for (int i = 0; i < BARRIER_HASH_MAX; i++) + hashSlot[i].InitList(); + //stat + if (IN_FRONT == place) + { + statBarrierCount = statmgr.get_item_u32(DTC_FRONT_BARRIER_COUNT); + statBarrierMaxTask = statmgr.get_item_u32(DTC_FRONT_BARRIER_MAX_TASK); + } + else if (IN_BACK == place) + { + statBarrierCount = statmgr.get_item_u32(DTC_BACK_BARRIER_COUNT); + statBarrierMaxTask = statmgr.get_item_u32(DTC_BACK_BARRIER_MAX_TASK); + } + else + { + log_error("bad place value %d", place); + } + statBarrierCount = 0; + statBarrierMaxTask = 0; +} + +BarrierUnit::~BarrierUnit() +{ + while (!freeList.ListEmpty()) + { + delete static_cast(freeList.ListNext()); + } + for (int i = 0; i < BARRIER_HASH_MAX; i++) + { + while (!hashSlot[i].ListEmpty()) + { + delete static_cast(hashSlot[i].ListNext()); + } + } +} + +Barrier *BarrierUnit::get_barrier(unsigned long key) +{ + ListObject *h = &hashSlot[key2idx(key)]; + ListObject *p; + + for (p = h->ListNext(); p != h; p = p->ListNext()) + { + if (p->ListOwner()->Key() == key) + return p->ListOwner(); + } + + return NULL; +} + +Barrier *BarrierUnit::get_barrier_by_idx(unsigned long idx) +{ + if (idx >= BARRIER_HASH_MAX) + return NULL; + + ListObject *h = &hashSlot[idx]; + ListObject *p; + + p = h->ListNext(); + return p->ListOwner(); +} + +void BarrierUnit::attach_free_barrier(Barrier *bar) +{ + bar->ListMove(freeList); + count--; + statBarrierCount = count; + //Stat.set_barrier_count (count); +} + +void BarrierUnit::task_notify(TaskRequest *cur) +{ + if (cur->request_code() == DRequest::SvrAdmin 
&& + cur->requestInfo.admin_code() != DRequest::ServerAdminCmd::Migrate) + { + //Migrate命令在PrepareRequest的时候已经计算了PackedKey和hash,需要跟普通的task一起排队 + chain_request(cur); + return; + } + if (cur->is_batch_request()) + { + chain_request(cur); + return; + } + + unsigned long key = cur->barrier_key(); + Barrier *bar = get_barrier(key); + + if (bar) + { + if (bar->Count() < maxKeyCount) + { + bar->Push(cur); + if (bar->Count() > statBarrierMaxTask) //max key number + statBarrierMaxTask = bar->Count(); + } + else + { + log_warning("barrier[%s]: overload max key count %d bars %d", owner->Name(), maxKeyCount, count); + cur->set_error(-EC_SERVER_BUSY, __FUNCTION__, + "too many request blocked at key"); + cur->reply_notify(); + } + } + else if (count >= maxBarrier) + { + log_warning("too many barriers, count=%d", count); + cur->set_error(-EC_SERVER_BUSY, __FUNCTION__, + "too many barriers"); + cur->reply_notify(); + } + else + { + if (freeList.ListEmpty()) + { + bar = new Barrier(&taskQueueAllocator); + } + else + { + bar = freeList.NextOwner(); + } + bar->set_key(key); + bar->list_move_tail(hashSlot[key2idx(key)]); + bar->Push(cur); + count++; + statBarrierCount = count; //barrier number + //Stat.set_barrier_count (count); + chain_request(cur); + } +} + +void BarrierUnit::reply_notify(TaskRequest *cur) +{ + if (cur->request_code() == DRequest::SvrAdmin && + cur->requestInfo.admin_code() != DRequest::ServerAdminCmd::Migrate) + { + cur->reply_notify(); + return; + } + if (cur->is_batch_request()) + { + cur->reply_notify(); + return; + } + + unsigned long key = cur->barrier_key(); + Barrier *bar = get_barrier(key); + if (bar == NULL) + { + log_error("return task not in barrier, key=%lu", key); + } + else if (bar->Front() == cur) + { + if (bar->Count() == statBarrierMaxTask) //max key number + statBarrierMaxTask--; + bar->Pop(); + TaskRequest *next = bar->Front(); + if (next == NULL) + { + attach_free_barrier(bar); + } + else + { + queue_request(next); + } + //printf("pop bar %lu: 
count %d\n", key, bar->Count()); + } + else + { + log_error("return task not barrier header, key=%lu", key); + } + + cur->reply_notify(); +} diff --git a/src/search_local/index_storage/cache/fence_unit.h b/src/search_local/index_storage/cache/fence_unit.h new file mode 100644 index 0000000..6696982 --- /dev/null +++ b/src/search_local/index_storage/cache/fence_unit.h @@ -0,0 +1,84 @@ +/* + * ===================================================================================== + * + * Filename: fence_unit.h + * + * Description: barrier uint class definition. + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. + * + * ===================================================================================== + */ +#ifndef __BARRIER_UNIT_H__ +#define __BARRIER_UNIT_H__ + +#include +#include +#include "task_request.h" +#include "timer_list.h" +#include "fence.h" +#include "stat_dtc.h" + +#define BARRIER_HASH_MAX 1024 * 8 + +class TaskRequest; +class PollThread; +class BarrierUnit; + +class BarrierUnit : public TaskDispatcher, public ReplyDispatcher +{ +public: + enum E_BARRIER_UNIT_PLACE + { + IN_FRONT, + IN_BACK + }; + BarrierUnit(PollThread *, int max, int maxkeycount, E_BARRIER_UNIT_PLACE place); + ~BarrierUnit(); + virtual void task_notify(TaskRequest *); + virtual void reply_notify(TaskRequest *); + + void chain_request(TaskRequest *p) + { + p->push_reply_dispatcher(this); + output.task_notify(p); + } + void queue_request(TaskRequest *p) + { + p->push_reply_dispatcher(this); + output.indirect_notify(p); + } + PollThread *owner_thread(void) const { return owner; } + void attach_free_barrier(Barrier *); + int max_count_by_key(void) const { return maxKeyCount; } + void bind_dispatcher(TaskDispatcher *p) { output.bind_dispatcher(p); } + int barrier_count() const { return count; } + +protected: + int count; + LinkQueue::allocator taskQueueAllocator; + ListObject 
freeList; + ListObject hashSlot[BARRIER_HASH_MAX]; + int maxBarrier; + + Barrier *get_barrier(unsigned long key); + Barrier *get_barrier_by_idx(unsigned long idx); + int key2idx(unsigned long key) { return key % BARRIER_HASH_MAX; } + +private: + int maxKeyCount; + + RequestOutput output; + + //stat + StatItemU32 statBarrierCount; + StatItemU32 statBarrierMaxTask; +}; + +#endif diff --git a/src/search_local/index_storage/cache/global.h b/src/search_local/index_storage/cache/global.h new file mode 100644 index 0000000..996aabd --- /dev/null +++ b/src/search_local/index_storage/cache/global.h @@ -0,0 +1,85 @@ +/* + * ===================================================================================== + * + * Filename: global.h + * + * Description: macro definition and common function. + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. + * + * ===================================================================================== + */ + +#ifndef __DTC_GLOBAL_H +#define __DTC_GLOBAL_H + +#include +#include +#include "namespace.h" +#include "pt_malloc.h" + +DTC_BEGIN_NAMESPACE + +/* 共享内存操作定义 */ +#define M_HANDLE(ptr) DTCBinMalloc::Instance()->Handle(ptr) +#define M_POINTER(type, v) DTCBinMalloc::Instance()->Pointer(v) +#define M_MALLOC(size) DTCBinMalloc::Instance()->Malloc(size) +#define M_CALLOC(size) DTCBinMalloc::Instance()->Calloc(size) +#define M_REALLOC(v, size) DTCBinMalloc::Instance()->ReAlloc(v, size) +#define M_FREE(v) DTCBinMalloc::Instance()->Free(v) +#define M_ERROR() DTCBinMalloc::Instance()->get_err_msg() + +/* Node查找函数 */ +#define I_SEARCH(id) NodeIndex::Instance()->Search(id) +#define I_INSERT(node) NodeIndex::Instance()->Insert(node) +/*#define I_DELETE(node) NodeIndex::Instance()->Delete(node) */ + +/* memory handle*/ +#define MEM_HANDLE_T ALLOC_HANDLE_T + +/*Node ID*/ +#define NODE_ID_T uint32_t +#define INVALID_NODE_ID ((NODE_ID_T)(-1)) 
+/* Ids of the three list-head nodes (dirty, clean, empty), counted from
+ * SYS_MIN_NODE_ID. */
+#define SYS_MIN_NODE_ID ((NODE_ID_T)(0))
+#define SYS_DIRTY_NODE_INDEX 0
+#define SYS_CLEAN_NODE_INDEX 1
+#define SYS_EMPTY_NODE_INDEX 2
+#define SYS_DIRTY_HEAD_ID (SYS_MIN_NODE_ID + SYS_DIRTY_NODE_INDEX)
+#define SYS_CLEAN_HEAD_ID (SYS_MIN_NODE_ID + SYS_CLEAN_NODE_INDEX)
+#define SYS_EMPTY_HEAD_ID (SYS_MIN_NODE_ID + SYS_EMPTY_NODE_INDEX)
+
+/* Node time list */
+#define LRU_PREV (0)
+#define LRU_NEXT (1)
+
+/* features: default slot count for Feature::Init() */
+#define MIN_FEATURES 32
+
+/*Hash ID*/
+#define HASH_ID_T uint32_t
+
+/* Node Group */
+#define NODE_GROUP_INCLUDE_NODES 256
+
+/* output u64 format: printf conversion for a 64-bit unsigned value, chosen by word size */
+#if __WORDSIZE == 64
+#define UINT64FMT "%lu"
+#else
+#define UINT64FMT "%llu"
+#endif
+
+/* printf conversion for a 64-bit signed value, chosen by word size */
+#if __WORDSIZE == 64
+#define INT64FMT "%ld"
+#else
+#define INT64FMT "%lld"
+#endif
+
+DTC_END_NAMESPACE
+
+#endif
diff --git a/src/search_local/index_storage/cache/hash.cc b/src/search_local/index_storage/cache/hash.cc
new file mode 100644
index 0000000..7530be9
--- /dev/null
+++ b/src/search_local/index_storage/cache/hash.cc
@@ -0,0 +1,83 @@
+/*
+ * =====================================================================================
+ *
+ * Filename: hash.cc
+ *
+ * Description:
+ *
+ * Version: 1.0
+ * Created: 09/08/2020 10:02:05 PM
+ * Revision: none
+ * Compiler: gcc
+ *
+ * Author: Norton, yangshuang68@jd.com
+ * Company: JD.com, Inc. 
+ * + * ===================================================================================== + */ +#include +#include +#include "hash.h" +#include "global.h" + +DTC_USING_NAMESPACE + +DTCHash::DTCHash() : _hash(NULL) +{ + memset(_errmsg, 0, sizeof(_errmsg)); +} + +DTCHash::~DTCHash() +{ +} + +NODE_ID_T &DTCHash::hash2_node(const HASH_ID_T v) +{ + return _hash->hh_buckets[v]; +} + +int DTCHash::Init(const uint32_t hsize, const uint32_t fixedsize) +{ + size_t size = sizeof(NODE_ID_T); + size *= hsize; + size += sizeof(HASH_T); + + MEM_HANDLE_T v = M_CALLOC(size); + if (INVALID_HANDLE == v) + { + snprintf(_errmsg, sizeof(_errmsg), "init hash bucket failed, %s", M_ERROR()); + return -1; + } + + _hash = M_POINTER(HASH_T, v); + _hash->hh_size = hsize; + _hash->hh_free = hsize; + _hash->hh_node = 0; + _hash->hh_fixedsize = fixedsize; + + /* init each nodeid to invalid */ + for (uint32_t i = 0; i < hsize; i++) + { + _hash->hh_buckets[i] = INVALID_NODE_ID; + } + + return 0; +} + +int DTCHash::Attach(MEM_HANDLE_T handle) +{ + if (INVALID_HANDLE == handle) + { + snprintf(_errmsg, sizeof(_errmsg), "attach hash bucket failed, memory handle = 0"); + return -1; + } + + _hash = M_POINTER(HASH_T, handle); + return 0; +} + +int DTCHash::Detach(void) +{ + _hash = (HASH_T *)(0); + return 0; +} diff --git a/src/search_local/index_storage/cache/hash.h b/src/search_local/index_storage/cache/hash.h new file mode 100644 index 0000000..05da25c --- /dev/null +++ b/src/search_local/index_storage/cache/hash.h @@ -0,0 +1,124 @@ +/* + * ===================================================================================== + * + * Filename: hash.h + * + * Description: + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. 
+ * + * ===================================================================================== + */ +#ifndef __DTC_HASH_H +#define __DTC_HASH_H + +#include "namespace.h" +#include "singleton.h" +#include "global.h" +#include "node.h" +#include "new_hash.h" + +DTC_BEGIN_NAMESPACE + +struct _hash +{ + uint32_t hh_size; // hash 大小 + uint32_t hh_free; // 空闲的hash数量 + uint32_t hh_node; // 挂接的node总数量 + uint32_t hh_fixedsize; // key大小:变长key时,hh_fixedsize = 0;其他就是其实际长度 + uint32_t hh_buckets[0]; // hash bucket start +}; +typedef struct _hash HASH_T; + +class DTCHash +{ +public: + DTCHash(); + ~DTCHash(); + + static DTCHash *Instance() { return Singleton::Instance(); } + static void Destroy() { Singleton::Destroy(); } + + inline HASH_ID_T new_hash_slot(const char *key) + { + //变长key的前一个字节编码的是key的长度 + uint32_t size = _hash->hh_fixedsize ? _hash->hh_fixedsize : *(unsigned char *)key++; + + //目前仅支持1、2、4字节的定长key + switch (size) + { + case sizeof(unsigned char): + return (*(unsigned char *)key) % _hash->hh_size; + case sizeof(unsigned short): + return (*(unsigned short *)key) % _hash->hh_size; + case sizeof(unsigned int): + return (*(unsigned int *)key) % _hash->hh_size; + } + + unsigned int h = new_hash(key, size); + return h % _hash->hh_size; + } + + inline HASH_ID_T hash_slot(const char *key) + { + //变长key的前一个字节编码的是key的长度 + uint32_t size = _hash->hh_fixedsize ? 
_hash->hh_fixedsize : *(unsigned char *)key++; + + //目前仅支持1、2、4字节的定长key + switch (size) + { + case sizeof(unsigned char): + return (*(unsigned char *)key) % _hash->hh_size; + case sizeof(unsigned short): + return (*(unsigned short *)key) % _hash->hh_size; + case sizeof(unsigned int): + return (*(unsigned int *)key) % _hash->hh_size; + } + + unsigned int h = 0, g = 0; + const char *arEnd = key + size; + + //变长key hash算法, 目前8字节的定长整型key也是作为变长hash的。 + while (key < arEnd) + { + h = (h << 4) + *key++; + if ((g = (h & 0xF0000000))) + { + h = h ^ (g >> 24); + h = h ^ g; + } + } + return h % _hash->hh_size; + } + + NODE_ID_T &hash2_node(const HASH_ID_T); + + const MEM_HANDLE_T Handle() const { return M_HANDLE(_hash); } + const char *Error() const { return _errmsg; } + + //创建物理内存并格式化 + int Init(const uint32_t hsize, const uint32_t fixedsize); + //绑定到物理内存 + int Attach(MEM_HANDLE_T handle); + //脱离物理内存 + int Detach(void); + + uint32_t hash_size() const { return _hash->hh_size; } + uint32_t free_bucket() const { return _hash->hh_free; } + void inc_free_bucket(int v) { _hash->hh_free += v; } + void inc_node_cnt(int v) { _hash->hh_node += v; } + +private: + HASH_T *_hash; + char _errmsg[256]; +}; + +DTC_END_NAMESPACE + +#endif diff --git a/src/search_local/index_storage/cache/hb_feature.cc b/src/search_local/index_storage/cache/hb_feature.cc new file mode 100644 index 0000000..66788d1 --- /dev/null +++ b/src/search_local/index_storage/cache/hb_feature.cc @@ -0,0 +1,69 @@ +/* + * ===================================================================================== + * + * Filename: hb_feature.cc + * + * Description: hotbackup method release. + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. 
+ * + * ===================================================================================== + */ +#include +#include +#include +#include "hb_feature.h" +#include "global.h" + +DTC_USING_NAMESPACE + +HBFeature::HBFeature() : _hb_info(NULL), _handle(INVALID_HANDLE) +{ + memset(_errmsg, 0, sizeof(_errmsg)); +} + +HBFeature::~HBFeature() +{ +} + +int HBFeature::Init(time_t tMasterUptime) +{ + _handle = M_CALLOC(sizeof(HB_FEATURE_INFO_T)); + if (INVALID_HANDLE == _handle) + { + snprintf(_errmsg, sizeof(_errmsg), "init hb_feature fail, %s", M_ERROR()); + return -ENOMEM; + } + + _hb_info = M_POINTER(HB_FEATURE_INFO_T, _handle); + _hb_info->master_up_time = tMasterUptime; + _hb_info->slave_up_time = 0; + + return 0; +} + +int HBFeature::Attach(MEM_HANDLE_T handle) +{ + if (INVALID_HANDLE == handle) + { + snprintf(_errmsg, sizeof(_errmsg), "attach hb feature failed, memory handle = 0"); + return -1; + } + + _handle = handle; + _hb_info = M_POINTER(HB_FEATURE_INFO_T, _handle); + + return 0; +} + +void HBFeature::Detach(void) +{ + _hb_info = NULL; + _handle = INVALID_HANDLE; +} diff --git a/src/search_local/index_storage/cache/hb_feature.h b/src/search_local/index_storage/cache/hb_feature.h new file mode 100644 index 0000000..98efb44 --- /dev/null +++ b/src/search_local/index_storage/cache/hb_feature.h @@ -0,0 +1,63 @@ +/* + * ===================================================================================== + * + * Filename: hb_feature.h + * + * Description: hotbackup method release. + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. 
+ * + * ===================================================================================== + */ +#ifndef __DTC_HB_FEATURE_H +#define __DTC_HB_FEATURE_H + +#include +#include +#include + +#include "namespace.h" +#include "singleton.h" +#include "global.h" + +struct hb_feature_info +{ + int64_t master_up_time; + int64_t slave_up_time; +}; +typedef struct hb_feature_info HB_FEATURE_INFO_T; + +class HBFeature +{ +public: + HBFeature(); + ~HBFeature(); + + static HBFeature* Instance(){return Singleton::Instance();} + static void Destroy() { Singleton::Destroy();} + + int Init(time_t tMasterUptime); + int Attach(MEM_HANDLE_T handle); + void Detach(void); + + const char *Error() const {return _errmsg;} + + MEM_HANDLE_T Handle() const { return _handle; } + + int64_t& master_uptime() { return _hb_info->master_up_time; } + int64_t& slave_uptime() { return _hb_info->slave_up_time; } + +private: + HB_FEATURE_INFO_T* _hb_info; + MEM_HANDLE_T _handle; + char _errmsg[256]; +}; + +#endif + diff --git a/src/search_local/index_storage/cache/hb_log.cc b/src/search_local/index_storage/cache/hb_log.cc new file mode 100644 index 0000000..92a1446 --- /dev/null +++ b/src/search_local/index_storage/cache/hb_log.cc @@ -0,0 +1,224 @@ +/* + * ===================================================================================== + * + * Filename: hb_log.cc + * + * Description: + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. 
+ * + * ===================================================================================== + */ +#include "hb_log.h" +#include "global.h" +#include "admin_tdef.h" + +HBLog::HBLog(DTCTableDefinition *tbl) : _tabledef(tbl), + _log_writer(0), + _log_reader(0) +{ +} + +HBLog::~HBLog() +{ + DELETE(_log_writer); + DELETE(_log_reader); +} + +int HBLog::Init(const char *path, const char *prefix, uint64_t total, off_t max_size) +{ + _log_writer = new BinlogWriter; + _log_reader = new BinlogReader; + + if (_log_writer->Init(path, prefix, total, max_size)) + { + log_error("init log_writer failed"); + return -1; + } + + if (_log_reader->Init(path, prefix)) + { + log_error("init log_reader failed"); + return -2; + } + + return 0; +} + +int HBLog::write_update_log(TaskRequest &task) +{ + RawData *raw_data; + NEW(RawData(&g_stSysMalloc, 1), raw_data); + + if (!raw_data) + { + log_error("raw_data is null"); + return -1; + } + + HotBackTask &hotbacktask = task.get_hot_back_task(); + int type = hotbacktask.get_type(); + if (raw_data->Init(0, _tabledef->key_size(), (const char *)&type, 0, -1, -1, 0)) + { + DELETE(raw_data); + return -1; + } + DTCValue key; + DTCValue value; + if (0 == hotbacktask.get_packed_key_len()) + { + log_error("packedkey len is zero"); + return -1; + } + else + { + key.Set(hotbacktask.get_packed_key(), hotbacktask.get_packed_key_len()); + } + + if (0 == hotbacktask.get_value_len()) + { + value.Set(0); + } + else + { + value.Set(hotbacktask.get_value(), hotbacktask.get_value_len()); + } + + RowValue row(_tabledef); + row[0].u64 = type; + row[1].u64 = hotbacktask.get_flag(); + row[2] = key; + row[3] = value; + log_debug(" tye is %d, flag %d", type, hotbacktask.get_flag()); + raw_data->insert_row(row, false, false); + _log_writer->insert_header(type, 0, 1); + _log_writer->append_body(raw_data->get_addr(), raw_data->data_size()); + DELETE(raw_data); + + log_debug(" packed key len:%d,key len:%d, key :%s", key.bin.len, *(unsigned char *)key.bin.ptr, key.bin.ptr + 
1); + return _log_writer->Commit(); +} + +int HBLog::write_lru_hb_log(TaskRequest &task) +{ + RawData *raw_data; + NEW(RawData(&g_stSysMalloc, 1), raw_data); + + if (!raw_data) + { + log_error("raw_data is null"); + return -1; + } + + HotBackTask &hotbacktask = task.get_hot_back_task(); + int type = hotbacktask.get_type(); + if (raw_data->Init(0, _tabledef->key_size(), (const char *)&type, 0, -1, -1, 0)) + { + DELETE(raw_data); + return -1; + } + DTCValue key; + if (0 == hotbacktask.get_packed_key_len()) + { + log_error("packedkey len is zero"); + return -1; + } + else + { + key.Set(hotbacktask.get_packed_key(), hotbacktask.get_packed_key_len()); + } + + RowValue row(_tabledef); + row[0].u64 = type; + row[1].u64 = hotbacktask.get_flag(); + row[2] = key; + row[3].Set(0); + log_debug(" type is %d, flag %d", type, hotbacktask.get_flag()); + raw_data->insert_row(row, false, false); + _log_writer->insert_header(BINLOG_LRU, 0, 1); + _log_writer->append_body(raw_data->get_addr(), raw_data->data_size()); + DELETE(raw_data); + + log_debug(" write lru hotback log, packed key len:%d,key len:%d, key :%s", key.bin.len, *(unsigned char *)key.bin.ptr, key.bin.ptr + 1); + return _log_writer->Commit(); +} + +int HBLog::Seek(const JournalID &v) +{ + return _log_reader->Seek(v); +} + +/* 批量拉取更新key,返回更新key的个数 */ +int HBLog::task_append_all_rows(TaskRequest &task, int limit) +{ + int count; + for (count = 0; count < limit; ++count) + { + /* 没有待处理日志 */ + if (_log_reader->Read()) + break; + + RawData *raw_data; + + NEW(RawData(&g_stSysMalloc, 0), raw_data); + + if (!raw_data) + { + log_error("allocate rawdata mem failed"); + return -1; + } + + if (raw_data->check_size(g_stSysMalloc.Handle(_log_reader->record_pointer()), + 0, + _tabledef->key_size(), + _log_reader->record_length(0)) < 0) + { + log_error("raw data broken: wrong size"); + DELETE(raw_data); + return -1; + } + + /* attach raw data read from one binlog */ + if 
(raw_data->Attach(g_stSysMalloc.Handle(_log_reader->record_pointer()), 0, _tabledef->key_size())) + { + log_error("attach rawdata mem failed"); + + DELETE(raw_data); + return -1; + } + + RowValue r(_tabledef); + r[0].u64 = *(unsigned *)raw_data->Key(); + + unsigned char flag = 0; + while (raw_data->decode_row(r, flag) == 0) + { + + log_debug("type: " UINT64FMT ", flag: " UINT64FMT ", key:%s, value :%s", + r[0].u64, r[1].u64, r[2].bin.ptr, r[3].bin.ptr); + log_debug("binlog-type: %d", _log_reader->binlog_type()); + + task.append_row(&r); + } + + DELETE(raw_data); + } + + return count; +} + +JournalID HBLog::get_reader_jid(void) +{ + return _log_reader->query_id(); +} + +JournalID HBLog::get_writer_jid(void) +{ + return _log_writer->query_id(); +} diff --git a/src/search_local/index_storage/cache/hb_log.h b/src/search_local/index_storage/cache/hb_log.h new file mode 100644 index 0000000..fecf097 --- /dev/null +++ b/src/search_local/index_storage/cache/hb_log.h @@ -0,0 +1,63 @@ +/* + * ===================================================================================== + * + * Filename: hb_log.h + * + * Description: + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. 
+ *
+ * =====================================================================================
+ */
+#ifndef __DTC_HB_LOG_H
+#define __DTC_HB_LOG_H
+
+#include "logger.h"
+#include "journal_id.h"
+#include "task_request.h"
+#include "field.h"
+#include "raw_data.h"
+#include "admin_tdef.h"
+#include "sys_malloc.h"
+#include "table_def.h"
+
+class BinlogWriter;
+class BinlogReader;
+
+/*
+ * Hot-backup log: encodes hot-backup tasks as rows, appends them to a binlog
+ * through a BinlogWriter and reads them back through a BinlogReader.
+ */
+class HBLog
+{
+public:
+    // `tbl` is the table definition used for encoding/decoding rows
+    HBLog(DTCTableDefinition *tbl);
+    ~HBLog();
+
+    // create the writer and reader and open them on path/prefix
+    int Init(const char *path, const char *prefix, uint64_t total, off_t max_size);
+    // position the reader at the given journal id
+    int Seek(const JournalID &);
+
+    JournalID get_reader_jid(void);
+    JournalID get_writer_jid(void);
+
+    // no value, only the updated key is written
+    int write_update_key(DTCValue key, int type);
+
+    // encode multiple log records into the TaskRequest
+    int task_append_all_rows(TaskRequest &, int limit);
+
+    // provided for LRUBitUnit to record LRU changes
+    int write_lru_hb_log(TaskRequest &task);
+    int write_update_log(TaskRequest &task);
+    int write_update_key(DTCValue key, DTCValue v, int type);
+
+private:
+    DTCTableDefinition *_tabledef;
+    BinlogWriter *_log_writer;
+    BinlogReader *_log_reader;
+};
+
+#endif
diff --git a/src/search_local/index_storage/cache/hb_process.cc b/src/search_local/index_storage/cache/hb_process.cc
new file mode 100644
index 0000000..5cd67da
--- /dev/null
+++ b/src/search_local/index_storage/cache/hb_process.cc
@@ -0,0 +1,201 @@
+/*
+ * =====================================================================================
+ *
+ * Filename: hb_process.cc
+ *
+ * Description:
+ *
+ * Version: 1.0
+ * Created: 09/08/2020 10:02:05 PM
+ * Revision: none
+ * Compiler: gcc
+ *
+ * Author: Norton, yangshuang68@jd.com
+ * Company: JD.com, Inc.
+ * + * ===================================================================================== + */ +#include "hb_process.h" +#include "poll_thread.h" +#include "task_request.h" +#include "log.h" +#include "hotback_task.h" + +extern DTCTableDefinition *gTableDef[]; + +HBProcess::HBProcess(PollThread *o) : TaskDispatcher(o), + ownerThread(o), + output(o), + taskPendList(this), + hbLog(TableDefinitionManager::Instance()->get_hot_backup_table_def()) +{ +} + +HBProcess::~HBProcess() +{ +} +void HBProcess::task_notify(TaskRequest *cur) +{ + log_debug("request type is %d ", cur->request_type()); + THBResult result = HB_PROCESS_ERROR; + switch (cur->request_type()) + { + case TaskTypeWriteHbLog: + { + result = write_hb_log_process(*cur); + break; + } + case TaskTypeReadHbLog: + { + result = read_hb_log_process(*cur); + break; + } + case TaskTypeWriteLruHbLog: + { + result = write_lru_hb_log_process(*cur); + break; + } + case TaskTypeRegisterHbLog: + { + result = register_hb_log_process(*cur); + break; + } + case TaskTypeQueryHbLogInfo: + { + result = query_hb_log_info_process(*cur); + break; + } + default: + { + cur->set_error(-EBADRQC, "hb process", "invalid hb cmd code"); + log_notice("invalid hb cmd code[%d]", cur->request_type()); + cur->reply_notify(); + return; + } + } + + if (HB_PROCESS_PENDING == result) + { + log_debug("hb task is pending "); + return; + } + log_debug("hb task reply"); + cur->reply_notify(); + return; +} + +bool HBProcess::Init(uint64_t total, off_t max_size) +{ + log_debug("total: %lu, max_size: %ld", total, max_size); + if (hbLog.Init("../log/hblog", "hblog", total, max_size)) + { + log_error("hotback process for hblog init failed"); + return false; + } + + return true; +} + +THBResult HBProcess::write_hb_log_process(TaskRequest &task) +{ + if (0 != hbLog.write_update_log(task)) + { + task.set_error(-EC_ERR_HOTBACK_WRITEUPDATE, "HBProcess", "write_hb_log_process fail"); + return HB_PROCESS_ERROR; + } + taskPendList.Wakeup(); + return 
HB_PROCESS_OK; +} + +THBResult HBProcess::write_lru_hb_log_process(TaskRequest &task) +{ + if (0 != hbLog.write_lru_hb_log(task)) + { + task.set_error(-EC_ERR_HOTBACK_WRITELRU, "HBProcess", "write_lru_hb_log_process fail"); + return HB_PROCESS_ERROR; + } + return HB_PROCESS_OK; +} + +THBResult HBProcess::read_hb_log_process(TaskRequest &task) +{ + log_debug("read Hb log begin "); + JournalID hb_jid = task.versionInfo.hot_backup_id(); + JournalID write_jid = hbLog.get_writer_jid(); + + if (hb_jid.GE(write_jid)) + { + taskPendList.add2_list(&task); + return HB_PROCESS_PENDING; + } + + if (hbLog.Seek(hb_jid)) + { + task.set_error(-EC_BAD_HOTBACKUP_JID, "HBProcess", "read_hb_log_process jid overflow"); + return HB_PROCESS_ERROR; + } + + task.prepare_result_no_limit(); + + int count = hbLog.task_append_all_rows(task, task.requestInfo.limit_count()); + if (count >= 0) + { + statIncSyncStep.push(count); + } + else + { + task.set_error(-EC_ERROR_BASE, "HBProcess", "read_hb_log_process,decode binlog error"); + return HB_PROCESS_ERROR; + } + + task.versionInfo.set_hot_backup_id((uint64_t)hbLog.get_reader_jid()); + return HB_PROCESS_OK; +} +THBResult HBProcess::register_hb_log_process(TaskRequest &task) +{ + + JournalID client_jid = task.versionInfo.hot_backup_id(); + JournalID master_jid = hbLog.get_writer_jid(); + log_notice("hb register, client[serial=%u, offset=%u], master[serial=%u, offset=%u]", + client_jid.serial, client_jid.offset, master_jid.serial, master_jid.offset); + + //full sync + if (client_jid.Zero()) + { + log_info("full-sync stage."); + task.versionInfo.set_hot_backup_id((uint64_t)master_jid); + task.set_error(-EC_FULL_SYNC_STAGE, "HBProcess", "Register,full-sync stage"); + return HB_PROCESS_ERROR; + } + else + { + //inc sync + if (hbLog.Seek(client_jid) == 0) + { + log_info("inc-sync stage."); + task.versionInfo.set_hot_backup_id((uint64_t)client_jid); + task.set_error(-EC_INC_SYNC_STAGE, "HBProcess", "register, inc-sync stage"); + return HB_PROCESS_ERROR; 
+ } + //error + else + { + log_info("err-sync stage."); + task.versionInfo.set_hot_backup_id((uint64_t)0); + task.set_error(-EC_ERR_SYNC_STAGE, "HBProcess", "register, err-sync stage"); + return HB_PROCESS_ERROR; + } + } +} +THBResult HBProcess::query_hb_log_info_process(TaskRequest &task) +{ + struct DTCServerInfo s_info; + memset(&s_info, 0x00, sizeof(s_info)); + s_info.version = 0x1; + + JournalID jid = hbLog.get_writer_jid(); + s_info.binlog_id = jid.Serial(); + s_info.binlog_off = jid.Offset(); + task.resultInfo.set_server_info(&s_info); + return HB_PROCESS_OK; +} diff --git a/src/search_local/index_storage/cache/hb_process.h b/src/search_local/index_storage/cache/hb_process.h new file mode 100644 index 0000000..8325573 --- /dev/null +++ b/src/search_local/index_storage/cache/hb_process.h @@ -0,0 +1,61 @@ +/* + * ===================================================================================== + * + * Filename: hb_process.h + * + * Description: + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. 
+ *
+ * =====================================================================================
+ */
+#ifndef _HB_PROCESS_H_
+#define _HB_PROCESS_H_
+
+#include "request_base.h"
+#include "hb_log.h"
+#include "task_pendlist.h"
+#include "stat_manager.h"
+// NOTE(review): the bare #include below lost its <header> name when this patch
+// text was extracted; restore it from the repository.
+#include
+
+class PollThread;
+class TaskRequest;
+
+/* Outcome of one hot-backup handler, as consumed by HBProcess::task_notify(). */
+enum THBResult
+{
+    HB_PROCESS_ERROR = -1,
+    HB_PROCESS_OK = 0,
+    HB_PROCESS_PENDING = 2, /* task_notify() sends no reply for this result */
+};
+
+/*
+ * Dispatcher for hot-backup requests: writes update/LRU records to the
+ * hot-backup log, serves reads from it, and answers register/query requests.
+ *
+ * NOTE(review): `TaskDispatcher` and `RequestOutput` below presumably carried
+ * template arguments that were stripped during extraction -- confirm against
+ * the repository.
+ */
+class HBProcess : public TaskDispatcher
+{
+public:
+    HBProcess(PollThread *o);
+    virtual ~HBProcess();
+
+    /* entry point: dispatch `cur` by its request type */
+    virtual void task_notify(TaskRequest *cur);
+    /* open the hot-backup log; returns false on failure */
+    bool Init(uint64_t total, off_t max_size);
+
+private:
+    /*concrete hb operation*/
+    THBResult write_hb_log_process(TaskRequest &task);
+    THBResult read_hb_log_process(TaskRequest &task);
+    THBResult write_lru_hb_log_process(TaskRequest &task);
+    THBResult register_hb_log_process(TaskRequest &task);
+    THBResult query_hb_log_info_process(TaskRequest &task);
+
+private:
+    PollThread *ownerThread;
+    RequestOutput output;
+    TaskPendingList taskPendList; /* read requests parked until new log data is written */
+    HBLog hbLog;
+    StatSample statIncSyncStep;   /* receives the record count of each successful read */
+};
+
+#endif
diff --git a/src/search_local/index_storage/cache/logger.cc b/src/search_local/index_storage/cache/logger.cc
new file mode 100644
index 0000000..6f3624a
--- /dev/null
+++ b/src/search_local/index_storage/cache/logger.cc
@@ -0,0 +1,517 @@
+/*
+ * =====================================================================================
+ *
+ * Filename: logger.cc
+ *
+ * Description:
+ *
+ * Version: 1.0
+ * Created: 09/08/2020 10:02:05 PM
+ * Revision: none
+ * Compiler: gcc
+ *
+ * Author: Norton, yangshuang68@jd.com
+ * Company: JD.com, Inc.
+ * + * ===================================================================================== + */ +#include +#include +#include +#include +#include +#include +#include +#include "logger.h" +#include "log.h" +#include "global.h" + +LogBase::LogBase() : _fd(-1) +{ + bzero(_path, sizeof(_path)); + bzero(_prefix, sizeof(_prefix)); +} + +LogBase::~LogBase() +{ + close_file(); +} + +int LogBase::set_path(const char *path, const char *prefix) +{ + snprintf(_path, sizeof(_path), "%s", path); + snprintf(_prefix, sizeof(_prefix), "%s", prefix); + + mkdir(_path, 0777); + + if (access(_path, W_OK | X_OK) < 0) + { + log_error("dir(%s) Not writable", _path); + return -1; + } + + return 0; +} + +void LogBase::close_file() +{ + if (_fd > 0) + { + ::close(_fd); + _fd = -1; + } +} + +int LogBase::stat_size(off_t *s) +{ + struct stat st; + if (fstat(_fd, &st)) + return -1; + + *s = st.st_size; + return 0; +} + +int LogBase::delete_file(uint32_t serial) +{ + char file[MAX_PATH_NAME_LEN] = {0}; + file_name(file, MAX_PATH_NAME_LEN, serial); + + return unlink(file); +} + +int LogBase::open_file(uint32_t serial, int read) +{ + char file[MAX_PATH_NAME_LEN] = {0}; + file_name(file, MAX_PATH_NAME_LEN, serial); + + read ? 
_fd = ::open(file, O_RDONLY | O_LARGEFILE, 0644) : _fd = ::open(file, O_WRONLY | O_APPEND | O_CREAT | O_LARGEFILE | O_TRUNC, 0644); + + if (_fd < 0) + { + log_debug("open file[%s] failed, %m", file); + return -1; + } + + return 0; +} + +int LogBase::scan_serial(uint32_t *min, uint32_t *max) +{ + DIR *dir = opendir(_path); + if (!dir) + return -1; + + struct dirent *drt = readdir(dir); + if (!drt) + { + closedir(dir); + return -2; + } + + *min = (uint32_t)((1ULL << 32) - 1); + *max = 0; + + char prefix[MAX_PATH_NAME_LEN] = {0}; + snprintf(prefix, MAX_PATH_NAME_LEN, "%s.binlog.", _prefix); + + int l = strlen(prefix); + uint32_t v = 0; + int found = 0; + + for (; drt; drt = readdir(dir)) + { + int n = strncmp(drt->d_name, prefix, l); + if (n == 0) + { + v = strtoul(drt->d_name + l, NULL, 10); + v >= 1 ? (*max < v ? *max = v : v), (v < *min ? *min = v : v) : v; + found = 1; + } + } + + found ? *max : (*max = 0, *min = 0); + + log_debug("scan serial: min=%u, max=%u\n", *min, *max); + + closedir(dir); + return 0; +} + +void LogBase::file_name(char *s, int len, unsigned serial) +{ + snprintf(s, len, "%s/%s.binlog.%u", _path, _prefix, serial); +} + +LogWriter::LogWriter() : LogBase(), + _cur_size(0), + _max_size(0), + _total_size(0), + _cur_max_serial(0), //serial start 0 + _cur_min_serial(0) //serial start 0 +{ +} + +LogWriter::~LogWriter() +{ +} + +int LogWriter::open(const char *path, const char *prefix, + off_t max_size, uint64_t total_size) +{ + if (set_path(path, prefix)) + return -1; + + _max_size = max_size; + _total_size = total_size; + + if (scan_serial(&_cur_min_serial, &_cur_max_serial)) + { + log_debug("scan file serial failed, %m"); + return -1; + } + + _cur_max_serial += 1; //skip current binlog file. 
+ return open_file(_cur_max_serial, 0); +} + +int LogWriter::write(const void *buf, size_t size) +{ + int unused; + + unused = ::write(_fd, buf, size); + if (unused != size) + { + log_error("wirte hblog[input size %u, write success size %d] err, %m", size, unused); + } + _cur_size += size; + return shift_file(); +} + +JournalID LogWriter::query() +{ + JournalID v(_cur_max_serial, _cur_size); + return v; +} + +int LogWriter::shift_file() +{ + int need_shift = 0; + int need_delete = 0; + + if (_cur_size >= _max_size) + need_shift = 1; + else + return 0; + + uint64_t total = _cur_max_serial - _cur_min_serial; + total *= _max_size; + + if (total >= _total_size) + { + need_delete = 1; + } + + log_debug("shift file: cur_size:" UINT64FMT ", total_size:" UINT64FMT ", \ + shift:%d, cur_min_serial=%u, cur_max_serial=%u\n", + total, _total_size, need_shift, _cur_min_serial, _cur_max_serial); + + if (need_shift) + { + if (need_delete) + { + delete_file(_cur_min_serial); + _cur_min_serial += 1; + } + + close_file(); + + _cur_size = 0; + _cur_max_serial += 1; + } + + return open_file(_cur_max_serial, 0); +} + +LogReader::LogReader() : LogBase(), + _min_serial(0), + _max_serial(0), + _cur_serial(0), + _cur_offset(0) +{ +} + +LogReader::~LogReader() +{ +} + +int LogReader::open(const char *path, const char *prefix) +{ + if (set_path(path, prefix)) + return -1; + + //refresh directory + refresh(); + + _cur_serial = _min_serial; + _cur_offset = 0; + + return open_file(_cur_serial, 1); +} + +void LogReader::refresh() +{ + scan_serial(&_min_serial, &_max_serial); +} + +int LogReader::read(void *buf, size_t size) +{ + ssize_t rd = ::read(_fd, buf, size); + if (rd == (ssize_t)size) + { + _cur_offset += rd; + return 0; + } + else if (rd < 0) + { + return -1; + } + + // 如果还有更大的serial,则丢弃buf内容,切换文件。否则,回退文件指针 + refresh(); + + if (_cur_serial < _max_serial) + { + _cur_serial += 1; + _cur_offset = 0; + + close_file(); + //跳过序号不存在的文件 + while (open_file(_cur_serial, 1) == -1 && _cur_serial < 
_max_serial) + _cur_serial += 1; + + if (_fd > 0 && _cur_serial <= _max_serial) + return read(buf, size); + else + return -1; + } + + // 回退文件指针 + if (rd > 0) + { + seek(JournalID(_cur_serial, _cur_offset)); + } + + return -1; +} + +JournalID LogReader::query() +{ + JournalID v(_cur_serial, _cur_offset); + return v; +} + +int LogReader::seek(const JournalID &v) +{ + char file[MAX_PATH_NAME_LEN] = {0}; + file_name(file, MAX_PATH_NAME_LEN, v.serial); + + /* 确保文件存在 */ + if (access(file, F_OK)) + return -1; + + if (v.serial != _cur_serial) + { + close_file(); + + if (open_file(v.serial, 1) == -1) + { + log_debug("hblog %u not exist, seek failed", v.serial); + return -1; + } + } + + log_debug("open serial=%u, %m", v.serial); + + off_t file_size = 0; + stat_size(&file_size); + + if (v.offset > (uint32_t)file_size) + return -1; + + lseek(_fd, v.offset, SEEK_SET); + + _cur_offset = v.offset; + _cur_serial = v.serial; + return 0; +} + +BinlogWriter::BinlogWriter() : _log_writer() + +{ +} + +BinlogWriter::~BinlogWriter() +{ +} + +int BinlogWriter::Init(const char *path, const char *prefix, uint64_t total, off_t max_size) +{ + return _log_writer.open(path, prefix, max_size, total); +} + +#define struct_sizeof(t) sizeof(((binlog_header_t *)NULL)->t) +#define struct_typeof(t) typeof(((binlog_header_t *)NULL)->t) + +int BinlogWriter::insert_header(uint8_t type, uint8_t operater, uint32_t count) +{ + _codec_buffer.clear(); + + _codec_buffer.expand(offsetof(binlog_header_t, endof)); + + _codec_buffer << (struct_typeof(length))0; //length + _codec_buffer << (struct_typeof(version))BINLOG_DEFAULT_VERSION; //version + _codec_buffer << (struct_typeof(type))type; //type + _codec_buffer << (struct_typeof(operater))operater; //operator + _codec_buffer.append("\0\0\0\0\0", 5); //reserve char[5] + _codec_buffer << (struct_typeof(timestamp))(time(NULL)); //timestamp + _codec_buffer << (struct_typeof(recordcount))count; //recordcount + + return 0; +} + +int BinlogWriter::append_body(const 
void *buf, size_t size) +{ + _codec_buffer.append((char *)&size, struct_sizeof(length)); + _codec_buffer.append((const char *)buf, size); + + return 0; +} + +int BinlogWriter::Commit() +{ + //计算总长度 + uint32_t total = _codec_buffer.size(); + total -= struct_sizeof(length); + + //写入总长度 + struct_typeof(length) *length = (struct_typeof(length) *)(_codec_buffer.c_str()); + *length = total; + + return _log_writer.write(_codec_buffer.c_str(), _codec_buffer.size()); +} + +int BinlogWriter::Abort() +{ + _codec_buffer.clear(); + return 0; +} + +JournalID BinlogWriter::query_id() +{ + return _log_writer.query(); +} + +BinlogReader::BinlogReader() : _log_reader() +{ +} +BinlogReader::~BinlogReader() +{ +} + +int BinlogReader::Init(const char *path, const char *prefix) +{ + return _log_reader.open(path, prefix); +} + +int BinlogReader::Read() +{ + /* prepare buffer */ + if (_codec_buffer.resize(struct_sizeof(length)) < 0) + { + log_error("expand _codec_buffer failed"); + return -1; + } + /* read length part of one binlog */ + if (_log_reader.read(_codec_buffer.c_str(), struct_sizeof(length))) + return -1; + + struct_typeof(length) len = *(struct_typeof(length) *)_codec_buffer.c_str(); + if (len < 8 || len >= (1 << 20) /*1M*/) + { + // filter some out of range length, + // prevent client sending invalid jid crash server + return -1; + } + _codec_buffer.resize(len + struct_sizeof(length)); + if (_log_reader.read(_codec_buffer.c_str() + struct_sizeof(length), len)) + return -1; + + return 0; +} + +JournalID BinlogReader::query_id() +{ + return _log_reader.query(); +} + +int BinlogReader::Seek(const JournalID &v) +{ + return _log_reader.seek(v); +} + +uint8_t BinlogReader::binlog_type() +{ + return ((binlog_header_t *)(_codec_buffer.c_str()))->type; +} + +uint8_t BinlogReader::binlog_operator() +{ + return ((binlog_header_t *)(_codec_buffer.c_str()))->operater; +} + +uint32_t BinlogReader::record_count() +{ + return ((binlog_header_t *)(_codec_buffer.c_str()))->recordcount; +} + 
+/* + * binlog format: + * + * ===================================================== + * binlog_header_t | len1 | record1 | len2 | record2 | ... + * ===================================================== + * + */ +char *BinlogReader::record_pointer(int id) +{ + //record start + char *p = (char *)(_codec_buffer.c_str() + offsetof(binlog_header_t, endof)); + char *m = 0; + uint32_t l = struct_sizeof(length); + uint32_t ll = 0; + + for (int i = 0; i <= id; i++) + { + m = p + l; + ll = *(struct_typeof(length) *)(m - struct_sizeof(length)); + l += (ll + struct_sizeof(length)); + } + + return m; +} + +size_t BinlogReader::record_length(int id) +{ + char *p = (char *)(_codec_buffer.c_str() + offsetof(binlog_header_t, endof)); + uint32_t ll, l; + l = ll = 0; + + for (int i = 0; i <= id; i++) + { + l = *(struct_typeof(length) *)(p + ll); + ll += (l + struct_sizeof(length)); + } + + return l; +} diff --git a/src/search_local/index_storage/cache/logger.h b/src/search_local/index_storage/cache/logger.h new file mode 100644 index 0000000..c3fe1bf --- /dev/null +++ b/src/search_local/index_storage/cache/logger.h @@ -0,0 +1,193 @@ +/* + * ===================================================================================== + * + * Filename: logger.h + * + * Description: + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. 
+ * + * ===================================================================================== + */ +#ifndef __DTC_LOGGER_H +#define __DTC_LOGGER_H + +#include +#include +#include +#include "buffer.h" +#include "log.h" +#include "journal_id.h" + +#define MAX_PATH_NAME_LEN 256 + +/* + * DTC binlog base class(file) + */ +class LogBase +{ +public: + LogBase(); + virtual ~LogBase(); + +protected: + int set_path(const char *path, const char *prefix); + void file_name(char *s, int len, uint32_t serail); + int open_file(uint32_t serial, int read); + void close_file(); + int scan_serial(uint32_t *min, uint32_t *max); + int stat_size(off_t *); + int delete_file(uint32_t serial); + +private: + LogBase(const LogBase &); + +protected: + int _fd; + +private: + char _path[MAX_PATH_NAME_LEN]; //日志集所在目录 + char _prefix[MAX_PATH_NAME_LEN]; //日志集的文件前缀 +}; + +class LogWriter : public LogBase +{ +public: + int open(const char *path, const char *prefix, + off_t max_size, uint64_t total_size); + int write(const void *buf, size_t size); + JournalID query(); + +public: + LogWriter(); + virtual ~LogWriter(); + +private: + int shift_file(); + +private: + off_t _cur_size; //当前日志文件的大小 + off_t _max_size; //单个日志文件允许的最大大小 + uint64_t _total_size; //日志集允许的最大大小 + uint32_t _cur_max_serial; //当前日志文件最大编号 + uint32_t _cur_min_serial; //当前日志文件最大编号 +}; + +class LogReader : public LogBase +{ +public: + int open(const char *path, const char *prefix); + int read(void *buf, size_t size); + + int seek(const JournalID &); + JournalID query(); + +public: + LogReader(); + virtual ~LogReader(); + +private: + void refresh(); + +private: + uint32_t _min_serial; //日志集的最小文件编号 + uint32_t _max_serial; //日志集的最大文件编号 + uint32_t _cur_serial; //当前日志文件编号 + off_t _cur_offset; //当前日志文件偏移量 +}; + +///////////////////////////////////////////////////////////////////// +/* + * generic binlog header + */ +typedef struct binlog_header +{ + uint32_t length; //长度 + uint8_t version; //版本 + uint8_t type; //类型: bitmap, dtc, other + uint8_t 
operater; //操作: insert,select,upate ... + uint8_t reserve[5]; //保留 + uint32_t timestamp; //时间戳 + uint32_t recordcount; //子记录个数 + uint8_t endof[0]; +} __attribute__((__aligned__(1))) binlog_header_t; + +/* + * binlog type + * t + */ +typedef enum binlog_type +{ + BINLOG_LRU = 1, + BINLOG_INSERT = 2, + BINLOG_UPDATE = 4, + BINLOG_PRUGE = 8, + +} BINLOG_TYPE; + +/* + * binlog class + */ +#define BINLOG_MAX_SIZE (100 * (1U << 20)) //100M, 默认单个日志文件大小 +#define BINLOG_MAX_TOTAL_SIZE (3ULL << 30) //3G, 默认最大日志文件编号 +#define BINLOG_DEFAULT_VERSION 0x02 + +class BinlogWriter +{ +public: + int Init(const char *path, const char *prefix, + uint64_t total_size = BINLOG_MAX_TOTAL_SIZE, off_t max_size = BINLOG_MAX_SIZE); + int insert_header(uint8_t type, uint8_t operater, uint32_t recordcount); + int append_body(const void *buf, size_t size); + + int Commit(); + int Abort(); + JournalID query_id(); + +public: + BinlogWriter(); + virtual ~BinlogWriter(); + +private: + BinlogWriter(const BinlogWriter &); + +private: + LogWriter _log_writer; //写者 + buffer _codec_buffer; //编码缓冲区 +}; + +class BinlogReader +{ +public: + int Init(const char *path, const char *prefix); + + int Read(); //顺序读,每次读出一条binlog记录 + int Seek(const JournalID &); + JournalID query_id(); + + uint8_t binlog_type(); + uint8_t binlog_operator(); + + uint32_t record_count(); + char *record_pointer(int id = 0); + size_t record_length(int id = 0); + +public: + BinlogReader(); + virtual ~BinlogReader(); + +private: + BinlogReader(const BinlogReader &); + +private: + LogReader _log_reader; //读者 + buffer _codec_buffer; //编码缓冲区 +}; + +#endif diff --git a/src/search_local/index_storage/cache/lru_bit.cc b/src/search_local/index_storage/cache/lru_bit.cc new file mode 100644 index 0000000..058e517 --- /dev/null +++ b/src/search_local/index_storage/cache/lru_bit.cc @@ -0,0 +1,292 @@ +/* + * ===================================================================================== + * + * Filename: lru_bit.h + * + * Description: lru bitmap 
restore function. + * recording master lru change infomation in order to improve slave hit rate. + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. + * + * ===================================================================================== + */ +#include "lru_bit.h" +#include "mem_check.h" +#include "admin_tdef.h" +#include "field.h" +#include "table_def.h" +#include "node.h" +#include "node_index.h" +#include "log.h" +#include "table_def_manager.h" + +LruBitObj::LruBitObj(LruBitUnit *t) : _max_lru_bit(0), + _scan_lru_bit(0), + _scan_idx_off(0), + _lru_writer(0), + _owner(t), + scan_tm(0) + +{ + bzero(_lru_bits, LRU_BITS * sizeof(lru_bit_t *)); + + lru_scan_tm = statmgr.get_item_u32(HBP_LRU_SCAN_TM); + total_bits = statmgr.get_item_u32(HBP_LRU_TOTAL_BITS); + total_1_bits = statmgr.get_item_u32(HBP_LRU_TOTAL_1_BITS); + lru_set_count = statmgr.get_item_u32(HBP_LRU_SET_COUNT); + lru_clr_count = statmgr.get_item_u32(HBP_LRU_CLR_COUNT); + lru_set_hit_count = statmgr.get_item_u32(HBP_LRU_SET_HIT_COUNT); +} + +LruBitObj::~LruBitObj() +{ + for (int i = 0; i < LRU_BITS; i++) + { + DELETE(_lru_bits[i]); + } +} + +int LruBitObj::SetNodeID(unsigned int v, int b) +{ + int off = BBLK_OFF(v); + + if (!_lru_bits[off]) + { + NEW(lru_bit_t, _lru_bits[off]); + if (!_lru_bits[off]) + return -1; + } + + /* stat set/clr count */ + if (b) + lru_set_count++; + else + lru_clr_count++; + + if (_lru_bits[off]->set(v, b)) + { + /* stat set hit count */ + lru_set_hit_count++; + } + else if (b) + { + total_1_bits++; + } + + _max_lru_bit < off ? _max_lru_bit = off : off; + + /* stat total bits */ + total_bits < v ? 
total_bits = v : total_bits; + + return 0; +} + +void LruBitObj::timer_notify(void) +{ + + Scan(); + + attach_timer(_owner->_scan_timerlist); +} + +int LruBitObj::Init(BinlogWriter *w, int stop_until) +{ + _scan_stop_until = stop_until; + + NEW(LruWriter(w), _lru_writer); + if (!_lru_writer) + return -1; + + if (_lru_writer->Init()) + return -1; + + return 0; +} + +int LruBitObj::Scan(void) +{ + if (scan_tm == 0) + { + INIT_MSEC(scan_tm); + } + + lru_bit_t *p = _lru_bits[_scan_lru_bit]; + if (!p) + return 0; + + unsigned found_id = 0; + for (; _scan_idx_off < IDX_SIZE;) + { + unsigned found = 0; + + //扫描idx中的1 byte, 最大会有512个node id + for (int j = 0; j < 8; ++j) + { + //读取idx中的第_scan_idx_off个字节的第j位对应的blk中的8 bytes + uint64_t v = p->read(_scan_idx_off, j); + if (0 == v) + continue; + + //扫描blk中的8 bytes + for (int i = 0; i < 64; ++i) + { + if (v & 0x1) + { + found += 1; + + uint32_t id = (_scan_lru_bit << 21) + (_scan_idx_off << 9) + (j << 6) + i; + + log_debug("adjust lru: node-id=%u", id); + + _lru_writer->Write(id); + } + + v >>= 1; + } + } + + if (found > 0) + { + //批量写入lru变更 + _lru_writer->Commit(); + + //idx清零1byete, blk清零64bytes + total_1_bits -= p->clear(_scan_idx_off); + } + + _scan_idx_off += 1; + + found_id += found; + // 如果超过此水位,终止扫描, 等待下一次被调度 + if (found_id >= _scan_stop_until) + { + return 0; + } + } + + //调整为下一个lru_bit(4k) + _scan_idx_off = 0; + _scan_lru_bit += 1; + if (_scan_lru_bit > _max_lru_bit) + { + + _scan_lru_bit = 0; + + CALC_MSEC(scan_tm); + lru_scan_tm = scan_tm; + scan_tm = 0; + } + + return 0; +} + +LruBitUnit::LruBitUnit(TimerUnit *p) : _scan_timerlist(0), + _lru_bit_obj(0), + _is_start(0), + _owner(p) +{ +} + +LruBitUnit::~LruBitUnit() +{ + DELETE(_lru_bit_obj); +} + +int LruBitUnit::Init(BinlogWriter *w) +{ + _scan_timerlist = _owner->get_timer_list_by_m_seconds(LRU_SCAN_INTERVAL); + + NEW(LruBitObj(this), _lru_bit_obj); + if (!_lru_bit_obj) + return -1; + + if (_lru_bit_obj->Init(w)) + return -1; + + return 0; +} + +void 
LruBitUnit::enable_log(void) +{ + _is_start = 1; + _lru_bit_obj->attach_timer(_scan_timerlist); +} + +void LruBitUnit::disable_log(void) +{ + _is_start = 0; + _lru_bit_obj->disable_timer(); +} + +int LruBitUnit::Set(unsigned int v) +{ + return _is_start ? _lru_bit_obj->SetNodeID(v, 1) : 0; +} + +int LruBitUnit::Unset(unsigned int v) +{ + return _is_start ? _lru_bit_obj->SetNodeID(v, 0) : 0; +} + +LruWriter::LruWriter(BinlogWriter *w) : _log_writer(w), + _raw_data(0) +{ +} + +LruWriter::~LruWriter() +{ + DELETE(_raw_data); +} + +int LruWriter::Init() +{ + NEW(RawData(&g_stSysMalloc, 1), _raw_data); + if (!_raw_data) + return -1; + + unsigned type = DTCHotBackup::SYNC_LRU; + if (_raw_data->Init(0, TableDefinitionManager::Instance()->get_hot_backup_table_def()->key_size(), (const char *)&type, 0, -1, -1, 0)) + return -1; + + return 0; +} + +int LruWriter::Write(unsigned int v) +{ + log_debug("enter LruWriter, lru changes, node id:%u", v); + + Node node = I_SEARCH(v); + if (!node) //NODE已经不存在,不处理 + return 0; + + DataChunk *p = M_POINTER(DataChunk, node.vd_handle()); + RowValue r(TableDefinitionManager::Instance()->get_hot_backup_table_def()); + + r[0].u64 = DTCHotBackup::SYNC_LRU; + r[1].u64 = DTCHotBackup::NON_VALUE; + + //self table-definition encode packed key + r[2] = TableDefinitionManager::Instance()->get_cur_table_def()->packed_key(p->Key()); + r[3].Set(0); + + return _raw_data->insert_row(r, false, false); +} + +int LruWriter::Commit(void) +{ + log_debug("lru write commit"); + + _log_writer->insert_header(BINLOG_LRU, 0, 1); + _log_writer->append_body(_raw_data->get_addr(), _raw_data->data_size()); + + log_debug("body: len=%d, content:%x", _raw_data->data_size(), *(char *)_raw_data->get_addr()); + + _raw_data->delete_all_rows(); + return _log_writer->Commit(); +} diff --git a/src/search_local/index_storage/cache/lru_bit.h b/src/search_local/index_storage/cache/lru_bit.h new file mode 100644 index 0000000..d745ef6 --- /dev/null +++ 
b/src/search_local/index_storage/cache/lru_bit.h @@ -0,0 +1,215 @@ +/* + * ===================================================================================== + * + * Filename: lru_bit.h + * + * Description: lru bitmap restore function. + * recording master lru change infomation in order to improve slave hit rate. + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. + * + * ===================================================================================== + */ + +#ifndef __LRU_BIT_H +#define __LRU_BIT_H + +#include +#include +#include +#include "bitsop.h" +#include "timer_list.h" +#include "logger.h" +#include "raw_data.h" +#include "data_chunk.h" +#include "admin_tdef.h" +#include "sys_malloc.h" + +#define IDX_SIZE (4 << 10) //4K +#define BLK_SIZE (256 << 10) //256K +#define LRU_BITS (2 << 10) //2k + +#define BBLK_OFF(v) (v >> 21) +#define IDX_BYTE_OFF(v) ((v >> 9) & 0xFFF) +#define IDX_BYTE_SHIFT(v) ((v >> 6) & 0x7) +#define BLK_8_BYTE_OFF(v) ((v >> 6) & 0x7FFF) +#define BLK_BYTE_OFF(v) ((v >> 3) & 0x3FFFF) +#define BLK_BYTE_SHIFT(v) (v & 0x7) + +/* + * Node ID 位图储存表 + * + *==================================================================================== + *| 11 b | 12 b | 3 b | 3 b | 3 b | + *| bblk off | idx byte off | idx byte shift| + * | blk 8-bytes off | + * | blk byte off | blk byte shift| + *==================================================================================== + */ + +typedef struct lru_bit +{ + char _idx[IDX_SIZE]; + char _blk[BLK_SIZE]; + + lru_bit() + { + bzero(_idx, sizeof(_idx)); + bzero(_blk, sizeof(_blk)); + } + + ~lru_bit() {} + + /* 如果set命中返回1,否则返回0 */ + int set(unsigned int v, int b) + { + int hit = 0; + uint32_t byte_shift = BLK_BYTE_SHIFT(v); + uint32_t byte_offset = BLK_BYTE_OFF(v); + + if (b) + { + if (ISSET_B(byte_shift, _blk + byte_offset)) + { + hit = 1; + } + else + { + + SET_B(byte_shift, _blk + 
byte_offset); + SET_B(IDX_BYTE_SHIFT(v), _idx + IDX_BYTE_OFF(v)); + } + } + else + { + CLR_B(byte_shift, _blk + byte_offset); + } + + return hit; + } + + /* return total clear bits */ + int clear(int idx_off) + { + int clear_bits = COUNT_B(_blk + (idx_off << 6), 1 << 6); + + /* 1 byte idx */ + memset(_idx + idx_off, 0x00, 1); + + /* 64 bytes blk */ + memset(_blk + (idx_off << 6), 0x00, 1 << 6); + + return clear_bits; + } + + uint64_t read(int idx_off, int idx_shift) + { + unsigned char *ix = (unsigned char *)_idx + idx_off; + + if (ISSET_B(idx_shift, ix)) + { + + uint64_t *p = (uint64_t *)_blk; + return p[(idx_off << 3) + idx_shift]; + } + else + { + return 0; + } + } +} lru_bit_t; + +/* + * + * 扫描频率、速度控制 + * 1. 不能影响正常的update同步 + * 2. 尽量在控制时间内完成一趟扫描 + * + */ +#define LRU_SCAN_STOP_UNTIL 20 //20 +#define LRU_SCAN_INTERVAL 10 //10ms + +class RawData; +class LruWriter +{ +public: + LruWriter(BinlogWriter *); + virtual ~LruWriter(); + + int Init(); + int Write(unsigned int id); + int Commit(void); + +private: + BinlogWriter *_log_writer; + RawData *_raw_data; +}; + +class LruBitUnit; +class LruBitObj : private TimerObject +{ +public: + LruBitObj(LruBitUnit *); + ~LruBitObj(); + + int Init(BinlogWriter *, int stop_until = LRU_SCAN_STOP_UNTIL); + int SetNodeID(unsigned int v, int b); + +private: + int Scan(void); + virtual void timer_notify(void); + +private: + lru_bit_t *_lru_bits[LRU_BITS]; + uint16_t _max_lru_bit; + uint16_t _scan_lru_bit; + uint16_t _scan_idx_off; + uint16_t _scan_stop_until; + + LruWriter *_lru_writer; + LruBitUnit *_owner; + + friend class LruBitUnit; + +private: + /* statistic */ + uint32_t scan_tm; + StatItemU32 lru_scan_tm; + + StatItemU32 total_bits; + StatItemU32 total_1_bits; + + StatItemU32 lru_set_count; + StatItemU32 lru_set_hit_count; + StatItemU32 lru_clr_count; +}; + +class LruBitUnit +{ +public: + LruBitUnit(TimerUnit *); + ~LruBitUnit(); + + int Init(BinlogWriter *); + void enable_log(void); + void disable_log(void); + int 
check_status() { return _is_start; } // 0:不启动, 1:启动 + int Set(unsigned int v); + int Unset(unsigned int v); + +private: + TimerList *_scan_timerlist; + LruBitObj *_lru_bit_obj; + int _is_start; + TimerUnit *_owner; + + friend class LruBitObj; +}; + +#endif diff --git a/src/search_local/index_storage/cache/main.cc b/src/search_local/index_storage/cache/main.cc new file mode 100644 index 0000000..23edefa --- /dev/null +++ b/src/search_local/index_storage/cache/main.cc @@ -0,0 +1,1009 @@ +/* + * ===================================================================================== + * + * Filename: main.cc + * + * Description: entrance. + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. + * + * ===================================================================================== + */ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "unix_socket.h" +#include "stat_dtc.h" +#include "task_control.h" +#include "task_multiplexer.h" +#include "black_hole.h" +#include "container.h" +#include "proc_title.h" +#include "plugin_mgr.h" +#include "dtc_global.h" +#include "dynamic_helper_collection.h" +#include "key_route.h" +#include "proxy_listen_pool.h" +#include "proxy_process.h" +#include "version.h" +#include "dtcutils.h" +#include "relative_hour_calculator.h" +#include "buffer_remoteLog.h" +#include "hb_process.h" +#include "logger.h" + +using namespace ClusterConfig; +const char progname[] = "dtcd"; +const char usage_argv[] = ""; +char cacheFile[256] = CACHE_CONF_NAME; +char tableFile[256] = TABLE_CONF_NAME; +int gMaxConnCnt; + +static PollThread *cacheThread; +static BufferProcess *cachePool; +static int enablePlugin; +static int initPlugin; +static int disableCache; +static int cacheKey; 
+static int disableDataSource; +static PollThread *dsThread; +static GroupCollect *helperUnit; +static BarrierUnit *barCache; +static BarrierUnit *barHelper; +static BufferBypass *bypassUnit; +static ListenerPool *listener; +static TaskControl *serverControl; +static PluginManager *plugin_mgr; +KeyRoute *keyRoute; +static int asyncUpdate; +int targetNewHash; +int hashChanging; +static AgentListenPool *agentListener; +static AgentProcess *agentProcess; +static TaskMultiplexer *multiPlexer; + +//remote dispatcher,用来迁移数据给远端dtc +static PollThread *remoteThread; +static DynamicHelperCollection *remoteClient; + +//single thread version +static PollThread *workerThread; +static HBProcess *hotbackProcess; +static PollThread *hotbackThread; + +extern DTCConfig *gConfig; +extern void _set_remote_log_config_(const char *addr, int port, int businessid); +int collect_load_config(DbConfig *dbconfig); +static int plugin_start(void) +{ + initPlugin = 0; + + plugin_mgr = PluginManager::Instance(); + if (NULL == plugin_mgr) + { + log_error("create PluginManager instance failed."); + return -1; + } + + if (plugin_mgr->open(gConfig->get_int_val("cache", "PluginNetworkMode", 0)) != 0) + { + log_error("init plugin manager failed."); + return -1; + } + + initPlugin = 1; + + return 0; +} + +static int plugin_stop(void) +{ + plugin_mgr->close(); + PluginManager::Destroy(); + plugin_mgr = NULL; + + return 0; +} + +static int StatOpenFd() +{ + int count = 0; + for (int i = 0; i < 1000; i++) + { + if (fcntl(i, F_GETFL, 0) != -1) + count++; + } + return count; +} + +static int InitCacheMode() +{ + asyncUpdate = gConfig->get_int_val("cache", "DelayUpdate", 0); + if (asyncUpdate < 0 || asyncUpdate > 1) + { + log_crit("Invalid DelayUpdate value"); + return -1; + } + + const char *keyStr = gConfig->get_str_val("cache", "CacheShmKey"); + if (keyStr == NULL) + { + cacheKey = 0; + } + else if (!strcasecmp(keyStr, "none")) + { + log_notice("CacheShmKey set to NONE, Cache disabled"); + disableCache = 
1; + } + else if (isdigit(keyStr[0])) + { + cacheKey = strtol(keyStr, NULL, 0); + } + else + { + log_crit("Invalid CacheShmKey value \"%s\"", keyStr); + return -1; + } + + disableDataSource = gConfig->get_int_val("cache", "DisableDataSource", 0); + if (disableCache && disableDataSource) + { + log_crit("can't disableDataSource when CacheShmKey set to NONE"); + return -1; + } + + if (disableCache && asyncUpdate) + { + log_crit("can't DelayUpdate when CacheShmKey set to NONE"); + return -1; + } + + if (disableCache == 0 && cacheKey == 0) + log_notice("CacheShmKey not set, cache data is volatile"); + + if (disableDataSource) + log_notice("disable data source, cache data is volatile"); + + return 0; +} + +static int StartHotbackThread() +{ + log_debug("StartHotbackThread begin"); + hotbackThread = new PollThread("hotback"); + hotbackProcess = new HBProcess(hotbackThread); + if (hotbackThread->initialize_thread() == -1) + { + log_error("init hotback thread fail"); + return -1; + } + if (hotbackProcess->Init(gConfig->get_size_val("cache", "BinlogTotalSize", BINLOG_MAX_TOTAL_SIZE, 'M'), + gConfig->get_size_val("cache", "BinlogOneSize", BINLOG_MAX_SIZE, 'M')) == -1) + { + log_error("hotbackProcess init fail"); + return -1; + } + log_debug("StartHotbackThread end"); + return 0; +} +static int DTC_StartCacheThread() +{ + log_error("DTC_StartCacheThread start"); + cacheThread = new PollThread("cache"); + cachePool = new BufferProcess(cacheThread, TableDefinitionManager::Instance()->get_cur_table_def(), asyncUpdate ? 
MODE_ASYNC : MODE_SYNC); + cachePool->set_limit_node_size(gConfig->get_int_val("cache", "LimitNodeSize", 100 * 1024 * 1024)); + cachePool->set_limit_node_rows(gConfig->get_int_val("cache", "LimitNodeRows", 0)); + cachePool->set_limit_empty_nodes(gConfig->get_int_val("cache", "LimitEmptyNodes", 0)); + + if (cacheThread->initialize_thread() == -1) + { + return -1; + } + unsigned long long cacheSize = gConfig->get_size_val("cache", "CacheMemorySize", 0, 'M'); + if (cacheSize <= (50ULL << 20)) // 50M + { + log_error("CacheMemorySize too small"); + return -1; + } + else if (sizeof(long) == 4 && cacheSize >= 4000000000ULL) + { + log_crit("CacheMemorySize %lld too large", cacheSize); + } + else if ( + cachePool->buffer_set_size( + cacheSize, + gConfig->get_int_val("cache", "CacheShmVersion", 4)) == -1) + { + return -1; + } + + /* disable async transaction log */ + cachePool->disable_async_log(1); + + int lruLevel = gConfig->get_int_val("cache", "disable_lru_update", 0); + if (disableDataSource) + { + if (cachePool->enable_no_db_mode() < 0) + { + return -1; + } + if (gConfig->get_int_val("cache", "disable_auto_purge", 0) > 0) + { + cachePool->disable_auto_purge(); + // lruLevel = 3; /* LRU_WRITE */ + } + int autoPurgeAlertTime = gConfig->get_int_val("cache", "AutoPurgeAlertTime", 0); + cachePool->set_date_expire_alert_time(autoPurgeAlertTime); + if (autoPurgeAlertTime > 0 && TableDefinitionManager::Instance()->get_cur_table_def()->lastcmod_field_id() <= 0) + { + log_crit("Can't start AutoPurgeAlert without lastcmod field"); + return -1; + } + } + cachePool->disable_lru_update(lruLevel); + cachePool->enable_lossy_data_source(gConfig->get_int_val("cache", "LossyDataSource", 0)); + + if (asyncUpdate != MODE_SYNC && cacheKey == 0) + { + log_crit("Anonymous shared memory don't support DelayUpdate"); + return -1; + } + + int iAutoDeleteDirtyShm = gConfig->get_int_val("cache", "AutoDeleteDirtyShareMemory", 0); + /*disable empty node filter*/ + if (cachePool->cache_open(cacheKey, 
0, iAutoDeleteDirtyShm) == -1) + { + return -1; + } + + if (cachePool->update_mode() || cachePool->is_mem_dirty()) // asyncUpdate active + { + if (TableDefinitionManager::Instance()->get_cur_table_def()->uniq_fields() < 1) + { + log_crit("DelayUpdate needs uniq-field(s)"); + return -1; + } + + if (disableDataSource) + { + if (cachePool->update_mode()) + { + log_crit("Can't start async mode when disableDataSource."); + return -1; + } + else + { + log_crit("Can't start disableDataSource with shm dirty,please flush async shm to db first or delete shm"); + return -1; + } + } + else + { + if ((TableDefinitionManager::Instance()->get_cur_table_def()->compress_field_id() >= 0)) + { + + log_crit("sorry,DTC just support compress in disableDataSource mode now."); + return -1; + } + } + + /*marker is the only source of flush speed calculattion, inc precision to 10*/ + cachePool->set_flush_parameter( + gConfig->get_int_val("cache", "MarkerPrecision", 10), + gConfig->get_int_val("cache", "MaxFlushSpeed", 1), + gConfig->get_int_val("cache", "MinDirtyTime", 3600), + gConfig->get_int_val("cache", "MaxDirtyTime", 43200)); + + cachePool->set_drop_count( + gConfig->get_int_val("cache", "MaxDropCount", 1000)); + } + else + { + if (!disableDataSource) + helperUnit->disable_commit_group(); + } + + if (cachePool->set_insert_order(dbConfig->ordIns) < 0) + return -1; + + log_error("DTC_StartCacheThread end"); + return 0; +} + +int collect_load_config(DbConfig *dbconfig) +{ + if (0 != disableDataSource) + return 0; + if (!helperUnit) + return -1; + if (dbconfig == NULL) + { + log_error("dbconfig == NULL"); + return -1; + } + if (helperUnit->renew_config(dbconfig)) + { + log_error("helperunit renew config error!"); + return -1; + } + + return 0; +} + +int StartRemoteClientThread() +{ + log_debug("StartRemoteClientThread begin"); + remoteThread = new PollThread("remoteClient"); + remoteClient = new DynamicHelperCollection(remoteThread, gConfig->get_int_val("cache", "HelperCountPerGroup", 
16)); + if (remoteThread->initialize_thread() == -1) + { + log_error("init remote thread error"); + return -1; + } + + //get helper timeout + int timeout = gConfig->get_int_val("cache", "HelperTimeout", 30); + int retry = gConfig->get_int_val("cache", "HelperRetryTimeout", 1); + int connect = gConfig->get_int_val("cache", "HelperConnectTimeout", 10); + + remoteClient->set_timer_handler( + remoteThread->get_timer_list(timeout), + remoteThread->get_timer_list(connect), + remoteThread->get_timer_list(retry)); + log_debug("StartRemoteClientThread end"); + return 0; +} + +int DTC_SetupRemoteClientThread(PollThread *thread) +{ + log_debug("DTC_SetupRemoteClientThread begin"); + + remoteClient = new DynamicHelperCollection(thread, gConfig->get_int_val("cache", "HelperCountPerGroup", 16)); + + //get helper timeout + int timeout = gConfig->get_int_val("cache", "HelperTimeout", 30); + int retry = gConfig->get_int_val("cache", "HelperRetryTimeout", 1); + int connect = gConfig->get_int_val("cache", "HelperConnectTimeout", 10); + + remoteClient->set_timer_handler( + thread->get_timer_list(timeout), + thread->get_timer_list(connect), + thread->get_timer_list(retry)); + log_debug("DTC_SetupRemoteClientThread end"); + return 0; +} +int DTC_StartDataSourceThread() +{ + log_debug("DTC_StartDataSourceThread begin"); + if (disableDataSource == 0) + { + helperUnit = new GroupCollect(); + if (helperUnit->load_config(dbConfig, TableDefinitionManager::Instance()->get_cur_table_def()->key_format()) == -1) + { + return -1; + } + } + + //get helper timeout + int timeout = gConfig->get_int_val("cache", "HelperTimeout", 30); + int retry = gConfig->get_int_val("cache", "HelperRetryTimeout", 1); + int connect = gConfig->get_int_val("cache", "HelperConnectTimeout", 10); + + dsThread = new PollThread("source"); + if (dsThread->initialize_thread() == -1) + return -1; + + if (disableDataSource == 0) + helperUnit->set_timer_handler( + dsThread->get_timer_list(timeout), + 
dsThread->get_timer_list(connect), + dsThread->get_timer_list(retry)); + log_debug("DTC_StartDataSourceThread end"); + return 0; +} + +static int DTC_SetupBufferProcess(PollThread *thread) +{ + log_error("DTC_SetupBufferProcess start"); + cachePool = new BufferProcess(thread, TableDefinitionManager::Instance()->get_cur_table_def(), asyncUpdate ? MODE_ASYNC : MODE_SYNC); + cachePool->set_limit_node_size(gConfig->get_int_val("cache", "LimitNodeSize", 100 * 1024 * 1024)); + cachePool->set_limit_node_rows(gConfig->get_int_val("cache", "LimitNodeRows", 0)); + cachePool->set_limit_empty_nodes(gConfig->get_int_val("cache", "LimitEmptyNodes", 0)); + + unsigned long long cacheSize = gConfig->get_size_val("cache", "CacheMemorySize", 0, 'M'); + if (cacheSize <= (50ULL << 20)) // 50M + { + log_error("CacheMemorySize too small"); + return -1; + } + else if (sizeof(long) == 4 && cacheSize >= 4000000000ULL) + { + log_crit("CacheMemorySize %lld too large", cacheSize); + } + else if ( + cachePool->buffer_set_size( + cacheSize, + gConfig->get_int_val("cache", "CacheShmVersion", 4)) == -1) + { + return -1; + } + + /* disable async transaction log */ + cachePool->disable_async_log(1); + + int lruLevel = gConfig->get_int_val("cache", "disable_lru_update", 0); + if (disableDataSource) + { + if (cachePool->enable_no_db_mode() < 0) + { + return -1; + } + if (gConfig->get_int_val("cache", "disable_auto_purge", 0) > 0) + { + cachePool->disable_auto_purge(); + // lruLevel = 3; /* LRU_WRITE */ + } + int autoPurgeAlertTime = gConfig->get_int_val("cache", "AutoPurgeAlertTime", 0); + cachePool->set_date_expire_alert_time(autoPurgeAlertTime); + if (autoPurgeAlertTime > 0 && TableDefinitionManager::Instance()->get_cur_table_def()->lastcmod_field_id() <= 0) + { + log_crit("Can't start AutoPurgeAlert without lastcmod field"); + return -1; + } + } + cachePool->disable_lru_update(lruLevel); + cachePool->enable_lossy_data_source(gConfig->get_int_val("cache", "LossyDataSource", 0)); + + if (asyncUpdate 
!= MODE_SYNC && cacheKey == 0) + { + log_crit("Anonymous shared memory don't support DelayUpdate"); + return -1; + } + + int iAutoDeleteDirtyShm = gConfig->get_int_val("cache", "AutoDeleteDirtyShareMemory", 0); + /*disable empty node filter*/ + if (cachePool->cache_open(cacheKey, 0, iAutoDeleteDirtyShm) == -1) + { + return -1; + } + + if (cachePool->update_mode() || cachePool->is_mem_dirty()) // asyncUpdate active + { + if (TableDefinitionManager::Instance()->get_cur_table_def()->uniq_fields() < 1) + { + log_crit("DelayUpdate needs uniq-field(s)"); + return -1; + } + + if (disableDataSource) + { + if (cachePool->update_mode()) + { + log_crit("Can't start async mode when disableDataSource."); + return -1; + } + else + { + log_crit("Can't start disableDataSource with shm dirty,please flush async shm to db first or delete shm"); + return -1; + } + } + else + { + if ((TableDefinitionManager::Instance()->get_cur_table_def()->compress_field_id() >= 0)) + { + log_crit("sorry,DTC just support compress in disableDataSource mode now."); + return -1; + } + } + + /*marker is the only source of flush speed calculattion, inc precision to 10*/ + cachePool->set_flush_parameter( + gConfig->get_int_val("cache", "MarkerPrecision", 10), + gConfig->get_int_val("cache", "MaxFlushSpeed", 1), + gConfig->get_int_val("cache", "MinDirtyTime", 3600), + gConfig->get_int_val("cache", "MaxDirtyTime", 43200)); + + cachePool->set_drop_count(gConfig->get_int_val("cache", "MaxDropCount", 1000)); + } + else + { + if (!disableDataSource) + helperUnit->disable_commit_group(); + } + + if (cachePool->set_insert_order(dbConfig->ordIns) < 0) + return -1; + + log_error("DTC_SetupBufferProcess end"); + return 0; +} + +int DTC_SetupGroupCollect(PollThread *thread) +{ + log_debug("DTC_SetupGroupCollect begin"); + + helperUnit = new GroupCollect(); + if (helperUnit->load_config(dbConfig, TableDefinitionManager::Instance()->get_cur_table_def()->key_format()) == -1) + { + return -1; + } + //get helper timeout + 
int timeout = gConfig->get_int_val("cache", "HelperTimeout", 30); + int retry = gConfig->get_int_val("cache", "HelperRetryTimeout", 1); + int connect = gConfig->get_int_val("cache", "HelperConnectTimeout", 10); + + /* one timer list per configured value, passed in the order timeout, connect, retry */ helperUnit->set_timer_handler(thread->get_timer_list(timeout), + thread->get_timer_list(connect), + thread->get_timer_list(retry)); + + helperUnit->Attach(thread); + if (disableCache) + { + helperUnit->disable_commit_group(); + } + log_debug("DTC_SetupGroupCollect end"); + return 0; +} + +/* Single-thread startup: calls StartHotbackThread, creates one PollThread named "worker" and builds every unit on it (remote client, GroupCollect unless disableDataSource, BufferProcess unless disableCache, barrier, bypass or key route, task control, multiplexer, agent listener), then starts the thread(s). Returns 0 on success, -1 at the first failing step. */ static int DTC_Startup_Single_Thread() +{ + //instant workerThread + if (0 != StartHotbackThread()) + { + return -1; + } + workerThread = new PollThread("worker"); + if (workerThread->initialize_thread() == -1) + return -1; + + if (DTC_SetupRemoteClientThread(workerThread) < 0) + return -1; + + if (disableDataSource == 0) + { + if (DTC_SetupGroupCollect(workerThread) < 0) + return -1; + } + + if (disableCache == 0) + { + if (DTC_SetupBufferProcess(workerThread) < 0) + return -1; + } + + int iMaxBarrierCount = gConfig->get_int_val("cache", "MaxBarrierCount", 1000); + int iMaxKeyCount = gConfig->get_int_val("cache", "MaxKeyCount", 1000); + + barCache = new BarrierUnit(workerThread, iMaxBarrierCount, iMaxKeyCount, BarrierUnit::IN_FRONT); + if (disableCache) + { + /* cache disabled: barCache -> bypassUnit -> helperUnit */ bypassUnit = new BufferBypass(workerThread); + barCache->bind_dispatcher(bypassUnit); + bypassUnit->bind_dispatcher(helperUnit); + } + else + { + keyRoute = new KeyRoute(workerThread, TableDefinitionManager::Instance()->get_cur_table_def()->key_format()); + if (!check_and_create()) + { + log_error("check_and_create error"); + return -1; + } + else + { + log_debug("check_and_create ok"); + } + /* NOTE(review): no element type is given for this vector, so the declaration is incomplete as written - restore the template argument from the upstream source */ std::vector clusterConf; + if (!parse_cluster_config(&clusterConf)) + { + log_error("parse_cluster_config error"); + return -1; + } + else + log_debug("parse_cluster_config ok"); + + keyRoute->Init(clusterConf); + if (keyRoute->load_node_state_if_any() != 0) + { + log_error("key route init error!"); + return -1; + } +
log_debug("keyRoute->Init ok"); + /* cache enabled: barCache -> keyRoute, which is bound to cachePool and to remoteClient */ barCache->bind_dispatcher(keyRoute); + keyRoute->bind_cache(cachePool); + keyRoute->bind_remote_helper(remoteClient); + cachePool->bind_dispatcher_remote(remoteClient); + cachePool->bind_hb_log_dispatcher(hotbackProcess); + if (disableDataSource) + { + /* NOTE(review): `hole` is held only by this local and is not among the objects deleted in main2 - confirm who owns it */ BlackHole *hole = new BlackHole(workerThread); + cachePool->bind_dispatcher(hole); + } + else + { + if (cachePool->update_mode() || cachePool->is_mem_dirty()) + { + /* delayed update or dirty memory: a second barrier (IN_BACK) sits between cachePool and helperUnit */ barHelper = new BarrierUnit(workerThread, iMaxBarrierCount, iMaxKeyCount, BarrierUnit::IN_BACK); + cachePool->bind_dispatcher(barHelper); + barHelper->bind_dispatcher(helperUnit); + } + else + { + cachePool->bind_dispatcher(helperUnit); + } + } + } + + serverControl = TaskControl::get_instance(workerThread); + if (NULL == serverControl) + { + log_crit("create TaskControl object failed, errno[%d], msg[%s]", errno, strerror(errno)); + return -1; + } + serverControl->bind_dispatcher(barCache); + log_debug("bind server control ok"); + + /* front of the chain as bound here: agentProcess -> multiPlexer -> serverControl -> barCache */ multiPlexer = new TaskMultiplexer(workerThread); + multiPlexer->bind_dispatcher(serverControl); + + agentProcess = new AgentProcess(workerThread); + agentProcess->bind_dispatcher(multiPlexer); + + agentListener = new AgentListenPool(); + if (agentListener->Bind(gConfig, agentProcess, workerThread) < 0) + return -1; + + int open_cnt = StatOpenFd(); + gMaxConnCnt = gConfig->get_int_val("cache", "MaxFdCount", 1024) - open_cnt - 10; // reserve 10 fds + if (gMaxConnCnt < 0) + { + log_crit("MaxFdCount should large than %d", open_cnt + 10); + return -1; + } + + workerThread->running_thread(); + if (hotbackThread) + { + hotbackThread->running_thread(); + } + + return 0; +} + +/* Multi-thread startup: calls DTC_StartDataSourceThread, StartRemoteClientThread, StartHotbackThread and, unless disableCache is set, DTC_StartCacheThread; then binds the same dispatcher chain as the single-thread variant, placing units on cacheThread or dsThread, and finally starts every thread that exists. Returns 0 on success, -1 at the first failing step. */ static int DTC_Startup_Muti_Thread() +{ + if (DTC_StartDataSourceThread() < 0) + return -1; + if (StartRemoteClientThread() < 0) + return -1; + if (0 != StartHotbackThread()) + { + return -1; + } + if (disableCache) + { + helperUnit->disable_commit_group(); + } + else if (DTC_StartCacheThread() < 0) + return -1; + + if 
(!disableDataSource) + helperUnit->Attach(dsThread); + + int iMaxBarrierCount = gConfig->get_int_val("cache", "MaxBarrierCount", 1000); + int iMaxKeyCount = gConfig->get_int_val("cache", "MaxKeyCount", 1000); + + /* GNU `?:` with omitted middle operand: cacheThread when it is non-null, otherwise dsThread */ barCache = new BarrierUnit(cacheThread ?: dsThread, iMaxBarrierCount, iMaxKeyCount, BarrierUnit::IN_FRONT); + if (disableCache) + { + bypassUnit = new BufferBypass(dsThread); + barCache->bind_dispatcher(bypassUnit); + bypassUnit->bind_dispatcher(helperUnit); + } + else + { + keyRoute = new KeyRoute(cacheThread, TableDefinitionManager::Instance()->get_cur_table_def()->key_format()); + if (!check_and_create()) + { + log_error("check_and_create error"); + return -1; + } + else + log_debug("check_and_create ok"); + /* NOTE(review): no element type is given for this vector, so the declaration is incomplete as written - restore the template argument from the upstream source */ std::vector clusterConf; + if (!parse_cluster_config(&clusterConf)) + { + log_error("parse_cluster_config error"); + return -1; + } + else + log_debug("parse_cluster_config ok"); + + keyRoute->Init(clusterConf); + if (keyRoute->load_node_state_if_any() != 0) + { + log_error("key route init error!"); + return -1; + } + log_debug("keyRoute->Init ok"); + barCache->bind_dispatcher(keyRoute); + keyRoute->bind_cache(cachePool); + keyRoute->bind_remote_helper(remoteClient); + cachePool->bind_dispatcher_remote(remoteClient); + cachePool->bind_hb_log_dispatcher(hotbackProcess); + if (disableDataSource) + { + /* NOTE(review): `hole` is held only by this local and is not among the objects deleted in main2 - confirm who owns it */ BlackHole *hole = new BlackHole(dsThread); + cachePool->bind_dispatcher(hole); + } + else + { + if (cachePool->update_mode() || cachePool->is_mem_dirty()) + { + barHelper = new BarrierUnit(dsThread, iMaxBarrierCount, iMaxKeyCount, BarrierUnit::IN_BACK); + cachePool->bind_dispatcher(barHelper); + barHelper->bind_dispatcher(helperUnit); + } + else + { + cachePool->bind_dispatcher(helperUnit); + } + } + } + + serverControl = TaskControl::get_instance(cacheThread ?: dsThread); + if (NULL == serverControl) + { + log_crit("create TaskControl object failed, errno[%d], msg[%s]", errno, strerror(errno)); + return -1; + } +
serverControl->bind_dispatcher(barCache); + log_debug("bind server control ok"); + + multiPlexer = new TaskMultiplexer(cacheThread ?: dsThread); + multiPlexer->bind_dispatcher(serverControl); + + agentProcess = new AgentProcess(cacheThread ?: dsThread); + agentProcess->bind_dispatcher(multiPlexer); + + agentListener = new AgentListenPool(); + if (agentListener->Bind(gConfig, agentProcess) < 0) + return -1; + + int open_cnt = StatOpenFd(); + gMaxConnCnt = gConfig->get_int_val("cache", "MaxFdCount", 1024) - open_cnt - 10; // reserve 10 fds + if (gMaxConnCnt < 0) + { + log_crit("MaxFdCount should large than %d", open_cnt + 10); + return -1; + } + + /* start whichever threads were created; each pointer may be null depending on the configuration */ if (dsThread) + { + dsThread->running_thread(); + } + + if (remoteThread) + { + remoteThread->running_thread(); + } + if (hotbackThread) + { + hotbackThread->running_thread(); + } + if (cacheThread) + { + cacheThread->running_thread(); + } + + agentListener->Run(); + + return 0; +} + +// second part of entry +static int main2(void *dummy); + +/* Process entry point: initializes the daemon and configuration, copies tuning values from the "cache" section into globals, initializes statistics and the cache mode, daemonizes, configures remote logging, then hands main2 to start_watch_dog and finally runs main2 itself. Returns -1 when an initialization step fails, otherwise the result of main2. */ int main(int argc, char *argv[]) +{ + enable_memchecker(); + init_proc_title(argc, argv); + /* return values of mkdir are not checked */ mkdir("../stat", 0777); + mkdir("../data", 0777); + if (dtc_daemon_init(argc, argv) < 0) + return -1; + + if (gConfig->get_int_val("cache", "EnableCoreDump", 0)) + daemon_enable_core_dump(); + + hashChanging = gConfig->get_int_val("cache", "HashChanging", 0); + targetNewHash = gConfig->get_int_val("cache", "TargetNewHash", 0); + + DTCGlobal::_pre_alloc_NG_num = gConfig->get_int_val("cache", "PreAllocNGNum", 1024); + /* NOTE(review): values <= 1 and values >= (1 << 12) both become 1 - confirm the upper bound is meant to reset to 1 rather than clamp to the limit */ DTCGlobal::_pre_alloc_NG_num = DTCGlobal::_pre_alloc_NG_num <= 1 ? 1 : DTCGlobal::_pre_alloc_NG_num >= (1 << 12) ?
1 : DTCGlobal::_pre_alloc_NG_num; + + /* negative MinChunkSize is treated as 0 */ DTCGlobal::_min_chunk_size = gConfig->get_int_val("cache", "MinChunkSize", 0); + if (DTCGlobal::_min_chunk_size < 0) + { + DTCGlobal::_min_chunk_size = 0; + } + + /* pre_purge_nodes is clamped to the range 0..10000 */ DTCGlobal::_pre_purge_nodes = gConfig->get_int_val("cache", "pre_purge_nodes", 0); + if (DTCGlobal::_pre_purge_nodes < 0) + { + DTCGlobal::_pre_purge_nodes = 0; + } + else if (DTCGlobal::_pre_purge_nodes > 10000) + { + DTCGlobal::_pre_purge_nodes = 10000; + } + + RELATIVE_HOUR_CALCULATOR->SetBaseHour(gConfig->get_int_val("cache", "RelativeYear", 2014)); + + InitStat(); + + log_info("Table %s: key/field# %d/%d, keysize %d", + dbConfig->tblName, + TableDefinitionManager::Instance()->get_cur_table_def()->key_fields(), + TableDefinitionManager::Instance()->get_cur_table_def()->num_fields() + 1, + TableDefinitionManager::Instance()->get_cur_table_def()->max_key_size()); + + if (InitCacheMode() < 0) + return -1; + + if (daemon_start() < 0) + return -1; + + Thread::set_auto_config_instance(gConfig->get_auto_config_instance("cache")); + + _set_remote_log_config_(gConfig->get_str_val("cache", "RemoteLogAddr"), + gConfig->get_int_val("cache", "RemoteLogPort", 0), + dtc::utils::get_bid()); + REMOTE_LOG->set_remote_port(gConfig->get_int_val("cache", "RemoteLogPort", 0)); + if (gConfig->get_int_val("cache", "RemoteOpLogOn", 0) != 0) + REMOTE_LOG->set_op_log_on(); + + /* main2 is handed to the watchdog and is also run directly afterwards */ if (start_watch_dog(main2, NULL) < 0) + return -1; + return main2(NULL); +} + +/* Second half of startup and the whole shutdown sequence: sets the fd limit, starts the statistics thread, runs the single- or multi-thread startup chosen by "UseSingleThread", waits in daemon_wait(), then interrupts the threads and deletes the global units. `dummy` is not used. Returns the startup result, or -1 from the early-exit paths. */ static int main2(void *dummy) +{ + + Thread *mainThread; + NEW(Thread("main", Thread::ThreadTypeProcess), mainThread); + if (mainThread != NULL) + { + mainThread->initialize_thread(); + } + + if (daemon_set_fd_limit(gConfig->get_int_val("cache", "MaxFdCount", 0)) < 0) + return -1; + + //start statistic thread. 
+ statmgr.start_background_thread(); + + int ret = 0; + int useSingleThread = gConfig->get_int_val("cache", "UseSingleThread", 0); + if (useSingleThread) + { + ret = DTC_Startup_Single_Thread(); + } + else + { + ret = DTC_Startup_Muti_Thread(); + } + + /* only a successful startup reaches the serving phase; a failed one falls through to the shutdown code below */ if (ret == 0) + { + extern void InitTaskExecutor(const char *, AgentListenPool *, TaskDispatcher *); + InitTaskExecutor(TableDefinitionManager::Instance()->get_cur_table_def()->table_name(), agentListener, serverControl); + + //init plugin + enablePlugin = gConfig->get_int_val("cache", "EnablePlugin", 0); + if (enablePlugin) + { + if (plugin_start() < 0) + { + /* NOTE(review): returns straight out of main2, skipping the thread interrupt and DELETE sequence below - confirm this is intended */ return -1; + } + } + +#if MEMCHECK + log_debug("memory allocated %lu virtual %lu", count_alloc_size(), count_virtual_size()); + + report_mallinfo(); +#endif + log_info("%s v%s: running...", progname, version); + daemon_wait(); + } + + log_info("%s v%s: stoppping...", progname, version); +#if MEMCHECK + log_debug("memory allocated %lu virtual %lu", count_alloc_size(), count_virtual_size()); + report_mallinfo(); +#endif + + //stop plugin + if (enablePlugin && initPlugin) + { + plugin_stop(); + } + + /* NOTE(review): `listener` is not assigned anywhere in the visible startup code, while agentListener (created there) is not deleted in this sequence - confirm which object is meant */ DELETE(listener); + + if (cacheThread) + { + cacheThread->interrupt(); + } + if (hotbackThread) + { + hotbackThread->interrupt(); + } + if (dsThread) + { + dsThread->interrupt(); + } + + if (remoteThread) + { + remoteThread->interrupt(); + } + + if (workerThread) + { + workerThread->interrupt(); + } + + extern void StopTaskExecutor(void); + StopTaskExecutor(); + + /* units are deleted before the threads they were created on */ DELETE(cachePool); + DELETE(helperUnit); + DELETE(barCache); + DELETE(keyRoute); + DELETE(barHelper); + DELETE(bypassUnit); + DELETE(hotbackProcess); + DELETE(remoteClient); + DELETE(cacheThread); + DELETE(dsThread); + DELETE(remoteThread); + DELETE(workerThread); + DELETE(hotbackThread); + statmgr.stop_background_thread(); + log_info("%s v%s: stopped", progname, version); + daemon_cleanup(); +#if MEMCHECK + dump_non_delete(); + log_debug("memory allocated %lu virtual %lu", count_alloc_size(),
count_virtual_size()); +#endif + return ret; +} diff --git a/src/search_local/index_storage/cache/mallocator.h b/src/search_local/index_storage/cache/mallocator.h new file mode 100644 index 0000000..0f94aea --- /dev/null +++ b/src/search_local/index_storage/cache/mallocator.h @@ -0,0 +1,123 @@ +/* + * ===================================================================================== + * + * Filename: mallocator.h + * + * Description: memory operating interface. + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. + * + * ===================================================================================== + */ + +#ifndef MALLOCATOR_H +#define MALLOCATOR_H + +#include +#include +#include "namespace.h" +/* NOTE(review): the two #include lines above carry no header name as written; the uint32_t/uint64_t types used below need <stdint.h> - restore both names from the upstream source */ + +DTC_BEGIN_NAMESPACE + +#define ALLOC_SIZE_T uint32_t +#define ALLOC_HANDLE_T uint64_t +#define INTER_SIZE_T uint64_t +#define INTER_HANDLE_T uint64_t + +#define INVALID_HANDLE 0ULL + +#define SIZE_SZ (sizeof(ALLOC_SIZE_T)) +#define MALLOC_ALIGNMENT (2 * SIZE_SZ) +#define MALLOC_ALIGN_MASK (MALLOC_ALIGNMENT - 1) +#define MAX_ALLOC_SIZE (((ALLOC_SIZE_T)-1) & ~MALLOC_ALIGN_MASK) + +/* Abstract allocator interface. Memory blocks are identified by ALLOC_HANDLE_T values instead of raw pointers; INVALID_HANDLE (0) means "no block". Implementations supply allocation, release and the handle <-> pointer conversions. */ +class Mallocator +{ +public: + Mallocator() {} + virtual ~Mallocator() {} + + /* Typed convenience wrapper around handle_to_ptr(): returns the address for hHandle cast to T*. */ + template <class T> + T *Pointer(ALLOC_HANDLE_T hHandle) { return reinterpret_cast<T *>(handle_to_ptr(hHandle)); } + + virtual ALLOC_HANDLE_T Handle(void *p) = 0; + + virtual const char *get_err_msg() = 0; + + /************************************************* + Description: allocate memory + Input: tSize size of the memory to allocate + Output: + Return: handle of the memory block; INVALID_HANDLE on failure + *************************************************/ + virtual ALLOC_HANDLE_T Malloc(ALLOC_SIZE_T tSize) = 0; + + /************************************************* + Description: allocate memory and initialize it to 0 + Input: tSize size of the memory to allocate + Output: + Return: handle of the memory block; INVALID_HANDLE on failure + *************************************************/ + virtual ALLOC_HANDLE_T Calloc(ALLOC_SIZE_T tSize) = 0; + + 
/************************************************* + Description: reallocate memory + Input: hHandle handle of the old memory block + tSize size of the new allocation + Output: + Return: handle of the memory block; INVALID_HANDLE on failure (the old block is not released on failure) + *************************************************/ + virtual ALLOC_HANDLE_T ReAlloc(ALLOC_HANDLE_T hHandle, ALLOC_SIZE_T tSize) = 0; + + /************************************************* + Description: release memory + Input: hHandle memory handle + Output: + Return: 0 on success, non-zero on failure + *************************************************/ + virtual int Free(ALLOC_HANDLE_T hHandle) = 0; + + /************************************************* + Description: get the size of a memory block + Input: hHandle memory handle + Output: + Return: size of the memory block + *************************************************/ + virtual ALLOC_SIZE_T chunk_size(ALLOC_HANDLE_T hHandle) = 0; + + /************************************************* + Description: convert a handle to a memory address + Input: memory handle + Output: + Return: memory address; NULL if the handle is invalid + *************************************************/ + virtual void *handle_to_ptr(ALLOC_HANDLE_T hHandle) = 0; + + /************************************************* + Description: convert a memory address to a handle + Input: memory address + Output: + Return: memory handle; INVALID_HANDLE if the address is invalid + *************************************************/ + virtual ALLOC_HANDLE_T ptr_to_handle(void *p) = 0; + + virtual ALLOC_SIZE_T ask_for_destroy_size(ALLOC_HANDLE_T hHandl) = 0; + + /************************************************* + Description: check whether a handle is valid + Input: memory handle + Output: + Return: 0: valid; -1: invalid + *************************************************/ + virtual int handle_is_valid(ALLOC_HANDLE_T mem_handle) = 0; +}; + +DTC_END_NAMESPACE + +#endif diff --git a/src/search_local/index_storage/cache/mysql_error.h b/src/search_local/index_storage/cache/mysql_error.h new file mode 100644 index 0000000..d713134 --- /dev/null +++ b/src/search_local/index_storage/cache/mysql_error.h @@ -0,0 +1,291 @@ +/* + * ===================================================================================== + * + * Filename: mysql_error.h +
* + * Description: error code constants named after MySQL server (ER_*) and client (CR_*) errors. + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. + * + * ===================================================================================== + */ +#ifndef __H_DTC_MYSQL_ERROR_H__ +#define __H_DTC_MYSQL_ERROR_H__ +/* Anonymous enum of numeric error codes: ER_* entries run from 1000 upwards, CR_* entries from 1900 upwards. NOTE(review): MySQL's own client header numbers CR_* codes from 2000, so the CR_* values here presumably are a local remapping - confirm before comparing them with codes returned by the MySQL client library. */ enum +{ + ER_HASHCHK = 1000, + ER_NISAMCHK = 1001, + ER_NO = 1002, + ER_YES = 1003, + ER_CANT_CREATE_FILE = 1004, + ER_CANT_CREATE_TABLE = 1005, + ER_CANT_CREATE_DB = 1006, + ER_DB_CREATE_EXISTS = 1007, + ER_DB_DROP_EXISTS = 1008, + ER_DB_DROP_DELETE = 1009, + ER_DB_DROP_RMDIR = 1010, + ER_CANT_DELETE_FILE = 1011, + ER_CANT_FIND_SYSTEM_REC = 1012, + ER_CANT_GET_STAT = 1013, + ER_CANT_GET_WD = 1014, + ER_CANT_LOCK = 1015, + ER_CANT_OPEN_FILE = 1016, + ER_FILE_NOT_FOUND = 1017, + ER_CANT_READ_DIR = 1018, + ER_CANT_SET_WD = 1019, + ER_CHECKREAD = 1020, + ER_DISK_FULL = 1021, + ER_DUP_KEY = 1022, + ER_ERROR_ON_CLOSE = 1023, + ER_ERROR_ON_READ = 1024, + ER_ERROR_ON_RENAME = 1025, + ER_ERROR_ON_WRITE = 1026, + ER_FILE_USED = 1027, + ER_FILSORT_ABORT = 1028, + ER_FORM_NOT_FOUND = 1029, + ER_GET_ERRNO = 1030, + ER_ILLEGAL_HA = 1031, + ER_KEY_NOT_FOUND = 1032, + ER_NOT_FORM_FILE = 1033, + ER_NOT_KEYFILE = 1034, + ER_OLD_KEYFILE = 1035, + ER_OPEN_AS_READONLY = 1036, + ER_OUTOFMEMORY = 1037, + ER_OUT_OF_SORTMEMORY = 1038, + ER_UNEXPECTED_EOF = 1039, + ER_CON_COUNT_ERROR = 1040, + ER_OUT_OF_RESOURCES = 1041, + ER_BAD_HOST_ERROR = 1042, + ER_HANDSHAKE_ERROR = 1043, + ER_DBACCESS_DENIED_ERROR = 1044, + ER_ACCESS_DENIED_ERROR = 1045, + ER_NO_DB_ERROR = 1046, + ER_UNKNOWN_COM_ERROR = 1047, + ER_BAD_NULL_ERROR = 1048, + ER_BAD_DB_ERROR = 1049, + ER_TABLE_EXISTS_ERROR = 1050, + ER_BAD_TABLE_ERROR = 1051, + ER_NON_UNIQ_ERROR = 1052, + ER_SERVER_SHUTDOWN = 1053, + ER_BAD_FIELD_ERROR = 1054, + ER_WRONG_FIELD_WITH_GROUP = 1055, + ER_WRONG_GROUP_FIELD = 1056, + ER_WRONG_SUM_SELECT = 1057, + ER_WRONG_VALUE_COUNT = 1058, + ER_TOO_LONG_IDENT =
1059, + ER_DUP_FIELDNAME = 1060, + ER_DUP_KEYNAME = 1061, + ER_DUP_ENTRY = 1062, + ER_WRONG_FIELD_SPEC = 1063, + ER_PARSE_ERROR = 1064, + ER_EMPTY_QUERY = 1065, + ER_NONUNIQ_TABLE = 1066, + ER_INVALID_DEFAULT = 1067, + ER_MULTIPLE_PRI_KEY = 1068, + ER_TOO_MANY_KEYS = 1069, + ER_TOO_MANY_KEY_PARTS = 1070, + ER_TOO_LONG_KEY = 1071, + ER_KEY_COLUMN_DOES_NOT_EXITS = 1072, + ER_BLOB_USED_AS_KEY = 1073, + ER_TOO_BIG_FIELDLENGTH = 1074, + ER_WRONG_AUTO_KEY = 1075, + ER_READY = 1076, + ER_NORMAL_SHUTDOWN = 1077, + ER_GOT_SIGNAL = 1078, + ER_SHUTDOWN_COMPLETE = 1079, + ER_FORCING_CLOSE = 1080, + ER_IPSOCK_ERROR = 1081, + ER_NO_SUCH_INDEX = 1082, + ER_WRONG_FIELD_TERMINATORS = 1083, + ER_BLOBS_AND_NO_TERMINATED = 1084, + ER_TEXTFILE_NOT_READABLE = 1085, + ER_FILE_EXISTS_ERROR = 1086, + ER_LOAD_INFO = 1087, + ER_ALTER_INFO = 1088, + ER_WRONG_SUB_KEY = 1089, + ER_CANT_REMOVE_ALL_FIELDS = 1090, + ER_CANT_DROP_FIELD_OR_KEY = 1091, + ER_INSERT_INFO = 1092, + ER_INSERT_TABLE_USED = 1093, + ER_NO_SUCH_THREAD = 1094, + ER_KILL_DENIED_ERROR = 1095, + ER_NO_TABLES_USED = 1096, + ER_TOO_BIG_SET = 1097, + ER_NO_UNIQUE_LOGFILE = 1098, + ER_TABLE_NOT_LOCKED_FOR_WRITE = 1099, + ER_TABLE_NOT_LOCKED = 1100, + ER_BLOB_CANT_HAVE_DEFAULT = 1101, + ER_WRONG_DB_NAME = 1102, + ER_WRONG_TABLE_NAME = 1103, + ER_TOO_BIG_SELECT = 1104, + ER_UNKNOWN_ERROR = 1105, + ER_UNKNOWN_PROCEDURE = 1106, + ER_WRONG_PARAMCOUNT_TO_PROCEDURE = 1107, + ER_WRONG_PARAMETERS_TO_PROCEDURE = 1108, + ER_UNKNOWN_TABLE = 1109, + ER_FIELD_SPECIFIED_TWICE = 1110, + ER_INVALID_GROUP_FUNC_USE = 1111, + ER_UNSUPPORTED_EXTENSION = 1112, + ER_TABLE_MUST_HAVE_COLUMNS = 1113, + ER_RECORD_FILE_FULL = 1114, + ER_UNKNOWN_CHARACTER_SET = 1115, + ER_TOO_MANY_TABLES = 1116, + ER_TOO_MANY_FIELDS = 1117, + ER_TOO_BIG_ROWSIZE = 1118, + ER_STACK_OVERRUN = 1119, + ER_WRONG_OUTER_JOIN = 1120, + ER_NULL_COLUMN_IN_INDEX = 1121, + ER_CANT_FIND_UDF = 1122, + ER_CANT_INITIALIZE_UDF = 1123, + ER_UDF_NO_PATHS = 1124, + ER_UDF_EXISTS = 1125, + 
ER_CANT_OPEN_LIBRARY = 1126, + ER_CANT_FIND_DL_ENTRY = 1127, + ER_FUNCTION_NOT_DEFINED = 1128, + ER_HOST_IS_BLOCKED = 1129, + ER_HOST_NOT_PRIVILEGED = 1130, + ER_PASSWORD_ANONYMOUS_USER = 1131, + ER_PASSWORD_NOT_ALLOWED = 1132, + ER_PASSWORD_NO_MATCH = 1133, + ER_UPDATE_INFO = 1134, + ER_CANT_CREATE_THREAD = 1135, + ER_WRONG_VALUE_COUNT_ON_ROW = 1136, + ER_CANT_REOPEN_TABLE = 1137, + ER_INVALID_USE_OF_NULL = 1138, + ER_REGEXP_ERROR = 1139, + ER_MIX_OF_GROUP_FUNC_AND_FIELDS = 1140, + ER_NONEXISTING_GRANT = 1141, + ER_TABLEACCESS_DENIED_ERROR = 1142, + ER_COLUMNACCESS_DENIED_ERROR = 1143, + ER_ILLEGAL_GRANT_FOR_TABLE = 1144, + ER_GRANT_WRONG_HOST_OR_USER = 1145, + ER_NO_SUCH_TABLE = 1146, + ER_NONEXISTING_TABLE_GRANT = 1147, + ER_NOT_ALLOWED_COMMAND = 1148, + ER_SYNTAX_ERROR = 1149, + ER_DELAYED_CANT_CHANGE_LOCK = 1150, + ER_TOO_MANY_DELAYED_THREADS = 1151, + ER_ABORTING_CONNECTION = 1152, + ER_NET_PACKET_TOO_LARGE = 1153, + ER_NET_READ_ERROR_FROM_PIPE = 1154, + ER_NET_FCNTL_ERROR = 1155, + ER_NET_PACKETS_OUT_OF_ORDER = 1156, + ER_NET_UNCOMPRESS_ERROR = 1157, + ER_NET_READ_ERROR = 1158, + ER_NET_READ_INTERRUPTED = 1159, + ER_NET_ERROR_ON_WRITE = 1160, + ER_NET_WRITE_INTERRUPTED = 1161, + ER_TOO_LONG_STRING = 1162, + ER_TABLE_CANT_HANDLE_BLOB = 1163, + ER_TABLE_CANT_HANDLE_AUTO_INCREMENT = 1164, + ER_DELAYED_INSERT_TABLE_LOCKED = 1165, + ER_WRONG_COLUMN_NAME = 1166, + ER_WRONG_KEY_COLUMN = 1167, + ER_WRONG_MRG_TABLE = 1168, + ER_DUP_UNIQUE = 1169, + ER_BLOB_KEY_WITHOUT_LENGTH = 1170, + ER_PRIMARY_CANT_HAVE_NULL = 1171, + ER_TOO_MANY_ROWS = 1172, + ER_REQUIRES_PRIMARY_KEY = 1173, + ER_NO_RAID_COMPILED = 1174, + ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE = 1175, + ER_KEY_DOES_NOT_EXITS = 1176, + ER_CHECK_NO_SUCH_TABLE = 1177, + ER_CHECK_NOT_IMPLEMENTED = 1178, + ER_CANT_DO_THIS_DURING_AN_TRANSACTION = 1179, + ER_ERROR_DURING_COMMIT = 1180, + ER_ERROR_DURING_ROLLBACK = 1181, + ER_ERROR_DURING_FLUSH_LOGS = 1182, + ER_ERROR_DURING_CHECKPOINT = 1183, + ER_NEW_ABORTING_CONNECTION = 
1184, + ER_DUMP_NOT_IMPLEMENTED = 1185, + ER_FLUSH_MASTER_BINLOG_CLOSED = 1186, + ER_INDEX_REBUILD = 1187, + ER_MASTER = 1188, + ER_MASTER_NET_READ = 1189, + ER_MASTER_NET_WRITE = 1190, + ER_FT_MATCHING_KEY_NOT_FOUND = 1191, + ER_LOCK_OR_ACTIVE_TRANSACTION = 1192, + ER_UNKNOWN_SYSTEM_VARIABLE = 1193, + ER_CRASHED_ON_USAGE = 1194, + ER_CRASHED_ON_REPAIR = 1195, + ER_WARNING_NOT_COMPLETE_ROLLBACK = 1196, + ER_TRANS_CACHE_FULL = 1197, + ER_SLAVE_MUST_STOP = 1198, + ER_SLAVE_NOT_RUNNING = 1199, + ER_BAD_SLAVE = 1200, + ER_MASTER_INFO = 1201, + ER_SLAVE_THREAD = 1202, + ER_TOO_MANY_USER_CONNECTIONS = 1203, + ER_SET_CONSTANTS_ONLY = 1204, + ER_LOCK_WAIT_TIMEOUT = 1205, + ER_LOCK_TABLE_FULL = 1206, + ER_READ_ONLY_TRANSACTION = 1207, + ER_DROP_DB_WITH_READ_LOCK = 1208, + ER_CREATE_DB_WITH_READ_LOCK = 1209, + ER_WRONG_ARGUMENTS = 1210, + ER_NO_PERMISSION_TO_CREATE_USER = 1211, + ER_UNION_TABLES_IN_DIFFERENT_DIR = 1212, + ER_LOCK_DEADLOCK = 1213, + ER_TABLE_CANT_HANDLE_FULLTEXT = 1214, + ER_CANNOT_ADD_FOREIGN = 1215, + ER_NO_REFERENCED_ROW = 1216, + ER_ROW_IS_REFERENCED = 1217, + ER_CONNECT_TO_MASTER = 1218, + ER_QUERY_ON_MASTER = 1219, + ER_ERROR_WHEN_EXECUTING_COMMAND = 1220, + ER_WRONG_USAGE = 1221, + ER_WRONG_NUMBER_OF_COLUMNS_IN_SELECT = 1222, + ER_CANT_UPDATE_WITH_READLOCK = 1223, + ER_MIXING_NOT_ALLOWED = 1224, + ER_DUP_ARGUMENT = 1225, + ER_USER_LIMIT_REACHED = 1226, + ER_SPECIFIC_ACCESS_DENIED_ERROR = 1227, + ER_LOCAL_VARIABLE = 1228, + ER_GLOBAL_VARIABLE = 1229, + ER_NO_DEFAULT = 1230, + ER_WRONG_VALUE_FOR_VAR = 1231, + ER_WRONG_TYPE_FOR_VAR = 1232, + ER_VAR_CANT_BE_READ = 1233, + ER_CANT_USE_OPTION_HERE = 1234, + ER_NOT_SUPPORTED_YET = 1235, + ER_MASTER_FATAL_ERROR_READING_BINLOG = 1236, + ER_SLAVE_IGNORED_TABLE = 1237, + ER_INCORRECT_GLOBAL_LOCAL_VAR = 1238, + CR_UNKNOWN_ERROR = 1900, + CR_SOCKET_CREATE_ERROR = 1901, + CR_CONNECTION_ERROR = 1902, + CR_CONN_HOST_ERROR = 1903, + CR_IPSOCK_ERROR = 1904, + CR_UNKNOWN_HOST = 1905, + CR_SERVER_GONE_ERROR = 1906, + 
CR_VERSION_ERROR = 1907, + CR_OUT_OF_MEMORY = 1908, + CR_WRONG_HOST_INFO = 1909, + CR_LOCALHOST_CONNECTION = 1910, + CR_TCP_CONNECTION = 1911, + CR_SERVER_HANDSHAKE_ERR = 1912, + CR_SERVER_LOST = 1913, + CR_COMMANDS_OUT_OF_SYNC = 1914, + CR_NAMEDPIPE_CONNECTION = 1915, + CR_NAMEDPIPEWAIT_ERROR = 1916, + CR_NAMEDPIPEOPEN_ERROR = 1917, + CR_NAMEDPIPESETSTATE_ERROR = 1918, + CR_CANT_READ_CHARSET = 1919, + CR_NET_PACKET_TOO_LARGE = 1920, + CR_EMBEDDED_CONNECTION = 1921, + CR_PROBE_SLAVE_STATUS = 1922, + CR_PROBE_SLAVE_HOSTS = 1923, + CR_PROBE_SLAVE_CONNECT = 1924, + CR_PROBE_MASTER_CONNECT = 1925, + CR_SSL_CONNECTION_ERROR = 1926, + CR_MALFORMED_PACKET = 1927, + CR_WRONG_LICENSE = 1928, +}; +#endif diff --git a/src/search_local/index_storage/cache/ng_info.cc b/src/search_local/index_storage/cache/ng_info.cc new file mode 100644 index 0000000..e423111 --- /dev/null +++ b/src/search_local/index_storage/cache/ng_info.cc @@ -0,0 +1,316 @@ +/* + * ===================================================================================== + * + * Filename: ng_info.cc + * + * Description: NodeGroup operation. + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. 
+ * + * ===================================================================================== + */ +#include +#include +#include "node_set.h" +#include "node_list.h" +#include "node_index.h" +#include "ng_info.h" +#include "node.h" +#include "dtc_global.h" + +DTC_USING_NAMESPACE + +/* Starts detached (_ngInfo == NULL) in CREATED mode with an empty error message, and fetches the statistic items this object keeps updated. */ NGInfo::NGInfo() : _ngInfo(NULL) +{ + memset(_errmsg, 0, sizeof(_errmsg)); + emptyCnt = 0; + emptyStartupMode = CREATED; + + statUsedNG = statmgr.get_item_u32(DTC_USED_NGS); + statUsedNode = statmgr.get_item_u32(DTC_USED_NODES); + statDirtyNode = statmgr.get_item_u32(DTC_DIRTY_NODES); + statEmptyNode = statmgr.get_item_u32(DTC_EMPTY_NODES); + /* presumably resets the published empty-node value to 0, matching emptyCnt above - confirm StatItemU32 assignment semantics */ statEmptyNode = 0; + statUsedRow = statmgr.get_item_u32(DTC_USED_ROWS); + statDirtyRow = statmgr.get_item_u32(DTC_DIRTY_ROWS); +} + +NGInfo::~NGInfo() +{ +} + +/* Allocate one Node from the first NODE_SET on the free list; when the free list is empty a batch of new NODE_SETs is allocated first. Returns an empty Node() when no NODE_SET could be allocated or the chosen NODE_SET yields no node. On success the used-node counters are updated and the node is inserted into the node index. */ Node NGInfo::allocate_node(void) +{ + //try the free list first + NODE_SET *NS = find_free_ng(); + if (!NS) + { + /* pre-allocate NodeGroups in batches so that they do not fragment memory */ + /* function-local statics: `step` is initialized once from DTCGlobal::_pre_alloc_NG_num and, like `fail`, keeps its value across calls */ static int step = DTCGlobal::_pre_alloc_NG_num; + static int fail = 0; + for (int i = 0; i < step; i++) + { + NS = allocate_ng(); + if (!NS) + { + /* nothing allocated in this batch: give up; otherwise remember the failure and fall back to batches of 1 */ if (i == 0) + return Node(); + else + { + fail = 1; + step = 1; + break; + } + } + + free_list_add(NS); + } + + /* find again */ + NS = find_free_ng(); + + if (step < 256 && !fail) + step *= 2; + } + + Node node = NS->allocate_node(); + //no allocatable Node is left in this NG + if (NS->is_full()) + { + list_del(NS); + full_list_add(NS); + } + + if (!node) + { + snprintf(_errmsg, sizeof(_errmsg), "PANIC: allocate node failed"); + return Node(); + } + + //statistic + _ngInfo->ni_used_node++; + statUsedNode = _ngInfo->ni_used_node; + + //insert to node_index + I_INSERT(node); + return node; +} + +/* Give `node` back to its owning NODE_SET and update the used-node counters. A NODE_SET that was full is moved to the free list first, since it is about to regain a slot. Returns the result of node.Release(). */ int NGInfo::release_node(Node &node) +{ + NODE_SET *NS = node.Owner(); + if (NS->is_full()) + { + //move the NG onto the free list + list_del(NS); + free_list_add(NS); + } + + _ngInfo->ni_used_node--; + statUsedNode = _ngInfo->ni_used_node; + return node.Release(); +} + +/* Head node of the dirty list: the entry at SYS_DIRTY_NODE_INDEX inside the system-reserved NODE_SET; an empty Node() when that NODE_SET cannot be resolved. */ Node NGInfo::dirty_node_head() +{ + NODE_SET *sysNG = M_POINTER(NODE_SET, _ngInfo->ni_sys_zone); + if
(!sysNG) + return Node(); + return Node(sysNG, SYS_DIRTY_NODE_INDEX); +} + +/* Head node of the clean list: the entry at SYS_CLEAN_NODE_INDEX inside the system-reserved NODE_SET; an empty Node() when that NODE_SET cannot be resolved. */ Node NGInfo::clean_node_head() +{ + NODE_SET *sysNG = M_POINTER(NODE_SET, _ngInfo->ni_sys_zone); + if (!sysNG) + return Node(); + return Node(sysNG, SYS_CLEAN_NODE_INDEX); +} + +/* Head node of the empty list: the entry at SYS_EMPTY_NODE_INDEX inside the system-reserved NODE_SET; an empty Node() when that NODE_SET cannot be resolved. */ Node NGInfo::empty_node_head() +{ + NODE_SET *sysNG = M_POINTER(NODE_SET, _ngInfo->ni_sys_zone); + if (!sysNG) + return Node(); + return Node(sysNG, SYS_EMPTY_NODE_INDEX); +} + +/* Link `node` into the dirty list via NODE_LIST_ADD against the dirty head. Always returns 0. Unlike the *_node_head() accessors, sysNG is not checked for NULL here. */ int NGInfo::insert2_dirty_lru(Node node) +{ + NODE_SET *sysNG = M_POINTER(NODE_SET, _ngInfo->ni_sys_zone); + Node dirtyNode(sysNG, SYS_DIRTY_NODE_INDEX); + + NODE_LIST_ADD(node, dirtyNode); + + return 0; +} + +/* Link `node` into the clean list via NODE_LIST_ADD against the clean head. Always returns 0. */ int NGInfo::insert2_clean_lru(Node node) +{ + NODE_SET *sysNG = M_POINTER(NODE_SET, _ngInfo->ni_sys_zone); + Node cleanNode(sysNG, SYS_CLEAN_NODE_INDEX); + + NODE_LIST_ADD(node, cleanNode); + + return 0; +} + +/* Link `node` into the empty list via NODE_LIST_ADD against the empty head. Always returns 0. */ int NGInfo::insert2_empty_lru(Node node) +{ + NODE_SET *sysNG = M_POINTER(NODE_SET, _ngInfo->ni_sys_zone); + Node emptyNode(sysNG, SYS_EMPTY_NODE_INDEX); + + NODE_LIST_ADD(node, emptyNode); + + return 0; +} + +/* Unlink `node` from whichever list it is on (NODE_LIST_DEL). Always returns 0. */ int NGInfo::remove_from_lru(Node node) +{ + NODE_LIST_DEL(node); + return 0; +} + +/* Allocate and initialize one NODE_SET: zeroed memory of NODE_SET::Size() bytes, initialized with the next start id (ni_min_id), after which ni_min_id advances by NODE_GROUP_INCLUDE_NODES and the used-NG counters are bumped. Returns NULL, with _errmsg set, when the allocation fails. */ NODE_SET *NGInfo::allocate_ng(void) +{ + MEM_HANDLE_T v = M_CALLOC(NODE_SET::Size()); + if (INVALID_HANDLE == v) + { + snprintf(_errmsg, sizeof(_errmsg), "allocate nodegroup failed, %s", M_ERROR()); + return (NODE_SET *)0; + } + + NODE_SET *NS = M_POINTER(NODE_SET, v); + NS->Init(_ngInfo->ni_min_id); + _ngInfo->ni_min_id += NODE_GROUP_INCLUDE_NODES; + _ngInfo->ni_used_ng++; + statUsedNG = _ngInfo->ni_used_ng; + + return NS; +} + +/* Return the first NODE_SET on the free list, or NULL when that list is empty. */ NODE_SET *NGInfo::find_free_ng(void) +{ + //the list is empty + if (NG_LIST_EMPTY(&(_ngInfo->ni_free_head))) + { + return (NODE_SET *)0; + } + + return NG_LIST_ENTRY(_ngInfo->ni_free_head.Next(), NODE_SET, ng_list); +} + +/* Unlink NS from the NG list it is currently on. */ void NGInfo::list_del(NODE_SET *NS) +{ + NG_LIST_T *p = &(NS->ng_list); + return NG_LIST_DEL(p); +} + +#define EXPORT_NG_LIST_FUNCTION(name, member, function) \ + void NGInfo::name(NODE_SET *NS) \ + { \ +
NG_LIST_T *p = &(NS->ng_list); \ + NG_LIST_T *head = &(_ngInfo->member); \ + return function(p, head); \ + } + +/* each line below expands to one NGInfo member that links NS into the named list head using the named list operation */ EXPORT_NG_LIST_FUNCTION(free_list_add, ni_free_head, NG_LIST_ADD) +EXPORT_NG_LIST_FUNCTION(full_list_add, ni_full_head, NG_LIST_ADD) +EXPORT_NG_LIST_FUNCTION(free_list_add_tail, ni_free_head, NG_LIST_ADD_TAIL) +EXPORT_NG_LIST_FUNCTION(full_list_add_tail, ni_full_head, NG_LIST_ADD_TAIL) + +/* Format a fresh header: empty free/full lists, first node id SYS_MIN_NODE_ID, one system-reserved NODE_SET, all counters reset (ni_used_ng = 1 for the system NODE_SET). Returns 0 on success, -1 when the system NODE_SET cannot be allocated. allocate_ng() works on this->_ngInfo, so `ni` is expected to be that same object (Init passes it). */ int NGInfo::InitHeader(NG_INFO_T *ni) +{ + INIT_NG_LIST_HEAD(&(ni->ni_free_head)); + INIT_NG_LIST_HEAD(&(ni->ni_full_head)); + + ni->ni_min_id = SYS_MIN_NODE_ID; + + /* init system reserved zone*/ + { + NODE_SET *sysNG = allocate_ng(); + if (!sysNG) + return -1; + + sysNG->system_reserved_init(); + ni->ni_sys_zone = M_HANDLE(sysNG); + } + + ni->ni_used_ng = 1; + ni->ni_used_node = 0; + ni->ni_dirty_node = 0; + ni->ni_used_row = 0; + ni->ni_dirty_row = 0; + + statUsedNG = ni->ni_used_ng; + statUsedNode = ni->ni_used_node; + statDirtyNode = ni->ni_dirty_node; + statDirtyRow = ni->ni_dirty_row; + statUsedRow = ni->ni_used_row; + statEmptyNode = 0; + + return 0; +} + +/* Create a new NG_INFO_T in allocator memory and format it. Returns 0 on success, -1 (with _errmsg set when the allocation itself fails) otherwise. */ int NGInfo::Init(void) +{ + //1. malloc ng_info mem. + MEM_HANDLE_T v = M_CALLOC(sizeof(NG_INFO_T)); + if (INVALID_HANDLE == v) + { + snprintf(_errmsg, sizeof(_errmsg), "init nginfo failed, %s", M_ERROR()); + return -1; + } + + //2. mapping + _ngInfo = M_POINTER(NG_INFO_T, v); + + //3. init header + return InitHeader(_ngInfo); +} + +/* Bind to an existing NG_INFO_T identified by handle `v` and record in emptyStartupMode whether its system-reserved zone was ATTACHED as is or UPGRADED. Returns 0 on success, -1 for an invalid handle or an unresolvable system zone, or the negative result of system_reserved_check(). */ int NGInfo::Attach(MEM_HANDLE_T v) +{ + if (INVALID_HANDLE == v) + { + snprintf(_errmsg, sizeof(_errmsg), "attach nginfo failed, memory handle = 0"); + return -1; + } + + _ngInfo = M_POINTER(NG_INFO_T, v); + + /* check system reserved zone: + * 1.
the presence of the empty lru list + */ + { + NODE_SET *sysNG = M_POINTER(NODE_SET, _ngInfo->ni_sys_zone); + if (!sysNG) + return -1; + + int ret = sysNG->system_reserved_check(); + if (ret < 0) + return ret; + /* a positive result selects UPGRADED, zero selects ATTACHED */ if (ret > 0) + { + emptyStartupMode = UPGRADED; + } + else + { + emptyStartupMode = ATTACHED; + } + } + + return 0; +} + +/* Drop the binding to the NG_INFO_T; nothing is released. Always returns 0. */ int NGInfo::Detach(void) +{ + _ngInfo = NULL; + return 0; +} diff --git a/src/search_local/index_storage/cache/ng_info.h b/src/search_local/index_storage/cache/ng_info.h new file mode 100644 index 0000000..55f5789 --- /dev/null +++ b/src/search_local/index_storage/cache/ng_info.h @@ -0,0 +1,196 @@ +/* + * ===================================================================================== + * + * Filename: ng_info.h + * + * Description: NodeGroup operation. + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. + * + * ===================================================================================== + */ +#ifndef __DTC_NG_INFO_H +#define __DTC_NG_INFO_H + +#include +#include "stat_dtc.h" +#include "singleton.h" +#include "namespace.h" +#include "global.h" +#include "ng_list.h" + +DTC_BEGIN_NAMESPACE + +/* cache types supported by the high-level layer */ +enum MEM_CACHE_TYPE_T +{ + MEM_DTC_TYPE = 0x1UL, + MEM_BMP_TYPE = 0x2UL, +}; + +/* signature, version, type, etc. of the high-level cache */ +#define MEM_CACHE_SIGN 0xFF00FF00FF00FF00ULL +#define MEM_CACHE_VERSION 0x1ULL +#define MEM_CACHE_TYPE MEM_DTC_TYPE + +/* identification record stored with the cache: signature, version and type */ struct cache_info +{ + uint64_t ci_sign; + uint64_t ci_version; + uint64_t ci_type; +}; +typedef struct cache_info CACHE_INFO_T; + +/* the low-level layer reserves 4k of space for later extension */ +/* TODO: add finer-grained validity checks */ +struct app_storage +{ + CACHE_INFO_T as_cache_info; + MEM_HANDLE_T as_extend_info; + + /* non-zero when the signature does not match MEM_CACHE_SIGN or no extend-info handle is stored */ int need_format() + { + return (as_cache_info.ci_sign != MEM_CACHE_SIGN) || + (INVALID_HANDLE == as_extend_info); + } + + /* stamp signature, version and type and store `v` as the extend-info handle; always returns 0 */ int Format(MEM_HANDLE_T v) + { + as_cache_info.ci_sign = MEM_CACHE_SIGN; +
as_cache_info.ci_version = MEM_CACHE_VERSION; + as_cache_info.ci_type = MEM_DTC_TYPE; + + as_extend_info = v; + return 0; + } +}; +typedef struct app_storage APP_STORAGE_T; + +struct ng_info +{ + NG_LIST_T ni_free_head; //有空闲Node的NG链表 + NG_LIST_T ni_full_head; //Node分配完的NG链表 + NODE_ID_T ni_min_id; //下一个被分配NG的起始NodeId + MEM_HANDLE_T ni_sys_zone; //第一个NG为系统保留 + + /*以下为统计值,用来控制异步flush的起停,速度等*/ + uint32_t ni_used_ng; + uint32_t ni_used_node; + uint32_t ni_dirty_node; + uint64_t ni_used_row; + uint64_t ni_dirty_row; +}; +typedef struct ng_info NG_INFO_T; + +class NGInfo +{ +public: + NGInfo(); + ~NGInfo(); + + static NGInfo *Instance() { return Singleton::Instance(); } + static void Destroy() { Singleton::Destroy(); } + + Node allocate_node(void); //分配一个新Node + int release_node(Node &); //归还CNode到所属的NG并摧毁自己 + + /*statistic, for async flush */ + void inc_dirty_node(int v) + { + _ngInfo->ni_dirty_node += v; + statDirtyNode = _ngInfo->ni_dirty_node; + } + void inc_dirty_row(int v) + { + _ngInfo->ni_dirty_row += v; + statDirtyRow = _ngInfo->ni_dirty_row; + } + void inc_total_row(int v) + { + _ngInfo->ni_used_row += v; + statUsedRow = _ngInfo->ni_used_row; + } + void inc_empty_node(int v) + { + emptyCnt += v; + statEmptyNode = emptyCnt; + } + + const unsigned int total_dirty_node() const { return _ngInfo->ni_dirty_node; } + const unsigned int total_used_node() const { return _ngInfo->ni_used_node; } + + const uint64_t total_dirty_row() const { return _ngInfo->ni_dirty_row; } + const uint64_t total_used_row() const { return _ngInfo->ni_used_row; } + + Node dirty_node_head(); + Node clean_node_head(); + Node empty_node_head(); + + /* 获取最小可用的NodeID */ + NODE_ID_T min_valid_node_id() const { return (NODE_ID_T)256; } + + /* 获取目前分配的最大NodeID */ + /* 由于目前node-group大小固定,而且分配后不会释放,因此可以直接通过已用的node-group算出来 */ + NODE_ID_T max_node_id() const { return _ngInfo->ni_used_ng * 256 - 1; } + + //time-list op + int insert2_dirty_lru(Node); + int insert2_clean_lru(Node); + int 
insert2_empty_lru(Node); + int remove_from_lru(Node); + int empty_count(void) const { return emptyCnt; } + enum + { + CREATED, // this memory is fresh + ATTACHED, // this is an old memory, and empty lru present + UPGRADED // this is an old memory, and empty lru is missing + }; + int empty_startup_mode(void) const { return emptyStartupMode; } + + const MEM_HANDLE_T Handle() const { return M_HANDLE(_ngInfo); } + const char *Error() const { return _errmsg; } + + //创建物理内存并格式化 + int Init(void); + //绑定到物理内存 + int Attach(MEM_HANDLE_T handle); + //脱离物理内存 + int Detach(void); + +protected: + int InitHeader(NG_INFO_T *); + + NODE_SET *allocate_ng(void); + NODE_SET *find_free_ng(void); + + void list_del(NODE_SET *); + void free_list_add(NODE_SET *); + void full_list_add(NODE_SET *); + void full_list_add_tail(NODE_SET *); + void free_list_add_tail(NODE_SET *); + +private: + NG_INFO_T *_ngInfo; + char _errmsg[256]; + // the total empty node present + int emptyCnt; + int emptyStartupMode; + +private: + StatItemU32 statUsedNG; + StatItemU32 statUsedNode; + StatItemU32 statDirtyNode; + StatItemU32 statEmptyNode; + StatItemU32 statUsedRow; + StatItemU32 statDirtyRow; +}; + +DTC_END_NAMESPACE + +#endif diff --git a/src/search_local/index_storage/cache/ng_list.h b/src/search_local/index_storage/cache/ng_list.h new file mode 100644 index 0000000..762c8a3 --- /dev/null +++ b/src/search_local/index_storage/cache/ng_list.h @@ -0,0 +1,116 @@ +/* + * ===================================================================================== + * + * Filename: ng_list.h + * + * Description: double linked list method in sharing memory. + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. 
+ * + * ===================================================================================== + */ + +#ifndef __DTC_NG_LIST_H +#define __DTC_NG_LIST_H + +#include "namespace.h" +#include "global.h" + +DTC_BEGIN_NAMESPACE + +struct ng_list +{ + MEM_HANDLE_T prev; + MEM_HANDLE_T next; + + struct ng_list *Next() { return M_POINTER(struct ng_list, next); } + struct ng_list *Prev() { return M_POINTER(struct ng_list, prev); } +}; +typedef struct ng_list NG_LIST_T; + +#define INIT_NG_LIST_HEAD(ptr) \ + do \ + { \ + MEM_HANDLE_T v = M_HANDLE(ptr); \ + (ptr)->prev = v; \ + (ptr)->next = v; \ + } while (0) + +inline void __NG_LIST_ADD(NG_LIST_T *p, + NG_LIST_T *prev, + NG_LIST_T *next) +{ + next->prev = M_HANDLE(p); + p->next = M_HANDLE(next); + p->prev = M_HANDLE(prev); + prev->next = M_HANDLE(p); +} + +inline void NG_LIST_ADD(NG_LIST_T *p, NG_LIST_T *head) +{ + __NG_LIST_ADD(p, head, head->Next()); +} + +inline void NG_LIST_ADD_TAIL(NG_LIST_T *p, NG_LIST_T *head) +{ + __NG_LIST_ADD(p, head->Prev(), head); +} + +inline void __NG_LIST_DEL(NG_LIST_T *prev, NG_LIST_T *next) +{ + next->prev = M_HANDLE(prev); + prev->next = M_HANDLE(next); +} + +inline void NG_LIST_DEL(NG_LIST_T *p) +{ + __NG_LIST_DEL(p->Prev(), p->Next()); + p->next = INVALID_HANDLE; + p->prev = INVALID_HANDLE; +} + +inline void NG_LIST_DEL_INIT(NG_LIST_T *p) +{ + __NG_LIST_DEL(p->Prev(), p->Next()); + INIT_NG_LIST_HEAD(p); +} + +inline void NG_LIST_MOVE(NG_LIST_T *p, NG_LIST_T *head) +{ + __NG_LIST_DEL(p->Prev(), p->Next()); + NG_LIST_ADD(p, head); +} + +inline void NG_LIST_MOVE_TAIL(NG_LIST_T *p, NG_LIST_T *head) +{ + __NG_LIST_DEL(p->Prev(), p->Next()); + NG_LIST_ADD_TAIL(p, head); +} + +inline int NG_LIST_EMPTY(NG_LIST_T *head) +{ + return head->next == M_HANDLE(head); +} + +#define OFFSETOF(type, member) (unsigned long)(&((type *)0)->member) + +#define NG_LIST_ENTRY(ptr, type, member) \ + ((type *)((char *)(ptr)-OFFSETOF(type, member))) + +#define NG_LIST_FOR_EACH(pos, head) \ + for (pos = (head)->Next(); 
pos != (head); pos = pos->Next()) + +#define NG_LIST_FOR_EACH_ENTRY(pos, head, member) \ + for (pos = NG_LIST_ENTRY((head)->Next(), typeof(*pos), member), \ + &pos->member != (head); \ + pos = list_entry((pos->member).Next(), typeof(*pos), member)) + +DTC_END_NAMESPACE + +#endif diff --git a/src/search_local/index_storage/cache/node.h b/src/search_local/index_storage/cache/node.h new file mode 100644 index 0000000..e8de576 --- /dev/null +++ b/src/search_local/index_storage/cache/node.h @@ -0,0 +1,136 @@ +/* + * ===================================================================================== + * + * Filename: node.h + * + * Description: node operation. + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. + * + * ===================================================================================== + */ + +#ifndef __NODE_DTC_H +#define __NODE_DTC_H + +#include +#include "namespace.h" +#include "global.h" +#include "node_set.h" +#include "node_index.h" + +DTC_BEGIN_NAMESPACE + +class NGInfo; +class NodeIndex; + +class Node +{ +public: + Node(NODE_SET *ns = NULL, int idx = 0) : _owner(ns), _index(idx) {} + Node(const Node &n) : _owner(n._owner), _index(n._index) {} + ~Node() {} + +public: + int Index(void) { return _index; } + NODE_SET *Owner() { return _owner; } + + /* attribute op*/ + NODE_ID_T &lru_prev() + { + NODE_ID_T *p = node_lru(); + return p[LRU_PREV]; + } + + NODE_ID_T &lru_next() + { + NODE_ID_T *p = node_lru(); + return p[LRU_NEXT]; + } + + NODE_ID_T &next_node_id() { return _owner->next_node_id(_index); } + NODE_ID_T node_id() { return _owner->node_id(_index); } + + MEM_HANDLE_T &vd_handle() { return _owner->vd_handle(_index); } + + /* return time-marker time */ + unsigned int Time() { return (unsigned int)vd_handle(); } + + /* dirty flag*/ + bool is_dirty() const { return _owner->is_dirty(_index); } + void set_dirty() { return 
_owner->set_dirty(_index); } + void clr_dirty() { return _owner->clr_dirty(_index); } + +public: + /* used for timelist */ + Node Next() { return from_id(lru_next()); } + Node Prev() { return from_id(lru_prev()); } + + /* used for hash */ + Node next_node(void) { return from_id(next_node_id()); } + + /* for copyable */ + Node &operator=(const Node &n) + { + _owner = n._owner; + _index = n._index; + return *this; + } + int operator!() const { return _owner == NULL || _index >= NODE_GROUP_INCLUDE_NODES; } + int operator!=(Node &node) { return _owner != node.Owner() || _index != node.Index(); } + int operator==(Node &node) { return _owner == node.Owner() && _index == node.Index(); } + + int not_in_lru_list() { return lru_prev() == node_id() || lru_next() == node_id(); } + static Node Empty(void) + { + Node node; + return node; + } + +private: + /* init or delete this */ + int Reset() + { + next_node_id() = INVALID_NODE_ID; + lru_prev() = node_id(); + lru_next() = node_id(); + + clr_dirty(); + return 0; + } + + int Release() + { + _owner->release_node(*this); + Reset(); + _owner = NULL; + _index = 0; + return 0; + } + + static inline Node from_id(NODE_ID_T id) { return I_SEARCH(id); } + +private: + // [0] = prev, [1] = next + NODE_ID_T *node_lru() { return _owner->node_lru(_index); } + +private: + NODE_SET *_owner; + int _index; + +public: + /* friend class */ + friend class NGInfo; + friend class NodeIndex; + friend struct node_set; +}; + +DTC_END_NAMESPACE + +#endif \ No newline at end of file diff --git a/src/search_local/index_storage/cache/node_index.cc b/src/search_local/index_storage/cache/node_index.cc new file mode 100644 index 0000000..6e45847 --- /dev/null +++ b/src/search_local/index_storage/cache/node_index.cc @@ -0,0 +1,147 @@ +/* + * ===================================================================================== + * + * Filename: node_index.cc + * + * Description: NodeId to Node + * + * + * node_id ----- Node + * 8bits 1-index + * 16bits 2-index + 
* 8bits NodeGroup internal index. + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. + * + * ===================================================================================== + */ +#include +#include +#include "node_index.h" +#include "singleton.h" +#include "node.h" + +DTC_USING_NAMESPACE + +NodeIndex::NodeIndex() : _firstIndex(NULL) +{ + memset(_errmsg, 0, sizeof(_errmsg)); +} + +NodeIndex::~NodeIndex() +{ +} + +NodeIndex *NodeIndex::Instance() +{ + return Singleton::Instance(); +} + +void NodeIndex::Destroy() +{ + Singleton::Destroy(); +} + +int NodeIndex::pre_allocate_index(size_t mem_size) +{ + /* + * 按所有节点全为空节点来分配2级NodeIndex + * 一个空节点占用44 bytes + */ + uint32_t n = 65536 * 256 * 44; + n = mem_size / n + 1; + n = n > 256 ? 256 : n; + + for (uint32_t i = 0; i < n; ++i) + { + _firstIndex->fi_h[i] = M_CALLOC(INDEX_2_SIZE); + + if (INVALID_HANDLE == _firstIndex->fi_h[i]) + { + log_crit("PANIC: PrepareNodeIndex[%u] failed", i); + return -1; + } + } + + return 0; +} + +int NodeIndex::Insert(Node node) +{ + NODE_ID_T id = node.node_id(); + + if (INVALID_HANDLE == _firstIndex->fi_h[OFFSET1(id)]) + { + _firstIndex->fi_h[OFFSET1(id)] = M_CALLOC(INDEX_2_SIZE); + if (INVALID_HANDLE == _firstIndex->fi_h[OFFSET1(id)]) + { + log_crit("PANIC: Insert node=%u to NodeIndex failed", id); + return -1; + } + } + + SECOND_INDEX_T *p = M_POINTER(SECOND_INDEX_T, _firstIndex->fi_h[OFFSET1(id)]); + p->si_used++; + p->si_h[OFFSET2(id)] = M_HANDLE(node.Owner()); + + return 0; +} + +Node NodeIndex::Search(NODE_ID_T id) +{ + if (INVALID_NODE_ID == id) + return Node(NULL, 0); + + if (INVALID_HANDLE == _firstIndex->fi_h[OFFSET1(id)]) + return Node(NULL, 0); + + SECOND_INDEX_T *p = M_POINTER(SECOND_INDEX_T, _firstIndex->fi_h[OFFSET1(id)]); + if (INVALID_HANDLE == p->si_h[OFFSET2(id)]) + return Node(NULL, 0); + + NODE_SET *NS = M_POINTER(NODE_SET, p->si_h[OFFSET2(id)]); + + int 
index = (id - NS->ng_nid); + if (index < 0 || index > 255) + return Node(NULL, 0); + + return Node(NS, index); +} + +int NodeIndex::Init(size_t mem_size) +{ + MEM_HANDLE_T v = M_CALLOC(INDEX_1_SIZE); + if (INVALID_HANDLE == v) + { + log_crit("Create Index-1 failed"); + return -1; + } + + _firstIndex = M_POINTER(FIRST_INDEX_T, v); + + return pre_allocate_index(mem_size); +} + +int NodeIndex::Attach(MEM_HANDLE_T handle) +{ + if (INVALID_HANDLE == handle) + { + log_crit("attach index-1 failed, memory handle=0"); + return -1; + } + + _firstIndex = M_POINTER(FIRST_INDEX_T, handle); + return 0; +} + +int NodeIndex::Detach(void) +{ + _firstIndex = 0; + return 0; +} diff --git a/src/search_local/index_storage/cache/node_index.h b/src/search_local/index_storage/cache/node_index.h new file mode 100644 index 0000000..3533058 --- /dev/null +++ b/src/search_local/index_storage/cache/node_index.h @@ -0,0 +1,83 @@ +/* + * ===================================================================================== + * + * Filename: node_index.h + * + * Description: NodeId to Node + * + * + * node_id ----- Node + * 8bits 1-index + * 16bits 2-index + * 8bits NodeGroup internal index. + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. 
/* Allocation sizes of the two index levels: a header struct followed by
 * 2^8 (first level) or 2^16 (second level) MEM_HANDLE_T slots. */
#define INDEX_1_SIZE (((1UL << 8) * sizeof(MEM_HANDLE_T)) + sizeof(FIRST_INDEX_T))   // first-index size
#define INDEX_2_SIZE (((1UL << 16) * sizeof(MEM_HANDLE_T)) + sizeof(SECOND_INDEX_T)) // second-index size

#define OFFSET1(id) ((id) >> 24)           // top 8 bits: first-level index
#define OFFSET2(id) (((id)&0xFFFF00) >> 8) // middle 16 bits: second-level index
#define OFFSET3(id) ((id)&0xFF)            // low 8 bits

/* First-level table header; the slots follow it in the same allocation. */
struct first_index
{
    uint32_t fi_used;     // number of first-level index entries in use
    MEM_HANDLE_T fi_h[0]; // handles of the second-level indexes
};
typedef struct first_index FIRST_INDEX_T;

/* Second-level table header; the slots follow it in the same allocation. */
struct second_index
{
    uint32_t si_used;
    MEM_HANDLE_T si_h[0];
};
typedef struct second_index SECOND_INDEX_T;

class Node;

/* Two-level lookup table from NodeId to Node. */
class NodeIndex
{
public:
    NodeIndex();
    ~NodeIndex();

    static NodeIndex *Instance();
    static void Destroy();

    int Insert(Node);
    Node Search(NODE_ID_T id);

    int pre_allocate_index(size_t size);

    const MEM_HANDLE_T Handle() const { return M_HANDLE(_firstIndex); }
    const char *Error() const { return _errmsg; }
    // memory-region operations
    int Init(size_t mem_size);
    int Attach(MEM_HANDLE_T handle);
    int Detach(void);

private:
    FIRST_INDEX_T *_firstIndex;
    char _errmsg[256];
};
+ * + * ===================================================================================== + */ +#ifndef __DTC_NODE_LIST_H +#define __DTC_NODE_LIST_H + +#include "namespace.h" +#include "global.h" +#include "node.h" + +DTC_BEGIN_NAMESPACE + +#define INIT_NODE_LIST_HEAD(node, id) \ + do \ + { \ + node.lru_prev() = id; \ + node.lru_next() = id; \ + } while (0) + +inline void __NODE_LIST_ADD(Node p, + Node prev, + Node next) +{ + next.lru_prev() = p.node_id(); + p.lru_next() = next.node_id(); + p.lru_prev() = prev.node_id(); + prev.lru_next() = p.node_id(); +} + +inline void NODE_LIST_ADD(Node p, Node head) +{ + __NODE_LIST_ADD(p, head, head.Next()); +} + +inline void NODE_LIST_ADD_TAIL(Node p, Node head) +{ + __NODE_LIST_ADD(p, head.Prev(), head); +} + +inline void __NODE_LIST_DEL(Node prev, Node next) +{ + next.lru_prev() = prev.node_id(); + prev.lru_next() = next.node_id(); +} + +inline void NODE_LIST_DEL(Node p) +{ + __NODE_LIST_DEL(p.Prev(), p.Next()); + p.lru_prev() = p.node_id(); + p.lru_next() = p.node_id(); +} + +inline void NODE_LIST_MOVE(Node p, Node head) +{ + __NODE_LIST_DEL(p.Prev(), p.Next()); + NODE_LIST_ADD(p, head); +} + +inline void NODE_LIST_MOVE_TAIL(Node p, Node head) +{ + __NODE_LIST_DEL(p.Prev(), p.Next()); + NODE_LIST_ADD_TAIL(p, head); +} + +inline int NODE_LIST_EMPTY(Node head) +{ + return head.lru_next() == head.node_id(); +} + +/*正向遍历*/ +#define NODE_LIST_FOR_EACH(pos, head) \ + for (pos = head.Next(); pos != head; pos = pos.Next()) + +/*反向遍历*/ +#define NODE_LIST_FOR_EACH_RVS(pos, head) \ + for (pos = head.Prev(); pos != head; pos = pos.Prev()) + +DTC_END_NAMESPACE + +#endif diff --git a/src/search_local/index_storage/cache/node_set.cc b/src/search_local/index_storage/cache/node_set.cc new file mode 100644 index 0000000..623e413 --- /dev/null +++ b/src/search_local/index_storage/cache/node_set.cc @@ -0,0 +1,237 @@ +/* + * ===================================================================================== + * + * Filename: node_set.cc 
+ * + * Description: + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. + * + * ===================================================================================== + */ +#include "node_set.h" +#include "node_index.h" +#include "node_list.h" +#include "global.h" +#include "node.h" + +DTC_USING_NAMESPACE + +//定义每种属性的内存大小, 至少有以下四种,可以再增加 +const uint32_t NODE_SET::NG_ATTR_SIZE[] = + { + NODE_GROUP_INCLUDE_NODES * sizeof(NODE_ID_T), //NEXT_NODE + NODE_GROUP_INCLUDE_NODES * sizeof(NODE_ID_T) * 2, //TIME_LIST + NODE_GROUP_INCLUDE_NODES * sizeof(MEM_HANDLE_T), //VD_HANDLE + NODE_GROUP_INCLUDE_NODES / 8, //DIRTY_BMP +}; + +int NODE_SET::Init(NODE_ID_T id) +{ + ng_list.prev = ng_list.next = INVALID_HANDLE; + ng_dele.top = 0; + ng_dele.count = 0; + ng_free = 0; + ng_nid = id; + + //属性 + ng_attr.count = attr_count(); + ng_attr.offset[0] = base_header_size(); + for (unsigned int i = 1; i < ng_attr.count; i++) + { + ng_attr.offset[i] = ng_attr.offset[i - 1] + NG_ATTR_SIZE[i - 1]; + } + + /* 初始化每个Node */ + for (unsigned i = 0; i < NODE_GROUP_INCLUDE_NODES; ++i) + { + next_node_id(i) = INVALID_NODE_ID; + NODE_ID_T *lru = node_lru(i); + lru[LRU_PREV] = node_id(i); + lru[LRU_NEXT] = node_id(i); + vd_handle(i) = INVALID_HANDLE; + clr_dirty(i); + } + + return 0; +} + +/* init system reserved zone */ +int NODE_SET::system_reserved_init() +{ + Node dirtyNode = allocate_node(); + if (!dirtyNode) + { + return -2; + } + + Node cleanNode = allocate_node(); + if (!cleanNode) + { + return -3; + } + + Node emptyNode = allocate_node(); + if (!emptyNode) + { + return -3; + } + + /* init node list head */ + INIT_NODE_LIST_HEAD(dirtyNode, dirtyNode.node_id()); + INIT_NODE_LIST_HEAD(cleanNode, cleanNode.node_id()); + INIT_NODE_LIST_HEAD(emptyNode, emptyNode.node_id()); + + /* insert node head's node-id to node-index*/ + I_INSERT(dirtyNode); + I_INSERT(cleanNode); + I_INSERT(emptyNode); 
+ + return 0; +} + +/* check system reserved zone integrity + * the main purpose is upgrade/add the missing empty lru list + */ +int NODE_SET::system_reserved_check() +{ + if (ng_free < 2) + return -10; + // ng_free==2 old format, index 2 is free & reserved + // ng_free==3 new format, index 2 allocated to emptyNodeLru + int hasEmptyLru1 = ng_free >= 3; + + // if new format, index 2 is allocated, lru pointer should be non-zero + + // sanity check passed + if (hasEmptyLru1 == 0) + { + // no empty lru, allocate one + Node emptyNode = allocate_node(); + if (!emptyNode) + { + return -3; + } + + /* init node list head */ + INIT_NODE_LIST_HEAD(emptyNode, emptyNode.node_id()); + + /* insert node head's node-id to node-index*/ + I_INSERT(emptyNode); + return 1; + } + + return 0; +} + +Node NODE_SET::allocate_node(void) +{ + if (is_full()) + { + return Node(NULL, 0); + } + + //优先分配release掉的Node空间 + if (ng_dele.count > 0) + { + Node N(this, ng_dele.top); + N.Reset(); + + ng_dele.count--; + ng_dele.top = (uint8_t)N.vd_handle(); + + return N; + } + //在空闲Node中分配 + else + { + Node N(this, ng_free); + N.Reset(); + + ng_free++; + return N; + } +} + +int NODE_SET::release_node(Node N) +{ + //复用node的handle attribute空间来把释放掉的node组织为单链表 + N.vd_handle() = ng_dele.top; + ng_dele.top = N.Index(); + ng_dele.count++; + + return 0; +} + +bool NODE_SET::is_full(void) +{ + return (ng_dele.count == 0 && ng_free >= NODE_GROUP_INCLUDE_NODES); +} + +uint32_t NODE_SET::attr_count(void) +{ + return sizeof(NG_ATTR_SIZE) / sizeof(uint32_t); +} + +uint32_t NODE_SET::base_header_size(void) +{ + return OFFSETOF(NODE_SET, ng_attr) + OFFSETOF(NG_ATTR_T, offset) + sizeof(uint32_t) * attr_count(); +} + +uint32_t NODE_SET::attr_size(void) +{ + uint32_t size = 0; + + for (uint32_t i = 0; i < attr_count(); i++) + { + size += NG_ATTR_SIZE[i]; + } + + return size; +} + +uint32_t NODE_SET::Size(void) +{ + return base_header_size() + attr_size(); +} + +NODE_ID_T NODE_SET::node_id(int idx) const +{ + return (ng_nid + 
idx); +} + +NODE_ID_T &NODE_SET::next_node_id(int idx) +{ + return __CAST__(NEXT_NODE)[idx]; +} + +NODE_ID_T *NODE_SET::node_lru(int idx) +{ + return &(__CAST__(TIME_LIST)[idx * 2]); +} + +MEM_HANDLE_T &NODE_SET::vd_handle(int idx) +{ + return __CAST__(VD_HANDLE)[idx]; +} + +bool NODE_SET::is_dirty(int idx) +{ + return FD_ISSET(idx, __CAST__(DIRTY_BMP)); +} + +void NODE_SET::set_dirty(int idx) +{ + FD_SET(idx, __CAST__(DIRTY_BMP)); +} + +void NODE_SET::clr_dirty(int idx) +{ + FD_CLR(idx, __CAST__(DIRTY_BMP)); +} diff --git a/src/search_local/index_storage/cache/node_set.h b/src/search_local/index_storage/cache/node_set.h new file mode 100644 index 0000000..7d61245 --- /dev/null +++ b/src/search_local/index_storage/cache/node_set.h @@ -0,0 +1,104 @@ +/* + * ===================================================================================== + * + * Filename: node_set.h + * + * Description: + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. 
+ * + * ===================================================================================== + */ +#ifndef __DTC_NODE_SET_H +#define __DTC_NODE_SET_H + +#include +#include "namespace.h" +#include "global.h" +#include "ng_list.h" + +DTC_BEGIN_NAMESPACE + +enum attr_type +{ + NEXT_NODE = 0, + TIME_LIST = 1, + VD_HANDLE = 2, + DIRTY_BMP = 3, +}; +typedef enum attr_type ATTR_TYPE_T; + +//nodeset释放掉的node链表 +struct ng_delete +{ + uint16_t top; + uint16_t count; +}; +typedef struct ng_delete NG_DELE_T; + +//nodeset属性 +struct ng_attr +{ + uint32_t count; + uint32_t offset[0]; +}; +typedef struct ng_attr NG_ATTR_T; + +class Node; +struct node_set +{ +public: + NG_LIST_T ng_list; + NG_DELE_T ng_dele; + uint16_t ng_free; + uint8_t ng_rsv[2]; //保留空间 + NODE_ID_T ng_nid; + NG_ATTR_T ng_attr; + +private: + Node allocate_node(void); // 分配一个Node + int release_node(Node); // 释放一个Node + bool is_full(void); // NodeGroup是否已经分配完 + int Init(NODE_ID_T id); // NodeGroup初始化 + int system_reserved_init(); // 系统保留的NG初始化 + // this routine return: + // 0, passed, empty lru present + // 1, passed, empty lru created + // <0, integrity error + int system_reserved_check(); // 系统保留的NG一致性检查 + static uint32_t Size(void); // 返回nodegroup的总大小 + +private: + //属性操作接口,供CNode访问 + NODE_ID_T node_id(int idx) const; + NODE_ID_T &next_node_id(int idx); // attr1] -> 下一个Node的NodeID + NODE_ID_T *node_lru(int idx); // attr[2] -> LRU链表 + MEM_HANDLE_T &vd_handle(int idx); // attr[3] -> 数据handle + bool is_dirty(int idx); // attr[4] -> 脏位图 + void set_dirty(int idx); + void clr_dirty(int idx); + + //返回每种属性块的起始地址 + template + T *__CAST__(ATTR_TYPE_T t) { return (T *)((char *)this + ng_attr.offset[t]); } + +private: + static uint32_t attr_count(void); // 支持的属性个数 + static uint32_t attr_size(void); // 所有属性的内存大小 + static uint32_t base_header_size(void); // 除开属性外,Nodegroup的大小 + static const uint32_t NG_ATTR_SIZE[]; + + friend class Node; + friend class NGInfo; +}; +typedef struct node_set NODE_SET; + +DTC_END_NAMESPACE + 
/* conversion from malloc headers to user pointers, and back.
 * Every macro parameter is parenthesised so that an expression argument
 * (e.g. `p + 1`, `a ? b : c`) is evaluated as a whole before the cast or
 * arithmetic is applied. */
#define chunk2mem(h) ((void *)(((char *)(h)) + 2 * sizeof(ALLOC_SIZE_T)))
#define mem2chunk(h) ((void *)(((char *)(h)) - 2 * sizeof(ALLOC_SIZE_T)))
#define chunkhandle2memhandle(handle) ((handle) + 2 * sizeof(ALLOC_SIZE_T))
#define memhandle2chunkhandle(handle) ((handle) - 2 * sizeof(ALLOC_SIZE_T))
#if BIN_MEM_CHECK
#define chunksize2memsize(size) ((size) - 2 * sizeof(ALLOC_SIZE_T))
#define checked_chunksize2memsize(size) ((size) > 2 * sizeof(ALLOC_SIZE_T) ? ((size) - 2 * sizeof(ALLOC_SIZE_T)) : 0)
#else
#define chunksize2memsize(size) ((size) - sizeof(ALLOC_SIZE_T))
#define checked_chunksize2memsize(size) ((size) > sizeof(ALLOC_SIZE_T) ? ((size) - sizeof(ALLOC_SIZE_T)) : 0)
#endif

/* Check if m has acceptable alignment */

#define aligned_OK(m) (((unsigned long)(m)&MALLOC_ALIGN_MASK) == 0)

#define misaligned_chunk(h) \
    ((MALLOC_ALIGNMENT == 2 * SIZE_SZ ? (h) : chunkhandle2memhandle(h)) & MALLOC_ALIGN_MASK)

/*
   Check if a request is so large that it would wrap around zero when
   padded and aligned. To simplify some other code, the bound is made
   low enough so that adding MINSIZE will also not wrap around zero.
*/

#define REQUEST_OUT_OF_RANGE(req) \
    ((unsigned long)(req) >=      \
     (unsigned long)(ALLOC_SIZE_T)(-2 * MINSIZE))

/* pad request bytes into a usable size -- internal version */
#if BIN_MEM_CHECK
#define request2size(req) \
    (((req) + 2 * SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE) ? MINSIZE : ((req) + 2 * SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)
#else
#define request2size(req) \
    (((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE) ? MINSIZE : ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)
#endif

/* Same, except also perform argument check.
 * NOTE(review): this expands to two statements and returns from the calling
 * function; it is left in that shape because the call sites are outside
 * this view -- do not use it as the body of an unbraced if/else. */

#define checked_request2size(req, sz) \
    if (REQUEST_OUT_OF_RANGE(req))    \
    {                                 \
        return (INVALID_HANDLE);      \
    }                                 \
    (sz) = request2size(req);

/*
  --------------- Physical chunk operations ---------------
*/
/* size field is or'ed with PREV_INUSE when previous adjacent chunk in use */
#define PREV_INUSE 0x1
#define RESERVE_BITS (0x2 | 0x4)
/*
  Bits to mask off when extracting size
*/
#define SIZE_BITS (PREV_INUSE | RESERVE_BITS)

/* Get size, ignoring use bits */
#define CHUNK_SIZE(p) ((p)->m_tSize & ~(SIZE_BITS))
#define REAL_SIZE(sz) ((sz) & ~(SIZE_BITS))

/* extract inuse bit of previous chunk */
#define prev_inuse(p) ((p)->m_tSize & PREV_INUSE)
#define inuse_bit_at_offset(p, offset) (((MallocChunk *)(((char *)(p)) + (offset)))->m_tSize & PREV_INUSE)
#define set_inuse_bit_at_offset(p, s) (((MallocChunk *)(((char *)(p)) + (s)))->m_tSize |= PREV_INUSE)
#define clear_inuse_bit_at_offset(p, s) (((MallocChunk *)(((char *)(p)) + (s)))->m_tSize &= ~(PREV_INUSE))
#define set_size_at_offset(p, offset, size) (((MallocChunk *)(((char *)(p)) + (offset)))->m_tSize = REAL_SIZE(size) | (((MallocChunk *)(((char *)(p)) + (offset)))->m_tSize & SIZE_BITS))
#define set_presize_at_offset(p, offset, size) (((MallocChunk *)(((char *)(p)) + (offset)))->m_tPreSize = REAL_SIZE(size))
+#define in_smallbin_range(sz) \ + ((unsigned long)(sz) < (unsigned long)MIN_LARGE_SIZE) + +#define smallbin_index(sz) (((unsigned)(sz)) >> 3) + +#define largebin_index(sz) \ + (((((unsigned long)(sz)) >> 6) <= 32) ? 56 + (((unsigned long)(sz)) >> 6) : ((((unsigned long)(sz)) >> 9) <= 20) ? 91 + (((unsigned long)(sz)) >> 9) : ((((unsigned long)(sz)) >> 12) <= 10) ? 110 + (((unsigned long)(sz)) >> 12) : ((((unsigned long)(sz)) >> 15) <= 4) ? 119 + (((unsigned long)(sz)) >> 15) : ((((unsigned long)(sz)) >> 18) <= 2) ? 124 + (((unsigned long)(sz)) >> 18) : 126) + +#define bin_index(sz) \ + ((in_smallbin_range(sz)) ? smallbin_index(sz) : largebin_index(sz)) + +#define NFASTBINS NSMALLBINS +#define FAST_MAX_SIZE MIN_LARGE_SIZE +#define fastbin_index(sz) smallbin_index(sz) + +#define AT_TOP(chunk, sz) (((char *)chunk) + sz == ((char *)m_pBaseAddr) + m_pstHead->m_hTop) + +#define CAN_COMBILE(size, add) ((INTER_SIZE_T)size + add <= (INTER_SIZE_T)MAX_ALLOC_SIZE) + +DTCBinMalloc::DTCBinMalloc() +{ + m_pBaseAddr = NULL; + m_pstHead = NULL; + m_ptBin = NULL; + m_ptFastBin = NULL; + m_ptUnsortedBin = NULL; + statChunkTotal = statmgr.get_item_u32(DTC_CHUNK_TOTAL); + statDataSize = statmgr.get_item(DTC_DATA_SIZE); + statMemoryTop = statmgr.get_item(DTC_MEMORY_TOP); + statTmpDataSizeRecently = 0; + statTmpDataAllocCountRecently = 0; + statAverageDataSizeRecently = statmgr.get_item(DATA_SIZE_AVG_RECENT); + memset(m_szErr, 0, sizeof(m_szErr)); + minChunkSize = MINSIZE; +} + +DTCBinMalloc::~DTCBinMalloc() +{ +} + +DTCBinMalloc *DTCBinMalloc::Instance() +{ + return Singleton::Instance(); +} + +void DTCBinMalloc::Destroy() +{ + Singleton::Destroy(); +} +/*初始化header中的signature域*/ +void DTCBinMalloc::init_sign() +{ + static const unsigned int V4Sign[14] = { + DTC_SIGN_0, + DTC_SIGN_1, + DTC_SIGN_2, + DTC_SIGN_3, + DTC_SIGN_4, + DTC_SIGN_5, + DTC_SIGN_6, + DTC_SIGN_7, + DTC_SIGN_8, + DTC_SIGN_9, + DTC_SIGN_A, + DTC_SIGN_B, + DTC_SIGN_C, + DTC_SIGN_D}; + + memcpy(m_pstHead->m_auiSign, 
V4Sign, sizeof(m_pstHead->m_auiSign)); +} + +#if __WORDSIZE == 64 +#define UINT64FMT_T "%lu" +#else +#define UINT64FMT_T "%llu" +#endif +/*初始化cache头信息*/ +/*传入参数,cache的起始地址,cache的总大小*/ +int DTCBinMalloc::Init(void *pAddr, INTER_SIZE_T tSize) +{ + int i; + + if (tSize < sizeof(MemHead) + sizeof(CBin) * (NBINS + NFASTBINS + 1) + DTC_RESERVE_SIZE + MINSIZE) + { + snprintf(m_szErr, sizeof(m_szErr), "invalid size[" UINT64FMT_T "]", tSize); + return (-1); + } + + m_pBaseAddr = pAddr; + m_pstHead = (MemHead *)m_pBaseAddr; + memset(m_pstHead, 0, sizeof(MemHead)); + init_sign(); + m_pstHead->m_ushVer = DTC_VER_MIN; + m_pstHead->m_ushHeadSize = sizeof(MemHead); + m_pstHead->m_tSize = tSize; + m_pstHead->m_tUserAllocChunkCnt = 0; + m_pstHead->m_hReserveZone = sizeof(MemHead) + sizeof(CBin) * (NBINS + NFASTBINS + 1); + m_pstHead->m_hReserveZone = (m_pstHead->m_hReserveZone + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK; + m_pstHead->m_hBottom = (m_pstHead->m_hReserveZone + DTC_RESERVE_SIZE + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK; + m_pstHead->m_hTop = m_pstHead->m_hBottom; + m_pstHead->m_tUserAllocSize = m_pstHead->m_hBottom; + statMemoryTop = m_pstHead->m_hTop; + m_pstHead->m_tLastFreeChunkSize = (tSize > m_pstHead->m_hTop + MINSIZE) ? 
(tSize - m_pstHead->m_hTop - MINSIZE) : 0; + m_pstHead->m_ushBinCnt = NBINS; + m_pstHead->m_ushFastBinCnt = NFASTBINS; + memset(m_pstHead->m_auiBinBitMap, 0, sizeof(m_pstHead->m_auiBinBitMap)); + m_ptBin = (CBin *)(((char *)m_pBaseAddr) + sizeof(MemHead)); + m_ptFastBin = m_ptBin + NBINS; + m_ptUnsortedBin = m_ptFastBin + NFASTBINS; + + for (i = 0; i < NBINS; i++) + { + m_ptBin[i].m_hPreChunk = INVALID_HANDLE; + m_ptBin[i].m_hNextChunk = INVALID_HANDLE; + } + + for (i = 0; i < NFASTBINS; i++) + { + m_ptFastBin[i].m_hPreChunk = INVALID_HANDLE; + m_ptFastBin[i].m_hNextChunk = INVALID_HANDLE; + } + + m_ptUnsortedBin[0].m_hPreChunk = INVALID_HANDLE; + m_ptUnsortedBin[0].m_hNextChunk = INVALID_HANDLE; + + MallocChunk *pstChunk; + pstChunk = (MallocChunk *)handle_to_ptr(m_pstHead->m_hTop); + pstChunk->m_tPreSize = 0; + pstChunk->m_tSize = PREV_INUSE; + + // init stat + statChunkTotal = m_pstHead->m_tUserAllocChunkCnt; + statDataSize = m_pstHead->m_tUserAllocSize; + + return (0); +} +/*校验cache的版本是否正确*/ +int DTCBinMalloc::detect_version() +{ + if (m_pstHead->m_auiSign[0] != DTC_SIGN_0 || m_pstHead->m_auiSign[1] != DTC_SIGN_1) + return 1; + if (m_pstHead->m_ushVer == 2) + return (2); + if (m_pstHead->m_ushVer == 3) + return (3); + if (m_pstHead->m_ushVer == 4) + return (4); + + snprintf(m_szErr, sizeof(m_szErr), "unknown version signature %u", m_pstHead->m_ushVer); + return (0); +} +/*查看cache是否一致:在启动dtc,加载cache的时候,只要是需要写cache,就会设置不一致,防止dtc在运行时crash,重启后不经检查使用乱掉的内存*/ +int DTCBinMalloc::share_memory_integrity() +{ + return (int)m_pstHead->m_shmIntegrity; +} + +void DTCBinMalloc::set_share_memory_integrity(const int flags) +{ + if (flags) + m_pstHead->m_shmIntegrity = 1; + else + m_pstHead->m_shmIntegrity = 0; +} +/*对于已经存在的IPC shared memory,dtc在启动后会将这个块内存作为cache,在这里检查这块cache的头信息,是否正确*/ +int DTCBinMalloc::Attach(void *pAddr, INTER_SIZE_T tSize) +{ + + if (tSize < sizeof(MemHead) + sizeof(CBin) * (NBINS + NFASTBINS + 1) + MINSIZE) + { + snprintf(m_szErr, sizeof(m_szErr), "invalid 
size[" UINT64FMT_T "]", tSize); + return (-1); + } + + m_pBaseAddr = pAddr; + m_pstHead = (MemHead *)m_pBaseAddr; + if (detect_version() != DTC_VER_MIN) + { + snprintf(m_szErr, sizeof(m_szErr), "Unsupported preferred version %u", m_pstHead->m_ushVer); + return (-2); + } + + if (m_pstHead->m_tSize != tSize) + { + snprintf(m_szErr, sizeof(m_szErr), "invalid argument"); + return (-3); + } + if (m_pstHead->m_hTop >= m_pstHead->m_tSize) + { + snprintf(m_szErr, sizeof(m_szErr), "memory corruption-invalid bottom value"); + return (-4); + } + m_ptBin = (CBin *)(((char *)m_pBaseAddr) + sizeof(MemHead)); + m_ptFastBin = m_ptBin + NBINS; + m_ptUnsortedBin = m_ptFastBin + NFASTBINS; + + // init stat + statChunkTotal = m_pstHead->m_tUserAllocChunkCnt; + statDataSize = m_pstHead->m_tUserAllocSize; + + return (0); +} + +ALLOC_HANDLE_T DTCBinMalloc::get_reserve_zone() +{ + return m_pstHead->m_hReserveZone; +} +/*输入参数是chunk的用户handle*/ +/*返回这块chunk的用户使用空间的大小*/ +ALLOC_SIZE_T DTCBinMalloc::chunk_size(ALLOC_HANDLE_T hHandle) +{ + MallocChunk *pstChunk; + + if (hHandle >= m_pstHead->m_hTop || hHandle <= m_pstHead->m_hBottom) + { + snprintf(m_szErr, sizeof(m_szErr), "[chunk_size]-invalid handle"); + return (0); + } + + pstChunk = (MallocChunk *)mem2chunk(handle_to_ptr(hHandle)); + + if (check_inuse_chunk(pstChunk) != 0) + { + snprintf(m_szErr, sizeof(m_szErr), "[chunk_size]-invalid chunk"); + return (0); + } + + return chunksize2memsize(CHUNK_SIZE(pstChunk)); +} +/*设置输入bin上的头chunk为使用状态,并将这个chunk从bin上拖链*/ +void *DTCBinMalloc::bin_malloc(CBin &ptBin) +{ + MallocChunk *pstChunk; + void *p; + + if (ptBin.m_hNextChunk == INVALID_HANDLE) + return (NULL); + + p = handle_to_ptr(ptBin.m_hNextChunk); + pstChunk = (MallocChunk *)p; + set_inuse_bit_at_offset(pstChunk, REAL_SIZE(pstChunk->m_tSize)); + unlink_bin(ptBin, ptBin.m_hNextChunk); + + return p; +} +/*对所有的bin检查:small&large bins, fast bins, unsorted bins*/ +/*校验方法:每个bin组成一个双向的循环链表*/ +int DTCBinMalloc::check_bin() +{ + int i; + + INTER_HANDLE_T 
hHandle; + MallocChunk *pstChunk; + for (i = 0; i < NBINS; i++) + { + hHandle = m_ptBin[i].m_hNextChunk; + if (hHandle != INVALID_HANDLE) + { + do + { + pstChunk = (MallocChunk *)handle_to_ptr(hHandle); + if (pstChunk->m_hNextChunk != INVALID_HANDLE) + hHandle = pstChunk->m_hNextChunk; + } while (pstChunk->m_hNextChunk != INVALID_HANDLE); + } + if (m_ptBin[i].m_hPreChunk != hHandle) + { + snprintf(m_szErr, sizeof(m_szErr), "bad bin[%d]", i); + return (-1); + } + } + + for (i = 0; i < NFASTBINS; i++) + { + hHandle = m_ptFastBin[i].m_hNextChunk; + if (hHandle != INVALID_HANDLE) + { + do + { + pstChunk = (MallocChunk *)handle_to_ptr(hHandle); + if (pstChunk->m_hNextChunk != INVALID_HANDLE) + hHandle = pstChunk->m_hNextChunk; + } while (pstChunk->m_hNextChunk != INVALID_HANDLE); + } + if (m_ptFastBin[i].m_hPreChunk != hHandle) + { + snprintf(m_szErr, sizeof(m_szErr), "bad fast-bin[%d]", i); + return (-2); + } + } + + hHandle = m_ptUnsortedBin[0].m_hNextChunk; + if (hHandle != INVALID_HANDLE) + { + do + { + pstChunk = (MallocChunk *)handle_to_ptr(hHandle); + if (pstChunk->m_hNextChunk != INVALID_HANDLE) + hHandle = pstChunk->m_hNextChunk; + } while (pstChunk->m_hNextChunk != INVALID_HANDLE); + } + if (m_ptUnsortedBin[0].m_hPreChunk != hHandle) + { +#if __WORDSIZE == 64 + snprintf(m_szErr, sizeof(m_szErr), "bad unsorted-bin[%d] %lu!=%lu", 0, m_ptUnsortedBin[0].m_hPreChunk, hHandle); +#else + snprintf(m_szErr, sizeof(m_szErr), "bad unsorted-bin[%d] %llu!=%llu", 0, m_ptUnsortedBin[0].m_hPreChunk, hHandle); +#endif + return (-3); + } + + return (0); +} +/*校验存放在bin中的chunk的一致性*/ +/*检验方法:从分配的top线开始向bottom方向,一个chunk一个chunk的检查,检查这个chunk的大小是不是和它的后一个chunk的presize一致*/ +#if BIN_MEM_CHECK +int DTCBinMalloc::check_mem() +{ + INTER_HANDLE_T hHandle; + MallocChunk *pstChunk; + ALLOC_SIZE_T tSize; + + tSize = 0; + hHandle = m_pstHead->m_hTop; + while (hHandle > m_pstHead->m_hBottom) + { + pstChunk = (MallocChunk *)handle_to_ptr(hHandle); + if (CHUNK_SIZE(pstChunk) != tSize) + { +#if 
__WORDSIZE == 64 + snprintf(m_szErr, sizeof(m_szErr), "bad memory1 handle[%lu]", hHandle); +#else + snprintf(m_szErr, sizeof(m_szErr), "bad memory1 handle[%llu]", hHandle); +#endif + return (-1); + } + tSize = pstChunk->m_tPreSize; + if (hHandle < tSize) + { +#if __WORDSIZE == 64 + snprintf(m_szErr, sizeof(m_szErr), "bad memory handle[%lu]", hHandle); +#else + snprintf(m_szErr, sizeof(m_szErr), "bad memory handle[%llu]", hHandle); +#endif + return (-2); + } + hHandle -= tSize; + } + + return (0); +} +#endif +/*从fastbins的一个bin下取一个空闲chunk,满足tsize大小。*/ +/*bin的索引查找方法是:按照在smallbins中查找bin的方法进行*/ +void *DTCBinMalloc::fast_malloc(ALLOC_SIZE_T tSize) +{ + return bin_malloc(m_ptFastBin[smallbin_index(tSize)]); +} +/*从smallbins的一个bin下取一个空闲chunk满足tsize大小*/ +void *DTCBinMalloc::small_bin_malloc(ALLOC_SIZE_T tSize) +{ + void *p; + unsigned int uiBinIdx; + + uiBinIdx = smallbin_index(tSize); + p = bin_malloc(m_ptBin[uiBinIdx]); + if (empty_bin(uiBinIdx)) + clear_bin_bit_map(uiBinIdx); + + return (p); +} +/*释放fastbins的每个bin下的空闲chunk*/ +/*对于每个chunk试探是否可以和内存里的前后chunk合并,合并如果可以,并设置新chunk为使用状态,并从bin上拖链,最后将拖链的chunk存放在unsortedbin下*/ +int DTCBinMalloc::free_fast() +{ + if (!(m_pstHead->m_uiFlags & MALLOC_FLAG_FAST)) // no fast chunk + return (0); + + for (int i = 0; i < NFASTBINS; i++) + { + if (m_ptFastBin[i].m_hNextChunk != INVALID_HANDLE) + { + MallocChunk *pstChunk; + // MallocChunk* pstPreChunk; + MallocChunk *pstNextChunk; + ALLOC_SIZE_T tSize; + ALLOC_SIZE_T tPreSize; + // ALLOC_SIZE_T tNextSize; + unsigned int uiBinIdx; + + do + { // free fast-chunk & put it into unsorted chunk list + pstChunk = (MallocChunk *)handle_to_ptr(m_ptFastBin[i].m_hNextChunk); + unlink_bin(m_ptFastBin[i], m_ptFastBin[i].m_hNextChunk); + + tSize = CHUNK_SIZE(pstChunk); + if (!prev_inuse(pstChunk) && CAN_COMBILE(tSize, pstChunk->m_tPreSize)) + { + tPreSize = pstChunk->m_tPreSize; + tSize += tPreSize; + pstChunk = (MallocChunk *)(((char *)pstChunk) - tPreSize); + + uiBinIdx = bin_index(tPreSize); + 
unlink_bin(m_ptBin[uiBinIdx], ptr_to_handle(pstChunk)); + if (empty_bin(uiBinIdx)) + clear_bin_bit_map(uiBinIdx); + set_inuse_bit_at_offset(pstChunk, tSize); + } + + if (!AT_TOP(pstChunk, tSize)) + { + pstNextChunk = (MallocChunk *)(((char *)pstChunk) + tSize); + ALLOC_SIZE_T tNextSize = CHUNK_SIZE(pstNextChunk); + uiBinIdx = bin_index(tNextSize); + if (!inuse_bit_at_offset(pstNextChunk, tNextSize) && CAN_COMBILE(tSize, tNextSize)) + { + tSize += tNextSize; + unlink_bin(m_ptBin[uiBinIdx], ptr_to_handle(pstNextChunk)); + if (empty_bin(uiBinIdx)) + clear_bin_bit_map(uiBinIdx); + set_inuse_bit_at_offset(pstChunk, tSize); + } + else + { + // clear_inuse_bit_at_offset(pstNextChunk, 0); + } + } + + if (m_pstHead->m_tLastFreeChunkSize < REAL_SIZE(tSize)) + m_pstHead->m_tLastFreeChunkSize = REAL_SIZE(tSize); + pstChunk->m_tSize = REAL_SIZE(tSize) | (pstChunk->m_tSize & SIZE_BITS); + if (AT_TOP(pstChunk, tSize)) + { + // combine into bottom + m_pstHead->m_hTop -= tSize; + statMemoryTop = m_pstHead->m_hTop; + // clear_inuse_bit_at_offset(pstChunk, 0); + } + else + { + link_bin(m_ptUnsortedBin[0], ptr_to_handle(pstChunk)); + } + pstNextChunk = (MallocChunk *)(((char *)pstChunk) + REAL_SIZE(tSize)); + pstNextChunk->m_tPreSize = REAL_SIZE(tSize); + + } while (m_ptFastBin[i].m_hNextChunk != INVALID_HANDLE); + } + } + + m_pstHead->m_uiFlags &= ~MALLOC_FLAG_FAST; + + return (0); +} +/*从top线上面分配一个chunk满足tsize*/ +void *DTCBinMalloc::top_alloc(ALLOC_SIZE_T tSize) +{ + if (m_pstHead->m_hTop + tSize + MINSIZE >= m_pstHead->m_tSize) + { + snprintf(m_szErr, sizeof(m_szErr), "out of memory"); + return (NULL); + } + + void *p; + MallocChunk *pstChunk; + pstChunk = (MallocChunk *)handle_to_ptr(m_pstHead->m_hTop); + pstChunk->m_tSize = (pstChunk->m_tSize & SIZE_BITS) | REAL_SIZE(tSize); + p = (void *)pstChunk; + + pstChunk = (MallocChunk *)(((char *)pstChunk) + tSize); + pstChunk->m_tPreSize = REAL_SIZE(tSize); + pstChunk->m_tSize = PREV_INUSE; + + m_pstHead->m_hTop += tSize; + statMemoryTop 
= m_pstHead->m_hTop; + + return chunk2mem(p); +} +/*从输入的bin上将handle指定的chunk拖链*/ +int DTCBinMalloc::unlink_bin(CBin &stBin, INTER_HANDLE_T hHandle) +{ + MallocChunk *pstChunk; + MallocChunk *pstTmp; + + if (hHandle == INVALID_HANDLE) + return (-1); + + if (stBin.m_hNextChunk == INVALID_HANDLE || stBin.m_hPreChunk == INVALID_HANDLE) + { + snprintf(m_szErr, sizeof(m_szErr), "unlink-bin: bad bin!"); + return (-2); + } + + pstChunk = (MallocChunk *)handle_to_ptr(hHandle); + if (pstChunk->m_hPreChunk == INVALID_HANDLE) + { + //remove head + stBin.m_hNextChunk = pstChunk->m_hNextChunk; + } + else + { + pstTmp = (MallocChunk *)handle_to_ptr(pstChunk->m_hPreChunk); + pstTmp->m_hNextChunk = pstChunk->m_hNextChunk; + } + if (pstChunk->m_hNextChunk == INVALID_HANDLE) + { + stBin.m_hPreChunk = pstChunk->m_hPreChunk; + } + else + { + pstTmp = (MallocChunk *)handle_to_ptr(pstChunk->m_hNextChunk); + pstTmp->m_hPreChunk = pstChunk->m_hPreChunk; + } + + return (0); +} +/*将handle指定的chunk插入到bin上*/ +int DTCBinMalloc::link_bin(CBin &stBin, INTER_HANDLE_T hHandle) +{ + MallocChunk *pstChunk; + MallocChunk *pstTmp; + + if (hHandle == INVALID_HANDLE) + return (-1); + + pstChunk = (MallocChunk *)handle_to_ptr(hHandle); + pstChunk->m_hNextChunk = stBin.m_hNextChunk; + pstChunk->m_hPreChunk = INVALID_HANDLE; + if (stBin.m_hNextChunk != INVALID_HANDLE) + { + pstTmp = (MallocChunk *)handle_to_ptr(stBin.m_hNextChunk); + pstTmp->m_hPreChunk = hHandle; + if (stBin.m_hPreChunk == INVALID_HANDLE) + { + snprintf(m_szErr, sizeof(m_szErr), "link-bin: bad bin"); + return (-2); + } + } + else + { + if (stBin.m_hPreChunk != INVALID_HANDLE) + { + snprintf(m_szErr, sizeof(m_szErr), "link-bin: bad bin"); + return (-3); + } + stBin.m_hPreChunk = hHandle; + } + stBin.m_hNextChunk = hHandle; + + return (0); +} +/*在bin中查找一个合适的位置,将hanlde指定的chunk插入进去*/ +/*寻找位置的方法:从bin的尾部开始,找到第一个位置,它的大小介于前后chunk的大小之间*/ +int DTCBinMalloc::link_sorted_bin(CBin &stBin, INTER_HANDLE_T hHandle, ALLOC_SIZE_T tSize) +{ + MallocChunk 
*pstChunk; + MallocChunk *pstNextChunk; + + if (hHandle == INVALID_HANDLE) + return (-1); + + pstChunk = (MallocChunk *)handle_to_ptr(hHandle); + pstChunk->m_hNextChunk = INVALID_HANDLE; + pstChunk->m_hPreChunk = INVALID_HANDLE; + + if (stBin.m_hNextChunk == INVALID_HANDLE) + { // empty bin + pstChunk->m_hPreChunk = INVALID_HANDLE; + pstChunk->m_hNextChunk = INVALID_HANDLE; + stBin.m_hNextChunk = hHandle; + stBin.m_hPreChunk = hHandle; + } + else + { + INTER_HANDLE_T hPre; + hPre = stBin.m_hPreChunk; + tSize = REAL_SIZE(tSize) | PREV_INUSE; + MallocChunk *pstPreChunk = 0; + while (hPre != INVALID_HANDLE) + { + pstPreChunk = (MallocChunk *)handle_to_ptr(hPre); + if (tSize <= pstPreChunk->m_tSize) + break; + hPre = pstPreChunk->m_hPreChunk; + } + if (hPre == INVALID_HANDLE) + { + if (stBin.m_hPreChunk == INVALID_HANDLE) + { + // empty list + snprintf(m_szErr, sizeof(m_szErr), "memory corruction"); + return (-1); + } + + // place chunk at list head + link_bin(stBin, hHandle); + } + else + { + pstChunk->m_hPreChunk = hPre; + pstChunk->m_hNextChunk = pstPreChunk->m_hNextChunk; + pstPreChunk->m_hNextChunk = hHandle; + if (pstChunk->m_hNextChunk != INVALID_HANDLE) + { + pstNextChunk = (MallocChunk *)handle_to_ptr(pstChunk->m_hNextChunk); + pstNextChunk->m_hPreChunk = ptr_to_handle(pstChunk); + } + else + { + // list tail + stBin.m_hPreChunk = hHandle; + } + } + } + + return (0); +} +/*分配chunk满足tsize的主体逻辑*/ +ALLOC_HANDLE_T DTCBinMalloc::inter_malloc(ALLOC_SIZE_T tSize) +{ + void *p; + + checked_request2size(tSize, tSize); + + /* no more use fast bin + if(tSize < FAST_MAX_SIZE){ + p = fast_malloc(tSize); + if(p != NULL) + return ptr_to_handle(chunk2mem(p)); + } + */ + + if (in_smallbin_range(tSize)) + { + p = small_bin_malloc(tSize); + if (p != NULL) + return ptr_to_handle(chunk2mem(p)); + } + + for (;;) + { + MallocChunk *pstChunk = NULL; + MallocChunk *pstNextChunk = NULL; + + unsigned int uiBinIdx = bin_index(tSize); + if (!in_smallbin_range(tSize)) + { + INTER_HANDLE_T 
v = m_ptBin[uiBinIdx].m_hNextChunk; + unsigned int try_search_count = 0; + + /* 每个bin最多只搜索100次,如果失败则跳至下一个bin */ + while (v != INVALID_HANDLE && ++try_search_count < 100) + { + + pstChunk = (MallocChunk *)handle_to_ptr(v); + if (CHUNK_SIZE(pstChunk) >= tSize) + break; + + v = pstChunk->m_hNextChunk; + } + + if (!(v != INVALID_HANDLE && try_search_count < 100)) + goto SEARCH_NEXT_BIN; + + ALLOC_SIZE_T tRemainSize; + tRemainSize = CHUNK_SIZE(pstChunk) - tSize; + // unlink + unlink_bin(m_ptBin[uiBinIdx], ptr_to_handle(pstChunk)); + if (empty_bin(uiBinIdx)) + clear_bin_bit_map(uiBinIdx); + + if (tRemainSize < get_min_chunk_size()) + { + set_inuse_bit_at_offset(pstChunk, CHUNK_SIZE(pstChunk)); + } + else + { + pstChunk->m_tSize = tSize | (pstChunk->m_tSize & SIZE_BITS); + pstNextChunk = (MallocChunk *)(((char *)pstChunk) + tSize); + pstNextChunk->m_tSize = tRemainSize; + pstNextChunk->m_tPreSize = tSize; + set_inuse_bit_at_offset(pstNextChunk, 0); + ((MallocChunk *)(((char *)pstChunk) + tSize + tRemainSize))->m_tPreSize = tRemainSize; + set_inuse_bit_at_offset(pstNextChunk, tRemainSize); + ALLOC_SIZE_T user_size; + inter_free(chunkhandle2memhandle(ptr_to_handle(pstNextChunk)), user_size); + } + + p = (void *)pstChunk; + return ptr_to_handle(chunk2mem(p)); + } + + /* + Search for a chunk by scanning bins, starting with next largest + bin. This search is strictly by best-fit; i.e., the smallest + (with ties going to approximately the least recently used) chunk + that fits is selected. 
+ */ + SEARCH_NEXT_BIN: + uiBinIdx++; + unsigned int uiBitMapIdx = uiBinIdx / 32; + if (m_pstHead->m_auiBinBitMap[uiBitMapIdx] == 0) + { + uiBitMapIdx++; + uiBinIdx = uiBitMapIdx * 32; + while (uiBitMapIdx < sizeof(m_pstHead->m_auiBinBitMap) && m_pstHead->m_auiBinBitMap[uiBitMapIdx] == 0) + { + uiBitMapIdx++; + uiBinIdx += 32; + } + } + while (uiBinIdx < NBINS && m_ptBin[uiBinIdx].m_hNextChunk == INVALID_HANDLE) + uiBinIdx++; + + if (uiBinIdx >= NBINS) + { + goto MALLOC_BOTTOM; + } + + INTER_HANDLE_T hPre; + hPre = m_ptBin[uiBinIdx].m_hPreChunk; + do + { + pstChunk = (MallocChunk *)handle_to_ptr(hPre); + hPre = pstChunk->m_hPreChunk; + } while (CHUNK_SIZE(pstChunk) < tSize); + ALLOC_SIZE_T tRemainSize; + tRemainSize = CHUNK_SIZE(pstChunk) - tSize; + // unlink + unlink_bin(m_ptBin[uiBinIdx], ptr_to_handle(pstChunk)); + if (empty_bin(uiBinIdx)) + clear_bin_bit_map(uiBinIdx); + + if (tRemainSize < get_min_chunk_size()) + { + set_inuse_bit_at_offset(pstChunk, CHUNK_SIZE(pstChunk)); + } + else + { + /* disable unsorted bins */ + pstChunk->m_tSize = tSize | (pstChunk->m_tSize & SIZE_BITS); + pstNextChunk = (MallocChunk *)(((char *)pstChunk) + tSize); + pstNextChunk->m_tSize = tRemainSize; + pstNextChunk->m_tPreSize = tSize; + set_inuse_bit_at_offset(pstNextChunk, 0); + ((MallocChunk *)(((char *)pstChunk) + tSize + tRemainSize))->m_tPreSize = tRemainSize; + set_inuse_bit_at_offset(pstNextChunk, tRemainSize); + ALLOC_SIZE_T user_size; + inter_free(chunkhandle2memhandle(ptr_to_handle(pstNextChunk)), user_size); + } + + p = (void *)pstChunk; + return ptr_to_handle(chunk2mem(p)); + } + +MALLOC_BOTTOM: + return ptr_to_handle(top_alloc(tSize)); +} +/*对intermalloc的包装,对返回结果进行了简单检查*/ +ALLOC_HANDLE_T DTCBinMalloc::Malloc(ALLOC_SIZE_T tSize) +{ + MallocChunk *pstChunk; + + m_pstHead->m_tLastFreeChunkSize = 0; + ALLOC_HANDLE_T hHandle = inter_malloc(tSize); + if (hHandle != INVALID_HANDLE) + { + // log_error("MALLOC: %lu", hHandle); + pstChunk = (MallocChunk 
*)mem2chunk(handle_to_ptr(hHandle)); + m_pstHead->m_tUserAllocSize += CHUNK_SIZE(pstChunk); + m_pstHead->m_tUserAllocChunkCnt++; + ++statChunkTotal; + statDataSize = m_pstHead->m_tUserAllocSize; + add_alloc_size_to_stat(tSize); + } + return (hHandle); +} +/*对intermalloc的包装,对返回结果进行了简单检查,并将返回的chunk的用户部分清空*/ +ALLOC_HANDLE_T DTCBinMalloc::Calloc(ALLOC_SIZE_T tSize) +{ + ALLOC_HANDLE_T hHandle = Malloc(tSize); + if (hHandle != INVALID_HANDLE) + { + char *p = Pointer(hHandle); + memset(p, 0x00, tSize); + } + + return hHandle; +} + +/*当输入的chunk在使用中时候返回0*/ +int DTCBinMalloc::check_inuse_chunk(MallocChunk *pstChunk) +{ + if (!inuse_bit_at_offset(pstChunk, CHUNK_SIZE(pstChunk))) + { + snprintf(m_szErr, sizeof(m_szErr), "chunk not inuse!"); + return (-1); + } + + MallocChunk *pstTmp; + if (!prev_inuse(pstChunk)) + { + pstTmp = (MallocChunk *)(((char *)pstChunk) - pstChunk->m_tPreSize); + if (ptr_to_handle(pstTmp) < m_pstHead->m_hBottom || CHUNK_SIZE(pstTmp) != pstChunk->m_tPreSize) + { + snprintf(m_szErr, sizeof(m_szErr), "invalid pre-chunk size!"); + return (-2); + } + } + + pstTmp = (MallocChunk *)(((char *)pstChunk) + CHUNK_SIZE(pstChunk)); + if (!AT_TOP(pstTmp, 0)) + { + if (CHUNK_SIZE(pstTmp) < MINSIZE) + { + snprintf(m_szErr, sizeof(m_szErr), "invalid next chunk!"); + return (-3); + } + } + + return (0); +} +/*realloc的主体逻辑*/ +ALLOC_HANDLE_T DTCBinMalloc::inter_re_alloc(ALLOC_HANDLE_T hHandle, ALLOC_SIZE_T tSize, ALLOC_SIZE_T &tOldMemSize) +{ + INTER_HANDLE_T hNewHandle; + INTER_SIZE_T tNewSize; + MallocChunk *pstChunk; + + ALLOC_SIZE_T tUserReqSize = tSize; + + tOldMemSize = 0; + if (hHandle == INVALID_HANDLE) + { + // return inter_malloc(tSize - MALLOC_ALIGN_MASK); + return inter_malloc(tSize); + } + + if (tSize == 0) + { + inter_free(hHandle, tOldMemSize); + return (INVALID_HANDLE); + } + + checked_request2size(tSize, tSize); + + if (hHandle >= m_pstHead->m_hTop || hHandle <= m_pstHead->m_hBottom) + { + snprintf(m_szErr, sizeof(m_szErr), "realloc-invalid handle"); + 
return (INVALID_HANDLE); + } + + ALLOC_SIZE_T tOldSize; + pstChunk = (MallocChunk *)mem2chunk(handle_to_ptr(hHandle)); + tOldSize = CHUNK_SIZE(pstChunk); + hHandle = ptr_to_handle((void *)pstChunk); + if (hHandle + tOldSize > m_pstHead->m_hTop) + { +#if __WORDSIZE == 64 + snprintf(m_szErr, sizeof(m_szErr), "realloc-invalid handle: %lu, size: %u", hHandle, tOldSize); +#else + snprintf(m_szErr, sizeof(m_szErr), "realloc-invalid handle: %llu, size: %u", hHandle, tOldSize); +#endif + return (INVALID_HANDLE); + } + + if (misaligned_chunk(hHandle)) + { +#if __WORDSIZE == 64 + snprintf(m_szErr, sizeof(m_szErr), "realloc-invalid handle: %lu, size: %u", hHandle, tOldSize); +#else + snprintf(m_szErr, sizeof(m_szErr), "realloc-invalid handle: %llu, size: %u", hHandle, tOldSize); +#endif + return (INVALID_HANDLE); + } + + if (tOldSize < MINSIZE) + { +#if __WORDSIZE == 64 + snprintf(m_szErr, sizeof(m_szErr), "realloc-invalid old-size: %lu, size: %u", hHandle, tOldSize); +#else + snprintf(m_szErr, sizeof(m_szErr), "realloc-invalid old-size: %llu, size: %u", hHandle, tOldSize); +#endif + return (INVALID_HANDLE); + } + + if (check_inuse_chunk(pstChunk) != 0) + { +#if __WORDSIZE == 64 + snprintf(m_szErr, sizeof(m_szErr), "realloc-invalid chunk: %lu, size: %u", hHandle, tOldSize); +#else + snprintf(m_szErr, sizeof(m_szErr), "realloc-invalid chunk: %llu, size: %u", hHandle, tOldSize); +#endif + return (INVALID_HANDLE); + } + tOldMemSize = tOldSize; + + int iPreInUse = prev_inuse(pstChunk); + ALLOC_SIZE_T tPreSize = pstChunk->m_tPreSize; + + MallocChunk *pstTmp; + MallocChunk *pstNextChunk; + pstNextChunk = (MallocChunk *)(((char *)pstChunk) + CHUNK_SIZE(pstChunk)); + + if (tOldSize >= tSize) + { + hNewHandle = hHandle; + tNewSize = tOldSize; + } + else + { + /* Try to expand forward into top */ + if (AT_TOP(pstChunk, tOldSize) && m_pstHead->m_hTop + (tSize - tOldSize) + MINSIZE < m_pstHead->m_tSize) + { + pstChunk->m_tSize = REAL_SIZE(tSize) | (pstChunk->m_tSize & SIZE_BITS); + 
pstNextChunk = (MallocChunk *)handle_to_ptr(m_pstHead->m_hTop + (tSize - tOldSize)); + pstNextChunk->m_tPreSize = REAL_SIZE(tSize); + pstNextChunk->m_tSize = PREV_INUSE; + + m_pstHead->m_hTop += (tSize - tOldSize); + statMemoryTop = m_pstHead->m_hTop; + return ptr_to_handle(chunk2mem(pstChunk)); + } + else if (!AT_TOP(pstChunk, tOldSize) && !inuse_bit_at_offset(pstNextChunk, CHUNK_SIZE(pstNextChunk)) && ((INTER_SIZE_T)tOldSize + CHUNK_SIZE(pstNextChunk)) >= tSize) + { + hNewHandle = hHandle; + tNewSize = (INTER_SIZE_T)tOldSize + CHUNK_SIZE(pstNextChunk); + unlink_bin(m_ptBin[bin_index(CHUNK_SIZE(pstNextChunk))], ptr_to_handle(pstNextChunk)); + } + /* ada: defrag */ + else if (!prev_inuse(pstChunk) && (tOldSize + pstChunk->m_tPreSize) >= tSize) + { + pstTmp = (MallocChunk *)(((char *)pstChunk) - pstChunk->m_tPreSize); + iPreInUse = prev_inuse(pstTmp); + tPreSize = pstTmp->m_tPreSize; + // copy & move + hNewHandle = hHandle - pstChunk->m_tPreSize; + tNewSize = (INTER_SIZE_T)tOldSize + pstChunk->m_tPreSize; + unlink_bin(m_ptBin[bin_index(pstChunk->m_tPreSize)], hNewHandle); + // copy user data + memmove(chunk2mem(handle_to_ptr(hNewHandle)), chunk2mem(handle_to_ptr(hHandle)), chunksize2memsize(tOldSize)); + } + else + { + // alloc , copy & free + hNewHandle = inter_malloc(tUserReqSize); + if (hNewHandle == INVALID_HANDLE) + { + snprintf(m_szErr, sizeof(m_szErr), "realloc-out of memory"); + return (INVALID_HANDLE); + } + pstTmp = (MallocChunk *)mem2chunk(handle_to_ptr(hNewHandle)); + hNewHandle = ptr_to_handle(pstTmp); + tNewSize = CHUNK_SIZE(pstTmp); + // copy user data + memcpy(chunk2mem(pstTmp), chunk2mem(handle_to_ptr(hHandle)), chunksize2memsize(tOldSize)); + ALLOC_SIZE_T tTmpSize; + inter_free(chunkhandle2memhandle(hHandle), tTmpSize); + return chunkhandle2memhandle(hNewHandle); + } + } + + assert(tNewSize >= tSize); + MallocChunk *pstNewChunk; + pstNewChunk = (MallocChunk *)handle_to_ptr(hNewHandle); + INTER_SIZE_T tRemainderSize = tNewSize - tSize; + if 
(tRemainderSize >= get_min_chunk_size()) + { + // split + MallocChunk *pstRemainChunk; + pstRemainChunk = (MallocChunk *)(((char *)pstNewChunk) + tSize); + ALLOC_SIZE_T tPreChunkSize = tSize; + do + { + ALLOC_SIZE_T tThisChunkSize; + if (tRemainderSize > MAX_ALLOC_SIZE) + { + if (tRemainderSize - MAX_ALLOC_SIZE >= MINSIZE) + tThisChunkSize = REAL_SIZE(MAX_ALLOC_SIZE); + else + tThisChunkSize = REAL_SIZE(tRemainderSize - MINSIZE); + } + else + { + tThisChunkSize = tRemainderSize; + } + pstRemainChunk->m_tSize = REAL_SIZE(tThisChunkSize) | PREV_INUSE; + + // next chunk + pstNextChunk = (MallocChunk *)(((char *)pstRemainChunk) + REAL_SIZE(tThisChunkSize)); + pstNextChunk->m_tPreSize = REAL_SIZE(tThisChunkSize); + pstNextChunk->m_tSize |= PREV_INUSE; + /* Mark remainder as inuse so free() won't complain */ + set_inuse_bit_at_offset(pstRemainChunk, tThisChunkSize); + ALLOC_SIZE_T tTmpSize; + inter_free(ptr_to_handle(chunk2mem(pstRemainChunk)), tTmpSize); + + tPreChunkSize = tThisChunkSize; + tRemainderSize -= tThisChunkSize; + pstRemainChunk = (MallocChunk *)(((char *)pstRemainChunk) + REAL_SIZE(tThisChunkSize)); + } while (tRemainderSize > 0); + + tNewSize = tSize; + } + else + { + // next chunk + pstNextChunk = (MallocChunk *)(((char *)pstNewChunk) + REAL_SIZE(tNewSize)); + pstNextChunk->m_tSize |= PREV_INUSE; + } + pstNewChunk->m_tSize = REAL_SIZE(tNewSize); + if (iPreInUse) + pstNewChunk->m_tSize |= PREV_INUSE; + pstNewChunk->m_tPreSize = tPreSize; + + return ptr_to_handle(chunk2mem(pstNewChunk)); +} +/*对intserrealloc的包装,对返回结果进行了简单的检查*/ +ALLOC_HANDLE_T DTCBinMalloc::ReAlloc(ALLOC_HANDLE_T hHandle, ALLOC_SIZE_T tSize) +{ + ALLOC_HANDLE_T hNewHandle; + ALLOC_SIZE_T tOldSize; + MallocChunk *pstChunk; + + m_pstHead->m_tLastFreeChunkSize = 0; + hNewHandle = inter_re_alloc(hHandle, tSize, tOldSize); + if (hNewHandle != INVALID_HANDLE) + { + pstChunk = (MallocChunk *)mem2chunk(handle_to_ptr(hNewHandle)); + m_pstHead->m_tUserAllocSize += CHUNK_SIZE(pstChunk); + 
m_pstHead->m_tUserAllocSize -= tOldSize; + if (hHandle == INVALID_HANDLE) + { + m_pstHead->m_tUserAllocChunkCnt++; + ++statChunkTotal; + } + add_alloc_size_to_stat(tSize); + statDataSize = m_pstHead->m_tUserAllocSize; + } + else if (tSize == 0) + { + m_pstHead->m_tUserAllocSize -= tOldSize; + m_pstHead->m_tUserAllocChunkCnt--; + --statChunkTotal; + statDataSize = m_pstHead->m_tUserAllocSize; + } + + return (hNewHandle); +} +/*free接口的主体逻辑*/ +int DTCBinMalloc::inter_free(ALLOC_HANDLE_T hHandle, ALLOC_SIZE_T &tMemSize) +{ + tMemSize = 0; + if (hHandle == INVALID_HANDLE) + return (0); + + if (hHandle >= m_pstHead->m_tSize) + { + snprintf(m_szErr, sizeof(m_szErr), "free-invalid handle"); + return (-1); + } + + // log_error("FREE: %lu", hHandle); + + MallocChunk *pstChunk; + ALLOC_SIZE_T tSize; + pstChunk = (MallocChunk *)mem2chunk(handle_to_ptr(hHandle)); + tSize = CHUNK_SIZE(pstChunk); + tMemSize = tSize; + hHandle = ptr_to_handle((void *)pstChunk); + if (hHandle + tSize >= m_pstHead->m_tSize) + { +#if __WORDSIZE == 64 + snprintf(m_szErr, sizeof(m_szErr), "free-invalid handle: %lu, size: %u", hHandle, tSize); +#else + snprintf(m_szErr, sizeof(m_szErr), "free-invalid handle: %llu, size: %u", hHandle, tSize); +#endif + return (-2); + } + + if (!inuse_bit_at_offset(pstChunk, tSize)) + { +#if __WORDSIZE == 64 + snprintf(m_szErr, sizeof(m_szErr), "free-memory[handle %lu, size: %u, top: %lu] not in use", hHandle, tSize, m_pstHead->m_hTop); +#else + snprintf(m_szErr, sizeof(m_szErr), "free-memory[handle %llu, size: %u, top: %llu] not in use", hHandle, tSize, m_pstHead->m_hTop); +#endif + return (-3); + } + + if (misaligned_chunk(hHandle)) + { +#if __WORDSIZE == 64 + snprintf(m_szErr, sizeof(m_szErr), "free-invalid handle: %lu, size: %u", hHandle, tSize); +#else + snprintf(m_szErr, sizeof(m_szErr), "free-invalid handle: %llu, size: %u", hHandle, tSize); +#endif + return (INVALID_HANDLE); + } + + if (check_inuse_chunk(pstChunk) != 0) + { +#if __WORDSIZE == 64 + 
snprintf(m_szErr, sizeof(m_szErr), "free-invalid chunk: %lu, size: %u", hHandle, tSize); +#else + snprintf(m_szErr, sizeof(m_szErr), "free-invalid chunk: %llu, size: %u", hHandle, tSize); +#endif + return (INVALID_HANDLE); + } + + unsigned int uiBinIdx; + MallocChunk *pstNextChunk; + + if (!prev_inuse(pstChunk) && CAN_COMBILE(tSize, pstChunk->m_tPreSize)) + { + tSize += pstChunk->m_tPreSize; + hHandle -= pstChunk->m_tPreSize; + uiBinIdx = bin_index(pstChunk->m_tPreSize); + pstChunk = (MallocChunk *)(((char *)pstChunk) - pstChunk->m_tPreSize); + // unlink + unlink_bin(m_ptBin[uiBinIdx], ptr_to_handle(pstChunk)); + if (empty_bin(uiBinIdx)) + clear_bin_bit_map(uiBinIdx); + set_size_at_offset(pstChunk, 0, tSize); + set_presize_at_offset(pstChunk, tSize, tSize); + } + + if ((hHandle + tSize) != m_pstHead->m_hTop) + { + pstNextChunk = (MallocChunk *)handle_to_ptr(hHandle + tSize); + if (CHUNK_SIZE(pstNextChunk) < MINSIZE) + { + snprintf(m_szErr, sizeof(m_szErr), "free-invalid handle: " UINT64FMT_T ", size: %u", hHandle, tSize); + return (-4); + } + if (!inuse_bit_at_offset(pstNextChunk, REAL_SIZE(pstNextChunk->m_tSize)) && CAN_COMBILE(tSize, CHUNK_SIZE(pstNextChunk))) + { + tSize += CHUNK_SIZE(pstNextChunk); + uiBinIdx = bin_index(CHUNK_SIZE(pstNextChunk)); + // unlink + unlink_bin(m_ptBin[uiBinIdx], ptr_to_handle(pstNextChunk)); + if (empty_bin(uiBinIdx)) + clear_bin_bit_map(uiBinIdx); + set_size_at_offset(pstChunk, 0, tSize); + set_presize_at_offset(pstChunk, tSize, tSize); + } + } + + set_size_at_offset(pstChunk, 0, tSize); + set_presize_at_offset(pstChunk, tSize, tSize); + set_inuse_bit_at_offset(pstChunk, tSize); + + if (m_pstHead->m_tLastFreeChunkSize < tSize) + m_pstHead->m_tLastFreeChunkSize = tSize; + + if ((hHandle + tSize) == m_pstHead->m_hTop) + { + m_pstHead->m_hTop -= tSize; + statMemoryTop = m_pstHead->m_hTop; + pstChunk->m_tSize = PREV_INUSE; + if (m_pstHead->m_tSize > (m_pstHead->m_hTop + MINSIZE) && m_pstHead->m_tLastFreeChunkSize < m_pstHead->m_tSize - 
m_pstHead->m_hTop - MINSIZE) + m_pstHead->m_tLastFreeChunkSize = m_pstHead->m_tSize - m_pstHead->m_hTop - MINSIZE; + return (0); + } + + clear_inuse_bit_at_offset(pstChunk, tSize); + + // place chunk into bin + if (in_smallbin_range(tSize)) + { + link_bin(m_ptBin[smallbin_index(tSize)], ptr_to_handle(pstChunk)); + set_bin_bit_map(smallbin_index(tSize)); + } + else + { +#if 0 + /* 当一个bin下挂接的节点非常多时,因为要排序,所以这个调用会花费很多cpu时间 by ada */ + int iIdx = largebin_index(tSize); + link_sorted_bin(m_ptBin[iIdx], ptr_to_handle(pstChunk), tSize); +#endif + link_bin(m_ptBin[largebin_index(tSize)], ptr_to_handle(pstChunk)); + set_bin_bit_map(largebin_index(tSize)); + } + //#endif + + return (0); +} +/*对interfree的包装,对返回结果进行了简单检查*/ +int DTCBinMalloc::Free(ALLOC_HANDLE_T hHandle) +{ + int iRet; + ALLOC_SIZE_T tSize; + + tSize = 0; + iRet = inter_free(hHandle, tSize); + if (iRet == 0) + { + m_pstHead->m_tUserAllocSize -= tSize; + m_pstHead->m_tUserAllocChunkCnt--; + --statChunkTotal; + statDataSize = m_pstHead->m_tUserAllocSize; + } + + return (iRet); +} +/*返回如果free掉handle指定chunk能够给cache共享多少空闲内存*/ +/*前后合并chunk可能导致释放比指定handle的大小更大的空间*/ +unsigned DTCBinMalloc::ask_for_destroy_size(ALLOC_HANDLE_T hHandle) +{ + ALLOC_SIZE_T logic_size = 0; + ALLOC_SIZE_T physic_size = 0; + ALLOC_HANDLE_T physic_handle = 0; + + MallocChunk *current_chunk = 0; + MallocChunk *next_chunk = 0; + + if (INVALID_HANDLE == hHandle || hHandle >= m_pstHead->m_tSize) + goto ERROR; + + /* physic pointer */ + current_chunk = (MallocChunk *)mem2chunk(handle_to_ptr(hHandle)); + physic_size = CHUNK_SIZE(current_chunk); + logic_size = chunksize2memsize(physic_size); + physic_handle = ptr_to_handle((void *)current_chunk); + + /* start error check. 
*/ + /* overflow */ + if (physic_handle + physic_size > m_pstHead->m_tSize) + goto ERROR; + + /* current chunk is not inuse */ + if (!inuse_bit_at_offset(current_chunk, physic_size)) + goto ERROR; + + /* not aligned */ + if (misaligned_chunk(physic_handle)) + goto ERROR; + + /* */ + if (0 != check_inuse_chunk(current_chunk)) + goto ERROR; + + /* try combile prev-chunk */ + if (!prev_inuse(current_chunk) && CAN_COMBILE(physic_size, current_chunk->m_tPreSize)) + { + physic_size += current_chunk->m_tPreSize; + + /* forward handle */ + physic_handle -= current_chunk->m_tPreSize; + current_chunk = (MallocChunk *)((char *)current_chunk - current_chunk->m_tPreSize); + } + + /* try combile next-chunk */ + if (physic_handle + physic_size != m_pstHead->m_hTop) + { + next_chunk = (MallocChunk *)(handle_to_ptr(physic_handle + physic_size)); + if (CHUNK_SIZE(next_chunk) < MINSIZE) + goto ERROR; + + /* can combile */ + if (!inuse_bit_at_offset(next_chunk, CHUNK_SIZE(next_chunk)) && + CAN_COMBILE(physic_size, CHUNK_SIZE(next_chunk))) + { + physic_size += CHUNK_SIZE(next_chunk); + } + } + + /* 释放到top边界,合并成一大块内存 */ + if (physic_handle + physic_size == m_pstHead->m_hTop) + { + ALLOC_SIZE_T physic_free = m_pstHead->m_tSize - m_pstHead->m_hTop - MINSIZE + physic_size; + physic_size = physic_size < physic_free ? 
physic_free : physic_size; + } + + return chunksize2memsize(physic_size); + +ERROR: + snprintf(m_szErr, sizeof(m_szErr), "found invalid handle, can't destroy"); + return 0; +} + +ALLOC_SIZE_T DTCBinMalloc::last_free_size() +{ + free_fast(); + + return chunksize2memsize(m_pstHead->m_tLastFreeChunkSize); +} + +/************************************************************************** + * for test + * dump all bins and chunks + *************************************************************************/ + +/*对所有的bin检查:small&large bins, fast bins, unsorted bins*/ +/*校验方法:每个bin组成一个双向的循环链表*/ +int DTCBinMalloc::dump_bins() +{ + int i; + int count; + uint64_t size; + + INTER_HANDLE_T hHandle; + MallocChunk *pstChunk; + printf("dump bins\n"); + for (i = 0; i < NBINS; i++) + { + hHandle = m_ptBin[i].m_hNextChunk; + count = 0; + size = 0; + if (hHandle != INVALID_HANDLE) + { + do + { + + pstChunk = (MallocChunk *)handle_to_ptr(hHandle); + if (pstChunk->m_hNextChunk != INVALID_HANDLE) + hHandle = pstChunk->m_hNextChunk; + size += CHUNK_SIZE(pstChunk); + ++count; + } while (pstChunk->m_hNextChunk != INVALID_HANDLE); + } + if (m_ptBin[i].m_hPreChunk != hHandle) + { + printf("bad bin[%d]", i); + return (-1); + } + if (count) + { +#if __WORDSIZE == 64 + printf("bins[%d] chunk num[%d] size[%lu]\n", i, count, size); +#else + printf("bins[%d] chunk num[%d] size[%llu]\n", i, count, size); +#endif + } + } + + printf("dump fast bins\n"); + for (i = 0; i < NFASTBINS; i++) + { + hHandle = m_ptFastBin[i].m_hNextChunk; + count = 0; + if (hHandle != INVALID_HANDLE) + { + do + { + pstChunk = (MallocChunk *)handle_to_ptr(hHandle); + if (pstChunk->m_hNextChunk != INVALID_HANDLE) + hHandle = pstChunk->m_hNextChunk; + ++count; + } while (pstChunk->m_hNextChunk != INVALID_HANDLE); + } + if (m_ptFastBin[i].m_hPreChunk != hHandle) + { + printf("bad fast-bin[%d]\n", i); + return (-2); + } + if (count) + { + printf("fast bins[%d] chunk num[%d]\n", i, count); + } + } + printf("dump unsorted bins\n"); + 
hHandle = m_ptUnsortedBin[0].m_hNextChunk; + count = 0; + if (hHandle != INVALID_HANDLE) + { + do + { + pstChunk = (MallocChunk *)handle_to_ptr(hHandle); + printf("%d\n", CHUNK_SIZE(pstChunk)); + if (pstChunk->m_hNextChunk != INVALID_HANDLE) + hHandle = pstChunk->m_hNextChunk; + } while (pstChunk->m_hNextChunk != INVALID_HANDLE); + } + if (m_ptUnsortedBin[0].m_hPreChunk != hHandle) + { +#if __WORDSIZE == 64 + printf("bad unsorted-bin[%d] %lu!=%lu\n", 0, m_ptUnsortedBin[0].m_hPreChunk, hHandle); +#else + printf("bad unsorted-bin[%d] %llu!=%llu\n", 0, m_ptUnsortedBin[0].m_hPreChunk, hHandle); +#endif + return (-3); + } + printf("unsorted bins:chunk num[%d]\n", count); + + return (0); +} + +int DTCBinMalloc::dump_mem() +{ + INTER_HANDLE_T hHandle; + MallocChunk *pstChunk; + ALLOC_SIZE_T tSize; + + tSize = 0; + printf("dump_mem\n"); + hHandle = m_pstHead->m_hBottom; + while (hHandle < m_pstHead->m_hTop) + { + pstChunk = (MallocChunk *)handle_to_ptr(hHandle); + printf("%d\t\t%d\n", CHUNK_SIZE(pstChunk), prev_inuse(pstChunk)); + hHandle += CHUNK_SIZE(pstChunk); + } + + return (0); +} diff --git a/src/search_local/index_storage/cache/pt_malloc.h b/src/search_local/index_storage/cache/pt_malloc.h new file mode 100644 index 0000000..09ab367 --- /dev/null +++ b/src/search_local/index_storage/cache/pt_malloc.h @@ -0,0 +1,374 @@ +/* + * ===================================================================================== + * + * Filename: pt_malloc.h + * + * Description: packaging ptmalloc memory dispatch algorithm and interface. + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. 
/* The smallest possible chunk */
#define MIN_CHUNK_SIZE (sizeof(MallocChunk))

/* The smallest size we can malloc is an aligned minimal chunk */
#define MINSIZE ((unsigned long)((MIN_CHUNK_SIZE + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK))

#define NBINS 128
#define NSMALLBINS 64
#define SMALLBIN_WIDTH 8
#define MIN_LARGE_SIZE 512

#define DTC_SIGN_0 0
#define DTC_SIGN_1 0x4D635474U
#define DTC_SIGN_2 1
#define DTC_SIGN_3 0xFFFFFFFFU
#define DTC_SIGN_4 0xFFFFFFFFU
#define DTC_SIGN_5 0xFFFFFFFFU
#define DTC_SIGN_6 4
#define DTC_SIGN_7 0
#define DTC_SIGN_8 16
#define DTC_SIGN_9 0xFFFFFFFFU
#define DTC_SIGN_A 0
#define DTC_SIGN_B 0
#define DTC_SIGN_C 0xFFFFFFFFU
#define DTC_SIGN_D 0xFFFFFFFFU

#define DTC_VER_MIN 4 // lowest dtc memory-format version this code recognises

#define DTC_RESERVE_SIZE (4 * 1024UL)

#define EC_NO_MEM 2041 // error code: out of memory
#define EC_KEY_EXIST 2042
#define EC_KEY_NOT_EXIST 2043
/* Parenthesised so the product stays one operand in expressions such as x % MAXSTATCOUNT. */
#define MAXSTATCOUNT (10000 * 3600 * 12)
/*
 * Resolve `handle` through `mallocter.handle_to_ptr()` and store the result in
 * `obj_ptr`, cast to obj_ptr's own pointer type.
 *
 * The allocator argument and the destination are parenthesised so that
 * expressions (for example `*pAllocator`) can be passed safely, and
 * __typeof__ is used because plain `typeof` is not available when the
 * compiler runs in strict ISO mode.
 */
#define GET_OBJ(mallocter, handle, obj_ptr)                                   \
    do                                                                        \
    {                                                                         \
        (obj_ptr) = (__typeof__(obj_ptr))(mallocter).handle_to_ptr(handle);   \
    } while (0)
MINSIZE : size); + } + +protected: + void init_sign(); + + void *bin_malloc(CBin &ptBin); + void *small_bin_malloc(ALLOC_SIZE_T tSize); + void *fast_malloc(ALLOC_SIZE_T tSize); + void *top_alloc(ALLOC_SIZE_T tSize); + int unlink_bin(CBin &stBin, INTER_HANDLE_T hHandle); + int link_bin(CBin &stBin, INTER_HANDLE_T hHandle); + int link_sorted_bin(CBin &stBin, INTER_HANDLE_T hHandle, ALLOC_SIZE_T tSize); + int check_inuse_chunk(MallocChunk *pstChunk); + int free_fast(); + + inline void set_bin_bit_map(unsigned int uiBinIdx) + { + m_pstHead->m_auiBinBitMap[uiBinIdx / 32] |= (1UL << (uiBinIdx % 32)); + } + inline void clear_bin_bit_map(unsigned int uiBinIdx) + { + m_pstHead->m_auiBinBitMap[uiBinIdx / 32] &= (~(1UL << (uiBinIdx % 32))); + } + inline int empty_bin(unsigned int uiBinIdx) + { + return (m_ptBin[uiBinIdx].m_hNextChunk == INVALID_HANDLE); + } + + // 内部做一下统计 + ALLOC_HANDLE_T inter_malloc(ALLOC_SIZE_T tSize); + ALLOC_HANDLE_T inter_re_alloc(ALLOC_HANDLE_T hHandle, ALLOC_SIZE_T tSize, ALLOC_SIZE_T &tOldMemSize); + int inter_free(ALLOC_HANDLE_T hHandle, ALLOC_SIZE_T &tMemSize); + +public: + DTCBinMalloc(); + ~DTCBinMalloc(); + + static DTCBinMalloc *Instance(); + static void Destroy(); + + template + T *Pointer(ALLOC_HANDLE_T hHandle) { return reinterpret_cast(handle_to_ptr(hHandle)); } + + ALLOC_HANDLE_T Handle(void *p) { return ptr_to_handle(p); } + + const char *get_err_msg() { return m_szErr; } + const MemHead *get_head_info() const { return m_pstHead; } + + /************************************************* + Description: 格式化内存 + Input: pAddr 内存块地址 + tSize 内存块大小 + Return: 0为成功,非0失败 + *************************************************/ + int Init(void *pAddr, INTER_SIZE_T tSize); + + /************************************************* + Description: attach已经格式化好的内存块 + Input: pAddr 内存块地址 + tSize 内存块大小 + Return: 0为成功,非0失败 + *************************************************/ + int Attach(void *pAddr, INTER_SIZE_T tSize); + + 
/*************************************************
  Description: allocate memory
  Input: tSize  number of bytes to allocate
  Output:
  Return: handle of the memory block, INVALID_HANDLE on failure
*************************************************/
ALLOC_HANDLE_T Malloc(ALLOC_SIZE_T tSize);

/*************************************************
  Description: allocate memory and initialise it to 0
  Input: tSize  number of bytes to allocate
  Output:
  Return: handle of the memory block, INVALID_HANDLE on failure
*************************************************/
ALLOC_HANDLE_T Calloc(ALLOC_SIZE_T tSize);

/*************************************************
  Description: re-allocate memory
  Input: hHandle  handle of the old memory block
         tSize    new size to allocate
  Output:
  Return: handle of the memory block, INVALID_HANDLE on failure
          (the old block is not released when the call fails)
*************************************************/
ALLOC_HANDLE_T ReAlloc(ALLOC_HANDLE_T hHandle, ALLOC_SIZE_T tSize);

/*************************************************
  Description: release memory
  Input: hHandle  handle of the memory block
  Output:
  Return: 0 on success, non-zero on failure
*************************************************/
int Free(ALLOC_HANDLE_T hHandle);
*************************************************/ + ALLOC_SIZE_T chunk_size(ALLOC_HANDLE_T hHandle); + + /************************************************* + Description: 获取用户已经分配的内存总大小 + Input: + Output: + Return: 内存大小 + *************************************************/ + INTER_SIZE_T user_alloc_size() { return m_pstHead->m_tUserAllocSize; } + + /************************************************* + Description: 获取内存总大小 + Input: + Output: + Return: 内存大小 + *************************************************/ + INTER_SIZE_T total_size() { return m_pstHead->m_tSize; } + + /************************************************* + Description: 最近一次释放内存,合并后的chunk大小 + Input: + Output: + Return: 内存大小 + *************************************************/ + ALLOC_SIZE_T last_free_size(); + + /************************************************* + Description: 获取为上层应用保留的内存块(大小为DTC_RESERVE_SIZE=4K) + Input: + Output: + Return: 内存句柄 + *************************************************/ + ALLOC_HANDLE_T get_reserve_zone(); + + /************************************************* + Description: 将句柄转换成内存地址 + Input: 内存句柄 + Output: + Return: 内存地址,如果句柄无效返回NULL + *************************************************/ + inline void *handle_to_ptr(ALLOC_HANDLE_T hHandle) + { + if (hHandle == INVALID_HANDLE) + return (NULL); + return (void *)(((char *)m_pBaseAddr) + hHandle); + } + + /************************************************* + Description: 将内存地址转换为句柄 + Input: 内存地址 + Output: + Return: 内存句柄,如果地址无效返回INVALID_HANDLE + *************************************************/ + inline ALLOC_HANDLE_T ptr_to_handle(void *p) + { + if ((char *)p < (char *)m_pBaseAddr || (char *)p >= ((char *)m_pBaseAddr) + m_pstHead->m_tSize) + return INVALID_HANDLE; + return (ALLOC_HANDLE_T)(((char *)p) - ((char *)m_pBaseAddr)); + } + + /************************************************* + Description: 检测handle是否有效 + Input: 内存句柄 + Output: + Return: 0: 有效; -1:无效 + *************************************************/ + virtual int 
/*
 * Branch-prediction hints. __GNUC__ is the macro GCC-compatible compilers
 * predefine with their major version, so it is the one to test before using
 * __builtin_expect; other compilers get plain pass-through macros.
 */
#ifndef likely
#if defined(__GNUC__) && __GNUC__ >= 3
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#else
#define likely(x) (x)
#define unlikely(x) (x)
#endif
#endif
ERROR_RET; \ + *(int *)(m_pchContent + m_uiOffset) = len; \ + m_uiOffset += sizeof(int); \ + if (likely(len != 0)) \ + memcpy(m_pchContent + m_uiOffset, p, len); \ + m_uiOffset += len; \ + } while (0) + +#define CHECK_SIZE(s) \ + do \ + { \ + if (unlikely(m_uiOffset + s > _size)) \ + goto ERROR_RET; \ + } while (0) + +#define SKIP_SIZE(s) \ + do \ + { \ + if (unlikely(m_uiOffset + s > _size)) \ + goto ERROR_RET; \ + m_uiOffset += s; \ + } while (0) +const int BTYE_MAX_VALUE = 255; +RawData::RawData(Mallocator *pstMalloc, int iAutoDestroy) +{ + m_uiDataSize = 0; + m_uiRowCnt = 0; + m_iKeySize = 0; + m_iLAId = -1; + m_iExpireId = -1; + m_iTableIdx = -1; + m_uiKeyStart = 0; + m_uiDataStart = 0; + m_uiOffset = 0; + m_uiLAOffset = 0; + m_uiRowOffset = 0; + m_uiGetCountOffset = 0; + m_uiTimeStampOffSet = 0; + m_uchGetCount = 0; + m_CreateHour = 0; + m_LastAccessHour = 0; + m_LastUpdateHour = 0; + m_uchKeyIdx = -1; + m_pchContent = NULL; + m_uiNeedSize = 0; + _mallocator = pstMalloc; + _handle = INVALID_HANDLE; + _autodestroy = iAutoDestroy; + _size = 0; + m_pstRef = NULL; + memset(m_szErr, 0, sizeof(m_szErr)); +} + +RawData::~RawData() +{ + if (_autodestroy) + { + Destroy(); + } + _handle = INVALID_HANDLE; + _size = 0; +} + +int RawData::Init(uint8_t uchKeyIdx, int iKeySize, const char *pchKey, ALLOC_SIZE_T uiDataSize, int laId, int expireId, int nodeIdx) +{ + int ks = iKeySize != 0 ? 
iKeySize : 1 + *(unsigned char *)pchKey; + + /*|1字节:类型|4字节:数据大小|4字节: 行数| 1字节 : Get次数| 2字节: 最后访问时间| 2字节 : 最后更新时间|2字节: 最后创建时间 |key|*/ + uiDataSize += 2 + sizeof(uint32_t) * 2 + sizeof(uint16_t) * 3 + ks; + + _handle = INVALID_HANDLE; + _size = 0; + + _handle = _mallocator->Malloc(uiDataSize); + if (_handle == INVALID_HANDLE) + { + snprintf(m_szErr, sizeof(m_szErr), "malloc error"); + m_uiNeedSize = uiDataSize; + return (EC_NO_MEM); + } + _size = _mallocator->chunk_size(_handle); + + m_uiDataSize = 2 + sizeof(uint32_t) * 2 + sizeof(uint16_t) * 3 + ks; + m_uiRowCnt = 0; + m_uchKeyIdx = uchKeyIdx; + m_iKeySize = iKeySize; + m_iLAId = laId; + m_iExpireId = expireId; + + m_pchContent = Pointer(); + m_uiOffset = 0; + m_uiLAOffset = 0; + if (nodeIdx != -1) + { + m_iTableIdx = nodeIdx; + } + if (m_iTableIdx != 0 && m_iTableIdx != 1) + { + snprintf(m_szErr, sizeof(m_szErr), "node idx error"); + return -100; + } + SET_VALUE(((m_iTableIdx << 7) & 0x80) + DATA_TYPE_RAW, unsigned char); + SET_VALUE(m_uiDataSize, uint32_t); + SET_VALUE(m_uiRowCnt, uint32_t); + + m_uiGetCountOffset = m_uiOffset; + m_uchGetCount = 1; + SET_VALUE(m_uchGetCount, uint8_t); + m_uiTimeStampOffSet = m_uiOffset; + init_timp_stamp(); + SKIP_SIZE(3 * sizeof(uint16_t)); + m_uiKeyStart = m_uiOffset; + if (iKeySize != 0) + { + memcpy(m_pchContent + m_uiOffset, pchKey, iKeySize); + m_uiOffset += iKeySize; + } + else + { + memcpy(m_pchContent + m_uiOffset, pchKey, ks); + m_uiOffset += ks; + } + m_uiDataStart = m_uiOffset; + m_uiRowOffset = m_uiDataStart; + + return (0); + +ERROR_RET: + snprintf(m_szErr, sizeof(m_szErr), "set value error"); + return (-100); +} + +int RawData::Init(const char *pchKey, ALLOC_SIZE_T uiDataSize) +{ + if (DTCColExpand::Instance()->is_expanding()) + m_iTableIdx = (DTCColExpand::Instance()->cur_table_idx() + 1) % 2; + else + m_iTableIdx = DTCColExpand::Instance()->cur_table_idx() % 2; + if (m_iTableIdx != 0 && m_iTableIdx != 1) + { + snprintf(m_szErr, sizeof(m_szErr), "attach error, 
nodeIdx[%d] error", m_iTableIdx); + return -1; + } + _tabledef = TableDefinitionManager::Instance()->get_table_def_by_idx(m_iTableIdx); + if (_tabledef == NULL) + { + snprintf(m_szErr, sizeof(m_szErr), "attach error, tabledef[NULL]"); + return -1; + } + + return Init(_tabledef->key_fields() - 1, _tabledef->key_format(), pchKey, uiDataSize, _tabledef->lastacc_field_id(), _tabledef->expire_time_field_id()); +} + +int RawData::Attach(MEM_HANDLE_T hHandle) +{ + _handle = hHandle; + char *p = Pointer(); + m_iTableIdx = (*p >> 7) & 0x01; + if (m_iTableIdx != 0 && m_iTableIdx != 1) + { + snprintf(m_szErr, sizeof(m_szErr), "attach error, nodeIdx[%d] error", m_iTableIdx); + return -1; + } + _tabledef = TableDefinitionManager::Instance()->get_table_def_by_idx(m_iTableIdx); + if (_tabledef == NULL) + { + snprintf(m_szErr, sizeof(m_szErr), "attach error, tabledef[NULL]"); + return -1; + } + return Attach(hHandle, _tabledef->key_fields() - 1, _tabledef->key_format(), _tabledef->lastacc_field_id(), _tabledef->lastcmod_field_id(), _tabledef->expire_time_field_id()); +} + +/* this function belive that inputted raw data is formatted correclty, but it's not the case sometimes */ +int RawData::Attach(MEM_HANDLE_T hHandle, uint8_t uchKeyIdx, int iKeySize, int laid, int lcmodid, int expireid) +{ + int ks = 0; + + _size = _mallocator->chunk_size(hHandle); + if (unlikely(_size == 0)) + { + snprintf(m_szErr, sizeof(m_szErr), "attach error: %s", _mallocator->get_err_msg()); + return (-1); + } + _handle = hHandle; + + m_pchContent = Pointer(); + m_uiOffset = 0; + m_uiLAOffset = 0; + unsigned char uchType; + GET_VALUE(uchType, unsigned char); + if (unlikely((uchType & 0x7f) != DATA_TYPE_RAW)) + { + snprintf(m_szErr, sizeof(m_szErr), "invalid data type: %u", uchType); + return (-2); + } + + GET_VALUE(m_uiDataSize, uint32_t); + GET_VALUE(m_uiRowCnt, uint32_t); + m_uiGetCountOffset = m_uiOffset; + GET_VALUE(m_uchGetCount, uint8_t); + m_uiTimeStampOffSet = m_uiOffset; + attach_time_stamp(); + 
SKIP_SIZE(3 * sizeof(uint16_t)); + if (unlikely(m_uiDataSize > _size)) + { + snprintf(m_szErr, sizeof(m_szErr), "raw-data handle[" UINT64FMT "] data size[%u] error, large than chunk size[" UINT64FMT "]", hHandle, m_uiDataSize, _size); + return (-3); + } + + m_uchKeyIdx = uchKeyIdx; + m_uiKeyStart = m_uiOffset; + m_iKeySize = iKeySize; + m_iLAId = laid; + m_iLCmodId = lcmodid; + m_iExpireId = expireid; + + ks = iKeySize != 0 ? iKeySize : 1 + *(unsigned char *)(m_pchContent + m_uiKeyStart); + SKIP_SIZE(ks); + m_uiDataStart = m_uiOffset; + m_uiRowOffset = m_uiDataStart; + + return (0); + +ERROR_RET: + snprintf(m_szErr, sizeof(m_szErr), "get value error"); + return (-100); +} + +int RawData::Destroy() +{ + if (_handle == INVALID_HANDLE) + { + _size = 0; + return 0; + } + + int iRet = _mallocator->Free(_handle); + _handle = INVALID_HANDLE; + _size = 0; + return (iRet); +} + +int RawData::check_size(MEM_HANDLE_T hHandle, uint8_t uchKeyIdx, int iKeySize, int size) +{ + _size = _mallocator->chunk_size(hHandle); + if (unlikely(_size == 0)) + { + snprintf(m_szErr, sizeof(m_szErr), "attach error: %s", _mallocator->get_err_msg()); + return (-1); + } + _handle = hHandle; + + m_pchContent = Pointer(); + m_uiOffset = 0; + m_uiLAOffset = 0; + unsigned char uchType; + GET_VALUE(uchType, unsigned char); + if (unlikely(uchType != DATA_TYPE_RAW)) + { + snprintf(m_szErr, sizeof(m_szErr), "invalid data type: %u", uchType); + return (-2); + } + + GET_VALUE(m_uiDataSize, uint32_t); + if (m_uiDataSize != (unsigned int)size) + { + snprintf(m_szErr, sizeof(m_szErr), "invalid data type: %u", uchType); + return -1; + } + + return 0; +ERROR_RET: + return -1; +} + +int RawData::strip_mem() +{ + ALLOC_HANDLE_T hTmp = _mallocator->ReAlloc(_handle, m_uiDataSize); + if (hTmp == INVALID_HANDLE) + { + snprintf(m_szErr, sizeof(m_szErr), "realloc error"); + m_uiNeedSize = m_uiDataSize; + return (EC_NO_MEM); + } + _handle = hTmp; + _size = _mallocator->chunk_size(_handle); + m_pchContent = Pointer(); + + 
return (0); +} + +int RawData::decode_row(RowValue &stRow, unsigned char &uchRowFlags, int iDecodeFlag) +{ + if (unlikely(_handle == INVALID_HANDLE || m_pchContent == NULL)) + { + snprintf(m_szErr, sizeof(m_szErr), "rawdata not init yet"); + return (-1); + } + + ALLOC_SIZE_T uiOldOffset = m_uiOffset; + ALLOC_SIZE_T uiOldRowOffset = m_uiRowOffset; + m_uiLAOffset = 0; + m_uiRowOffset = m_uiOffset; + GET_VALUE(uchRowFlags, unsigned char); + + for (int j = m_uchKeyIdx + 1; j <= stRow.num_fields(); j++) //拷贝一行数据 + { + if (stRow.table_definition()->is_discard(j)) + continue; + if (j == m_iLAId) + m_uiLAOffset = m_uiOffset; + switch (stRow.field_type(j)) + { + case DField::Signed: + if (unlikely(stRow.field_size(j) > (int)sizeof(int32_t))) + { + GET_VALUE(stRow.field_value(j)->s64, int64_t); + } + else + { + GET_VALUE(stRow.field_value(j)->s64, int32_t); + } + break; + + case DField::Unsigned: + if (unlikely(stRow.field_size(j) > (int)sizeof(uint32_t))) + { + GET_VALUE(stRow.field_value(j)->u64, uint64_t); + } + else + { + GET_VALUE(stRow.field_value(j)->u64, uint32_t); + } + break; + + case DField::Float: //浮点数 + if (likely(stRow.field_size(j) > (int)sizeof(float))) + { + GET_VALUE(stRow.field_value(j)->flt, double); + } + else + { + GET_VALUE(stRow.field_value(j)->flt, float); + } + break; + + case DField::String: //字符串 + case DField::Binary: //二进制数据 + default: + { + GET_VALUE(stRow.field_value(j)->bin.len, int); + stRow.field_value(j)->bin.ptr = m_pchContent + m_uiOffset; + SKIP_SIZE(stRow.field_value(j)->bin.len); + break; + } + } //end of switch + } + + if (unlikely(iDecodeFlag & PRE_DECODE_ROW)) + { + m_uiOffset = uiOldOffset; + m_uiRowOffset = uiOldRowOffset; + } + + return (0); + +ERROR_RET: + if (unlikely(iDecodeFlag & PRE_DECODE_ROW)) + { + m_uiOffset = uiOldOffset; + m_uiRowOffset = uiOldRowOffset; + } + snprintf(m_szErr, sizeof(m_szErr), "get value error"); + return (-100); +} + +int RawData::get_expire_time(DTCTableDefinition *t, uint32_t &expire) +{ + expire 
= 0; + if (unlikely(_handle == INVALID_HANDLE || m_pchContent == NULL)) + { + snprintf(m_szErr, sizeof(m_szErr), "rawdata not init yet"); + return (-1); + } + if (m_iExpireId == -1) + { + expire = 0; + return 0; + } + SKIP_SIZE(sizeof(unsigned char)); //skip flag + // the first field should be expire time + for (int j = m_uchKeyIdx + 1; j <= _tabledef->num_fields(); j++) + { //拷贝一行数据 + if (j == m_iExpireId) + { + expire = *((uint32_t *)(m_pchContent + m_uiOffset)); + break; + } + + switch (_tabledef->field_type(j)) + { + case DField::Unsigned: + case DField::Signed: + if (_tabledef->field_size(j) > (int)sizeof(int32_t)) + SKIP_SIZE(sizeof(int64_t)); + else + SKIP_SIZE(sizeof(int32_t)); + ; + break; + + case DField::Float: //浮点数 + if (_tabledef->field_size(j) > (int)sizeof(float)) + SKIP_SIZE(sizeof(double)); + else + SKIP_SIZE(sizeof(float)); + break; + + case DField::String: //字符串 + case DField::Binary: //二进制数据 + default: + int iLen = 0; + GET_VALUE(iLen, int); + SKIP_SIZE(iLen); + break; + } //end of switch + } + return 0; + +ERROR_RET: + snprintf(m_szErr, sizeof(m_szErr), "get expire error"); + return (-100); +} + +int RawData::get_lastcmod(uint32_t &lastcmod) +{ + lastcmod = 0; + if (unlikely(_handle == INVALID_HANDLE || m_pchContent == NULL)) + { + snprintf(m_szErr, sizeof(m_szErr), "rawdata not init yet"); + return (-1); + } + + m_uiRowOffset = m_uiOffset; + SKIP_SIZE(sizeof(unsigned char)); //skip flag + + for (int j = m_uchKeyIdx + 1; j <= _tabledef->num_fields(); j++) //拷贝一行数据 + { + //id: bug fix skip discard + if (_tabledef->is_discard(j)) + continue; + if (j == m_iLCmodId) + lastcmod = *((uint32_t *)(m_pchContent + m_uiOffset)); + + switch (_tabledef->field_type(j)) + { + case DField::Unsigned: + case DField::Signed: + if (_tabledef->field_size(j) > (int)sizeof(int32_t)) + SKIP_SIZE(sizeof(int64_t)); + else + SKIP_SIZE(sizeof(int32_t)); + ; + break; + + case DField::Float: //浮点数 + if (_tabledef->field_size(j) > (int)sizeof(float)) + 
SKIP_SIZE(sizeof(double)); + else + SKIP_SIZE(sizeof(float)); + break; + + case DField::String: //字符串 + case DField::Binary: //二进制数据 + default: + { + int iLen = 0; + GET_VALUE(iLen, int); + SKIP_SIZE(iLen); + break; + } + } //end of switch + } + return (0); + +ERROR_RET: + snprintf(m_szErr, sizeof(m_szErr), "get timecmod error"); + return (-100); +} + +int RawData::set_data_size() +{ + SET_VALUE_AT_OFFSET(m_uiDataSize, uint32_t, 1); + +ERROR_RET: + snprintf(m_szErr, sizeof(m_szErr), "set data size error"); + return (-100); +} + +int RawData::set_row_count() +{ + SET_VALUE_AT_OFFSET(m_uiRowCnt, uint32_t, 5); + +ERROR_RET: + snprintf(m_szErr, sizeof(m_szErr), "set row count error"); + return (-100); +} + +int RawData::expand_chunk(ALLOC_SIZE_T tExpSize) +{ + if (_handle == INVALID_HANDLE) + { + snprintf(m_szErr, sizeof(m_szErr), "data not init yet"); + return (-1); + } + + if (m_uiDataSize + tExpSize > _size) + { + ALLOC_HANDLE_T hTmp = _mallocator->ReAlloc(_handle, m_uiDataSize + tExpSize); + if (hTmp == INVALID_HANDLE) + { + snprintf(m_szErr, sizeof(m_szErr), "realloc error[%s]", _mallocator->get_err_msg()); + m_uiNeedSize = m_uiDataSize + tExpSize; + return (EC_NO_MEM); + } + _handle = hTmp; + _size = _mallocator->chunk_size(_handle); + m_pchContent = Pointer(); + } + + return (0); +} + +int RawData::re_alloc_chunk(ALLOC_SIZE_T tSize) +{ + if (tSize > _size) + { + ALLOC_HANDLE_T hTmp = _mallocator->ReAlloc(_handle, tSize); + if (hTmp == INVALID_HANDLE) + { + snprintf(m_szErr, sizeof(m_szErr), "realloc error"); + m_uiNeedSize = tSize; + return (EC_NO_MEM); + } + _handle = hTmp; + _size = _mallocator->chunk_size(_handle); + m_pchContent = Pointer(); + } + + return (0); +} + +ALLOC_SIZE_T RawData::calc_row_size(const RowValue &stRow, int keyIdx) +{ + if (keyIdx == -1) + log_error("RawData may not init yet..."); + ALLOC_SIZE_T tSize = 1; // flag + for (int j = keyIdx + 1; j <= stRow.num_fields(); j++) //拷贝一行数据 + { + if (stRow.table_definition()->is_discard(j)) + 
continue; + switch (stRow.field_type(j)) + { + case DField::Signed: + case DField::Unsigned: + tSize += unlikely(stRow.field_size(j) > (int)sizeof(int32_t)) ? sizeof(int64_t) : sizeof(int32_t); + break; + + case DField::Float: //浮点数 + tSize += likely(stRow.field_size(j) > (int)sizeof(float)) ? sizeof(double) : sizeof(float); + break; + + case DField::String: //字符串 + case DField::Binary: //二进制数据 + default: + { + tSize += sizeof(int); + tSize += stRow.field_value(j)->bin.len; + break; + } + } //end of switch + } + if (tSize < 2) + log_notice("m_uchKeyIdx:%d, stRow.num_fields():%d tSize:%d", keyIdx, stRow.num_fields(), tSize); + + return (tSize); +} + +int RawData::encode_row(const RowValue &stRow, unsigned char uchOp, bool expendBuf) +{ + int iRet; + + ALLOC_SIZE_T tSize; + tSize = calc_row_size(stRow, m_uchKeyIdx); + + if (unlikely(expendBuf)) + { + iRet = expand_chunk(tSize); + if (unlikely(iRet != 0)) + return (iRet); + } + + SET_VALUE(uchOp, unsigned char); + + for (int j = m_uchKeyIdx + 1; j <= stRow.num_fields(); j++) //拷贝一行数据 + { + if (stRow.table_definition()->is_discard(j)) + continue; + const DTCValue *const v = stRow.field_value(j); + switch (stRow.field_type(j)) + { + case DField::Signed: + if (unlikely(stRow.field_size(j) > (int)sizeof(int32_t))) + SET_VALUE(v->s64, int64_t); + else + SET_VALUE(v->s64, int32_t); + break; + + case DField::Unsigned: + if (unlikely(stRow.field_size(j) > (int)sizeof(uint32_t))) + SET_VALUE(v->u64, uint64_t); + else + SET_VALUE(v->u64, uint32_t); + break; + + case DField::Float: //浮点数 + if (likely(stRow.field_size(j) > (int)sizeof(float))) + SET_VALUE(v->flt, double); + else + SET_VALUE(v->flt, float); + break; + + case DField::String: //字符串 + case DField::Binary: //二进制数据 + default: + { + SET_BIN_VALUE(v->bin.ptr, v->bin.len); + break; + } + } //end of switch + } + + m_uiDataSize += tSize; + set_data_size(); + m_uiRowCnt++; + set_row_count(); + + return 0; + +ERROR_RET: + snprintf(m_szErr, sizeof(m_szErr), "encode row 
error"); + return (-100); +} + +int RawData::insert_row_flag(const RowValue &stRow, bool byFirst, unsigned char uchOp) +{ + + uint32_t uiOldSize = m_uiDataSize; + + m_uiOffset = m_uiDataSize; + int iRet = encode_row(stRow, uchOp); + uint32_t uiNewRowSize = m_uiDataSize - uiOldSize; + if (iRet == 0 && byFirst == true && uiNewRowSize > 0 && (uiOldSize - m_uiDataStart) > 0) + { + void *pBuf = MALLOC(uiNewRowSize); + if (pBuf == NULL) + { + snprintf(m_szErr, sizeof(m_szErr), "malloc error: %m"); + return (-ENOMEM); + } + char *pchDataStart = m_pchContent + m_uiDataStart; + // save last row + memmove(pBuf, m_pchContent + uiOldSize, uiNewRowSize); + // move buf up sz bytes + memmove(pchDataStart + uiNewRowSize, pchDataStart, uiOldSize - m_uiDataStart); + // last row as first row + memcpy(pchDataStart, pBuf, uiNewRowSize); + FREE(pBuf); + } + + return (iRet); +} + +int RawData::insert_row(const RowValue &stRow, bool byFirst, bool isDirty) +{ + return insert_row_flag(stRow, byFirst, isDirty ? OPER_INSERT : OPER_SELECT); +} + +int RawData::insert_n_rows(unsigned int uiNRows, const RowValue *pstRow, bool byFirst, bool isDirty) +{ + int iRet; + unsigned int i; + ALLOC_SIZE_T tSize; + + tSize = 0; + for (i = 0; i < uiNRows; i++) + tSize += calc_row_size(pstRow[i], m_uchKeyIdx); + + iRet = expand_chunk(tSize); // 先扩大buffer,避免后面insert失败回滚 + if (iRet != 0) + return (iRet); + + uint32_t uiOldSize = m_uiDataSize; + m_uiOffset = m_uiDataSize; + for (i = 0; i < uiNRows; i++) + { + iRet = encode_row(pstRow[i], isDirty ? 
OPER_INSERT : OPER_SELECT); + if (iRet != 0) + { + return (iRet); + } + } + + uint32_t uiNewRowSize = m_uiDataSize - uiOldSize; + if (byFirst == true && uiNewRowSize > 0 && (uiOldSize - m_uiDataStart) > 0) + { + void *pBuf = MALLOC(uiNewRowSize); + if (pBuf == NULL) + { + snprintf(m_szErr, sizeof(m_szErr), "malloc error: %m"); + return (-ENOMEM); + } + char *pchDataStart = m_pchContent + m_uiDataStart; + // save last row + memmove(pBuf, m_pchContent + uiOldSize, uiNewRowSize); + // move buf up sz bytes + memmove(pchDataStart + uiNewRowSize, pchDataStart, uiOldSize - m_uiDataStart); + // last row as first row + memcpy(pchDataStart, pBuf, uiNewRowSize); + FREE(pBuf); + } + + return (0); +} + +int RawData::skip_row(const RowValue &stRow) +{ + if (_handle == INVALID_HANDLE || m_pchContent == NULL) + { + snprintf(m_szErr, sizeof(m_szErr), "rawdata not init yet"); + return (-1); + } + + m_uiOffset = m_uiRowOffset; + if (m_uiOffset >= m_uiDataSize) + { + snprintf(m_szErr, sizeof(m_szErr), "already at end of data"); + return (-2); + } + + SKIP_SIZE(sizeof(unsigned char)); // flag + + for (int j = m_uchKeyIdx + 1; j <= stRow.num_fields(); j++) //拷贝一行数据 + { + //id: bug fix skip discard + if (stRow.table_definition()->is_discard(j)) + continue; + + switch (stRow.field_type(j)) + { + case DField::Unsigned: + case DField::Signed: + if (stRow.field_size(j) > (int)sizeof(int32_t)) + SKIP_SIZE(sizeof(int64_t)); + else + SKIP_SIZE(sizeof(int32_t)); + ; + break; + + case DField::Float: //浮点数 + if (stRow.field_size(j) > (int)sizeof(float)) + SKIP_SIZE(sizeof(double)); + else + SKIP_SIZE(sizeof(float)); + break; + + case DField::String: //字符串 + case DField::Binary: //二进制数据 + default: + { + int iLen; + GET_VALUE(iLen, int); + SKIP_SIZE(iLen); + break; + } + } //end of switch + } + + return (0); + +ERROR_RET: + snprintf(m_szErr, sizeof(m_szErr), "skip row error"); + return (-100); +} + +int RawData::replace_cur_row(const RowValue &stRow, bool isDirty) +{ + int iRet = 0; + ALLOC_SIZE_T 
uiOldOffset; + ALLOC_SIZE_T uiNewRowSize; + ALLOC_SIZE_T uiCurRowSize; + ALLOC_SIZE_T uiNextRowsOffset; + ALLOC_SIZE_T uiNextRowsSize; + + uiOldOffset = m_uiOffset; + if ((iRet = skip_row(stRow)) != 0) + { + goto ERROR_RET; + } + + unsigned char uchRowFlag; + GET_VALUE_AT_OFFSET(uchRowFlag, unsigned char, m_uiRowOffset); + if (isDirty) + uchRowFlag = OPER_UPDATE; + + uiNewRowSize = calc_row_size(stRow, m_uchKeyIdx); + uiCurRowSize = m_uiOffset - m_uiRowOffset; + uiNextRowsOffset = m_uiOffset; + uiNextRowsSize = m_uiDataSize - m_uiOffset; + + if (uiNewRowSize > uiCurRowSize) + { + // enlarge buffer + MEM_HANDLE_T hTmp = _mallocator->ReAlloc(_handle, m_uiDataSize + uiNewRowSize - uiCurRowSize); + if (hTmp == INVALID_HANDLE) + { + snprintf(m_szErr, sizeof(m_szErr), "realloc error"); + m_uiNeedSize = m_uiDataSize + uiNewRowSize - uiCurRowSize; + iRet = EC_NO_MEM; + goto ERROR_RET; + } + _handle = hTmp; + _size = _mallocator->chunk_size(_handle); + m_pchContent = Pointer(); + + // move data + if (uiNextRowsSize > 0) + memmove(m_pchContent + uiNextRowsOffset + (uiNewRowSize - uiCurRowSize), m_pchContent + uiNextRowsOffset, uiNextRowsSize); + + // copy new row + m_uiOffset = m_uiRowOffset; + iRet = encode_row(stRow, uchRowFlag, false); + if (iRet != 0) + { + if (uiNextRowsSize > 0) + memmove(m_pchContent + uiNextRowsOffset, m_pchContent + uiNextRowsOffset + (uiNewRowSize - uiCurRowSize), uiNextRowsSize); + iRet = -1; + goto ERROR_RET; + } + + m_uiRowCnt--; + m_uiDataSize -= uiCurRowSize; + } + else + { + // back up old row + void *pTmpBuf = MALLOC(uiCurRowSize); + if (pTmpBuf == NULL) + { + snprintf(m_szErr, sizeof(m_szErr), "malloc error: %m"); + return (-ENOMEM); + } + memmove(pTmpBuf, m_pchContent + m_uiRowOffset, uiCurRowSize); + + // copy new row + m_uiOffset = m_uiRowOffset; + iRet = encode_row(stRow, uchRowFlag, false); + if (iRet != 0) + { + memmove(m_pchContent + m_uiRowOffset, pTmpBuf, uiCurRowSize); + FREE(pTmpBuf); + iRet = -1; + goto ERROR_RET; + } + + // 
move data + if (uiNextRowsSize > 0 && m_uiOffset != uiNextRowsOffset) + memmove(m_pchContent + m_uiOffset, m_pchContent + uiNextRowsOffset, uiNextRowsSize); + FREE(pTmpBuf); + + // shorten buffer + MEM_HANDLE_T hTmp = _mallocator->ReAlloc(_handle, m_uiDataSize + uiNewRowSize - uiCurRowSize); + if (hTmp != INVALID_HANDLE) + { + _handle = hTmp; + _size = _mallocator->chunk_size(_handle); + m_pchContent = Pointer(); + } + + m_uiRowCnt--; + m_uiDataSize -= uiCurRowSize; + } + + set_data_size(); + set_row_count(); + + return (0); + +ERROR_RET: + m_uiOffset = uiOldOffset; + return (iRet); +} + +int RawData::delete_cur_row(const RowValue &stRow) +{ + int iRet = 0; + ALLOC_SIZE_T uiOldOffset; + ALLOC_SIZE_T uiNextRowsSize; + + uiOldOffset = m_uiOffset; + if ((iRet = skip_row(stRow)) != 0) + { + goto ERROR_RET; + } + uiNextRowsSize = m_uiDataSize - m_uiOffset; + + memmove(m_pchContent + m_uiRowOffset, m_pchContent + m_uiOffset, uiNextRowsSize); + m_uiDataSize -= (m_uiOffset - m_uiRowOffset); + m_uiRowCnt--; + set_data_size(); + set_row_count(); + + m_uiOffset = m_uiRowOffset; + return (iRet); + +ERROR_RET: + m_uiOffset = uiOldOffset; + return (iRet); +} + +int RawData::delete_all_rows() +{ + m_uiDataSize = m_uiDataStart; + m_uiRowOffset = m_uiDataStart; + m_uiRowCnt = 0; + m_uiOffset = m_uiDataSize; + + set_data_size(); + set_row_count(); + + m_uiNeedSize = 0; + + return (0); +} + +int RawData::set_cur_row_flag(unsigned char uchFlag) +{ + if (m_uiRowOffset >= m_uiDataSize) + { + snprintf(m_szErr, sizeof(m_szErr), "no more rows"); + return (-1); + } + *(unsigned char *)(m_pchContent + m_uiRowOffset) = uchFlag; + + return (0); +} + +int RawData::copy_row() +{ + int iRet; + ALLOC_SIZE_T uiSize = m_pstRef->m_uiOffset - m_pstRef->m_uiRowOffset; + if ((iRet = expand_chunk(uiSize)) != 0) + return (iRet); + + memcpy(m_pchContent + m_uiOffset, m_pstRef->m_pchContent + m_pstRef->m_uiRowOffset, uiSize); + m_uiOffset += uiSize; + m_uiDataSize += uiSize; + m_uiRowCnt++; + + 
set_data_size(); + set_row_count(); + + return (0); +} + +int RawData::copy_all() +{ + int iRet; + ALLOC_SIZE_T uiSize = m_pstRef->m_uiDataSize; + if ((iRet = re_alloc_chunk(uiSize)) != 0) + return (iRet); + + memcpy(m_pchContent, m_pstRef->m_pchContent, uiSize); + + if ((iRet = Attach(_handle)) != 0) + return (iRet); + + return (0); +} + +int RawData::append_n_records(unsigned int uiNRows, const char *pchData, const unsigned int uiLen) +{ + int iRet; + + iRet = expand_chunk(uiLen); + if (iRet != 0) + return (iRet); + + memcpy(m_pchContent + m_uiDataSize, pchData, uiLen); + m_uiDataSize += uiLen; + m_uiRowCnt += uiNRows; + + set_data_size(); + set_row_count(); + + return (0); +} + +void RawData::init_timp_stamp() +{ + if (unlikely(NULL == m_pchContent)) + { + return; + } + + if (unlikely(m_uiOffset + 3 * sizeof(uint16_t) > _size)) + { + return; + } + uint16_t dwCurHour = RELATIVE_HOUR_CALCULATOR->get_relative_hour(); + + m_LastAccessHour = dwCurHour; + m_LastUpdateHour = dwCurHour; + m_CreateHour = dwCurHour; + + *(uint16_t *)(m_pchContent + m_uiTimeStampOffSet) = dwCurHour; + *(uint16_t *)(m_pchContent + m_uiTimeStampOffSet + sizeof(uint16_t)) = dwCurHour; + *(uint16_t *)(m_pchContent + m_uiTimeStampOffSet + 2 * sizeof(uint16_t)) = dwCurHour; +} + +void RawData::attach_time_stamp() +{ + if (unlikely(NULL == m_pchContent)) + { + return; + } + if (unlikely(m_uiTimeStampOffSet + 3 * sizeof(uint16_t) > _size)) + { + return; + } + m_LastAccessHour = *(uint16_t *)(m_pchContent + m_uiTimeStampOffSet); + m_LastUpdateHour = *(uint16_t *)(m_pchContent + m_uiTimeStampOffSet + sizeof(uint16_t)); + m_CreateHour = *(uint16_t *)(m_pchContent + m_uiTimeStampOffSet + 2 * sizeof(uint16_t)); +} +void RawData::update_last_access_time_by_hour() +{ + if (unlikely(NULL == m_pchContent)) + { + return; + } + if (unlikely(m_uiTimeStampOffSet + sizeof(uint16_t) > _size)) + { + return; + } + m_LastAccessHour = RELATIVE_HOUR_CALCULATOR->get_relative_hour(); + *(uint16_t *)(m_pchContent + 
m_uiTimeStampOffSet) = m_LastAccessHour; +} +void RawData::update_last_update_time_by_hour() +{ + if (unlikely(NULL == m_pchContent)) + { + return; + } + if (unlikely(m_uiTimeStampOffSet + 2 * sizeof(uint16_t) > _size)) + { + return; + } + m_LastUpdateHour = RELATIVE_HOUR_CALCULATOR->get_relative_hour(); + *(uint16_t *)(m_pchContent + m_uiTimeStampOffSet + sizeof(uint16_t)) = m_LastUpdateHour; +} +uint32_t RawData::get_create_time_by_hour() +{ + return m_CreateHour; +} +uint32_t RawData::get_last_access_time_by_hour() +{ + + return m_LastAccessHour; +} + +uint32_t RawData::get_last_update_time_by_hour() +{ + + return m_LastUpdateHour; +} +uint32_t RawData::get_select_op_count() +{ + return m_uchGetCount; +} + +void RawData::inc_select_count() +{ + + if (unlikely(m_uchGetCount >= BTYE_MAX_VALUE)) + { + return; + } + if (unlikely(m_uiGetCountOffset + sizeof(uint8_t) > _size)) + { + return; + } + m_uchGetCount++; + *(uint8_t *)(m_pchContent + m_uiGetCountOffset) = m_uchGetCount; +} + +DTCTableDefinition *RawData::get_node_table_def() +{ + return _tabledef; +} diff --git a/src/search_local/index_storage/cache/raw_data.h b/src/search_local/index_storage/cache/raw_data.h new file mode 100644 index 0000000..8e4160a --- /dev/null +++ b/src/search_local/index_storage/cache/raw_data.h @@ -0,0 +1,430 @@ +/* + * ===================================================================================== + * + * Filename: raw_data.h + * + * Description: raw data fundamental operation + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. 
+ * + * ===================================================================================== + */ + +#ifndef RAW_DATA_H +#define RAW_DATA_H + +#include "pt_malloc.h" +#include "global.h" +#include "field.h" +#include "col_expand.h" +#include "table_def_manager.h" +#include "node.h" + +#define PRE_DECODE_ROW 1 + +typedef enum _EnumDataType +{ + DATA_TYPE_RAW, // 平板数据结构 + DATA_TYPE_TREE_ROOT, // 树的根节点 + DATA_TYPE_TREE_NODE // 树的节点 +} EnumDataType; + +typedef enum _enum_oper_type_ +{ + OPER_DIRTY = 0x02, // cover INSERT, DELETE, UPDATE + OPER_SELECT = 0x30, + OPER_INSERT_OLD = 0x31, // old stuff, same as SELECT aka useless + OPER_UPDATE = 0x32, + OPER_DELETE_NA = 0x33, // async DELETE require quite a lot change + OPER_FLUSH = 0x34, // useless too, same as SELECT + OPER_RESV1 = 0x35, + OPER_INSERT = 0x36, + OPER_RESV2 = 0x37, +} TOperType; + +struct RawFormat +{ + unsigned char m_uchDataType; // 数据类型EnumDataType + uint32_t m_uiDataSize; // 数据总大小 + uint32_t m_uiRowCnt; // 行数 + uint8_t m_uchGetCount; // get次数 + uint16_t m_LastAccessHour; // 最近访问时间 + uint16_t m_LastUpdateHour; // 最近更新时间 + uint16_t m_CreateHour; // 创建时间 + char m_achKey[0]; // key + char m_achRows[0]; // 行数据 +} __attribute__((packed)); + +// 注意:修改操作可能会导致handle改变,因此需要检查重新保存 +class RawData +{ +private: + char *m_pchContent; // 注意:地址可能会因为realloc而改变 + uint32_t m_uiDataSize; // 包括data_type,data_size,rowcnt,key,rows等总数据大小 + uint32_t m_uiRowCnt; + uint8_t m_uchKeyIdx; + int m_iKeySize; + int m_iLAId; + int m_iLCmodId; + int m_iExpireId; + int m_iTableIdx; + + ALLOC_SIZE_T m_uiKeyStart; + ALLOC_SIZE_T m_uiDataStart; + ALLOC_SIZE_T m_uiRowOffset; + ALLOC_SIZE_T m_uiOffset; + ALLOC_SIZE_T m_uiLAOffset; + int m_uiGetCountOffset; + int m_uiTimeStampOffSet; + uint8_t m_uchGetCount; + uint16_t m_LastAccessHour; + uint16_t m_LastUpdateHour; + uint16_t m_CreateHour; + ALLOC_SIZE_T m_uiNeedSize; // 最近一次分配内存失败需要的大小 + + MEM_HANDLE_T _handle; + uint64_t _size; + Mallocator *_mallocator; + int _autodestroy; + + RawData 
*m_pstRef; + char m_szErr[200]; + + DTCTableDefinition *_tabledef; + +protected: + template + T *Pointer(void) const { return reinterpret_cast(_mallocator->handle_to_ptr(_handle)); } + + int set_data_size(); + int set_row_count(); + int expand_chunk(ALLOC_SIZE_T tExpSize); + int re_alloc_chunk(ALLOC_SIZE_T tSize); + int skip_row(const RowValue &stRow); + int encode_row(const RowValue &stRow, unsigned char uchOp, bool expendBuf = true); + +public: + /************************************************* + Description: 构造函数 + Input: pstMalloc 内存分配器 + iAutoDestroy 析构的时候是否自动释放内存 + Output: + Return: + *************************************************/ + RawData(Mallocator *pstMalloc, int iAutoDestroy = 0); + + ~RawData(); + + void change_mallocator(Mallocator *pstMalloc) + { + _mallocator = pstMalloc; + } + + const char *get_err_msg() { return m_szErr; } + + /************************************************* + Description: 新分配一块内存,并初始化 + Input: uchKeyIdx 作为key的字段在table里的下标 + iKeySize key的格式,0为变长,非0为定长长度 + pchKey 为格式化后的key,变长key的第0字节为长度 + uiDataSize 为数据的大小,用于一次分配足够大的chunk。如果设置为0,则insert row的时候再realloc扩大 + Output: + Return: 0为成功,非0失败 + *************************************************/ + int Init(uint8_t uchKeyIdx, int iKeySize, const char *pchKey, ALLOC_SIZE_T uiDataSize = 0, int laid = -1, int expireid = -1, int nodeIdx = -1); + int Init(const char *pchKey, ALLOC_SIZE_T uiDataSize = 0); + + /************************************************* + Description: attach一块已经格式化好的内存 + Input: hHandle 内存的句柄 + uchKeyIdx 作为key的字段在table里的下标 + iKeySize key的格式,0为变长,非0为定长长度 + Output: + Return: 0为成功,非0失败 + *************************************************/ + int Attach(MEM_HANDLE_T hHandle, uint8_t uchKeyIdx, int iKeySize, int laid = -1, int lastcmod = -1, int expireid = -1); + int Attach(MEM_HANDLE_T hHandle); + + /************************************************* + Description: 获取内存块的句柄 + Input: + Output: + Return: 句柄。 注意:任何修改操作可能会导致handle改变,因此需要检查重新保存 + 
*************************************************/ + MEM_HANDLE_T get_handle() { return _handle; } + + const char *get_addr() const { return m_pchContent; } + + /************************************************* + Description: 设置一个refrence,在调用CopyRow()或者CopyAll()的时候使用 + Input: pstRef refrence指针 + Output: + Return: + *************************************************/ + void set_refrence(RawData *pstRef) { m_pstRef = pstRef; } + + /************************************************* + Description: 包括key、rows等所有内存的大小 + Input: + Output: + Return: 所有内存的大小 + *************************************************/ + uint32_t data_size() const { return m_uiDataSize; } + + /************************************************* + Description: rows的开始偏移量 + Input: + Output: + Return: rows的开始偏移量 + *************************************************/ + uint32_t data_start() const { return m_uiDataStart; } + + /************************************************* + Description: 内存分配失败时,返回所需要的内存大小 + Input: + Output: + Return: 返回所需要的内存大小 + *************************************************/ + ALLOC_SIZE_T need_size() { return m_uiNeedSize; } + + /************************************************* + Description: 计算插入该行所需要的内存大小 + Input: stRow 行数据 + Output: + Return: 返回所需要的内存大小 + *************************************************/ + ALLOC_SIZE_T calc_row_size(const RowValue &stRow, int keyIndex); + + /************************************************* + Description: 获取格式化后的key + Input: + Output: + Return: 格式化后的key + *************************************************/ + const char *Key() const { return m_pchContent ? (m_pchContent + m_uiKeyStart) : NULL; } + char *Key() { return m_pchContent ? 
(m_pchContent + m_uiKeyStart) : NULL; } + + /************************************************* + Description: 获取key的格式 + Input: + Output: + Return: 变长返回0,定长key返回定长的长度 + *************************************************/ + int key_format() const { return m_iKeySize; } + + /************************************************* + Description: 获取key的实际长度 + Input: + Output: + Return: key的实际长度 + *************************************************/ + int key_size(); + + unsigned int total_rows() const { return m_uiRowCnt; } + void rewind(void) + { + m_uiOffset = m_uiDataStart; + m_uiRowOffset = m_uiDataStart; + } + + /************************************************* + Description: 销毁释放内存 + Input: + Output: + Return: 0为成功,非0失败 + *************************************************/ + int Destroy(); + + /************************************************* + Description: 释放多余的内存(通常在delete一些row后调用一次) + Input: + Output: + Return: 0为成功,非0失败 + *************************************************/ + int strip_mem(); + + /************************************************* + Description: 读取一行数据 + Input: + Output: stRow 保存行数据 + uchRowFlags 行数据是否脏数据等flag + iDecodeFlag 是否只是pre-read,不fetch_row移动指针 + Return: 0为成功,非0失败 + *************************************************/ + int decode_row(RowValue &stRow, unsigned char &uchRowFlags, int iDecodeFlag = 0); + + /************************************************* + Description: 插入一行数据 + Input: stRow 需要插入的行数据 + Output: + byFirst 是否插入到最前面,默认添加到最后面 + isDirty 是否脏数据 + Return: 0为成功,非0失败 + *************************************************/ + int insert_row(const RowValue &stRow, bool byFirst, bool isDirty); + + /************************************************* + Description: 插入一行数据 + Input: stRow 需要插入的行数据 + Output: + byFirst 是否插入到最前面,默认添加到最后面 + uchOp row的标记 + Return: 0为成功,非0失败 + *************************************************/ + int insert_row_flag(const RowValue &stRow, bool byFirst, unsigned char uchOp); + + /************************************************* 
+ Description: 插入若干行数据 + Input: uiNRows 行数 + stRow 需要插入的行数据 + Output: + byFirst 是否插入到最前面,默认添加到最后面 + isDirty 是否脏数据 + Return: 0为成功,非0失败 + *************************************************/ + int insert_n_rows(unsigned int uiNRows, const RowValue *pstRow, bool byFirst, bool isDirty); + + /************************************************* + Description: 用指定数据替换当前行 + Input: stRow 新的行数据 + Output: + isDirty 是否脏数据 + Return: 0为成功,非0失败 + *************************************************/ + int replace_cur_row(const RowValue &stRow, bool isDirty); + + /************************************************* + Description: 删除当前行 + Input: stRow 仅使用row的字段类型等信息,不需要实际数据 + Output: + Return: 0为成功,非0失败 + *************************************************/ + int delete_cur_row(const RowValue &stRow); + + /************************************************* + Description: 删除所有行 + Input: + Output: + Return: 0为成功,非0失败 + *************************************************/ + int delete_all_rows(); + + /************************************************* + Description: 设置当前行的标记 + Input: uchFlag 行的标记 + Output: + Return: 0为成功,非0失败 + *************************************************/ + int set_cur_row_flag(unsigned char uchFlag); + + /************************************************* + Description: 从refrence copy当前行到本地buffer末尾 + Input: + Output: + Return: 0为成功,非0失败 + *************************************************/ + int copy_row(); + + /************************************************* + Description: 用refrence的数据替换本地数据 + Input: + Output: + Return: 0为成功,非0失败 + *************************************************/ + int copy_all(); + + /************************************************* + Description: 添加N行已经格式化好的数据到末尾 + Input: + Output: + Return: 0为成功,非0失败 + *************************************************/ + int append_n_records(unsigned int uiNRows, const char *pchData, const unsigned int uiLen); + + /************************************************* + Description: 更新最后访问时间戳 + Input: 时间戳 + Output: + 
Return: + *************************************************/ + void update_lastacc(uint32_t now) + { + if (m_uiLAOffset > 0) + *(uint32_t *)(m_pchContent + m_uiLAOffset) = now; + } + int get_expire_time(DTCTableDefinition *t, uint32_t &expire); + /************************************************* + Description: 获取最后需改时间 + Input: 时间戳 + Output: + Return: + *************************************************/ + int get_lastcmod(uint32_t &lastcmod); + int check_size(MEM_HANDLE_T hHandle, uint8_t uchKeyIdx, int iKeySize, int size); + + /************************************************* + Description: 初始化时间戳,包括最后访问时间 + 、最后更新时间、创建时间三部分 + Input: 时间戳(以某个绝对事件为开始的小时数) + 虽然名字为Update,其实只会被调用一次 + tomchen + *************************************************/ + void init_timp_stamp(); + /************************************************* + Description: 更新节点最后访问时间 + Input: 时间戳(以某个绝对事件为开始的小时数) + tomchen + *************************************************/ + void update_last_access_time_by_hour(); + /************************************************* + Description: 更新节点最后更新时间 + Input: 时间戳(以某个绝对事件为开始的小时数) + tomchen + *************************************************/ + void update_last_update_time_by_hour(); + /************************************************* + Description: 增加节点被select请求的次数 + tomchen + *************************************************/ + void inc_select_count(); + /************************************************* + Description: 获取节点创建时间 + tomchen + *************************************************/ + uint32_t get_create_time_by_hour(); + /************************************************* + Description: 获取节点最后访问时间 + tomchen + *************************************************/ + uint32_t get_last_access_time_by_hour(); + /************************************************* + Description: 获取节点最后更新时间 + tomchen + *************************************************/ + uint32_t get_last_update_time_by_hour(); + /************************************************* + Description: 
获取节点被select操作的次数 + tomchen + *************************************************/ + uint32_t get_select_op_count(); + /************************************************* + Description: attach上时间戳 + tomchen + *************************************************/ + void attach_time_stamp(); + + DTCTableDefinition *get_node_table_def(); +}; + +inline int RawData::key_size() +{ + return m_iKeySize > 0 ? m_iKeySize : (sizeof(char) + *(unsigned char *)Key()); +} + +#endif diff --git a/src/search_local/index_storage/cache/raw_data_process.cc b/src/search_local/index_storage/cache/raw_data_process.cc new file mode 100644 index 0000000..889cd27 --- /dev/null +++ b/src/search_local/index_storage/cache/raw_data_process.cc @@ -0,0 +1,1137 @@ +/* + * ===================================================================================== + * + * Filename: raw_data_process.cc + * + * Description: raw data process interface + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. 
+ * + * ===================================================================================== + */ +#include +#include +#include + +#include "raw_data_process.h" +#include "global.h" +#include "log.h" +#include "sys_malloc.h" +#include "task_pkey.h" +#include "buffer_flush.h" +#include "relative_hour_calculator.h" + +DTC_USING_NAMESPACE + +RawDataProcess::RawDataProcess(Mallocator *pstMalloc, DTCTableDefinition *pstTab, DTCBufferPool *pstPool, const UpdateMode *pstUpdateMode) : m_stRawData(pstMalloc), m_pstTab(pstTab), m_pMallocator(pstMalloc), m_pstPool(pstPool) +{ + memcpy(&m_stUpdateMode, pstUpdateMode, sizeof(m_stUpdateMode)); + nodeSizeLimit = 0; + history_datasize = statmgr.get_sample(DATA_SIZE_HISTORY_STAT); + history_rowsize = statmgr.get_sample(ROW_SIZE_HISTORY_STAT); +} + +RawDataProcess::~RawDataProcess() +{ +} + +int RawDataProcess::init_data(Node *pstNode, RawData *pstAffectedRows, const char *ptrKey) +{ + int iRet; + + iRet = m_stRawData.Init(ptrKey, 0); + if (iRet != 0) + { + log_error("raw-data init error: %d,%s", iRet, m_stRawData.get_err_msg()); + return (-1); + } + pstNode->vd_handle() = m_stRawData.get_handle(); + + if (pstAffectedRows != NULL) + { + iRet = pstAffectedRows->Init(ptrKey, 0); + if (iRet != 0) + { + log_error("raw-data init error: %d,%s", iRet, pstAffectedRows->get_err_msg()); + return (-2); + } + } + + return (0); +} + +int RawDataProcess::attach_data(Node *pstNode, RawData *pstAffectedRows) +{ + int iRet; + + iRet = m_stRawData.Attach(pstNode->vd_handle()); + if (iRet != 0) + { + log_error("raw-data attach[handle:" UINT64FMT "] error: %d,%s", pstNode->vd_handle(), iRet, m_stRawData.get_err_msg()); + return (-1); + } + + if (pstAffectedRows != NULL) + { + iRet = pstAffectedRows->Init(m_stRawData.Key(), 0); + if (iRet != 0) + { + log_error("raw-data init error: %d,%s", iRet, pstAffectedRows->get_err_msg()); + return (-2); + } + } + + return (0); +} + +int RawDataProcess::get_all_rows(Node *pstNode, RawData *pstRows) +{ + int iRet; 
+ + m_llRowsInc = 0; + m_llDirtyRowsInc = 0; + + iRet = attach_data(pstNode, pstRows); + if (iRet != 0) + { + log_error("attach data error: %d", iRet); + return (-1); + } + + pstRows->set_refrence(&m_stRawData); + if (pstRows->copy_all() != 0) + { + log_error("copy data error: %d,%s", iRet, pstRows->get_err_msg()); + return (-2); + } + + return (0); +} + +int RawDataProcess::expand_node(TaskRequest &stTask, Node *pstNode) +{ + int iRet; + DTCTableDefinition *stpNodeTab, *stpTaskTab; + RowValue *stpNodeRow, *stpTaskRow; + + // no need to check expand status as checked in CCacheProces + + // save node to stack as new version + iRet = attach_data(pstNode, NULL); + if (iRet != 0) + { + log_error("attach data error: %d", iRet); + return -1; + } + unsigned int uiTotalRows = m_stRawData.total_rows(); + stpNodeTab = m_stRawData.get_node_table_def(); + stpTaskTab = TableDefinitionManager::Instance()->get_new_table_def(); + if (stpTaskTab == stpNodeTab) + { + log_notice("expand one node which is already new version, pay attention, treat as success"); + return 0; + } + RowValue stNewRow(stpTaskTab); + RowValue stNewNodeRow(stpNodeTab); + stpTaskRow = &stNewRow; + stpNodeRow = &stNewNodeRow; + RawData stNewTmpRawData(&g_stSysMalloc, 1); + iRet = stNewTmpRawData.Init(m_stRawData.Key(), m_stRawData.data_size()); + if (iRet != 0) + { + log_error("init raw-data struct error, ret = %d, err = %s", iRet, stNewTmpRawData.get_err_msg()); + return -2; + } + for (unsigned int i = 0; i < uiTotalRows; ++i) + { + unsigned char uchRowFlags; + if (m_stRawData.decode_row(*stpNodeRow, uchRowFlags, 0) != 0) + { + log_error("raw-data decode row error: %d, %s", iRet, m_stRawData.get_err_msg()); + return -1; + } + stpTaskRow->default_value(); + stpTaskRow->Copy(stpNodeRow); + iRet = stNewTmpRawData.insert_row(*stpTaskRow, m_stUpdateMode.m_uchInsertOrder ? 
true : false, false); + if (0 != iRet) + { + log_error("insert row to raw-data error: ret = %d, err = %s", iRet, stNewTmpRawData.get_err_msg()); + return -2; + } + } + + // allocate new with new version + RawData stTmpRawData(m_pMallocator); + iRet = stTmpRawData.Init(stNewTmpRawData.Key(), stNewTmpRawData.data_size()); + if (iRet == EC_NO_MEM) + { + if (m_pstPool->try_purge_size(stTmpRawData.need_size(), *pstNode) == 0) + iRet = stTmpRawData.Init(stNewTmpRawData.Key(), stNewTmpRawData.data_size() - stNewTmpRawData.data_start()); + } + + if (iRet != 0) + { + snprintf(m_szErr, sizeof(m_szErr), "raw-data init error: %s", stTmpRawData.get_err_msg()); + stTmpRawData.Destroy(); + return -3; + } + + stTmpRawData.set_refrence(&stNewTmpRawData); + iRet = stTmpRawData.copy_all(); + if (iRet != 0) + { + snprintf(m_szErr, sizeof(m_szErr), "raw-data init error: %s", stTmpRawData.get_err_msg()); + stTmpRawData.Destroy(); + return -3; + } + + // purge old + m_stRawData.Destroy(); + pstNode->vd_handle() = stTmpRawData.get_handle(); + return 0; +} + +int RawDataProcess::destroy_data(Node *pstNode) +{ + int iRet; + + iRet = m_stRawData.Attach(pstNode->vd_handle()); + if (iRet != 0) + { + log_error("raw-data attach error: %d,%s", iRet, m_stRawData.get_err_msg()); + return (-1); + } + m_llRowsInc += 0LL - m_stRawData.total_rows(); + + m_stRawData.Destroy(); + pstNode->vd_handle() = INVALID_HANDLE; + + return (0); +} + +int RawDataProcess::replace_data(Node *pstNode, RawData *pstRawData) +{ + int iRet; + + log_debug("replace_data start "); + + m_llRowsInc = 0; + m_llDirtyRowsInc = 0; + + RawData tmpRawData(m_pMallocator); + + iRet = tmpRawData.Init(pstRawData->Key(), pstRawData->data_size() - pstRawData->data_start()); + if (iRet == EC_NO_MEM) + { + if (m_pstPool->try_purge_size(tmpRawData.need_size(), *pstNode) == 0) + iRet = tmpRawData.Init(pstRawData->Key(), pstRawData->data_size() - pstRawData->data_start()); + } + + if (iRet != 0) + { + snprintf(m_szErr, sizeof(m_szErr), 
"raw-data init error: %s", tmpRawData.get_err_msg()); + tmpRawData.Destroy(); + return (-2); + } + + tmpRawData.set_refrence(pstRawData); + iRet = tmpRawData.copy_all(); + if (iRet != 0) + { + snprintf(m_szErr, sizeof(m_szErr), "raw-data init error: %s", tmpRawData.get_err_msg()); + tmpRawData.Destroy(); + return (-3); + } + + if (pstNode->vd_handle() != INVALID_HANDLE) + destroy_data(pstNode); + pstNode->vd_handle() = tmpRawData.get_handle(); + m_llRowsInc += pstRawData->total_rows(); + if (tmpRawData.total_rows() > 0) + { + log_debug("replace_data, stat history datasize, size is %u", tmpRawData.data_size()); + history_datasize.push(tmpRawData.data_size()); + history_rowsize.push(tmpRawData.total_rows()); + } + return (0); +} + +int RawDataProcess::get_expire_time(DTCTableDefinition *t, Node *pstNode, uint32_t &expire) +{ + int iRet = 0; + + iRet = attach_data(pstNode, NULL); + if (iRet != 0) + { + log_error("attach data error: %d", iRet); + return iRet; + } + iRet = m_stRawData.get_expire_time(t, expire); + if (iRet != 0) + { + log_error("raw data get expire time error: %d", iRet); + return iRet; + } + return 0; +} + +int RawDataProcess::dirty_rows_in_node(TaskRequest &stTask, Node *pstNode) +{ + int iRet = 0; + int dirty_rows = 0; + + iRet = attach_data(pstNode, NULL); + if (iRet != 0) + { + log_error("attach data error: %d", iRet); + return iRet; + } + + unsigned char uchRowFlags; + unsigned int uiTotalRows = m_stRawData.total_rows(); + + DTCTableDefinition *t = m_stRawData.get_node_table_def(); + RowValue stRow(t); + for (unsigned int i = 0; i < uiTotalRows; i++) + { + iRet = m_stRawData.decode_row(stRow, uchRowFlags, 0); + if (iRet != 0) + { + log_error("raw-data decode row error: %d,%s", iRet, m_stRawData.get_err_msg()); + return (-4); + } + + if (uchRowFlags & OPER_DIRTY) + dirty_rows++; + } + + return dirty_rows; +} + +// pstAffectedRows is always NULL +int RawDataProcess::delete_data(TaskRequest &stTask, Node *pstNode, RawData *pstAffectedRows) +{ + int 
iRet; + DTCTableDefinition *stpNodeTab, *stpTaskTab; + RowValue *stpNodeRow, *stpTaskRow; + + log_debug("delete_data start! "); + + m_llRowsInc = 0; + m_llDirtyRowsInc = 0; + + iRet = attach_data(pstNode, pstAffectedRows); + if (iRet != 0) + { + log_error("attach data error: %d", iRet); + return (iRet); + } + + if (pstAffectedRows != NULL) + pstAffectedRows->set_refrence(&m_stRawData); + + stpNodeTab = m_stRawData.get_node_table_def(); + stpTaskTab = stTask.table_definition(); + RowValue stNodeRow(stpNodeTab); + RowValue stTaskRow(stpTaskTab); + if (stpNodeTab == stpTaskTab) + { + stpNodeRow = &stTaskRow; + stpTaskRow = &stTaskRow; + } + else + { + stpNodeRow = &stNodeRow; + stpTaskRow = &stTaskRow; + } + + int iAffectRows = 0; + unsigned char uchRowFlags; + unsigned int uiTotalRows = m_stRawData.total_rows(); + for (unsigned int i = 0; i < uiTotalRows; i++) + { + iRet = m_stRawData.decode_row(*stpNodeRow, uchRowFlags, 0); + if (iRet != 0) + { + log_error("raw-data decode row error: %d,%s", iRet, m_stRawData.get_err_msg()); + return (-4); + } + if (stpNodeTab != stpTaskTab) + { + stpTaskRow->Copy(stpNodeRow); + } + if (stTask.compare_row(*stpTaskRow) != 0) + { //符合del条件 + if (pstAffectedRows != NULL) + { // copy row + iRet = pstAffectedRows->copy_row(); + if (iRet != 0) + { + log_error("raw-data copy row error: %d,%s", iRet, pstAffectedRows->get_err_msg()); + } + } + iRet = m_stRawData.delete_cur_row(*stpNodeRow); + if (iRet != EC_NO_MEM) + pstNode->vd_handle() = m_stRawData.get_handle(); + if (iRet != 0) + { + log_error("raw-data delete row error: %d,%s", iRet, m_stRawData.get_err_msg()); + return (-5); + } + iAffectRows++; + m_llRowsInc--; + if (uchRowFlags & OPER_DIRTY) + m_llDirtyRowsInc--; + } + } + if (iAffectRows > 0) + { + if (stTask.resultInfo.affected_rows() == 0 || + (stTask.request_condition() && stTask.request_condition()->has_type_timestamp())) + { + stTask.resultInfo.set_affected_rows(iAffectRows); + } + m_stRawData.strip_mem(); + } + + if 
(m_stRawData.total_rows() > 0) + { + log_debug("stat history datasize, size is %u", m_stRawData.data_size()); + history_datasize.push(m_stRawData.data_size()); + history_rowsize.push(m_stRawData.total_rows()); + m_stRawData.update_last_access_time_by_hour(); + m_stRawData.update_last_update_time_by_hour(); + } + return (0); +} + +int RawDataProcess::get_data(TaskRequest &stTask, Node *pstNode) +{ + int iRet; + DTCTableDefinition *stpNodeTab, *stpTaskTab; + RowValue *stpNodeRow, *stpTaskRow; + + log_debug("get_data start! "); + + m_llRowsInc = 0; + m_llDirtyRowsInc = 0; + int laid = stTask.flag_no_cache() ? -1 : stTask.table_definition()->lastacc_field_id(); + + iRet = m_stRawData.Attach(pstNode->vd_handle()); + if (iRet != 0) + { + log_error("raw-data attach[handle:" UINT64FMT "] error: %d,%s", pstNode->vd_handle(), iRet, m_stRawData.get_err_msg()); + return (-1); + } + + unsigned int uiTotalRows = m_stRawData.total_rows(); + stTask.prepare_result(); //准备返回结果对象 + if (stTask.all_rows() && (stTask.count_only() || !stTask.in_range((int)uiTotalRows, 0))) + { + if (stTask.is_batch_request()) + { + if ((int)uiTotalRows > 0) + stTask.add_total_rows((int)uiTotalRows); + } + else + { + stTask.set_total_rows((int)uiTotalRows); + } + } + else + { + stpNodeTab = m_stRawData.get_node_table_def(); + stpTaskTab = stTask.table_definition(); + RowValue stNodeRow(stpNodeTab); + RowValue stTaskRow(stpTaskTab); + if (stpNodeTab == stpTaskTab) + { + stpNodeRow = &stTaskRow; + stpTaskRow = &stTaskRow; + } + else + { + stpNodeRow = &stNodeRow; + stpTaskRow = &stTaskRow; + } + unsigned char uchRowFlags; + for (unsigned int i = 0; i < uiTotalRows; i++) //逐行拷贝数据 + { + stTask.update_key(*stpNodeRow); // use stpNodeRow is fine, as just modify key field + if ((iRet = m_stRawData.decode_row(*stpNodeRow, uchRowFlags, 0)) != 0) + { + log_error("raw-data decode row error: %d,%s", iRet, m_stRawData.get_err_msg()); + return (-2); + } + // this pointer compare is ok, as these two is both come from 
tabledefmanager. if they mean same, they are same object. + if (stpNodeTab != stpTaskTab) + { + stpTaskRow->Copy(stpNodeRow); + } + if (stTask.compare_row(*stpTaskRow) == 0) //如果不符合查询条件 + continue; + + if (stpTaskTab->expire_time_field_id() > 0) + stpTaskRow->update_expire_time(); + //当前行添加到task中 + if (stTask.append_row(stpTaskRow) > 0 && laid > 0) + { + m_stRawData.update_lastacc(stTask.Timestamp()); + } + if (stTask.all_rows() && stTask.result_full()) + { + stTask.set_total_rows((int)uiTotalRows); + break; + } + } + } + /*更新访问时间和查找操作计数*/ + m_stRawData.update_last_access_time_by_hour(); + m_stRawData.inc_select_count(); + log_debug("node[id:%u] ,Get Count is %d, last_access_time is %d, create_time is %d", pstNode->node_id(), + m_stRawData.get_select_op_count(), m_stRawData.get_last_access_time_by_hour(), m_stRawData.get_create_time_by_hour()); + return (0); +} + +// pstAffectedRows is always NULL +int RawDataProcess::append_data(TaskRequest &stTask, Node *pstNode, RawData *pstAffectedRows, bool isDirty, bool setrows) +{ + int iRet; + DTCTableDefinition *stpNodeTab, *stpTaskTab; + RowValue *stpNodeRow, *stpTaskRow; + + iRet = attach_data(pstNode, pstAffectedRows); + if (iRet != 0) + { + snprintf(m_szErr, sizeof(m_szErr), "attach data error"); + log_warning("attach data error: %d", iRet); + return (iRet); + } + + stpNodeTab = m_stRawData.get_node_table_def(); + stpTaskTab = stTask.table_definition(); + RowValue stTaskRow(stpTaskTab); + RowValue stNodeRow(stpNodeTab); + stpTaskRow = &stTaskRow; + stpTaskRow->default_value(); + stTask.update_row(*stpTaskRow); + + if (stpTaskTab->auto_increment_field_id() >= stpTaskTab->key_fields() && stTask.resultInfo.insert_id()) + { + const int iFieldID = stpTaskTab->auto_increment_field_id(); + const uint64_t iVal = stTask.resultInfo.insert_id(); + stpTaskRow->field_value(iFieldID)->Set(iVal); + } + + if (stpNodeTab == stpTaskTab) + { + stpNodeRow = stpTaskRow; + } + else + { + stpNodeRow = &stNodeRow; + 
stpNodeRow->default_value(); + stpNodeRow->Copy(stpTaskRow); + } + + log_debug("append_data start! "); + + m_llRowsInc = 0; + m_llDirtyRowsInc = 0; + + unsigned int uiTotalRows = m_stRawData.total_rows(); + if (uiTotalRows > 0) + { + if ((isDirty || setrows) && stTask.table_definition()->key_as_uniq_field()) + { + snprintf(m_szErr, sizeof(m_szErr), "duplicate key error"); + return (-1062); + } + RowValue stOldRow(stpNodeTab); //一行数据 + if (setrows && stTask.table_definition()->key_part_of_uniq_field()) + { + for (unsigned int i = 0; i < uiTotalRows; i++) + { //逐行拷贝数据 + unsigned char uchRowFlags; + if (m_stRawData.decode_row(stOldRow, uchRowFlags, 0) != 0) + { + log_error("raw-data decode row error: %d,%s", iRet, m_stRawData.get_err_msg()); + return (-1); + } + + if (stpNodeRow->Compare(stOldRow, stpNodeTab->uniq_fields_list(), + stpNodeTab->uniq_fields()) == 0) + { + snprintf(m_szErr, sizeof(m_szErr), "duplicate key error"); + return (-1062); + } + } + } + } + + if (pstAffectedRows != NULL && pstAffectedRows->insert_row(*stpNodeRow, false, isDirty) != 0) + { + snprintf(m_szErr, sizeof(m_szErr), "raw-data insert row error: %s", pstAffectedRows->get_err_msg()); + return (-1); + } + + // insert clean row + iRet = m_stRawData.insert_row(*stpNodeRow, m_stUpdateMode.m_uchInsertOrder ? true : false, isDirty); + if (iRet == EC_NO_MEM) + { + if (m_pstPool->try_purge_size(m_stRawData.need_size(), *pstNode) == 0) + iRet = m_stRawData.insert_row(*stpNodeRow, m_stUpdateMode.m_uchInsertOrder ? 
true : false, isDirty); + } + if (iRet != EC_NO_MEM) + pstNode->vd_handle() = m_stRawData.get_handle(); + if (iRet != 0) + { + snprintf(m_szErr, sizeof(m_szErr), "raw-data insert row error: %s", m_stRawData.get_err_msg()); + /*标记加入黑名单*/ + stTask.push_black_list_size(m_stRawData.need_size()); + return (-2); + } + + if (stTask.resultInfo.affected_rows() == 0 || setrows == true) + stTask.resultInfo.set_affected_rows(1); + m_llRowsInc++; + if (isDirty) + m_llDirtyRowsInc++; + log_debug("stat history datasize, size is %u", m_stRawData.data_size()); + history_datasize.push(m_stRawData.data_size()); + history_rowsize.push(m_stRawData.total_rows()); + m_stRawData.update_last_access_time_by_hour(); + m_stRawData.update_last_update_time_by_hour(); + log_debug("node[id:%u] ,Get Count is %d, create_time is %d, last_access_time is %d, last_update_time is %d ", pstNode->node_id(), + m_stRawData.get_select_op_count(), m_stRawData.get_create_time_by_hour(), + m_stRawData.get_last_access_time_by_hour(), m_stRawData.get_last_update_time_by_hour()); + return (0); +} + +int RawDataProcess::replace_data(TaskRequest &stTask, Node *pstNode) +{ + log_debug("replace_data start! "); + DTCTableDefinition *stpNodeTab, *stpTaskTab; + RowValue *stpNodeRow; + + int iRet; + int try_purge_count = 0; + uint64_t all_rows_size = 0; + int laid = stTask.flag_no_cache() || stTask.count_only() ? 
-1 : stTask.table_definition()->lastacc_field_id(); + int matchedCount = 0; + int limitStart = 0; + int limitStop = 0x10000000; + + stpTaskTab = stTask.table_definition(); + if (DTCColExpand::Instance()->is_expanding()) + stpNodeTab = TableDefinitionManager::Instance()->get_new_table_def(); + else + stpNodeTab = TableDefinitionManager::Instance()->get_cur_table_def(); + RowValue stNodeRow(stpNodeTab); + stpNodeRow = &stNodeRow; + stpNodeRow->default_value(); + + if (laid > 0 && stTask.requestInfo.limit_count() > 0) + { + limitStart = stTask.requestInfo.limit_start(); + if (stTask.requestInfo.limit_start() > 0x10000000) + { + laid = -1; + } + else if (stTask.requestInfo.limit_count() < 0x10000000) + { + limitStop = limitStart + stTask.requestInfo.limit_count(); + } + } + + m_llRowsInc = 0; + m_llDirtyRowsInc = 0; + + if (pstNode->vd_handle() != INVALID_HANDLE) + { + iRet = destroy_data(pstNode); + if (iRet != 0) + return (-1); + } + + iRet = m_stRawData.Init(stTask.packed_key(), 0); + if (iRet == EC_NO_MEM) + { + if (m_pstPool->try_purge_size(m_stRawData.need_size(), *pstNode) == 0) + iRet = m_stRawData.Init(m_pstTab->key_fields() - 1, m_pstTab->key_format(), stTask.packed_key(), 0); + } + if (iRet != EC_NO_MEM) + pstNode->vd_handle() = m_stRawData.get_handle(); + + if (iRet != 0) + { + snprintf(m_szErr, sizeof(m_szErr), "raw-data init error: %s", m_stRawData.get_err_msg()); + /*标记加入黑名单*/ + stTask.push_black_list_size(m_stRawData.need_size()); + m_pstPool->purge_node(stTask.packed_key(), *pstNode); + return (-2); + } + + if (stTask.result != NULL) + { + ResultSet *pstResultSet = stTask.result; + for (int i = 0; i < pstResultSet->total_rows(); i++) + { + RowValue *pstRow = pstResultSet->_fetch_row(); + if (pstRow == NULL) + { + log_debug("%s!", "call fetch_row func error"); + m_pstPool->purge_node(stTask.packed_key(), *pstNode); + m_stRawData.Destroy(); + return (-3); + } + + if (laid > 0 && stTask.compare_row(*pstRow)) + { + if (matchedCount >= limitStart && 
matchedCount < limitStop) + { + (*pstRow)[laid].s64 = stTask.Timestamp(); + } + matchedCount++; + } + + if (stpTaskTab != stpNodeTab) + { + stpNodeRow->Copy(pstRow); + } + else + { + stpNodeRow = pstRow; + } + + /* 插入当前行 */ + iRet = m_stRawData.insert_row(*stpNodeRow, false, false); + + /* 如果内存空间不足,尝试扩大最多两次 */ + if (iRet == EC_NO_MEM) + { + + /* 预测整个Node的数据大小 */ + all_rows_size = m_stRawData.need_size() - m_stRawData.data_start(); + all_rows_size *= pstResultSet->total_rows(); + all_rows_size /= (i + 1); + all_rows_size += m_stRawData.data_start(); + + if (try_purge_count >= 2) + { + goto ERROR_PROCESS; + } + + /* 尝试次数 */ + ++try_purge_count; + if (m_pstPool->try_purge_size((size_t)all_rows_size, *pstNode) == 0) + iRet = m_stRawData.insert_row(*stpNodeRow, false, false); + } + if (iRet != EC_NO_MEM) + pstNode->vd_handle() = m_stRawData.get_handle(); + + /* 当前行操作成功 */ + if (0 == iRet) + continue; + ERROR_PROCESS: + snprintf(m_szErr, sizeof(m_szErr), "raw-data insert row error: ret=%d,err=%s, cnt=%d", + iRet, m_stRawData.get_err_msg(), try_purge_count); + /*标记加入黑名单*/ + stTask.push_black_list_size(all_rows_size); + m_pstPool->purge_node(stTask.packed_key(), *pstNode); + m_stRawData.Destroy(); + return (-4); + } + + m_llRowsInc += pstResultSet->total_rows(); + } + + m_stRawData.update_last_access_time_by_hour(); + m_stRawData.update_last_update_time_by_hour(); + log_debug("node[id:%u], handle[" UINT64FMT "] ,data-size[%u], Get Count is %d, create_time is %d, last_access_time is %d, Update time is %d", + pstNode->node_id(), + pstNode->vd_handle(), + m_stRawData.data_size(), + m_stRawData.get_select_op_count(), + m_stRawData.get_create_time_by_hour(), + m_stRawData.get_last_access_time_by_hour(), + m_stRawData.get_last_update_time_by_hour()); + + history_datasize.push(m_stRawData.data_size()); + history_rowsize.push(m_stRawData.total_rows()); + return (0); +} + +// The correct replace behavior: +// If conflict rows found, delete them all +// Insert new row +// Affected 
rows is total deleted and inserted rows +// Implementation hehavior: +// If first conflict row found, update it, and increase affected rows to 2 (1 delete + 1 insert) +// delete other fonflict row, increase affected 1 per row +// If no rows found, insert it and set affected rows to 1 +int RawDataProcess::replace_rows(TaskRequest &stTask, Node *pstNode, RawData *pstAffectedRows, bool async, bool setrows) +{ + int iRet; + DTCTableDefinition *stpNodeTab, *stpTaskTab; + RowValue *stpNodeRow, *stpTaskRow; + + log_debug("replace_rows start! "); + + m_llRowsInc = 0; + m_llDirtyRowsInc = 0; + + if (pstNode->vd_handle() == INVALID_HANDLE) + { + iRet = init_data(pstNode, pstAffectedRows, stTask.packed_key()); + if (iRet != 0) + { + log_error("init data error: %d", iRet); + if (pstNode->vd_handle() == INVALID_HANDLE) + m_pstPool->purge_node(stTask.packed_key(), *pstNode); + return (iRet); + } + } + else + { + iRet = attach_data(pstNode, pstAffectedRows); + if (iRet != 0) + { + log_error("attach data error: %d", iRet); + return (iRet); + } + } + + unsigned char uchRowFlags; + uint64_t ullAffectedrows = 0; + unsigned int uiTotalRows = m_stRawData.total_rows(); + if (pstAffectedRows != NULL) + pstAffectedRows->set_refrence(&m_stRawData); + + stpNodeTab = m_stRawData.get_node_table_def(); + stpTaskTab = stTask.table_definition(); + RowValue stNewRow(stpTaskTab); + RowValue stNewNodeRow(stpNodeTab); + stNewRow.default_value(); + stpTaskRow = &stNewRow; + stpNodeRow = &stNewNodeRow; + stTask.update_row(*stpTaskRow); //获取Replace的行 + if (stpNodeTab != stpTaskTab) + stpNodeRow->Copy(stpTaskRow); + else + stpNodeRow = stpTaskRow; + + RowValue stRow(stpNodeTab); //一行数据 + for (unsigned int i = 0; i < uiTotalRows; i++) + { //逐行拷贝数据 + if (m_stRawData.decode_row(stRow, uchRowFlags, 0) != 0) + { + log_error("raw-data decode row error: %d,%s", iRet, m_stRawData.get_err_msg()); + return (-1); + } + + if (stTask.table_definition()->key_as_uniq_field() == false && + stNewRow.Compare(stRow, 
stTask.table_definition()->uniq_fields_list(), + stTask.table_definition()->uniq_fields()) != 0) + continue; + + if (ullAffectedrows == 0) + { + if (pstAffectedRows != NULL && pstAffectedRows->insert_row(*stpNodeRow, false, async) != 0) + { + log_error("raw-data copy row error: %d,%s", iRet, pstAffectedRows->get_err_msg()); + return (-2); + } + + ullAffectedrows = 2; + iRet = m_stRawData.replace_cur_row(*stpNodeRow, async); // 加进cache + } + else + { + ullAffectedrows++; + iRet = m_stRawData.delete_cur_row(*stpNodeRow); // 加进cache + } + if (iRet == EC_NO_MEM) + { + if (m_pstPool->try_purge_size(m_stRawData.need_size(), *pstNode) == 0) + iRet = m_stRawData.replace_cur_row(*stpNodeRow, async); + } + if (iRet != EC_NO_MEM) + pstNode->vd_handle() = m_stRawData.get_handle(); + if (iRet != 0) + { + snprintf(m_szErr, sizeof(m_szErr), "raw-data replace row error: %d, %s", + iRet, m_stRawData.get_err_msg()); + /*标记加入黑名单*/ + stTask.push_black_list_size(m_stRawData.need_size()); + return (-3); + } + if (uchRowFlags & OPER_DIRTY) + m_llDirtyRowsInc--; + if (async) + m_llDirtyRowsInc++; + } + + if (ullAffectedrows == 0) + { // 找不到匹配的行,insert一行 + iRet = m_stRawData.insert_row(*stpNodeRow, false, async); // 加进cache + if (iRet == EC_NO_MEM) + { + if (m_pstPool->try_purge_size(m_stRawData.need_size(), *pstNode) == 0) + iRet = m_stRawData.insert_row(*stpNodeRow, false, async); + } + if (iRet != EC_NO_MEM) + pstNode->vd_handle() = m_stRawData.get_handle(); + + if (iRet != 0) + { + snprintf(m_szErr, sizeof(m_szErr), "raw-data replace row error: %d, %s", + iRet, m_stRawData.get_err_msg()); + /*标记加入黑名单*/ + stTask.push_black_list_size(m_stRawData.need_size()); + return (-3); + } + m_llRowsInc++; + ullAffectedrows++; + if (async) + m_llDirtyRowsInc++; + } + + if (async == true || setrows == true) + { + stTask.resultInfo.set_affected_rows(ullAffectedrows); + } + else if (ullAffectedrows != stTask.resultInfo.affected_rows()) + { + //如果cache更新纪录数和helper更新的纪录数不相等 + log_debug("unequal affected 
rows, cache[%lld], helper[%lld]", + (long long)ullAffectedrows, + (long long)stTask.resultInfo.affected_rows()); + } + + log_debug("stat history datasize, size is %u", m_stRawData.data_size()); + history_datasize.push(m_stRawData.data_size()); + history_rowsize.push(m_stRawData.total_rows()); + m_stRawData.update_last_access_time_by_hour(); + m_stRawData.update_last_update_time_by_hour(); + log_debug("node[id:%u], create_time is %d, last_access_time is %d, Update Time is %d ", + pstNode->node_id(), m_stRawData.get_create_time_by_hour(), m_stRawData.get_last_access_time_by_hour(), m_stRawData.get_last_update_time_by_hour()); + return (0); +} + +/* + * encode到私有内存,防止replace,update引起重新rellocate导致value引用了过期指针 + */ +int RawDataProcess::encode_to_private_area(RawData &raw, RowValue &value, unsigned char value_flag) +{ + int ret = raw.Init(m_stRawData.Key(), raw.calc_row_size(value, m_pstTab->key_fields() - 1)); + if (0 != ret) + { + log_error("init raw-data struct error, ret=%d, err=%s", ret, raw.get_err_msg()); + return -1; + } + + ret = raw.insert_row(value, false, false); + if (0 != ret) + { + log_error("insert row to raw-data error: ret=%d, err=%s", ret, raw.get_err_msg()); + return -2; + } + + raw.rewind(); + + ret = raw.decode_row(value, value_flag, 0); + if (0 != ret) + { + log_error("decode raw-data to row error: ret=%d, err=%s", ret, raw.get_err_msg()); + return -3; + } + + return 0; +} + +int RawDataProcess::update_data(TaskRequest &stTask, Node *pstNode, RawData *pstAffectedRows, bool async, bool setrows) +{ + int iRet; + DTCTableDefinition *stpNodeTab, *stpTaskTab; + RowValue *stpNodeRow, *stpTaskRow; + + log_debug("update_data start! 
"); + + m_llRowsInc = 0; + m_llDirtyRowsInc = 0; + + iRet = attach_data(pstNode, pstAffectedRows); + if (iRet != 0) + { + log_error("attach data error: %d", iRet); + return (iRet); + } + + unsigned char uchRowFlags; + uint64_t ullAffectedrows = 0; + unsigned int uiTotalRows = m_stRawData.total_rows(); + if (pstAffectedRows != NULL) + pstAffectedRows->set_refrence(&m_stRawData); + + RowValue stRow(stTask.table_definition()); //一行数据 + + stpNodeTab = m_stRawData.get_node_table_def(); + stpTaskTab = stTask.table_definition(); + RowValue stNewRow(stpTaskTab); + RowValue stNewNodeRow(stpNodeTab); + stpTaskRow = &stNewRow; + stpNodeRow = &stNewNodeRow; + if (stpNodeTab == stpTaskTab) + stpNodeRow = stpTaskRow; + + for (unsigned int i = 0; i < uiTotalRows; i++) + { //逐行拷贝数据 + if (m_stRawData.decode_row(*stpNodeRow, uchRowFlags, 0) != 0) + { + log_error("raw-data decode row error: %d,%s", iRet, m_stRawData.get_err_msg()); + return (-1); + } + + if (stpNodeTab != stpTaskTab) + stpTaskRow->Copy(stpNodeRow); + + //如果不符合查询条件 + if (stTask.compare_row(*stpTaskRow) == 0) + continue; + + stTask.update_row(*stpTaskRow); //修改数据 + ullAffectedrows++; + + if (stpNodeTab != stpTaskTab) + stpNodeRow->Copy(stpTaskRow); + + if (pstAffectedRows != NULL && pstAffectedRows->insert_row(*stpNodeRow, false, async) != 0) + { + log_error("raw-data copy row error: %d,%s", iRet, pstAffectedRows->get_err_msg()); + return (-2); + } + + // 在私有区间decode + RawData stTmpRows(&g_stSysMalloc, 1); + if (encode_to_private_area(stTmpRows, *stpNodeRow, uchRowFlags)) + { + log_error("encode rowvalue to private rawdata area failed"); + return -3; + } + + iRet = m_stRawData.replace_cur_row(*stpNodeRow, async); // 加进cache + if (iRet == EC_NO_MEM) + { + if (m_pstPool->try_purge_size(m_stRawData.need_size(), *pstNode) == 0) + iRet = m_stRawData.replace_cur_row(*stpNodeRow, async); + } + if (iRet != EC_NO_MEM) + pstNode->vd_handle() = m_stRawData.get_handle(); + if (iRet != 0) + { + snprintf(m_szErr, sizeof(m_szErr), 
"raw-data replace row error: %d, %s", + iRet, m_stRawData.get_err_msg()); + /*标记加入黑名单*/ + stTask.push_black_list_size(m_stRawData.need_size()); + return (-6); + } + + if (uchRowFlags & OPER_DIRTY) + m_llDirtyRowsInc--; + if (async) + m_llDirtyRowsInc++; + } + + if (async == true || setrows == true) + { + stTask.resultInfo.set_affected_rows(ullAffectedrows); + } + else if (ullAffectedrows != stTask.resultInfo.affected_rows()) + { + //如果cache更新纪录数和helper更新的纪录数不相等 + log_debug("unequal affected rows, cache[%lld], helper[%lld]", + (long long)ullAffectedrows, + (long long)stTask.resultInfo.affected_rows()); + } + log_debug("stat history datasize, size is %u", m_stRawData.data_size()); + history_datasize.push(m_stRawData.data_size()); + history_rowsize.push(m_stRawData.total_rows()); + m_stRawData.update_last_access_time_by_hour(); + m_stRawData.update_last_update_time_by_hour(); + log_debug("node[id:%u], create_time is %d, last_access_time is %d, UpdateTime is %d", + pstNode->node_id(), m_stRawData.get_create_time_by_hour(), m_stRawData.get_last_access_time_by_hour(), m_stRawData.get_last_update_time_by_hour()); + return (0); +} + +int RawDataProcess::flush_data(DTCFlushRequest *pstFlushReq, Node *pstNode, unsigned int &uiFlushRowsCnt) +{ + int iRet; + + log_debug("flush_data start! 
"); + + m_llRowsInc = 0; + m_llDirtyRowsInc = 0; + + iRet = attach_data(pstNode, NULL); + if (iRet != 0) + { + log_error("attach data error: %d", iRet); + return (iRet); + } + + unsigned char uchRowFlags; + unsigned int uiTotalRows = m_stRawData.total_rows(); + + uiFlushRowsCnt = 0; + DTCValue astKey[m_pstTab->key_fields()]; + TaskPackedKey::unpack_key(m_pstTab, m_stRawData.Key(), astKey); + RowValue stRow(m_pstTab); //一行数据 + for (int i = 0; i < m_pstTab->key_fields(); i++) + stRow[i] = astKey[i]; + + for (unsigned int i = 0; pstNode->is_dirty() && i < uiTotalRows; i++) + { //逐行拷贝数据 + if (m_stRawData.decode_row(stRow, uchRowFlags, 0) != 0) + { + log_error("raw-data decode row error: %d,%s", iRet, m_stRawData.get_err_msg()); + return (-1); + } + + if ((uchRowFlags & OPER_DIRTY) == false) + continue; + + if (pstFlushReq && pstFlushReq->flush_row(stRow) != 0) + { + log_error("flush_data() invoke flushRow() failed."); + return (-2); + } + m_stRawData.set_cur_row_flag(uchRowFlags & ~OPER_DIRTY); + m_llDirtyRowsInc--; + uiFlushRowsCnt++; + } + + return (0); +} + +int RawDataProcess::purge_data(DTCFlushRequest *pstFlushReq, Node *pstNode, unsigned int &uiFlushRowsCnt) +{ + int iRet; + + log_debug("purge_data start! 
");

    // Flush whatever is still dirty first; on failure its code is returned as is.
    iRet = flush_data(pstFlushReq, pstNode, uiFlushRowsCnt);
    if (iRet != 0)
    {
        return (iRet);
    }
    // Report every row of the node as removed.
    m_llRowsInc = 0LL - m_stRawData.total_rows();

    return (0);
}
diff --git a/src/search_local/index_storage/cache/raw_data_process.h b/src/search_local/index_storage/cache/raw_data_process.h
new file mode 100644
index 0000000..5dc00b1
--- /dev/null
+++ b/src/search_local/index_storage/cache/raw_data_process.h
@@ -0,0 +1,212 @@
/*
 * =====================================================================================
 *
 * Filename: raw_data_process.h
 *
 * Description: raw data process interface
 *
 * Version: 1.0
 * Created: 09/08/2020 10:02:05 PM
 * Revision: none
 * Compiler: gcc
 *
 * Author: Norton, yangshuang68@jd.com
 * Company: JD.com, Inc.
 *
 * =====================================================================================
 */

#ifndef RAW_DATA_PROCESS_H
#define RAW_DATA_PROCESS_H

#include "buffer_def.h"
#include "protocol.h"
#include "value.h"
#include "field.h"
#include "section.h"
#include "table_def.h"
#include "task_request.h"
#include "stat_dtc.h"
#include "raw_data.h"
#include "node.h"
#include "data_process.h"
#include "buffer_pool.h"
#include "namespace.h"
#include "stat_manager.h"

DTC_BEGIN_NAMESPACE

class TaskRequest;
class DTCFlushRequest;

/*
 * DataProcess implementation that operates on a node's rows through a
 * RawData chunk (m_stRawData). Each operation resets and then maintains the
 * per-operation counters exposed by rows_inc() / dirty_rows_inc().
 */
class RawDataProcess
    : public DataProcess
{
private:
    RawData m_stRawData;          // chunk accessor attached to the node being processed
    DTCTableDefinition *m_pstTab; // table definition used for key layout
    Mallocator *m_pMallocator;
    DTCBufferPool *m_pstPool;     // pool asked to purge memory / nodes
    UpdateMode m_stUpdateMode;
    int64_t m_llRowsInc;          // rows added by the last operation (may be negative)
    int64_t m_llDirtyRowsInc;     // dirty rows added by the last operation (may be negative)
    char m_szErr[200];            // last error message, see get_err_msg()

    unsigned int nodeSizeLimit; // -DEBUG-

    /* Sampling statistics over historical node data; kept where the memory operations are managed so that the statistic points stay in one place. modify by tomchen 2014.08.27 */
    StatSample history_datasize;
    StatSample history_rowsize;

protected:
    int init_data(Node *pstNode, RawData *pstAffectedRows, const char *ptrKey);
    int attach_data(Node *pstNode, RawData *pstAffectedRows);
    int destroy_data(Node *pstNode);

private:
    int encode_to_private_area(RawData &, RowValue &, unsigned char);

public:
    RawDataProcess(Mallocator *pstMalloc, DTCTableDefinition *pstTab, DTCBufferPool *pstPool, const UpdateMode *pstUpdateMode);

    ~RawDataProcess();

    void set_limit_node_size(int node_size) { nodeSizeLimit = node_size; } // -DEBUG-

    const char *get_err_msg() { return m_szErr; }
    void set_insert_mode(EUpdateMode iMode) { m_stUpdateMode.m_iInsertMode = iMode; }
    void set_insert_order(int iOrder) { m_stUpdateMode.m_uchInsertOrder = iOrder; }

    /* Switch both this object and its RawData accessor to another allocator. */
    void change_mallocator(Mallocator *pstMalloc)
    {
        log_debug("oring mallc: %p, new mallc: %p", m_pMallocator, pstMalloc);
        m_pMallocator = pstMalloc;
        m_stRawData.change_mallocator(pstMalloc);
    }

    /* expire time for nodb mode */
    int get_expire_time(DTCTableDefinition *t, Node *node, uint32_t &expire);

    /*count dirty row, cache process will use it when buffer_delete_rows in task->all_rows case*/
    int dirty_rows_in_node(TaskRequest &stTask, Node *node);

    /*************************************************
	  Description:	Number of rows added by the last operation (may be negative)
	  Input:
	  Output:
	  Return:		row count
	*************************************************/
    int64_t rows_inc() { return m_llRowsInc; }

    /*************************************************
	  Description:	Number of dirty rows added by the last operation (may be negative)
	  Input:
	  Output:
	  Return:		row count
	*************************************************/
    int64_t dirty_rows_inc() { return m_llDirtyRowsInc; }

    /*************************************************
	  Description:	Fetch all data stored in the node
	  Input:		pstNode	node
	  Output:		pstRows	structure that receives the data
	  Return:		0 on success, non-zero on failure
	*************************************************/
    int get_all_rows(Node *pstNode, RawData *pstRows);

    /*************************************************
	  Description:	Expand the columns of the node
	  Input:		pstNode	node
	  Output:
	  Return:		0 on success, non-zero on failure
	*************************************************/
    int expand_node(TaskRequest &stTask, Node *pstNode);

    /*************************************************
	  Description:	Replace the cached data with the data in pstRawData
	  Input:		pstRawData	new data
				pstNode	node
	  Output:
	  Return:		0 on success, non-zero on failure
	*************************************************/
    int replace_data(Node *pstNode, RawData *pstRawData);

    /*************************************************
	  Description:	Delete data according to the task request
	  Input:		stTask	task request
				pstNode	node
	  Output:		pstAffectedRows	receives the deleted data (nothing is saved when NULL)
	  Return:		0 on success, non-zero on failure
	*************************************************/
    int delete_data(TaskRequest &stTask, Node *pstNode, RawData *pstAffectedRows);

    /*************************************************
	  Description:	Query data according to the task request
	  Input:		stTask	task request
				pstNode	node
	  Output:		stTask	receives the rows that were found
	  Return:		0 on success, non-zero on failure
	*************************************************/
    int get_data(TaskRequest &stTask, Node *pstNode);

    /*************************************************
	  Description:	Append one row according to the task request
	  Input:		stTask	task request
				pstNode	node
				isDirty	whether the row is dirty data
				uniq	named setrows in the definition
	  Output:		pstAffectedRows	receives the inserted row (nothing is saved when NULL)
	  Return:		0 on success, non-zero on failure
	*************************************************/
    int append_data(TaskRequest &stTask, Node *pstNode, RawData *pstAffectedRows, bool isDirty, bool uniq);

    /*************************************************
	  Description:	Replace the cached data with the data of the task
	  Input:		stTask	task request
				pstNode	node
	  Output:
	  Return:		0 on success, non-zero on failure
	*************************************************/
    int replace_data(TaskRequest &stTask, Node *pstNode);

    /*************************************************
	  Description:	Replace the cached data with the data of the task
	  Input:		stTask	task request
				pstNode	node
				async	whether this is an asynchronous operation
	  Output:		pstAffectedRows	receives the data after the update (nothing is saved when NULL)
	  Return:		0 on success, non-zero on failure
	*************************************************/
    int replace_rows(TaskRequest &stTask, Node *pstNode, RawData *pstAffectedRows, bool async, bool setrows = false);

    /*************************************************
	  Description:	Update the cached data according to the task request
	  Input:		stTask	task request
				pstNode	node
				async	whether this is an asynchronous operation
	  Output:		pstAffectedRows	receives the data after the update (nothing is saved when NULL)
	  Return:		0 on success, non-zero on failure
	*************************************************/
    int update_data(TaskRequest &stTask, Node *pstNode, RawData *pstAffectedRows, bool async, bool setrows = false);

    /*************************************************
	  Description:	Turn the dirty data of the node into flush requests
	  Input:		pstNode	node
	  Output:		pstFlushReq	receives the flush requests
				uiFlushRowsCnt	number of rows flushed
	  Return:		0 on success, non-zero on failure
	*************************************************/
    int flush_data(DTCFlushRequest *pstFlushReq, Node *pstNode, unsigned int &uiFlushRowsCnt);

    /*************************************************
	  Description:	Remove the data from the cache; dirty data produces flush requests
	  Input:		pstNode	node
	  Output:		pstFlushReq	receives the flush requests
				uiFlushRowsCnt	number of rows flushed
	  Return:		0 on success, non-zero on failure
	*************************************************/
    int purge_data(DTCFlushRequest *pstFlushReq, Node *pstNode, unsigned int &uiFlushRowsCnt);
};

DTC_END_NAMESPACE

#endif
diff --git a/src/search_local/index_storage/cache/reader_interface.h b/src/search_local/index_storage/cache/reader_interface.h
new file mode 100644
index 0000000..5a79fad
--- /dev/null
+++ b/src/search_local/index_storage/cache/reader_interface.h
@@ -0,0 +1,35 @@
/*
 * =====================================================================================
 *
 * Filename: reader_interface.h
 *
 * Description:
 *
 * Version: 1.0
 * Created: 09/08/2020 10:02:05 PM
 * Revision: none
 * Compiler: gcc
 *
 * Author: Norton, yangshuang68@jd.com
 * Company: JD.com, Inc.
*
 * =====================================================================================
 */
#ifndef __READER_INTERFACE_H
#define __READER_INTERFACE_H

#include "field.h"

/*
 * Abstract row reader. Implementations must provide err_msg(), read_row()
 * and end(); begin_read() is optional.
 */
class ReaderInterface
{
public:
    ReaderInterface() {}
    virtual ~ReaderInterface() {}

    // presumably the message of the last error - confirm with implementations
    virtual const char *err_msg() = 0;
    // Optional hook; the default implementation does nothing and returns 0.
    virtual int begin_read() { return 0; }
    // presumably fills `row` with the next row - return convention to be confirmed
    virtual int read_row(RowValue &row) = 0;
    // NOTE(review): the meaning of the return value (end-of-data test vs. status) is not visible here
    virtual int end() = 0;
};

#endif
diff --git a/src/search_local/index_storage/cache/sys_malloc.cc b/src/search_local/index_storage/cache/sys_malloc.cc
new file mode 100644
index 0000000..0975d23
--- /dev/null
+++ b/src/search_local/index_storage/cache/sys_malloc.cc
@@ -0,0 +1,20 @@
/*
 * =====================================================================================
 *
 * Filename: sys_malloc.cc
 *
 * Description: packaging system malloc memory method.
 *
 * Version: 1.0
 * Created: 09/08/2020 10:02:05 PM
 * Revision: none
 * Compiler: gcc
 *
 * Author: Norton, yangshuang68@jd.com
 * Company: JD.com, Inc.
 *
 * =====================================================================================
 */
#include "sys_malloc.h"

// Definition of the instance declared `extern` in sys_malloc.h.
// NOTE(review): the declaration sits between DTC_BEGIN_NAMESPACE and
// DTC_END_NAMESPACE while this definition does not - confirm the macros make
// both refer to the same object.
SysMalloc g_stSysMalloc;
diff --git a/src/search_local/index_storage/cache/sys_malloc.h b/src/search_local/index_storage/cache/sys_malloc.h
new file mode 100644
index 0000000..4557882
--- /dev/null
+++ b/src/search_local/index_storage/cache/sys_malloc.h
@@ -0,0 +1,186 @@
/*
 * =====================================================================================
 *
 * Filename: sys_malloc.h
 *
 * Description: packaging system malloc memory method.
 *
 * Version: 1.0
 * Created: 09/08/2020 10:02:05 PM
 * Revision: none
 * Compiler: gcc
 *
 * Author: Norton, yangshuang68@jd.com
 * Company: JD.com, Inc.
+ * + * ===================================================================================== + */ + +#ifndef SYS_MALLOC_H +#define SYS_MALLOC_H + +#include +#include +#include +#include "namespace.h" +#include "mallocator.h" + +DTC_BEGIN_NAMESPACE + +class SysMalloc : public Mallocator +{ +private: + char m_szErr[200]; + +public: + SysMalloc() {} + virtual ~SysMalloc() {} + + template + T *Pointer(ALLOC_HANDLE_T hHandle) { return reinterpret_cast(handle_to_ptr(hHandle)); } + + ALLOC_HANDLE_T Handle(void *p) { return (ALLOC_HANDLE_T)((char *)p - (char *)0); } + + const char *get_err_msg() { return m_szErr; } + + /************************************************* + Description: 分配内存 + Input: tSize 分配的内存大小 + Output: + Return: 内存块句柄,INVALID_HANDLE为失败 + *************************************************/ + ALLOC_HANDLE_T Malloc(ALLOC_SIZE_T tSize) + { + void *p = malloc(sizeof(ALLOC_SIZE_T) + tSize); + if (p == NULL) + { + snprintf(m_szErr, sizeof(m_szErr), "%m"); + return (INVALID_HANDLE); + } + *(ALLOC_SIZE_T *)p = tSize; + return Handle((void *)((char *)p + sizeof(ALLOC_SIZE_T))); + } + + /************************************************* + Description: 分配内存,并将内存初始化为0 + Input: tSize 分配的内存大小 + Output: + Return: 内存块句柄,INVALID_HANDLE为失败 + *************************************************/ + ALLOC_HANDLE_T Calloc(ALLOC_SIZE_T tSize) + { + void *p = calloc(1, sizeof(ALLOC_SIZE_T) + tSize); + if (p == NULL) + { + snprintf(m_szErr, sizeof(m_szErr), "%m"); + return (INVALID_HANDLE); + } + *(ALLOC_SIZE_T *)p = tSize; + return Handle((void *)((char *)p + sizeof(ALLOC_SIZE_T))); + } + + /************************************************* + Description: 重新分配内存 + Input: hHandle 老内存句柄 + tSize 新分配的内存大小 + Output: + Return: 内存块句柄,INVALID_HANDLE为失败(失败时不会释放老内存块) + *************************************************/ + ALLOC_HANDLE_T ReAlloc(ALLOC_HANDLE_T hHandle, ALLOC_SIZE_T tSize) + { + char *old; + if (hHandle == INVALID_HANDLE) + old = NULL; + else + old = (char *)0 + (hHandle - 
sizeof(ALLOC_SIZE_T)); + if (tSize == 0) + { + free(old); + return (INVALID_HANDLE); + } + void *p = realloc(old, sizeof(ALLOC_SIZE_T) + tSize); + if (p == NULL) + { + snprintf(m_szErr, sizeof(m_szErr), "%m"); + return (INVALID_HANDLE); + } + *(ALLOC_SIZE_T *)p = tSize; + return Handle((void *)((char *)p + sizeof(ALLOC_SIZE_T))); + } + + /************************************************* + Description: 释放内存 + Input: hHandle 内存句柄 + Output: + Return: 0为成功,非0失败 + *************************************************/ + int Free(ALLOC_HANDLE_T hHandle) + { + if (hHandle == INVALID_HANDLE) + return (0); + + char *old = (char *)0 + (hHandle - sizeof(ALLOC_SIZE_T)); + free(old); + return (0); + } + + /************************************************* + Description: 获取内存块大小 + Input: hHandle 内存句柄 + Output: + Return: 内存大小 + *************************************************/ + ALLOC_SIZE_T chunk_size(ALLOC_HANDLE_T hHandle) + { + if (hHandle == INVALID_HANDLE) + return (0); + + char *old = (char *)0 + (hHandle - sizeof(ALLOC_SIZE_T)); + return *(ALLOC_SIZE_T *)old; + } + + /************************************************* + Description: 将句柄转换成内存地址 + Input: 内存句柄 + Output: + Return: 内存地址,如果句柄无效返回NULL + *************************************************/ + void *handle_to_ptr(ALLOC_HANDLE_T hHandle) + { + return (char *)0 + hHandle; + } + + /************************************************* + Description: 将内存地址转换为句柄 + Input: 内存地址 + Output: + Return: 内存句柄,如果地址无效返回INVALID_HANDLE + *************************************************/ + ALLOC_HANDLE_T ptr_to_handle(void *p) + { + return Handle(p); + } + + /* not implement */ + ALLOC_SIZE_T ask_for_destroy_size(ALLOC_HANDLE_T hHandle) + { + return (ALLOC_SIZE_T)0; + } + + /************************************************* + Description: 检测handle是否有效 + Input: 内存句柄 + Output: + Return: 0: 有效; -1:无效 + *************************************************/ + virtual int handle_is_valid(ALLOC_HANDLE_T mem_handle) + { + return 0; + } +}; + +extern 
SysMalloc g_stSysMalloc; + +DTC_END_NAMESPACE + +#endif diff --git a/src/search_local/index_storage/cache/t_tree.cc b/src/search_local/index_storage/cache/t_tree.cc new file mode 100644 index 0000000..6a58529 --- /dev/null +++ b/src/search_local/index_storage/cache/t_tree.cc @@ -0,0 +1,1741 @@ +/* + * ===================================================================================== + * + * Filename: t_tree.cc + * + * Description: T-tree fundamental operation. only for TreeData invoke. + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. + * + * ===================================================================================== + */ +#include +#include +#include +#include +#include "log.h" +#include "t_tree.h" +#include "value.h" +#include "data_chunk.h" + +/*#ifndef MODU_TEST +#include "tree_data.h" +#endif*/ + +#define GET_KEY(x, u, t) \ + do \ + { \ + x = (typeof(x)) * (t *)(u); \ + } while (0) + +int64_t KeyCompare(const char *pchKey, void *pCmpCookie, Mallocator &stMalloc, ALLOC_HANDLE_T hOtherKey) +{ + const char *pOtherKey = reinterpret_cast(stMalloc.handle_to_ptr(hOtherKey)); + pOtherKey = pOtherKey + sizeof(unsigned char) * 2 + 2 * sizeof(uint32_t); + + CmpCookie *cookie = reinterpret_cast(pCmpCookie); + const DTCTableDefinition *t_pstTab = cookie->m_pstTab; + const int idx = cookie->m_uchIdx; + int fieldType = t_pstTab->field_type(idx); + + char *v = const_cast(pchKey); + DTCValue *value = reinterpret_cast(v); + + switch (fieldType) + { + case DField::Signed: + int64_t skey, sotherKey; + skey = value->s64; + if (unlikely(t_pstTab->field_size(idx) > (int)sizeof(int32_t))) + { + GET_KEY(sotherKey, pOtherKey, int64_t); + } + else + { + GET_KEY(sotherKey, pOtherKey, int32_t); + } + return skey - sotherKey; + + case DField::Unsigned: + uint64_t ukey, uotherKey; + ukey = value->u64; + if (unlikely(t_pstTab->field_size(idx) > 
(int)sizeof(uint32_t))) + { + GET_KEY(uotherKey, pOtherKey, uint64_t); + } + else + { + GET_KEY(uotherKey, pOtherKey, uint32_t); + } + return ukey - uotherKey; + + case DField::Float: + double dkey, dotherKey, sKey; + dkey = value->flt; + if (likely(t_pstTab->field_size(idx) > (int)sizeof(float))) + { + GET_KEY(dotherKey, pOtherKey, double); + } + else + { + GET_KEY(dotherKey, pOtherKey, float); + } + sKey = dkey - dotherKey; + if (sKey > -0.0001 && sKey < 0.0001) + return 0; + return sKey; + + case DField::String: + { + int keyLen = 0, tKeyLen = 0; + char *key = NULL; + if (DField::String == fieldType) + { + keyLen = value->str.len; + key = value->str.ptr; + } + else if (DField::Binary == fieldType) + { + keyLen = value->bin.len; + key = value->bin.ptr; + } + else + keyLen = 0; + + GET_KEY(tKeyLen, pOtherKey, int); + if (keyLen == 0 && tKeyLen == 0) + { + return 0; + } + else if (keyLen == 0 && tKeyLen != 0) + { + return -1; + } + else if (keyLen != 0 && tKeyLen == 0) + { + return 1; + } + else if (keyLen != 0 && tKeyLen != 0) + { + pOtherKey = pOtherKey + sizeof(int); + int len = keyLen < tKeyLen ? keyLen : tKeyLen; + int res = strncasecmp(key, pOtherKey, len); + if (keyLen == tKeyLen) + return res; + else if (res == 0) + { + return keyLen > tKeyLen ? 1 : -1; + } + else + { + return res; + } + } + return 0; + } + case DField::Binary: + { + int keyLen = 0, tKeyLen = 0; + char *key = NULL; + if (DField::String == fieldType) + { + keyLen = value->str.len; + key = value->str.ptr; + } + else if (DField::Binary == fieldType) + { + keyLen = value->bin.len; + key = value->bin.ptr; + } + else + keyLen = 0; + + GET_KEY(tKeyLen, pOtherKey, int); + if (keyLen == 0 && tKeyLen == 0) + { + return 0; + } + else if (keyLen == 0 && tKeyLen != 0) + { + return -1; + } + else if (keyLen != 0 && tKeyLen == 0) + { + return 1; + } + else if (keyLen != 0 && tKeyLen != 0) + { + pOtherKey = pOtherKey + sizeof(int); + int len = keyLen < tKeyLen ? 
keyLen : tKeyLen; + int res = memcmp(key, pOtherKey, len); + if (keyLen == tKeyLen) + return res; + else if (res == 0) + { + return keyLen > tKeyLen ? 1 : -1; + } + else + { + return res; + } + } + return 0; + } + + default: + return 0; + } + return 0; +} + +int Visit(Mallocator &stMalloc, ALLOC_HANDLE_T &hRecord, void *pCookie) +{ + pResCookie *cookie = reinterpret_cast(pCookie); + const char *m_pchContent = reinterpret_cast(stMalloc.handle_to_ptr(hRecord)); + uint32_t hRecordRowCnts = *(uint32_t *)(m_pchContent + sizeof(unsigned char) + sizeof(uint32_t)); + + if (cookie->nodesNum > 0 && cookie->rowsGot >= cookie->nodesNum) + return 0; + (cookie->m_handle)[cookie->nodesGot] = hRecord; + cookie->nodesGot = cookie->nodesGot + 1; + cookie->rowsGot = cookie->rowsGot + hRecordRowCnts; + return 0; +} + +int _TtreeNode::Init() +{ + m_hLeft = INVALID_HANDLE; + m_hRight = INVALID_HANDLE; + m_chBalance = 0; + m_ushNItems = 0; + for (int i = 0; i < PAGE_SIZE; i++) + m_ahItems[i] = INVALID_HANDLE; + return (0); +} + +ALLOC_HANDLE_T _TtreeNode::Alloc(Mallocator &stMalloc, ALLOC_HANDLE_T hRecord) +{ + ALLOC_HANDLE_T h; + h = stMalloc.Malloc(sizeof(TtreeNode)); + if (h == INVALID_HANDLE) + return (INVALID_HANDLE); + + TtreeNode *p = (TtreeNode *)stMalloc.handle_to_ptr(h); + p->Init(); + p->m_ahItems[0] = hRecord; + p->m_ushNItems = 1; + + return (h); +} + +int convert_cvalue(Mallocator &stMalloc, DTCValue *pch, void *pCmpCookie, ALLOC_HANDLE_T hReInsert) +{ + CmpCookie *cookie = reinterpret_cast(pCmpCookie); + const DTCTableDefinition *t_pstTab = cookie->m_pstTab; + const int idx = cookie->m_uchIdx; + int fieldType = t_pstTab->field_type(idx); + + char *pchKey = ((DataChunk *)stMalloc.handle_to_ptr(hReInsert))->index_key(); + + switch (fieldType) + { + case DField::Signed: + if (unlikely(t_pstTab->field_size(idx) > (int)sizeof(int32_t))) + pch->s64 = *(int64_t *)pchKey; + else + pch->s64 = (int64_t) * (int32_t *)pchKey; + break; + + case DField::Unsigned: + if 
(unlikely(t_pstTab->field_size(idx) > (int)sizeof(uint32_t))) + pch->u64 = *(uint64_t *)pchKey; + else + pch->u64 = (uint64_t) * (uint32_t *)pchKey; + break; + + case DField::Float: + if (likely(t_pstTab->field_size(idx) > (int)sizeof(float))) + pch->flt = *(double *)pchKey; + else + pch->flt = (double)*(float *)pchKey; + break; + + case DField::String: + case DField::Binary: + pch->bin.len = *((int *)pchKey); + pch->bin.ptr = pchKey + sizeof(int); + break; + } + + return 0; +} + +int _TtreeNode::Insert(Mallocator &stMalloc, ALLOC_HANDLE_T &hNode, const char *pchKey, void *pCmpCookie, KeyComparator pfComp, ALLOC_HANDLE_T hRecord, bool &isAllocNode) +{ + TtreeNode *pstNode; + + GET_OBJ(stMalloc, hNode, pstNode); + uint16_t ushNodeCnt = pstNode->m_ushNItems; + int iDiff = pfComp(pchKey, pCmpCookie, stMalloc, pstNode->m_ahItems[0]); + + if (iDiff == 0) + { + // assert(0); + return (-2); + } + + if (iDiff <= 0) + { + ALLOC_HANDLE_T hLeft = pstNode->m_hLeft; + if ((hLeft == INVALID_HANDLE || iDiff == 0) && pstNode->m_ushNItems < PAGE_SIZE) + { + for (uint32_t i = ushNodeCnt; i > 0; i--) + pstNode->m_ahItems[i] = pstNode->m_ahItems[i - 1]; + pstNode->m_ahItems[0] = hRecord; + pstNode->m_ushNItems++; + return (0); + } + if (hLeft == INVALID_HANDLE) + { + hLeft = Alloc(stMalloc, hRecord); + if (hLeft == INVALID_HANDLE) + return (-1); + isAllocNode = true; + pstNode->m_hLeft = hLeft; + } + else + { + ALLOC_HANDLE_T hChild = hLeft; + int iGrow = Insert(stMalloc, hChild, pchKey, pCmpCookie, pfComp, hRecord, isAllocNode); + if (iGrow < 0) + return iGrow; + if (hChild != hLeft) + { + hLeft = hChild; + pstNode->m_hLeft = hChild; + } + if (iGrow == 0) + return (0); + } + if (pstNode->m_chBalance > 0) + { + pstNode->m_chBalance = 0; + return (0); + } + else if (pstNode->m_chBalance == 0) + { + pstNode->m_chBalance = -1; + return (1); + } + else + { + TtreeNode *pstLeft = (TtreeNode *)stMalloc.handle_to_ptr(hLeft); + if (pstLeft->m_chBalance < 0) + { // single LL turn + 
pstNode->m_hLeft = pstLeft->m_hRight; + pstLeft->m_hRight = hNode; + pstNode->m_chBalance = 0; + pstLeft->m_chBalance = 0; + hNode = hLeft; + } + else + { // double LR turn + ALLOC_HANDLE_T hRight = pstLeft->m_hRight; + TtreeNode *pstRight = (TtreeNode *)stMalloc.handle_to_ptr(hRight); + pstLeft->m_hRight = pstRight->m_hLeft; + pstRight->m_hLeft = hLeft; + pstNode->m_hLeft = pstRight->m_hRight; + pstRight->m_hRight = hNode; + pstNode->m_chBalance = (pstRight->m_chBalance < 0) ? 1 : 0; + pstLeft->m_chBalance = (pstRight->m_chBalance > 0) ? -1 : 0; + pstRight->m_chBalance = 0; + hNode = hRight; + } + return (0); + } + } + + iDiff = pfComp(pchKey, pCmpCookie, stMalloc, pstNode->m_ahItems[ushNodeCnt - 1]); + if (iDiff == 0) + { + // assert(0); + return (-2); + } + if (iDiff >= 0) + { + ALLOC_HANDLE_T hRight = pstNode->m_hRight; + if ((hRight == INVALID_HANDLE || iDiff == 0) && pstNode->m_ushNItems < PAGE_SIZE) + { + pstNode->m_ahItems[ushNodeCnt] = hRecord; + pstNode->m_ushNItems++; + return (0); + } + if (hRight == INVALID_HANDLE) + { + hRight = Alloc(stMalloc, hRecord); + if (hRight == INVALID_HANDLE) + return (-1); + pstNode->m_hRight = hRight; + isAllocNode = true; + } + else + { + ALLOC_HANDLE_T hChild = hRight; + int iGrow = Insert(stMalloc, hChild, pchKey, pCmpCookie, pfComp, hRecord, isAllocNode); + if (iGrow < 0) + return iGrow; + if (hChild != hRight) + { + hRight = hChild; + pstNode->m_hRight = hChild; + } + if (iGrow == 0) + return (0); + } + if (pstNode->m_chBalance < 0) + { + pstNode->m_chBalance = 0; + return (0); + } + else if (pstNode->m_chBalance == 0) + { + pstNode->m_chBalance = 1; + return (1); + } + else + { + TtreeNode *pstRight = (TtreeNode *)stMalloc.handle_to_ptr(hRight); + if (pstRight->m_chBalance > 0) + { // single RR turn + pstNode->m_hRight = pstRight->m_hLeft; + pstRight->m_hLeft = hNode; + pstNode->m_chBalance = 0; + pstRight->m_chBalance = 0; + hNode = hRight; + } + else + { // double RL turn + ALLOC_HANDLE_T hLeft = pstRight->m_hLeft; 
+ TtreeNode *pstLeft = (TtreeNode *)stMalloc.handle_to_ptr(hLeft); + pstRight->m_hLeft = pstLeft->m_hRight; + pstLeft->m_hRight = hRight; + pstNode->m_hRight = pstLeft->m_hLeft; + pstLeft->m_hLeft = hNode; + pstNode->m_chBalance = (pstLeft->m_chBalance > 0) ? -1 : 0; + pstRight->m_chBalance = (pstLeft->m_chBalance < 0) ? 1 : 0; + pstLeft->m_chBalance = 0; + hNode = hLeft; + } + return (0); + } + } + + int iLeft = 1; + int iRight = ushNodeCnt - 1; + while (iLeft < iRight) + { + int i = (iLeft + iRight) >> 1; + iDiff = pfComp(pchKey, pCmpCookie, stMalloc, pstNode->m_ahItems[i]); + if (iDiff == 0) + { + // assert(0); + return (-2); + } + if (iDiff > 0) + { + iLeft = i + 1; + } + else + { + iRight = i; + if (iDiff == 0) + break; + } + } + // Insert before item[r] + if (pstNode->m_ushNItems < PAGE_SIZE) + { + for (int i = ushNodeCnt; i > iRight; i--) + pstNode->m_ahItems[i] = pstNode->m_ahItems[i - 1]; + pstNode->m_ahItems[iRight] = hRecord; + pstNode->m_ushNItems++; + return (0); + } + else + { + TtreeNode stBackup; + memcpy(&stBackup, pstNode, sizeof(TtreeNode)); + ALLOC_HANDLE_T hReInsert; + if (pstNode->m_chBalance >= 0) + { + hReInsert = pstNode->m_ahItems[0]; + for (int i = 1; i < iRight; i++) + pstNode->m_ahItems[i - 1] = pstNode->m_ahItems[i]; + pstNode->m_ahItems[iRight - 1] = hRecord; + } + else + { + hReInsert = pstNode->m_ahItems[ushNodeCnt - 1]; + for (int i = ushNodeCnt - 1; i > iRight; i--) + pstNode->m_ahItems[i] = pstNode->m_ahItems[i - 1]; + pstNode->m_ahItems[iRight] = hRecord; + } + + DTCValue pch; + convert_cvalue(stMalloc, &pch, pCmpCookie, hReInsert); + int iRet = Insert(stMalloc, hNode, (const char *)(&pch), pCmpCookie, pfComp, hReInsert, isAllocNode); + if (iRet < 0) + { + memcpy(pstNode->m_ahItems, stBackup.m_ahItems, sizeof(pstNode->m_ahItems)); + } + return (iRet); + } +} + +int _TtreeNode::Delete(Mallocator &stMalloc, ALLOC_HANDLE_T &hNode, const char *pchKey, void *pCmpCookie, KeyComparator pfComp, bool &isFreeNode) +{ + TtreeNode *pstNode; 
+ ALLOC_HANDLE_T hTmp; + + GET_OBJ(stMalloc, hNode, pstNode); + uint16_t ushNodeCnt = pstNode->m_ushNItems; + int iDiff = pfComp(pchKey, pCmpCookie, stMalloc, pstNode->m_ahItems[0]); + + if (iDiff < 0) + { + ALLOC_HANDLE_T hLeft = pstNode->m_hLeft; + if (hLeft != INVALID_HANDLE) + { + ALLOC_HANDLE_T hChild = hLeft; + int iRet = Delete(stMalloc, hChild, pchKey, pCmpCookie, pfComp, isFreeNode); + if (iRet < -1) + return (iRet); + if (hChild != hLeft) + { + pstNode->m_hLeft = hChild; + } + if (iRet > 0) + { + return balance_left_branch(stMalloc, hNode); + } + else if (iRet == 0) + { + return (0); + } + } + // assert(iDiff == 0); + } + + iDiff = pfComp(pchKey, pCmpCookie, stMalloc, pstNode->m_ahItems[ushNodeCnt - 1]); + if (iDiff <= 0) + { + for (int i = 0; i < ushNodeCnt; i++) + { + if (pfComp(pchKey, pCmpCookie, stMalloc, pstNode->m_ahItems[i]) == 0) + { + if (ushNodeCnt == 1) + { + if (pstNode->m_hRight == INVALID_HANDLE) + { + hTmp = pstNode->m_hLeft; + stMalloc.Free(hNode); + hNode = hTmp; + return (1); + } + else if (pstNode->m_hLeft == INVALID_HANDLE) + { + hTmp = pstNode->m_hRight; + stMalloc.Free(hNode); + hNode = hTmp; + return (1); + } + isFreeNode = true; + } + ALLOC_HANDLE_T hLeft = pstNode->m_hLeft; + ALLOC_HANDLE_T hRight = pstNode->m_hRight; + if (ushNodeCnt <= MIN_ITEMS) + { + if (hLeft != INVALID_HANDLE && pstNode->m_chBalance <= 0) + { + TtreeNode *pstLeft; + GET_OBJ(stMalloc, hLeft, pstLeft); + while (pstLeft->m_hRight != INVALID_HANDLE) + { + GET_OBJ(stMalloc, pstLeft->m_hRight, pstLeft); + } + while (--i >= 0) + { + pstNode->m_ahItems[i + 1] = pstNode->m_ahItems[i]; + } + pstNode->m_ahItems[0] = pstLeft->m_ahItems[pstLeft->m_ushNItems - 1]; + DTCValue pch; + convert_cvalue(stMalloc, &pch, pCmpCookie, pstNode->m_ahItems[0]); + + ALLOC_HANDLE_T hChild = hLeft; + int iRet = Delete(stMalloc, hChild, (const char *)(&pch), pCmpCookie, pfComp, isFreeNode); + if (iRet < -1) + { + return (iRet); + } + if (hChild != hLeft) + { + pstNode->m_hLeft = hChild; + 
} + if (iRet > 0) + { + iRet = balance_left_branch(stMalloc, hNode); + } + return (iRet); + } + else if (pstNode->m_hRight != INVALID_HANDLE) + { + TtreeNode *pstRight; + GET_OBJ(stMalloc, hRight, pstRight); + while (pstRight->m_hLeft != INVALID_HANDLE) + { + GET_OBJ(stMalloc, pstRight->m_hLeft, pstRight); + } + while (++i < ushNodeCnt) + { + pstNode->m_ahItems[i - 1] = pstNode->m_ahItems[i]; + } + pstNode->m_ahItems[ushNodeCnt - 1] = pstRight->m_ahItems[0]; + DTCValue pch; + convert_cvalue(stMalloc, &pch, pCmpCookie, pstNode->m_ahItems[ushNodeCnt - 1]); + ALLOC_HANDLE_T hChild = hRight; + int iRet = Delete(stMalloc, hChild, (const char *)(&pch), pCmpCookie, pfComp, isFreeNode); + if (iRet < -1) + { + return (iRet); + } + if (hChild != hRight) + { + pstNode->m_hRight = hChild; + } + if (iRet > 0) + { + iRet = balance_right_branch(stMalloc, hNode); + } + return (iRet); + } + } + + while (++i < ushNodeCnt) + { + pstNode->m_ahItems[i - 1] = pstNode->m_ahItems[i]; + } + pstNode->m_ushNItems--; + + return (0); + } + } + } + + ALLOC_HANDLE_T hRight = pstNode->m_hRight; + if (hRight != 0) + { + ALLOC_HANDLE_T hChild = hRight; + int iRet = Delete(stMalloc, hChild, pchKey, pCmpCookie, pfComp, isFreeNode); + if (iRet < -1) + { + return (iRet); + } + if (hChild != hRight) + { + pstNode->m_hRight = hChild; + } + if (iRet > 0) + { + return balance_right_branch(stMalloc, hNode); + } + else + { + return iRet; + } + } + + return -1; +} + +inline int _TtreeNode::balance_left_branch(Mallocator &stMalloc, ALLOC_HANDLE_T &hNode) +{ + TtreeNode *pstNode; + GET_OBJ(stMalloc, hNode, pstNode); + + if (pstNode->m_chBalance < 0) + { + pstNode->m_chBalance = 0; + return (1); + } + else if (pstNode->m_chBalance == 0) + { + pstNode->m_chBalance = 1; + return (0); + } + else + { + ALLOC_HANDLE_T hRight = pstNode->m_hRight; + TtreeNode *pstRight; + GET_OBJ(stMalloc, hRight, pstRight); + + if (pstRight->m_chBalance >= 0) + { // single RR turn + pstNode->m_hRight = pstRight->m_hLeft; + 
pstRight->m_hLeft = hNode; + if (pstRight->m_chBalance == 0) + { + pstNode->m_chBalance = 1; + pstRight->m_chBalance = -1; + hNode = hRight; + return 0; + } + else + { + pstNode->m_chBalance = 0; + pstRight->m_chBalance = 0; + hNode = hRight; + return 1; + } + } + else + { // double RL turn + ALLOC_HANDLE_T hLeft = pstRight->m_hLeft; + TtreeNode *pstLeft; + GET_OBJ(stMalloc, hLeft, pstLeft); + pstRight->m_hLeft = pstLeft->m_hRight; + pstLeft->m_hRight = hRight; + pstNode->m_hRight = pstLeft->m_hLeft; + pstLeft->m_hLeft = hNode; + pstNode->m_chBalance = pstLeft->m_chBalance > 0 ? -1 : 0; + pstRight->m_chBalance = pstLeft->m_chBalance < 0 ? 1 : 0; + pstLeft->m_chBalance = 0; + hNode = hLeft; + return 1; + } + } +} + +inline int _TtreeNode::balance_right_branch(Mallocator &stMalloc, ALLOC_HANDLE_T &hNode) +{ + TtreeNode *pstNode; + GET_OBJ(stMalloc, hNode, pstNode); + + if (pstNode->m_chBalance > 0) + { + pstNode->m_chBalance = 0; + return (1); + } + else if (pstNode->m_chBalance == 0) + { + pstNode->m_chBalance = -1; + return (0); + } + else + { + ALLOC_HANDLE_T hLeft = pstNode->m_hLeft; + TtreeNode *pstLeft; + GET_OBJ(stMalloc, hLeft, pstLeft); + if (pstLeft->m_chBalance <= 0) + { // single LL turn + pstNode->m_hLeft = pstLeft->m_hRight; + pstLeft->m_hRight = hNode; + if (pstLeft->m_chBalance == 0) + { + pstNode->m_chBalance = -1; + pstLeft->m_chBalance = 1; + hNode = hLeft; + return (0); + } + else + { + pstNode->m_chBalance = 0; + pstLeft->m_chBalance = 0; + hNode = hLeft; + return (1); + } + } + else + { // double LR turn + ALLOC_HANDLE_T hRight = pstLeft->m_hRight; + TtreeNode *pstRight; + GET_OBJ(stMalloc, hRight, pstRight); + + pstLeft->m_hRight = pstRight->m_hLeft; + pstRight->m_hLeft = hLeft; + pstNode->m_hLeft = pstRight->m_hRight; + pstRight->m_hRight = hNode; + pstNode->m_chBalance = pstRight->m_chBalance < 0 ? 1 : 0; + pstLeft->m_chBalance = pstRight->m_chBalance > 0 ? 
-1 : 0; + pstRight->m_chBalance = 0; + hNode = hRight; + return (1); + } + } +} + +unsigned _TtreeNode::ask_for_destroy_size(Mallocator &stMalloc, ALLOC_HANDLE_T hNode) +{ + unsigned size = 0; + + if (INVALID_HANDLE == hNode) + return size; + + TtreeNode *pstNode; + GET_OBJ(stMalloc, hNode, pstNode); + ALLOC_HANDLE_T hLeft = pstNode->m_hLeft; + ALLOC_HANDLE_T hRight = pstNode->m_hRight; + + for (int i = 0; i < pstNode->m_ushNItems; i++) + size += stMalloc.chunk_size(pstNode->m_ahItems[i]); + //size += ((DataChunk*)(stMalloc.handle_to_ptr(pstNode->m_ahItems[i])))->ask_for_destroy_size(&stMalloc); + + size += stMalloc.chunk_size(hNode); + + size += ask_for_destroy_size(stMalloc, hLeft); + size += ask_for_destroy_size(stMalloc, hRight); + + return size; +} + +int _TtreeNode::Destroy(Mallocator &stMalloc, ALLOC_HANDLE_T hNode) +{ + if (hNode != INVALID_HANDLE) + { + TtreeNode *pstNode; + GET_OBJ(stMalloc, hNode, pstNode); + ALLOC_HANDLE_T hLeft = pstNode->m_hLeft; + ALLOC_HANDLE_T hRight = pstNode->m_hRight; + for (int i = 0; i < pstNode->m_ushNItems; i++) + stMalloc.Free(pstNode->m_ahItems[i]); + //((DataChunk*)(stMalloc.handle_to_ptr(pstNode->m_ahItems[i])))->Destroy(&stMalloc); + stMalloc.Free(hNode); + + Destroy(stMalloc, hLeft); + Destroy(stMalloc, hRight); + } + return (0); +} + +int _TtreeNode::Find(Mallocator &stMalloc, const char *pchKey, void *pCmpCookie, KeyComparator pfComp, ALLOC_HANDLE_T *&phRecord) +{ + int iDiff; + + phRecord = NULL; + if (m_ushNItems == 0) + return (0); + + iDiff = pfComp(pchKey, pCmpCookie, stMalloc, m_ahItems[0]); + if (iDiff == 0) + { + phRecord = &(m_ahItems[0]); + return (1); + } + else if (iDiff > 0) + { + iDiff = pfComp(pchKey, pCmpCookie, stMalloc, m_ahItems[m_ushNItems - 1]); + if (iDiff == 0) + { + phRecord = &(m_ahItems[m_ushNItems - 1]); + return (1); + } + else if (iDiff > 0) + { + if (m_hRight == INVALID_HANDLE) + { + return (0); + } + TtreeNode *pstNode; + GET_OBJ(stMalloc, m_hRight, pstNode); + return 
pstNode->Find(stMalloc, pchKey, pCmpCookie, pfComp, phRecord); + } + + int iLeft = 1; + int iRight = m_ushNItems - 1; + while (iLeft < iRight) + { + int i = (iLeft + iRight) >> 1; + iDiff = pfComp(pchKey, pCmpCookie, stMalloc, m_ahItems[i]); + if (iDiff == 0) + { + phRecord = &(m_ahItems[i]); + return (1); + } + if (iDiff > 0) + { + iLeft = i + 1; + } + else + { + iRight = i; + } + } + return (0); + } + else + { + if (m_hLeft == INVALID_HANDLE) + { + return (0); + } + TtreeNode *pstNode; + GET_OBJ(stMalloc, m_hLeft, pstNode); + return pstNode->Find(stMalloc, pchKey, pCmpCookie, pfComp, phRecord); + } +} + +int _TtreeNode::Find(Mallocator &stMalloc, const char *pchKey, void *pCmpCookie, KeyComparator pfComp, ALLOC_HANDLE_T &hRecord) +{ + int iRet; + ALLOC_HANDLE_T *phItem; + + hRecord = INVALID_HANDLE; + iRet = Find(stMalloc, pchKey, pCmpCookie, pfComp, phItem); + if (iRet == 1 && phItem != NULL) + { + hRecord = *phItem; + } + + return (iRet); +} + +int _TtreeNode::find_handle(Mallocator &stMalloc, ALLOC_HANDLE_T hRecord) +{ + + if (m_ushNItems == 0) + return (0); + + for (int i = 0; i < m_ushNItems; i++) + if (m_ahItems[i] == hRecord) + return (1); + + TtreeNode *pstNode; + if (m_hRight != INVALID_HANDLE) + { + GET_OBJ(stMalloc, m_hRight, pstNode); + if (pstNode->find_handle(stMalloc, hRecord) == 1) + return (1); + } + + if (m_hLeft != INVALID_HANDLE) + { + GET_OBJ(stMalloc, m_hLeft, pstNode); + if (pstNode->find_handle(stMalloc, hRecord) == 1) + return (1); + } + + return (0); +} + +int _TtreeNode::find_node(Mallocator &stMalloc, const char *pchKey, void *pCmpCookie, KeyComparator pfComp, ALLOC_HANDLE_T &hNode) +{ + int iDiff; + + hNode = INVALID_HANDLE; + if (m_ushNItems == 0) + return (0); + + iDiff = pfComp(pchKey, pCmpCookie, stMalloc, m_ahItems[0]); + if (iDiff == 0) + { + hNode = stMalloc.ptr_to_handle(this); + return (1); + } + else if (iDiff > 0) + { + iDiff = pfComp(pchKey, pCmpCookie, stMalloc, m_ahItems[m_ushNItems - 1]); + if (iDiff <= 0) + { + hNode = 
stMalloc.ptr_to_handle(this); + return (1); + } + else if (iDiff > 0) + { + if (m_hRight == INVALID_HANDLE) + { + return (0); + } + TtreeNode *pstNode; + GET_OBJ(stMalloc, m_hRight, pstNode); + return pstNode->find_node(stMalloc, pchKey, pCmpCookie, pfComp, hNode); + } + } + else + { + if (m_hLeft == INVALID_HANDLE) + { + hNode = stMalloc.ptr_to_handle(this); + return (1); + } + TtreeNode *pstNode; + GET_OBJ(stMalloc, m_hLeft, pstNode); + return pstNode->find_node(stMalloc, pchKey, pCmpCookie, pfComp, hNode); + } + + return (0); +} + +int _TtreeNode::traverse_forward(Mallocator &stMalloc, ItemVisit pfVisit, void *pCookie) +{ + int iRet; + + if (m_hLeft != INVALID_HANDLE) + { + if ((iRet = ((TtreeNode *)stMalloc.handle_to_ptr(m_hLeft))->traverse_forward(stMalloc, pfVisit, pCookie)) != 0) + { + return (iRet); + } + } + + for (int i = 0; i < m_ushNItems; i++) + { + if ((iRet = pfVisit(stMalloc, m_ahItems[i], pCookie)) != 0) + { + return (iRet); + } + } + + if (m_hRight != INVALID_HANDLE) + { + if ((iRet = ((TtreeNode *)stMalloc.handle_to_ptr(m_hRight))->traverse_forward(stMalloc, pfVisit, pCookie)) != 0) + { + return (iRet); + } + } + + return (0); +} + +int _TtreeNode::traverse_backward(Mallocator &stMalloc, ItemVisit pfVisit, void *pCookie) +{ + int iRet; + + if (m_hRight != INVALID_HANDLE) + { + if ((iRet = ((TtreeNode *)stMalloc.handle_to_ptr(m_hRight))->traverse_backward(stMalloc, pfVisit, pCookie)) != 0) + { + return (iRet); + } + } + for (int i = m_ushNItems; --i >= 0;) + { + if ((iRet = pfVisit(stMalloc, m_ahItems[i], pCookie)) != 0) + { + return (iRet); + } + } + if (m_hLeft != INVALID_HANDLE) + { + if ((iRet = ((TtreeNode *)stMalloc.handle_to_ptr(m_hLeft))->traverse_backward(stMalloc, pfVisit, pCookie)) != 0) + { + return (iRet); + } + } + + return (0); +} + +int _TtreeNode::post_order_traverse(Mallocator &stMalloc, ItemVisit pfVisit, void *pCookie) +{ + int iRet; + + if (m_hRight != INVALID_HANDLE) + { + if ((iRet = ((TtreeNode 
*)stMalloc.handle_to_ptr(m_hRight))->post_order_traverse(stMalloc, pfVisit, pCookie)) != 0) + { + return (iRet); + } + } + + if (m_hLeft != INVALID_HANDLE) + { + if ((iRet = ((TtreeNode *)stMalloc.handle_to_ptr(m_hLeft))->post_order_traverse(stMalloc, pfVisit, pCookie)) != 0) + { + return (iRet); + } + } + + for (int i = m_ushNItems; --i >= 0;) + { + if ((iRet = pfVisit(stMalloc, m_ahItems[i], pCookie)) != 0) + { + return (iRet); + } + } + + return (0); +} + +int _TtreeNode::traverse_forward(Mallocator &stMalloc, const char *pchKey, void *pCmpCookie, KeyComparator pfComp, int iInclusion, ItemVisit pfVisit, void *pCookie) +{ + int iDiff; + int iRet; + + if (m_hLeft != INVALID_HANDLE) + { + iDiff = pfComp(pchKey, pCmpCookie, stMalloc, m_ahItems[0]); + if (iDiff < 0) + { + if ((iRet = ((TtreeNode *)stMalloc.handle_to_ptr(m_hLeft))->traverse_forward(stMalloc, pchKey, pCmpCookie, pfComp, iInclusion, pfVisit, pCookie)) != 0) + { + return (iRet); + } + } + } + + int i = m_ushNItems; + iDiff = pfComp(pchKey, pCmpCookie, stMalloc, m_ahItems[m_ushNItems - 1]); + if (iDiff <= 0) + { + for (i = 0; i < m_ushNItems; i++) + { + iDiff = pfComp(pchKey, pCmpCookie, stMalloc, m_ahItems[i]); + if (iDiff <= 0 && iDiff >= 0 - iInclusion) + { + if ((iRet = pfVisit(stMalloc, m_ahItems[i], pCookie)) != 0) + { + return (iRet); + } + } + else if (iDiff < 0 - iInclusion) + { + break; + } + } + } + + if (i >= m_ushNItems && m_hRight != INVALID_HANDLE) + { + if ((iRet = ((TtreeNode *)stMalloc.handle_to_ptr(m_hRight))->traverse_forward(stMalloc, pchKey, pCmpCookie, pfComp, iInclusion, pfVisit, pCookie)) != 0) + { + return (iRet); + } + } + + return (0); +} + +int _TtreeNode::traverse_forward(Mallocator &stMalloc, const char *pchKey, const char *pchKey1, void *pCmpCookie, KeyComparator pfComp, ItemVisit pfVisit, void *pCookie) +{ + int iDiff; + int iDiff1; + int iRet; + + if (m_hLeft != INVALID_HANDLE) + { + iDiff = pfComp(pchKey, pCmpCookie, stMalloc, m_ahItems[0]); + if (iDiff < 0) + { + if 
((iRet = ((TtreeNode *)stMalloc.handle_to_ptr(m_hLeft))->traverse_forward(stMalloc, pchKey, pchKey1, pCmpCookie, pfComp, pfVisit, pCookie)) != 0) + { + return (iRet); + } + } + } + + int i; + iDiff = pfComp(pchKey, pCmpCookie, stMalloc, m_ahItems[m_ushNItems - 1]); + iDiff1 = pfComp(pchKey1, pCmpCookie, stMalloc, m_ahItems[0]); + if (iDiff1 < 0 || iDiff > 0) + { // key1 < item[0] OR key > item[n] + } + else + { + for (i = 0; i < m_ushNItems; i++) + { + iDiff = pfComp(pchKey, pCmpCookie, stMalloc, m_ahItems[i]); + if (iDiff <= 0) + { + iDiff1 = pfComp(pchKey1, pCmpCookie, stMalloc, m_ahItems[i]); + if (iDiff1 >= 0) + { + if ((iRet = pfVisit(stMalloc, m_ahItems[i], pCookie)) != 0) + { + return (iRet); + } + } + } + } + } + + iDiff1 = pfComp(pchKey1, pCmpCookie, stMalloc, m_ahItems[m_ushNItems - 1]); + if (iDiff1 >= 0 && m_hRight != INVALID_HANDLE) + { + if ((iRet = ((TtreeNode *)stMalloc.handle_to_ptr(m_hRight))->traverse_forward(stMalloc, pchKey, pchKey1, pCmpCookie, pfComp, pfVisit, pCookie)) != 0) + { + return (iRet); + } + } + + return (0); +} + +int _TtreeNode::traverse_forward(Mallocator &stMalloc, const char *pchKey, void *pCmpCookie, KeyComparator pfComp, ItemVisit pfVisit, void *pCookie) +{ + int iDiff; + int iRet; + + if (m_hLeft != INVALID_HANDLE) + { + iDiff = pfComp(pchKey, pCmpCookie, stMalloc, m_ahItems[0]); + if (iDiff < 0) + { + if ((iRet = ((TtreeNode *)stMalloc.handle_to_ptr(m_hLeft))->traverse_forward(stMalloc, pchKey, pCmpCookie, pfComp, pfVisit, pCookie)) != 0) + { + return (iRet); + } + } + } + + int i; + iDiff = pfComp(pchKey, pCmpCookie, stMalloc, m_ahItems[m_ushNItems - 1]); + if (iDiff <= 0) + { + for (i = 0; i < m_ushNItems; i++) + { + iDiff = pfComp(pchKey, pCmpCookie, stMalloc, m_ahItems[i]); + if (iDiff <= 0) + { + if ((iRet = pfVisit(stMalloc, m_ahItems[i], pCookie)) != 0) + { + return (iRet); + } + } + } + } + + if (m_hRight != INVALID_HANDLE) + { + if ((iRet = ((TtreeNode 
*)stMalloc.handle_to_ptr(m_hRight))->traverse_forward(stMalloc, pchKey, pCmpCookie, pfComp, pfVisit, pCookie)) != 0) + { + return (iRet); + } + } + + return (0); +} + +int _TtreeNode::traverse_backward(Mallocator &stMalloc, const char *pchKey, const char *pchKey1, void *pCmpCookie, KeyComparator pfComp, ItemVisit pfVisit, void *pCookie) +{ + int iDiff; + int iDiff1; + int iRet; + int i; + + if (m_hRight != INVALID_HANDLE) + { + iDiff1 = pfComp(pchKey1, pCmpCookie, stMalloc, m_ahItems[m_ushNItems - 1]); + if (iDiff1 > 0) + { + if ((iRet = ((TtreeNode *)stMalloc.handle_to_ptr(m_hRight))->traverse_backward(stMalloc, pchKey, pchKey1, pCmpCookie, pfComp, pfVisit, pCookie)) != 0) + { + return (iRet); + } + } + } + + iDiff = pfComp(pchKey, pCmpCookie, stMalloc, m_ahItems[m_ushNItems - 1]); + iDiff1 = pfComp(pchKey1, pCmpCookie, stMalloc, m_ahItems[0]); + if (iDiff1 < 0 || iDiff > 0) + { // key1 < item[0] OR key > item[n] + } + else + { + for (i = m_ushNItems; --i >= 0;) + { + iDiff = pfComp(pchKey, pCmpCookie, stMalloc, m_ahItems[i]); + if (iDiff <= 0) + { + iDiff1 = pfComp(pchKey1, pCmpCookie, stMalloc, m_ahItems[i]); + if (iDiff1 >= 0) + { + if ((iRet = pfVisit(stMalloc, m_ahItems[i], pCookie)) != 0) + { + return (iRet); + } + } + } + } + } + + iDiff = pfComp(pchKey, pCmpCookie, stMalloc, m_ahItems[0]); + if (iDiff <= 0 && m_hLeft != INVALID_HANDLE) + { + if ((iRet = ((TtreeNode *)stMalloc.handle_to_ptr(m_hLeft))->traverse_backward(stMalloc, pchKey, pchKey1, pCmpCookie, pfComp, pfVisit, pCookie)) != 0) + { + return (iRet); + } + } + + return (0); +} + +int _TtreeNode::traverse_backward(Mallocator &stMalloc, const char *pchKey, void *pCmpCookie, KeyComparator pfComp, ItemVisit pfVisit, void *pCookie) +{ + int iDiff; + int iRet; + + if (m_hRight != INVALID_HANDLE) + { + iDiff = pfComp(pchKey, pCmpCookie, stMalloc, m_ahItems[m_ushNItems - 1]); + if (iDiff > 0) + { + if ((iRet = ((TtreeNode *)stMalloc.handle_to_ptr(m_hRight))->traverse_backward(stMalloc, pchKey, pCmpCookie, 
pfComp, pfVisit, pCookie)) != 0) + { + return (iRet); + } + } + } + + iDiff = pfComp(pchKey, pCmpCookie, stMalloc, m_ahItems[0]); + if (iDiff >= 0) + { + for (int i = m_ushNItems; --i >= 0;) + { + iDiff = pfComp(pchKey, pCmpCookie, stMalloc, m_ahItems[i]); + if (iDiff >= 0) + { + if ((iRet = pfVisit(stMalloc, m_ahItems[i], pCookie)) != 0) + { + return (iRet); + } + } + } + } + + if (m_hLeft != INVALID_HANDLE) + { + if ((iRet = ((TtreeNode *)stMalloc.handle_to_ptr(m_hLeft))->traverse_backward(stMalloc, pchKey, pCmpCookie, pfComp, pfVisit, pCookie)) != 0) + { + return (iRet); + } + } + + return (0); +} + +int _TtreeNode::post_order_traverse(Mallocator &stMalloc, const char *pchKey, const char *pchKey1, void *pCmpCookie, KeyComparator pfComp, ItemVisit pfVisit, void *pCookie) +{ + int iDiff; + int iDiff1; + int iRet; + + if (m_hLeft != INVALID_HANDLE) + { + iDiff = pfComp(pchKey, pCmpCookie, stMalloc, m_ahItems[0]); + if (iDiff < 0) + { + if ((iRet = ((TtreeNode *)stMalloc.handle_to_ptr(m_hLeft))->post_order_traverse(stMalloc, pchKey, pchKey1, pCmpCookie, pfComp, pfVisit, pCookie)) != 0) + { + return (iRet); + } + } + } + + iDiff1 = pfComp(pchKey1, pCmpCookie, stMalloc, m_ahItems[m_ushNItems - 1]); + if (iDiff1 >= 0 && m_hRight != INVALID_HANDLE) + { + if ((iRet = ((TtreeNode *)stMalloc.handle_to_ptr(m_hRight))->post_order_traverse(stMalloc, pchKey, pchKey1, pCmpCookie, pfComp, pfVisit, pCookie)) != 0) + { + return (iRet); + } + } + + int i; + iDiff = pfComp(pchKey, pCmpCookie, stMalloc, m_ahItems[m_ushNItems - 1]); + iDiff1 = pfComp(pchKey1, pCmpCookie, stMalloc, m_ahItems[0]); + if (iDiff1 < 0 || iDiff > 0) + { // key1 < item[0] OR key > item[n] + } + else + { + for (i = 0; i < m_ushNItems; i++) + { + iDiff = pfComp(pchKey, pCmpCookie, stMalloc, m_ahItems[i]); + if (iDiff <= 0) + { + iDiff1 = pfComp(pchKey1, pCmpCookie, stMalloc, m_ahItems[i]); + if (iDiff1 >= 0) + { + if ((iRet = pfVisit(stMalloc, m_ahItems[i], pCookie)) != 0) + { + return (iRet); + } + } + } + } + 
} + + return (0); +} + +int _TtreeNode::post_order_traverse_ge(Mallocator &stMalloc, const char *pchKey, void *pCmpCookie, KeyComparator pfComp, ItemVisit pfVisit, void *pCookie) +{ + int iDiff; + int iRet; + + if (m_hLeft != INVALID_HANDLE) + { + iDiff = pfComp(pchKey, pCmpCookie, stMalloc, m_ahItems[0]); + if (iDiff < 0) + { + if ((iRet = ((TtreeNode *)stMalloc.handle_to_ptr(m_hLeft))->post_order_traverse_ge(stMalloc, pchKey, pCmpCookie, pfComp, pfVisit, pCookie)) != 0) + { + return (iRet); + } + } + } + + if (m_hRight != INVALID_HANDLE) + { + if ((iRet = ((TtreeNode *)stMalloc.handle_to_ptr(m_hRight))->post_order_traverse_ge(stMalloc, pchKey, pCmpCookie, pfComp, pfVisit, pCookie)) != 0) + { + return (iRet); + } + } + + int i; + iDiff = pfComp(pchKey, pCmpCookie, stMalloc, m_ahItems[m_ushNItems - 1]); + if (iDiff <= 0) + { + for (i = 0; i < m_ushNItems; i++) + { + iDiff = pfComp(pchKey, pCmpCookie, stMalloc, m_ahItems[i]); + if (iDiff <= 0) + { + if ((iRet = pfVisit(stMalloc, m_ahItems[i], pCookie)) != 0) + { + return (iRet); + } + } + } + } + + return (0); +} + +int _TtreeNode::post_order_traverse_le(Mallocator &stMalloc, const char *pchKey, void *pCmpCookie, KeyComparator pfComp, ItemVisit pfVisit, void *pCookie) +{ + int iDiff; + int iRet; + + if (m_hRight != INVALID_HANDLE) + { + iDiff = pfComp(pchKey, pCmpCookie, stMalloc, m_ahItems[m_ushNItems - 1]); + if (iDiff > 0) + { + if ((iRet = ((TtreeNode *)stMalloc.handle_to_ptr(m_hRight))->post_order_traverse_le(stMalloc, pchKey, pCmpCookie, pfComp, pfVisit, pCookie)) != 0) + { + return (iRet); + } + } + } + + if (m_hLeft != INVALID_HANDLE) + { + if ((iRet = ((TtreeNode *)stMalloc.handle_to_ptr(m_hLeft))->post_order_traverse_le(stMalloc, pchKey, pCmpCookie, pfComp, pfVisit, pCookie)) != 0) + { + return (iRet); + } + } + + iDiff = pfComp(pchKey, pCmpCookie, stMalloc, m_ahItems[0]); + if (iDiff >= 0) + { + for (int i = m_ushNItems; --i >= 0;) + { + iDiff = pfComp(pchKey, pCmpCookie, stMalloc, m_ahItems[i]); + if 
(iDiff >= 0) + { + if ((iRet = pfVisit(stMalloc, m_ahItems[i], pCookie)) != 0) + { + return (iRet); + } + } + } + } + + return (0); +} + +Ttree::Ttree(Mallocator &stMalloc) : m_stMalloc(stMalloc) +{ + m_hRoot = INVALID_HANDLE; + m_szErr[0] = 0; +} + +Ttree::~Ttree() +{ +} + +ALLOC_HANDLE_T Ttree::first_node() +{ + if (m_hRoot == INVALID_HANDLE) + return INVALID_HANDLE; + TtreeNode *pstNode; + GET_OBJ(m_stMalloc, m_hRoot, pstNode); + return pstNode->m_ahItems[0]; +} + +int Ttree::Insert(const char *pchKey, void *pCmpCookie, KeyComparator pfComp, ALLOC_HANDLE_T hRecord, bool &isAllocNode) +{ + ALLOC_HANDLE_T hNode; + + if (m_hRoot == INVALID_HANDLE) + { + hNode = TtreeNode::Alloc(m_stMalloc, hRecord); + if (hNode == INVALID_HANDLE) + { + snprintf(m_szErr, sizeof(m_szErr), "alloc tree-node error: %s", m_stMalloc.get_err_msg()); + return (EC_NO_MEM); + } + isAllocNode = true; + m_hRoot = hNode; + } + else + { + hNode = m_hRoot; + int iRet = TtreeNode::Insert(m_stMalloc, hNode, pchKey, pCmpCookie, pfComp, hRecord, isAllocNode); + if (iRet == -2) + { + snprintf(m_szErr, sizeof(m_szErr), "key already exists."); + return (EC_KEY_EXIST); + } + else if (iRet == -1) + { + snprintf(m_szErr, sizeof(m_szErr), "alloc tree-node error: %s", m_stMalloc.get_err_msg()); + return (EC_NO_MEM); + } + else if (iRet < 0) + { + snprintf(m_szErr, sizeof(m_szErr), "insert error"); + return (-1); + } + if (hNode != m_hRoot) + { + m_hRoot = hNode; + } + } + + return (0); +} + +int Ttree::Delete(const char *pchKey, void *pCmpCookie, KeyComparator pfComp, bool &isFreeNode) +{ + if (m_hRoot == INVALID_HANDLE) + { + return (0); + } + + ALLOC_HANDLE_T hNode = m_hRoot; + int iRet = TtreeNode::Delete(m_stMalloc, hNode, pchKey, pCmpCookie, pfComp, isFreeNode); + if (iRet < -1) + { + snprintf(m_szErr, sizeof(m_szErr), "internal error"); + return (-1); + } + else if (iRet == -1) + { + snprintf(m_szErr, sizeof(m_szErr), "tree error"); + return (-1); + } + if (hNode != m_hRoot) + m_hRoot = hNode; + + 
return (0); +} + +int Ttree::find_handle(ALLOC_HANDLE_T hRecord) +{ + if (m_hRoot == INVALID_HANDLE) + { + return (0); + } + + TtreeNode *pstNode; + GET_OBJ(m_stMalloc, m_hRoot, pstNode); + return pstNode->find_handle(m_stMalloc, hRecord); +} + +int Ttree::Find(const char *pchKey, void *pCmpCookie, KeyComparator pfComp, ALLOC_HANDLE_T &hRecord) +{ + hRecord = INVALID_HANDLE; + if (m_hRoot == INVALID_HANDLE) + { + return (0); + } + + TtreeNode *pstNode; + GET_OBJ(m_stMalloc, m_hRoot, pstNode); + return pstNode->Find(m_stMalloc, pchKey, pCmpCookie, pfComp, hRecord); +} + +int Ttree::Find(const char *pchKey, void *pCmpCookie, KeyComparator pfComp, ALLOC_HANDLE_T *&phRecord) +{ + phRecord = NULL; + if (m_hRoot == INVALID_HANDLE) + { + return (0); + } + + TtreeNode *pstNode; + GET_OBJ(m_stMalloc, m_hRoot, pstNode); + return pstNode->Find(m_stMalloc, pchKey, pCmpCookie, pfComp, phRecord); +} + +int Ttree::Destroy() +{ + TtreeNode::Destroy(m_stMalloc, m_hRoot); + m_hRoot = INVALID_HANDLE; + return (0); +} + +unsigned Ttree::ask_for_destroy_size(void) +{ + return TtreeNode::ask_for_destroy_size(m_stMalloc, m_hRoot); +} + +int Ttree::traverse_forward(ItemVisit pfVisit, void *pCookie) +{ + if (m_hRoot != INVALID_HANDLE) + { + TtreeNode *pstNode; + GET_OBJ(m_stMalloc, m_hRoot, pstNode); + return pstNode->traverse_forward(m_stMalloc, pfVisit, pCookie); + } + + return (0); +} + +int Ttree::traverse_backward(ItemVisit pfVisit, void *pCookie) +{ + if (m_hRoot != INVALID_HANDLE) + { + TtreeNode *pstNode; + GET_OBJ(m_stMalloc, m_hRoot, pstNode); + return pstNode->traverse_backward(m_stMalloc, pfVisit, pCookie); + } + + return (0); +} + +int Ttree::post_order_traverse(ItemVisit pfVisit, void *pCookie) +{ + if (m_hRoot != INVALID_HANDLE) + { + TtreeNode *pstNode; + GET_OBJ(m_stMalloc, m_hRoot, pstNode); + return pstNode->post_order_traverse(m_stMalloc, pfVisit, pCookie); + } + + return (0); +} + +int Ttree::traverse_forward(const char *pchKey, void *pCmpCookie, KeyComparator pfComp, 
int64_t iInclusion, ItemVisit pfVisit, void *pCookie) +{ + if (m_hRoot != INVALID_HANDLE) + { + TtreeNode *pstNode; + GET_OBJ(m_stMalloc, m_hRoot, pstNode); + + return pstNode->traverse_forward(m_stMalloc, pchKey, pCmpCookie, pfComp, iInclusion, pfVisit, pCookie); + } + + return (0); +} + +int Ttree::traverse_forward(const char *pchKey, void *pCmpCookie, KeyComparator pfComp, ItemVisit pfVisit, void *pCookie) +{ + if (m_hRoot != INVALID_HANDLE) + { + TtreeNode *pstNode; + GET_OBJ(m_stMalloc, m_hRoot, pstNode); + + return pstNode->traverse_forward(m_stMalloc, pchKey, pCmpCookie, pfComp, pfVisit, pCookie); + } + + return (0); +} + +int Ttree::traverse_forward(const char *pchKey, const char *pchKey1, void *pCmpCookie, KeyComparator pfComp, ItemVisit pfVisit, void *pCookie) +{ + if (m_hRoot != INVALID_HANDLE) + { + TtreeNode *pstNode; + GET_OBJ(m_stMalloc, m_hRoot, pstNode); + + return pstNode->traverse_forward(m_stMalloc, pchKey, pchKey1, pCmpCookie, pfComp, pfVisit, pCookie); + } + + return (0); +} + +int Ttree::traverse_backward(const char *pchKey, void *pCmpCookie, KeyComparator pfComp, ItemVisit pfVisit, void *pCookie) +{ + if (m_hRoot != INVALID_HANDLE) + { + TtreeNode *pstNode; + GET_OBJ(m_stMalloc, m_hRoot, pstNode); + + return pstNode->traverse_backward(m_stMalloc, pchKey, pCmpCookie, pfComp, pfVisit, pCookie); + } + + return (0); +} + +int Ttree::traverse_backward(const char *pchKey, const char *pchKey1, void *pCmpCookie, KeyComparator pfComp, ItemVisit pfVisit, void *pCookie) +{ + if (m_hRoot != INVALID_HANDLE) + { + TtreeNode *pstNode; + GET_OBJ(m_stMalloc, m_hRoot, pstNode); + + return pstNode->traverse_backward(m_stMalloc, pchKey, pchKey1, pCmpCookie, pfComp, pfVisit, pCookie); + } + + return (0); +} + +int Ttree::post_order_traverse(const char *pchKey, const char *pchKey1, void *pCmpCookie, KeyComparator pfComp, ItemVisit pfVisit, void *pCookie) +{ + if (m_hRoot != INVALID_HANDLE) + { + TtreeNode *pstNode; + GET_OBJ(m_stMalloc, m_hRoot, pstNode); + + 
return pstNode->post_order_traverse(m_stMalloc, pchKey, pchKey1, pCmpCookie, pfComp, pfVisit, pCookie); + } + + return (0); +} + +int Ttree::post_order_traverse_ge(const char *pchKey, void *pCmpCookie, KeyComparator pfComp, ItemVisit pfVisit, void *pCookie) +{ + if (m_hRoot != INVALID_HANDLE) + { + TtreeNode *pstNode; + GET_OBJ(m_stMalloc, m_hRoot, pstNode); + + return pstNode->post_order_traverse_ge(m_stMalloc, pchKey, pCmpCookie, pfComp, pfVisit, pCookie); + } + + return (0); +} + +int Ttree::post_order_traverse_le(const char *pchKey, void *pCmpCookie, KeyComparator pfComp, ItemVisit pfVisit, void *pCookie) +{ + if (m_hRoot != INVALID_HANDLE) + { + TtreeNode *pstNode; + GET_OBJ(m_stMalloc, m_hRoot, pstNode); + + return pstNode->post_order_traverse_le(m_stMalloc, pchKey, pCmpCookie, pfComp, pfVisit, pCookie); + } + + return (0); +} diff --git a/src/search_local/index_storage/cache/t_tree.h b/src/search_local/index_storage/cache/t_tree.h new file mode 100644 index 0000000..833e64d --- /dev/null +++ b/src/search_local/index_storage/cache/t_tree.h @@ -0,0 +1,290 @@ +/* + * ===================================================================================== + * + * Filename: t_tree.h + * + * Description: T-tree fundamental operation. only for TreeData invoke. + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. 
+ * + * ===================================================================================== + */ + +#ifndef T_TREE_H +#define T_TREE_H + +#include +#include "mallocator.h" + +int64_t KeyCompare(const char *pchKey, void *pCmpCookie, Mallocator &stMalloc, ALLOC_HANDLE_T hOtherKey); +int Visit(Mallocator &stMalloc, ALLOC_HANDLE_T &hRecord, void *pCookie); + +typedef int64_t (*KeyComparator)(const char *pchKey, void *pCmpCookie, Mallocator &stMalloc, ALLOC_HANDLE_T hOtherKey); +typedef int (*ItemVisit)(Mallocator &stMalloc, ALLOC_HANDLE_T &hRecord, void *pCookie); + +class Ttree +{ +protected: + ALLOC_HANDLE_T m_hRoot; + Mallocator &m_stMalloc; + char m_szErr[100]; + +public: + Ttree(Mallocator &stMalloc); + ~Ttree(); + + const char *get_err_msg() { return m_szErr; } + const ALLOC_HANDLE_T Root() const { return m_hRoot; } + ALLOC_HANDLE_T first_node(); + + /************************************************* + Description: attach一块已经格式化好的内存 + Input: + Output: + Return: + *************************************************/ + void Attach(ALLOC_HANDLE_T hRoot) { m_hRoot = hRoot; } + + /************************************************* + Description: 将key insert到树里,hRecord为key对应的数据(包含key) + Input: pchKey 插入的key + pCmpCookie 调用用户自定义的pfComp函数跟树里的节点比较时作为输入参数 + pfComp 用户自定义的key比较函数 + hRecord 保存着要插入的key以及其他数据的句柄 + Output: + Return: 0为成功,EC_NO_MEM为内存不足,EC_KEY_EXIST为key已经存在,其他值为错误 + *************************************************/ + int Insert(const char *pchKey, void *pCmpCookie, KeyComparator pfComp, ALLOC_HANDLE_T hRecord, bool &isAllocNode); + + /************************************************* + Description: 删除key以及对应的数据(但不会自动释放key对应的内存) + Input: pchKey 插入的key + pCmpCookie 调用用户自定义的pfComp函数跟树里的节点比较时作为输入参数 + pfComp 用户自定义的key比较函数 + Output: + Return: 0为成功,其他值为错误 + *************************************************/ + int Delete(const char *pchKey, void *pCmpCookie, KeyComparator pfComp, bool &isFreeNode); + + int find_handle(ALLOC_HANDLE_T hRecord); + + 
/************************************************* + Description: 查找key对应的数据 + Input: pchKey 插入的key + pCmpCookie 调用用户自定义的pfComp函数跟树里的节点比较时作为输入参数 + pfComp 用户自定义的key比较函数 + Output: hRecord 保存查找到的key以及其他数据的句柄 + Return: 0为查找不到,1为找到数据 + *************************************************/ + int Find(const char *pchKey, void *pCmpCookie, KeyComparator pfComp, ALLOC_HANDLE_T &hRecord); + + /************************************************* + Description: 查找key对应的数据 + Input: pchKey 插入的key + pCmpCookie 调用用户自定义的pfComp函数跟树里的节点比较时作为输入参数 + pfComp 用户自定义的key比较函数 + Output: phRecord 指向树节点的item指针 + Return: 0为查找不到,1为找到数据 + *************************************************/ + int Find(const char *pchKey, void *pCmpCookie, KeyComparator pfComp, ALLOC_HANDLE_T *&phRecord); + + /************************************************* + Description: 销毁整棵树,并释放相应的内存 + Input: + Output: + Return: 0为成功,非0失败 + *************************************************/ + int Destroy(); + + /************************************************* + Description: 查询销毁整棵树可以释放多少空闲内存 + Input: + Output: + Return: >0 成功, 0 失败 + *************************************************/ + unsigned ask_for_destroy_size(void); + + /************************************************* + Description: 从小到大遍历整棵树 + Input: pfVisit 访问数据记录的用户自定义函数 + pCookie 自定义函数的cookie参数 + Output: + Return: 0为成功,其他值为错误 + *************************************************/ + int traverse_forward(ItemVisit pfVisit, void *pCookie); + + /************************************************* + Description: 从大到小遍历整棵树 + Input: pfVisit 访问数据记录的用户自定义函数 + pCookie 自定义函数的cookie参数 + Output: + Return: 0为成功,其他值为错误 + *************************************************/ + int traverse_backward(ItemVisit pfVisit, void *pCookie); + + /************************************************* + Description: 后序遍历整棵树 + Input: pfVisit 访问数据记录的用户自定义函数 + pCookie 自定义函数的cookie参数 + Output: + Return: 0为成功,其他值为错误 + *************************************************/ + int post_order_traverse(ItemVisit pfVisit, 
void *pCookie); + + /************************************************* + Description: 从指定的key开始,从小到大遍历树,遍历的范围为[key, key+iInclusion] + Input: pchKey 开始的key + pCmpCookie 调用用户自定义的pfComp函数跟树里的节点比较时作为输入参数 + pfComp 用户自定义的key比较函数 + iInclusion key的范围 + pfVisit 访问数据记录的用户自定义函数 + pCookie 自定义函数的cookie参数 + Output: + Return: 0为成功,其他值为错误 + *************************************************/ + int traverse_forward(const char *pchKey, void *pCmpCookie, KeyComparator pfComp, int64_t iInclusion, ItemVisit pfVisit, void *pCookie); + + /************************************************* + Description: 从指定的key开始,从小到大遍历树, 遍历的范围为[key, key1] + Input: pchKey 开始的key + pchKey1 结束的key + pCmpCookie 调用用户自定义的pfComp函数跟树里的节点比较时作为输入参数 + pfComp 用户自定义的key比较函数 + pfVisit 访问数据记录的用户自定义函数 + pCookie 自定义函数的cookie参数 + Output: + Return: 0为成功,其他值为错误 + *************************************************/ + int traverse_forward(const char *pchKey, const char *pchKey1, void *pCmpCookie, KeyComparator pfComp, ItemVisit pfVisit, void *pCookie); + + /************************************************* + Description: 从指定的key开始,从小到大遍历树(遍历大于等于key的所有记录) + Input: pchKey 开始的key + pCmpCookie 调用用户自定义的pfComp函数跟树里的节点比较时作为输入参数 + pfComp 用户自定义的key比较函数 + pfVisit 访问数据记录的用户自定义函数 + pCookie 自定义函数的cookie参数 + Output: + Return: 0为成功,其他值为错误 + *************************************************/ + int traverse_forward(const char *pchKey, void *pCmpCookie, KeyComparator pfComp, ItemVisit pfVisit, void *pCookie); + + /************************************************* + Description: 从指定的key开始,从大到小遍历树(遍历小于等于key的所有记录) + Input: pchKey 开始的key + pCmpCookie 调用用户自定义的pfComp函数跟树里的节点比较时作为输入参数 + pfComp 用户自定义的key比较函数 + pfVisit 访问数据记录的用户自定义函数 + pCookie 自定义函数的cookie参数 + Output: + Return: 0为成功,其他值为错误 + *************************************************/ + int traverse_backward(const char *pchKey, void *pCmpCookie, KeyComparator pfComp, ItemVisit pfVisit, void *pCookie); + + /************************************************* + Description: 
从指定的key开始,从大到小遍历树,遍历的范围为[key, key1] + Input: pchKey 开始的key + pCmpCookie 调用用户自定义的pfComp函数跟树里的节点比较时作为输入参数 + pfComp 用户自定义的key比较函数 + pfVisit 访问数据记录的用户自定义函数 + pCookie 自定义函数的cookie参数 + Output: + Return: 0为成功,其他值为错误 + *************************************************/ + int traverse_backward(const char *pchKey, const char *pchKey1, void *pCmpCookie, KeyComparator pfComp, ItemVisit pfVisit, void *pCookie); + + /************************************************* + Description: 从指定的key开始,先左右树,后根结点, 遍历的范围为[key, key1] + Input: pchKey 开始的key + pchKey1 结束的key + pCmpCookie 调用用户自定义的pfComp函数跟树里的节点比较时作为输入参数 + pfComp 用户自定义的key比较函数 + pfVisit 访问数据记录的用户自定义函数 + pCookie 自定义函数的cookie参数 + Output: + Return: 0为成功,其他值为错误 + *************************************************/ + int post_order_traverse(const char *pchKey, const char *pchKey1, void *pCmpCookie, KeyComparator pfComp, ItemVisit pfVisit, void *pCookie); + + /************************************************* + Description: 从指定的key开始,后序遍历树(遍历大于等于key的所有记录) + Input: pchKey 开始的key + pCmpCookie 调用用户自定义的pfComp函数跟树里的节点比较时作为输入参数 + pfComp 用户自定义的key比较函数 + pfVisit 访问数据记录的用户自定义函数 + pCookie 自定义函数的cookie参数 + Output: + Return: 0为成功,其他值为错误 + *************************************************/ + int post_order_traverse_ge(const char *pchKey, void *pCmpCookie, KeyComparator pfComp, ItemVisit pfVisit, void *pCookie); + + /************************************************* + Description: 从指定的key开始,后序遍历树(遍历小于等于key的所有记录) + Input: pchKey 开始的key + pCmpCookie 调用用户自定义的pfComp函数跟树里的节点比较时作为输入参数 + pfComp 用户自定义的key比较函数 + pfVisit 访问数据记录的用户自定义函数 + pCookie 自定义函数的cookie参数 + Output: + Return: 0为成功,其他值为错误 + *************************************************/ + int post_order_traverse_le(const char *pchKey, void *pCmpCookie, KeyComparator pfComp, ItemVisit pfVisit, void *pCookie); +}; + +/************************************************************ + Description: 封装了T-tree node的各种操作,仅供t-tree内部使用 + Version: DTC 3.0 +***********************************************************/ 
+struct _TtreeNode +{ + enum + { + PAGE_SIZE = 20, // 每个节点保存多少条记录 + MIN_ITEMS = PAGE_SIZE - 2 // minimal number of items in internal node + }; + + ALLOC_HANDLE_T m_hLeft; + ALLOC_HANDLE_T m_hRight; + int8_t m_chBalance; + uint16_t m_ushNItems; + ALLOC_HANDLE_T m_ahItems[PAGE_SIZE]; + + int Init(); + static ALLOC_HANDLE_T Alloc(Mallocator &stMalloc, ALLOC_HANDLE_T hRecord); + static int Insert(Mallocator &stMalloc, ALLOC_HANDLE_T &hNode, const char *pchKey, void *pCmpCookie, KeyComparator pfComp, ALLOC_HANDLE_T hRecord, bool &isAllocNode); + static int Delete(Mallocator &stMalloc, ALLOC_HANDLE_T &hNode, const char *pchKey, void *pCmpCookie, KeyComparator pfComp, bool &isFreeNode); + static int balance_left_branch(Mallocator &stMalloc, ALLOC_HANDLE_T &hNode); + static int balance_right_branch(Mallocator &stMalloc, ALLOC_HANDLE_T &hNode); + static int Destroy(Mallocator &stMalloc, ALLOC_HANDLE_T hNode); + static unsigned ask_for_destroy_size(Mallocator &, ALLOC_HANDLE_T hNode); + + // 查找指定的key。找到返回1,否则返回0 + int Find(Mallocator &stMalloc, const char *pchKey, void *pCmpCookie, KeyComparator pfComp, ALLOC_HANDLE_T &hRecord); + int Find(Mallocator &stMalloc, const char *pchKey, void *pCmpCookie, KeyComparator pfComp, ALLOC_HANDLE_T *&phRecord); + int find_handle(Mallocator &stMalloc, ALLOC_HANDLE_T hRecord); + // 假设node包含key-k1~kn,查找这样的node节点:k1<= key <=kn + int find_node(Mallocator &stMalloc, const char *pchKey, void *pCmpCookie, KeyComparator pfComp, ALLOC_HANDLE_T &hNode); + int traverse_forward(Mallocator &stMalloc, ItemVisit pfVisit, void *pCookie); + int traverse_backward(Mallocator &stMalloc, ItemVisit pfVisit, void *pCookie); + int post_order_traverse(Mallocator &stMalloc, ItemVisit pfVisit, void *pCookie); + + int traverse_forward(Mallocator &stMalloc, const char *pchKey, void *pCmpCookie, KeyComparator pfComp, int iInclusion, ItemVisit pfVisit, void *pCookie); + int traverse_forward(Mallocator &stMalloc, const char *pchKey, void *pCmpCookie, KeyComparator 
pfComp, ItemVisit pfVisit, void *pCookie); + int traverse_forward(Mallocator &stMalloc, const char *pchKey, const char *pchKey1, void *pCmpCookie, KeyComparator pfComp, ItemVisit pfVisit, void *pCookie); + + int traverse_backward(Mallocator &stMalloc, const char *pchKey, void *pCmpCookie, KeyComparator pfComp, ItemVisit pfVisit, void *pCookie); + int traverse_backward(Mallocator &stMalloc, const char *pchKey, const char *pchKey1, void *pCmpCookie, KeyComparator pfComp, ItemVisit pfVisit, void *pCookie); + + int post_order_traverse(Mallocator &stMalloc, const char *pchKey, const char *pchKey1, void *pCmpCookie, KeyComparator pfComp, ItemVisit pfVisit, void *pCookie); + int post_order_traverse_ge(Mallocator &stMalloc, const char *pchKey, void *pCmpCookie, KeyComparator pfComp, ItemVisit pfVisit, void *pCookie); + int post_order_traverse_le(Mallocator &stMalloc, const char *pchKey, void *pCmpCookie, KeyComparator pfComp, ItemVisit pfVisit, void *pCookie); +} __attribute__((packed)); +typedef struct _TtreeNode TtreeNode; + +#endif diff --git a/src/search_local/index_storage/cache/task_control.cc b/src/search_local/index_storage/cache/task_control.cc new file mode 100644 index 0000000..516786b --- /dev/null +++ b/src/search_local/index_storage/cache/task_control.cc @@ -0,0 +1,137 @@ +/* + * ===================================================================================== + * + * Filename: task_control.cc + * + * Description: + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. 
+ * + * ===================================================================================== + */ +#include +#include +#include "protocol.h" + +TaskControl *TaskControl::serverControl = NULL; + +TaskControl::TaskControl(PollThread *o) : TaskDispatcher(o), m_output(o) +{ + atomic8_set(&m_readOnly, 0); + m_statReadonly = statmgr.get_item_u32(SERVER_READONLY); + m_statReadonly.set((0 == atomic8_read(&m_readOnly)) ? 0 : 1); +} + +TaskControl::~TaskControl(void) +{ +} + +TaskControl *TaskControl::get_instance(PollThread *o) +{ + if (NULL == serverControl) + { + NEW(TaskControl(o), serverControl); + } + return serverControl; +} + +TaskControl *TaskControl::get_instance() +{ + return serverControl; +} + +bool TaskControl::is_read_only() +{ + return 0 != atomic8_read(&m_readOnly); +} +void TaskControl::query_mem_info(TaskRequest *cur) +{ + struct DTCServerInfo s_info; + memset(&s_info, 0x00, sizeof(s_info)); + + s_info.version = 0x1; + s_info.datasize = statmgr.get10_s_item_value(DTC_DATA_SIZE); + s_info.memsize = statmgr.get10_s_item_value(DTC_CACHE_SIZE); + log_debug("Memory info is: memsize is %lu , datasize is %lu", s_info.memsize, s_info.datasize); + cur->resultInfo.set_server_info(&s_info); +} +void TaskControl::deal_server_admin(TaskRequest *cur) +{ + switch (cur->requestInfo.admin_code()) + { + case DRequest::ServerAdminCmd::SET_READONLY: + { + atomic8_set(&m_readOnly, 1); + m_statReadonly.set(1); + log_info("set server status to readonly."); + break; + } + case DRequest::ServerAdminCmd::SET_READWRITE: + { + atomic8_set(&m_readOnly, 0); + m_statReadonly.set(0); + log_info("set server status to read/write."); + break; + } + case DRequest::ServerAdminCmd::QUERY_MEM_INFO: + { + log_debug("query meminfo."); + query_mem_info(cur); + break; + } + + default: + { + log_debug("unknow cmd: %d", cur->requestInfo.admin_code()); + cur->set_error(-EC_REQUEST_ABORTED, "RequestControl", "Unknown svrAdmin command."); + break; + } + } + + cur->reply_notify(); +} + +void 
TaskControl::task_notify(TaskRequest *cur) +{ + log_debug("TaskControl::task_notify Cmd is %d, AdminCmd is %u", cur->request_code(), cur->requestInfo.admin_code()); + //处理ServerAdmin命令 + if (DRequest::SvrAdmin == cur->request_code()) + { + switch (cur->requestInfo.admin_code()) + { + case DRequest::ServerAdminCmd::SET_READONLY: + case DRequest::ServerAdminCmd::SET_READWRITE: + case DRequest::ServerAdminCmd::QUERY_MEM_INFO: + deal_server_admin(cur); + return; + + //allow all admin_code pass + default: + { + log_debug("TaskControl::task_notify admincmd, tasknotify next process "); + m_output.task_notify(cur); + return; + } + } + } + + //当server为readonly,对非查询请求直接返回错误 + if (0 != atomic8_read(&m_readOnly)) + { + if (DRequest::Get != cur->request_code()) + { + log_info("server is readonly, reject write operation"); + cur->set_error(-EC_SERVER_READONLY, "RequestControl", "Server is readonly."); + cur->reply_notify(); + return; + } + } + log_debug("TaskControl::task_notify tasknotify next process "); + m_output.task_notify(cur); +} diff --git a/src/search_local/index_storage/cache/task_control.h b/src/search_local/index_storage/cache/task_control.h new file mode 100644 index 0000000..49262e1 --- /dev/null +++ b/src/search_local/index_storage/cache/task_control.h @@ -0,0 +1,53 @@ +/* + * ===================================================================================== + * + * Filename: task_control.h + * + * Description: + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. 
+ * + * ===================================================================================== + */ +#ifndef __REQUEST_CONTROL_H +#define __REQUEST_CONTROL_H + +#include +#include + +class TaskControl : public TaskDispatcher +{ +protected: + static TaskControl *serverControl; + TaskControl(PollThread *o); + +public: + //返回实例,如果实例尚未构造,则构造一个新的实例返回 + static TaskControl *get_instance(PollThread *o); + //仅是返回,如果实例尚未构造,则返回空 + static TaskControl *get_instance(); + virtual ~TaskControl(void); + void bind_dispatcher(TaskDispatcher *p) { m_output.bind_dispatcher(p); } + bool is_read_only(); + +private: + RequestOutput m_output; + //server是否为只读状态 + atomic8_t m_readOnly; + //Readonly的统计对象 + StatItemU32 m_statReadonly; + +private: + virtual void task_notify(TaskRequest *); + //处理serveradmin 命令 + void deal_server_admin(TaskRequest *cur); + void query_mem_info(TaskRequest *cur); +}; + +#endif diff --git a/src/search_local/index_storage/cache/task_pendlist.cc b/src/search_local/index_storage/cache/task_pendlist.cc new file mode 100644 index 0000000..c4fbd4b --- /dev/null +++ b/src/search_local/index_storage/cache/task_pendlist.cc @@ -0,0 +1,111 @@ +/* + * ===================================================================================== + * + * Filename: task_pendlist.cc + * + * Description: + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. 
+ * + * ===================================================================================== + */ +#include "task_pendlist.h" +#include "buffer_process.h" +#include "log.h" + +DTC_USING_NAMESPACE + +TaskPendingList::TaskPendingList(TaskDispatcher *o, int to) : _timeout(to), + _timelist(0), + _owner(o), + _wakeup(0) +{ + _timelist = _owner->owner->get_timer_list(_timeout); +} + +TaskPendingList::~TaskPendingList() +{ + std::list::iterator it; + for (it = _pendlist.begin(); it != _pendlist.end(); ++it) + { + //把所有请求踢回客户端 + it->first->set_error(-ETIMEDOUT, __FUNCTION__, "object deconstruct"); + it->first->reply_notify(); + } +} + +void TaskPendingList::add2_list(TaskRequest *task) +{ + + if (task) + { + if (_pendlist.empty()) + attach_timer(_timelist); + + _pendlist.push_back(std::make_pair(task, time(NULL))); + } + + return; +} + +// 唤醒队列中所有已经pending的task +void TaskPendingList::Wakeup(void) +{ + + log_debug("TaskPendingList Wakeup"); + + //唤醒所有task + _wakeup = 1; + + attach_ready_timer(_owner->owner); + + return; +} + +void TaskPendingList::timer_notify(void) +{ + + std::list copy; + copy.swap(_pendlist); + std::list::iterator it; + + if (_wakeup) + { + for (it = copy.begin(); it != copy.end(); ++it) + { + _owner->task_notify(it->first); + } + + _wakeup = 0; + } + else + { + + time_t now = time(NULL); + + for (it = copy.begin(); it != copy.end(); ++it) + { + //超时处理 + if (it->second + _timeout >= now) + { + _pendlist.push_back(*it); + } + else + { + it->first->set_error(-ETIMEDOUT, __FUNCTION__, "pending task is timedout"); + it->first->reply_notify(); + } + } + + if (!_pendlist.empty()) + attach_timer(_timelist); + } + + return; +} diff --git a/src/search_local/index_storage/cache/task_pendlist.h b/src/search_local/index_storage/cache/task_pendlist.h new file mode 100644 index 0000000..2720504 --- /dev/null +++ b/src/search_local/index_storage/cache/task_pendlist.h @@ -0,0 +1,65 @@ +/* + * 
===================================================================================== + * + * Filename: task_pendlist.h + * + * Description: + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. + * + * ===================================================================================== + */ +#ifndef __TASK_REQUEST_PENDINGLIST_H +#define __TASK_REQUEST_PENDINGLIST_H + +#include "timer_list.h" +#include "namespace.h" +#include "task_request.h" +#include + +DTC_BEGIN_NAMESPACE +/* + * 请求挂起列表。 + * + * 如果发现请求暂时没法满足,则挂起,直到 + * 1. 超时 + * 2. 条件满足被唤醒 + */ +class BufferProcess; +class CacheBase; +class TaskReqeust; +class TimerObject; +class TaskPendingList : private TimerObject +{ +public: + TaskPendingList(TaskDispatcher *o, int timeout = 5); + ~TaskPendingList(); + + void add2_list(TaskRequest *); //加入pending list + void Wakeup(void); //唤醒队列中的所有task + +private: + virtual void timer_notify(void); + +private: + TaskPendingList(const TaskPendingList &); + const TaskPendingList &operator=(const TaskPendingList &); + +private: + int _timeout; + TimerList *_timelist; + TaskDispatcher *_owner; + int _wakeup; + typedef std::pair slot_t; + std::list _pendlist; +}; + +DTC_END_NAMESPACE + +#endif diff --git a/src/search_local/index_storage/cache/tree_data.cc b/src/search_local/index_storage/cache/tree_data.cc new file mode 100644 index 0000000..d2f9136 --- /dev/null +++ b/src/search_local/index_storage/cache/tree_data.cc @@ -0,0 +1,2014 @@ +/* + * ===================================================================================== + * + * Filename: tree_data.cc + * + * Description: T-tree data struct operation. For user invoke. + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. 
+ * + * ===================================================================================== + */ +#include +#include +#include + +#include "tree_data.h" +#include "global.h" +#include "task_pkey.h" +#include "buffer_flush.h" +#include "relative_hour_calculator.h" + +#ifndef likely +#if __GCC_MAJOR >= 3 +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) +#else +#define likely(x) (x) +#define unlikely(x) (x) +#endif +#endif + +#define GET_TREE_VALUE(x, t) \ + do \ + { \ + if (unlikely(m_uiOffset + sizeof(t) > _size)) \ + goto ERROR_RET; \ + x = (typeof(x)) * (t *)(m_pchContent + m_uiOffset); \ + m_uiOffset += sizeof(t); \ + } while (0) + +#define GET_TREE_VALUE_AT_OFFSET(x, t, offset) \ + do \ + { \ + if (unlikely(offset + sizeof(t) > _size)) \ + goto ERROR_RET; \ + x = (typeof(x)) * (t *)(m_pchContent + offset); \ + } while (0) + +#define SET_TREE_VALUE_AT_OFFSET(x, t, offset) \ + do \ + { \ + if (unlikely(offset + sizeof(t) > _size)) \ + goto ERROR_RET; \ + *(t *)(m_pchContent + offset) = x; \ + } while (0) + +#define SET_TREE_VALUE(x, t) \ + do \ + { \ + if (unlikely(m_uiOffset + sizeof(t) > _size)) \ + goto ERROR_RET; \ + *(t *)(m_pchContent + m_uiOffset) = x; \ + m_uiOffset += sizeof(t); \ + } while (0) + +#define SET_TREE_BIN_VALUE(p, len) \ + do \ + { \ + if (unlikely(m_uiOffset + sizeof(int) + len > _size)) \ + goto ERROR_RET; \ + *(int *)(m_pchContent + m_uiOffset) = len; \ + m_uiOffset += sizeof(int); \ + if (likely(len != 0)) \ + memcpy(m_pchContent + m_uiOffset, p, len); \ + m_uiOffset += len; \ + } while (0) + +#define SKIP_TREE_SIZE(s) \ + do \ + { \ + if (unlikely(m_uiOffset + s > _size)) \ + goto ERROR_RET; \ + m_uiOffset += s; \ + } while (0) + +TreeData::TreeData(Mallocator *pstMalloc) : m_stTree(*pstMalloc) +{ + m_pstRootData = NULL; + m_uchIndexDepth = 0; + m_uiNeedSize = 0; + m_iKeySize = 0; + _handle = INVALID_HANDLE; + m_iTableIdx = -1; + _size = 0; + _root_size = 0; + _mallocator = pstMalloc; + 
memset(m_szErr, 0, sizeof(m_szErr)); + + m_uchKeyIdx = -1; + m_iExpireId = -1; + m_iLAId = -1; + m_iLCmodId = -1; + + m_uiOffset = 0; + m_uiRowOffset = 0; + m_ullAffectedrows = 0; + + m_IndexPartOfUniqField = false; + m_hRecord = INVALID_HANDLE; +} + +TreeData::~TreeData() +{ + _handle = INVALID_HANDLE; + _root_size = 0; +} + +int TreeData::Init(uint8_t uchKeyIdx, int iKeySize, const char *pchKey, int laId, int expireId, int nodeIdx) +{ + int ks = iKeySize != 0 ? iKeySize : 1 + *(unsigned char *)pchKey; + int uiDataSize = 2 + sizeof(uint32_t) * 4 + sizeof(uint16_t) * 3 + sizeof(MEM_HANDLE_T) + ks; + + _handle = INVALID_HANDLE; + _root_size = 0; + + _handle = _mallocator->Malloc(uiDataSize); + if (_handle == INVALID_HANDLE) + { + snprintf(m_szErr, sizeof(m_szErr), "malloc error"); + m_uiNeedSize = uiDataSize; + return (EC_NO_MEM); + } + _root_size = _mallocator->chunk_size(_handle); + + m_pstRootData = Pointer(); + m_pstRootData->m_uchDataType = ((m_iTableIdx << 7) & 0x80) + DATA_TYPE_TREE_ROOT; + m_pstRootData->m_treeSize = 0; + m_pstRootData->m_uiTotalRawSize = 0; + m_pstRootData->m_uiNodeCnts = 0; + m_pstRootData->m_uiRowCnt = 0; + m_pstRootData->m_hRoot = INVALID_HANDLE; + + m_pstRootData->m_uchGetCount = 1; + + m_uiLAOffset = 0; + + m_iKeySize = iKeySize; + m_uchKeyIdx = uchKeyIdx; + m_iLAId = laId; + m_iExpireId = expireId; + if (nodeIdx != -1) + { + m_iTableIdx = nodeIdx; + } + if (m_iTableIdx != 0 && m_iTableIdx != 1) + { + snprintf(m_szErr, sizeof(m_szErr), "node idx error"); + return -100; + } + + if (iKeySize != 0) + { + memcpy(m_pstRootData->m_achKey, pchKey, iKeySize); + } + else + { + memcpy(m_pstRootData->m_achKey, pchKey, ks); + } + + m_stTree.Attach(INVALID_HANDLE); + + return (0); +} + +int TreeData::Init(const char *pchKey) +{ + if (DTCColExpand::Instance()->is_expanding()) + m_iTableIdx = (DTCColExpand::Instance()->cur_table_idx() + 1) % 2; + else + m_iTableIdx = DTCColExpand::Instance()->cur_table_idx() % 2; + if (m_iTableIdx != 0 && m_iTableIdx 
!= 1) + { + snprintf(m_szErr, sizeof(m_szErr), "attach error, nodeIdx[%d] error", m_iTableIdx); + return -1; + } + m_pstTab = TableDefinitionManager::Instance()->get_table_def_by_idx(m_iTableIdx); + if (m_pstTab == NULL) + { + snprintf(m_szErr, sizeof(m_szErr), "attach error, tabledef[NULL]"); + return -1; + } + + return Init(m_pstTab->key_fields() - 1, m_pstTab->key_format(), pchKey, m_pstTab->lastacc_field_id(), m_pstTab->expire_time_field_id()); +} + +int TreeData::Attach(MEM_HANDLE_T hHandle, uint8_t uchKeyIdx, int iKeySize, int laid, int lcmodid, int expireid) +{ + _root_size = _mallocator->chunk_size(hHandle); + if (unlikely(_root_size == 0)) + { + snprintf(m_szErr, sizeof(m_szErr), "attach error: %s", _mallocator->get_err_msg()); + return (-1); + } + _handle = hHandle; + + m_pstRootData = Pointer(); + + unsigned char uchType; + uchType = m_pstRootData->m_uchDataType; + if (unlikely((uchType & 0x7f) != DATA_TYPE_TREE_ROOT)) + { + snprintf(m_szErr, sizeof(m_szErr), "invalid data type: %u", uchType); + return (-2); + } + + m_uiLAOffset = 0; + + m_iKeySize = iKeySize; + m_uchKeyIdx = uchKeyIdx; + m_iExpireId = expireid; + m_iLAId = laid; + m_iLCmodId = lcmodid; + + m_stTree.Attach(m_pstRootData->m_hRoot); + + return (0); +} + +int TreeData::Attach(MEM_HANDLE_T hHandle) +{ + _handle = hHandle; + char *p = Pointer(); + m_iTableIdx = (*p >> 7) & 0x01; + if (m_iTableIdx != 0 && m_iTableIdx != 1) + { + snprintf(m_szErr, sizeof(m_szErr), "attach error, nodeIdx[%d] error", m_iTableIdx); + return -1; + } + m_pstTab = TableDefinitionManager::Instance()->get_table_def_by_idx(m_iTableIdx); + if (m_pstTab == NULL) + { + snprintf(m_szErr, sizeof(m_szErr), "attach error, tabledef[NULL]"); + return -1; + } + return Attach(hHandle, m_pstTab->key_fields() - 1, m_pstTab->key_format(), m_pstTab->lastacc_field_id(), m_pstTab->lastcmod_field_id(), m_pstTab->expire_time_field_id()); +} + +int TreeData::encode_tree_row(const RowValue &stRow, unsigned char uchOp) +{ + 
SET_TREE_VALUE(uchOp, unsigned char); + for (int j = 1; j <= stRow.num_fields(); j++) //¿½±´Ò»ÐÐÊý¾Ý + { + if (stRow.table_definition()->is_discard(j)) + continue; + const DTCValue *const v = stRow.field_value(j); + switch (stRow.field_type(j)) + { + case DField::Signed: + if (unlikely(stRow.field_size(j) > (int)sizeof(int32_t))) + SET_TREE_VALUE(v->s64, int64_t); + else + SET_TREE_VALUE(v->s64, int32_t); + break; + + case DField::Unsigned: + if (unlikely(stRow.field_size(j) > (int)sizeof(uint32_t))) + SET_TREE_VALUE(v->u64, uint64_t); + else + SET_TREE_VALUE(v->u64, uint32_t); + break; + + case DField::Float: + if (likely(stRow.field_size(j) > (int)sizeof(float))) + SET_TREE_VALUE(v->flt, double); + else + SET_TREE_VALUE(v->flt, float); + break; + + case DField::String: + case DField::Binary: + default: + { + SET_TREE_BIN_VALUE(v->bin.ptr, v->bin.len); + break; + } + } //end of switch + } + + return 0; + +ERROR_RET: + snprintf(m_szErr, sizeof(m_szErr), "encode row error"); + return (-100); +} + +int TreeData::expand_tree_chunk(MEM_HANDLE_T *pRecord, ALLOC_SIZE_T tExpSize) +{ + if (pRecord == NULL) + { + snprintf(m_szErr, sizeof(m_szErr), "tree data not init yet"); + return (-1); + } + + uint32_t dataSize = *(uint32_t *)(m_pchContent + sizeof(unsigned char)); + if (dataSize + tExpSize > _size) + { + ALLOC_HANDLE_T hTmp = _mallocator->ReAlloc((*pRecord), dataSize + tExpSize); + if (hTmp == INVALID_HANDLE) + { + snprintf(m_szErr, sizeof(m_szErr), "realloc error[%s]", _mallocator->get_err_msg()); + m_uiNeedSize = dataSize + tExpSize; + return (EC_NO_MEM); + } + m_pstRootData->m_treeSize -= _size; + *pRecord = hTmp; + _size = _mallocator->chunk_size(hTmp); + m_pchContent = Pointer(*pRecord); + m_pstRootData->m_treeSize += _size; + } + return (0); +} + +int TreeData::insert_sub_tree(uint8_t uchCondIdxCnt, const RowValue &stCondition, KeyComparator pfComp, ALLOC_HANDLE_T hRoot) +{ + int iRet; + if (uchCondIdxCnt != TTREE_INDEX_POS) + { + snprintf(m_szErr, 
sizeof(m_szErr), "index field error"); + return (-100); + } + + bool isAllocNode = false; + DTCValue value = stCondition[TTREE_INDEX_POS]; + char *indexKey = reinterpret_cast(&value); + CmpCookie cookie(m_pstTab, uchCondIdxCnt); + iRet = m_stTree.Insert(indexKey, &cookie, pfComp, hRoot, isAllocNode); + if (iRet == 0 && isAllocNode) + { + m_pstRootData->m_treeSize += sizeof(TtreeNode); + } + return iRet; +} + +int TreeData::Find(uint8_t uchCondIdxCnt, const RowValue &stCondition, KeyComparator pfComp, ALLOC_HANDLE_T *&hRecord) +{ + int iRet; + if (uchCondIdxCnt != TTREE_INDEX_POS) + { + snprintf(m_szErr, sizeof(m_szErr), "index field error"); + return (-100); + } + + DTCValue value = stCondition[TTREE_INDEX_POS]; + char *indexKey = reinterpret_cast(&value); + CmpCookie cookie(m_pstTab, uchCondIdxCnt); + iRet = m_stTree.Find(indexKey, &cookie, pfComp, hRecord); + return iRet; +} + +int TreeData::insert_row_flag(const RowValue &stRow, KeyComparator pfComp, unsigned char uchFlag) +{ + int iRet; + uint32_t rowCnt = 0; + MEM_HANDLE_T *pRecord = NULL; + MEM_HANDLE_T hRecord = INVALID_HANDLE; + int trowSize = calc_tree_row_size(stRow, 0); + int tSize = 0; + m_uiOffset = 0; + + iRet = Find(TTREE_INDEX_POS, stRow, pfComp, pRecord); + if (iRet == -100) + return iRet; + if (pRecord == NULL) + { + tSize = trowSize + sizeof(unsigned char) + sizeof(uint32_t) * 2; + hRecord = _mallocator->Malloc(tSize); + if (hRecord == INVALID_HANDLE) + { + m_uiNeedSize = tSize; + snprintf(m_szErr, sizeof(m_szErr), "malloc error"); + return (EC_NO_MEM); + } + _size = _mallocator->chunk_size(hRecord); + m_pchContent = Pointer(hRecord); + *m_pchContent = DATA_TYPE_TREE_NODE; //RawFormat->DataType + m_uiOffset += sizeof(unsigned char); + *(uint32_t *)(m_pchContent + m_uiOffset) = 0; //RawFormat->data_size + m_uiOffset += sizeof(uint32_t); + *(uint32_t *)(m_pchContent + m_uiOffset) = 0; //RawFormat->RowCount + m_uiOffset += sizeof(uint32_t); + + iRet = encode_tree_row(stRow, uchFlag); + if (iRet != 
0) + { + goto ERROR_INSERT_RET; + } + + iRet = insert_sub_tree(TTREE_INDEX_POS, stRow, pfComp, hRecord); + if (iRet != 0) + { + snprintf(m_szErr, sizeof(m_szErr), "insert error"); + m_uiNeedSize = sizeof(TtreeNode); + _mallocator->Free(hRecord); + goto ERROR_INSERT_RET; + } + m_pstRootData->m_treeSize += _size; + m_pstRootData->m_uiNodeCnts++; + } + else + { + m_pchContent = Pointer(*pRecord); + _size = _mallocator->chunk_size(*pRecord); + iRet = expand_tree_chunk(pRecord, trowSize); + if (iRet != 0) + { + snprintf(m_szErr, sizeof(m_szErr), "expand tree chunk error"); + return iRet; + } + + m_uiOffset = *(uint32_t *)(m_pchContent + sizeof(unsigned char)); //datasize + + iRet = encode_tree_row(stRow, uchFlag); + if (iRet != 0) + { + goto ERROR_INSERT_RET; + } + } + + /*每次insert数据之后,更新头部信息*/ + rowCnt = *(uint32_t *)(m_pchContent + sizeof(unsigned char) + sizeof(uint32_t)); + *(uint32_t *)(m_pchContent + sizeof(unsigned char)) = m_uiOffset; + *(uint32_t *)(m_pchContent + sizeof(unsigned char) + sizeof(uint32_t)) = rowCnt + 1; + m_pstRootData->m_hRoot = m_stTree.Root(); + m_pstRootData->m_uiRowCnt += 1; + m_pstRootData->m_uiTotalRawSize += trowSize; + +ERROR_INSERT_RET: + m_uiOffset = 0; + _size = 0; + hRecord = INVALID_HANDLE; + m_pchContent = NULL; + + return (iRet); +} + +int TreeData::insert_row(const RowValue &stRow, KeyComparator pfComp, bool isDirty) +{ + return insert_row_flag(stRow, pfComp, isDirty ? 
OPER_INSERT : OPER_SELECT); +} + +unsigned TreeData::ask_for_destroy_size(void) +{ + if (unlikely(_root_size == 0)) + { + snprintf(m_szErr, sizeof(m_szErr), "attach error: %s", _mallocator->get_err_msg()); + return (-1); + } + return m_pstRootData->m_treeSize + _root_size; +} + +int TreeData::Destroy() +{ + if (unlikely(_root_size == 0)) + { + snprintf(m_szErr, sizeof(m_szErr), "attach error: %s", _mallocator->get_err_msg()); + return (-1); + } + m_stTree.Destroy(); + _mallocator->Free(_handle); + + _handle = INVALID_HANDLE; + _root_size = 0; + return (0); +} + +int TreeData::copy_raw_all(RawData *pstRawData) +{ + int iRet; + uint32_t totalNodeCnt = m_pstRootData->m_uiNodeCnts; + if (totalNodeCnt == 0) + { + return 1; + } + pResCookie resCookie; + MEM_HANDLE_T pCookie[totalNodeCnt]; + resCookie.m_handle = pCookie; + resCookie.nodesNum = 0; + iRet = m_stTree.traverse_forward(Visit, &resCookie); + if (iRet != 0) + { + snprintf(m_szErr, sizeof(m_szErr), " traverse tree-data rows error:%d", iRet); + return (-1); + } + ALLOC_SIZE_T headlen = sizeof(unsigned char) + sizeof(uint32_t) * 2; + for (uint32_t i = 0; i < resCookie.nodesGot; i++) + { + char *pch = Pointer(pCookie[i]); + ALLOC_SIZE_T dtsize = *(uint32_t *)(pch + sizeof(unsigned char)); + + uint32_t rowcnt = *(uint32_t *)(pch + sizeof(unsigned char) + sizeof(uint32_t)); + iRet = pstRawData->append_n_records(rowcnt, pch + headlen, dtsize - headlen); + if (iRet != 0) + return iRet; + } + if ((iRet = pstRawData->Attach(pstRawData->get_handle())) != 0) + return (iRet); + + return 0; +} + +int TreeData::copy_tree_all(RawData *pstRawData) +{ + int iRet; + if (m_pstTab->num_fields() < 1) + { + log_error("field nums is too short"); + return -1; + } + + unsigned int uiTotalRows = pstRawData->total_rows(); + if (uiTotalRows == 0) + return (0); + + pstRawData->rewind(); + RowValue stOldRow(m_pstTab); + for (unsigned int i = 0; i < uiTotalRows; i++) + { + unsigned char uchRowFlags; + stOldRow.default_value(); + if 
(pstRawData->decode_row(stOldRow, uchRowFlags, 0) != 0) + { + log_error("raw-data decode row error: %s", pstRawData->get_err_msg()); + return (-1); + } + + iRet = insert_row(stOldRow, KeyCompare, false); + if (iRet == EC_NO_MEM) + { + /*这里为了下次完全重新建立T树,把未建立完的树全部删除*/ + m_uiNeedSize = pstRawData->data_size() - pstRawData->data_start(); + destroy_sub_tree(); + return (EC_NO_MEM); + } + } + + return (0); +} + +int TreeData::decode_tree_row(RowValue &stRow, unsigned char &uchRowFlags, int iDecodeFlag) +{ + m_uiRowOffset = m_uiOffset; + + GET_TREE_VALUE(uchRowFlags, unsigned char); + for (int j = 1; j <= stRow.num_fields(); j++) + { + if (stRow.table_definition()->is_discard(j)) + continue; + if (j == m_iLAId) + m_uiLAOffset = m_uiOffset; + switch (stRow.field_type(j)) + { + case DField::Signed: + if (unlikely(stRow.field_size(j) > (int)sizeof(int32_t))) + { + GET_TREE_VALUE(stRow.field_value(j)->s64, int64_t); + } + else + { + GET_TREE_VALUE(stRow.field_value(j)->s64, int32_t); + } + break; + + case DField::Unsigned: + if (unlikely(stRow.field_size(j) > (int)sizeof(uint32_t))) + { + GET_TREE_VALUE(stRow.field_value(j)->u64, uint64_t); + } + else + { + GET_TREE_VALUE(stRow.field_value(j)->u64, uint32_t); + } + break; + + case DField::Float: + if (likely(stRow.field_size(j) > (int)sizeof(float))) + { + GET_TREE_VALUE(stRow.field_value(j)->flt, double); + } + else + { + GET_TREE_VALUE(stRow.field_value(j)->flt, float); + } + break; + + case DField::String: + case DField::Binary: + default: + { + GET_TREE_VALUE(stRow.field_value(j)->bin.len, int); + stRow.field_value(j)->bin.ptr = m_pchContent + m_uiOffset; + SKIP_TREE_SIZE((uint32_t)stRow.field_value(j)->bin.len); + break; + } + } //end of switch + } + return (0); + +ERROR_RET: + snprintf(m_szErr, sizeof(m_szErr), "get value error"); + return (-100); +} + +int TreeData::compare_tree_data(RowValue *stpNodeRow) +{ + uint32_t rowCnt = m_pstRootData->m_uiNodeCnts; + if (rowCnt == 0) + { + return 1; + } + + const uint8_t *ufli = 
m_pstTab->uniq_fields_list(); + for (int i = 0; !m_IndexPartOfUniqField && i < m_pstTab->uniq_fields(); i++) + { + if (ufli[i] == TTREE_INDEX_POS) + { + m_IndexPartOfUniqField = true; + break; + } + } + + if (m_IndexPartOfUniqField) + { + MEM_HANDLE_T *pRecord = NULL; + RowValue stOldRow(m_pstTab); + char *indexKey = reinterpret_cast(stpNodeRow->field_value(TTREE_INDEX_POS)); + CmpCookie cookie(m_pstTab, TTREE_INDEX_POS); + int iRet = m_stTree.Find(indexKey, &cookie, KeyCompare, pRecord); + if (iRet == -100) + return iRet; + if (pRecord != NULL) + { + m_pchContent = Pointer(*pRecord); + uint32_t rows = *(uint32_t *)(m_pchContent + sizeof(unsigned char) + sizeof(uint32_t)); + m_uiOffset = sizeof(unsigned char) + sizeof(uint32_t) * 2; + _size = _mallocator->chunk_size(*pRecord); + + for (uint32_t j = 0; j < rows; j++) + { + stOldRow.default_value(); + unsigned char uchRowFlags; + if (decode_tree_row(stOldRow, uchRowFlags, 0) != 0) + { + return (-2); + } + if (stpNodeRow->Compare(stOldRow, m_pstTab->uniq_fields_list(), m_pstTab->uniq_fields()) == 0) + { + m_hRecord = *pRecord; + return 0; + } + } + } + } + else + { + pResCookie resCookie; + MEM_HANDLE_T pCookie[rowCnt]; + resCookie.m_handle = pCookie; + resCookie.nodesNum = 0; + if (m_stTree.traverse_forward(Visit, &resCookie) != 0) + { + snprintf(m_szErr, sizeof(m_szErr), " traverse tree-data rows error"); + return (-1); + } + + RowValue stOldRow(m_pstTab); + for (uint32_t i = 0; i < resCookie.nodesGot; i++) + { //逐行拷贝数据 + m_pchContent = Pointer(pCookie[i]); + uint32_t rows = *(uint32_t *)(m_pchContent + sizeof(unsigned char) + sizeof(uint32_t)); + m_uiOffset = sizeof(unsigned char) + sizeof(uint32_t) * 2; + _size = _mallocator->chunk_size(pCookie[i]); + + for (uint32_t j = 0; j < rows; j++) + { + stOldRow.default_value(); + unsigned char uchRowFlags; + if (decode_tree_row(stOldRow, uchRowFlags, 0) != 0) + { + return (-2); + } + if (stpNodeRow->Compare(stOldRow, m_pstTab->uniq_fields_list(), m_pstTab->uniq_fields()) 
== 0) + { + m_hRecord = pCookie[i]; + return 0; + } + } + } + } + + return 1; +} + +int TreeData::replace_tree_data(TaskRequest &stTask, Node *pstNode, RawData *pstAffectedRows, bool async, unsigned char &RowFlag, bool setrows) +{ + int iRet; + unsigned int uiTotalRows = 0; + uint32_t iDelete = 0; + DTCTableDefinition *stpNodeTab, *stpTaskTab; + RowValue *stpNodeRow, *stpTaskRow; + + stpNodeTab = m_pstTab; + stpTaskTab = stTask.table_definition(); + RowValue stNewRow(stpTaskTab); + RowValue stNewNodeRow(stpNodeTab); + m_ullAffectedrows = 0; + + stpTaskRow = &stNewRow; + stpNodeRow = &stNewNodeRow; + if (stpNodeTab == stpTaskTab) + stpNodeRow = stpTaskRow; + + stNewRow.default_value(); + stTask.update_row(*stpTaskRow); + + if (stpNodeTab != stpTaskTab) + stpNodeRow->Copy(stpTaskRow); + else + stpNodeRow = stpTaskRow; + + iRet = compare_tree_data(stpNodeRow); + if (iRet < 0) + { + snprintf(m_szErr, sizeof(m_szErr), "compare tree data error:%d", iRet); + return iRet; + } + else if (iRet == 0) + { + DTCValue new_value = (*stpTaskRow)[TTREE_INDEX_POS]; + char *NewIndex = reinterpret_cast(&new_value); + CmpCookie cookie(m_pstTab, TTREE_INDEX_POS); + if (KeyCompare(NewIndex, &cookie, *_mallocator, m_hRecord) != 0) //Index字段变更 + { + char *tmp_pchContent = m_pchContent; + uint32_t tmp_size = _size; + ALLOC_SIZE_T tmp_uiOffset = m_uiOffset; + iRet = insert_row(*stpTaskRow, KeyCompare, m_async); + m_pchContent = tmp_pchContent; + _size = tmp_size; + m_uiOffset = tmp_uiOffset; + + if (iRet == EC_NO_MEM) + return iRet; + else if (iRet == 0) + { + m_uiOffset = m_uiRowOffset; + RowValue stOldRow(m_pstTab); + stOldRow.default_value(); + unsigned char uchRowFlags; + if (decode_tree_row(stOldRow, uchRowFlags, 0) != 0) + { + return (-2); + } + RowFlag = uchRowFlags; + uiTotalRows = get_row_count(); + m_uiOffset = m_uiRowOffset; + if (delete_cur_row(stOldRow) == 0) + iDelete++; + + if (uiTotalRows > 0 && uiTotalRows == iDelete && get_row_count() == 0) //RowFormat上的内容已删光 + { + //删除tree 
node + bool isFreeNode = false; + DTCValue value = (stOldRow)[TTREE_INDEX_POS]; //for轮询的最后一行数据 + char *indexKey = reinterpret_cast(&value); + CmpCookie cookie(m_pstTab, TTREE_INDEX_POS); + int iret = m_stTree.Delete(indexKey, &cookie, KeyCompare, isFreeNode); + if (iret != 0) + { + snprintf(m_szErr, sizeof(m_szErr), "delete stTree failed:%d", iret); + return -4; + } + if (isFreeNode) + m_pstRootData->m_treeSize -= sizeof(TtreeNode); + m_pstRootData->m_treeSize -= _size; + m_pstRootData->m_uiNodeCnts--; + m_pstRootData->m_hRoot = m_stTree.Root(); + //释放handle + _mallocator->Free(m_hRecord); + } + } + } + else //Index字段不变 + { + MEM_HANDLE_T *pRawHandle = NULL; + int iRet = Find(TTREE_INDEX_POS, *stpNodeRow, KeyCompare, pRawHandle); + if (iRet == -100 || iRet == 0) + return iRet; + + iRet = replace_cur_row(*stpNodeRow, m_async, pRawHandle); // 加进cache + if (iRet == EC_NO_MEM) + { + return iRet; + } + if (iRet != 0) + { + /*标记加入黑名单*/ + stTask.push_black_list_size(need_size()); + return (-6); + } + } + m_ullAffectedrows = 2; + } + return 0; +} + +int TreeData::replace_sub_raw_data(TaskRequest &stTask, MEM_HANDLE_T hRecord) +{ + DTCTableDefinition *stpNodeTab, *stpTaskTab; + RowValue *stpNodeRow, *stpTaskRow, *stpCurRow; + + stpNodeTab = m_pstTab; + stpTaskTab = stTask.table_definition(); + RowValue stNewRow(stpTaskTab); + RowValue stNewNodeRow(stpNodeTab); + RowValue stCurRow(stpNodeTab); + + stpTaskRow = &stNewRow; + stpNodeRow = &stNewNodeRow; + stpCurRow = &stCurRow; + if (stpNodeTab == stpTaskTab) + stpNodeRow = stpTaskRow; + + m_pchContent = Pointer(hRecord); + unsigned int uiTotalRows = get_row_count(); + m_uiOffset = sizeof(unsigned char) + sizeof(uint32_t) * 2; //offset DataType + data_size + RowCount + _size = _mallocator->chunk_size(hRecord); + + unsigned char uchRowFlags; + uint32_t iDelete = 0; + uint32_t iInsert = 0; + for (unsigned int i = 0; i < uiTotalRows; i++) + { + if (decode_tree_row(*stpNodeRow, uchRowFlags, 0) != 0) + return (-1); + + if 
(stpNodeTab != stpTaskTab) + stpTaskRow->Copy(stpNodeRow); + + stpCurRow->Copy(stpNodeRow); + + //如果不符合查询条件 + if (stTask.compare_row(*stpTaskRow) == 0) + continue; + + MEM_HANDLE_T *pRawHandle = NULL; + int iRet = Find(TTREE_INDEX_POS, *stpCurRow, KeyCompare, pRawHandle); + if (iRet == -100 || iRet == 0) + return iRet; + + stTask.update_row(*stpTaskRow); //修改数据 + + if (stpNodeTab != stpTaskTab) + stpNodeRow->Copy(stpTaskRow); + + if (m_ullAffectedrows == 0) + { + iRet = 0; + DTCValue new_value = (*stpTaskRow)[TTREE_INDEX_POS]; + char *NewIndex = reinterpret_cast(&new_value); + CmpCookie cookie(m_pstTab, TTREE_INDEX_POS); + + if (KeyCompare(NewIndex, &cookie, *_mallocator, hRecord) != 0) //update Index字段 + { + char *tmp_pchContent = m_pchContent; + uint32_t tmp_size = _size; + ALLOC_SIZE_T tmp_uiOffset = m_uiOffset; + + iRet = insert_row(*stpTaskRow, KeyCompare, m_async); + + m_pchContent = tmp_pchContent; + _size = tmp_size; + m_uiOffset = tmp_uiOffset; + if (iRet == EC_NO_MEM) + { + return iRet; + } + else if (iRet == 0) + { + iInsert++; + m_uiOffset = m_uiRowOffset; + if (delete_cur_row(*stpCurRow) == 0) + iDelete++; + } + } + else + { + iRet = replace_cur_row(*stpNodeRow, m_async, pRawHandle); // 加进cache + if (iRet == EC_NO_MEM) + { + return iRet; + } + if (iRet != 0) + { + /*标记加入黑名单*/ + stTask.push_black_list_size(need_size()); + return (-6); + } + } + + m_ullAffectedrows += 2; + } + else + { + if (delete_cur_row(*stpCurRow) == 0) + { + iDelete++; + m_ullAffectedrows++; + } + } + } + + if (uiTotalRows > 0 && uiTotalRows - iDelete == 0) //RowFormat上的内容已删光 + { + //删除tree node + bool isFreeNode = false; + DTCValue value = (*stpCurRow)[TTREE_INDEX_POS]; //for轮询的最后一行数据 + char *indexKey = reinterpret_cast(&value); + CmpCookie cookie(m_pstTab, TTREE_INDEX_POS); + int iret = m_stTree.Delete(indexKey, &cookie, KeyCompare, isFreeNode); + if (iret != 0) + { + snprintf(m_szErr, sizeof(m_szErr), "delete stTree failed:%d", iret); + return -4; + } + if (isFreeNode) + 
m_pstRootData->m_treeSize -= sizeof(TtreeNode); + m_pstRootData->m_treeSize -= _size; + m_pstRootData->m_uiNodeCnts--; + m_pstRootData->m_hRoot = m_stTree.Root(); + //释放handle + _mallocator->Free(hRecord); + } + + return 0; +} + +/* + * encode到私有内存,防止replace,update引起重新rellocate导致value引用了过期指针 + */ +int TreeData::encode_to_private_area(RawData &raw, RowValue &value, unsigned char value_flag) +{ + int ret = raw.Init(Key(), raw.calc_row_size(value, m_pstTab->key_fields() - 1)); + if (0 != ret) + { + log_error("init raw-data struct error, ret=%d, err=%s", ret, raw.get_err_msg()); + return -1; + } + + ret = raw.insert_row(value, false, false); + if (0 != ret) + { + log_error("insert row to raw-data error: ret=%d, err=%s", ret, raw.get_err_msg()); + return -2; + } + + raw.rewind(); + + ret = raw.decode_row(value, value_flag, 0); + if (0 != ret) + { + log_error("decode raw-data to row error: ret=%d, err=%s", ret, raw.get_err_msg()); + return -3; + } + + return 0; +} + +int TreeData::update_sub_raw_data(TaskRequest &stTask, MEM_HANDLE_T hRecord) +{ + DTCTableDefinition *stpNodeTab, *stpTaskTab; + RowValue *stpNodeRow, *stpTaskRow, *stpCurRow; + + stpNodeTab = m_pstTab; + stpTaskTab = stTask.table_definition(); + RowValue stNewRow(stpTaskTab); + RowValue stNewNodeRow(stpNodeTab); + RowValue stCurRow(stpNodeTab); + + stpTaskRow = &stNewRow; + stpNodeRow = &stNewNodeRow; + stpCurRow = &stCurRow; + if (stpNodeTab == stpTaskTab) + stpNodeRow = stpTaskRow; + + m_pchContent = Pointer(hRecord); + unsigned int uiTotalRows = get_row_count(); + m_uiOffset = sizeof(unsigned char) + sizeof(uint32_t) * 2; //offset DataType + data_size + RowCount + _size = _mallocator->chunk_size(hRecord); + + unsigned char uchRowFlags; + uint32_t iDelete = 0; + uint32_t iInsert = 0; + for (unsigned int i = 0; i < uiTotalRows; i++) + { + if (decode_tree_row(*stpNodeRow, uchRowFlags, 0) != 0) + return (-1); + + if (stpNodeTab != stpTaskTab) + stpTaskRow->Copy(stpNodeRow); + + stpCurRow->Copy(stpNodeRow); + 
+ //如果不符合查询条件 + if (stTask.compare_row(*stpTaskRow) == 0) + continue; + + MEM_HANDLE_T *pRawHandle = NULL; + int iRet = Find(TTREE_INDEX_POS, *stpCurRow, KeyCompare, pRawHandle); + if (iRet == -100 || iRet == 0) + return iRet; + + stTask.update_row(*stpTaskRow); //修改数据 + + if (stpNodeTab != stpTaskTab) + stpNodeRow->Copy(stpTaskRow); + + iRet = 0; + DTCValue new_value = (*stpTaskRow)[TTREE_INDEX_POS]; + char *NewIndex = reinterpret_cast(&new_value); + CmpCookie cookie(m_pstTab, TTREE_INDEX_POS); + + if (KeyCompare(NewIndex, &cookie, *_mallocator, hRecord) != 0) //update Index字段 + { + char *tmp_pchContent = m_pchContent; + uint32_t tmp_size = _size; + ALLOC_SIZE_T tmp_uiOffset = m_uiOffset; + + iRet = insert_row(*stpTaskRow, KeyCompare, m_async); + + m_pchContent = tmp_pchContent; + _size = tmp_size; + m_uiOffset = tmp_uiOffset; + if (iRet == EC_NO_MEM) + { + return iRet; + } + else if (iRet == 0) + { + iInsert++; + m_uiOffset = m_uiRowOffset; + if (delete_cur_row(*stpCurRow) == 0) + iDelete++; + } + } + else + { + // 在私有区间decode + RawData stTmpRows(&g_stSysMalloc, 1); + if (encode_to_private_area(stTmpRows, *stpNodeRow, uchRowFlags)) + { + log_error("encode rowvalue to private rawdata area failed"); + return -3; + } + + iRet = replace_cur_row(*stpNodeRow, m_async, pRawHandle); // 加进cache + if (iRet == EC_NO_MEM) + { + return iRet; + } + if (iRet != 0) + { + /*标记加入黑名单*/ + stTask.push_black_list_size(need_size()); + return (-6); + } + } + + m_ullAffectedrows++; + if (uchRowFlags & OPER_DIRTY) + m_llDirtyRowsInc--; + if (m_async) + m_llDirtyRowsInc++; + } + + if (uiTotalRows > 0 && uiTotalRows - iDelete == 0) //RowFormat上的内容已删光 + { + //删除tree node + bool isFreeNode = false; + DTCValue value = (*stpCurRow)[TTREE_INDEX_POS]; //for轮询的最后一行数据 + char *indexKey = reinterpret_cast(&value); + CmpCookie cookie(m_pstTab, TTREE_INDEX_POS); + int iret = m_stTree.Delete(indexKey, &cookie, KeyCompare, isFreeNode); + if (iret != 0) + { + snprintf(m_szErr, sizeof(m_szErr), "delete 
stTree failed:%d", iret); + return -4; + } + if (isFreeNode) + m_pstRootData->m_treeSize -= sizeof(TtreeNode); + m_pstRootData->m_treeSize -= _size; + m_pstRootData->m_uiNodeCnts--; + m_pstRootData->m_hRoot = m_stTree.Root(); + //释放handle + _mallocator->Free(hRecord); + } + + if (iInsert != iDelete) + { + snprintf(m_szErr, sizeof(m_szErr), "update index change error: insert:%d, delete:%d", iInsert, iDelete); + return (-10); + } + + return 0; +} + +int TreeData::delete_sub_raw_data(TaskRequest &stTask, MEM_HANDLE_T hRecord) +{ + int iRet; + DTCTableDefinition *stpNodeTab, *stpTaskTab; + RowValue *stpNodeRow, *stpTaskRow; + + stpNodeTab = m_pstTab; + stpTaskTab = stTask.table_definition(); + RowValue stNodeRow(stpNodeTab); + RowValue stTaskRow(stpTaskTab); + if (stpNodeTab == stpTaskTab) + { + stpNodeRow = &stTaskRow; + stpTaskRow = &stTaskRow; + } + else + { + stpNodeRow = &stNodeRow; + stpTaskRow = &stTaskRow; + } + + unsigned int iAffectRows = 0; + unsigned char uchRowFlags; + + m_pchContent = Pointer(hRecord); + unsigned int uiTotalRows = get_row_count(); + m_uiOffset = sizeof(unsigned char) + sizeof(uint32_t) * 2; //offset DataType + data_size + RowCount + _size = _mallocator->chunk_size(hRecord); + + for (unsigned int i = 0; i < uiTotalRows; i++) + { + if ((decode_tree_row(*stpNodeRow, uchRowFlags, 0)) != 0) + { + return (-2); + } + if (stpNodeTab != stpTaskTab) + { + stpTaskRow->Copy(stpNodeRow); + } + if (stTask.compare_row(*stpTaskRow) != 0) + { //符合del条件 + iRet = delete_cur_row(*stpNodeRow); + if (iRet != 0) + { + log_error("tree-data delete row error: %d", iRet); + return (-5); + } + iAffectRows++; + m_llRowsInc--; + if (uchRowFlags & OPER_DIRTY) + m_llDirtyRowsInc--; + } + } + + if (iAffectRows > uiTotalRows) + return (-3); + else if (iAffectRows == uiTotalRows && uiTotalRows > 0) //RowFormat上的内容已删光 + { + //删除tree node + bool isFreeNode = false; + DTCValue value = (*stpNodeRow)[TTREE_INDEX_POS]; //for轮询的最后一行数据 + char *indexKey = reinterpret_cast(&value); 
+ CmpCookie cookie(m_pstTab, TTREE_INDEX_POS); + int iret = m_stTree.Delete(indexKey, &cookie, KeyCompare, isFreeNode); + if (iret != 0) + { + snprintf(m_szErr, sizeof(m_szErr), "delete stTree failed:%d\t%s", iret, m_stTree.get_err_msg()); + return -4; + } + if (isFreeNode) + m_pstRootData->m_treeSize -= sizeof(TtreeNode); + m_pstRootData->m_treeSize -= _size; + m_pstRootData->m_uiNodeCnts--; + m_pstRootData->m_hRoot = m_stTree.Root(); + //释放handle + _mallocator->Free(hRecord); + } + + return (0); +} + +int TreeData::skip_row(const RowValue &stRow) +{ + if (m_pchContent == NULL) + { + snprintf(m_szErr, sizeof(m_szErr), "rawdata not init yet"); + return (-1); + } + + m_uiOffset = m_uiRowOffset; + if (m_uiOffset >= get_data_size()) + { + snprintf(m_szErr, sizeof(m_szErr), "already at end of data"); + return (-2); + } + + SKIP_TREE_SIZE(sizeof(unsigned char)); // flag + + for (int j = m_uchKeyIdx + 1; j <= stRow.num_fields(); j++) //拷贝一行数据 + { + //id: bug fix skip discard + if (stRow.table_definition()->is_discard(j)) + continue; + int temp = 0; + switch (stRow.field_type(j)) + { + case DField::Unsigned: + case DField::Signed: + GET_TREE_VALUE_AT_OFFSET(temp, int, m_uiOffset); + + if (stRow.field_size(j) > (int)sizeof(int32_t)) + SKIP_TREE_SIZE(sizeof(int64_t)); + else + SKIP_TREE_SIZE(sizeof(int32_t)); + ; + break; + + case DField::Float: //浮点数 + if (stRow.field_size(j) > (int)sizeof(float)) + SKIP_TREE_SIZE(sizeof(double)); + else + SKIP_TREE_SIZE(sizeof(float)); + break; + + case DField::String: //字符串 + case DField::Binary: //二进制数据 + default: + { + int iLen; + GET_TREE_VALUE(iLen, int); + SKIP_TREE_SIZE(iLen); + break; + } + } //end of switch + } + + return (0); + +ERROR_RET: + snprintf(m_szErr, sizeof(m_szErr), "skip row error"); + return (-100); +} + +int TreeData::replace_cur_row(const RowValue &stRow, bool isDirty, MEM_HANDLE_T *hRecord) +{ + int iRet = 0; + ALLOC_SIZE_T uiOldOffset; + ALLOC_SIZE_T uiNextRowsSize; + ALLOC_SIZE_T uiNewRowSize = 0; + ALLOC_SIZE_T 
uiCurRowSize = 0; + ALLOC_SIZE_T uiNextRowsOffset; + ALLOC_SIZE_T uiDataSize = get_data_size(); + + uiOldOffset = m_uiOffset; + if ((iRet = skip_row(stRow)) != 0) + { + goto ERROR_RET; + } + + unsigned char uchRowFlag; + GET_TREE_VALUE_AT_OFFSET(uchRowFlag, unsigned char, m_uiRowOffset); + if (isDirty) + uchRowFlag = OPER_UPDATE; + + uiNewRowSize = calc_tree_row_size(stRow, m_uchKeyIdx); + uiCurRowSize = m_uiOffset - m_uiRowOffset; + uiNextRowsOffset = m_uiOffset; + uiNextRowsSize = uiDataSize - m_uiOffset; + + if (uiNewRowSize > uiCurRowSize) + { + // enlarge buffer + MEM_HANDLE_T hTmp = _mallocator->ReAlloc(*hRecord, uiDataSize + uiNewRowSize - uiCurRowSize); + if (hTmp == INVALID_HANDLE) + { + snprintf(m_szErr, sizeof(m_szErr), "realloc error"); + m_uiNeedSize = uiDataSize + uiNewRowSize - uiCurRowSize; + iRet = EC_NO_MEM; + goto ERROR_RET; + } + m_pstRootData->m_treeSize -= _size; + *hRecord = hTmp; + _size = _mallocator->chunk_size(*hRecord); + m_pstRootData->m_treeSize += _size; + m_pchContent = Pointer(*hRecord); + + // move data + if (uiNextRowsSize > 0) + memmove(m_pchContent + uiNextRowsOffset + (uiNewRowSize - uiCurRowSize), m_pchContent + uiNextRowsOffset, uiNextRowsSize); + + // copy new row + m_uiOffset = m_uiRowOffset; + iRet = encode_tree_row(stRow, uchRowFlag); + if (iRet != 0) + { + if (uiNextRowsSize > 0) + memmove(m_pchContent + uiNextRowsOffset, m_pchContent + uiNextRowsOffset + (uiNewRowSize - uiCurRowSize), uiNextRowsSize); + iRet = -1; + goto ERROR_RET; + } + } + else + { + // back up old row + void *pTmpBuf = MALLOC(uiCurRowSize); + if (pTmpBuf == NULL) + { + m_uiNeedSize = uiCurRowSize; + snprintf(m_szErr, sizeof(m_szErr), "malloc error: %m"); + return (-ENOMEM); + } + memmove(pTmpBuf, m_pchContent + m_uiRowOffset, uiCurRowSize); + + // copy new row + m_uiOffset = m_uiRowOffset; + iRet = encode_tree_row(stRow, uchRowFlag); + if (iRet != 0) + { + memmove(m_pchContent + m_uiRowOffset, pTmpBuf, uiCurRowSize); + FREE(pTmpBuf); + iRet = -1; + 
goto ERROR_RET; + } + + // move data + if (uiNextRowsSize > 0 && m_uiOffset != uiNextRowsOffset) + memmove(m_pchContent + m_uiOffset, m_pchContent + uiNextRowsOffset, uiNextRowsSize); + FREE(pTmpBuf); + + // shorten buffer + MEM_HANDLE_T hTmp = _mallocator->ReAlloc(*hRecord, uiDataSize + uiNewRowSize - uiCurRowSize); + if (hTmp != INVALID_HANDLE) + { + m_pstRootData->m_treeSize -= _size; + *hRecord = hTmp; + _size = _mallocator->chunk_size(*hRecord); + m_pstRootData->m_treeSize += _size; + m_pchContent = Pointer(*hRecord); + } + } + set_data_size(uiDataSize - uiCurRowSize + uiNewRowSize); + m_pstRootData->m_uiTotalRawSize += (uiNewRowSize - uiCurRowSize); + +ERROR_RET: + m_uiOffset = uiOldOffset + uiNewRowSize - uiCurRowSize; + return (iRet); +} + +int TreeData::delete_cur_row(const RowValue &stRow) +{ + int iRet = 0; + ALLOC_SIZE_T uiOldOffset; + ALLOC_SIZE_T uiNextRowsSize; + + uiOldOffset = m_uiOffset; + if ((iRet = skip_row(stRow)) != 0) + { + log_error("skip error: %d,%s", iRet, get_err_msg()); + goto ERROR_RET; + } + uiNextRowsSize = get_data_size() - m_uiOffset; + + memmove(m_pchContent + m_uiRowOffset, m_pchContent + m_uiOffset, uiNextRowsSize); + set_row_count(get_row_count() - 1); + set_data_size(get_data_size() - (m_uiOffset - m_uiRowOffset)); + + m_pstRootData->m_uiRowCnt--; + m_pstRootData->m_uiTotalRawSize -= (m_uiOffset - m_uiRowOffset); + + m_uiOffset = m_uiRowOffset; + return (iRet); + +ERROR_RET: + m_uiOffset = uiOldOffset; + return (iRet); +} + +int TreeData::get_sub_raw_data(TaskRequest &stTask, MEM_HANDLE_T hRecord) +{ + int laid = stTask.flag_no_cache() ? 
-1 : stTask.table_definition()->lastacc_field_id(); + + if (stTask.result_full()) + return 0; + + DTCTableDefinition *stpNodeTab, *stpTaskTab; + RowValue *stpNodeRow, *stpTaskRow; + stpNodeTab = m_pstTab; + stpTaskTab = stTask.table_definition(); + RowValue stNodeRow(stpNodeTab); + RowValue stTaskRow(stpTaskTab); + if (stpNodeTab == stpTaskTab) + { + stpNodeRow = &stTaskRow; + stpTaskRow = &stTaskRow; + } + else + { + stpNodeRow = &stNodeRow; + stpTaskRow = &stTaskRow; + } + + m_pchContent = Pointer(hRecord); + uint32_t rows = get_row_count(); + m_uiOffset = sizeof(unsigned char) + sizeof(uint32_t) * 2; + _size = _mallocator->chunk_size(hRecord); + + unsigned char uchRowFlags; + for (unsigned int j = 0; j < rows; j++) + { + stTask.update_key(*stpNodeRow); // use stpNodeRow is fine, as just modify key field + if ((decode_tree_row(*stpNodeRow, uchRowFlags, 0)) != 0) + { + return (-2); + } + // this pointer compare is ok, as these two is both come from tabledefmanager. if they mean same, they are same object. 
+ if (stpNodeTab != stpTaskTab) + { + stpTaskRow->Copy(stpNodeRow); + } + if (stTask.compare_row(*stpTaskRow) == 0) //如果不符合查询条件 + continue; + + if (stpTaskTab->expire_time_field_id() > 0) + stpTaskRow->update_expire_time(); + //当前行添加到task中 + stTask.append_row(stpTaskRow); + + if (stTask.all_rows() && stTask.result_full()) + { + stTask.set_total_rows((int)rows); + break; + } + } + return 0; +} + +int TreeData::get_sub_raw(TaskRequest &stTask, unsigned int nodeCnt, bool isAsc, SubRowProcess subRowProc) +{ + pResCookie resCookie; + MEM_HANDLE_T pCookie[nodeCnt]; + resCookie.m_handle = pCookie; + + if (stTask.all_rows() && stTask.requestInfo.limit_count() > 0) //condition: ONLY `LIMIT` without `WHERE` + resCookie.nodesNum = stTask.requestInfo.limit_start() + stTask.requestInfo.limit_count(); + else + resCookie.nodesNum = 0; + + m_stTree.traverse_forward(Visit, &resCookie); + + if (isAsc) //升序 + { + for (int i = 0; i < (int)resCookie.nodesGot; i++) + { + int iRet = (this->*subRowProc)(stTask, pCookie[i]); + if (iRet != 0) + return iRet; + } + } + else //降序 + { + for (int i = (int)resCookie.nodesGot - 1; i >= 0; i--) + { + int iRet = (this->*subRowProc)(stTask, pCookie[i]); + if (iRet != 0) + return iRet; + } + } + + return 0; +} + +int TreeData::match_index_condition(TaskRequest &stTask, unsigned int NodeCnt, SubRowProcess subRowProc) +{ + const DTCFieldValue *condition = stTask.request_condition(); + int numfields = 0; //条件字段个数 + bool isAsc = !(m_pstTab->is_desc_order(TTREE_INDEX_POS)); + + if (condition) + numfields = condition->num_fields(); + + int indexIdArr[numfields]; //开辟空间比实际使用的大 + int indexCount = 0; //条件索引个数 + int firstEQIndex = -1; //第一个EQ在indexIdArr中的位置 + + for (int i = 0; i < numfields; i++) + { + if (condition->field_id(i) == TTREE_INDEX_POS) + { + if (firstEQIndex == -1 && condition->field_operation(i) == DField::EQ) + firstEQIndex = i; + indexIdArr[indexCount++] = i; + } + } + + if (indexCount == 0 || (indexCount == 1 && 
condition->field_operation(indexIdArr[0]) == DField::NE)) + { //平板类型 + int iret = get_sub_raw(stTask, NodeCnt, isAsc, subRowProc); + if (iret != 0) + return iret; + } + else if (firstEQIndex != -1) //有至少一个EQ条件 + { + MEM_HANDLE_T *pRecord = NULL; + + char *indexKey = reinterpret_cast(condition->field_value(firstEQIndex)); + CmpCookie cookie(m_pstTab, TTREE_INDEX_POS); + int iRet = m_stTree.Find(indexKey, &cookie, KeyCompare, pRecord); + if (iRet == -100) + return iRet; + if (pRecord != NULL) + { + iRet = (this->*subRowProc)(stTask, *pRecord); + if (iRet != 0) + return iRet; + } + } + else + { + int leftId = -1; + int rightId = -1; + + for (int i = 0; i < indexCount; i++) + { + switch (condition->field_operation(indexIdArr[i])) + { + case DField::LT: + case DField::LE: + if (rightId == -1) + rightId = indexIdArr[i]; + break; + + case DField::GT: + case DField::GE: + if (leftId == -1) + leftId = indexIdArr[i]; + break; + + default: + break; + } + } + + if (leftId != -1 && rightId == -1) //GE + { + pResCookie resCookie; + MEM_HANDLE_T pCookie[NodeCnt]; + resCookie.m_handle = pCookie; + resCookie.nodesNum = 0; + char *indexKey = reinterpret_cast(condition->field_value(leftId)); + CmpCookie cookie(m_pstTab, TTREE_INDEX_POS); + + if (m_stTree.traverse_forward(indexKey, &cookie, KeyCompare, Visit, &resCookie) != 0) + { + snprintf(m_szErr, sizeof(m_szErr), " traverse tree-data rows error"); + return (-1); + } + + if (isAsc) + { + for (int i = 0; i < (int)resCookie.nodesGot; i++) + { + int iRet = (this->*subRowProc)(stTask, pCookie[i]); + if (iRet != 0) + return iRet; + } + } + else + { + for (int i = (int)resCookie.nodesGot - 1; i >= 0; i--) + { + int iRet = (this->*subRowProc)(stTask, pCookie[i]); + if (iRet != 0) + return iRet; + } + } + } + else if (leftId == -1 && rightId != -1) //LE + { + pResCookie resCookie; + MEM_HANDLE_T pCookie[NodeCnt]; + resCookie.m_handle = pCookie; + resCookie.nodesNum = NodeCnt; + char *indexKey = 
reinterpret_cast(condition->field_value(rightId)); + CmpCookie cookie(m_pstTab, TTREE_INDEX_POS); + + if (m_stTree.traverse_backward(indexKey, &cookie, KeyCompare, Visit, &resCookie) != 0) + { + snprintf(m_szErr, sizeof(m_szErr), " traverse tree-data rows error"); + return (-1); + } + + if (isAsc) + { + for (int i = (int)resCookie.nodesGot - 1; i >= 0; i--) + { + int iRet = (this->*subRowProc)(stTask, pCookie[i]); + if (iRet != 0) + return iRet; + } + } + else + { + for (int i = 0; i < (int)resCookie.nodesGot; i++) + { + int iRet = (this->*subRowProc)(stTask, pCookie[i]); + if (iRet != 0) + return iRet; + } + } + } + else if (leftId != -1 && rightId != -1) //range + { + pResCookie resCookie; + MEM_HANDLE_T pCookie[NodeCnt]; + resCookie.m_handle = pCookie; + resCookie.nodesNum = 0; + char *beginKey = reinterpret_cast(condition->field_value(leftId)); + char *endKey = reinterpret_cast(condition->field_value(rightId)); + CmpCookie cookie(m_pstTab, TTREE_INDEX_POS); + + if (m_stTree.traverse_forward(beginKey, endKey, &cookie, KeyCompare, Visit, &resCookie) != 0) + { + snprintf(m_szErr, sizeof(m_szErr), " traverse tree-data rows error"); + return (-1); + } + + if (isAsc) + { + for (int i = 0; i < (int)resCookie.nodesGot; i++) + { + int iRet = (this->*subRowProc)(stTask, pCookie[i]); + if (iRet != 0) + return iRet; + } + } + else + { + for (int i = (int)resCookie.nodesGot - 1; i >= 0; i--) + { + int iRet = (this->*subRowProc)(stTask, pCookie[i]); + if (iRet != 0) + return iRet; + } + } + } + else //may all NE, raw data process + { + int iret = get_sub_raw(stTask, NodeCnt, isAsc, subRowProc); + if (iret != 0) + return iret; + } + } + + return 0; +} + +int TreeData::dirty_rows_in_node() +{ + unsigned int uiTotalNodes = m_pstRootData->m_uiNodeCnts; + int dirty_rows = 0; + pResCookie resCookie; + MEM_HANDLE_T pCookie[uiTotalNodes]; + resCookie.m_handle = pCookie; + resCookie.nodesNum = 0; + + RowValue stRow(m_pstTab); + + m_stTree.traverse_forward(Visit, &resCookie); + + for 
(int i = 0; i < (int)resCookie.nodesGot; i++) + { + m_pchContent = Pointer(pCookie[i]); + uint32_t rows = get_row_count(); + m_uiOffset = sizeof(unsigned char) + sizeof(uint32_t) * 2; + _size = _mallocator->chunk_size(pCookie[i]); + + unsigned char uchRowFlags; + for (unsigned int j = 0; j < rows; j++) + { + if (decode_tree_row(stRow, uchRowFlags, 0) != 0) + { + log_error("subraw-data decode row error: %s", get_err_msg()); + return (-1); + } + + if (uchRowFlags & OPER_DIRTY) + dirty_rows++; + } + } +/* number of decoded rows whose flags carry OPER_DIRTY */ + return dirty_rows; +} +/* flush_tree_data: visits every node chunk collected by traverse_forward, decodes each row and, for rows flagged OPER_DIRTY, passes the row to pstFlushReq->flush_row (only when pstFlushReq is non-null), clears the dirty bit with set_cur_row_flag, decrements m_llDirtyRowsInc and increments uiFlushRowsCnt. Returns 0 on success, -1 on a decode error, -2 when flush_row fails. pstNode is not used in this body and the result of set_cur_row_flag is ignored. */ +int TreeData::flush_tree_data(DTCFlushRequest *pstFlushReq, Node *pstNode, unsigned int &uiFlushRowsCnt) +{ + unsigned int uiTotalNodes = m_pstRootData->m_uiNodeCnts; + + uiFlushRowsCnt = 0; + DTCValue astKey[m_pstTab->key_fields()]; + TaskPackedKey::unpack_key(m_pstTab, Key(), astKey); + RowValue stRow(m_pstTab); //one row of data + for (int i = 0; i < m_pstTab->key_fields(); i++) + stRow[i] = astKey[i]; + + m_llRowsInc = 0; + m_llDirtyRowsInc = 0; + + pResCookie resCookie; + MEM_HANDLE_T pCookie[uiTotalNodes]; + resCookie.m_handle = pCookie; + resCookie.nodesNum = 0; + + m_stTree.traverse_forward(Visit, &resCookie); + + for (int i = 0; i < (int)resCookie.nodesGot; i++) + { + m_pchContent = Pointer(pCookie[i]); + uint32_t rows = get_row_count(); + m_uiOffset = sizeof(unsigned char) + sizeof(uint32_t) * 2; + _size = _mallocator->chunk_size(pCookie[i]); + + unsigned char uchRowFlags; + for (unsigned int j = 0; j < rows; j++) + { + if (decode_tree_row(stRow, uchRowFlags, 0) != 0) + { + log_error("subraw-data decode row error: %s", get_err_msg()); + return (-1); + } + + if ((uchRowFlags & OPER_DIRTY) == false) + continue; + + if (pstFlushReq && pstFlushReq->flush_row(stRow) != 0) + { + log_error("flush_data() invoke flushRow() failed."); + return (-2); + } + set_cur_row_flag(uchRowFlags & ~OPER_DIRTY); + m_llDirtyRowsInc--; + uiFlushRowsCnt++; + } + } + + return 0; +} +/* get_tree_data: serves a read request. When only a total is needed it reports m_uiRowCnt to the task, otherwise it runs match_index_condition with get_sub_raw_data. Returns 0 at once when the root records no rows. */ +int TreeData::get_tree_data(TaskRequest &stTask) +{ + uint32_t rowCnt = 
m_pstRootData->m_uiRowCnt; + if (rowCnt == 0) + { + return 0; + } + + stTask.prepare_result(); //prepare the result object + if (stTask.all_rows() && (stTask.count_only() || !stTask.in_range((int)rowCnt, 0))) + { + if (stTask.is_batch_request()) + { + if ((int)rowCnt > 0) + stTask.add_total_rows((int)rowCnt); + } + else + { + stTask.set_total_rows((int)rowCnt); + } + } + else + { + int iret = match_index_condition(stTask, m_pstRootData->m_uiNodeCnts, &TreeData::get_sub_raw_data); + if (iret != 0) + return iret; + } + + return 0; +} +/* update_tree_data: returns 0 immediately when the root records no nodes; otherwise stores pstNode and async in members, resets m_llDirtyRowsInc and returns the result of match_index_condition run with update_sub_raw_data. pstAffectedRows and setrows are not used in this body. The local named rowCnt holds m_uiNodeCnts, a node count. */ +int TreeData::update_tree_data(TaskRequest &stTask, Node *pstNode, RawData *pstAffectedRows, bool async, bool setrows) +{ + uint32_t rowCnt = m_pstRootData->m_uiNodeCnts; + if (rowCnt == 0) + { + return 0; + } + + m_pstNode = pstNode; + m_async = async; + m_llDirtyRowsInc = 0; + + return match_index_condition(stTask, rowCnt, &TreeData::update_sub_raw_data); +} +/* delete_tree_data: returns 0 immediately when the root records no nodes; resets m_llRowsInc and m_llDirtyRowsInc, then either reports a total to the task or runs match_index_condition with delete_sub_raw_data, returning its non-zero result if any. NOTE(review): rowCnt is loaded from m_uiNodeCnts yet is passed to in_range/add_total_rows/set_total_rows as a row total, whereas get_tree_data uses m_uiRowCnt for the same calls - confirm whether m_uiRowCnt was intended here. */ +int TreeData::delete_tree_data(TaskRequest &stTask) +{ + uint32_t rowCnt = m_pstRootData->m_uiNodeCnts; + if (rowCnt == 0) + { + return 0; + } + + m_llRowsInc = 0; + m_llDirtyRowsInc = 0; + + stTask.prepare_result(); //prepare the result object + if (stTask.all_rows() && (stTask.count_only() || !stTask.in_range((int)rowCnt, 0))) + { + if (stTask.is_batch_request()) + { + if ((int)rowCnt > 0) + stTask.add_total_rows((int)rowCnt); + } + else + { + stTask.set_total_rows((int)rowCnt); + } + } + else + { + int iret = match_index_condition(stTask, rowCnt, &TreeData::delete_sub_raw_data); + if (iret != 0) + return iret; + } + + return 0; +} +/* get_expire_time: writes the expire value read from the first tree node into expire. Leaves expire at 0 and returns 0 when m_iExpireId is -1; returns -1 with m_szErr set when _handle, the tree root or the first node is INVALID_HANDLE. Parameter t is not referenced in the body. */ +int TreeData::get_expire_time(DTCTableDefinition *t, uint32_t &expire) +{ + expire = 0; + if (unlikely(_handle == INVALID_HANDLE)) + { + snprintf(m_szErr, sizeof(m_szErr), "root tree data not init yet"); + return (-1); + } + if (m_iExpireId == -1) + { + expire = 0; + return 0; + } + + MEM_HANDLE_T root = get_tree_root(); + if (unlikely(root == INVALID_HANDLE)) + { + snprintf(m_szErr, sizeof(m_szErr), "root tree data not init yet"); + return (-1); + } + + MEM_HANDLE_T 
firstHanle = m_stTree.first_node(); + if (unlikely(firstHanle == INVALID_HANDLE)) + { + snprintf(m_szErr, sizeof(m_szErr), "root tree data not init yet"); + return (-1); + } +/* Walk the first node's chunk from offset 0: skip one byte, then step over each field after the key index until the expire field is reached. NOTE(review): unlike calc_tree_row_size, this loop does not skip fields for which is_discard(j) is true - confirm the two layouts agree. */ + m_uiOffset = 0; + _size = _mallocator->chunk_size(firstHanle); + m_pchContent = Pointer(firstHanle); + + SKIP_TREE_SIZE(sizeof(unsigned char)); + + for (int j = m_uchKeyIdx + 1; j <= m_pstTab->num_fields(); j++) + { + if (j == m_iExpireId) + { + expire = *((uint32_t *)(m_pchContent + m_uiOffset)); + break; + } + + switch (m_pstTab->field_type(j)) + { + case DField::Unsigned: + case DField::Signed: + if (m_pstTab->field_size(j) > (int)sizeof(int32_t)) + SKIP_TREE_SIZE(sizeof(int64_t)); + else + SKIP_TREE_SIZE(sizeof(int32_t)); + ; + break; + + case DField::Float: + if (m_pstTab->field_size(j) > (int)sizeof(float)) + SKIP_TREE_SIZE(sizeof(double)); + else + SKIP_TREE_SIZE(sizeof(float)); + break; + + case DField::String: + case DField::Binary: + default: + uint32_t iLen = 0; + GET_TREE_VALUE(iLen, int); + SKIP_TREE_SIZE(iLen); + break; + } //end of switch + } + return 0; +/* NOTE(review): the three resets below follow an unconditional return and can never run - confirm whether they were meant to execute before it. */ + m_uiOffset = 0; + _size = 0; + m_pchContent = NULL; +/* presumably reached by goto from the SKIP_TREE_SIZE and GET_TREE_VALUE macros, which are defined outside this chunk - confirm */ +ERROR_RET: + snprintf(m_szErr, sizeof(m_szErr), "get expire error"); + return (-100); +} +/* calc_tree_row_size: returns the encoded size of the fields of stRow that follow keyIdx: one flag byte, sizeof(int32_t) or sizeof(int64_t) per integer field, sizeof(float) or sizeof(double) per float field, and sizeof(int) plus bin.len for every other type. Fields reported by is_discard are skipped. A keyIdx of -1 is logged as an error but the computation still proceeds. */ +ALLOC_SIZE_T TreeData::calc_tree_row_size(const RowValue &stRow, int keyIdx) +{ + if (keyIdx == -1) + log_error("TreeData may not init yet..."); + ALLOC_SIZE_T tSize = 1; // flag + for (int j = keyIdx + 1; j <= stRow.num_fields(); j++) //copy one row of data + { + if (stRow.table_definition()->is_discard(j)) + continue; + switch (stRow.field_type(j)) + { + case DField::Signed: + case DField::Unsigned: + tSize += unlikely(stRow.field_size(j) > (int)sizeof(int32_t)) ? sizeof(int64_t) : sizeof(int32_t); + break; + + case DField::Float: //floating point + tSize += likely(stRow.field_size(j) > (int)sizeof(float)) ? 
sizeof(double) : sizeof(float); + break; + + case DField::String: //×Ö·û´® + case DField::Binary: //¶þ½øÖÆÊý¾Ý + default: + { + tSize += sizeof(int); + tSize += stRow.field_value(j)->bin.len; + break; + } + } //end of switch + } + if (tSize < 2) + log_notice("m_uchKeyIdx:%d, stRow.num_fields():%d tSize:%d", keyIdx, stRow.num_fields(), tSize); + + return (tSize); +} + +int TreeData::destroy_sub_tree() +{ + m_stTree.Destroy(); + m_pstRootData->m_uiRowCnt = 0; + m_pstRootData->m_hRoot = INVALID_HANDLE; + m_pstRootData->m_treeSize = 0; + m_pstRootData->m_uiTotalRawSize = 0; + m_pstRootData->m_uiNodeCnts = 0; + return 0; +} + +unsigned int TreeData::get_row_count() +{ + return *(uint32_t *)(m_pchContent + sizeof(unsigned char) + sizeof(uint32_t)); +} + +unsigned int TreeData::get_data_size() +{ + return *(uint32_t *)(m_pchContent + sizeof(unsigned char)); +} + +int TreeData::set_row_count(unsigned int count) +{ + SET_TREE_VALUE_AT_OFFSET(count, uint32_t, sizeof(unsigned char) + sizeof(uint32_t)); + +ERROR_RET: + snprintf(m_szErr, sizeof(m_szErr), "set data rowcount error"); + return (-100); +} + +int TreeData::set_data_size(unsigned int data_size) +{ + SET_TREE_VALUE_AT_OFFSET(data_size, uint32_t, sizeof(unsigned char)); + +ERROR_RET: + snprintf(m_szErr, sizeof(m_szErr), "set data size error"); + return (-100); +} + +int TreeData::set_cur_row_flag(unsigned char uchFlag) +{ + if (m_uiRowOffset >= get_data_size()) + { + snprintf(m_szErr, sizeof(m_szErr), "no more rows"); + return (-1); + } + *(unsigned char *)(m_pchContent + m_uiRowOffset) = uchFlag; + + return (0); +} \ No newline at end of file diff --git a/src/search_local/index_storage/cache/tree_data.h b/src/search_local/index_storage/cache/tree_data.h new file mode 100644 index 0000000..169dd56 --- /dev/null +++ b/src/search_local/index_storage/cache/tree_data.h @@ -0,0 +1,502 @@ +/* + * ===================================================================================== + * + * Filename: tree_data.h + * + * 
Description: T-tree data struct operation. For user invoke. + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. + * + * ===================================================================================== + */ + +#ifndef TREE_DATA_H +#define TREE_DATA_H + +#include "raw_data.h" +#include "t_tree.h" +#include "protocol.h" +#include "task_request.h" +#include "value.h" +#include "field.h" +#include "section.h" +#include "table_def.h" + +typedef enum _TreeCheckResult +{ + CHK_CONTINUE, // 继续访问这棵子树 + CHK_SKIP, // 忽略这棵子树,继续访问其他节点 + CHK_STOP, // 终止访问循环 + CHK_DESTROY // 销毁这棵子树 +} TreeCheckResult; + +#define TTREE_INDEX_POS 1 + +typedef TreeCheckResult (*CheckTreeFunc)(Mallocator &stMalloc, uint8_t uchIndexCnt, uint8_t uchCurIdxCnt, const RowValue *pstIndexValue, const uint32_t uiTreeRowNum, void *pCookie); +typedef int (*VisitRawData)(Mallocator &stMalloc, uint8_t uchIndexCnt, const RowValue *pstIndexValue, ALLOC_HANDLE_T &hHandle, int64_t &llRowNumInc, void *pCookie); +class TreeData; +typedef int (TreeData::*SubRowProcess)(TaskRequest &stTask, MEM_HANDLE_T hRecord); + +class DTCFlushRequest; + +/************************************************************ + Description: t-tree根节点的数据结构 + Version: DTC 3.0 +***********************************************************/ +struct _RootData +{ + unsigned char m_uchDataType; + uint32_t m_treeSize; + uint32_t m_uiTotalRawSize; //所有RawData总和,不包含Header + uint32_t m_uiNodeCnts; //索引T树中Node总计个数 + uint32_t m_uiRowCnt; //索引T树中总计行数 + uint8_t m_uchGetCount; + uint16_t m_LastAccessHour; + uint16_t m_LastUpdateHour; + uint16_t m_CreateHour; + MEM_HANDLE_T m_hRoot; + char m_achKey[0]; +} __attribute__((packed)); +typedef struct _RootData RootData; + +class DTCTableDefinition; +typedef struct _CmpCookie +{ + const DTCTableDefinition *m_pstTab; + uint8_t m_uchIdx; + _CmpCookie(const DTCTableDefinition *pstTab, uint8_t uchIdx) + { + 
m_pstTab = pstTab; + m_uchIdx = uchIdx; + } +} CmpCookie; + +typedef struct _pCookie +{ + MEM_HANDLE_T *m_handle; + uint32_t nodesGot; //已经遍历到的节点个数 + uint32_t nodesNum; //需要遍历的节点个数,0代表不限 + uint32_t rowsGot; //已经遍历到的数据行数 + _pCookie() : m_handle(NULL), nodesGot(0), nodesNum(0), rowsGot(0) {} +} pResCookie; + +typedef enum _CondType +{ + COND_VAL_SET, // 查询特定的值列表 + COND_RANGE, // 查询value[0] ~ Key-value[0]<=value[1].s64 + COND_GE, // 查询大于等于value[0]的key + COND_LE, // 查询小于等于value[0]的key + COND_ALL // 遍历所有key +}CondType; + +typedef enum _Order +{ + ORDER_ASC, // 升序 + ORDER_DEC, // 降序 + ORDER_POS, // 后序访问 +} Order; + +/************************************************************ + Description: 查找数据的条件 + Version: DTC 3.0 +***********************************************************/ +typedef struct +{ + unsigned char m_uchCondType; + unsigned char m_uchOrder; + unsigned int m_uiValNum; + DTCValue *m_pstValue; +} TtreeCondition; + +class TreeData +{ +private: + RootData *m_pstRootData; // 注意:地址可能会因为realloc而改变 + Ttree m_stTree; + DTCTableDefinition *m_pstTab; + uint8_t m_uchIndexDepth; + int m_iTableIdx; + char m_szErr[100]; + + ALLOC_SIZE_T m_uiNeedSize; // 最近一次分配内存失败需要的大小 + uint64_t m_ullAffectedrows; + + MEM_HANDLE_T _handle; + uint32_t _size; + uint32_t _root_size; + Mallocator *_mallocator; + Node *m_pstNode; + bool m_async; + int64_t m_llRowsInc; + int64_t m_llDirtyRowsInc; + + int m_iKeySize; + uint8_t m_uchKeyIdx; + int m_iExpireId; + int m_iLAId; + int m_iLCmodId; + ALLOC_SIZE_T m_uiLAOffset; + + ALLOC_SIZE_T m_uiOffset; + ALLOC_SIZE_T m_uiRowOffset; + char *m_pchContent; + + bool m_IndexPartOfUniqField; + MEM_HANDLE_T m_hRecord; + + /************************************************************ + Description: 递归查找数据的cookie参数 + Version: DTC 3.0 + ***********************************************************/ + typedef struct + { + TreeData *m_pstTree; + uint8_t m_uchCondIdxCnt; + uint8_t m_uchCurIndex; + MEM_HANDLE_T m_hHandle; + int64_t m_llAffectRows; + const int 
*piInclusion; + KeyComparator m_pfComp; + const RowValue *m_pstCond; + RowValue *m_pstIndexValue; + VisitRawData m_pfVisit; + void *m_pCookie; + } CIndexCookie; + + typedef struct + { + TreeData *m_pstTree; + uint8_t m_uchCurCond; + MEM_HANDLE_T m_hHandle; + int64_t m_llAffectRows; + const TtreeCondition *m_pstCond; + KeyComparator m_pfComp; + RowValue *m_pstIndexValue; + CheckTreeFunc m_pfCheck; + VisitRawData m_pfVisit; + void *m_pCookie; + } CSearchCookie; + + int set_data_size(unsigned int data_size); + int set_row_count(unsigned int count); + unsigned int get_data_size(); + unsigned int get_row_count(); + +protected: + template + T *Pointer(void) const { return reinterpret_cast(_mallocator->handle_to_ptr(_handle)); } + + template + T *Pointer(MEM_HANDLE_T handle) const { return reinterpret_cast(_mallocator->handle_to_ptr(handle)); } + + int encode_to_private_area(RawData &raw, RowValue &value, unsigned char value_flag); + + inline int pack_key(const RowValue &stRow, uint8_t uchKeyIdx, int &iKeySize, char *&pchKey, unsigned char achKeyBuf[]); + inline int pack_key(const DTCValue *pstVal, uint8_t uchKeyIdx, int &iKeySize, char *&pchKey, unsigned char achKeyBuf[]); + inline int unpack_key(char *pchKey, uint8_t uchKeyIdx, RowValue &stRow); + + int insert_sub_tree(uint8_t uchCurIndex, uint8_t uchCondIdxCnt, const RowValue &stCondition, KeyComparator pfComp, ALLOC_HANDLE_T hRoot); + int insert_sub_tree(uint8_t uchCondIdxCnt, const RowValue &stCondition, KeyComparator pfComp, ALLOC_HANDLE_T hRoot); + int insert_sub_tree(uint8_t uchCondIdxCnt, KeyComparator pfComp, ALLOC_HANDLE_T hRoot); + int insert_row_flag(uint8_t uchCurIndex, const RowValue &stRow, KeyComparator pfComp, unsigned char uchFlag); + int Find(CIndexCookie *pstIdxCookie); + int Find(uint8_t uchCondIdxCnt, const RowValue &stCondition, KeyComparator pfComp, ALLOC_HANDLE_T &hRecord); + int Find(uint8_t uchCondIdxCnt, const RowValue &stCondition, KeyComparator pfComp, ALLOC_HANDLE_T *&hRecord); + static int 
search_visit(Mallocator &stMalloc, ALLOC_HANDLE_T &hRecord, void *pCookie); + int Search(CSearchCookie *pstSearchCookie); + int Delete(CIndexCookie *pstIdxCookie); + int Delete(uint8_t uchCondIdxCnt, const RowValue &stCondition, KeyComparator pfComp, ALLOC_HANDLE_T &hRecord); + +public: + TreeData(Mallocator *pstMalloc); + ~TreeData(); + + const char *get_err_msg() { return m_szErr; } + MEM_HANDLE_T get_handle() { return _handle; } + int Attach(MEM_HANDLE_T hHandle); + int Attach(MEM_HANDLE_T hHandle, uint8_t uchKeyIdx, int iKeySize, int laid = -1, int lcmodid = -1, int expireid = -1); + + const MEM_HANDLE_T get_tree_root() const { return m_stTree.Root(); } + + /************************************************* + Description: 新分配一块内存,并初始化 + Input: iKeySize key的格式,0为变长,非0为定长长度 + pchKey 为格式化后的key,变长key的第0字节为长度 + Output: + Return: 0为成功,非0失败 + *************************************************/ + int Init(int iKeySize, const char *pchKey); + int Init(uint8_t uchKeyIdx, int iKeySize, const char *pchKey, int laId = -1, int expireId = -1, int nodeIdx = -1); + int Init(const char *pchKey); + + const char *Key() const { return m_pstRootData ? m_pstRootData->m_achKey : NULL; } + char *Key() { return m_pstRootData ? 
m_pstRootData->m_achKey : NULL; } + + unsigned int total_rows() { return m_pstRootData->m_uiRowCnt; } + uint64_t get_affectedrows() { return m_ullAffectedrows; } + void set_affected_rows(int num) { m_ullAffectedrows = num; } + + /************************************************* + Description: 最近一次分配内存失败所需要的内存大小 + Input: + Output: + Return: 返回所需要的内存大小 + *************************************************/ + ALLOC_SIZE_T need_size() { return m_uiNeedSize; } + + /************************************************* + Description: 销毁uchLevel以及以下级别的子树 + Input: uchLevel 销毁uchLevel以及以下级别的子树,显然uchLevel应该在1到uchIndexDepth之间 + Output: + Return: 0为成功,非0失败 + *************************************************/ + // int Destroy(uint8_t uchLevel=1); + int Destroy(); + + /************************************************* + Description: 插入一行数据 + Input: stRow 包含index字段以及后面字段的值 + pfComp 用户自定义的key比较函数 + uchFlag 行标记 + Output: + Return: 0为成功,非0失败 + *************************************************/ + int insert_row_flag(const RowValue &stRow, KeyComparator pfComp, unsigned char uchFlag); + + /************************************************* + Description: 插入一行数据 + Input: stRow 包含index字段以及后面字段的值 + pfComp 用户自定义的key比较函数 + isDirty 是否脏数据 + Output: + Return: 0为成功,非0失败 + *************************************************/ + int insert_row(const RowValue &stRow, KeyComparator pfComp, bool isDirty); + + /************************************************* + Description: 查找一行数据 + Input: stCondition 包含各级index字段的值 + pfComp 用户自定义的key比较函数 + + Output: hRecord 查找到的一个指向CRawData的句柄 + Return: 0为找不到,1为找到数据 + *************************************************/ + int Find(const RowValue &stCondition, KeyComparator pfComp, ALLOC_HANDLE_T &hRecord); + + /************************************************* + Description: 按索引条件查找 + Input: pstCond 一个数组,而且大小刚好是uchIndexDepth + pfComp 用户自定义的key比较函数 + pfVisit 当查找到记录时,用户自定义的访问数据函数 + pCookie 访问数据函数使用的cookie参数 + Output: + Return: 0为成功,其他值为错误 + 
*************************************************/ + int Search(const TtreeCondition *pstCond, KeyComparator pfComp, VisitRawData pfVisit, CheckTreeFunc pfCheck, void *pCookie); + + /************************************************* + Description: 从小到大遍历所有数据 + Input: pfComp 用户自定义的key比较函数 + pfVisit 当查找到记录时,用户自定义的访问数据函数 + pCookie 访问数据函数使用的cookie参数 + Output: + Return: 0为成功,其他值为错误 + *************************************************/ + int traverse_forward(KeyComparator pfComp, VisitRawData pfVisit, void *pCookie); + + /************************************************* + Description: 根据指定的index值,删除符合条件的所有行(包括子树) + Input: uchCondIdxCnt 条件index的数量 + stCondition 包含各级index字段的值 + pfComp 用户自定义的key比较函数 + + Output: + Return: 0为成功,其他值为错误 + *************************************************/ + int delete_sub_row(uint8_t uchCondIdxCnt, const RowValue &stCondition, KeyComparator pfComp); + + /************************************************* + Description: 将某个级别的index值修改为另外一个值 + Input: uchCondIdxCnt 条件index的数量 + stCondition 包含各级index字段的值 + pfComp 用户自定义的key比较函数 + pstNewValue 对应最后一个条件字段的新index值 + Output: + Return: 0为成功,其他值为错误 + *************************************************/ + int update_index(uint8_t uchCondIdxCnt, const RowValue &stCondition, KeyComparator pfComp, const DTCValue *pstNewValue); + unsigned ask_for_destroy_size(void); + + DTCTableDefinition *get_node_table_def() { return m_pstTab; } + + void change_mallocator(Mallocator *pstMalloc) + { + _mallocator = pstMalloc; + } + + int expand_tree_chunk(MEM_HANDLE_T *pRecord, ALLOC_SIZE_T tExpSize); + + /************************************************* + Description: destroy data in t-tree + Output: + *************************************************/ + int destroy_sub_tree(); + + /************************************************* + Description: copy data from raw to t-tree + Output: + *************************************************/ + int copy_tree_all(RawData *pstRawData); + + 
/************************************************* + Description: copy data from t-tree to raw + Output: + *************************************************/ + int copy_raw_all(RawData *pstRawData); + + /************************************************* + Description: get tree data from t-tree + Output: + *************************************************/ + int decode_tree_row(RowValue &stRow, unsigned char &uchRowFlags, int iDecodeFlag = 0); + + /************************************************* + Description: set tree data from t-tree + Output: + *************************************************/ + int encode_tree_row(const RowValue &stRow, unsigned char uchOp); + + /************************************************* + Description: compare row data value + Output: + *************************************************/ + int compare_tree_data(RowValue *stpNodeRow); + + /************************************************* + Description: get data in t-tree + Output: + *************************************************/ + int get_tree_data(TaskRequest &stTask); + + /************************************************* + Description: flush data in t-tree + Output: + *************************************************/ + int flush_tree_data(DTCFlushRequest *pstFlushReq, Node *pstNode, unsigned int &uiFlushRowsCnt); + + /************************************************* + Description: get data in t-tree + Output: + *************************************************/ + int delete_tree_data(TaskRequest &stTask); + + /************************************************* + Description: 获得T树中的Raw类型的每一行的数据 + Output: + *************************************************/ + int get_sub_raw_data(TaskRequest &stTask, MEM_HANDLE_T hRecord); + + /************************************************* + Description: 删除T树中的Raw类型的行的数据 + Output: + *************************************************/ + int delete_sub_raw_data(TaskRequest &stTask, MEM_HANDLE_T hRecord); + + 
/************************************************* + Description: 修改T树中的Raw类型的行的数据 + Output: + *************************************************/ + int update_sub_raw_data(TaskRequest &stTask, MEM_HANDLE_T hRecord); + + /************************************************* + Description: 替换T树中的Raw类型的行的数据,如没有此行则创建 + Output: + *************************************************/ + int replace_sub_raw_data(TaskRequest &stTask, MEM_HANDLE_T hRecord); + + /************************************************* + Description: 处理T树中平板类型业务 + Output: + *************************************************/ + int get_sub_raw(TaskRequest &stTask, unsigned int nodeCnt, bool isAsc, SubRowProcess subRowProc); + + /************************************************* + Description: 匹配索引 + Output: + *************************************************/ + int match_index_condition(TaskRequest &stTask, unsigned int rowCnt, SubRowProcess subRowProc); + + /************************************************* + Description: update data in t-tree + Output: + *************************************************/ + int update_tree_data(TaskRequest &stTask, Node *pstNode, RawData *pstAffectedRows, bool async, bool setrows); + + /************************************************* + Description: replace data in t-tree + Output: + *************************************************/ + int replace_tree_data(TaskRequest &stTask, Node *pstNode, RawData *pstAffectedRows, bool async, unsigned char &RowFlag, bool setrows); + + /************************************************* + Description: calculate row data size + Output: + *************************************************/ + ALLOC_SIZE_T calc_tree_row_size(const RowValue &stRow, int keyIdx); + + /************************************************* + Description: get expire time + Output: + *************************************************/ + int get_expire_time(DTCTableDefinition *t, uint32_t &expire); + + /************************************************* + Description: 
替换当前行 + Input: stRow 仅使用row的字段类型等信息,不需要实际数据 + Output: + Return: 0为成功,非0失败 + *************************************************/ + int replace_cur_row(const RowValue &stRow, bool isDirty, MEM_HANDLE_T *hRecord); + + /************************************************* + Description: 删除当前行 + Input: stRow 仅使用row的字段类型等信息,不需要实际数据 + Output: + Return: 0为成功,非0失败 + *************************************************/ + int delete_cur_row(const RowValue &stRow); + + /************************************************* + Description: 调到下一行 + Input: stRow 仅使用row的字段类型等信息,不需要实际数据 + Output: m_uiOffset会指向下一行数据的偏移 + Return: 0为成功,非0失败 + *************************************************/ + int skip_row(const RowValue &stRow); + + /************************************************* + Description: + Output: + *************************************************/ + int64_t dirty_rows_inc() { return m_llDirtyRowsInc; } + + /************************************************* + Description: 查询本次操作增加的行数(可以为负数) + Input: + Output: + Return: 行数 + *************************************************/ + int64_t rows_inc() { return m_llRowsInc; } + + int set_cur_row_flag(unsigned char uchFlag); + + int dirty_rows_in_node(); +}; + +#endif diff --git a/src/search_local/index_storage/cache/tree_data_keycmp.h b/src/search_local/index_storage/cache/tree_data_keycmp.h new file mode 100644 index 0000000..237dc49 --- /dev/null +++ b/src/search_local/index_storage/cache/tree_data_keycmp.h @@ -0,0 +1,93 @@ +/* + * ===================================================================================== + * + * Filename: tree_data_keycmp.h + * + * Description: + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. 
+ * + * ===================================================================================== + */ +#include +/* stricmp: case-insensitive comparison of two NUL-terminated strings, applying toupper to each byte. Returns 0 when they are equal, otherwise the difference between the first mismatching upper-cased bytes. NOTE(review): no include guard is visible in this header, so including it twice in one translation unit would redefine these static inline functions - confirm. */ +static inline int stricmp(const char *p, const char *q) +{ + while (toupper(*(unsigned char *)p) == toupper(*(unsigned char *)q)) + { + if (*p == '\0') + { + return 0; + } + p += 1; + q += 1; + } + return toupper(*(unsigned char *)p) - toupper(*(unsigned char *)q); +} +/* strincmp: same comparison as stricmp but limited to at most n bytes. Returns 0 when the first n bytes match or both strings end before a difference is found. */ +static inline int strincmp(const char *p, const char *q, size_t n) +{ + while (n > 0) + { + int diff = toupper(*(unsigned char *)p) - toupper(*(unsigned char *)q); + if (diff != 0) + { + return diff; + } + else if (*p == '\0') + { + return 0; + } + p += 1; + q += 1; + n -= 1; + } + return 0; +} +/* stricoll: upper-cases both strings into temporary buffers and returns strcoll() of the copies. Each copy uses a 256-byte local buffer, or a new[] buffer when the string is 256 bytes or longer; heap buffers are released before returning. */ +static inline int stricoll(const char *p, const char *q) +{ + char p_buf[256]; + char q_buf[256]; + size_t p_len = strlen(p); + size_t q_len = strlen(q); + char *p_dst = p_buf; + char *q_dst = q_buf; + int i; + if (p_len >= sizeof(p_buf)) + { + p_dst = new char[p_len + 1]; + } + if (q_len >= sizeof(q_buf)) + { + q_dst = new char[q_len + 1]; + } + for (i = 0; p[i] != '\0'; i++) + { + p_dst[i] = toupper(p[i] & 0xFF); + } + p_dst[i] = '\0'; + + for (i = 0; q[i] != '\0'; i++) + { + q_dst[i] = toupper(q[i] & 0xFF); + } + q_dst[i] = '\0'; + + int diff = strcoll(p_dst, q_dst); + if (p_dst != p_buf) + { + delete[] p_dst; + } + if (q_dst != q_buf) + { + delete[] q_dst; + } + return diff; +} diff --git a/src/search_local/index_storage/cache/tree_data_process.cc b/src/search_local/index_storage/cache/tree_data_process.cc new file mode 100644 index 0000000..8c3fa54 --- /dev/null +++ b/src/search_local/index_storage/cache/tree_data_process.cc @@ -0,0 +1,682 @@ +/* + * ===================================================================================== + * + * Filename: tree_data_process.cc + * + * Description: tree data process interface. + * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. 
+ * + * ===================================================================================== + */ +#include +#include +#include + +#include "tree_data_process.h" +#include "global.h" +#include "log.h" +#include "sys_malloc.h" + +DTC_USING_NAMESPACE + +TreeDataProcess::TreeDataProcess(Mallocator *pstMalloc, DTCTableDefinition *pstTab, DTCBufferPool *pstPool, const UpdateMode *pstUpdateMode) : m_stTreeData(pstMalloc), m_pstTab(pstTab), m_pMallocator(pstMalloc), m_pstPool(pstPool) +{ + memcpy(&m_stUpdateMode, pstUpdateMode, sizeof(m_stUpdateMode)); + nodeSizeLimit = 0; + history_rowsize = statmgr.get_sample(ROW_SIZE_HISTORY_STAT); +} + +TreeDataProcess::~TreeDataProcess() +{ +} + +int TreeDataProcess::get_expire_time(DTCTableDefinition *t, Node *pstNode, uint32_t &expire) +{ + int iRet = 0; + + iRet = m_stTreeData.Attach(pstNode->vd_handle()); + if (iRet != 0) + { + snprintf(m_szErr, sizeof(m_szErr), "attach data error"); + log_error("tree-data attach[handle:" UINT64FMT "] error: %d,%s", pstNode->vd_handle(), iRet, m_stTreeData.get_err_msg()); + return (iRet); + } + + iRet = m_stTreeData.get_expire_time(t, expire); + if (iRet != 0) + { + log_error("tree data get expire time error: %d", iRet); + return iRet; + } + return 0; +} + +int TreeDataProcess::replace_data(Node *pstNode, RawData *pstRawData) +{ + int iRet; + + log_debug("Replace TreeData start "); + + m_llRowsInc = 0; + m_llDirtyRowsInc = 0; + + TreeData tmpTreeData(m_pMallocator); + + iRet = tmpTreeData.Init(pstRawData->Key()); + if (iRet == EC_NO_MEM) + { + if (m_pstPool->try_purge_size(tmpTreeData.need_size(), *pstNode) == 0) + iRet = tmpTreeData.Init(pstRawData->Key()); + } + + if (iRet != 0) + { + snprintf(m_szErr, sizeof(m_szErr), "root-data init error: %s", tmpTreeData.get_err_msg()); + tmpTreeData.Destroy(); + return (-2); + } + + iRet = tmpTreeData.copy_tree_all(pstRawData); + if (iRet == EC_NO_MEM) + { + if (m_pstPool->try_purge_size(tmpTreeData.need_size(), *pstNode) == 0) + iRet = 
tmpTreeData.copy_tree_all(pstRawData); + } + + if (iRet != 0) + { + snprintf(m_szErr, sizeof(m_szErr), "root-data init error: %s", tmpTreeData.get_err_msg()); + tmpTreeData.Destroy(); + return (-2); + } + + if (pstNode->vd_handle() != INVALID_HANDLE) + destroy_data(pstNode); + pstNode->vd_handle() = tmpTreeData.get_handle(); + + if (tmpTreeData.total_rows() > 0) + { + history_rowsize.push(tmpTreeData.total_rows()); + } + return (0); +} + +int TreeDataProcess::append_data(TaskRequest &stTask, Node *pstNode, RawData *pstAffectedRows, bool isDirty, bool setrows) +{ + int iRet; + DTCTableDefinition *stpNodeTab, *stpTaskTab; + RowValue *stpNodeRow, *stpTaskRow; + + iRet = m_stTreeData.Attach(pstNode->vd_handle()); + if (iRet != 0) + { + snprintf(m_szErr, sizeof(m_szErr), "attach data error"); + log_error("tree-data attach[handle:" UINT64FMT "] error: %d,%s", pstNode->vd_handle(), iRet, m_stTreeData.get_err_msg()); + return (iRet); + } + + stpNodeTab = m_stTreeData.get_node_table_def(); + stpTaskTab = stTask.table_definition(); + RowValue stTaskRow(stpTaskTab); + RowValue stNodeRow(stpNodeTab); + stpTaskRow = &stTaskRow; + stpTaskRow->default_value(); + stTask.update_row(*stpTaskRow); + + if (stpTaskTab->auto_increment_field_id() >= stpTaskTab->key_fields() && stTask.resultInfo.insert_id()) + { + const int iFieldID = stpTaskTab->auto_increment_field_id(); + const uint64_t iVal = stTask.resultInfo.insert_id(); + stpTaskRow->field_value(iFieldID)->Set(iVal); + } + + if (stpNodeTab == stpTaskTab) + { + stpNodeRow = stpTaskRow; + } + else + { + stpNodeRow = &stNodeRow; + stpNodeRow->default_value(); + stpNodeRow->Copy(stpTaskRow); + } + + log_debug("AppendTreeData start! 
"); + + m_llRowsInc = 0; + m_llDirtyRowsInc = 0; + + unsigned int uiTotalRows = m_stTreeData.total_rows(); + if (uiTotalRows > 0) + { + if ((isDirty || setrows) && stTask.table_definition()->key_as_uniq_field()) + { + snprintf(m_szErr, sizeof(m_szErr), "duplicate key error"); + return (-1062); + } + if (setrows && stTask.table_definition()->key_part_of_uniq_field()) + { + iRet = m_stTreeData.compare_tree_data(stpNodeRow); + if (iRet < 0) + { + log_error("tree-data decode row error: %d,%s", iRet, m_stTreeData.get_err_msg()); + return iRet; + } + else if (iRet == 0) + { + snprintf(m_szErr, sizeof(m_szErr), "duplicate key error"); + return (-1062); + } + } + } + + // insert clean row + iRet = m_stTreeData.insert_row(*stpNodeRow, KeyCompare, isDirty); + if (iRet == EC_NO_MEM) + { + if (m_pstPool->try_purge_size(m_stTreeData.need_size(), *pstNode) == 0) + iRet = m_stTreeData.insert_row(*stpNodeRow, KeyCompare, isDirty); + } + if (iRet != EC_NO_MEM) + pstNode->vd_handle() = m_stTreeData.get_handle(); + if (iRet != 0) + { + snprintf(m_szErr, sizeof(m_szErr), "tree-data insert row error: %s,%d", m_stTreeData.get_err_msg(), iRet); + /*标记加入黑名单*/ + stTask.push_black_list_size(m_stTreeData.need_size()); + return (-2); + } + + if (stTask.resultInfo.affected_rows() == 0 || setrows == true) + stTask.resultInfo.set_affected_rows(1); + m_llRowsInc++; + if (isDirty) + m_llDirtyRowsInc++; + history_rowsize.push(m_stTreeData.total_rows()); + return (0); +} + +int TreeDataProcess::get_data(TaskRequest &stTask, Node *pstNode) +{ + int iRet; + log_debug("Get TreeData start! 
"); + + m_llRowsInc = 0; + m_llDirtyRowsInc = 0; + + iRet = m_stTreeData.Attach(pstNode->vd_handle()); + if (iRet != 0) + { + snprintf(m_szErr, sizeof(m_szErr), "attach data error"); + log_error("tree-data attach[handle:" UINT64FMT "] error: %d,%s", pstNode->vd_handle(), iRet, m_stTreeData.get_err_msg()); + return (-1); + } + + iRet = m_stTreeData.get_tree_data(stTask); + if (iRet != 0) + { + snprintf(m_szErr, sizeof(m_szErr), "get tree data error"); + log_error("tree-data get[handle:" UINT64FMT "] error: %d,%s", pstNode->vd_handle(), iRet, m_stTreeData.get_err_msg()); + return iRet; + } + + /*更新访问时间和查找操作计数*/ + log_debug("node[id:%u] ,Get Count is %d", pstNode->node_id(), m_stTreeData.total_rows()); + return (0); +} + +int TreeDataProcess::expand_node(TaskRequest &stTask, Node *pstNode) +{ + return 0; +} + +int TreeDataProcess::dirty_rows_in_node(TaskRequest &stTask, Node *pstNode) +{ + int iRet = m_stTreeData.Attach(pstNode->vd_handle()); + if (iRet != 0) + { + snprintf(m_szErr, sizeof(m_szErr), "attach data error"); + log_error("tree-data attach[handle:" UINT64FMT "] error: %d,%s", pstNode->vd_handle(), iRet, m_stTreeData.get_err_msg()); + return (-1); + } + + return m_stTreeData.dirty_rows_in_node(); +} + +int TreeDataProcess::attach_data(Node *pstNode, RawData *pstAffectedRows) +{ + int iRet; + + iRet = m_stTreeData.Attach(pstNode->vd_handle()); + if (iRet != 0) + { + log_error("tree-data attach[handle:" UINT64FMT "] error: %d,%s", pstNode->vd_handle(), iRet, m_stTreeData.get_err_msg()); + return (-1); + } + + if (pstAffectedRows != NULL) + { + iRet = pstAffectedRows->Init(m_stTreeData.Key(), 0); + if (iRet != 0) + { + log_error("tree-data init error: %d,%s", iRet, pstAffectedRows->get_err_msg()); + return (-2); + } + } + + return (0); +} + +int TreeDataProcess::get_all_rows(Node *pstNode, RawData *pstRows) +{ + int iRet = 0; + + m_llRowsInc = 0; + m_llDirtyRowsInc = 0; + + iRet = attach_data(pstNode, pstRows); + if (iRet != 0) + { + log_error("attach data 
error: %d", iRet); + return (-1); + } + + iRet = m_stTreeData.copy_raw_all(pstRows); + if (iRet != 0) + { + log_error("copy data error: %d,%s", iRet, m_stTreeData.get_err_msg()); + return (-2); + } + + return (0); +} + +int TreeDataProcess::delete_data(TaskRequest &stTask, Node *pstNode, RawData *pstAffectedRows) +{ + int iRet; + log_debug("Delete TreeData start! "); + + iRet = m_stTreeData.Attach(pstNode->vd_handle()); + if (iRet != 0) + { + snprintf(m_szErr, sizeof(m_szErr), "attach data error"); + log_error("tree-data attach[handle:" UINT64FMT "] error: %d,%s", pstNode->vd_handle(), iRet, m_stTreeData.get_err_msg()); + return (-1); + } + + int start = m_stTreeData.total_rows(); + + iRet = m_stTreeData.delete_tree_data(stTask); + if (iRet != 0) + { + snprintf(m_szErr, sizeof(m_szErr), "get tree data error"); + log_error("tree-data get[handle:" UINT64FMT "] error: %d,%s", pstNode->vd_handle(), iRet, m_stTreeData.get_err_msg()); + return iRet; + } + + int iAffectRows = start - m_stTreeData.total_rows(); + if (iAffectRows > 0) + { + if (stTask.resultInfo.affected_rows() == 0 || + (stTask.request_condition() && stTask.request_condition()->has_type_timestamp())) + { + stTask.resultInfo.set_affected_rows(iAffectRows); + } + } + + m_llRowsInc = m_stTreeData.rows_inc(); + m_llDirtyRowsInc = m_stTreeData.dirty_rows_inc(); + + log_debug("node[id:%u] ,Get Count is %d", pstNode->node_id(), m_stTreeData.total_rows()); + return (0); +} + +int TreeDataProcess::replace_data(TaskRequest &stTask, Node *pstNode) +{ + log_debug("replace_data start! "); + DTCTableDefinition *stpNodeTab, *stpTaskTab; + RowValue *stpNodeRow; + + int iRet; + int try_purge_count = 0; + uint64_t all_rows_size = 0; + int laid = stTask.flag_no_cache() || stTask.count_only() ? 
-1 : stTask.table_definition()->lastacc_field_id(); + int matchedCount = 0; + int limitStart = 0; + int limitStop = 0x10000000; + + stpTaskTab = stTask.table_definition(); + if (DTCColExpand::Instance()->is_expanding()) + stpNodeTab = TableDefinitionManager::Instance()->get_new_table_def(); + else + stpNodeTab = TableDefinitionManager::Instance()->get_cur_table_def(); + RowValue stNodeRow(stpNodeTab); + stpNodeRow = &stNodeRow; + stpNodeRow->default_value(); + + if (laid > 0 && stTask.requestInfo.limit_count() > 0) + { + limitStart = stTask.requestInfo.limit_start(); + if (stTask.requestInfo.limit_start() > 0x10000000) + { + laid = -1; + } + else if (stTask.requestInfo.limit_count() < 0x10000000) + { + limitStop = limitStart + stTask.requestInfo.limit_count(); + } + } + + m_llRowsInc = 0; + m_llDirtyRowsInc = 0; + + if (pstNode->vd_handle() != INVALID_HANDLE) + { + iRet = destroy_data(pstNode); + if (iRet != 0) + return (-1); + } + + iRet = m_stTreeData.Init(stTask.packed_key()); + if (iRet == EC_NO_MEM) + { + if (m_pstPool->try_purge_size(m_stTreeData.need_size(), *pstNode) == 0) + iRet = m_stTreeData.Init(m_pstTab->key_fields() - 1, m_pstTab->key_format(), stTask.packed_key()); + } + if (iRet != EC_NO_MEM) + pstNode->vd_handle() = m_stTreeData.get_handle(); + + if (iRet != 0) + { + snprintf(m_szErr, sizeof(m_szErr), "raw-data init error: %s", m_stTreeData.get_err_msg()); + /*标记加入黑名单*/ + stTask.push_black_list_size(m_stTreeData.need_size()); + m_pstPool->purge_node(stTask.packed_key(), *pstNode); + return (-2); + } + + if (stTask.result != NULL) + { + ResultSet *pstResultSet = stTask.result; + for (int i = 0; i < pstResultSet->total_rows(); i++) + { + RowValue *pstRow = pstResultSet->_fetch_row(); + if (pstRow == NULL) + { + log_debug("%s!", "call fetch_row func error"); + m_pstPool->purge_node(stTask.packed_key(), *pstNode); + m_stTreeData.Destroy(); + return (-3); + } + + if (laid > 0 && stTask.compare_row(*pstRow)) + { + if (matchedCount >= limitStart && 
matchedCount < limitStop) + { + (*pstRow)[laid].s64 = stTask.Timestamp(); + } + matchedCount++; + } + + if (stpTaskTab != stpNodeTab) + { + stpNodeRow->Copy(pstRow); + } + else + { + stpNodeRow = pstRow; + } + + /* 插入当前行 */ + iRet = m_stTreeData.insert_row(*stpNodeRow, KeyCompare, false); + + /* 如果内存空间不足,尝试扩大最多两次 */ + if (iRet == EC_NO_MEM) + { + + if (try_purge_count >= 2) + { + goto ERROR_PROCESS; + } + + /* 尝试次数 */ + ++try_purge_count; + if (m_pstPool->try_purge_size(m_stTreeData.need_size(), *pstNode) == 0) + iRet = m_stTreeData.insert_row(*stpNodeRow, KeyCompare, false); + } + if (iRet != EC_NO_MEM) + pstNode->vd_handle() = m_stTreeData.get_handle(); + + /* 当前行操作成功 */ + if (0 == iRet) + continue; + ERROR_PROCESS: + snprintf(m_szErr, sizeof(m_szErr), "raw-data insert row error: ret=%d,err=%s, cnt=%d", + iRet, m_stTreeData.get_err_msg(), try_purge_count); + /*标记加入黑名单*/ + stTask.push_black_list_size(all_rows_size); + m_pstPool->purge_node(stTask.packed_key(), *pstNode); + m_stTreeData.Destroy(); + return (-4); + } + + m_llRowsInc += pstResultSet->total_rows(); + } + + history_rowsize.push(m_stTreeData.total_rows()); + + return (0); +} + +int TreeDataProcess::replace_rows(TaskRequest &stTask, Node *pstNode, RawData *pstAffectedRows, bool async, bool setrows = false) +{ + int iRet; + log_debug("Replace TreeData start! 
"); + + m_llRowsInc = 0; + m_llDirtyRowsInc = 0; + + if (pstNode) + { + iRet = m_stTreeData.Attach(pstNode->vd_handle()); + if (iRet != 0) + { + log_error("attach tree data error: %d", iRet); + return (iRet); + } + } + else + { + iRet = m_stTreeData.Init(stTask.packed_key()); + if (iRet == EC_NO_MEM) + { + if (m_pstPool->try_purge_size(m_stTreeData.need_size(), *pstNode) == 0) + iRet = m_stTreeData.Init(stTask.packed_key()); + } + + if (iRet != 0) + { + log_error("tree-data replace[handle:" UINT64FMT "] error: %d,%s", pstNode->vd_handle(), iRet, m_stTreeData.get_err_msg()); + return iRet; + } + + pstNode->vd_handle() = m_stTreeData.get_handle(); + } + + unsigned char uchRowFlags; + iRet = m_stTreeData.replace_tree_data(stTask, pstNode, pstAffectedRows, async, uchRowFlags, setrows); + if (iRet == EC_NO_MEM) + { + if (m_pstPool->try_purge_size(m_stTreeData.need_size(), *pstNode) == 0) + iRet = m_stTreeData.replace_tree_data(stTask, pstNode, pstAffectedRows, async, uchRowFlags, setrows); + } + + if (iRet != 0) + { + log_error("tree-data replace[handle:" UINT64FMT "] error: %d,%s", pstNode->vd_handle(), iRet, m_stTreeData.get_err_msg()); + return iRet; + } + + if (uchRowFlags & OPER_DIRTY) + m_llDirtyRowsInc--; + if (async) + m_llDirtyRowsInc++; + + uint64_t ullAffectedRows = m_stTreeData.get_affectedrows(); + if (ullAffectedRows == 0) //insert + { + DTCTableDefinition *stpTaskTab; + RowValue *stpNewRow; + stpTaskTab = stTask.table_definition(); + RowValue stNewRow(stpTaskTab); + stNewRow.default_value(); + stpNewRow = &stNewRow; + stTask.update_row(*stpNewRow); //获取Replace的行 + iRet = m_stTreeData.insert_row(*stpNewRow, KeyCompare, async); // 加进cache + if (iRet == EC_NO_MEM) + { + if (m_pstPool->try_purge_size(m_stTreeData.need_size(), *pstNode) == 0) + iRet = m_stTreeData.insert_row(*stpNewRow, KeyCompare, async); + } + if (iRet != EC_NO_MEM) + pstNode->vd_handle() = m_stTreeData.get_handle(); + + if (iRet != 0) + { + snprintf(m_szErr, sizeof(m_szErr), "raw-data 
replace row error: %d, %s", + iRet, m_stTreeData.get_err_msg()); + /*标记加入黑名单*/ + stTask.push_black_list_size(m_stTreeData.need_size()); + return (-3); + } + m_llRowsInc++; + ullAffectedRows++; + if (async) + m_llDirtyRowsInc++; + } + if (async == true || setrows == true) + { + stTask.resultInfo.set_affected_rows(ullAffectedRows); + } + else if (ullAffectedRows != stTask.resultInfo.affected_rows()) + { + //如果cache更新纪录数和helper更新的纪录数不相等 + log_debug("unequal affected rows, cache[%lld], helper[%lld]", + (long long)ullAffectedRows, + (long long)stTask.resultInfo.affected_rows()); + } + + return 0; +} + +int TreeDataProcess::update_data(TaskRequest &stTask, Node *pstNode, RawData *pstAffectedRows, bool async, bool setrows = false) +{ + int iRet; + log_debug("Update TreeData start! "); + + m_llRowsInc = 0; + m_llDirtyRowsInc = 0; + + iRet = m_stTreeData.Attach(pstNode->vd_handle()); + if (iRet != 0) + { + log_error("attach tree data error: %d", iRet); + return (iRet); + } + + m_stTreeData.set_affected_rows(0); + + iRet = m_stTreeData.update_tree_data(stTask, pstNode, pstAffectedRows, async, setrows); + if (iRet == EC_NO_MEM) + { + if (m_pstPool->try_purge_size(m_stTreeData.need_size(), *pstNode) == 0) + iRet = m_stTreeData.update_tree_data(stTask, pstNode, pstAffectedRows, async, setrows); + } + + if (iRet != 0) + { + log_error("tree-data update[handle:" UINT64FMT "] error: %d,%s", pstNode->vd_handle(), iRet, m_stTreeData.get_err_msg()); + return iRet; + } + + uint64_t ullAffectedRows = m_stTreeData.get_affectedrows(); + m_llDirtyRowsInc = m_stTreeData.dirty_rows_inc(); + + if (async == true || setrows == true) + { + stTask.resultInfo.set_affected_rows(ullAffectedRows); + } + else if (ullAffectedRows != stTask.resultInfo.affected_rows()) + { + //如果cache更新纪录数和helper更新的纪录数不相等 + log_debug("unequal affected rows, cache[%lld], helper[%lld]", + (long long)ullAffectedRows, + (long long)stTask.resultInfo.affected_rows()); + } + + return (0); +} + +int 
TreeDataProcess::flush_data(DTCFlushRequest *pstFlushReq, Node *pstNode, unsigned int &uiFlushRowsCnt) +{ + int iRet; + + log_debug("flush_data start! "); + + m_llRowsInc = 0; + m_llDirtyRowsInc = 0; + + iRet = m_stTreeData.Attach(pstNode->vd_handle()); + if (iRet != 0) + { + snprintf(m_szErr, sizeof(m_szErr), "attach data error"); + log_error("tree-data attach[handle:" UINT64FMT "] error: %d,%s", pstNode->vd_handle(), iRet, m_stTreeData.get_err_msg()); + return (-1); + } + + iRet = m_stTreeData.flush_tree_data(pstFlushReq, pstNode, uiFlushRowsCnt); + if (iRet != 0) + { + snprintf(m_szErr, sizeof(m_szErr), "flush tree data error"); + log_error("tree-data flush[handle:" UINT64FMT "] error: %d,%s", pstNode->vd_handle(), iRet, m_stTreeData.get_err_msg()); + return iRet; + } + + m_llDirtyRowsInc = m_stTreeData.dirty_rows_inc(); + + return (0); +} + +int TreeDataProcess::purge_data(DTCFlushRequest *pstFlushReq, Node *pstNode, unsigned int &uiFlushRowsCnt) +{ + int iRet; + + log_debug("purge_data start! "); + + iRet = flush_data(pstFlushReq, pstNode, uiFlushRowsCnt); + if (iRet != 0) + { + return (iRet); + } + m_llRowsInc = 0LL - m_stTreeData.total_rows(); + + return 0; +} + +int TreeDataProcess::destroy_data(Node *pstNode) +{ + if (pstNode->vd_handle() == INVALID_HANDLE) + return 0; + TreeData treeData(m_pMallocator); + treeData.Attach(pstNode->vd_handle()); + treeData.Destroy(); + pstNode->vd_handle() = INVALID_HANDLE; + return 0; +} \ No newline at end of file diff --git a/src/search_local/index_storage/cache/tree_data_process.h b/src/search_local/index_storage/cache/tree_data_process.h new file mode 100644 index 0000000..6a288fd --- /dev/null +++ b/src/search_local/index_storage/cache/tree_data_process.h @@ -0,0 +1,178 @@ +/* + * ===================================================================================== + * + * Filename: tree_data_process.h + * + * Description: tree data process interface. 
+ * + * Version: 1.0 + * Created: 09/08/2020 10:02:05 PM + * Revision: none + * Compiler: gcc + * + * Author: Norton, yangshuang68@jd.com + * Company: JD.com, Inc. + * + * ===================================================================================== + */ + +#ifndef TREE_DATA_PROCESS_H +#define TREE_DATA_PROCESS_H + +#include "buffer_def.h" +#include "protocol.h" +#include "value.h" +#include "field.h" +#include "section.h" +#include "table_def.h" +#include "task_request.h" +#include "stat_dtc.h" +#include "tree_data.h" +#include "node.h" +#include "data_process.h" +#include "buffer_pool.h" +#include "namespace.h" +#include "stat_manager.h" +#include "data_chunk.h" + +DTC_BEGIN_NAMESPACE + +class TaskRequest; +class DTCFlushRequest; + +class TreeDataProcess + : public DataProcess +{ +private: + TreeData m_stTreeData; + DTCTableDefinition *m_pstTab; + Mallocator *m_pMallocator; + DTCBufferPool *m_pstPool; + UpdateMode m_stUpdateMode; + int64_t m_llRowsInc; + int64_t m_llDirtyRowsInc; + char m_szErr[200]; + + unsigned int nodeSizeLimit; // -DEBUG- + + StatSample history_datasize; + StatSample history_rowsize; + +protected: + int attach_data(Node *pstNode, RawData *pstAffectedRows); + +public: + void change_mallocator(Mallocator *pstMalloc) + { + log_debug("oring mallc: %p, new mallc: %p", m_pMallocator, pstMalloc); + m_pMallocator = pstMalloc; + m_stTreeData.change_mallocator(pstMalloc); + } + + TreeDataProcess(Mallocator *pstMalloc, DTCTableDefinition *pstTab, DTCBufferPool *pstPool, const UpdateMode *pstUpdateMode); + ~TreeDataProcess(); + + const char *get_err_msg() { return m_szErr; } + void set_insert_mode(EUpdateMode iMode) {} + void set_insert_order(int iOrder) {} + + /************************************************* + Description: get expire time + Output: + *************************************************/ + int get_expire_time(DTCTableDefinition *t, Node *pstNode, uint32_t &expire); + + /************************************************* + 
Description: + Output: + *************************************************/ + int expand_node(TaskRequest &stTask, Node *pstNode); + + /************************************************* + Description: + Output: + *************************************************/ + int dirty_rows_in_node(TaskRequest &stTask, Node *pstNode); + + /************************************************* + Description: + Output: + *************************************************/ + int64_t rows_inc() { return m_llRowsInc; }; + + /************************************************* + Description: + Output: + *************************************************/ + int64_t dirty_rows_inc() { return m_llDirtyRowsInc; } + + /************************************************* + Description: + Output: + *************************************************/ + int get_all_rows(Node *pstNode, RawData *pstRows); + + /************************************************* + Description: + Output: + *************************************************/ + int delete_data(TaskRequest &stTask, Node *pstNode, RawData *pstAffectedRows); + + /************************************************* + Description: + Output: + *************************************************/ + int replace_data(TaskRequest &stTask, Node *pstNode); + + /************************************************* + Description: + Output: + *************************************************/ + int replace_rows(TaskRequest &stTask, Node *pstNode, RawData *pstAffectedRows, bool async, bool setrows); + + /************************************************* + Description: + Output: + *************************************************/ + int update_data(TaskRequest &stTask, Node *pstNode, RawData *pstAffectedRows, bool async, bool setrows); + + /************************************************* + Description: + Output: + *************************************************/ + int flush_data(DTCFlushRequest *pstFlushReq, Node *pstNode, unsigned int &uiFlushRowsCnt); + + 
/************************************************* + Description: + Output: + *************************************************/ + int purge_data(DTCFlushRequest *pstFlushReq, Node *pstNode, unsigned int &uiFlushRowsCnt); + + /************************************************* + Description: append data in t-tree + Output: + *************************************************/ + int append_data(TaskRequest &stTask, Node *pstNode, RawData *pstAffectedRows, bool isDirty, bool setrows); + + /************************************************* + Description: replace data in t-tree + Output: + *************************************************/ + int replace_data(Node *pstNode, RawData *pstRawData); + + /************************************************* + Description: get data in t-tree + Output: + *************************************************/ + int get_data(TaskRequest &stTask, Node *pstNode); + + /************************************************* + Description: destroy t-tree + Output: + *************************************************/ + int destroy_data(Node *pstNode); +}; + +DTC_END_NAMESPACE + +#endif \ No newline at end of file