add vector search

2022-06-01 17:54:56 +08:00 · 2022-06-01 17:54:56 +08:00 · 5bf083cee4
commit 5bf083cee4
parent e960e85e5b
801 changed files with 307516 additions and 5232 deletions
--- a/25
+++ b/25
@ -491,3 +491,28 @@ See https://gitee.com/mirrors/rocksdb/blob/master/LICENSE.Apache
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
 ========================================================================
 See https://github.com/facebookresearch/faiss/blob/main/LICENSE
 MIT License
 Copyright (c) Facebook, Inc. and its affiliates.
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
--- a/README.md
+++ b/README.md
@ -1,3 +1,7 @@
 ### v2.1重大更新！！！
 本次更新新增向量检索功能，赋能算法领域，详细介绍请参考[Vector.md](https://gitee.com/jd-platform-opensource/isearch/blob/master/docs/Vector.md)和[向量检索使用说明文档](https://gitee.com/jd-platform-opensource/isearch/blob/master/docs/Vector.doc)
 ### 项目背景与介绍
 本团队开发的检索服务提供分词和建立索引功能，可自定义中文词库，可满足复杂查询需求，支持包括字段检索、字段排序、布尔搜索、范围检索、地理位置（POI、AOI）查询等功能。
@ -6,6 +10,7 @@
 2)    C++开发，轻量化，4核8G内存的机器就能支持大规模存储。
 3)    支持存储节点分片数动态增加，方便扩容。
 4)    日志即存储。
 5)    支持向量检索。
 ### 体验demo项目
@ -19,6 +24,7 @@
 2.支持http方式进行数据导入和查询（需部署接入层服务），若通过sdk或tcp方式访问，则只需部署索引层服务即可  
 3.采用稳定高效的C++开发，高速搜索响应，架构简洁    
 4.提供了丰富的功能，开发周期更短，支持包括字段检索、字段排序、布尔搜索、范围检索、地理位置（POI、AOI）查询等功能  
 5.支持向量检索，请参考[Vector.md](https://gitee.com/jd-platform-opensource/isearch/blob/master/docs/Vector.md)
 ### 接口文档
@ -59,7 +65,7 @@ field_name：字段名称，由用户自行定义
 is_primary_key：该字段是否为主键，一般需要将文档id字段设置为主键，唯一标识一条记录
-field_type：字段类型，1:INT，2:SHORT_TEXT，3:TEXT，4:IP，5:GEO_POINT，9:DOUBLE，10:LONG，11:联合索引，14:GEO_SHAPE
+field_type：字段类型，1:INT，2:SHORT_TEXT，3:TEXT，4:IP，5:GEO_POINT，9:DOUBLE，10:LONG，11:联合索引，14:GEO_SHAPE，15:VECTOR
 index_tag：是否需要对该字段建索引
--- a/build.sh
+++ b/build.sh
@ -30,6 +30,7 @@ index_read="index_read"
 index_storage="index_storage"
 search_local="search_local"
 search_agent="search_agent"
 vector_index="vector_index_helper"
 src_common="$srcdir/$common"
 src_stat="$srcdir/$common/$stat"
@ -37,6 +38,7 @@ src_index_write="$srcdir/$search_local/$index_write"
 src_index_read="$srcdir/$search_local/$index_read"
 src_index_storage="$srcdir/$search_local/$index_storage"
 src_search_agent="$srcdir/$search_agent"
 src_vector_index="$srcdir/$search_local/$vector_index"
 cd $src_common
 cmake .
@ -65,3 +67,8 @@ cd $src_search_agent
 cmake .
 make
 cd $localdir
 cd $src_vector_index
 cmake .
 make
 cd $localdir
--- a/dockerfiles/Dockerfile
+++ b/dockerfiles/Dockerfile
@ -1,4 +1,4 @@
-FROM intelligentsearch/isearch_env:2.0
+FROM intelligentsearch/isearch_env
 COPY install.sh /root/install.sh
 COPY start.sh /root/start.sh
 RUN /root/install.sh
--- a/dockerfiles/env/Dockerfile
+++ b/dockerfiles/env/Dockerfile
@ -1,20 +1,40 @@
 FROM centos:centos7.2.1511
 RUN yum -y update
 RUN yum install -y wget pcre pcre-devel gcc gcc-c++ make zlib-devel sudo openssh-server vim lrzsz openssl-devel &&\
    yum install -y crypto-policies snappy-devel psmisc git epel-release jq && \
    yum install -y autoconf automake libtool gcc-gfortran bzip2 &&\
    yum clean all && \ 
-    useradd --create-home --no-log-init --shell /bin/bash isearch && echo "isearch:isearch" | chpasswd && \
+    cd /usr/local &&\
    wget http://storage.jd.com/lbs-search-acc/gcc-4.9.3.tar.bz2 &&\
    tar jxvf gcc-4.9.3.tar.bz2 &&\
    cd gcc-4.9.3 &&\
    wget http://storage.jd.com/lbs-search-acc/mpfr-2.4.2.tar.bz2  &&\
    wget http://storage.jd.com/lbs-search-acc/gmp-4.3.2.tar.bz2  &&\
    wget http://storage.jd.com/lbs-search-acc/mpc-0.8.1.tar.gz &&\
    wget http://storage.jd.com/lbs-search-acc/isl-0.12.2.tar.bz2  &&\
    wget http://storage.jd.com/lbs-search-acc/cloog-0.18.1.tar.gz &&\
    wget http://storage.jd.com/lbs-search-acc/download_prerequisites &&\
    mv download_prerequisites ./contrib/download_prerequisites &&\
    chmod +x ./contrib/download_prerequisites &&\
    ./contrib/download_prerequisites &&\
    cd .. &&\
    mkdir build-gcc &&\
    cd build-gcc &&\
    ../gcc-4.9.3/configure --prefix=/usr --mandir=/usr/share/man --infodir=/usr/share/info --enable-bootstrap --enable-shared --enable-threads=posix --enable-checking=release --with-system-zlib --enable-__cxa_atexit --disable-libunwind-exceptions --enable-gnu-unique-object --enable-linker-build-id --with-linker-hash-style=gnu --enable-languages=c,c++ --enable-plugin --enable-initfini-array --disable-libgcj --enable-gnu-indirect-function --with-tune=generic --disable-multilib &&\
    make -j8 &&\
    make install
 RUN useradd --create-home --no-log-init --shell /bin/bash isearch && echo "isearch:isearch" | chpasswd && \
    mkdir -p /env/app &&\
    cd /env/app/ &&\
-    wget https://cmake.org/files/v3.20/cmake-3.20.0-rc3.tar.gz &&\
+    wget --no-check-certificate https://cmake.org/files/v3.21/cmake-3.21.2.tar.gz &&\
-    tar xf cmake-3.20.0-rc3.tar.gz &&\
+    tar xf cmake-3.21.2.tar.gz &&\
-    cd /env/app/cmake-3.20.0-rc3 &&\
+    cd /env/app/cmake-3.21.2 &&\
    ./bootstrap &&\
    gmake &&\
    gmake install &&\
    cd /usr/bin &&\
-    ln -s cmake3 cmake &&\
+    ln -s cmake3 cmake
-    cd /usr/local &&\
+
 RUN cd /usr/local &&\
    git clone https://github.com/facebook/rocksdb.git &&\
    cd rocksdb &&\
    git checkout -b 6.6.0 ad528fe5ca08dafff47d79c85abbf3e1fbb21568 &&\
@ -29,4 +49,30 @@ RUN yum install -y wget pcre pcre-devel gcc gcc-c++ make zlib-devel sudo openssh
    ldconfig &&\
    ln -s /usr/local/lib/libgflags.so.2.2 /lib64
 RUN yum install -y unzip gcc-gfortran && cd /usr/local &&\
    wget http://storage.jd.com/lbs-search-acc/protobuf-3.12.2.zip &&\
    unzip protobuf-3.12.2.zip &&\
    cd protobuf-3.12.2 &&\
    ./autogen.sh &&\
    ./configure &&\
    make -j8 && make install &&\
    ln -s /usr/local/lib/libprotobuf.so.23.0.2 /lib64/libprotobuf.so.23 &&\
    ln -s /usr/local/lib/libprotoc.so.23.0.2 /lib64/libprotoc.so.23
 RUN yum install openblas-devel.x86_64 -y && cd /usr/local &&\
    wget http://storage.jd.com/lbs-search-acc/lapack-3.10.0.tar.gz &&\
    tar zxvf lapack-3.10.0.tar.gz &&\
    cd lapack-3.10.0 &&\
    mkdir build &&\
    cd build &&\
    cmake -DCMAKE_INSTALL_LIBDIR=$HOME/.local/lapack .. &&\
    cmake --build . -j --target install &&\
    ln -s /usr/local/lapack-3.10.0/build/lib/liblapack.a /lib64/ &&\
    cd /usr/local &&\
    wget http://storage.jd.com/lbs-search-acc/faiss-main.zip &&\
    unzip faiss-main.zip &&\
    cd faiss-main &&\
    cmake -B build . -DFAISS_ENABLE_GPU=OFF -DFAISS_ENABLE_PYTHON=OFF -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=OFF -DBUILD_SHARED_LIBS=ON -DBLA_VENDOR=Generic -DBLAS_LIBRARIES=/lib64/libopenblas.so -DLAPACK_LIBRARIES=/lib64/liblapack.a &&\
    cd build/ &&\
    make -j8 && make install &&\
    ln -s /usr/local/faiss-main/build/faiss/libfaiss.so /lib64/
--- a/dockerfiles/install.sh
+++ b/dockerfiles/install.sh
@ -43,7 +43,7 @@ chmod +x dtcd.sh
 ln -s $dstdir/src/search_local/index_storage/api/c_api_cc/libdtc-gcc-4.8-r4646582.so /lib64/libdtc.so.1
 cd $dstdir
-mkdir index_write index_read search_agent
+mkdir index_write index_read search_agent vector_index_helper
 cd index_write
 mkdir log bin stat conf
 cp $srcdir/resource/index_write/conf/{index_gen.json,index_write.conf,localCluster.json} conf
@ -63,6 +63,14 @@ mkdir log bin conf
 cp $srcdir/resource/search_agent/conf/sa.conf conf
 cp $srcdir/src/search_agent/bin/search_agent bin/
 cd $dstdir/vector_index_helper
 mkdir log bin conf rocksdb
 cp $srcdir/resource/app_field_define.json conf
 cp $srcdir/resource/vector_index/conf/vector_index.conf conf
 cp $srcdir/resource/vector_index/conf/ivfflat.faissindex /tmp
 cp $srcdir/resource/vector_index/conf/trained_index.faissindex /tmp
 cp $srcdir/src/search_local/vector_index_helper/bin/vector_index bin/
 yum install -y jq
 cd /usr/local/isearch
 mkdir tools
--- a/dockerfiles/start.sh
+++ b/dockerfiles/start.sh
@ -10,6 +10,9 @@ INIT(){
 	./dtcd.sh start
 	cd /usr/local/isearch/index_storage/original_data/bin
 	./dtcd.sh start
 	cd /usr/local/isearch/vector_index_helper/bin
 	./vector_index
 	sleep 1
 	cd /usr/local/isearch/index_write/bin
 	./index_write
 	cd /usr/local/isearch/index_read/bin
--- a/docs/Quickstart.md
+++ b/docs/Quickstart.md
@ -1,10 +1,10 @@
 ## 源码编译方式（推荐）
-建议通过isearch_env镜像进行源码编译，获取isearch_env镜像的方式为： `docker pull intelligentsearch/isearch_env:2.0`
+建议通过isearch_env镜像进行源码编译，获取isearch_env镜像的方式为： `docker pull intelligentsearch/isearch_env`
-也可以自行编译isearch_env镜像，Dockerfile文件位于dockerfiles\env目录： `docker build -t intelligentsearch/isearch_env:2.0 .`
+也可以自行编译isearch_env镜像，Dockerfile文件位于dockerfiles/env目录： `docker build -t intelligentsearch/isearch_env .`
-然后运行容器： `docker run -itd intelligentsearch/isearch_env:2.0`
+然后运行容器： `docker run -itd intelligentsearch/isearch_env`
 进入容器： `docker exec -it 容器id /bin/bash`
--- a/docs/Vector.doc
+++ b/docs/Vector.doc
--- a/docs/Vector.md
+++ b/docs/Vector.md
@ -0,0 +1,50 @@
 ## 背景
 向量检索是指用一组数字(向量)来量化一个事物，用大量向量来表示事物集合，用向量计算的方式寻找相似事物的一种检索方式。
 isearch底层采用的向量检索框架为Facebook AI的Faiss，项目地址为：https://github.com/facebookresearch/faiss 
 ## app_field_define表
 在app_field_define表定义时，vector字段类型需定义好dim、index_type和metric_type三个属性，示例如下：
 ```
 {
     "id":3,
     "appId":10065,
     "fieldName":"float_vector",
     "fieldType":15,
     "fieldId":3,
     "IsPrimaryKey":0,
     "indexTag":0,
     "snapshotTag":1,
     "segmentTag":0,
     "segmentFeature":0,
     "unionField":"",
     "createTime":"2021/4/13 15:49:09",
     "dim":128, // 维数
     "index_type": ["PCA80,Flat"], // 索引类型，格式与faiss的index_factory工厂字符串保持一致
     "metric_type": "L2" // 距离计算方式，可选值：InnerProduct、L2
 }
 ```
 说明：index_type的可选取值及书写格式请参考 https://github.com/facebookresearch/faiss/wiki/The-index-factory
 ## 向量插入示例
 ```
 curl -X POST \
  http://127.0.0.1/insert \
  -H 'content-type: application/json' \
  -H 'doc_id: 1' \
  -d '{"appid":10065,"table_content":{"cmd":"add","fields":{"doc_id":"1","random_value":1488981884,"float_vector":[0.005653954876242762, 0.632130963117687, 0.7519577013172226, 0.8568273368123129, 0.2034335192251041, 0.9786219451736441, 0.5948105950093241, 0.9618089054657426]}}}'
 ```
 ## 向量查询示例
 ```
 curl -X POST \
  http://127.0.0.1/search \
  -H 'content-type: application/json' \
  -d '{"appid":10065,"query":{"vector_query":{"float_vector":[0.005653954876242762, 0.632130963117687, 0.7519577013172226, 0.8568273368123129, 0.2034335192251041, 0.9786219451736441, 0.5948105950093241, 0.9618089054657426], "index_type_id":1}} }'
 ```
--- a/install.sh
+++ b/install.sh
@ -50,8 +50,18 @@ chmod +x dtcd.sh
 ln -s $dstdir/src/search_local/index_storage/api/c_api_cc/libdtc-gcc-4.8-r4646582.so /lib64/libdtc.so.1
 cd $dstdir
-mkdir index_write index_read search_agent
+mkdir index_write index_read search_agent vector_index_helper
-cd index_write
+cd $dstdir/vector_index_helper
 mkdir log bin conf rocksdb
 cp $srcdir/resource/app_field_define.json conf
 cp $srcdir/resource/vector_index/conf/vector_index.conf conf
 cp $srcdir/resource/vector_index/conf/ivfflat.faissindex /tmp
 cp $srcdir/resource/vector_index/conf/trained_index.faissindex /tmp
 cp $srcdir/src/search_local/vector_index_helper/bin/vector_index bin/
 cd bin
 ./vector_index
 cd $dstdir/index_write
 mkdir log bin stat conf
 cp $srcdir/resource/index_write/conf/{index_gen.json,index_write.conf,localCluster.json} conf
 cp $srcdir/resource/{app_field_define.json,character_map.txt,msr_training.utf8,phonetic_base.txt,phonetic_map.txt,stop_words.dict,words_base.txt} conf
--- a/resource/app_field_define.json
+++ b/resource/app_field_define.json
@ -1,20 +1,17 @@
 {
 	"tableDefine": [{
 		"id": 1,
 			"appId": 10064,
 			"fieldDefine": [{
 				"fieldName": "doc_id",
 				"fieldType": 2,
 				"fieldId": 1,
 				"IsPrimaryKey": 1,
 				"indexTag": 0,
-		"snapshotTag": 0,
+				"snapshotTag": 1,
 				"segmentTag": 1,
 				"segmentFeature": 0,
-		"unionField": "",
+				"unionField": ""
 		"createTime": "2021/4/13 15:49:09"
 			}, {
 		"id": 2,
 		"appId": 10064,
 				"fieldName": "source",
 				"fieldType": 1,
 				"fieldId": 2,
@ -23,11 +20,8 @@
 				"snapshotTag": 1,
 				"segmentTag": 0,
 				"segmentFeature": 0,
-		"unionField": "",
+				"unionField": ""
 		"createTime": "2021/4/13 15:49:09"
 			}, {
 		"id": 3,
 		"appId": 10064,
 				"fieldName": "style",
 				"fieldType": 1,
 				"fieldId": 3,
@ -36,11 +30,8 @@
 				"snapshotTag": 1,
 				"segmentTag": 0,
 				"segmentFeature": 0,
-		"unionField": "",
+				"unionField": ""
 		"createTime": "2021/4/13 15:49:09"
 			}, {
 		"id": 4,
 		"appId": 10064,
 				"fieldName": "sub_position",
 				"fieldType": 1,
 				"fieldId": 4,
@ -49,11 +40,8 @@
 				"snapshotTag": 1,
 				"segmentTag": 0,
 				"segmentFeature": 0,
-		"unionField": "",
+				"unionField": ""
 		"createTime": "2021/4/13 15:49:09"
 			}, {
 		"id": 5,
 		"appId": 10064,
 				"fieldName": "private_status",
 				"fieldType": 1,
 				"fieldId": 5,
@ -62,11 +50,8 @@
 				"snapshotTag": 1,
 				"segmentTag": 0,
 				"segmentFeature": 0,
-		"unionField": "",
+				"unionField": ""
 		"createTime": "2021/4/13 15:49:09"
 			}, {
 		"id": 6,
 		"appId": 10064,
 				"fieldName": "author_id",
 				"fieldType": 2,
 				"fieldId": 6,
@ -75,11 +60,8 @@
 				"snapshotTag": 1,
 				"segmentTag": 1,
 				"segmentFeature": 2,
-		"unionField": "",
+				"unionField": ""
 		"createTime": "2021/4/13 15:49:09"
 			}, {
 		"id": 7,
 		"appId": 10064,
 				"fieldName": "status",
 				"fieldType": 1,
 				"fieldId": 7,
@ -88,11 +70,8 @@
 				"snapshotTag": 1,
 				"segmentTag": 0,
 				"segmentFeature": 0,
-		"unionField": "",
+				"unionField": ""
 		"createTime": "2021/4/13 15:49:09"
 			}, {
 		"id": 8,
 		"appId": 10064,
 				"fieldName": "createtime",
 				"fieldType": 1,
 				"fieldId": 8,
@ -101,11 +80,8 @@
 				"snapshotTag": 1,
 				"segmentTag": 0,
 				"segmentFeature": 0,
-		"unionField": "",
+				"unionField": ""
 		"createTime": "2021/4/13 15:49:09"
 			}, {
 		"id": 9,
 		"appId": 10064,
 				"fieldName": "title",
 				"fieldType": 2,
 				"fieldId": 10,
@ -114,11 +90,8 @@
 				"snapshotTag": 1,
 				"segmentTag": 1,
 				"segmentFeature": 2,
-		"unionField": "",
+				"unionField": ""
 		"createTime": "2021/4/13 15:49:09"
 			}, {
 		"id": 11,
 		"appId": 10064,
 				"fieldName": "idx_author_title",
 				"fieldType": 11,
 				"fieldId": 11,
@ -127,414 +100,102 @@
 				"snapshotTag": 0,
 				"segmentTag": 0,
 				"segmentFeature": 0,
-		"unionField": "6,10",
+				"unionField": "6,10"
 		"createTime": "2021/4/13 15:49:09"
 			}, {
-		"id": 86,
+				"fieldName": "skus",
 		"appId": 10061,
 		"fieldName": "doc_id",
 				"fieldType": 2,
-		"fieldId": 0,
+				"fieldId": 12,
 		"IsPrimaryKey": 1,
 		"indexTag": 0,
 		"snapshotTag": 0,
 		"segmentTag": 0,
 		"segmentFeature": 0,
 		"unionField": "",
 		"createTime": "2021/4/13 15:49:09"
 	}, {
 		"id": 87,
 		"appId": 10061,
 		"fieldName": "poi_name",
 		"fieldType": 2,
 		"fieldId": 1,
 				"IsPrimaryKey": 0,
 				"indexTag": 1,
 				"snapshotTag": 1,
 				"segmentTag": 1,
 		"segmentFeature": 1,
 		"unionField": "",
 		"createTime": "2021/4/13 15:49:09"
 	}, {
 		"id": 88,
 		"appId": 10061,
 		"fieldName": "gd_c1_type",
 		"fieldType": 2,
 		"fieldId": 2,
 		"IsPrimaryKey": 0,
 		"indexTag": 0,
 		"snapshotTag": 1,
 		"segmentTag": 1,
 				"segmentFeature": 0,
-		"unionField": "",
+				"unionField": ""
 		"createTime": "2021/4/13 15:49:09"
 			}, {
-		"id": 89,
+				"fieldName": "to_audit",
-		"appId": 10061,
+				"fieldType": 1,
 		"fieldName": "gd_c2_type",
 		"fieldType": 2,
 		"fieldId": 3,
 		"IsPrimaryKey": 0,
 		"indexTag": 0,
 		"snapshotTag": 1,
 		"segmentTag": 1,
 		"segmentFeature": 0,
 		"unionField": "",
 		"createTime": "2021/4/13 15:49:09"
 	}, {
 		"id": 90,
 		"appId": 10061,
 		"fieldName": "gd_c3_type",
 		"fieldType": 2,
 		"fieldId": 4,
 		"IsPrimaryKey": 0,
 		"indexTag": 0,
 		"snapshotTag": 1,
 		"segmentTag": 1,
 		"segmentFeature": 0,
 		"unionField": "",
 		"createTime": "2021/4/13 15:49:09"
 	}, {
 		"id": 91,
 		"appId": 10061,
 		"fieldName": "gd_c1_typecode",
 		"fieldType": 2,
 		"fieldId": 5,
 		"IsPrimaryKey": 0,
 		"indexTag": 0,
 		"snapshotTag": 1,
 		"segmentTag": 1,
 		"segmentFeature": 0,
 		"unionField": "",
 		"createTime": "2021/4/13 15:49:09"
 	}, {
 		"id": 92,
 		"appId": 10061,
 		"fieldName": "gd_c2_typecode",
 		"fieldType": 2,
 		"fieldId": 6,
 		"IsPrimaryKey": 0,
 		"indexTag": 0,
 		"snapshotTag": 1,
 		"segmentTag": 1,
 		"segmentFeature": 0,
 		"unionField": "",
 		"createTime": "2021/4/13 15:49:09"
 	}, {
 		"id": 93,
 		"appId": 10061,
 		"fieldName": "gd_c3_typecode",
 		"fieldType": 2,
 		"fieldId": 7,
 		"IsPrimaryKey": 0,
 		"indexTag": 0,
 		"snapshotTag": 1,
 		"segmentTag": 1,
 		"segmentFeature": 0,
 		"unionField": "",
 		"createTime": "2021/4/13 15:49:09"
 	}, {
 		"id": 94,
 		"appId": 10061,
 		"fieldName": "std_region",
 		"fieldType": 2,
 		"fieldId": 8,
 		"IsPrimaryKey": 0,
 		"indexTag": 0,
 		"snapshotTag": 1,
 		"segmentTag": 1,
 		"segmentFeature": 0,
 		"unionField": "",
 		"createTime": "2021/4/13 15:49:09"
 	}, {
 		"id": 95,
 		"appId": 10061,
 		"fieldName": "std_regionid",
 		"fieldType": 2,
 		"fieldId": 9,
 		"IsPrimaryKey": 0,
 		"indexTag": 0,
 		"snapshotTag": 1,
 		"segmentTag": 1,
 		"segmentFeature": 0,
 		"unionField": "",
 		"createTime": "2021/4/13 15:49:09"
 	}, {
 		"id": 96,
 		"appId": 10061,
 		"fieldName": "std_province",
 		"fieldType": 2,
 		"fieldId": 10,
 		"IsPrimaryKey": 0,
 		"indexTag": 0,
 		"snapshotTag": 1,
 		"segmentTag": 1,
 		"segmentFeature": 0,
 		"unionField": "",
 		"createTime": "2021/4/13 15:49:09"
 	}, {
 		"id": 97,
 		"appId": 10061,
 		"fieldName": "std_provinceid",
 		"fieldType": 2,
 		"fieldId": 11,
 		"IsPrimaryKey": 0,
 		"indexTag": 0,
 		"snapshotTag": 1,
 		"segmentTag": 1,
 		"segmentFeature": 0,
 		"unionField": "",
 		"createTime": "2021/4/13 15:49:09"
 	}, {
 		"id": 98,
 		"appId": 10061,
 		"fieldName": "std_city",
 		"fieldType": 2,
 		"fieldId": 12,
 		"IsPrimaryKey": 0,
 		"indexTag": 0,
 		"snapshotTag": 1,
 		"segmentTag": 1,
 		"segmentFeature": 0,
 		"unionField": "",
 		"createTime": "2021/4/13 15:49:09"
 	}, {
 		"id": 99,
 		"appId": 10061,
 		"fieldName": "std_cityid",
 		"fieldType": 2,
 				"fieldId": 13,
 				"IsPrimaryKey": 0,
-		"indexTag": 0,
+				"indexTag": 1,
 				"snapshotTag": 1,
-		"segmentTag": 1,
+				"segmentTag": 0,
 				"segmentFeature": 0,
-		"unionField": "",
+				"unionField": ""
 		"createTime": "2021/4/13 15:49:09"
 			}, {
-		"id": 100,
+				"fieldName": "publishtime",
-		"appId": 10061,
+				"fieldType": 1,
 		"fieldName": "std_district",
 		"fieldType": 2,
 				"fieldId": 14,
 				"IsPrimaryKey": 0,
-		"indexTag": 0,
+				"indexTag": 1,
 				"snapshotTag": 1,
-		"segmentTag": 1,
+				"segmentTag": 0,
 				"segmentFeature": 0,
-		"unionField": "",
+				"unionField": ""
 		"createTime": "2021/4/13 15:49:09"
 			}, {
-		"id": 101,
+				"fieldName": "last_modify_time",
-		"appId": 10061,
+				"fieldType": 1,
 		"fieldName": "std_districtid",
 		"fieldType": 2,
 				"fieldId": 15,
 				"IsPrimaryKey": 0,
-		"indexTag": 0,
+				"indexTag": 1,
 				"snapshotTag": 1,
-		"segmentTag": 1,
+				"segmentTag": 0,
 				"segmentFeature": 0,
-		"unionField": "",
+				"unionField": ""
 		"createTime": "2021/4/13 15:49:09"
 			}, {
-		"id": 102,
+				"fieldName": "is_choice",
-		"appId": 10061,
+				"fieldType": 1,
 		"fieldName": "std_town",
 		"fieldType": 2,
 				"fieldId": 16,
 				"IsPrimaryKey": 0,
 				"indexTag": 0,
 				"snapshotTag": 1,
-		"segmentTag": 1,
+				"segmentTag": 0,
 				"segmentFeature": 0,
-		"unionField": "",
+				"unionField": ""
 		"createTime": "2021/4/13 15:49:09"
 			}, {
-		"id": 103,
+				"fieldName": "recommend",
-		"appId": 10061,
+				"fieldType": 1,
 		"fieldName": "std_townid",
 		"fieldType": 2,
 				"fieldId": 17,
 				"IsPrimaryKey": 0,
 				"indexTag": 0,
 				"snapshotTag": 1,
-		"segmentTag": 1,
+				"segmentTag": 0,
 				"segmentFeature": 0,
-		"unionField": "",
+				"unionField": ""
 		"createTime": "2021/4/13 15:49:09"
 			}, {
-		"id": 104,
+				"fieldName": "video_flag",
-		"appId": 10061,
+				"fieldType": 1,
 		"fieldName": "jd_region",
 		"fieldType": 2,
 				"fieldId": 18,
 				"IsPrimaryKey": 0,
 				"indexTag": 0,
 				"snapshotTag": 1,
-		"segmentTag": 1,
+				"segmentTag": 0,
 				"segmentFeature": 0,
-		"unionField": "",
+				"unionField": ""
 		"createTime": "2021/4/13 15:49:09"
 			}, {
-		"id": 105,
+				"fieldName": "three_category",
 		"appId": 10061,
 		"fieldName": "jd_regionid",
 				"fieldType": 2,
 				"fieldId": 19,
 				"IsPrimaryKey": 0,
-		"indexTag": 0,
+				"indexTag": 1,
 				"snapshotTag": 1,
 				"segmentTag": 1,
-		"segmentFeature": 0,
+				"segmentFeature": 2,
-		"unionField": "",
+				"unionField": ""
 		"createTime": "2021/4/13 15:49:09"
 			}, {
-		"id": 106,
+				"fieldName": "operator",
 		"appId": 10061,
 		"fieldName": "jd_province",
 				"fieldType": 2,
 				"fieldId": 20,
 				"IsPrimaryKey": 0,
 		"indexTag": 0,
 		"snapshotTag": 1,
 		"segmentTag": 1,
 		"segmentFeature": 0,
 		"unionField": "",
 		"createTime": "2021/4/13 15:49:09"
 	}, {
 		"id": 107,
 		"appId": 10061,
 		"fieldName": "jd_provinceid",
 		"fieldType": 2,
 		"fieldId": 21,
 		"IsPrimaryKey": 0,
 		"indexTag": 0,
 		"snapshotTag": 1,
 		"segmentTag": 1,
 		"segmentFeature": 0,
 		"unionField": "",
 		"createTime": "2021/4/13 15:49:09"
 	}, {
 		"id": 108,
 		"appId": 10061,
 		"fieldName": "jd_city",
 		"fieldType": 2,
 		"fieldId": 22,
 		"IsPrimaryKey": 0,
 		"indexTag": 0,
 		"snapshotTag": 1,
 		"segmentTag": 1,
 		"segmentFeature": 0,
 		"unionField": "",
 		"createTime": "2021/4/13 15:49:09"
 	}, {
 		"id": 109,
 		"appId": 10061,
 		"fieldName": "jd_cityid",
 		"fieldType": 2,
 		"fieldId": 23,
 		"IsPrimaryKey": 0,
 		"indexTag": 0,
 		"snapshotTag": 1,
 		"segmentTag": 1,
 		"segmentFeature": 0,
 		"unionField": "",
 		"createTime": "2021/4/13 15:49:09"
 	}, {
 		"id": 112,
 		"appId": 10061,
 		"fieldName": "jd_district",
 		"fieldType": 2,
 		"fieldId": 24,
 		"IsPrimaryKey": 0,
 		"indexTag": 0,
 		"snapshotTag": 1,
 		"segmentTag": 1,
 		"segmentFeature": 0,
 		"unionField": "",
 		"createTime": "2021/4/13 15:49:09"
 	}, {
 		"id": 113,
 		"appId": 10061,
 		"fieldName": "jd_districtid",
 		"fieldType": 2,
 		"fieldId": 25,
 		"IsPrimaryKey": 0,
 		"indexTag": 0,
 		"snapshotTag": 1,
 		"segmentTag": 1,
 		"segmentFeature": 0,
 		"unionField": "",
 		"createTime": "2021/4/13 15:49:09"
 	}, {
 		"id": 114,
 		"appId": 10061,
 		"fieldName": "address",
 		"fieldType": 2,
 		"fieldId": 26,
 		"IsPrimaryKey": 0,
 				"indexTag": 1,
 				"snapshotTag": 1,
 				"segmentTag": 1,
 		"segmentFeature": 0,
 		"unionField": "",
 		"createTime": "2021/4/13 15:49:09"
 	}, {
 		"id": 116,
 		"appId": 10061,
 		"fieldName": "location",
 		"fieldType": 5,
 		"fieldId": 27,
 		"IsPrimaryKey": 0,
 		"indexTag": 1,
 		"snapshotTag": 1,
 		"segmentTag": 0,
 				"segmentFeature": 2,
-		"unionField": "",
+				"unionField": ""
-		"createTime": "2021/4/13 15:49:09"
+			}]
-	}, {
+		},
-		"id": 117,
+		{
 		"appId": 10061,
 		"fieldName": "jd_town",
 		"fieldType": 2,
 		"fieldId": 28,
 		"IsPrimaryKey": 0,
 		"indexTag": 0,
 		"snapshotTag": 1,
 		"segmentTag": 1,
 		"segmentFeature": 0,
 		"unionField": "",
 		"createTime": "2021/4/13 15:49:09"
 	}, {
 		"id": 118,
 		"appId": 10061,
 		"fieldName": "jd_townid",
 		"fieldType": 2,
 		"fieldId": 29,
 		"IsPrimaryKey": 0,
 		"indexTag": 0,
 		"snapshotTag": 1,
 		"segmentTag": 1,
 		"segmentFeature": 0,
 		"unionField": "",
 		"createTime": "2021/4/13 15:49:09"
 	}, {
 		"id": 119,
 		"appId": 10061,
 		"fieldName": "idx_gis_poi",
 		"fieldType": 11,
 		"fieldId": 30,
 		"IsPrimaryKey": 0,
 		"indexTag": 0,
 		"snapshotTag": 0,
 		"segmentTag": 0,
 		"segmentFeature": 0,
 		"unionField": "27,1",
 		"createTime": "2021/4/13 15:49:09"
 	}, {
 		"id": 1,
 			"appId": 10010,
 			"fieldDefine": [{
 				"fieldName": "doc_id",
 				"fieldType": 2,
 				"fieldId": 0,
@ -543,11 +204,8 @@
 				"snapshotTag": 0,
 				"segmentTag": 0,
 				"segmentFeature": 0,
-		"unionField": "",
+				"unionField": ""
 		"createTime": "2021/4/13 15:49:09"
 			}, {
 		"id": 2,
 		"appId": 10010,
 				"fieldName": "birthPlace",
 				"fieldType": 3,
 				"fieldId": 1,
@ -556,11 +214,8 @@
 				"snapshotTag": 1,
 				"segmentTag": 1,
 				"segmentFeature": 2,
-		"unionField": "",
+				"unionField": ""
 		"createTime": "2021/4/13 15:49:09"
 			}, {
 		"id": 3,
 		"appId": 10010,
 				"fieldName": "homeAddress",
 				"fieldType": 3,
 				"fieldId": 2,
@ -569,11 +224,8 @@
 				"snapshotTag": 1,
 				"segmentTag": 1,
 				"segmentFeature": 2,
-		"unionField": "",
+				"unionField": ""
 		"createTime": "2021/4/13 15:49:09"
 			}, {
 		"id": 4,
 		"appId": 10010,
 				"fieldName": "dreamPlace",
 				"fieldType": 2,
 				"fieldId": 3,
@ -582,11 +234,8 @@
 				"snapshotTag": 1,
 				"segmentTag": 3,
 				"segmentFeature": 2,
-		"unionField": "",
+				"unionField": ""
 		"createTime": "2021/4/13 15:49:09"
 			}, {
 		"id": 5,
 		"appId": 10010,
 				"fieldName": "name",
 				"fieldType": 2,
 				"fieldId": 4,
@ -595,11 +244,8 @@
 				"snapshotTag": 1,
 				"segmentTag": 4,
 				"segmentFeature": 2,
-		"unionField": "",
+				"unionField": ""
 		"createTime": "2021/4/13 15:49:09"
 			}, {
 		"id": 6,
 		"appId": 10010,
 				"fieldName": "gender",
 				"fieldType": 2,
 				"fieldId": 14,
@ -608,11 +254,8 @@
 				"snapshotTag": 1,
 				"segmentTag": 0,
 				"segmentFeature": 2,
-		"unionField": "",
+				"unionField": ""
 		"createTime": "2021/4/13 15:49:09"
 			}, {
 		"id": 7,
 		"appId": 10010,
 				"fieldName": "year",
 				"fieldType": 1,
 				"fieldId": 5,
@ -621,11 +264,8 @@
 				"snapshotTag": 1,
 				"segmentTag": 5,
 				"segmentFeature": 2,
-		"unionField": "",
+				"unionField": ""
 		"createTime": "2021/4/13 15:49:09"
 			}, {
 		"id": 8,
 		"appId": 10010,
 				"fieldName": "height",
 				"fieldType": 9,
 				"fieldId": 6,
@ -634,11 +274,8 @@
 				"snapshotTag": 1,
 				"segmentTag": 0,
 				"segmentFeature": 2,
-		"unionField": "",
+				"unionField": ""
 		"createTime": "2021/4/13 15:49:09"
 			}, {
 		"id": 9,
 		"appId": 10010,
 				"fieldName": "brithday",
 				"fieldType": 10,
 				"fieldId": 7,
@ -647,11 +284,8 @@
 				"snapshotTag": 1,
 				"segmentTag": 5,
 				"segmentFeature": 2,
-		"unionField": "",
+				"unionField": ""
 		"createTime": "2021/4/13 15:49:09"
 			}, {
 		"id": 10,
 		"appId": 10010,
 				"fieldName": "ip",
 				"fieldType": 4,
 				"fieldId": 8,
@ -660,11 +294,8 @@
 				"snapshotTag": 1,
 				"segmentTag": 0,
 				"segmentFeature": 2,
-		"unionField": "",
+				"unionField": ""
 		"createTime": "2021/4/13 15:49:09"
 			}, {
 		"id": 11,
 		"appId": 10010,
 				"fieldName": "currentLocation",
 				"fieldType": 5,
 				"fieldId": 9,
@ -673,11 +304,8 @@
 				"snapshotTag": 1,
 				"segmentTag": 0,
 				"segmentFeature": 2,
-		"unionField": "",
+				"unionField": ""
 		"createTime": "2021/4/13 15:49:09"
 			}, {
 		"id": 12,
 		"appId": 10010,
 				"fieldName": "preLocation",
 				"fieldType": 5,
 				"fieldId": 10,
@ -686,11 +314,8 @@
 				"snapshotTag": 1,
 				"segmentTag": 0,
 				"segmentFeature": 2,
-		"unionField": "",
+				"unionField": ""
 		"createTime": "2021/4/13 15:49:09"
 			}, {
 		"id": 13,
 		"appId": 10010,
 				"fieldName": "postLocation",
 				"fieldType": 5,
 				"fieldId": 11,
@ -699,11 +324,8 @@
 				"snapshotTag": 1,
 				"segmentTag": 0,
 				"segmentFeature": 2,
-		"unionField": "",
+				"unionField": ""
 		"createTime": "2021/4/13 15:49:09"
 			}, {
 		"id": 14,
 		"appId": 10010,
 				"fieldName": "currentShape",
 				"fieldType": 14,
 				"fieldId": 12,
@ -712,11 +334,155 @@
 				"snapshotTag": 1,
 				"segmentTag": 0,
 				"segmentFeature": 2,
-		"unionField": "",
+				"unionField": ""
-		"createTime": "2021/4/13 15:49:09"
+			}]
-	}, {
+		},
-		"id": 15,
+		{
-		"appId": 10010,
+			"appId": 10008,
 			"fieldDefine": [{
 					"fieldName": "doc_id",
 					"fieldType": 2,
 					"fieldId": 0,
 					"IsPrimaryKey": 1,
 					"indexTag": 0,
 					"snapshotTag": 0,
 					"segmentTag": 0,
 					"segmentFeature": 0,
 					"unionField": ""
 				},
 				{
 					"fieldName": "strTest1",
 					"fieldType": 3,
 					"fieldId": 1,
 					"IsPrimaryKey": 0,
 					"indexTag": 1,
 					"snapshotTag": 1,
 					"segmentTag": 1,
 					"segmentFeature": 2,
 					"unionField": ""
 				},
 				{
 					"fieldName": "strTest2",
 					"fieldType": 3,
 					"fieldId": 2,
 					"IsPrimaryKey": 0,
 					"indexTag": 1,
 					"snapshotTag": 1,
 					"segmentTag": 1,
 					"segmentFeature": 0,
 					"unionField": ""
 				},
 				{
 					"fieldName": "strTest3",
 					"fieldType": 2,
 					"fieldId": 3,
 					"IsPrimaryKey": 0,
 					"indexTag": 1,
 					"snapshotTag": 1,
 					"segmentTag": 3,
 					"segmentFeature": 2,
 					"unionField": ""
 				},
 				{
 					"fieldName": "strTest4",
 					"fieldType": 2,
 					"fieldId": 4,
 					"IsPrimaryKey": 0,
 					"indexTag": 1,
 					"snapshotTag": 1,
 					"segmentTag": 4,
 					"segmentFeature": 0,
 					"unionField": ""
 				},
 				{
 					"fieldName": "intTest",
 					"fieldType": 1,
 					"fieldId": 5,
 					"IsPrimaryKey": 0,
 					"indexTag": 1,
 					"snapshotTag": 1,
 					"segmentTag": 4,
 					"segmentFeature": 0,
 					"unionField": ""
 				},
 				{
 					"fieldName": "doubleTest",
 					"fieldType": 9,
 					"fieldId": 6,
 					"IsPrimaryKey": 0,
 					"indexTag": 1,
 					"snapshotTag": 1,
 					"segmentTag": 0,
 					"segmentFeature": 0,
 					"unionField": ""
 				},
 				{
 					"fieldName": "longTest",
 					"fieldType": 10,
 					"fieldId": 7,
 					"IsPrimaryKey": 0,
 					"indexTag": 1,
 					"snapshotTag": 1,
 					"segmentTag": 0,
 					"segmentFeature": 0,
 					"unionField": ""
 				},
 				{
 					"fieldName": "ipTest",
 					"fieldType": 4,
 					"fieldId": 8,
 					"IsPrimaryKey": 0,
 					"indexTag": 1,
 					"snapshotTag": 1,
 					"segmentTag": 0,
 					"segmentFeature": 0,
 					"unionField": ""
 				},
 				{
 					"fieldName": "geopointTest1",
 					"fieldType": 5,
 					"fieldId": 9,
 					"IsPrimaryKey": 0,
 					"indexTag": 1,
 					"snapshotTag": 1,
 					"segmentTag": 0,
 					"segmentFeature": 0,
 					"unionField": ""
 				},
 				{
 					"fieldName": "geopointTest2",
 					"fieldType": 5,
 					"fieldId": 10,
 					"IsPrimaryKey": 0,
 					"indexTag": 1,
 					"snapshotTag": 1,
 					"segmentTag": 0,
 					"segmentFeature": 0,
 					"unionField": ""
 				},
 				{
 					"fieldName": "geopointTest3",
 					"fieldType": 5,
 					"fieldId": 11,
 					"IsPrimaryKey": 0,
 					"indexTag": 1,
 					"snapshotTag": 1,
 					"segmentTag": 0,
 					"segmentFeature": 0,
 					"unionField": ""
 				},
 				{
 					"fieldName": "geoshapeTest",
 					"fieldType": 14,
 					"fieldId": 12,
 					"IsPrimaryKey": 0,
 					"indexTag": 1,
 					"snapshotTag": 1,
 					"segmentTag": 0,
 					"segmentFeature": 0,
 					"unionField": ""
 				},
 				{
 					"fieldName": "unionTest",
 					"fieldType": 11,
 					"fieldId": 13,
@ -725,7 +491,25 @@
 					"snapshotTag": 0,
 					"segmentTag": 0,
 					"segmentFeature": 0,
-		"unionField": "5,1,2",
+					"unionField": "5,1,2"
-		"createTime": "2021/4/13 15:49:09"
+				},
-	}]
+				{
 					"fieldName": "vectorTest",
 					"fieldType": 15,
 					"fieldId": 14,
 					"IsPrimaryKey": 0,
 					"indexTag": 1,
 					"snapshotTag": 0,
 					"segmentTag": 0,
 					"segmentFeature": 0,
 					"unionField": "",
 					"dim": 128,
 					"indexType": ["IVF128,PQ16", "IDMap,HNSW32,Flat", "IVF128,Flat"],
 					"indexDir": ["/tmp/trained_index.faissindex", "", "/tmp/ivfflat.faissindex"],
 					"searchIndexType": 1,
 					"metricType": "L2"
 				}
 			]
 		}
 	]
 }
--- a/resource/index_read/conf/index_read.conf
+++ b/resource/index_read/conf/index_read.conf
@ -60,7 +60,7 @@
    "synonym_file" : "../data/synonym_data",
    "analyze_file" : "../data/analyze_data",
    "sensitive_file" : "../data/sensitive_data",
-        "app_filed_file" : "../conf/app_field_define.txt",
+    "app_filed_file" : "../conf/app_field_define.json",
    "app_info" : [{"app_id":10001, "cache_switch":0, "en_query_switch":1}],
    "split_mode": "Cache"
 }
--- a/resource/tools/search.json
+++ b/resource/tools/search.json
@ -10,3 +10,4 @@
 {"appid":10010,"query":{"bool":{"must":{"match":{"birthPlace":"上海市"},"term":{"gender":"男"}}}}}
 {"appid":10010,"query":{"match":{"dreamPlace":"阿姆斯"}},"fields":"birthPlace,homeAddress,dreamPlace,name","page_index":1,"page_size":3}
 {"appid":10010,"query":{"range":{"height":{"gte":174 ,"lte": 180}}},"fields":"birthPlace,height,name","sort_type":"5","sort_field":"height"}
 {"appid":10008,"page_size":20,"sort_type":6,"query":{"vector_query":{"vectorTest":[0.1354770042967805,0.8350085899945795,0.96886777112423139,0.2210340429827049,0.30816705050700327,0.54722059636785192,0.1883819760471811,0.99288130191780666,0.9964613255480087,0.96769493701050258,0.72583896321188968,0.98110969177693896,0.10986175084420642,0.79810585674954948,0.29702944955795085,0.0047834844193156683,0.1124645160561803,0.6397633570981528,0.87843064539884386,0.5036626777051697,0.79792861516022606,0.36129400134918088,0.21192433239173361,0.68135953856026599,0.39873851991229114,0.7406472446764214,0.47475868061723475,0.42208768110541323,0.17386517200048032,0.30191312687731969,0.79727991523827568,0.31655044481899425,0.87242882006730027,0.1491139764073704,0.99406849432204192,0.82190326480741094,0.12518276453363444,0.76375001257217945,0.49058903962146072,0.66360552050975297,0.12589663347200125,0.21020907451900617,0.051216425785216686,0.036441251587867714,0.40873116096176038,0.4579891554288949,0.48756892686839826,0.79397497154919272,0.92087479115216175,0.8075310254364011,0.70577425166871988,0.0028184325619839781,0.71070387509071686,0.64396095652194041,0.45603282449743654,0.77391712891365494,0.57375466659659147,0.87675741509077743,0.80817549014121004,0.017773895576552474,0.82124599156697908,0.82084078417511075,0.94007402879790336,0.41266651491147388,0.42316511643373017,0.58095667766390635,0.15805758455470567,0.76173121368946151,0.23015606453392981,0.80973454873485218,0.98852160080352736,0.33244828233827889,0.29983170582314134,0.013539126665220821,0.21723783945880448,0.90736471776617311,0.84846779196443856,0.95501757349145688,0.7788977100551232,0.98745962685749145,0.067595381138767008,0.79359758152412918,0.59450356117106606,0.73279872526175427,0.6952328837749534,0.67981979071298593,0.3923204691980966,0.56155744235816618,0.2080680570519636,0.52737145860861556,0.404208518116701,0.35276240810145393,0.59282387851988838,0.35634516058004628,0.96496637210576108,0.1544384174351362,0.3949
0821062792641,0.38729590514427764,0.72695472161562691,0.38856980747941944,0.92749284165030865,0.43611756497860993,0.86267818704042931,0.62036001322058332,0.11954718110283109,0.47195680174091409,0.34021969914039862,0.52984198849976039,0.71610070967330008,0.98837939760371851,0.7204934613431202,0.91257749197162252,0.50549850525077833,0.55826875635267637,0.50319002106608313,0.46247420642194731,0.54659196188169124,0.44758440204806038,0.85445098673882325,0.60423148433740215,0.4985441863785155,0.9799256047610887,0.034317313228881506,0.97700203460988366,0.36318646087714079,0.67951969380227262,0.34623339308529066,0.85587513642380542]}}}
--- a/resource/tools/send.json
+++ b/resource/tools/send.json
@ -2,3 +2,13 @@
 {"appid":10010,"table_content":{"cmd":"add","fields":{"doc_id":"2","birthPlace":"中华人民共和国湖北省武汉市","homeAddress":"中华人民共和国上海市长宁区","dreamPlace":"比利时王国","name":"Joy","gender":"男","year":20,"height":174.325,"brithday":19910720,"ip":"192.168.0.35","currentLocation":"39.452, -76.589","preLocation":{"latitude":"-70.154","longitude":"35.247"},"postLocation":["-75.456","40.111"],"currentShape":"POLYGON((121.437271 31.339747, 121.438022 31.337291, 121.435297 31.336814, 121.434524 31.339252, 121.437271 31.339747))"}}}
 {"appid":10010,"table_content":{"cmd":"add","fields":{"doc_id":"3","birthPlace":"中华人民共和国江苏省苏州市","homeAddress":"中华人民共和国上海市闵行区","dreamPlace":"东京郊外调布市","name":"Tom","gender":"男","year":30,"height":180.785,"brithday":19900654,"ip":"192.168.0.98","currentLocation":"34.452, -65.589","preLocation":{"latitude":"-68.355","longitude":"45.121"},"postLocation":["-71.456","27.986"],"currentShape":"POLYGON((121.437271 31.339747, 121.438022 31.337291, 121.435297 31.336814, 121.434524 31.339252, 121.437271 31.339747))"}}}
 {"appid":10010,"table_content":{"cmd":"add","fields":{"doc_id":"4","birthPlace":"中华人民共和国上海市","homeAddress":"中华人民共和国上海市宝山区","dreamPlace":"梵蒂冈高地","name":"Amy","gender":"女","year":40,"height":176.258,"brithday":19931124,"ip":"192.168.0.18","currentLocation":"39.452, -76.589","preLocation":{"latitude":"-70.154","longitude":"35.247"},"postLocation":["-75.456","40.111"],"currentShape":"POLYGON((121.437271 31.339747, 121.438022 31.337291, 121.435297 31.336814, 121.434524 31.339252, 121.437271 31.339747))"}}}
 {"appid":10008,"table_content":{"cmd":"add","fields":{"doc_id":"12346","vectorTest":[0.1354770042967805,0.8350085899945795,0.96886777112423139,0.2210340429827049,0.30816705050700327,0.54722059636785192,0.1883819760471811,0.99288130191780666,0.9964613255480087,0.96769493701050258,0.72583896321188968,0.98110969177693896,0.10986175084420642,0.79810585674954948,0.29702944955795085,0.0047834844193156683,0.1124645160561803,0.6397633570981528,0.87843064539884386,0.5036626777051697,0.79792861516022606,0.36129400134918088,0.21192433239173361,0.68135953856026599,0.39873851991229114,0.7406472446764214,0.47475868061723475,0.42208768110541323,0.17386517200048032,0.30191312687731969,0.79727991523827568,0.31655044481899425,0.87242882006730027,0.1491139764073704,0.99406849432204192,0.82190326480741094,0.12518276453363444,0.76375001257217945,0.49058903962146072,0.66360552050975297,0.12589663347200125,0.21020907451900617,0.051216425785216686,0.036441251587867714,0.40873116096176038,0.4579891554288949,0.48756892686839826,0.79397497154919272,0.92087479115216175,0.8075310254364011,0.70577425166871988,0.0028184325619839781,0.71070387509071686,0.64396095652194041,0.45603282449743654,0.77391712891365494,0.57375466659659147,0.87675741509077743,0.80817549014121004,0.017773895576552474,0.82124599156697908,0.82084078417511075,0.94007402879790336,0.41266651491147388,0.42316511643373017,0.58095667766390635,0.15805758455470567,0.76173121368946151,0.23015606453392981,0.80973454873485218,0.98852160080352736,0.33244828233827889,0.29983170582314134,0.013539126665220821,0.21723783945880448,0.90736471776617311,0.84846779196443856,0.95501757349145688,0.7788977100551232,0.98745962685749145,0.067595381138767008,0.79359758152412918,0.59450356117106606,0.73279872526175427,0.6952328837749534,0.67981979071298593,0.3923204691980966,0.56155744235816618,0.2080680570519636,0.52737145860861556,0.404208518116701,0.35276240810145393,0.59282387851988838,0.35634516058004628,0.96496637210576108,0.1544384174351362,0.39
490821062792641,0.38729590514427764,0.72695472161562691,0.38856980747941944,0.92749284165030865,0.43611756497860993,0.86267818704042931,0.62036001322058332,0.11954718110283109,0.47195680174091409,0.34021969914039862,0.52984198849976039,0.71610070967330008,0.98837939760371851,0.7204934613431202,0.91257749197162252,0.50549850525077833,0.55826875635267637,0.50319002106608313,0.46247420642194731,0.54659196188169124,0.44758440204806038,0.85445098673882325,0.60423148433740215,0.4985441863785155,0.9799256047610887,0.034317313228881506,0.97700203460988366,0.36318646087714079,0.67951969380227262,0.34623339308529066,0.85587513642380542]}}}
 {"appid":10008,"table_content":{"cmd":"add","fields":{"doc_id":"12347","vectorTest":[0.045059544094808029,0.66011949437672002,0.74994097310849783,0.13299603129519327,0.9823605637489462,0.095355174332811252,0.28267329073174369,0.80211146924087506,0.077557029404978314,0.62738433796439896,0.0080939118167991288,0.68028705943025525,0.53393310793978888,0.43866683184682742,0.19955121640836643,0.13800132612540608,0.3823329369541742,0.76242128481994542,0.040471111480127123,0.25295597410838094,0.50477099469488207,0.82260495173743753,0.98172312239587001,0.82345541451351123,0.30182730823352844,0.047944285428334774,0.2478476025841348,0.54405610181606667,0.88772613283659318,0.34383953593206912,0.033268568191967621,0.1628721235578886,0.87736391457269247,0.21030186894110842,0.27275303663725298,0.49244198864416966,0.22341994244988414,0.49030149638000253,0.95494337425149289,0.65196867193005381,0.75750363608245008,0.43524568599177982,0.55288120830310039,0.053152570350365379,0.32251074417056375,0.40498138359175145,0.90843361477437978,0.80913724311836766,0.25829589777838335,0.12984652508323136,0.49332685061910792,0.37850042885423724,0.71846994979935908,0.28780498432375512,0.62343557708540298,0.81187398084182172,0.31250797973172167,0.38571052187669819,0.34392975162912265,0.8157690646186514,0.66928513715514626,0.37941883669261023,0.45849690049173913,0.3036139714009391,0.91056498617091952,0.48861771459594799,0.75578989634448279,0.10806191556769178,0.39358985203050745,0.87018676533148898,0.36086071348228205,0.91711791576697288,0.54380550320360765,0.14014383786791224,0.19987287018717742,0.94892508665398356,0.99010995480947361,0.24007594979810565,0.016520584438914156,0.38861517225162973,0.77968936730699501,0.47663807160850724,0.54013800579303461,0.018450843473255246,0.90018319838468996,0.19449538370372918,0.88599811909820625,0.44122346425307085,0.14782900997109752,0.23950246352951696,0.7996531751071253,0.47301506028410484,0.089823156434833601,0.64455053371455973,0.63306366499285383,0.5843822
3325293892,0.72665438123626658,0.35463813319330767,0.68040664457689737,0.70732152371460555,0.16232851828869077,0.13373638466203583,0.44955607238106804,0.042054125401706374,0.7973641065421615,0.16755578986164549,0.83121428736991876,0.32496353567517772,0.65577989753723975,0.42018995162071154,0.78190915180001563,0.1131925360550774,0.99353471796156745,0.18157297914439866,0.76293141692900435,0.26592065417888117,0.24413826431353905,0.10073849156739444,0.34432791883326347,0.28162734107545695,0.96864037547197279,0.21377273869208266,0.60592821006735054,0.22655131983263552,0.18990550179060917,0.40157147828293832,0.41329063067419869,0.17015227876232472]}}}
 {"appid":10008,"table_content":{"cmd":"add","fields":{"doc_id":"12348","vectorTest":[0.29905181556919952,0.63646856844320787,0.15157415842810568,0.76924317911918783,0.88396860077924533,0.95922160541759061,0.018066270256714624,0.5828463846808728,0.76685408551681611,0.32779776520604992,0.12906616421773956,0.29196078415953741,0.84710964889651008,0.23076439269846946,0.50197438548942142,0.0015979040539258519,0.012538803390733417,0.28658657725412673,0.098699042041561824,0.38475520697043292,0.35534869178452588,0.71997093580061644,0.065445064729718921,0.51385918636755312,0.42397833313572708,0.68980125937240067,0.25394310627598837,0.40151955920269761,0.2644765516521228,0.93425058634250113,0.47252524146290181,0.6109161944732634,0.48840188579761645,0.51451956703716928,0.24501924896215146,0.33300524070857374,0.50236237628537039,0.59605793352906455,0.3101559978653447,0.96299752108005976,0.23240035469147391,0.74690420639531097,0.3225329591660927,0.31823214107916886,0.95247203392433444,0.53472104568080325,0.87180515263286218,0.93460264989907049,0.66711408307814268,0.28968940561095485,0.065524461636820006,0.23188861618227025,0.70472372037415387,0.19510740115071307,0.42221838879855472,0.93605506408926908,0.14221720443409275,0.64318100471258899,0.47273213947199533,0.43693291057222472,0.24431964020067629,0.50591941523023731,0.9069440930810454,0.076503973485243279,0.71390392053252749,0.61720491675449018,0.18908419456132217,0.39698180621568213,0.27766405722602999,0.8580993014386058,0.90893657471213396,0.61727908496204753,0.18081870271285527,0.51502371966558824,0.82463170641380923,0.35932443766660316,0.81611702079313975,0.29338824376092842,0.17211781096811601,0.49278958726353395,0.19532476933030637,0.86430749231905635,0.20419716683441952,0.61627227611362312,0.87805055080718142,0.39246908666573743,0.94657725516070546,0.1666982685536747,0.97187740059316141,0.56396489460552945,0.59836891189866492,0.13122185038932063,0.5667773954145946,0.55438027088135766,0.31048996055066935,0.1206489309438
2685,0.73779082011113384,0.24272365252001363,0.82285914726045573,0.1854759712392762,0.31267693852496625,0.88172541468590837,0.67421294609063309,0.73929755673071396,0.062543240383407292,0.13156156161827962,0.089952908661651079,0.0083144453049579896,0.45651041185260594,0.63430332075080753,0.52976299227697909,0.49786942847576876,0.33579789850777131,0.25282334928762235,0.23367544138915011,0.17316303178115566,0.59598103030373428,0.7268931438280859,0.87757309281314311,0.70715972667574223,0.23476560334430716,0.47799574635601316,0.018211762976045518,0.74733119867345932,0.65472358145094622,0.078066942349644536,0.47276910082835027,0.99647506705142574]}}}
 {"appid":10008,"table_content":{"cmd":"add","fields":{"doc_id":"12349","vectorTest":[0.53209752212846095,0.89943600631846665,0.77290092728339344,0.20375969594900425,0.90710862227344635,0.37481256134523289,0.84808888796398141,0.3266992459259121,0.32673666016612213,0.65921061981020679,0.58676498234140595,0.048983202321108567,0.76996566551394274,0.49175217103043545,0.48818401507136522,0.42912657074135874,0.3947277153895436,0.77645380145270126,0.9747543864880045,0.74850515939083984,0.6345020038299144,0.072180258738782227,0.63336329689215187,0.52892063256204214,0.29616765120737398,0.32532786911344147,0.63111992425538233,0.99450542257503738,0.516047826830209,0.72445391370448775,0.24557474831080972,0.33233911566637675,0.74860911549332876,0.68296648546241823,0.68127137052491982,0.078842372310105394,0.45893628750434196,0.69814685928250031,0.067884831477613955,0.069140052950585146,0.52704281782583717,0.26204879867867692,0.48337516245434181,0.13080546435135015,0.094440768579352727,0.12038168191802026,0.96983720583795852,0.75628164548976873,0.26817637859790333,0.72490598353503599,0.41576251067237624,0.47471840183160319,0.055441426413381609,0.61374414308737224,0.49061918484399575,0.90447486705798485,0.93521427019843606,0.91827661968396046,0.30382527123853392,0.79010177679462179,0.22622740788954832,0.44895186246850521,0.5101089129865477,0.2306319254000474,0.7118807799653547,0.98445260976133198,0.6552132481979186,0.27266698284101465,0.46901556324736426,0.0030012052562900463,0.44642393301728095,0.97094415029747405,0.30466350012516413,0.21510904463584052,0.24758507205903463,0.8064510265914171,0.48637109859284816,0.40750191563781307,0.21562929415923682,0.54025208697484839,0.72302423751473532,0.28369391607292677,0.54286209731170565,0.7559246848659652,0.33197728643260188,0.80176297340482106,0.8570724318896098,0.014891700298874645,0.88494266527440912,0.59308694076898927,0.08621722904292714,0.92487594242585114,0.91371957205314269,0.98075677513764081,0.90002210937108118,0.949714583960464
82,0.93452195465742094,0.0083242709932422158,0.77779403110867984,0.025958877742563866,0.58787704962513754,0.64978077069606677,0.32823645491996561,0.11107936201603065,0.12427265769972091,0.97391505899143815,0.53322947059262671,0.66230028184641732,0.7082576325094061,0.068738579831602517,0.20595557840122133,0.43342967667061805,0.011192586468981847,0.069262525726323201,0.34306180995340479,0.31223008050896711,0.992742614956165,0.26906433620973008,0.85706624129552889,0.35561091505567988,0.74676827977366611,0.84485626968536864,0.21713862333908432,0.96495217914822318,0.71219265309149937,0.98266786794162286,0.75989900495501139,0.19326037486254038]}}}
 {"appid":10008,"table_content":{"cmd":"add","fields":{"doc_id":"12350","vectorTest":[0.35258951120634702,0.19630034168275226,0.83206792723445577,0.95259476709033009,0.76737005175526174,0.86196880929772701,0.45566373740083016,0.33268383938437129,0.83825996341290421,0.55430878965379038,0.12086557279476341,0.21218372969038427,0.49123925544039726,0.40965479770022917,0.75951148963832615,0.35706422593741233,0.77178605580516824,0.98154040522381525,0.84616172213486074,0.83462162550860552,0.36719448154918022,0.05062594416792137,0.97556160014614923,0.84258929348663736,0.86946038841117557,0.075498432542996277,0.032376068351151872,0.61393180618063836,0.73568028568935406,0.61552780427737097,0.66263007945057284,0.95126442640604114,0.26588751469842486,0.91938572650657346,0.11839654767950655,0.31306570592392291,0.8721153998486223,0.23364369626139017,0.80805849464059343,0.55653176675427418,0.94359231057656368,0.50370346282237377,0.66641682513111866,0.152965732621862,0.9732287748090126,0.63376690736606478,0.17864978296455705,0.074142804182379923,0.49497478032059061,0.78597990736130663,0.46467973894620568,0.88303166101481367,0.6473688994928064,0.90324096949696342,0.0059736147077088485,0.69356180838194603,0.58508964450503598,0.10536129242182218,0.9903198966734128,0.22999202528719415,0.33290783370089844,0.68938285128010068,0.96142074592379967,0.22659909325500499,0.9242414058579258,0.87624619113450253,0.43174452913505501,0.47088740858944883,0.5621024799799601,0.83761541216733892,0.14830887913425128,0.63927818279591209,0.99559368710030405,0.63939410341134173,0.43951832473946401,0.028388349997248458,0.083150901549952805,0.53036718520301496,0.78200151364646486,0.73664522522360798,0.19016096743561928,0.26182726354717567,0.45224476831002447,0.46002688479963155,0.67363714314757628,0.17695307230926255,0.18783083538236289,0.096486812470241948,0.32387460718180816,0.17032649497514579,0.02473407283649055,0.83064298464373387,0.24482838427247269,0.14203500743382547,0.155441967742288,0.30301072471516
943,0.033572317566844256,0.40928938190113723,0.78312364537561463,0.44337191176366764,0.017488211050555126,0.28578436548810532,0.95139670111270336,0.17131502419427105,0.094149831099923309,0.2739117480583611,0.20136393596759561,0.75311635524155063,0.80087942088456832,0.58608418716722799,0.49699451966368002,0.59097324490230907,0.56879152289672819,0.18509800607011648,0.90165664014894153,0.024462146621605348,0.26437647428613903,0.78251526364644597,0.59970211907500148,0.55621543656602657,0.096532746434284625,0.92678704009859136,0.39839131711446285,0.039902940420634012,0.5851167777699211,0.413214935801061,0.12953341445998653,0.60893897507558692]}}}
 {"appid":10008,"table_content":{"cmd":"add","fields":{"doc_id":"12351","vectorTest":[0.055406415963613273,0.84829575792650769,0.45206391858681827,0.48501437846913781,0.067446854282731991,0.22100588377787092,0.64436472637801667,0.9758318250981417,0.94934234351922842,0.73647249098995105,0.42085361228010498,0.68573383455841619,0.68621608359800945,0.23126626565009306,0.99477475306062657,0.43559307221756238,0.74379263345505187,0.33805053832454512,0.3629157686456575,0.98988325182469172,0.72507827801721458,0.83750665175969652,0.39803542472724762,0.85169574196309028,0.17160291510408146,0.94739425722706627,0.35089664593454906,0.38487863346010254,0.39973002751754338,0.71836642716219079,0.83591752963790977,0.95087424388203567,0.063981459267835467,0.66452567422344377,0.17631153579951739,0.37095806831981953,0.16839990685081802,0.11320269155597265,0.15257344276905091,0.39551678526416928,0.39647512806994878,0.0279644976983323,0.45003226768701349,0.098543784333021647,0.91601781127611137,0.46631701752984406,0.74912879517897013,0.72932927881213305,0.73630694653317386,0.65654828851078395,0.2158032653699469,0.051492646343747513,0.058698541004942192,0.42140682397906343,0.81041041558970084,0.31681912726092532,0.88409165024456293,0.40910812226314991,0.20156698907381579,0.025499661649159764,0.0664404424743337,0.67639679598018987,0.081075484246918739,0.32941153446352728,0.36957977620284993,0.79685747581001076,0.17648521779636109,0.092710447341862373,0.059614530973715274,0.28690663401925826,0.71598590002125551,0.49354184617753655,0.90512452640266094,0.40455306018690851,0.067376302855054235,0.37502464964392657,0.84857856795770115,0.36470809363852186,0.14261117992415187,0.42169955514842217,0.86896255779501907,0.99704139967440508,0.5336924886928649,0.39456635492897724,0.33226634593118193,0.043031781021428538,0.85651533100962329,0.18069310600523686,0.33331615982469898,0.43136435501940273,0.32746071762222156,0.2525902895964709,0.13156505155971882,0.090521716451334638,0.95214313933669159,0.279544
41965189625,0.57815892193401497,0.35300998396704369,0.31696236625642582,0.77674011425408573,0.37017951600601467,0.19148647321189122,0.011024580859226772,0.39227447939425575,0.90667332742403306,0.98728317371878849,0.70892344342944946,0.63795277690437113,0.62105451992092897,0.92509477103766213,0.880747919775559,0.019452320901134378,0.72040518768955719,0.59107682396007999,0.056511152139247123,0.63877826581291042,0.29647154856897984,0.46455997904910501,0.9479332292697924,0.55193245708044292,0.50341031319253848,0.13333647653690811,0.71312535160241242,0.92510012718318979,0.90354881759386851,0.63629236154578905,0.34196903535843692,0.66365336925392249]}}}
 {"appid":10008,"table_content":{"cmd":"add","fields":{"doc_id":"12352","vectorTest":[0.65961039283734224,0.27043822368000936,0.64228011661821827,0.42925378820168242,0.80221269164431075,0.33218356260748488,0.71050662351852933,0.65427794925079275,0.24065527243765758,0.91911724249535554,0.60383730399920066,0.38324548171087564,0.78012549613894244,0.49500474090610397,0.8850606087121462,0.39798760206009975,0.27793777053714158,0.24252637809551877,0.001491292986987604,0.39498723711381994,0.20199736629259149,0.77207475857061525,0.60845674447471743,0.77905694243619905,0.49939220582378929,0.38451120369112124,0.83073091454206527,0.12795719598683805,0.24966503634883214,0.50315260074299939,0.12821142650170969,0.97868662605597823,0.72686931506817121,0.22339902215918594,0.68257701856147601,0.44870802553145994,0.67205892552563284,0.9093655533252093,0.83876213881392359,0.21188940542140325,0.9510701731194583,0.87789161796290094,0.70907033857424562,0.97071340918420512,0.42290940478792355,0.83727640471667952,0.093319298668515474,0.67342663674760483,0.098370607329168497,0.69749160189395742,0.2159513171494539,0.77783001693849019,0.080736606798452307,0.40530116468846167,0.20749043980753698,0.0850326700803513,0.51918253436962469,0.97848730865066746,0.16609021261992671,0.91847447970690632,0.30475189937952751,0.58088937509850147,0.84387769538082191,0.38807225067909934,0.006013542362938629,0.0099198187469985172,0.19837282209053317,0.39338207855759494,0.19652059267528174,0.83525831696845099,0.77973899510812839,0.077203401155966855,0.29141988305195327,0.27548021073090562,0.62170599174955166,0.92117119198747288,0.22541836601784981,0.28479555655095845,0.084633757289208522,0.32779707474589798,0.052098838713169643,0.61088435902621085,0.54457275013200823,0.34790379110112785,0.52696657837076599,0.99986129656112255,0.85524936481328451,0.90296345448298465,0.12880547991231042,0.61581807243544762,0.83034150681294006,0.98783082795265009,0.64561858090415947,0.75033229995275952,0.75737191723932962,0.6666441
784728917,0.54002750223954288,0.73025470653451319,0.7797417912568716,0.48864703385541985,0.5937586680812138,0.96717315589314123,0.078377669698640781,0.035169148120987301,0.37221134184466426,0.077913408785959051,0.78418740996227654,0.47711991577670193,0.44065240237601522,0.28179312628502173,0.5168050097130471,0.36786059572694435,0.11789148815409142,0.59862499547703829,0.016582696039288143,0.46777718053112016,0.10650807929166588,0.33872911667368116,0.086891982522225783,0.2051579591936995,0.51777902545708288,0.47677920542266761,0.035190217559164842,0.85313907269957323,0.9506138612544236,0.85293546227676487,0.98030031889532698,0.86230742109630143]}}}
 {"appid":10008,"table_content":{"cmd":"add","fields":{"doc_id":"12353","vectorTest":[0.82130702248722987,0.9186241625140954,0.59823175057913258,0.82677444923261578,0.53760613430941817,0.91313850580957534,0.16778230533418065,0.018888616561144433,0.85650013645039746,0.84137671503333744,0.78818697282168737,0.72951853437880876,0.73454747567935474,0.58646338610459858,0.56322989895808739,0.92423252863375127,0.60523717737783067,0.10132215259552757,0.61901455485986334,0.73382012763777205,0.29096417890059195,0.31862486346954216,0.57389281935306014,0.64126374312500078,0.055421672900554303,0.97991478334537507,0.79251656408869309,0.63918024288783604,0.32432636209359522,0.9726790432724185,0.50523496106227839,0.92144340084459286,0.56961180884957663,0.44634660576222368,0.9644654826551845,0.12019674014869007,0.98492383730834943,0.22996981635632008,0.59699592964111137,0.57034774100336616,0.76464368773990565,0.41218261105424869,0.055884800907335802,0.50600170827561475,0.17204127152434723,0.52902597369175597,0.53451704251200149,0.68268495125368245,0.47242774760386919,0.3308144240787394,0.37451495910346583,0.13792588610633247,0.72938972986852157,0.32929940199828933,0.6605067691215244,0.063743577493817358,0.90738567591048525,0.10465814744042191,0.099251408526371765,0.3925202309977241,0.66566938882541304,0.49559840269873012,0.46239214033813542,0.21128634923057701,0.40950281704058961,0.10772450495246307,0.48932110241618637,0.11182301270892964,0.35859875528277679,0.29066239379962194,0.32780356458362209,0.25902873683140926,0.54520721365249203,0.069631787542110954,0.017341512027321333,0.13491919052872706,0.89653988570293874,0.74232258366714543,0.60366320404305873,0.95288536858770467,0.91416284856262098,0.24235147367636539,0.90206768606790244,0.44313358172253092,0.56268228188900893,0.3511370644505763,0.036678086748390244,0.56378333591095398,0.34718907779809138,0.77172187033929218,0.6301894492101997,0.89260869808938548,0.8619935455244877,0.33761208011213228,0.66378191190711866,0.6843613974783
8953,0.40008104471235384,0.01839210625092124,0.0040859997984107168,0.39029852175984098,0.99994157765779978,0.20006453232932001,0.62804272888694401,0.91948257849243231,0.85278242934290061,0.82784204752069745,0.92361117014591654,0.27586964429918942,0.64500972410953861,0.41818816484999627,0.99204604336694946,0.47074555329563728,0.67533593788396917,0.93333014692671412,0.57110298126565762,0.43834231671080348,0.32004613833743212,0.91546389875969847,0.98210810491215195,0.27970508545893846,0.019777211168980006,0.33629865757128785,0.88614749984473973,0.11126526746074224,0.31207737611228842,0.52482484592440626,0.11937510433781434,0.76087648040617739]}}}
 {"appid":10008,"table_content":{"cmd":"add","fields":{"doc_id":"12354","vectorTest":[0.31265323822866586,0.28898696136778479,0.32581034342832244,0.4116571229760157,0.26335448065321354,0.48641836815138084,0.22670860510024859,0.93130090111439345,0.044546970145443919,0.97647496610892903,0.78299561695797548,0.45052378923006187,0.21396262812970818,0.10309799774952215,0.54499044701817789,0.53608718224103835,0.66793490836282021,0.080833325453249033,0.64677732308666191,0.14574249117349108,0.40309229511871225,0.91843952843595855,0.42344651162391339,0.6423154572340809,0.61565416936927408,0.79964706879157843,0.44274479313500803,0.70024695295536565,0.81906407348331955,0.26536088297566807,0.055705387537330064,0.70748686552600393,0.8006655719443545,0.96730533518953232,0.99205181768606532,0.04223770313568824,0.92326281083232464,0.062820452781813585,0.38148959264674714,0.12771899044556073,0.43916055585884994,0.0086025580802102306,0.87400330601717835,0.88701412252952716,0.51849535885514442,0.30577770340823324,0.39639546674340481,0.40264022738594668,0.2249493095996562,0.55129953840906532,0.28589522916281113,0.37123154794387098,0.04945971030838911,0.81414304424438333,0.62129855381329036,0.31081234117221246,0.229096830918359,0.43081385097527836,0.72451673838089226,0.86206817407708924,0.68211163430283606,0.57721462044482363,0.80196840770352162,0.16708968884371983,0.63749432301138764,0.99158095219083042,0.90937719525642091,0.47388863306754708,0.61014860666972792,0.89781440163183579,0.71248061523183592,0.42557317447229925,0.52765755639824363,0.2285892200017475,0.8945813684977858,0.10878499614216956,0.94990810770142642,0.39808010838315533,0.028031065829517844,0.44706663881503356,0.20591714324483928,0.89086477895522354,0.94040956512155471,0.075977005152540786,0.13853079003848592,0.51399432954599922,0.86708658845729125,0.80627047789028261,0.84004596031108669,0.91662608354294028,0.82453455906913531,0.65617604233241755,0.90359382824439638,0.2785574998309937,0.8242417056502126,0.68329762037249
242,0.71683092743807675,0.78088097371678511,0.66226071796824582,0.75080708984512923,0.20214098300627556,0.56878285154138963,0.64860908549723439,0.69780533109888565,0.6375211567052862,0.80913026625581441,0.78933857435172894,0.72537706237728294,0.02710776644268904,0.9530982047343034,0.13636698329591951,0.20891467502954517,0.13072549310254955,0.87139316906722586,0.88893312620717446,0.0063695346598086466,0.69535676681000802,0.81980114929185277,0.65441192709784035,0.4601282251094474,0.73311793326531804,0.56695487603881578,0.44377212229593344,0.52568060145480677,0.56590513790197583,0.64090397672669353,0.59603473118356753,0.76720121424140808]}}}
 {"appid":10008,"table_content":{"cmd":"add","fields":{"doc_id":"12355","vectorTest":[0.67810676580837181,0.13421466572583357,0.25083093417919566,0.33135466923252338,0.18346626722404674,0.59522134876650568,0.77430153763813014,0.34432489838214175,0.29537487142608049,0.9039396532630688,0.47633490375062953,0.57388812140075707,0.27272957599472919,0.15895720555689735,0.69658825723907714,0.193626339864134,0.13914813491434547,0.10669623883764742,0.30246918566812414,0.29443325816996935,0.0040217295691217037,0.13815482853161054,0.74397447096132752,0.17255257809047406,0.65089739508791233,0.80380974710730901,0.51908419056722643,0.088518434125616649,0.69736803003621795,0.86502103955261012,0.45535497267523978,0.88917447361735924,0.53201411515255659,0.32341796943462409,0.18112580106769566,0.55913517678826696,0.24954929050322169,0.37730780971257599,0.10234260994825629,0.98691653456915684,0.2533289211060637,0.41803545032885175,0.43667769039463317,0.73228695239795438,0.38384140495643992,0.29525424227775293,0.84881112942615444,0.12672813558177082,0.47693037905352675,0.78511103533431759,0.99804107585853163,0.79446762459192932,0.027299528580495055,0.95931306061417099,0.33549308149197554,0.047020827180929092,0.60593774709599257,0.97405717397740121,0.69139981493038116,0.67448450826925976,0.20081161669479264,0.20889479518408852,0.86334933475850872,0.8655373787086591,0.72112000293188572,0.020168148208130541,0.37465107735510672,0.31326104333420235,0.49830528847023176,0.72918336212682089,0.71494113101354773,0.12601531070141264,0.80061328465390602,0.33078745267173137,0.41369388353731662,0.32917482478582732,0.61541218307897283,0.31390747220587878,0.78384467911208999,0.67044839831046299,0.53743088758722912,0.72163531704395389,0.72978266345708775,0.63948188228408853,0.63619900111688898,0.10201007725815835,0.11472285350170748,0.55607979443268662,0.46696316783867342,0.74360437436173477,0.091721445833211013,0.79706476436000451,0.54563823196146477,0.97046833808482602,0.2058049134403544,0.90078616423
315472,0.73511648455698619,0.78997177858845613,0.39748337429155289,0.90910258669025801,0.11458484944533004,0.14916663586992726,0.21470750841288119,0.13049222050077761,0.069356053478695626,0.089530436685474918,0.59674358746714229,0.95694353937847043,0.12084549653900037,0.85557082799421846,0.82223050117207352,0.47203539361681895,0.67260331159354492,0.16400607474205534,0.76594369341342561,0.24159144418786649,0.53736001734354355,0.72297219679073987,0.20547764609138386,0.58412451775740848,0.14447777213082161,0.45722565332879539,0.41714833443539767,0.39586392191392844,0.45094302827511457,0.62429363673927996,0.56010656724626351,0.78282753096055624]}}}
--- a/resource/vector_index/conf/ivfflat.faissindex
+++ b/resource/vector_index/conf/ivfflat.faissindex
--- a/resource/vector_index/conf/trained_index.faissindex
+++ b/resource/vector_index/conf/trained_index.faissindex
--- a/resource/vector_index/conf/vector_index.conf
+++ b/resource/vector_index/conf/vector_index.conf
@ -0,0 +1,11 @@
 {
    "service_name":"vector_index",
    "pid_file" : "vector_index.pid",
    "log" : "../log/",
    "log_level" : 3,
    "daemon": true,
    "listen_addr": "127.0.0.1",
    "listen_port": 12004,
    "socket_dir" : "/tmp/vector_index",
    "rocksdb_storage_dir": "../rocksdb"
 }
--- a/src/3rdlib/faiss/AutoTune.h
+++ b/src/3rdlib/faiss/AutoTune.h
@ -0,0 +1,214 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #ifndef FAISS_AUTO_TUNE_H
 #define FAISS_AUTO_TUNE_H
 #include <stdint.h>
 #include <unordered_map>
 #include <vector>
 #include <faiss/Index.h>
 #include <faiss/IndexBinary.h>
 namespace faiss {
 /**
 * Base class for the criteria used to score a set of search results
 * against a ground truth. evaluate() returns a performance measure in
 * [0,1]; higher is better.
 */
 struct AutoTuneCriterion {
    using idx_t = Index::idx_t;
    idx_t nq;     ///< number of queries the criterion is evaluated on
    idx_t nnn;    ///< number of nearest neighbors each query should request
    idx_t gt_nnn; ///< number of ground-truth neighbors needed per query
    std::vector<float> gt_D; ///< ground-truth distances (size nq * gt_nnn)
    std::vector<idx_t> gt_I; ///< ground-truth indexes (size nq * gt_nnn)
    AutoTuneCriterion(idx_t nq, idx_t nnn);
    /** Initializes the gt_D and gt_I vectors. Must be called before
     * evaluating the criterion.
     *
     * @param gt_nnn   number of ground-truth neighbors per query
     * @param gt_D_in  ground-truth distances, size nq * gt_nnn
     * @param gt_I_in  ground-truth indexes, size nq * gt_nnn
     */
    void set_groundtruth(
            int gt_nnn, const float* gt_D_in, const idx_t* gt_I_in);
    /** Scores one set of search results.
     *
     * @param D  result distances, size nq * nnn
     * @param I  result labels, size nq * nnn
     * @return the criterion, between 0 and 1. Larger is better.
     */
    virtual double evaluate(const float* D, const idx_t* I) const = 0;
    virtual ~AutoTuneCriterion() = default;
 };
 /// Concrete AutoTuneCriterion parameterized by a rank cutoff R.
 /// NOTE(review): presumably the 1-recall@R measure, judging by the name;
 /// confirm against the implementation.
 struct OneRecallAtRCriterion : AutoTuneCriterion {
    idx_t R; ///< rank cutoff (same name as the constructor argument)
    OneRecallAtRCriterion(idx_t nq, idx_t R);
    /// see AutoTuneCriterion::evaluate
    double evaluate(const float* D, const idx_t* I) const override;
    ~OneRecallAtRCriterion() override = default;
 };
 /// Concrete AutoTuneCriterion parameterized by a rank cutoff R.
 /// NOTE(review): presumably measures the intersection between the result
 /// lists and the ground truth, judging by the name; confirm against the
 /// implementation.
 struct IntersectionCriterion : AutoTuneCriterion {
    idx_t R; ///< rank cutoff (same name as the constructor argument)
    IntersectionCriterion(idx_t nq, idx_t R);
    /// see AutoTuneCriterion::evaluate
    double evaluate(const float* D, const idx_t* I) const override;
    ~IntersectionCriterion() override = default;
 };
 /**
 * One experimental result. Each operating point is a
 * (perf, t, key) triplet, where higher perf and lower t is
 * better. The key field is an arbitrary identifier for the operating point.
 *
 * Lists of operating points are maintained by OperatingPoints, which
 * includes primitives to extract the Pareto-optimal operating points in the
 * (perf, t) space.
 */
 struct OperatingPoint {
    double perf;     ///< performance measure (output of a Criterion)
    double t;        ///< corresponding execution time (ms)
    std::string key; ///< key that identifies this op pt
    int64_t cno;     ///< integer identifier
 };
 /// Container of operating points: all of them (all_pts) together with the
 /// ones considered optimal (optimal_pts).
 struct OperatingPoints {
    /// all operating points
    std::vector<OperatingPoint> all_pts;
    /// optimal operating points, sorted by perf
    std::vector<OperatingPoint> optimal_pts;
    /// begins with a single operating point: t=0, perf=0
    OperatingPoints();
    /// add operating points from other to this, with a prefix to the keys
    /// NOTE(review): the meaning of the int return value is not visible in
    /// this header; confirm against the implementation.
    int merge_with(
            const OperatingPoints& other,
            const std::string& prefix = "");
    void clear();
    /// add a performance measure. Return whether it is an optimal point
    /// @param perf  performance measure of the new point
    /// @param t     execution time of the new point
    /// @param key   key that identifies the new point
    /// @param cno   integer identifier of the new point
    bool add(double perf, double t, const std::string& key, size_t cno = 0);
    /// get time required to obtain a given performance measure
    double t_for_perf(double perf) const;
    /// easy-to-read output
    void display(bool only_optimal = true) const;
    /// output to a format easy to digest by gnuplot
    /// @param fname  name of the output file
    void all_to_gnuplot(const char* fname) const;
    /// same as all_to_gnuplot, for the optimal points
    void optimal_to_gnuplot(const char* fname) const;
 };
 /// possible values of a parameter, sorted from least to most expensive/accurate
 struct ParameterRange {
    std::string name;           ///< name of the parameter
    std::vector<double> values; ///< candidate values, in the order above
 };
 /** Describes the tunable parameters of a Faiss index (derived from
 * a-priori knowledge on the index types) and drives the exploration of
 * their combinations.
 */
 struct ParameterSpace {
    /// every tunable parameter with its candidate values
    std::vector<ParameterRange> parameter_ranges;
    // --- exploration settings ---
    /// verbosity during exploration
    int verbose;
    /// nb of experiments during optimization (0 = try all combinations)
    int n_experiments;
    /// maximum number of queries to submit at a time.
    size_t batchsize;
    /// use multithreading over batches (useful to benchmark
    /// independent single-searches)
    bool thread_over_batches;
    /// run tests several times until they reach at least this
    /// duration (to avoid jittering in MT mode)
    double min_test_duration;
    ParameterSpace();
    /// nb of combinations, = product of values sizes
    size_t n_combinations() const;
    /// returns whether combinations c1 >= c2 in the tuple sense
    bool combination_ge(size_t c1, size_t c2) const;
    /// get string representation of the combination
    std::string combination_name(size_t cno) const;
    /// print a description on stdout
    void display() const;
    /// add a new parameter (or return it if it exists)
    ParameterRange& add_range(const std::string& name);
    /// initialize with reasonable parameters for the index
    virtual void initialize(const Index* index);
    /// set a combination of parameters (identified by its number) on an index
    void set_index_parameters(Index* index, size_t cno) const;
    /// set a combination of parameters described by a string
    void set_index_parameters(Index* index, const char* param_string) const;
    /// set one of the parameters on the index; there is no return value
    /// NOTE(review): how a failure is reported is not visible in this
    /// header; confirm against the implementation.
    virtual void set_index_parameter(
            Index* index, const std::string& name, double val) const;
    /** find an upper bound on the performance and a lower bound on t
     * for configuration cno given another operating point op
     *
     * @param upper_bound_perf  in/out bound on the performance
     * @param lower_bound_t     in/out bound on the time
     */
    void update_bounds(
            size_t cno,
            const OperatingPoint& op,
            double* upper_bound_perf,
            double* lower_bound_t) const;
    /** explore operating points
     * @param index   index to run on
     * @param nq      number of query vectors
     * @param xq      query vectors (size nq * index.d)
     * @param crit    selection criterion
     * @param ops     resulting operating points
     */
    void explore(
            Index* index,
            size_t nq,
            const float* xq,
            const AutoTuneCriterion& crit,
            OperatingPoints* ops) const;
    virtual ~ParameterSpace() = default;
 };
 } // namespace faiss
 #endif
--- a/src/3rdlib/faiss/Clustering.h
+++ b/src/3rdlib/faiss/Clustering.h
@ -0,0 +1,182 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #ifndef FAISS_CLUSTERING_H
 #define FAISS_CLUSTERING_H
 #include <faiss/Index.h>
 #include <vector>
 namespace faiss {
 /** Class for the clustering parameters. Can be passed to the
 * constructor of the Clustering object.
 */
 struct ClusteringParameters {
    int niter; ///< clustering iterations
    int nredo; ///< redo clustering this many times and keep best
    bool verbose;          ///< verbosity flag
    bool spherical;        ///< do we want normalized centroids?
    bool int_centroids;    ///< round centroids coordinates to integer
    bool update_index;     ///< re-train index after each iteration?
    bool frozen_centroids; ///< use the centroids provided as input and do not
                           ///< change them during iterations
    int min_points_per_centroid; ///< otherwise you get a warning
    int max_points_per_centroid; ///< to limit size of dataset
    int seed; ///< seed for the random number generator
    size_t decode_block_size; ///< how many vectors at a time to decode
    /// sets reasonable defaults
    ClusteringParameters();
 };
 /// Statistics of one clustering iteration (Clustering and
 /// ProgressiveDimClustering keep one entry per iteration in iteration_stats).
 struct ClusteringIterationStats {
    float obj;   ///< objective values (sum of distances reported by index)
    double time; ///< seconds for iteration
    double time_search;      ///< seconds for just search
    double imbalance_factor; ///< imbalance factor of iteration
    int nsplit;              ///< number of cluster splits
 };
 /** K-means clustering based on assignment - centroid update iterations
 *
 * An Index object is used to assign the training points to the
 * centroids, so at each iteration the centroids are added to the index.
 *
 * On output, the centroids table holds the latest version of the
 * centroids, which are also added to the index. If the centroids table
 * is not empty on input, it is also used for initialization.
 */
 struct Clustering : ClusteringParameters {
    using idx_t = Index::idx_t;
    size_t d; ///< dimension of the vectors
    size_t k; ///< nb of centroids
    /** centroids (k * d)
     * when set before calling train, they serve as the initialization
     */
    std::vector<float> centroids;
    /// stats at every iteration of clustering
    std::vector<ClusteringIterationStats> iteration_stats;
    /// @param d  dimension of the vectors
    /// @param k  nb of centroids
    Clustering(int d, int k);
    /// same as above, with explicit clustering parameters
    Clustering(int d, int k, const ClusteringParameters& cp);
    /** run k-means training
     *
     * @param n          nb of training vectors
     * @param x          training vectors, size n * d
     * @param index      index used for assignment
     * @param x_weights  weight associated to each vector: NULL or size n
     */
    virtual void train(
            idx_t n,
            const float* x,
            faiss::Index& index,
            const float* x_weights = nullptr);
    /** run with encoded vectors
     *
     * in addition to train()'s parameters, takes a codec as parameter
     * to decode the input vectors.
     *
     * @param codec      codec used to decode the vectors (nullptr =
     *                   vectors are in fact floats)
     */
    void train_encoded(
            idx_t nx,
            const uint8_t* x_in,
            const Index* codec,
            Index& index,
            const float* weights = nullptr);
    /// Post-process the centroids after each centroid update.
    /// includes optional L2 normalization and nearest integer rounding
    void post_process_centroids();
    virtual ~Clustering() = default;
 };
 /// Clustering parameters extended with the settings used by
 /// ProgressiveDimClustering.
 struct ProgressiveDimClusteringParameters : ClusteringParameters {
    int progressive_dim_steps; ///< number of incremental steps
    bool apply_pca;            ///< apply PCA on input
    /// NOTE(review): presumably sets default values, like the base class
    /// constructor; confirm against the implementation.
    ProgressiveDimClusteringParameters();
 };
 /** Factory functor: when called, produces an index suitable for
 * clustering in the requested dimension. */
 struct ProgressiveDimIndexFactory {
    /// @param dim  dimension of the index to create
    /// @return the new index; ownership transferred to caller
    virtual Index* operator()(int dim);
    virtual ~ProgressiveDimIndexFactory() = default;
 };
 /** K-means clustering with progressive dimensions used
 *
 * Clustering starts in dimension 1 and proceeds with exponentially
 * increasing dimension until d is reached (I steps). It is typically
 * applied after an (optional) PCA transformation. Reference:
 *
 * "Improved Residual Vector Quantization for High-dimensional Approximate
 * Nearest Neighbor Search"
 *
 * Shicong Liu, Hongtao Lu, Junru Shao, AAAI'15
 *
 * https://arxiv.org/abs/1509.05195
 */
 struct ProgressiveDimClustering : ProgressiveDimClusteringParameters {
    using idx_t = Index::idx_t;
    size_t d; ///< dimension of the vectors
    size_t k; ///< nb of centroids
    /** centroids (k * d) */
    std::vector<float> centroids;
    /// stats at every iteration of clustering
    std::vector<ClusteringIterationStats> iteration_stats;
    /// @param d  dimension of the vectors
    /// @param k  nb of centroids
    ProgressiveDimClustering(int d, int k);
    /// same as above, with explicit clustering parameters
    ProgressiveDimClustering(
            int d, int k, const ProgressiveDimClusteringParameters& cp);
    /** run the training
     *
     * @param n        nb of training vectors
     * @param x        training vectors
     * @param factory  generates the indexes used during clustering
     */
    void train(idx_t n, const float* x, ProgressiveDimIndexFactory& factory);
    virtual ~ProgressiveDimClustering() = default;
 };
 /** simplified interface to k-means clustering
 *
 * @param d dimension of the data
 * @param n nb of training vectors
 * @param k nb of output centroids
 * @param x training set (size n * d)
 * @param centroids output centroids (size k * d); the buffer is provided
 *                  by the caller
 * @return final quantization error
 */
 float kmeans_clustering(
        size_t d,
        size_t n,
        size_t k,
        const float* x,
        float* centroids);
 } // namespace faiss
 #endif
--- a/src/3rdlib/faiss/IVFlib.h
+++ b/src/3rdlib/faiss/IVFlib.h
@ -0,0 +1,151 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #ifndef FAISS_IVFLIB_H
 #define FAISS_IVFLIB_H
 /** Since IVF (inverted file) indexes are of so much use for
 * large-scale use cases, we group a few functions related to them in
 * this small library. Most functions work both on IndexIVFs and
 * IndexIVFs embedded within an IndexPreTransform.
 */
 #include <faiss/IndexIVF.h>
 #include <vector>
 namespace faiss {
 namespace ivflib {
 /** check if two indexes have the same parameters and are trained in
 * the same way, otherwise throw.
 *
 * @param index1  first index to compare
 * @param index2  second index to compare
 */
 void check_compatible_for_merge(const Index* index1, const Index* index2);
 /** get an IndexIVF from an index. The index may be an IndexIVF or
 * some wrapper class that encloses an IndexIVF
 *
 * throws an exception if this is not the case.
 *
 * @param index  index to extract the IndexIVF from
 */
 const IndexIVF* extract_index_ivf(const Index* index);
 /// non-const overload of the function above
 IndexIVF* extract_index_ivf(Index* index);
 /// same as above but returns nullptr instead of throwing on failure
 const IndexIVF* try_extract_index_ivf(const Index* index);
 /// non-const overload of the function above
 IndexIVF* try_extract_index_ivf(Index* index);
 /** Merge index1 into index0. Works on IndexIVF's and IndexIVF's
 *  embedded in a IndexPreTransform. On output, the index1 is empty.
 *
 * @param index0     destination index
 * @param index1     source index, emptied by the merge
 * @param shift_ids: translate the ids from index1 to index0->prev_ntotal
 */
 void merge_into(Index* index0, Index* index1, bool shift_ids);
 using idx_t = Index::idx_t;
 /** Returns the cluster the embeddings belong to.
 *
 * @param index         Index, which should be an IVF index
 *                      (otherwise there are no clusters)
 * @param x             object descriptors (embeddings) for which the
 *                      centroids should be found, size n * d
 * @param n             number of objects
 * @param centroid_ids  cluster id each object belongs to, size n
 */
 void search_centroid(Index* index, const float* x, int n, idx_t* centroid_ids);
 /* Search that also returns the centroid ids of the queries and of the
 * results.
 *
 * @param index      Index, which should be an IVF index
 *                   (otherwise there are no clusters)
 * @param query_centroid_ids
 *                   centroid ids corresponding to the query vectors (size n)
 * @param result_centroid_ids
 *                   centroid ids corresponding to the results (size n * k)
 * other arguments are the same as the standard search function
 */
 void search_and_return_centroids(
        Index* index,
        size_t n,
        const float* xin,
        long k,
        float* distances,
        idx_t* labels,
        idx_t* query_centroid_ids,
        idx_t* result_centroid_ids);
 /** A set of IndexIVFs concatenated together in a FIFO fashion.
 * at each "step", the oldest index slice is removed and a new index is added.
 */
 struct SlidingIndexWindow {
    /// common index that contains the sliding window
    Index* index;
    /// InvertedLists of index
    ArrayInvertedLists* ils;
    /// number of slices currently in index
    int n_slice;
    /// same as index->nlist
    size_t nlist;
    /// cumulative list sizes at each slice
    std::vector<std::vector<size_t>> sizes;
    /// index should be initially empty and trained
    /// (note: the constructor is not explicit, so an Index* converts
    /// implicitly to a SlidingIndexWindow)
    SlidingIndexWindow(Index* index);
    /** Add one index to the current index and remove the oldest one.
     *
     * @param sub_index        slice to swap in (can be NULL)
     * @param remove_oldest    if true, remove the oldest slices */
    void step(const Index* sub_index, bool remove_oldest);
 };
 /// Get a subset of inverted lists [i0, i1)
 /// NOTE(review): ownership of the returned object is not stated in this
 /// header; confirm against the implementation.
 ArrayInvertedLists* get_invlist_range(const Index* index, long i0, long i1);
 /// Set a subset of inverted lists (same i0, i1 arguments as
 /// get_invlist_range), taking the lists from src
 void set_invlist_range(Index* index, long i0, long i1, ArrayInvertedLists* src);
 /** search an IndexIVF, possibly embedded in an IndexPreTransform with
 * given parameters. This is a way to set the nprobe and get
 * statistics in a thread-safe way.
 *
 * Optionally returns (if non-nullptr):
 * - nb_dis: number of distances computed
 * - ms_per_stage: [0]: preprocessing time
 *                 [1]: coarse quantization,
 *                 [2]: list scanning
 *
 * The n, x, k, distances and labels arguments have the same names as the
 * arguments of Index::search.
 */
 void search_with_parameters(
        const Index* index,
        idx_t n,
        const float* x,
        idx_t k,
        float* distances,
        idx_t* labels,
        const IVFSearchParameters* params,
        size_t* nb_dis = nullptr,
        double* ms_per_stage = nullptr);
 /** same as search_with_parameters but for range search
 *
 * @param radius  search radius
 * @param result  result table
 */
 void range_search_with_parameters(
        const Index* index,
        idx_t n,
        const float* x,
        float radius,
        RangeSearchResult* result,
        const IVFSearchParameters* params,
        size_t* nb_dis = nullptr,
        double* ms_per_stage = nullptr);
 } // namespace ivflib
 } // namespace faiss
 #endif
--- a/src/3rdlib/faiss/Index.h
+++ b/src/3rdlib/faiss/Index.h
@ -0,0 +1,248 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #ifndef FAISS_INDEX_H
 #define FAISS_INDEX_H
 #include <faiss/MetricType.h>
 #include <cstdio>
 #include <sstream>
 #include <string>
 #include <typeinfo>
 #define FAISS_VERSION_MAJOR 1
 #define FAISS_VERSION_MINOR 7
 #define FAISS_VERSION_PATCH 1
 /**
 * @namespace faiss
 *
 * Throughout the library, vectors are provided as float * pointers.
 * Most algorithms can be optimized when several vectors are processed
 * (added/searched) together in a batch. In this case, they are passed
 * in as a matrix. When n vectors of size d are provided as float * x,
 * component j of vector i is
 *
 *   x[ i * d + j ]
 *
 * where 0 <= i < n and 0 <= j < d. In other words, matrices are
 * always compact. When specifying the size of the matrix, we call it
 * an n*d matrix, which implies a row-major storage.
 */
 namespace faiss {
 /// Forward declarations see AuxIndexStructures.h
 struct IDSelector;
 struct RangeSearchResult;
 struct DistanceComputer;
 /** Abstract structure for an index, supports adding vectors and searching them.
 *
 * All vectors provided at add or search time are 32-bit float arrays,
 * although the internal representation may vary.
 */
 struct Index {
    using idx_t = int64_t; ///< all indices are this type
    using component_t = float;
    using distance_t = float;
    int d;        ///< vector dimension
    idx_t ntotal; ///< total nb of indexed vectors
    bool verbose; ///< verbosity level
    /// set if the Index does not require training, or if training is
    /// done already
    bool is_trained;
    /// type of metric this index uses for search
    MetricType metric_type;
    float metric_arg; ///< argument of the metric type
    /// Builds an empty index (ntotal = 0) that is flagged as trained.
    /// Note that the dimension is received as an idx_t but stored in the
    /// int member d, i.e. it is implicitly narrowed.
    explicit Index(idx_t d = 0, MetricType metric = METRIC_L2)
            : d(d),
              ntotal(0),
              verbose(false),
              is_trained(true),
              metric_type(metric),
              metric_arg(0) {}
    virtual ~Index();
    /** Perform training on a representative set of vectors
     *
     * @param n      nb of training vectors
     * @param x      training vectors, size n * d
     */
    virtual void train(idx_t n, const float* x);
    /** Add n vectors of dimension d to the index.
     *
     * Vectors are implicitly assigned labels ntotal .. ntotal + n - 1
     * This function slices the input vectors in chunks smaller than
     * blocksize_add and calls add_core.
     * NOTE(review): neither blocksize_add nor add_core is declared in this
     * struct, so the sentence above may be stale.
     * @param x      input matrix, size n * d
     */
    virtual void add(idx_t n, const float* x) = 0;
    /** Same as add, but stores xids instead of sequential ids.
     *
     * The default implementation fails with an assertion, as it is
     * not supported by all indexes.
     *
     * @param xids if non-null, ids to store for the vectors (size n)
     */
    virtual void add_with_ids(idx_t n, const float* x, const idx_t* xids);
    /** query n vectors of dimension d to the index.
     *
     * return at most k vectors. If there are not enough results for a
     * query, the result array is padded with -1s.
     *
     * @param x           input vectors to search, size n * d
     * @param labels      output labels of the NNs, size n*k
     * @param distances   output pairwise distances, size n*k
     */
    virtual void search(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels) const = 0;
    /** query n vectors of dimension d to the index.
     *
     * return all vectors with distance < radius. Note that many
     * indexes do not implement the range_search (only the k-NN search
     * is mandatory).
     *
     * @param x           input vectors to search, size n * d
     * @param radius      search radius
     * @param result      result table
     */
    virtual void range_search(
            idx_t n,
            const float* x,
            float radius,
            RangeSearchResult* result) const;
    /** return the indexes of the k vectors closest to the query x.
     *
     * This function is identical to search but only returns labels of
     * neighbors.
     * @param x           input vectors to search, size n * d
     * @param labels      output labels of the NNs, size n*k
     */
    virtual void assign(idx_t n, const float* x, idx_t* labels, idx_t k = 1)
            const;
    /// removes all elements from the database.
    virtual void reset() = 0;
    /** removes IDs from the index. Not supported by all
     * indexes. Returns the number of elements removed.
     */
    virtual size_t remove_ids(const IDSelector& sel);
    /** Reconstruct a stored vector (or an approximation if lossy coding)
     *
     * this function may not be defined for some indexes
     * @param key         id of the vector to reconstruct
     * @param recons      reconstructed vector (size d)
     */
    virtual void reconstruct(idx_t key, float* recons) const;
    /** Reconstruct vectors i0 to i0 + ni - 1
     *
     * this function may not be defined for some indexes
     * @param recons      reconstructed vector (size ni * d)
     */
    virtual void reconstruct_n(idx_t i0, idx_t ni, float* recons) const;
    /** Similar to search, but also reconstructs the stored vectors (or an
     * approximation in the case of lossy coding) for the search results.
     *
     * If there are not enough results for a query, the resulting arrays
     * are padded with -1s.
     *
     * @param recons      reconstructed vectors size (n, k, d)
     **/
    virtual void search_and_reconstruct(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels,
            float* recons) const;
    /** Computes a residual vector after indexing encoding.
     *
     * The residual vector is the difference between a vector and the
     * reconstruction that can be decoded from its representation in
     * the index. The residual can be used for multiple-stage indexing
     * methods, like IndexIVF's methods.
     *
     * @param x           input vector, size d
     * @param residual    output residual vector, size d
     * @param key         encoded index, as returned by search and assign
     */
    virtual void compute_residual(const float* x, float* residual, idx_t key)
            const;
    /** Computes a residual vector after indexing encoding (batch form).
     * Equivalent to calling compute_residual for each vector.
     *
     * The residual vector is the difference between a vector and the
     * reconstruction that can be decoded from its representation in
     * the index. The residual can be used for multiple-stage indexing
     * methods, like IndexIVF's methods.
     *
     * @param n           number of vectors
     * @param xs          input vectors, size (n x d)
     * @param residuals   output residual vectors, size (n x d)
     * @param keys        encoded index, as returned by search and assign
     */
    virtual void compute_residual_n(
            idx_t n,
            const float* xs,
            float* residuals,
            const idx_t* keys) const;
    /** Get a DistanceComputer (defined in AuxIndexStructures) object
     * for this kind of index.
     *
     * DistanceComputer is implemented for indexes that support random
     * access of their vectors.
     */
    virtual DistanceComputer* get_distance_computer() const;
    /* The standalone codec interface */
    /** size of the produced codes in bytes */
    virtual size_t sa_code_size() const;
    /** encode a set of vectors
     *
     * @param n       number of vectors
     * @param x       input vectors, size n * d
     * @param bytes   output encoded vectors, size n * sa_code_size()
     */
    virtual void sa_encode(idx_t n, const float* x, uint8_t* bytes) const;
    /** decode a set of vectors
     *
     * @param n       number of vectors
     * @param bytes   input encoded vectors, size n * sa_code_size()
     * @param x       output vectors, size n * d
     */
    virtual void sa_decode(idx_t n, const uint8_t* bytes, float* x) const;
 };
 } // namespace faiss
 #endif
--- a/src/3rdlib/faiss/Index2Layer.h
+++ b/src/3rdlib/faiss/Index2Layer.h
@ -0,0 +1,85 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #pragma once
 #include <vector>
 #include <faiss/IndexIVF.h>
 #include <faiss/IndexPQ.h>
 namespace faiss {
 struct IndexIVFPQ;
 /** Same as an IndexIVFPQ without the inverted lists: codes are stored
 * sequentially
 *
 * The class is mainly intended to store encoded vectors that can be
 * accessed randomly, the search function is not implemented.
 */
 struct Index2Layer : Index {
    /// first level quantizer
    Level1Quantizer q1;
    /// second level quantizer is always a PQ
    ProductQuantizer pq;
    /// Codes. Size ntotal * code_size.
    std::vector<uint8_t> codes;
    /// size of the code for the first level (ceil(log8(q1.nlist)))
    size_t code_size_1;
    /// size of the code for the second level
    size_t code_size_2;
    /// code_size_1 + code_size_2
    size_t code_size;
    /// NOTE(review): quantizer and nlist presumably configure q1, and M and
    /// nbit presumably configure pq; confirm against the implementation.
    Index2Layer(
            Index* quantizer,
            size_t nlist,
            int M,
            int nbit = 8,
            MetricType metric = METRIC_L2);
    Index2Layer();
    ~Index2Layer();
    void train(idx_t n, const float* x) override;
    void add(idx_t n, const float* x) override;
    /// not implemented
    void search(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels) const override;
    void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
    void reconstruct(idx_t key, float* recons) const override;
    void reset() override;
    DistanceComputer* get_distance_computer() const override;
    /// transfer the flat codes to an IVFPQ index
    void transfer_to_IVFPQ(IndexIVFPQ& other) const;
    /* The standalone codec interface */
    size_t sa_code_size() const override;
    void sa_encode(idx_t n, const float* x, uint8_t* bytes) const override;
    void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
 };
 } // namespace faiss
--- a/src/3rdlib/faiss/IndexAdditiveQuantizer.h
+++ b/src/3rdlib/faiss/IndexAdditiveQuantizer.h
@ -0,0 +1,207 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 #ifndef FAISS_INDEX_ADDITIVE_QUANTIZER_H
 #define FAISS_INDEX_ADDITIVE_QUANTIZER_H
 #include <faiss/impl/AdditiveQuantizer.h>
 #include <cstdint>
 #include <vector>
 #include <faiss/Index.h>
 #include <faiss/impl/LocalSearchQuantizer.h>
 #include <faiss/impl/ResidualQuantizer.h>
 #include <faiss/impl/platform_macros.h>
 namespace faiss {
 /// Abstract class for additive quantizers. The search functions are in common.
 struct IndexAdditiveQuantizer : Index {
    // the quantizer, this points to the relevant field in the inheriting
    // classes
    AdditiveQuantizer* aq;
    using Search_type_t = AdditiveQuantizer::Search_type_t;
    /// @param d       dimensionality of the input vectors
    /// @param aq      the quantizer (see the aq member)
    /// @param metric  type of metric used for search
    explicit IndexAdditiveQuantizer(
            idx_t d = 0,
            AdditiveQuantizer* aq = nullptr,
            MetricType metric = METRIC_L2);
    /// size of residual quantizer codes + norms
    size_t code_size;
    /// Codes. Size ntotal * code_size
    std::vector<uint8_t> codes;
    void search(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels) const override;
    void reset() override;
    void add(idx_t n, const float* x) override;
    /* The standalone codec interface */
    size_t sa_code_size() const override;
    void sa_encode(idx_t n, const float* x, uint8_t* bytes) const override;
    void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
 };
 /** Index based on a residual quantizer. Stored vectors are
 * approximated by residual quantization codes.
 * Can also be used as a codec
 */
 struct IndexResidualQuantizer : IndexAdditiveQuantizer {
    /// The residual quantizer used to encode the vectors
    ResidualQuantizer rq;
    /** Constructor.
     *
     * @param d      dimensionality of the input vectors
     * @param M      number of subquantizers
     * @param nbits  number of bit per subvector index
     */
    IndexResidualQuantizer(
            int d,        ///< dimensionality of the input vectors
            size_t M,     ///< number of subquantizers
            size_t nbits, ///< number of bit per subvector index
            MetricType metric = METRIC_L2,
            Search_type_t search_type = AdditiveQuantizer::ST_decompress);
    /// Variant taking a vector of nbits values.
    /// NOTE(review): presumably one entry per subquantizer; confirm
    /// against the implementation.
    IndexResidualQuantizer(
            int d,
            const std::vector<size_t>& nbits,
            MetricType metric = METRIC_L2,
            Search_type_t search_type = AdditiveQuantizer::ST_decompress);
    IndexResidualQuantizer();
    void train(idx_t n, const float* x) override;
 };
 /// Additive quantizer index that holds a LocalSearchQuantizer (member lsq).
 struct IndexLocalSearchQuantizer : IndexAdditiveQuantizer {
    /// The local search quantizer held by this index
    LocalSearchQuantizer lsq;
    /** Constructor.
     *
     * @param d      dimensionality of the input vectors
     * @param M      number of subquantizers
     * @param nbits  number of bit per subvector index
     */
    IndexLocalSearchQuantizer(
            int d,        ///< dimensionality of the input vectors
            size_t M,     ///< number of subquantizers
            size_t nbits, ///< number of bit per subvector index
            MetricType metric = METRIC_L2,
            Search_type_t search_type = AdditiveQuantizer::ST_decompress);
    IndexLocalSearchQuantizer();
    void train(idx_t n, const float* x) override;
 };
 /** A "virtual" index where the elements are the additive quantizer
 * centroids.
 *
 * Intended for use as a coarse quantizer in an IndexIVF.
 */
 struct AdditiveCoarseQuantizer : Index {
    /// the quantizer; the derived classes visible in this header each
    /// hold a concrete quantizer member (rq, lsq)
    AdditiveQuantizer* aq;
    /// @param d       dimensionality of the input vectors
    /// @param aq      the quantizer (see the aq member)
    /// @param metric  type of metric used for search
    explicit AdditiveCoarseQuantizer(
            idx_t d = 0,
            AdditiveQuantizer* aq = nullptr,
            MetricType metric = METRIC_L2);
    /// norms of centroids, useful for knn-search
    std::vector<float> centroid_norms;
    /// N/A
    void add(idx_t n, const float* x) override;
    void search(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels) const override;
    void reconstruct(idx_t key, float* recons) const override;
    void train(idx_t n, const float* x) override;
    /// N/A
    void reset() override;
 };
 /** The ResidualCoarseQuantizer is a bit specialized compared to the
 * default AdditiveCoarseQuantizer because it can use a beam search
 * at search time (slow but may be useful for very large vocabularies) */
 struct ResidualCoarseQuantizer : AdditiveCoarseQuantizer {
    /// The residual quantizer used to encode the vectors
    ResidualQuantizer rq;
    /// factor between the beam size and the search k
    /// if negative, use exact search-to-centroid
    float beam_factor;
    /// computes centroid norms if required
    void set_beam_factor(float new_beam_factor);
    /** Constructor.
     *
     * @param d      dimensionality of the input vectors
     * @param M      number of subquantizers
     * @param nbits  number of bit per subvector index
     */
    ResidualCoarseQuantizer(
            int d,        ///< dimensionality of the input vectors
            size_t M,     ///< number of subquantizers
            size_t nbits, ///< number of bit per subvector index
            MetricType metric = METRIC_L2);
    /// Variant taking a vector of nbits values.
    /// NOTE(review): presumably one entry per subquantizer; confirm
    /// against the implementation.
    ResidualCoarseQuantizer(
            int d,
            const std::vector<size_t>& nbits,
            MetricType metric = METRIC_L2);
    /// search override specific to this class (see the class comment about
    /// beam search and beam_factor)
    void search(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels) const override;
    ResidualCoarseQuantizer();
 };
 /// AdditiveCoarseQuantizer that holds a LocalSearchQuantizer (member lsq).
 struct LocalSearchCoarseQuantizer : AdditiveCoarseQuantizer {
    /// The local search quantizer used to encode the vectors
    LocalSearchQuantizer lsq;
    /** Constructor.
     *
     * @param d      dimensionality of the input vectors
     * @param M      number of subquantizers
     * @param nbits  number of bit per subvector index
     */
    LocalSearchCoarseQuantizer(
            int d,        ///< dimensionality of the input vectors
            size_t M,     ///< number of subquantizers
            size_t nbits, ///< number of bit per subvector index
            MetricType metric = METRIC_L2);
    LocalSearchCoarseQuantizer();
 };
 } // namespace faiss
 #endif
--- a/src/3rdlib/faiss/IndexBinary.h
+++ b/src/3rdlib/faiss/IndexBinary.h
@ -0,0 +1,175 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #ifndef FAISS_INDEX_BINARY_H
 #define FAISS_INDEX_BINARY_H
 #include <cstdio>
 #include <sstream>
 #include <string>
 #include <typeinfo>
 #include <faiss/Index.h>
 #include <faiss/impl/FaissAssert.h>
 namespace faiss {
 /// Forward declarations see AuxIndexStructures.h
 struct IDSelector;
 struct RangeSearchResult;
 /** Abstract structure for a binary index.
 *
 * Supports adding vectors and searching them.
 *
 * All queries are symmetric because there is no distinction between codes and
 * vectors.
 *
 * Binary vectors are passed as packed bytes: a vector of dimension d occupies
 * d / 8 bytes (see code_size).
 */
 struct IndexBinary {
    using idx_t = Index::idx_t; ///< all indices are this type
    using component_t = uint8_t; ///< one packed byte of a binary vector
    using distance_t = int32_t; ///< integer distance type (as output by search)
    int d;         ///< vector dimension
    int code_size; ///< number of bytes per vector ( = d / 8 )
    idx_t ntotal;  ///< total nb of indexed vectors
    bool verbose;  ///< verbosity level
    /// set if the Index does not require training, or if training is done
    /// already
    bool is_trained;
    /// type of metric this index uses for search
    MetricType metric_type;
    /** Constructor.
     *
     * Initializes code_size to d / 8, ntotal to 0, verbose to false and
     * is_trained to true, then checks with FAISS_THROW_IF_NOT that d is a
     * multiple of 8.
     *
     * NOTE(review): the parameter is an idx_t but the member `d` is an int,
     * so the value is narrowed on initialization.
     *
     * @param d       vector dimension (must be a multiple of 8)
     * @param metric  metric type (METRIC_L2 by default)
     */
    explicit IndexBinary(idx_t d = 0, MetricType metric = METRIC_L2)
            : d(d),
              code_size(d / 8),
              ntotal(0),
              verbose(false),
              is_trained(true),
              metric_type(metric) {
        FAISS_THROW_IF_NOT(d % 8 == 0);
    }
    /// Virtual destructor so that derived indexes can be deleted through an
    /// IndexBinary pointer.
    virtual ~IndexBinary();
    /** Perform training on a representative set of vectors.
     *
     * @param n      nb of training vectors
     * @param x      training vectors, size n * d / 8
     */
    virtual void train(idx_t n, const uint8_t* x);
    /** Add n vectors of dimension d to the index.
     *
     * Vectors are implicitly assigned labels ntotal .. ntotal + n - 1
     * @param n      nb of vectors to add
     * @param x      input matrix, size n * d / 8
     */
    virtual void add(idx_t n, const uint8_t* x) = 0;
    /** Same as add, but stores xids instead of sequential ids.
     *
     * The default implementation fails with an assertion, as it is
     * not supported by all indexes.
     *
     * @param n    nb of vectors to add
     * @param x    input matrix, size n * d / 8
     * @param xids if non-null, ids to store for the vectors (size n)
     */
    virtual void add_with_ids(idx_t n, const uint8_t* x, const idx_t* xids);
    /** Query n vectors of dimension d to the index.
     *
     * return at most k vectors. If there are not enough results for a
     * query, the result array is padded with -1s.
     *
     * @param n           nb of query vectors
     * @param x           input vectors to search, size n * d / 8
     * @param k           nb of nearest neighbors requested per query
     * @param labels      output labels of the NNs, size n*k
     * @param distances   output pairwise distances, size n*k
     */
    virtual void search(
            idx_t n,
            const uint8_t* x,
            idx_t k,
            int32_t* distances,
            idx_t* labels) const = 0;
    /** Query n vectors of dimension d to the index.
     *
     * return all vectors with distance < radius. Note that many indexes
     * do not implement the range_search (only the k-NN search is
     * mandatory). The distances are converted to float to reuse the
     * RangeSearchResult structure, but they are integer. By convention,
     * only distances < radius (strict comparison) are returned,
     * ie. radius = 0 does not return any result and 1 returns only
     * exact same vectors.
     *
     * @param n           nb of query vectors
     * @param x           input vectors to search, size n * d / 8
     * @param radius      search radius
     * @param result      result table
     */
    virtual void range_search(
            idx_t n,
            const uint8_t* x,
            int radius,
            RangeSearchResult* result) const;
    /** Return the indexes of the k vectors closest to the query x.
     *
     * This function is identical to search but only returns labels of
     * neighbors.
     * @param n           nb of query vectors
     * @param x           input vectors to search, size n * d / 8
     * @param labels      output labels of the NNs, size n*k
     * @param k           nb of nearest neighbors per query (1 by default)
     */
    void assign(idx_t n, const uint8_t* x, idx_t* labels, idx_t k = 1) const;
    /// Removes all elements from the database.
    virtual void reset() = 0;
    /** Removes IDs from the index. Not supported by all indexes.
     *
     * @param sel   selector describing the ids to remove
     * @return      a count; presumably the number of removed elements
     *              (TODO confirm against the implementations)
     */
    virtual size_t remove_ids(const IDSelector& sel);
    /** Reconstruct a stored vector.
     *
     * This function may not be defined for some indexes.
     * @param key         id of the vector to reconstruct
     * @param recons      reconstructed vector (size d / 8)
     */
    virtual void reconstruct(idx_t key, uint8_t* recons) const;
    /** Reconstruct vectors i0 to i0 + ni - 1.
     *
     * This function may not be defined for some indexes.
     * @param i0          id of the first vector to reconstruct
     * @param ni          nb of vectors to reconstruct
     * @param recons      reconstructed vectors (size ni * d / 8)
     */
    virtual void reconstruct_n(idx_t i0, idx_t ni, uint8_t* recons) const;
    /** Similar to search, but also reconstructs the stored vectors (or an
     * approximation in the case of lossy coding) for the search results.
     *
     * If there are not enough results for a query, the resulting array
     * is padded with -1s.
     *
     * @param recons      reconstructed vectors size (n, k, d)
     *                    NOTE(review): the IndexBinaryIVF override documents
     *                    this as (n, k, d / 8); confirm which one is meant.
     **/
    virtual void search_and_reconstruct(
            idx_t n,
            const uint8_t* x,
            idx_t k,
            int32_t* distances,
            idx_t* labels,
            uint8_t* recons) const;
    /** Display the actual class name and some more info. */
    void display() const;
 };
 } // namespace faiss
 #endif // FAISS_INDEX_BINARY_H
--- a/src/3rdlib/faiss/IndexBinaryFlat.h
+++ b/src/3rdlib/faiss/IndexBinaryFlat.h
@ -0,0 +1,62 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #ifndef INDEX_BINARY_FLAT_H
 #define INDEX_BINARY_FLAT_H
 #include <vector>
 #include <faiss/IndexBinary.h>
 namespace faiss {
 /** Index that stores the full vectors and performs exhaustive search. */
 struct IndexBinaryFlat : IndexBinary {
    /// database vectors, size ntotal * d / 8
    std::vector<uint8_t> xb;
    /** Select between using a heap or counting to select the k smallest values
     * when scanning inverted lists.
     *
     * NOTE(review): the "inverted lists" wording matches the comment in
     * IndexBinaryIVF; a flat index has none, so this presumably applies to
     * the scan of xb -- confirm.
     */
    bool use_heap = true;
    /// presumably the number of queries handled together during search
    /// (TODO confirm against the implementation); defaults to 32
    size_t query_batch_size = 32;
    /// Construct an empty flat index for vectors of dimension d.
    explicit IndexBinaryFlat(idx_t d);
    /// Add n vectors (size n * d / 8) to the index.
    void add(idx_t n, const uint8_t* x) override;
    /// Removes all elements from the database.
    void reset() override;
    /// k-NN search; see IndexBinary::search for the parameter layout.
    void search(
            idx_t n,
            const uint8_t* x,
            idx_t k,
            int32_t* distances,
            idx_t* labels) const override;
    /// Range search; see IndexBinary::range_search for the conventions.
    void range_search(
            idx_t n,
            const uint8_t* x,
            int radius,
            RangeSearchResult* result) const override;
    /// Reconstruct the stored vector with id `key` into recons (size d / 8).
    void reconstruct(idx_t key, uint8_t* recons) const override;
    /** Remove some ids. Note that because of the indexing structure,
     * the semantics of this operation are different from the usual ones:
     * the new ids are shifted. */
    size_t remove_ids(const IDSelector& sel) override;
    /// Default constructor; relies on the defaults of IndexBinary and on the
    /// in-class member initializers above.
    IndexBinaryFlat() {}
 };
 } // namespace faiss
 #endif // INDEX_BINARY_FLAT_H
--- a/src/3rdlib/faiss/IndexBinaryFromFloat.h
+++ b/src/3rdlib/faiss/IndexBinaryFromFloat.h
@ -0,0 +1,53 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #ifndef FAISS_INDEX_BINARY_FROM_FLOAT_H
 #define FAISS_INDEX_BINARY_FROM_FLOAT_H
 #include <faiss/IndexBinary.h>
 namespace faiss {
 struct Index;
 /** IndexBinary backed by a float Index.
 *
 * Supports adding vectors and searching them.
 *
 * All queries are symmetric because there is no distinction between codes and
 * vectors.
 */
 struct IndexBinaryFromFloat : IndexBinary {
    Index* index = nullptr; ///< the float index backing this binary index
    bool own_fields = false; ///< Whether object owns the index pointer.
    /// Default constructor; `index` stays null and `own_fields` false.
    IndexBinaryFromFloat();
    /// Wrap an existing float index (own_fields defaults to false, so the
    /// pointer is presumably borrowed unless own_fields is set -- confirm).
    explicit IndexBinaryFromFloat(Index* index);
    /// Destructor (defined out of line; see own_fields for ownership).
    ~IndexBinaryFromFloat();
    /// Add n binary vectors (size n * d / 8).
    void add(idx_t n, const uint8_t* x) override;
    /// Removes all elements from the database.
    void reset() override;
    /// k-NN search; see IndexBinary::search for the parameter layout.
    void search(
            idx_t n,
            const uint8_t* x,
            idx_t k,
            int32_t* distances,
            idx_t* labels) const override;
    /// Train on n binary vectors (size n * d / 8).
    void train(idx_t n, const uint8_t* x) override;
 };
 } // namespace faiss
 #endif // FAISS_INDEX_BINARY_FROM_FLOAT_H
--- a/src/3rdlib/faiss/IndexBinaryHNSW.h
+++ b/src/3rdlib/faiss/IndexBinaryHNSW.h
@ -0,0 +1,57 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #pragma once
 #include <faiss/IndexBinaryFlat.h>
 #include <faiss/impl/HNSW.h>
 #include <faiss/utils/utils.h>
 namespace faiss {
 /** The HNSW index is a normal random-access index with a HNSW
 * link structure built on top */
 struct IndexBinaryHNSW : IndexBinary {
    typedef HNSW::storage_idx_t storage_idx_t;
    // the link structure
    HNSW hnsw;
    // the sequential storage (`storage`) and its ownership flag:
    // own_fields presumably tells whether `storage` is owned by this
    // object -- confirm in the destructor
    bool own_fields;
    IndexBinary* storage;
    /// Default constructor (defined out of line).
    explicit IndexBinaryHNSW();
    /// Construct for vectors of dimension d; M is an HNSW parameter
    /// (32 by default).
    explicit IndexBinaryHNSW(int d, int M = 32);
    /// Construct on top of an existing storage index; M is an HNSW
    /// parameter (32 by default).
    explicit IndexBinaryHNSW(IndexBinary* storage, int M = 32);
    ~IndexBinaryHNSW() override;
    /// Returns a distance computer; ownership of the returned pointer is
    /// not visible from this header -- confirm before relying on it.
    DistanceComputer* get_distance_computer() const;
    /// Add n binary vectors (size n * d / 8).
    void add(idx_t n, const uint8_t* x) override;
    /// Trains the storage if needed
    void train(idx_t n, const uint8_t* x) override;
    /// entry point for search
    void search(
            idx_t n,
            const uint8_t* x,
            idx_t k,
            int32_t* distances,
            idx_t* labels) const override;
    /// Reconstruct the stored vector with id `key` into recons.
    void reconstruct(idx_t key, uint8_t* recons) const override;
    /// Removes all elements from the database.
    void reset() override;
 };
 } // namespace faiss
--- a/src/3rdlib/faiss/IndexBinaryHash.h
+++ b/src/3rdlib/faiss/IndexBinaryHash.h
@ -0,0 +1,124 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #ifndef FAISS_BINARY_HASH_H
 #define FAISS_BINARY_HASH_H
 #include <unordered_map>
 #include <vector>
 #include <faiss/IndexBinary.h>
 #include <faiss/IndexBinaryFlat.h>
 #include <faiss/impl/platform_macros.h>
 #include <faiss/utils/Heap.h>
 namespace faiss {
 struct RangeSearchResult;
 /** just uses the b first bits as a hash value */
 struct IndexBinaryHash : IndexBinary {
    /// Content of one hash bucket: ids and the corresponding codes.
    struct InvertedList {
        std::vector<idx_t> ids;    ///< ids of the vectors in this list
        std::vector<uint8_t> vecs; ///< codes of the vectors in this list
        /// Append one (id, code) entry; code_size is the code length in
        /// bytes.
        void add(idx_t id, size_t code_size, const uint8_t* code);
    };
    /// Maps a hash value to its inverted list.
    using InvertedListMap = std::unordered_map<idx_t, InvertedList>;
    InvertedListMap invlists;
    /// b: nb of leading bits used as the hash value.
    /// nflip: presumably the nb of bit flips to use at search time, as
    /// documented for IndexBinaryMultiHash::nflip -- confirm.
    int b, nflip;
    /// Construct for vectors of dimension d hashed on their b first bits.
    IndexBinaryHash(int d, int b);
    /// Default constructor (defined out of line).
    IndexBinaryHash();
    /// Removes all elements from the database.
    void reset() override;
    /// Add n binary vectors (size n * d / 8).
    void add(idx_t n, const uint8_t* x) override;
    /// Same as add, but stores xids instead of sequential ids.
    void add_with_ids(idx_t n, const uint8_t* x, const idx_t* xids) override;
    /// Range search; see IndexBinary::range_search for the conventions.
    void range_search(
            idx_t n,
            const uint8_t* x,
            int radius,
            RangeSearchResult* result) const override;
    /// k-NN search; see IndexBinary::search for the parameter layout.
    void search(
            idx_t n,
            const uint8_t* x,
            idx_t k,
            int32_t* distances,
            idx_t* labels) const override;
    /// NOTE(review): IndexBinary::display() is non-virtual, so this
    /// declaration hides it rather than overriding it.
    void display() const;
    /// Returns a size for the hash table (exact unit not visible here --
    /// TODO confirm whether entries or bytes).
    size_t hashtable_size() const;
 };
 /// Counters collected while searching binary hash indexes.
 struct IndexBinaryHashStats {
    size_t nq;    // nb of queries run
    size_t n0;    // nb of empty lists
    size_t nlist; // nb of non-empty inverted lists scanned
    size_t ndis;  // nb of distances computed
    /// The constructor delegates to reset() to initialize the counters.
    IndexBinaryHashStats() {
        reset();
    }
    /// Reset the counters (defined out of line).
    void reset();
 };
 /// Global statistics object (declared here, defined elsewhere).
 FAISS_API extern IndexBinaryHashStats indexBinaryHash_stats;
 /** Binary index using nhash hash maps of b bits each, with the vectors
 * themselves kept in a separate flat storage.
 *
 * NOTE(review): the original comment here was identical to the one on
 * IndexBinaryHash ("just uses the b first bits as a hash value"); how the
 * nhash hash values are derived from a vector is not visible in this
 * header.
 */
 struct IndexBinaryMultiHash : IndexBinary {
    // where the vectors are actually stored
    IndexBinaryFlat* storage;
    // presumably whether `storage` is owned by this object -- confirm in
    // the destructor
    bool own_fields;
    // maps hash values to the ids that hash to them
    using Map = std::unordered_map<idx_t, std::vector<idx_t>>;
    // the different hashes, size nhash
    std::vector<Map> maps;
    int nhash; ///< nb of hash maps
    int b;     ///< nb bits per hash map
    int nflip; ///< nb bit flips to use at search time
    /// Construct for vectors of dimension d with nhash maps of b bits each.
    IndexBinaryMultiHash(int d, int nhash, int b);
    /// Default constructor (defined out of line).
    IndexBinaryMultiHash();
    /// Destructor (defined out of line; see own_fields for ownership).
    ~IndexBinaryMultiHash();
    /// Removes all elements from the database.
    void reset() override;
    /// Add n binary vectors (size n * d / 8).
    void add(idx_t n, const uint8_t* x) override;
    /// Range search; see IndexBinary::range_search for the conventions.
    void range_search(
            idx_t n,
            const uint8_t* x,
            int radius,
            RangeSearchResult* result) const override;
    /// k-NN search; see IndexBinary::search for the parameter layout.
    void search(
            idx_t n,
            const uint8_t* x,
            idx_t k,
            int32_t* distances,
            idx_t* labels) const override;
    /// Returns a size for the hash tables (exact unit not visible here --
    /// TODO confirm whether entries or bytes).
    size_t hashtable_size() const;
 };
 } // namespace faiss
 #endif
--- a/src/3rdlib/faiss/IndexBinaryIVF.h
+++ b/src/3rdlib/faiss/IndexBinaryIVF.h
@ -0,0 +1,250 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #ifndef FAISS_INDEX_BINARY_IVF_H
 #define FAISS_INDEX_BINARY_IVF_H
 #include <vector>
 #include <faiss/Clustering.h>
 #include <faiss/IndexBinary.h>
 #include <faiss/IndexIVF.h>
 #include <faiss/utils/Heap.h>
 namespace faiss {
 struct BinaryInvertedListScanner;
 /** Index based on an inverted file (IVF)
 *
 * In the inverted file, the quantizer (an IndexBinary instance) provides a
 * quantization index for each vector to be added. The quantization
 * index maps to a list (aka inverted list or posting list), where the
 * id of the vector is stored.
 *
 * Otherwise the object is similar to the IndexIVF
 */
 struct IndexBinaryIVF : IndexBinary {
    /// Access to the actual data
    InvertedLists* invlists;
    /// presumably whether `invlists` is owned by this object (see the `own`
    /// argument of replace_invlists) -- confirm in the destructor
    bool own_invlists;
    size_t nprobe;    ///< number of probes at query time
    size_t max_codes; ///< max nb of codes to visit to do a query
    /** Select between using a heap or counting to select the k smallest values
     * when scanning inverted lists.
     */
    bool use_heap = true;
    /// map for direct access to the elements. Enables reconstruct().
    DirectMap direct_map;
    IndexBinary* quantizer; ///< quantizer that maps vectors to inverted lists
    size_t nlist;           ///< number of possible key values
    bool own_fields; ///< whether object owns the quantizer
    ClusteringParameters cp; ///< to override default clustering params
    Index* clustering_index; ///< to override index used during clustering
    /** The Inverted file takes a quantizer (an IndexBinary) on input,
     * which implements the function mapping a vector to a list
     * identifier. The pointer is borrowed: the quantizer should not
     * be deleted while the IndexBinaryIVF is in use.
     *
     * @param quantizer  quantizer mapping vectors to inverted lists
     * @param d          vector dimension
     * @param nlist      number of inverted lists
     */
    IndexBinaryIVF(IndexBinary* quantizer, size_t d, size_t nlist);
    /// Default constructor (defined out of line).
    IndexBinaryIVF();
    ~IndexBinaryIVF() override;
    /// Removes all elements from the database.
    void reset() override;
    /// Trains the quantizer
    void train(idx_t n, const uint8_t* x) override;
    /// Add n binary vectors (size n * d / 8).
    void add(idx_t n, const uint8_t* x) override;
    /// Same as add, but stores xids instead of sequential ids.
    void add_with_ids(idx_t n, const uint8_t* x, const idx_t* xids) override;
    /** Implementation of vector addition where the vector assignments are
     * predefined.
     *
     * @param n                  nb of vectors to add
     * @param x                  vectors to add
     * @param xids               ids to store for the vectors
     * @param precomputed_idx    quantization indices for the input vectors
     * (size n)
     */
    void add_core(
            idx_t n,
            const uint8_t* x,
            const idx_t* xids,
            const idx_t* precomputed_idx);
    /** Search a set of vectors, that are pre-quantized by the IVF
     *  quantizer. Fill in the corresponding heaps with the query
     *  results. search() calls this.
     *
     * @param n      nb of vectors to query
     * @param x      query vectors (n of them; presumably n * d / 8 bytes as
     *               for search() -- TODO confirm)
     * @param k      nb of results requested per query
     * @param assign coarse quantization indices, size n * nprobe
     * @param centroid_dis
     *               distances to coarse centroids, size n * nprobe
     * @param distances
     *               output distances, size n * k
     * @param labels output labels, size n * k
     * @param store_pairs store inv list index + inv list offset
     *                     in the upper/lower 32 bits of the result,
     *                     instead of ids (used for reranking).
     * @param params used to override the object's search parameters
     */
    void search_preassigned(
            idx_t n,
            const uint8_t* x,
            idx_t k,
            const idx_t* assign,
            const int32_t* centroid_dis,
            int32_t* distances,
            idx_t* labels,
            bool store_pairs,
            const IVFSearchParameters* params = nullptr) const;
    /// Returns a scanner for the inverted lists; ownership of the returned
    /// pointer is not visible from this header -- confirm before use.
    /// @param store_pairs  see search_preassigned()
    virtual BinaryInvertedListScanner* get_InvertedListScanner(
            bool store_pairs = false) const;
    /** assign the vectors, then call search_preassigned */
    void search(
            idx_t n,
            const uint8_t* x,
            idx_t k,
            int32_t* distances,
            idx_t* labels) const override;
    /// Range search; see IndexBinary::range_search for the conventions.
    void range_search(
            idx_t n,
            const uint8_t* x,
            int radius,
            RangeSearchResult* result) const override;
    /// Range search on vectors whose coarse assignment (assign,
    /// centroid_dis) is already known; the range-search counterpart of
    /// search_preassigned().
    void range_search_preassigned(
            idx_t n,
            const uint8_t* x,
            int radius,
            const idx_t* assign,
            const int32_t* centroid_dis,
            RangeSearchResult* result) const;
    /// Reconstruct the stored vector with id `key` (per the comment on
    /// direct_map, this relies on the direct map).
    void reconstruct(idx_t key, uint8_t* recons) const override;
    /** Reconstruct a subset of the indexed vectors.
     *
     * Overrides default implementation to bypass reconstruct() which requires
     * direct_map to be maintained.
     *
     * @param i0     first vector to reconstruct
     * @param ni     nb of vectors to reconstruct
     * @param recons output array of reconstructed vectors, size ni * d / 8
     */
    void reconstruct_n(idx_t i0, idx_t ni, uint8_t* recons) const override;
    /** Similar to search, but also reconstructs the stored vectors (or an
     * approximation in the case of lossy coding) for the search results.
     *
     * Overrides default implementation to avoid having to maintain direct_map
     * and instead fetch the code offsets through the `store_pairs` flag in
     * search_preassigned().
     *
     * @param recons      reconstructed vectors size (n, k, d / 8)
     */
    void search_and_reconstruct(
            idx_t n,
            const uint8_t* x,
            idx_t k,
            int32_t* distances,
            idx_t* labels,
            uint8_t* recons) const override;
    /** Reconstruct a vector given the location in terms of (inv list index +
     * inv list offset) instead of the id.
     *
     * Useful for reconstructing when the direct_map is not maintained and
     * the inv list offset is computed by search_preassigned() with
     * `store_pairs` set.
     *
     * @param list_no  inverted list index
     * @param offset   offset of the vector within that list
     * @param recons   output reconstructed vector
     */
    virtual void reconstruct_from_offset(
            idx_t list_no,
            idx_t offset,
            uint8_t* recons) const;
    /// Dataset manipulation functions
    size_t remove_ids(const IDSelector& sel) override;
    /** moves the entries from another dataset to self. On output,
     * other is empty. add_id is added to all moved ids (for
     * sequential ids, this would be this->ntotal) */
    virtual void merge_from(IndexBinaryIVF& other, idx_t add_id);
    /// Size of inverted list list_no, as reported by invlists->list_size().
    /// invlists is dereferenced without a null check.
    size_t get_list_size(size_t list_no) const {
        return invlists->list_size(list_no);
    }
    /** initialize a direct map
     *
     * @param new_maintain_direct_map    if true, create a direct map,
     *                                   else clear it
     */
    void make_direct_map(bool new_maintain_direct_map = true);
    /// Select the type of the direct map (see DirectMap::Type).
    void set_direct_map_type(DirectMap::Type type);
    /// Replace the inverted lists; `own` (false by default) presumably sets
    /// own_invlists -- confirm against the implementation.
    void replace_invlists(InvertedLists* il, bool own = false);
 };
 /// Abstract interface for scanning the codes of an inverted list against a
 /// query; instances are obtained from
 /// IndexBinaryIVF::get_InvertedListScanner().
 struct BinaryInvertedListScanner {
    using idx_t = Index::idx_t;
    /// from now on we handle this query.
    virtual void set_query(const uint8_t* query_vector) = 0;
    /// following codes come from this inverted list
    /// NOTE(review): coarse_dis is a uint8_t here, while the centroid_dis
    /// arrays of IndexBinaryIVF::search_preassigned() are int32_t; larger
    /// values would not fit -- confirm this is intended.
    virtual void set_list(idx_t list_no, uint8_t coarse_dis) = 0;
    /// compute a single query-to-code distance
    virtual uint32_t distance_to_code(const uint8_t* code) const = 0;
    /** compute the distances to codes. (distances, labels) should be
     * organized as a min- or max-heap
     *
     * @param n      number of codes to scan
     * @param codes  codes to scan (n * code_size)
     * @param ids        corresponding ids (ignored if store_pairs)
     * @param distances  heap distances (size k)
     * @param labels     heap labels (size k)
     * @param k          heap size
     * @return       a count; presumably the number of heap updates
     *               (TODO confirm against the implementations)
     */
    virtual size_t scan_codes(
            size_t n,
            const uint8_t* codes,
            const idx_t* ids,
            int32_t* distances,
            idx_t* labels,
            size_t k) const = 0;
    /** Range-search counterpart of scan_codes.
     *
     * @param n      number of codes to scan
     * @param codes  codes to scan
     * @param ids    corresponding ids
     * @param radius search radius
     * @param result where the matches are collected
     */
    virtual void scan_codes_range(
            size_t n,
            const uint8_t* codes,
            const idx_t* ids,
            int radius,
            RangeQueryResult& result) const = 0;
    /// Virtual destructor so that scanners can be deleted through the base
    /// pointer.
    virtual ~BinaryInvertedListScanner() {}
 };
 } // namespace faiss
 #endif // FAISS_INDEX_BINARY_IVF_H
--- a/src/3rdlib/faiss/IndexFlat.h
+++ b/src/3rdlib/faiss/IndexFlat.h
@ -0,0 +1,114 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #ifndef INDEX_FLAT_H
 #define INDEX_FLAT_H
 #include <vector>
 #include <faiss/Index.h>
 namespace faiss {
 /** Index that stores the full vectors and performs exhaustive search */
 struct IndexFlat : Index {
    /// database vectors, size ntotal * d
    std::vector<float> xb;
    /// Construct an empty flat index for vectors of dimension d using the
    /// given metric (METRIC_L2 by default).
    explicit IndexFlat(idx_t d, MetricType metric = METRIC_L2);
    /// Add n vectors (size n * d) to the index.
    void add(idx_t n, const float* x) override;
    /// Removes all elements from the database.
    void reset() override;
    /** k-NN search.
     *
     * @param n           nb of query vectors
     * @param x           query vectors, size n * d
     * @param k           nb of results requested per query
     * @param distances   output distances, size n * k
     * @param labels      output labels, size n * k
     */
    void search(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels) const override;
    /** Range search.
     *
     * @param n           nb of query vectors
     * @param x           query vectors, size n * d
     * @param radius      search radius
     * @param result      result table
     */
    void range_search(
            idx_t n,
            const float* x,
            float radius,
            RangeSearchResult* result) const override;
    /// Reconstruct the stored vector with id `key` into recons (size d).
    void reconstruct(idx_t key, float* recons) const override;
    /** compute distance with a subset of vectors
     *
     * @param n       nb of query vectors
     * @param x       query vectors, size n * d
     * @param k       nb of labels per query vector
     * @param labels  indices of the vectors that should be compared
     *                for each query vector, size n * k
     * @param distances
     *                corresponding output distances, size n * k
     */
    void compute_distance_subset(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            const idx_t* labels) const;
    /** remove some ids. NB that because of the structure of the
     * indexing structure, the semantics of this operation are
     * different from the usual ones: the new ids are shifted */
    size_t remove_ids(const IDSelector& sel) override;
    /// Default constructor; relies on the defaults of the Index base class.
    IndexFlat() {}
    /// Returns a distance computer; ownership of the returned pointer is
    /// not visible from this header -- confirm before relying on it.
    DistanceComputer* get_distance_computer() const override;
    /* The standalone codec interface (just memcopies in this case) */
    size_t sa_code_size() const override;
    void sa_encode(idx_t n, const float* x, uint8_t* bytes) const override;
    void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
 };
 /// IndexFlat whose metric is fixed to METRIC_INNER_PRODUCT.
 struct IndexFlatIP : IndexFlat {
    /// Construct for vectors of dimension d with the inner-product metric.
    explicit IndexFlatIP(idx_t d) : IndexFlat(d, METRIC_INNER_PRODUCT) {}
    /// Default constructor; delegates to IndexFlat's default constructor,
    /// so the metric is whatever the base classes default to.
    IndexFlatIP() {}
 };
 /// IndexFlat whose metric is fixed to METRIC_L2.
 struct IndexFlatL2 : IndexFlat {
    /// Construct for vectors of dimension d with the L2 metric.
    explicit IndexFlatL2(idx_t d) : IndexFlat(d, METRIC_L2) {}
    /// Default constructor; delegates to IndexFlat's default constructor.
    IndexFlatL2() {}
 };
 /// optimized version for 1D "vectors".
 /// Keeps a permutation (perm) of the database indices in sorted order.
 struct IndexFlat1D : IndexFlatL2 {
    bool continuous_update; ///< is the permutation updated continuously?
    std::vector<idx_t> perm; ///< sorted database indices
    /// @param continuous_update  initial value of the flag of the same
    ///                           name (true by default)
    explicit IndexFlat1D(bool continuous_update = true);
    /// if not continuous_update, call this between the last add and
    /// the first search
    void update_permutation();
    /// Add n 1D values to the index.
    void add(idx_t n, const float* x) override;
    /// Removes all elements from the database.
    void reset() override;
    /// Warn: the distances returned are L1 not L2
    void search(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels) const override;
 };
 } // namespace faiss
 #endif
--- a/src/3rdlib/faiss/IndexHNSW.h
+++ b/src/3rdlib/faiss/IndexHNSW.h
@ -0,0 +1,186 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #pragma once
 #include <vector>
 #include <faiss/IndexFlat.h>
 #include <faiss/IndexPQ.h>
 #include <faiss/IndexScalarQuantizer.h>
 #include <faiss/impl/HNSW.h>
 #include <faiss/utils/utils.h>
 namespace faiss {
 struct IndexHNSW;
 /// Helper attached to an IndexHNSW (see
 /// IndexHNSW::reconstruct_from_neighbors) that stores per-vector codes and
 /// a codebook and reconstructs vectors using their graph neighbors.
 struct ReconstructFromNeighbors {
    typedef Index::idx_t idx_t;
    typedef HNSW::storage_idx_t storage_idx_t;
    /// the index this object works on; held by reference, so it must
    /// outlive this object
    const IndexHNSW& index;
    size_t M;   // number of neighbors
    size_t k;   // number of codebook entries
    size_t nsq; // number of subvectors
    size_t code_size;
    int k_reorder; // nb to reorder. -1 = all
    std::vector<float> codebook; // size nsq * k * (M + 1)
    std::vector<uint8_t> codes; // size ntotal * code_size
    size_t ntotal;
    size_t d, dsub; // derived values
    /** Constructor.
     *
     * @param index  the HNSW index to reconstruct from
     * @param k      number of codebook entries (256 by default)
     * @param nsq    number of subvectors (1 by default)
     */
    explicit ReconstructFromNeighbors(
            const IndexHNSW& index,
            size_t k = 256,
            size_t nsq = 1);
    /// codes must be added in the correct order and the IndexHNSW
    /// must be populated and sorted
    void add_codes(size_t n, const float* x);
    /// Compute distances between `query` and the n entries of `shortlist`,
    /// written to `distances`. The meaning of the returned count is not
    /// visible from this header -- TODO confirm.
    size_t compute_distances(
            size_t n,
            const idx_t* shortlist,
            const float* query,
            float* distances) const;
    /// called by add_codes
    void estimate_code(const float* x, storage_idx_t i, uint8_t* code) const;
    /// called by compute_distances
    void reconstruct(storage_idx_t i, float* x, float* tmp) const;
    /// Reconstruct ni vectors starting at n0 into x.
    void reconstruct_n(storage_idx_t n0, storage_idx_t ni, float* x) const;
    /// get the M+1 -by-d table for neighbor coordinates for vector i
    void get_neighbor_table(storage_idx_t i, float* out) const;
 };
 /** The HNSW index is a normal random-access index with a HNSW
 * link structure built on top */
 struct IndexHNSW : Index {
    typedef HNSW::storage_idx_t storage_idx_t;
    // the link structure
    HNSW hnsw;
    // the sequential storage (`storage`) and its ownership flag:
    // own_fields presumably tells whether `storage` is owned by this
    // object -- confirm in the destructor
    bool own_fields;
    Index* storage;
    // optional helper for reconstruction from neighbors; whether it is owned
    // by this object is not visible from this header
    ReconstructFromNeighbors* reconstruct_from_neighbors;
    /// Construct for vectors of dimension d; M is an HNSW parameter
    /// (32 by default), metric defaults to METRIC_L2.
    explicit IndexHNSW(int d = 0, int M = 32, MetricType metric = METRIC_L2);
    /// Construct on top of an existing storage index; M is an HNSW
    /// parameter (32 by default).
    explicit IndexHNSW(Index* storage, int M = 32);
    ~IndexHNSW() override;
    /// Add n vectors (size n * d) to the index.
    void add(idx_t n, const float* x) override;
    /// Trains the storage if needed
    void train(idx_t n, const float* x) override;
    /// entry point for search
    void search(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels) const override;
    /// Reconstruct the stored vector with id `key` into recons.
    void reconstruct(idx_t key, float* recons) const override;
    /// Removes all elements from the database.
    void reset() override;
    /// Shrink the level-0 neighbor lists to `size` entries (details are in
    /// the implementation, not visible here).
    void shrink_level_0_neighbors(int size);
    /** Perform search only on level 0, given the starting points for
     * each vertex.
     *
     * @param n           nb of query vectors
     * @param x           query vectors
     * @param k           nb of results requested per query
     * @param nearest     starting points for each query
     * @param nearest_d   distances associated with the starting points
     * @param distances   output distances
     * @param labels      output labels
     * @param nprobe      nb of starting points per query (1 by default)
     * @param search_type 1:perform one search per nprobe, 2: enqueue
     *                    all entry points
     */
    void search_level_0(
            idx_t n,
            const float* x,
            idx_t k,
            const storage_idx_t* nearest,
            const float* nearest_d,
            float* distances,
            idx_t* labels,
            int nprobe = 1,
            int search_type = 1) const;
    /// alternative graph building
    void init_level_0_from_knngraph(int k, const float* D, const idx_t* I);
    /// alternative graph building
    void init_level_0_from_entry_points(
            int npt,
            const storage_idx_t* points,
            const storage_idx_t* nearests);
    // reorder links from nearest to farthest
    void reorder_links();
    // presumably connects vertices that have no links -- confirm against the
    // implementation
    void link_singletons();
 };
 /** Flat index topped with a HNSW structure to access elements
 *  more efficiently.
 */
 struct IndexHNSWFlat : IndexHNSW {
    /// Default constructor (defined out of line).
    IndexHNSWFlat();
    /// @param d       vector dimension
    /// @param M       HNSW parameter, as in IndexHNSW
    /// @param metric  metric type (METRIC_L2 by default)
    IndexHNSWFlat(int d, int M, MetricType metric = METRIC_L2);
 };
 /** PQ index topped with a HNSW structure to access elements
 *  more efficiently.
 */
 struct IndexHNSWPQ : IndexHNSW {
    /// Default constructor (defined out of line).
    IndexHNSWPQ();
    /// @param d     vector dimension
    /// @param pq_m  PQ parameter (presumably the nb of sub-quantizers --
    ///              TODO confirm)
    /// @param M     HNSW parameter, as in IndexHNSW
    IndexHNSWPQ(int d, int pq_m, int M);
    /// Training override specific to the PQ variant.
    void train(idx_t n, const float* x) override;
 };
 /** SQ index topped with a HNSW structure to access elements
 *  more efficiently.
 */
 struct IndexHNSWSQ : IndexHNSW {
    /// Default constructor (defined out of line).
    IndexHNSWSQ();
    /// @param d       vector dimension
    /// @param qtype   scalar quantizer type
    /// @param M       HNSW parameter, as in IndexHNSW
    /// @param metric  metric type (METRIC_L2 by default)
    IndexHNSWSQ(
            int d,
            ScalarQuantizer::QuantizerType qtype,
            int M,
            MetricType metric = METRIC_L2);
 };
 /** 2-level code structure with fast random access
 */
 struct IndexHNSW2Level : IndexHNSW {
    /// Default constructor (defined out of line).
    IndexHNSW2Level();
    /// @param quantizer  first-level quantizer
    /// @param nlist      nb of lists for the first level
    /// @param m_pq       PQ parameter (presumably the nb of sub-quantizers
    ///                   -- TODO confirm)
    /// @param M          HNSW parameter, as in IndexHNSW
    IndexHNSW2Level(Index* quantizer, size_t nlist, int m_pq, int M);
    /// Presumably converts the underlying storage to an IVF layout --
    /// confirm against the implementation.
    void flip_to_ivf();
    /// entry point for search
    void search(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels) const override;
 };
 } // namespace faiss
--- a/src/3rdlib/faiss/IndexIVF.h
+++ b/src/3rdlib/faiss/IndexIVF.h
@ -0,0 +1,434 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #ifndef FAISS_INDEX_IVF_H
 #define FAISS_INDEX_IVF_H
 #include <stdint.h>
 #include <unordered_map>
 #include <vector>
 #include <faiss/Clustering.h>
 #include <faiss/Index.h>
 #include <faiss/impl/platform_macros.h>
 #include <faiss/invlists/DirectMap.h>
 #include <faiss/invlists/InvertedLists.h>
 #include <faiss/utils/Heap.h>
 namespace faiss {
 /** Encapsulates a quantizer object for the IndexIVF
 *
 * The class isolates the fields that are independent of the storage
 * of the lists (especially training)
 */
 struct Level1Quantizer {
    Index* quantizer; ///< quantizer that maps vectors to inverted lists
    size_t nlist;     ///< number of possible key values
    /**
     * Selects how the quantizer is trained:
     * = 0: use the quantizer as index in a kmeans training
     * = 1: just pass on the training set to the train() of the quantizer
     * = 2: kmeans training on a flat index + add the centroids to the quantizer
     */
    char quantizer_trains_alone;
    bool own_fields; ///< whether object owns the quantizer (false by default)
    ClusteringParameters cp; ///< to override default clustering params
    Index* clustering_index; ///< to override index used during clustering
    /// Trains the quantizer on the n vectors in x.
    /// NOTE(review): the previous comment also mentioned a call to
    /// train_residual, but no such member is declared on this struct (it is
    /// declared on IndexIVF) -- confirm what the implementation does.
    void train_q1(
            size_t n,
            const float* x,
            bool verbose,
            MetricType metric_type);
    /// compute the number of bytes required to store list ids
    size_t coarse_code_size() const;
    /// write list_no into code (NOTE(review): presumably using
    /// coarse_code_size() bytes -- confirm)
    void encode_listno(Index::idx_t list_no, uint8_t* code) const;
    /// read back a list number written by encode_listno
    Index::idx_t decode_listno(const uint8_t* code) const;
    Level1Quantizer(Index* quantizer, size_t nlist);
    Level1Quantizer();
    ~Level1Quantizer();
 };
 /** Per-call search settings for IVF indexes: one probe and no cap on
 * visited codes (max_codes == 0) unless the caller changes them. */
 struct IVFSearchParameters {
    /// number of probes at query time
    size_t nprobe = 1;
    /// max nb of codes to visit to do a query
    size_t max_codes = 0;
    IVFSearchParameters() = default;
    virtual ~IVFSearchParameters() = default;
 };
 struct InvertedListScanner;
 struct IndexIVFStats;
 /** Index based on an inverted file (IVF)
 *
 * In the inverted file, the quantizer (an Index instance) provides a
 * quantization index for each vector to be added. The quantization
 * index maps to a list (aka inverted list or posting list), where the
 * id of the vector is stored.
 *
 * The inverted list object is required only after training. If none is
 * set externally, an ArrayInvertedLists is used automatically.
 *
 * At search time, the vector to be searched is also quantized, and
 * only the list corresponding to the quantization index is
 * searched. This speeds up the search by making it
 * non-exhaustive. This can be relaxed using multi-probe search: a few
 * (nprobe) quantization indices are selected and several inverted
 * lists are visited.
 *
 * Sub-classes implement a post-filtering of the index that refines
 * the distance estimation from the query to database vectors.
 */
 struct IndexIVF : Index, Level1Quantizer {
    /// Access to the actual data
    InvertedLists* invlists;
    /// whether invlists is owned by this object (see replace_invlists)
    bool own_invlists;
    size_t code_size; ///< code size per vector in bytes
    size_t nprobe;    ///< number of probes at query time
    size_t max_codes; ///< max nb of codes to visit to do a query
    /** Parallel mode determines how queries are parallelized with OpenMP
     *
     * 0 (default): split over queries
     * 1: parallelize over inverted lists
     * 2: parallelize over both
     * 3: split over queries with a finer granularity
     *
     * PARALLEL_MODE_NO_HEAP_INIT: binary or with the previous to
     * prevent the heap to be initialized and finalized
     */
    int parallel_mode;
    /// flag bit that can be or-ed into parallel_mode (see above)
    const int PARALLEL_MODE_NO_HEAP_INIT = 1024;
    /** optional map that maps back ids to invlist entries. This
     *  enables reconstruct() */
    DirectMap direct_map;
    /** The Inverted file takes a quantizer (an Index) on input,
     * which implements the function mapping a vector to a list
     * identifier.
     */
    IndexIVF(
            Index* quantizer,
            size_t d,
            size_t nlist,
            size_t code_size,
            MetricType metric = METRIC_L2);
    void reset() override;
    /// Trains the quantizer and calls train_residual to train sub-quantizers
    void train(idx_t n, const float* x) override;
    /// Calls add_with_ids with NULL ids
    void add(idx_t n, const float* x) override;
    /// default implementation that calls encode_vectors
    void add_with_ids(idx_t n, const float* x, const idx_t* xids) override;
    /** Implementation of vector addition where the vector assignments are
     * predefined. The default implementation hands over the code extraction to
     * encode_vectors.
     *
     * @param precomputed_idx    quantization indices for the input vectors
     * (size n)
     */
    virtual void add_core(
            idx_t n,
            const float* x,
            const idx_t* xids,
            const idx_t* precomputed_idx);
    /** Encodes a set of vectors as they would appear in the inverted lists
     *
     * Pure virtual: every concrete sub-class must provide it.
     *
     * @param list_nos   inverted list ids as returned by the
     *                   quantizer (size n). -1s are ignored.
     * @param codes      output codes, size n * code_size
     * @param include_listno
     *                   include the list ids in the code (in this case add
     *                   ceil(log8(nlist)) to the code size)
     */
    virtual void encode_vectors(
            idx_t n,
            const float* x,
            const idx_t* list_nos,
            uint8_t* codes,
            bool include_listno = false) const = 0;
    /** Add vectors that are computed with the standalone codec
     *
     * @param codes  codes to add size n * sa_code_size()
     * @param xids   corresponding ids, size n
     */
    void add_sa_codes(idx_t n, const uint8_t* codes, const idx_t* xids);
    /// Sub-classes that encode the residuals can train their encoders here
    /// does nothing by default
    virtual void train_residual(idx_t n, const float* x);
    /** search a set of vectors, that are pre-quantized by the IVF
     *  quantizer. Fill in the corresponding heaps with the query
     *  results. The default implementation uses InvertedListScanners
     *  to do the search.
     *
     * @param n      nb of vectors to query
     * @param x      query vectors, size n * d
     * @param k      nb of results per query (see the output sizes below)
     * @param assign coarse quantization indices, size n * nprobe
     * @param centroid_dis
     *               distances to coarse centroids, size n * nprobe
     * @param distances
     *               output distances, size n * k
     * @param labels output labels, size n * k
     * @param store_pairs store inv list index + inv list offset in the
     *                     upper/lower 32 bit of the result, instead of
     *                     ids (used for reranking).
     * @param params used to override the object's search parameters
     * @param stats  search stats to be updated (can be null)
     */
    virtual void search_preassigned(
            idx_t n,
            const float* x,
            idx_t k,
            const idx_t* assign,
            const float* centroid_dis,
            float* distances,
            idx_t* labels,
            bool store_pairs,
            const IVFSearchParameters* params = nullptr,
            IndexIVFStats* stats = nullptr) const;
    /** assign the vectors, then call search_preassigned */
    void search(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels) const override;
    /// range search entry point (overrides the base-class version)
    void range_search(
            idx_t n,
            const float* x,
            float radius,
            RangeSearchResult* result) const override;
    /// range search for queries whose coarse assignment (keys, coarse_dis)
    /// is supplied by the caller; params and stats are optional
    void range_search_preassigned(
            idx_t nx,
            const float* x,
            float radius,
            const idx_t* keys,
            const float* coarse_dis,
            RangeSearchResult* result,
            bool store_pairs = false,
            const IVFSearchParameters* params = nullptr,
            IndexIVFStats* stats = nullptr) const;
    /** Get a scanner for this index (store_pairs means ignore labels)
     *
     * The default search implementation uses this to compute the distances
     */
    virtual InvertedListScanner* get_InvertedListScanner(
            bool store_pairs = false) const;
    /** reconstruct a vector. Works only if maintain_direct_map is set to 1 or 2
     */
    void reconstruct(idx_t key, float* recons) const override;
    /** Update a subset of vectors.
     *
     * The index must have a direct_map
     *
     * @param nv     nb of vectors to update
     * @param idx    vector indices to update, size nv
     * @param v      vectors of new values, size nv*d
     */
    virtual void update_vectors(int nv, const idx_t* idx, const float* v);
    /** Reconstruct a subset of the indexed vectors.
     *
     * Overrides default implementation to bypass reconstruct() which requires
     * direct_map to be maintained.
     *
     * @param i0     first vector to reconstruct
     * @param ni     nb of vectors to reconstruct
     * @param recons output array of reconstructed vectors, size ni * d
     */
    void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
    /** Similar to search, but also reconstructs the stored vectors (or an
     * approximation in the case of lossy coding) for the search results.
     *
     * Overrides default implementation to avoid having to maintain direct_map
     * and instead fetch the code offsets through the `store_pairs` flag in
     * search_preassigned().
     *
     * @param recons      reconstructed vectors size (n, k, d)
     */
    void search_and_reconstruct(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels,
            float* recons) const override;
    /** Reconstruct a vector given the location in terms of (inv list index +
     * inv list offset) instead of the id.
     *
     * Useful for reconstructing when the direct_map is not maintained and
     * the inv list offset is computed by search_preassigned() with
     * `store_pairs` set.
     */
    virtual void reconstruct_from_offset(
            int64_t list_no,
            int64_t offset,
            float* recons) const;
    /// Dataset manipulation functions
    size_t remove_ids(const IDSelector& sel) override;
    /** check that the two indexes are compatible (ie, they are
     * trained in the same way and have the same
     * parameters). Otherwise throw. */
    void check_compatible_for_merge(const IndexIVF& other) const;
    /** moves the entries from another dataset to self. On output,
     * other is empty. add_id is added to all moved ids (for
     * sequential ids, this would be this->ntotal) */
    virtual void merge_from(IndexIVF& other, idx_t add_id);
    /** copy a subset of the entries index to the other index
     *
     * if subset_type == 0: copies ids in [a1, a2)
     * if subset_type == 1: copies ids if id % a1 == a2
     * if subset_type == 2: copies inverted lists such that a1
     *                      elements are left before and a2 elements are after
     */
    virtual void copy_subset_to(
            IndexIVF& other,
            int subset_type,
            idx_t a1,
            idx_t a2) const;
    ~IndexIVF() override;
    /// number of entries in inverted list list_no (forwards to invlists)
    size_t get_list_size(size_t list_no) const {
        return invlists->list_size(list_no);
    }
    /** initialize a direct map
     *
     * @param new_maintain_direct_map    if true, create a direct map,
     *                                   else clear it
     */
    void make_direct_map(bool new_maintain_direct_map = true);
    /// select the kind of direct map to maintain
    void set_direct_map_type(DirectMap::Type type);
    /// replace the inverted lists, old one is deallocated if own_invlists
    void replace_invlists(InvertedLists* il, bool own = false);
    /* The standalone codec interface (except sa_decode that is specific) */
    size_t sa_code_size() const override;
    void sa_encode(idx_t n, const float* x, uint8_t* bytes) const override;
    IndexIVF();
 };
 struct RangeQueryResult;
 /** Object that handles a query. The inverted lists to scan are
 * provided externally. The object has a lot of state, but
 * distance_to_code and scan_codes can be called in multiple
 * threads */
 struct InvertedListScanner {
    using idx_t = Index::idx_t;
    idx_t list_no = -1;    ///< remember current list
    bool keep_max = false; ///< keep maximum instead of minimum
    /// store positions in invlists rather than labels
    bool store_pairs = false;
    /// used in default implementation of scan_codes
    size_t code_size = 0;
    /// from now on we handle this query.
    virtual void set_query(const float* query_vector) = 0;
    /// following codes come from this inverted list
    virtual void set_list(idx_t list_no, float coarse_dis) = 0;
    /// compute a single query-to-code distance
    virtual float distance_to_code(const uint8_t* code) const = 0;
    /** scan a set of codes, compute distances to current query and
     * update heap of results if necessary. Default implementation
     * calls distance_to_code.
     *
     * @param n      number of codes to scan
     * @param codes  codes to scan (n * code_size)
     * @param ids        corresponding ids (ignored if store_pairs)
     * @param distances  heap distances (size k)
     * @param labels     heap labels (size k)
     * @param k          heap size
     * @return number of heap updates performed
     */
    virtual size_t scan_codes(
            size_t n,
            const uint8_t* codes,
            const idx_t* ids,
            float* distances,
            idx_t* labels,
            size_t k) const;
    /** scan a set of codes, compute distances to current query and
     * update results if distances are below radius
     *
     * (default implementation fails) */
    virtual void scan_codes_range(
            size_t n,
            const uint8_t* codes,
            const idx_t* ids,
            float radius,
            RangeQueryResult& result) const;
    /// virtual so that scanners can be destroyed through a base pointer
    virtual ~InvertedListScanner() {}
 };
 /** Counters and timings describing IVF searches. The constructor calls
 * reset(); add() combines the values of another stats object into this
 * one (both are defined outside this header). */
 struct IndexIVFStats {
    size_t nq;                // nb of queries run
    size_t nlist;             // nb of inverted lists scanned
    size_t ndis;              // nb of distances computed
    size_t nheap_updates;     // nb of times the heap was updated
    double quantization_time; // time spent quantizing vectors (in ms)
    double search_time;       // time spent searching lists (in ms)
    IndexIVFStats() {
        reset();
    }
    // NOTE(review): presumably zeroes every field -- the definition is not
    // visible from this header.
    void reset();
    void add(const IndexIVFStats& other);
 };
 // global var that collects them all
 FAISS_API extern IndexIVFStats indexIVF_stats;
 } // namespace faiss
 #endif
--- a/src/3rdlib/faiss/IndexIVFAdditiveQuantizer.h
+++ b/src/3rdlib/faiss/IndexIVFAdditiveQuantizer.h
@ -0,0 +1,121 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 #ifndef FAISS_INDEX_IVF_ADDITIVE_QUANTIZER_H
 #define FAISS_INDEX_IVF_ADDITIVE_QUANTIZER_H
 #include <faiss/impl/AdditiveQuantizer.h>
 #include <cstdint>
 #include <vector>
 #include <faiss/IndexIVF.h>
 #include <faiss/impl/LocalSearchQuantizer.h>
 #include <faiss/impl/ResidualQuantizer.h>
 #include <faiss/impl/platform_macros.h>
 namespace faiss {
 /// Abstract class for IVF additive quantizers.
 /// The search functions are in common.
 struct IndexIVFAdditiveQuantizer : IndexIVF {
    // the quantizer (raw pointer; the sub-classes declared in this header
    // each hold a concrete quantizer object as a member)
    AdditiveQuantizer* aq;
    // NOTE(review): presumably selects encoding of the residual w.r.t. the
    // coarse centroid rather than the plain vector -- confirm
    bool by_residual = true;
    int use_precomputed_table = 0; // for future use
    using Search_type_t = AdditiveQuantizer::Search_type_t;
    IndexIVFAdditiveQuantizer(
            AdditiveQuantizer* aq,
            Index* quantizer,
            size_t d,
            size_t nlist,
            MetricType metric = METRIC_L2);
    explicit IndexIVFAdditiveQuantizer(AdditiveQuantizer* aq);
    void train_residual(idx_t n, const float* x) override;
    void encode_vectors(
            idx_t n,
            const float* x,
            const idx_t* list_nos,
            uint8_t* codes,
            bool include_listnos = false) const override;
    InvertedListScanner* get_InvertedListScanner(
            bool store_pairs) const override;
    ~IndexIVFAdditiveQuantizer() override;
 };
 /** IndexIVF based on a residual quantizer. Stored vectors are
 * approximated by residual quantization codes.
 */
 struct IndexIVFResidualQuantizer : IndexIVFAdditiveQuantizer {
    /// The residual quantizer used to encode the vectors
    ResidualQuantizer rq;
    /** Constructor taking one nbits entry per sub-quantizer.
     *
     * @param d      dimensionality of the input vectors
     * @param nbits  number of bits per subvector index, as a vector
     *               (NOTE(review): presumably one entry per subquantizer,
     *               so its size plays the role of M -- confirm)
     */
    IndexIVFResidualQuantizer(
            Index* quantizer,
            size_t d,
            size_t nlist,
            const std::vector<size_t>& nbits,
            MetricType metric = METRIC_L2,
            Search_type_t search_type = AdditiveQuantizer::ST_decompress);
    /** Constructor taking a single nbits value.
     *
     * @param d      dimensionality of the input vectors
     * @param M      number of subquantizers
     * @param nbits  number of bit per subvector index
     */
    IndexIVFResidualQuantizer(
            Index* quantizer,
            size_t d,
            size_t nlist,
            size_t M,     /* number of subquantizers */
            size_t nbits, /* number of bit per subvector index */
            MetricType metric = METRIC_L2,
            Search_type_t search_type = AdditiveQuantizer::ST_decompress);
    IndexIVFResidualQuantizer();
    virtual ~IndexIVFResidualQuantizer();
 };
 /** IndexIVF based on a local search quantizer (LSQ). Stored vectors are
 * approximated by the codes produced by the lsq member.
 */
 struct IndexIVFLocalSearchQuantizer : IndexIVFAdditiveQuantizer {
    /// The LSQ quantizer used to encode the vectors
    LocalSearchQuantizer lsq;
    /** Constructor.
     *
     * @param d      dimensionality of the input vectors
     * @param M      number of subquantizers
     * @param nbits  number of bit per subvector index
     */
    IndexIVFLocalSearchQuantizer(
            Index* quantizer,
            size_t d,
            size_t nlist,
            size_t M,     /* number of subquantizers */
            size_t nbits, /* number of bit per subvector index */
            MetricType metric = METRIC_L2,
            Search_type_t search_type = AdditiveQuantizer::ST_decompress);
    IndexIVFLocalSearchQuantizer();
    virtual ~IndexIVFLocalSearchQuantizer();
 };
 } // namespace faiss
 #endif
--- a/src/3rdlib/faiss/IndexIVFFlat.h
+++ b/src/3rdlib/faiss/IndexIVFFlat.h
@ -0,0 +1,106 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #ifndef FAISS_INDEX_IVF_FLAT_H
 #define FAISS_INDEX_IVF_FLAT_H
 #include <stdint.h>
 #include <unordered_map>
 #include <faiss/IndexIVF.h>
 namespace faiss {
 /** Inverted file with stored vectors. Here the inverted file
 * pre-selects the vectors to be searched, but they are not otherwise
 * encoded, the code array just contains the raw float entries.
 */
 struct IndexIVFFlat : IndexIVF {
    /// The unnamed MetricType argument defaults to METRIC_L2.
    IndexIVFFlat(
            Index* quantizer,
            size_t d,
            size_t nlist_,
            MetricType = METRIC_L2);
    /// Overrides IndexIVF::add_core.
    void add_core(
            idx_t n,
            const float* x,
            const idx_t* xids,
            const idx_t* precomputed_idx) override;
    /// Overrides the pure virtual IndexIVF::encode_vectors.
    void encode_vectors(
            idx_t n,
            const float* x,
            const idx_t* list_nos,
            uint8_t* codes,
            bool include_listnos = false) const override;
    InvertedListScanner* get_InvertedListScanner(
            bool store_pairs) const override;
    void reconstruct_from_offset(int64_t list_no, int64_t offset, float* recons)
            const override;
    void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
    /// Default constructor with an empty body.
    IndexIVFFlat() {}
 };
 /** IndexIVFFlat variant that keeps track of identical vectors through the
 * `instances` map (see the member comment below). */
 struct IndexIVFFlatDedup : IndexIVFFlat {
    /** Maps ids stored in the index to the ids of vectors that are
     *  the same. When a vector is unique, it does not appear in the
     *  instances map */
    std::unordered_multimap<idx_t, idx_t> instances;
    /// The unnamed MetricType argument defaults to METRIC_L2.
    IndexIVFFlatDedup(
            Index* quantizer,
            size_t d,
            size_t nlist_,
            MetricType = METRIC_L2);
    /// also dedups the training set
    void train(idx_t n, const float* x) override;
    /// implemented for all IndexIVF* classes
    void add_with_ids(idx_t n, const float* x, const idx_t* xids) override;
    /// Overrides IndexIVF::search_preassigned with the same signature.
    void search_preassigned(
            idx_t n,
            const float* x,
            idx_t k,
            const idx_t* assign,
            const float* centroid_dis,
            float* distances,
            idx_t* labels,
            bool store_pairs,
            const IVFSearchParameters* params = nullptr,
            IndexIVFStats* stats = nullptr) const override;
    size_t remove_ids(const IDSelector& sel) override;
    /// not implemented
    void range_search(
            idx_t n,
            const float* x,
            float radius,
            RangeSearchResult* result) const override;
    /// not implemented
    void update_vectors(int nv, const idx_t* idx, const float* v) override;
    /// not implemented
    void reconstruct_from_offset(int64_t list_no, int64_t offset, float* recons)
            const override;
    /// Default constructor with an empty body.
    IndexIVFFlatDedup() {}
 };
 } // namespace faiss
 #endif
--- a/src/3rdlib/faiss/IndexIVFPQ.h
+++ b/src/3rdlib/faiss/IndexIVFPQ.h
@ -0,0 +1,186 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #ifndef FAISS_INDEX_IVFPQ_H
 #define FAISS_INDEX_IVFPQ_H
 #include <vector>
 #include <faiss/IndexIVF.h>
 #include <faiss/IndexPQ.h>
 #include <faiss/impl/platform_macros.h>
 #include <faiss/utils/AlignedTable.h>
 namespace faiss {
 /** IVFSearchParameters extended with the two IVFPQ search-time settings;
 * both start at 0. */
 struct IVFPQSearchParameters : IVFSearchParameters {
    /// use table computation or on-the-fly?
    size_t scan_table_threshold = 0;
    /// Hamming thresh for polysemous filtering
    int polysemous_ht = 0;
    IVFPQSearchParameters() = default;
    ~IVFPQSearchParameters() {}
 };
 FAISS_API extern size_t precomputed_table_max_bytes;
 /** Inverted file with Product Quantizer encoding. Each residual
 * vector is encoded as a product quantizer code.
 */
 struct IndexIVFPQ : IndexIVF {
    bool by_residual; ///< Encode residual or plain vector?
    ProductQuantizer pq; ///< produces the codes
    bool do_polysemous_training; ///< reorder PQ centroids after training?
    PolysemousTraining* polysemous_training; ///< if NULL, use default
    // search-time parameters
    size_t scan_table_threshold; ///< use table computation or on-the-fly?
    int polysemous_ht;           ///< Hamming thresh for polysemous filtering
    /** Precompute table that speed up query preprocessing at some
     * memory cost (used only for by_residual with L2 metric)
     */
    int use_precomputed_table;
    /// if use_precompute_table
    /// size nlist * pq.M * pq.ksub
    AlignedTable<float> precomputed_table;
    /// M and nbits_per_idx parametrize the product quantizer
    /// (NOTE(review): presumably number of sub-quantizers and bits per
    /// sub-quantizer index -- confirm); the metric defaults to METRIC_L2.
    IndexIVFPQ(
            Index* quantizer,
            size_t d,
            size_t nlist,
            size_t M,
            size_t nbits_per_idx,
            MetricType metric = METRIC_L2);
    /// Overrides the pure virtual IndexIVF::encode_vectors.
    void encode_vectors(
            idx_t n,
            const float* x,
            const idx_t* list_nos,
            uint8_t* codes,
            bool include_listnos = false) const override;
    void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
    void add_core(
            idx_t n,
            const float* x,
            const idx_t* xids,
            const idx_t* precomputed_idx) override;
    /// same as add_core, also:
    /// - output 2nd level residuals if residuals_2 != NULL
    /// - accepts precomputed_idx = nullptr
    void add_core_o(
            idx_t n,
            const float* x,
            const idx_t* xids,
            float* residuals_2,
            const idx_t* precomputed_idx = nullptr);
    /// trains the product quantizer
    void train_residual(idx_t n, const float* x) override;
    /// same as train_residual, also output 2nd level residuals
    void train_residual_o(idx_t n, const float* x, float* residuals_2);
    void reconstruct_from_offset(int64_t list_no, int64_t offset, float* recons)
            const override;
    /** Find exact duplicates in the dataset.
     *
     * the duplicates are returned in pre-allocated arrays (see the
     * max sizes).
     *
     * @param ids    ids[lims[i]] : ids[lims[i+1]-1] is a group of
     *                duplicates (max size ntotal)
     * @param lims   limits between groups of duplicates
     *                (max size ntotal / 2 + 1)
     * @return       number of groups found
     */
    size_t find_duplicates(idx_t* ids, size_t* lims) const;
    // map a vector to a binary code knowing the index
    void encode(idx_t key, const float* x, uint8_t* code) const;
    /** Encode multiple vectors
     *
     * @param n       nb vectors to encode
     * @param keys    posting list ids for those vectors (size n)
     * @param x       vectors (size n * d)
     * @param codes   output codes (size n * code_size)
     * @param compute_keys  if false, assume keys are precomputed,
     *                      otherwise compute them
     */
    void encode_multiple(
            size_t n,
            idx_t* keys,
            const float* x,
            uint8_t* codes,
            bool compute_keys = false) const;
    /// inverse of encode_multiple
    void decode_multiple(
            size_t n,
            const idx_t* keys,
            const uint8_t* xcodes,
            float* x) const;
    InvertedListScanner* get_InvertedListScanner(
            bool store_pairs) const override;
    /// build precomputed table
    void precompute_table();
    IndexIVFPQ();
 };
 /** Pre-compute distance tables for IVFPQ with by-residual and METRIC_L2
 *
 * @param use_precomputed_table (I/O)
 *        =-1: force disable
 *        =0: decide heuristically (default: use tables only if they are
 *            < precomputed_table_max_bytes), set use_precomputed_table on
 *            output
 *        =1: tables that work for all quantizers (size 256 * nlist * M)
 *        =2: specific version for MultiIndexQuantizer (much more compact)
 * @param quantizer         coarse quantizer of the index
 * @param pq                product quantizer of the index
 * @param precomputed_table precomputed table to initialize
 * @param verbose           NOTE(review): presumably enables progress
 *                          output -- confirm against the implementation
 */
 void initialize_IVFPQ_precomputed_table(
        int& use_precomputed_table,
        const Index* quantizer,
        const ProductQuantizer& pq,
        AlignedTable<float>& precomputed_table,
        bool verbose);
 /// statistics are robust to internal threading, but not if
 /// IndexIVFPQ::search_preassigned is called by multiple threads
 struct IndexIVFPQStats {
    size_t nrefine; ///< nb of refines (IVFPQR)
    size_t n_hamming_pass;
    ///< nb of passed Hamming distance tests (for polysemous)
    // timings measured with the CPU RTC on all threads
    size_t search_cycles;
    size_t refine_cycles; ///< only for IVFPQR
    /// The constructor delegates to reset().
    IndexIVFPQStats() {
        reset();
    }
    /// NOTE(review): presumably zeroes every counter -- the definition is
    /// not visible from this header.
    void reset();
 };
 // global var that collects them all
 FAISS_API extern IndexIVFPQStats indexIVFPQ_stats;
 } // namespace faiss
 #endif
--- a/src/3rdlib/faiss/IndexIVFPQFastScan.h
+++ b/src/3rdlib/faiss/IndexIVFPQFastScan.h
@ -0,0 +1,191 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 #pragma once
 #include <memory>
 #include <faiss/IndexIVFPQ.h>
 #include <faiss/impl/ProductQuantizer.h>
 #include <faiss/utils/AlignedTable.h>
 namespace faiss {
 /** Fast scan version of IVFPQ. Works for 4-bit PQ for now.
 *
 * The codes in the inverted lists are not stored sequentially but
 * grouped in blocks of size bbs. This makes it possible to very quickly
 * compute distances with SIMD instructions.
 *
 * Implementations (implem):
 * 0: auto-select implementation (default)
 * 1: orig's search, re-implemented
 * 2: orig's search, re-ordered by invlist
 * 10: optimizer int16 search, collect results in heap, no qbs
 * 11: idem, collect results in reservoir
 * 12: optimizer int16 search, collect results in heap, uses qbs
 * 13: idem, collect results in reservoir
 */
 struct IndexIVFPQFastScan : IndexIVF {
    bool by_residual;    ///< Encode residual or plain vector?
    ProductQuantizer pq; ///< produces the codes
    // size of the kernel
    int bbs; // set at build time
    // M rounded up to a multiple of 2
    size_t M2;
    /// precomputed tables management
    int use_precomputed_table = 0;
    /// if use_precompute_table size (nlist, pq.M, pq.ksub)
    AlignedTable<float> precomputed_table;
    // search-time implementation (see the list in the struct comment)
    int implem = 0;
    // skip some parts of the computation (for timing)
    int skip = 0;
    // batching factors at search time (0 = default)
    int qbs = 0;
    size_t qbs2 = 0;
    /// The metric defaults to METRIC_L2 and the block size bbs to 32.
    IndexIVFPQFastScan(
            Index* quantizer,
            size_t d,
            size_t nlist,
            size_t M,
            size_t nbits_per_idx,
            MetricType metric = METRIC_L2,
            int bbs = 32);
    IndexIVFPQFastScan();
    // built from an IndexIVFPQ
    explicit IndexIVFPQFastScan(const IndexIVFPQ& orig, int bbs = 32);
    /// orig's inverted lists (for debugging)
    InvertedLists* orig_invlists = nullptr;
    void train_residual(idx_t n, const float* x) override;
    /// build precomputed table, possibly updating use_precomputed_table
    void precompute_table();
    /// same as the regular IVFPQ encoder. The codes are not reorganized by
    /// blocks at that point
    void encode_vectors(
            idx_t n,
            const float* x,
            const idx_t* list_nos,
            uint8_t* codes,
            bool include_listno = false) const override;
    void add_with_ids(idx_t n, const float* x, const idx_t* xids) override;
    /// search entry point (overrides the IndexIVF version)
    void search(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels) const override;
    // prepare look-up tables (float version)
    void compute_LUT(
            size_t n,
            const float* x,
            const idx_t* coarse_ids,
            const float* coarse_dis,
            AlignedTable<float>& dis_tables,
            AlignedTable<float>& biases) const;
    // prepare look-up tables in uint8 form (uint16 biases); also fills
    // the caller-supplied normalizers array
    void compute_LUT_uint8(
            size_t n,
            const float* x,
            const idx_t* coarse_ids,
            const float* coarse_dis,
            AlignedTable<uint8_t>& dis_tables,
            AlignedTable<uint16_t>& biases,
            float* normalizers) const;
    // internal search funcs
    template <bool is_max>
    void search_dispatch_implem(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels) const;
    template <class C>
    void search_implem_1(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels) const;
    template <class C>
    void search_implem_2(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels) const;
    // implem 10 and 12 are not multithreaded internally, so
    // export search stats
    template <class C>
    void search_implem_10(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels,
            int impl,
            size_t* ndis_out,
            size_t* nlist_out) const;
    template <class C>
    void search_implem_12(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels,
            int impl,
            size_t* ndis_out,
            size_t* nlist_out) const;
 };
 /** Timing counters for the fast-scan search code.
 *
 * NOTE(review): the values are presumably cycle counts ("Mcy" reads as
 * mega-cycles) -- confirm against the code that fills them in.
 *
 * Every field is a plain 64-bit counter. The constructor calls reset(),
 * which puts all of them back to zero.
 */
 struct IVFFastScanStats {
    uint64_t times[10];
    uint64_t t_compute_distance_tables, t_round;
    uint64_t t_copy_pack, t_scan, t_to_flat;
    uint64_t reservoir_times[4];
    /// times[i] divided by 1e6; i is not range-checked
    double Mcy_at(int i) const {
        return times[i] / (1000 * 1000.0);
    }
    /// reservoir_times[i] divided by 1e6; i is not range-checked
    double Mcy_reservoir_at(int i) const {
        return reservoir_times[i] / (1000 * 1000.0);
    }
    IVFFastScanStats() {
        reset();
    }
    /// Zero every counter. Written field by field so that this header does
    /// not rely on memset being declared by some other include.
    void reset() {
        for (uint64_t& t : times) {
            t = 0;
        }
        t_compute_distance_tables = t_round = 0;
        t_copy_pack = t_scan = t_to_flat = 0;
        for (uint64_t& t : reservoir_times) {
            t = 0;
        }
    }
 };
 FAISS_API extern IVFFastScanStats IVFFastScan_stats;
 } // namespace faiss
--- a/src/3rdlib/faiss/IndexIVFPQR.h
+++ b/src/3rdlib/faiss/IndexIVFPQR.h
@ -0,0 +1,71 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #pragma once
 #include <vector>
 #include <faiss/IndexIVFPQ.h>
 namespace faiss {
 /** Index with an additional level of PQ refinement.
 *
 * Adds a second product quantizer (refine_pq) and its codes (refine_codes)
 * on top of IndexIVFPQ.
 */
 struct IndexIVFPQR : IndexIVFPQ {
    ProductQuantizer refine_pq;        ///< 3rd level quantizer
    std::vector<uint8_t> refine_codes; ///< corresponding codes
    /// factor between k requested in search and the k requested from the IVFPQ
    float k_factor;
    /** Constructor.
     *
     * NOTE(review): the definition is not in this header; the parameter
     * roles below are inferred from their names -- confirm.
     *
     * @param quantizer             coarse quantizer handed to IndexIVFPQ
     * @param d                     dimensionality of the input vectors
     * @param nlist                 number of inverted lists
     * @param M                     number of sub-quantizers of the IVFPQ
     * @param nbits_per_idx         bits per sub-quantizer index of the IVFPQ
     * @param M_refine              number of sub-quantizers of refine_pq
     * @param nbits_per_idx_refine  bits per sub-quantizer index of refine_pq
     */
    IndexIVFPQR(
            Index* quantizer,
            size_t d,
            size_t nlist,
            size_t M,
            size_t nbits_per_idx,
            size_t M_refine,
            size_t nbits_per_idx_refine);
    // The methods below override the base-class versions; presumably so that
    // refine_codes is kept in step with the inverted lists -- confirm in the
    // implementation file.
    void reset() override;
    size_t remove_ids(const IDSelector& sel) override;
    /// trains the two product quantizers
    void train_residual(idx_t n, const float* x) override;
    void add_with_ids(idx_t n, const float* x, const idx_t* xids) override;
    /// same as add_with_ids, but optionally use the precomputed list ids
    void add_core(
            idx_t n,
            const float* x,
            const idx_t* xids,
            const idx_t* precomputed_idx) override;
    void reconstruct_from_offset(int64_t list_no, int64_t offset, float* recons)
            const override;
    void merge_from(IndexIVF& other, idx_t add_id) override;
    /// Search with the coarse assignment (assign, centroid_dis) supplied by
    /// the caller. params and stats are optional and default to nullptr.
    void search_preassigned(
            idx_t n,
            const float* x,
            idx_t k,
            const idx_t* assign,
            const float* centroid_dis,
            float* distances,
            idx_t* labels,
            bool store_pairs,
            const IVFSearchParameters* params = nullptr,
            IndexIVFStats* stats = nullptr) const override;
    /// Default constructor. NOTE(review): which fields it initializes is not
    /// visible from this header.
    IndexIVFPQR();
 };
 } // namespace faiss
--- a/src/3rdlib/faiss/IndexIVFSpectralHash.h
+++ b/src/3rdlib/faiss/IndexIVFSpectralHash.h
@ -0,0 +1,73 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #ifndef FAISS_INDEX_IVFSH_H
 #define FAISS_INDEX_IVFSH_H
 #include <vector>
 #include <faiss/IndexIVF.h>
 namespace faiss {
 struct VectorTransform;
 /** Inverted list that stores binary codes of size nbit. Before the
 * binary conversion, the dimension of the vectors is transformed from
 * dim d into dim nbit by vt (a random rotation by default).
 *
 * Each coordinate is subtracted from a value determined by
 * threshold_type, and split into intervals of size period. Half of
 * the interval is a 0 bit, the other half a 1.
 */
 struct IndexIVFSpectralHash : IndexIVF {
    VectorTransform* vt; // transformation from d to nbit dim
    // NOTE(review): presumably tells the destructor whether vt is owned by
    // this index -- confirm in the implementation file
    bool own_fields;
    // number of bits of the binary codes (see the struct comment)
    int nbit;
    // size of the intervals each coordinate is split into
    float period;
    // selects the value that is subtracted from each coordinate before the
    // split into intervals
    enum ThresholdType {
        Thresh_global,
        Thresh_centroid,
        Thresh_centroid_half,
        Thresh_median
    };
    ThresholdType threshold_type;
    // size nlist * nbit or 0 if Thresh_global
    std::vector<float> trained;
    // quantizer, d and nlist mirror the IndexIVF arguments of the same name
    // (presumably forwarded to the base class -- confirm); nbit and period
    // set the fields of the same name
    IndexIVFSpectralHash(
            Index* quantizer,
            size_t d,
            size_t nlist,
            int nbit,
            float period);
    IndexIVFSpectralHash();
    void train_residual(idx_t n, const float* x) override;
    // encodes n vectors of x into codes, given their list numbers;
    // include_listnos defaults to false
    void encode_vectors(
            idx_t n,
            const float* x,
            const idx_t* list_nos,
            uint8_t* codes,
            bool include_listnos = false) const override;
    InvertedListScanner* get_InvertedListScanner(
            bool store_pairs) const override;
    ~IndexIVFSpectralHash() override;
 };
 } // namespace faiss
 #endif
--- a/src/3rdlib/faiss/IndexLSH.h
+++ b/src/3rdlib/faiss/IndexLSH.h
@ -0,0 +1,85 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #ifndef INDEX_LSH_H
 #define INDEX_LSH_H
 #include <vector>
 #include <faiss/Index.h>
 #include <faiss/VectorTransform.h>
 namespace faiss {
 /** The sign of each vector component is put in a binary signature */
 struct IndexLSH : Index {
    // struct-local alias; inside IndexLSH the name uint8_t refers to this
    // typedef
    typedef unsigned char uint8_t;
    int nbits;             ///< nb of bits per vector
    int bytes_per_vec;     ///< nb of 8-bits per encoded vector
    bool rotate_data;      ///< whether to apply a random rotation to input
    bool train_thresholds; ///< whether we train thresholds or use 0
    RandomRotationMatrix rrot; ///< optional random rotation
    std::vector<float> thresholds; ///< thresholds to compare with
    /// encoded dataset
    std::vector<uint8_t> codes;
    /** Constructor.
     *
     * @param d                 dimensionality of the input vectors
     * @param nbits             nb of bits per vector
     * @param rotate_data       whether to apply a random rotation to input
     *                          (default true)
     * @param train_thresholds  whether we train thresholds or use 0
     *                          (default false)
     */
    IndexLSH(
            idx_t d,
            int nbits,
            bool rotate_data = true,
            bool train_thresholds = false);
    /** Preprocesses and resizes the input to the size required to
     * binarize the data
     *
     * @param n number of input vectors
     * @param x input vectors, size n * d
     * @return output vectors, size n * nbits. May be the same pointer
     *         as x, otherwise it should be deleted by the caller
     */
    const float* apply_preprocess(idx_t n, const float* x) const;
    void train(idx_t n, const float* x) override;
    void add(idx_t n, const float* x) override;
    void search(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels) const override;
    void reset() override;
    /// transfer the thresholds to a pre-processing stage (and unset
    /// train_thresholds)
    void transfer_thresholds(LinearTransform* vt);
    ~IndexLSH() override {}
    IndexLSH();
    /* standalone codec interface.
     *
     * The vectors are decoded to +/- 1 (not 0, 1) */
    size_t sa_code_size() const override;
    void sa_encode(idx_t n, const float* x, uint8_t* bytes) const override;
    void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
 };
 } // namespace faiss
 #endif
--- a/src/3rdlib/faiss/IndexLattice.h
+++ b/src/3rdlib/faiss/IndexLattice.h
@ -0,0 +1,63 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #ifndef FAISS_INDEX_LATTICE_H
 #define FAISS_INDEX_LATTICE_H
 #include <vector>
 #include <faiss/IndexIVF.h>
 #include <faiss/impl/lattice_Zn.h>
 namespace faiss {
 /** Index that encodes a vector with a series of Zn lattice quantizers.
 *
 * The input vector is handled as nsq sub-vectors of dimension dsq.
 */
 struct IndexLattice : Index {
    /// number of sub-vectors
    int nsq;
    /// dimension of sub-vectors
    size_t dsq;
    /// the lattice quantizer
    ZnSphereCodecAlt zn_sphere_codec;
    /// scale_nbit: nb bits used to encode the scale, per subvector.
    /// lattice_nbit: presumably the nb bits of the lattice code, per
    /// subvector -- TODO confirm in the implementation file
    int scale_nbit, lattice_nbit;
    /// total, in bytes
    size_t code_size;
    /// mins and maxes of the vector norms, per subquantizer
    std::vector<float> trained;
    /// @param d           dimensionality of the input vectors
    /// @param nsq         number of sub-vectors
    /// @param scale_nbit  nb bits used to encode the scale, per subvector
    /// @param r2          NOTE(review): presumably the squared radius handed
    ///                    to the Zn sphere codec -- confirm
    IndexLattice(idx_t d, int nsq, int scale_nbit, int r2);
    void train(idx_t n, const float* x) override;
    /* The standalone codec interface */
    size_t sa_code_size() const override;
    void sa_encode(idx_t n, const float* x, uint8_t* bytes) const override;
    void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
    /// not implemented
    /// NOTE(review): the original note sits above add() only; whether it
    /// also applies to search() and reset() should be checked in the
    /// implementation file.
    void add(idx_t n, const float* x) override;
    void search(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels) const override;
    void reset() override;
 };
 } // namespace faiss
 #endif
--- a/src/3rdlib/faiss/IndexNNDescent.h
+++ b/src/3rdlib/faiss/IndexNNDescent.h
@ -0,0 +1,72 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #pragma once
 #include <vector>
 #include <faiss/IndexFlat.h>
 #include <faiss/impl/NNDescent.h>
 #include <faiss/utils/utils.h>
 namespace faiss {
 /** The NNDescent index is a normal random-access index with an NNDescent
 * link structure built on top */
 struct IndexNNDescent : Index {
    // internal storage of vectors (32 bits)
    using storage_idx_t = NNDescent::storage_idx_t;
    /// Faiss results are 64-bit
    using idx_t = Index::idx_t;
    // the link structure
    NNDescent nndescent;
    // the sequential storage
    // NOTE(review): own_fields presumably decides whether the destructor
    // deletes storage -- confirm in the implementation file
    bool own_fields;
    Index* storage;
    /// @param d       dimensionality of the input vectors (default 0)
    /// @param K       NOTE(review): presumably the number of neighbors kept
    ///                per node in the NNDescent graph -- confirm (default 32)
    /// @param metric  metric type (default METRIC_L2)
    explicit IndexNNDescent(
            int d = 0,
            int K = 32,
            MetricType metric = METRIC_L2);
    /// Build on top of an existing storage index; K as above (default 32).
    explicit IndexNNDescent(Index* storage, int K = 32);
    ~IndexNNDescent() override;
    void add(idx_t n, const float* x) override;
    /// Trains the storage if needed
    void train(idx_t n, const float* x) override;
    /// entry point for search
    void search(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels) const override;
    void reconstruct(idx_t key, float* recons) const override;
    void reset() override;
 };
 /** Flat index topped with an NNDescent structure to access elements
 *  more efficiently.
 */
 struct IndexNNDescentFlat : IndexNNDescent {
    IndexNNDescentFlat();
    /// d, K and metric have the same names as the IndexNNDescent constructor
    /// arguments; metric defaults to METRIC_L2.
    IndexNNDescentFlat(int d, int K, MetricType metric = METRIC_L2);
 };
 } // namespace faiss
--- a/src/3rdlib/faiss/IndexNSG.h
+++ b/src/3rdlib/faiss/IndexNSG.h
@ -0,0 +1,85 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #pragma once
 #include <vector>
 #include <faiss/IndexFlat.h>
 #include <faiss/IndexNNDescent.h>
 #include <faiss/impl/NSG.h>
 #include <faiss/utils/utils.h>
 namespace faiss {
 /** The NSG index is a normal random-access index with a NSG
 * link structure built on top */
 struct IndexNSG : Index {
    /// the link structure
    NSG nsg;
    /// the sequential storage
    /// NOTE(review): own_fields presumably decides whether the destructor
    /// deletes storage -- confirm in the implementation file
    bool own_fields;
    Index* storage;
    /// the index is built or not
    bool is_built;
    /// K of KNN graph for building
    int GK;
    /// indicate how to build a knn graph
    /// - 0: build NSG with brute force search
    /// - 1: build NSG with NNDescent
    char build_type;
    /// parameters for nndescent
    int nndescent_S;
    int nndescent_R;
    int nndescent_L;
    int nndescent_iter;
    /// @param d       dimensionality of the input vectors (default 0)
    /// @param R       NOTE(review): presumably the maximum out-degree of the
    ///                NSG graph -- confirm (default 32)
    /// @param metric  metric type (default METRIC_L2)
    explicit IndexNSG(int d = 0, int R = 32, MetricType metric = METRIC_L2);
    /// Build on top of an existing storage index; R as above (default 32).
    explicit IndexNSG(Index* storage, int R = 32);
    ~IndexNSG() override;
    /// Build the index from n vectors x and a caller-provided knn graph with
    /// GK neighbors per vector.
    /// NOTE(review): knn_graph is presumably of size n * GK -- confirm.
    void build(idx_t n, const float* x, idx_t* knn_graph, int GK);
    void add(idx_t n, const float* x) override;
    /// Trains the storage if needed
    void train(idx_t n, const float* x) override;
    /// entry point for search
    void search(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels) const override;
    void reconstruct(idx_t key, float* recons) const override;
    void reset() override;
    /// Checks a knn graph of n nodes with K neighbors each.
    /// NOTE(review): what counts as invalid and how it is reported is not
    /// visible from this header.
    void check_knn_graph(const idx_t* knn_graph, idx_t n, int K) const;
 };
 /** Flat index topped with a NSG structure to access elements
 *  more efficiently.
 */
 struct IndexNSGFlat : IndexNSG {
    IndexNSGFlat();
    /// d, R and metric have the same names as the IndexNSG constructor
    /// arguments; metric defaults to METRIC_L2.
    IndexNSGFlat(int d, int R, MetricType metric = METRIC_L2);
 };
 } // namespace faiss
--- a/src/3rdlib/faiss/IndexPQ.h
+++ b/src/3rdlib/faiss/IndexPQ.h
@ -0,0 +1,198 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 #ifndef FAISS_INDEX_PQ_H
 #define FAISS_INDEX_PQ_H
 #include <stdint.h>
 #include <vector>
 #include <faiss/Index.h>
 #include <faiss/impl/PolysemousTraining.h>
 #include <faiss/impl/ProductQuantizer.h>
 #include <faiss/impl/platform_macros.h>
 namespace faiss {
 /** Index based on a product quantizer. Stored vectors are
 * approximated by PQ codes. */
 struct IndexPQ : Index {
    /// The product quantizer used to encode the vectors
    ProductQuantizer pq;
    /// Codes. Size ntotal * pq.code_size
    std::vector<uint8_t> codes;
    /** Constructor.
     *
     * @param d      dimensionality of the input vectors
     * @param M      number of subquantizers
     * @param nbits  number of bit per subvector index
     * @param metric metric type (default METRIC_L2)
     */
    IndexPQ(int d,        ///< dimensionality of the input vectors
            size_t M,     ///< number of subquantizers
            size_t nbits, ///< number of bit per subvector index
            MetricType metric = METRIC_L2);
    IndexPQ();
    void train(idx_t n, const float* x) override;
    void add(idx_t n, const float* x) override;
    void search(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels) const override;
    void reset() override;
    void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
    void reconstruct(idx_t key, float* recons) const override;
    size_t remove_ids(const IDSelector& sel) override;
    /* The standalone codec interface */
    size_t sa_code_size() const override;
    void sa_encode(idx_t n, const float* x, uint8_t* bytes) const override;
    void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
    /// Returns a raw DistanceComputer pointer.
    /// NOTE(review): presumably the caller owns and must delete it --
    /// confirm against the Index base-class documentation.
    DistanceComputer* get_distance_computer() const override;
    /******************************************************
     * Polysemous codes implementation
     ******************************************************/
    bool do_polysemous_training; ///< false = standard PQ
    /// parameters used for the polysemous training
    PolysemousTraining polysemous_training;
    /// how to perform the search in search_core
    enum Search_type_t {
        ST_PQ,                    ///< asymmetric product quantizer (default)
        ST_HE,                    ///< Hamming distance on codes
        ST_generalized_HE,        ///< nb of same codes
        ST_SDC,                   ///< symmetric product quantizer (SDC)
        ST_polysemous,            ///< HE filter (using ht) + PQ combination
        ST_polysemous_generalize, ///< Filter on generalized Hamming
    };
    /// search mode in use, one of the Search_type_t values above
    Search_type_t search_type;
    // just encode the sign of the components, instead of using the PQ encoder
    // used only for the queries
    bool encode_signs;
    /// Hamming threshold used for polysemy
    int polysemous_ht;
    // actual polysemous search
    void search_core_polysemous(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels) const;
    /// prepare query for a polysemous search, but instead of
    /// computing the result, just get the histogram of Hamming
    /// distances. May be computed on a provided dataset if xb != NULL
    /// @param n              nb of query vectors
    /// @param x              query vectors
    /// @param nb             nb of vectors in xb
    /// @param xb             optional dataset, may be NULL
    /// @param dist_histogram (M * nbits + 1)
    void hamming_distance_histogram(
            idx_t n,
            const float* x,
            idx_t nb,
            const float* xb,
            int64_t* dist_histogram);
    /** compute pairwise distances between queries and database
     *
     * @param n    nb of query vectors
     * @param x    query vector, size n * d
     * @param dis  output distances, size n * ntotal
     */
    void hamming_distance_table(idx_t n, const float* x, int32_t* dis) const;
 };
 /// statistics are robust to internal threading, but not if
 /// IndexPQ::search is called by multiple threads
 struct IndexPQStats {
    size_t nq;    // nb of queries run
    size_t ncode; // nb of codes visited
    size_t n_hamming_pass; // nb of passed Hamming distance tests (for polysemy)
    /// The constructor delegates to reset().
    IndexPQStats() {
        reset();
    }
    /// Declared here, defined elsewhere.
    /// NOTE(review): presumably sets all three counters to 0 -- confirm.
    void reset();
 };
 /// Global IndexPQStats instance (declaration only; the definition is not in
 /// this header).
 FAISS_API extern IndexPQStats indexPQ_stats;
 /** Quantizer where centroids are virtual: they are the Cartesian
 *  product of sub-centroids. */
 struct MultiIndexQuantizer : Index {
    /// product quantizer that holds the sub-centroids
    ProductQuantizer pq;
    MultiIndexQuantizer(
            int d,         ///< dimension of the input vectors
            size_t M,      ///< number of subquantizers
            size_t nbits); ///< number of bit per subvector index
    void train(idx_t n, const float* x) override;
    void search(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels) const override;
    /// add and reset will crash at runtime
    void add(idx_t n, const float* x) override;
    void reset() override;
    /// Empty default constructor: it sets no field itself, pq is
    /// default-constructed.
    MultiIndexQuantizer() {}
    void reconstruct(idx_t key, float* recons) const override;
 };
 /** MultiIndexQuantizer where the PQ assignment is performed by sub-indexes
 */
 struct MultiIndexQuantizer2 : MultiIndexQuantizer {
    /// M Indexes on d / M dimensions
    std::vector<Index*> assign_indexes;
    /// NOTE(review): presumably whether the assign_indexes pointers are owned
    /// (and deleted) by this object -- confirm in the implementation file
    bool own_fields;
    /// General form: indexes points to the sub-indexes.
    /// NOTE(review): presumably an array of M pointers -- confirm.
    MultiIndexQuantizer2(int d, size_t M, size_t nbits, Index** indexes);
    /// Form that takes exactly two sub-indexes.
    MultiIndexQuantizer2(
            int d,
            size_t nbits,
            Index* assign_index_0,
            Index* assign_index_1);
    void train(idx_t n, const float* x) override;
    void search(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels) const override;
 };
 } // namespace faiss
 #endif
--- a/src/3rdlib/faiss/IndexPQFastScan.h
+++ b/src/3rdlib/faiss/IndexPQFastScan.h
@ -0,0 +1,125 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 #pragma once
 #include <faiss/IndexPQ.h>
 #include <faiss/impl/ProductQuantizer.h>
 #include <faiss/utils/AlignedTable.h>
 namespace faiss {
 /** Fast scan version of IndexPQ. Works for 4-bit PQ for now.
 *
 * The codes are not stored sequentially but grouped in blocks of size bbs.
 * This makes it possible to compute distances quickly with SIMD instructions.
 *
 * Implementations:
 * 12: blocked loop with internal loop on Q with qbs
 * 13: same with reservoir accumulator to store results
 * 14: no qbs with heap accumulator
 * 15: no qbs with reservoir accumulator
 */
 struct IndexPQFastScan : Index {
    /// product quantizer used by this index
    ProductQuantizer pq;
    // implementation to select
    // (see the list in the struct comment; initialized to 0)
    int implem = 0;
    // skip some parts of the computation (for timing)
    int skip = 0;
    // size of the kernel
    int bbs;     // set at build time
    int qbs = 0; // query block size 0 = use default
    // packed version of the codes
    // NOTE(review): ntotal2 and M2 are presumably ntotal and pq.M rounded up
    // to the sizes the packed layout needs -- confirm in the implementation
    size_t ntotal2;
    size_t M2;
    AlignedTable<uint8_t> codes;
    // this is for testing purposes only (set when initialized by IndexPQ)
    const uint8_t* orig_codes = nullptr;
    /// @param d       dimensionality of the input vectors
    /// @param M       number of subquantizers
    /// @param nbits   number of bit per subvector index
    /// @param metric  metric type (default METRIC_L2)
    /// @param bbs     block size of the packed codes (default 32)
    IndexPQFastScan(
            int d,
            size_t M,
            size_t nbits,
            MetricType metric = METRIC_L2,
            int bbs = 32);
    IndexPQFastScan();
    /// build from an existing IndexPQ
    explicit IndexPQFastScan(const IndexPQ& orig, int bbs = 32);
    void train(idx_t n, const float* x) override;
    void add(idx_t n, const float* x) override;
    void reset() override;
    void search(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels) const override;
    // called by search function
    // NOTE(review): the sizes of the lut and normalizers output buffers are
    // not visible from this header
    void compute_quantized_LUT(
            idx_t n,
            const float* x,
            uint8_t* lut,
            float* normalizers) const;
    // Internal search helpers, only declared here.
    // NOTE(review): is_max presumably selects the comparison direction and C
    // the comparator type -- confirm in the implementation file.
    template <bool is_max>
    void search_dispatch_implem(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels) const;
    template <class C>
    void search_implem_2(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels) const;
    // impl: presumably the implementation number being run (see the struct
    // comment) -- confirm
    template <class C>
    void search_implem_12(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels,
            int impl) const;
    template <class C>
    void search_implem_14(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels,
            int impl) const;
 };
 /** Four plain 64-bit counters, zeroed on construction and by reset().
 *
 * NOTE(review): what t0..t3 measure is not visible from this header --
 * confirm against the code that updates them.
 */
 struct FastScanStats {
    uint64_t t0, t1, t2, t3;
    /// All counters start at zero.
    FastScanStats() {
        reset();
    }
    /// Zero every counter. Written as plain assignments rather than a memset
    /// over *this, so the header does not depend on a transitive <cstring>
    /// include.
    void reset() {
        t0 = t1 = t2 = t3 = 0;
    }
 };
 /// Global FastScanStats instance (declaration only; the definition is not in
 /// this header).
 FAISS_API extern FastScanStats FastScan_stats;
 } // namespace faiss
--- a/src/3rdlib/faiss/IndexPreTransform.h
+++ b/src/3rdlib/faiss/IndexPreTransform.h
@ -0,0 +1,90 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #pragma once
 #include <faiss/Index.h>
 #include <faiss/VectorTransform.h>
 namespace faiss {
 /** Index that applies a LinearTransform transform on vectors before
 *  handing them over to a sub-index */
 struct IndexPreTransform : Index {
    std::vector<VectorTransform*> chain; ///< chain of transforms
    Index* index;                        ///< the sub-index
    bool own_fields; ///< whether pointers are deleted in destructor
    /// Wrap a sub-index. NOTE(review): presumably starts with an empty
    /// chain -- confirm in the implementation file.
    explicit IndexPreTransform(Index* index);
    IndexPreTransform();
    /// ltrans is the last transform before the index
    IndexPreTransform(VectorTransform* ltrans, Index* index);
    /// Add ltrans to the chain. NOTE(review): judging by the name it is put
    /// in front of the transforms already present -- confirm.
    void prepend_transform(VectorTransform* ltrans);
    void train(idx_t n, const float* x) override;
    void add(idx_t n, const float* x) override;
    void add_with_ids(idx_t n, const float* x, const idx_t* xids) override;
    void reset() override;
    /** removes IDs from the index. Not supported by all indexes.
     */
    size_t remove_ids(const IDSelector& sel) override;
    void search(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels) const override;
    /* range search, no attempt is done to change the radius */
    void range_search(
            idx_t n,
            const float* x,
            float radius,
            RangeSearchResult* result) const override;
    void reconstruct(idx_t key, float* recons) const override;
    void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
    void search_and_reconstruct(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels,
            float* recons) const override;
    /// apply the transforms in the chain. The returned float * may be
    /// equal to x, otherwise it should be deallocated.
    const float* apply_chain(idx_t n, const float* x) const;
    /// Reverse the transforms in the chain. May not be implemented for
    /// all transforms in the chain or may return approximate results.
    void reverse_chain(idx_t n, const float* xt, float* x) const;
    DistanceComputer* get_distance_computer() const override;
    /* standalone codec interface */
    size_t sa_code_size() const override;
    void sa_encode(idx_t n, const float* x, uint8_t* bytes) const override;
    void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
    ~IndexPreTransform() override;
 };
 } // namespace faiss
--- a/src/3rdlib/faiss/IndexRefine.h
+++ b/src/3rdlib/faiss/IndexRefine.h
@ -0,0 +1,72 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 #pragma once
 #include <faiss/Index.h>
 namespace faiss {
 /** Index that queries in a base_index (a fast one) and refines the
 *  results with an exact search, hopefully improving the results.
 */
 struct IndexRefine : Index {
    /// faster index to pre-select the vectors that should be filtered
    Index* base_index;
    /// refinement index
    Index* refine_index;
    bool own_fields;       ///< should the base index be deallocated?
    bool own_refine_index; ///< same with the refinement index
    /// factor between k requested in search and the k requested from
    /// the base_index (should be >= 1)
    float k_factor = 1;
    /// initialize from empty index
    IndexRefine(Index* base_index, Index* refine_index);
    IndexRefine();
    // NOTE(review): train/add/reset presumably forward to both base_index
    // and refine_index -- confirm in the implementation file
    void train(idx_t n, const float* x) override;
    void add(idx_t n, const float* x) override;
    void reset() override;
    void search(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels) const override;
    // reconstruct is routed to the refine_index
    void reconstruct(idx_t key, float* recons) const override;
    ~IndexRefine() override;
 };
 /** Version where the refinement index is an IndexFlat. It has one additional
 * constructor that takes a table of elements to add to the flat refinement
 * index */
 struct IndexRefineFlat : IndexRefine {
    explicit IndexRefineFlat(Index* base_index);
    /// Additional constructor: xb is the table of elements to add to the flat
    /// refinement index.
    /// NOTE(review): the expected number of vectors in xb is not visible from
    /// this header -- confirm.
    IndexRefineFlat(Index* base_index, const float* xb);
    IndexRefineFlat();
    /// Overrides IndexRefine::search.
    void search(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels) const override;
 };
 } // namespace faiss
--- a/src/3rdlib/faiss/IndexReplicas.h
+++ b/src/3rdlib/faiss/IndexReplicas.h
@ -0,0 +1,88 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 #pragma once
 #include <faiss/Index.h>
 #include <faiss/IndexBinary.h>
 #include <faiss/impl/ThreadedIndex.h>
 namespace faiss {
 /// Takes individual faiss::Index instances, and splits queries for
 /// sending to each Index instance, and joins the results together
 /// when done.
 /// Each index is managed by a separate CPU thread.
 /// @tparam IndexT sub-index type; it must provide the idx_t, component_t
 ///         and distance_t member types used below
 template <typename IndexT>
 class IndexReplicasTemplate : public ThreadedIndex<IndexT> {
   public:
    using idx_t = typename IndexT::idx_t;
    using component_t = typename IndexT::component_t;
    using distance_t = typename IndexT::distance_t;
    /// The dimension that all sub-indices must share will be the dimension of
    /// the first sub-index added
    /// @param threaded do we use one thread per sub-index or do queries
    /// sequentially?
    explicit IndexReplicasTemplate(bool threaded = true);
    /// @param d the dimension that all sub-indices must share
    /// @param threaded do we use one thread per sub index or do queries
    /// sequentially?
    explicit IndexReplicasTemplate(idx_t d, bool threaded = true);
    /// int version due to the implicit bool conversion ambiguity of int as
    /// dimension
    explicit IndexReplicasTemplate(int d, bool threaded = true);
    /// Alias for addIndex()
    void add_replica(IndexT* index) {
        this->addIndex(index);
    }
    /// Alias for removeIndex()
    void remove_replica(IndexT* index) {
        this->removeIndex(index);
    }
    /// faiss::Index API
    /// All indices receive the same call
    void train(idx_t n, const component_t* x) override;
    /// faiss::Index API
    /// All indices receive the same call
    void add(idx_t n, const component_t* x) override;
    /// faiss::Index API
    /// Query is partitioned into a slice for each sub-index
    /// split by ceil(n / #indices) for our sub-indices
    void search(
            idx_t n,
            const component_t* x,
            idx_t k,
            distance_t* distances,
            idx_t* labels) const override;
    /// reconstructs from the first index
    void reconstruct(idx_t, component_t* v) const override;
    /// Synchronize the top-level index (IndexReplicas) with data in the
    /// sub-indices
    void syncWithSubIndexes();
   protected:
    /// Called just after an index is added
    void onAfterAddIndex(IndexT* index) override;
    /// Called just after an index is removed
    void onAfterRemoveIndex(IndexT* index) override;
 };
 /// Replicas whose sub-indexes are faiss::Index objects.
 using IndexReplicas = IndexReplicasTemplate<Index>;
 /// Replicas whose sub-indexes are faiss::IndexBinary objects.
 using IndexBinaryReplicas = IndexReplicasTemplate<IndexBinary>;
 } // namespace faiss
--- a/src/3rdlib/faiss/IndexScalarQuantizer.h
+++ b/src/3rdlib/faiss/IndexScalarQuantizer.h
@ -0,0 +1,123 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #ifndef FAISS_INDEX_SCALAR_QUANTIZER_H
 #define FAISS_INDEX_SCALAR_QUANTIZER_H
 #include <stdint.h>
 #include <vector>
 #include <faiss/IndexIVF.h>
 #include <faiss/impl/ScalarQuantizer.h>
 namespace faiss {
 /**
 * Flat index whose vectors are stored as scalar-quantizer codes.
 *
 * The uniform quantizer has a range [vmin, vmax]. The range can be
 * the same for all dimensions (uniform) or specific per dimension
 * (default).
 */
 struct IndexScalarQuantizer : Index {
    /// Used to encode the vectors
    ScalarQuantizer sq;
    /// Codes. Size ntotal * code_size
    std::vector<uint8_t> codes;
    /// NOTE(review): presumably the size in bytes of one encoded vector
    /// (mirroring sq.code_size) -- confirm in the implementation file
    size_t code_size;
    /** Constructor.
     *
     * @param d      dimensionality of the input vectors
     * @param qtype  type of scalar quantizer to use
     * @param metric metric type (default METRIC_L2)
     */
    IndexScalarQuantizer(
            int d,
            ScalarQuantizer::QuantizerType qtype,
            MetricType metric = METRIC_L2);
    IndexScalarQuantizer();
    void train(idx_t n, const float* x) override;
    void add(idx_t n, const float* x) override;
    void search(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels) const override;
    void reset() override;
    void reconstruct_n(idx_t i0, idx_t ni, float* recons) const override;
    void reconstruct(idx_t key, float* recons) const override;
    DistanceComputer* get_distance_computer() const override;
    /* standalone codec interface */
    size_t sa_code_size() const override;
    void sa_encode(idx_t n, const float* x, uint8_t* bytes) const override;
    void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
 };
 /** An IVF implementation where the components of the residuals are
 * encoded with a scalar quantizer. All distance computations
 * are asymmetric, so the encoded vectors are decoded and approximate
 * distances are computed.
 */
 struct IndexIVFScalarQuantizer : IndexIVF {
    /// scalar quantizer used to encode the vectors
    ScalarQuantizer sq;
    /// NOTE(review): presumably true when the residual w.r.t. the coarse
    /// centroid is encoded rather than the raw vector (see encode_residual in
    /// the constructor) -- confirm in the implementation file
    bool by_residual;
    /** Constructor.
     *
     * @param quantizer        coarse quantizer
     * @param d                dimensionality of the input vectors
     * @param nlist            number of inverted lists
     * @param qtype            type of scalar quantizer to use
     * @param metric           metric type (default METRIC_L2)
     * @param encode_residual  default true; presumably the initial value of
     *                         by_residual -- TODO confirm
     */
    IndexIVFScalarQuantizer(
            Index* quantizer,
            size_t d,
            size_t nlist,
            ScalarQuantizer::QuantizerType qtype,
            MetricType metric = METRIC_L2,
            bool encode_residual = true);
    IndexIVFScalarQuantizer();
    void train_residual(idx_t n, const float* x) override;
    /// encodes n vectors of x into codes, given their list numbers;
    /// include_listnos defaults to false
    void encode_vectors(
            idx_t n,
            const float* x,
            const idx_t* list_nos,
            uint8_t* codes,
            bool include_listnos = false) const override;
    void add_core(
            idx_t n,
            const float* x,
            const idx_t* xids,
            const idx_t* precomputed_idx) override;
    InvertedListScanner* get_InvertedListScanner(
            bool store_pairs) const override;
    void reconstruct_from_offset(int64_t list_no, int64_t offset, float* recons)
            const override;
    /* standalone codec interface */
    void sa_decode(idx_t n, const uint8_t* bytes, float* x) const override;
 };
 } // namespace faiss
 #endif
--- a/src/3rdlib/faiss/IndexShards.h
+++ b/src/3rdlib/faiss/IndexShards.h
@ -0,0 +1,111 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 #pragma once
 #include <faiss/Index.h>
 #include <faiss/IndexBinary.h>
 #include <faiss/impl/ThreadedIndex.h>
 namespace faiss {
 /**
 * Index that concatenates the results from several sub-indexes
 *
 * Templated on the index family (IndexT); the id, component and distance
 * types are taken from IndexT.
 */
 template <typename IndexT>
 struct IndexShardsTemplate : public ThreadedIndex<IndexT> {
    using idx_t = typename IndexT::idx_t;
    using component_t = typename IndexT::component_t;
    using distance_t = typename IndexT::distance_t;
    /**
     * The dimension that all sub-indices must share will be the dimension of
     * the first sub-index added
     *
     * @param threaded     do we use one thread per sub_index or do
     *                     queries sequentially?
     * @param successive_ids should we shift the returned ids by
     *                     the size of each sub-index or return them
     *                     as they are?
     */
    explicit IndexShardsTemplate(
            bool threaded = false,
            bool successive_ids = true);
    /**
     * @param d            dimension, given explicitly up front
     * @param threaded     do we use one thread per sub_index or do
     *                     queries sequentially?
     * @param successive_ids should we shift the returned ids by
     *                     the size of each sub-index or return them
     *                     as they are?
     */
    explicit IndexShardsTemplate(
            idx_t d,
            bool threaded = false,
            bool successive_ids = true);
    /// int version due to the implicit bool conversion ambiguity of int as
    /// dimension (an int argument could otherwise select the bool overload)
    explicit IndexShardsTemplate(
            int d,
            bool threaded = false,
            bool successive_ids = true);
    /// Alias for addIndex()
    void add_shard(IndexT* index) {
        this->addIndex(index);
    }
    /// Alias for removeIndex()
    void remove_shard(IndexT* index) {
        this->removeIndex(index);
    }
    /// supported only for sub-indices that implement add_with_ids
    void add(idx_t n, const component_t* x) override;
    /**
     * Cases (successive_ids, xids):
     * - true, non-NULL       ERROR: it makes no sense to pass in ids and
     *                        request them to be shifted
     * - true, NULL           OK, but should be called only once (calls add()
     *                        on sub-indexes).
     * - false, non-NULL      OK: will call add_with_ids with passed in xids
     *                        distributed evenly over shards
     * - false, NULL          OK: will call add_with_ids on each sub-index,
     *                        starting at ntotal
     */
    void add_with_ids(idx_t n, const component_t* x, const idx_t* xids)
            override;
    void search(
            idx_t n,
            const component_t* x,
            idx_t k,
            distance_t* distances,
            idx_t* labels) const override;
    void train(idx_t n, const component_t* x) override;
    /// id-shifting policy, see the constructor parameter of the same name
    /// (NOTE(review): presumably stored from it -- confirm in the .cpp)
    bool successive_ids;
    /// Synchronize the top-level index (IndexShards) with data in the
    /// sub-indices
    void syncWithSubIndexes();
   protected:
    /// Called just after an index is added
    void onAfterAddIndex(IndexT* index) override;
    /// Called just after an index is removed
    void onAfterRemoveIndex(IndexT* index) override;
 };
 /// IndexShardsTemplate instantiated for the Index family
 using IndexShards = IndexShardsTemplate<Index>;
 /// IndexShardsTemplate instantiated for the IndexBinary family
 using IndexBinaryShards = IndexShardsTemplate<IndexBinary>;
 } // namespace faiss
--- a/src/3rdlib/faiss/MatrixStats.h
+++ b/src/3rdlib/faiss/MatrixStats.h
@ -0,0 +1,59 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #pragma once
 #include <stdint.h>
 #include <string>
 #include <unordered_map>
 #include <vector>
 namespace faiss {
 /** Reports some statistics on a dataset and comments on them.
 *
 * It is a class rather than a function so that all stats can also be
 * accessed from code */
 struct MatrixStats {
    /// NOTE(review): presumably x holds n vectors of dimension d and the
    /// statistics are computed here -- the definition is not in this header
    MatrixStats(size_t n, size_t d, const float* x);
    /// textual comments on the statistics (see the struct description)
    std::string comments;
    // raw statistics
    size_t n, d;
    // NOTE(review): judging by the names, these count colliding, valid and
    // all-zero vectors -- confirm against the implementation
    size_t n_collision, n_valid, n0;
    double min_norm2, max_norm2;
    /// statistics accumulated for one dimension
    struct PerDimStats {
        // NOTE(review): names suggest total / NaN / infinite / zero counts
        size_t n, n_nan, n_inf, n0;
        float min, max;
        double sum, sum2;
        size_t n_valid;
        double mean, stddev;
        PerDimStats();
        /// feed one value into the accumulators
        void add(float x);
        /// NOTE(review): presumably fills mean and stddev from the sums
        void compute_mean_std();
    };
    /// one entry per dimension (presumably size d -- TODO confirm)
    std::vector<PerDimStats> per_dim_stats;
    struct Occurrence {
        size_t first;
        size_t count;
    };
    /// keyed by a 64-bit value (presumably a hash of the vector -- TODO
    /// confirm)
    std::unordered_map<uint64_t, Occurrence> occurrences;
    // raw character buffer and its size; ownership and lifetime are not
    // visible from this header
    char* buf;
    size_t nbuf;
    /// printf-style helper (format string plus variadic arguments)
    void do_comment(const char* fmt, ...);
 };
 } // namespace faiss
--- a/src/3rdlib/faiss/MetaIndexes.h
+++ b/src/3rdlib/faiss/MetaIndexes.h
@ -0,0 +1,132 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #ifndef META_INDEXES_H
 #define META_INDEXES_H
 #include <faiss/Index.h>
 #include <faiss/IndexReplicas.h>
 #include <faiss/IndexShards.h>
 #include <unordered_map>
 #include <vector>
 namespace faiss {
 /** Index that translates search results to ids */
 template <typename IndexT>
 struct IndexIDMapTemplate : IndexT {
    using idx_t = typename IndexT::idx_t;
    using component_t = typename IndexT::component_t;
    using distance_t = typename IndexT::distance_t;
    IndexT* index;   ///! the sub-index
    bool own_fields; ///! whether pointers are deleted in destructor
    /// NOTE(review): presumably id_map[i] is the user-facing id of the i-th
    /// vector stored in the sub-index -- confirm in the implementation
    std::vector<idx_t> id_map;
    /// wrap an existing sub-index
    explicit IndexIDMapTemplate(IndexT* index);
    /// @param xids if non-null, ids to store for the vectors (size n)
    void add_with_ids(idx_t n, const component_t* x, const idx_t* xids)
            override;
    /// this will fail. Use add_with_ids
    void add(idx_t n, const component_t* x) override;
    void search(
            idx_t n,
            const component_t* x,
            idx_t k,
            distance_t* distances,
            idx_t* labels) const override;
    void train(idx_t n, const component_t* x) override;
    void reset() override;
    /// remove ids adapted to IndexFlat
    size_t remove_ids(const IDSelector& sel) override;
    void range_search(
            idx_t n,
            const component_t* x,
            distance_t radius,
            RangeSearchResult* result) const override;
    ~IndexIDMapTemplate() override;
    /// default constructor: no sub-index attached and nothing owned
    IndexIDMapTemplate() {
        own_fields = false;
        index = nullptr;
    }
 };
 /// IndexIDMapTemplate instantiated for the Index family
 using IndexIDMap = IndexIDMapTemplate<Index>;
 /// IndexIDMapTemplate instantiated for the IndexBinary family
 using IndexBinaryIDMap = IndexIDMapTemplate<IndexBinary>;
 /** same as IndexIDMap but also provides an efficient reconstruction
 *  implementation via a 2-way index */
 template <typename IndexT>
 struct IndexIDMap2Template : IndexIDMapTemplate<IndexT> {
    using idx_t = typename IndexT::idx_t;
    using component_t = typename IndexT::component_t;
    using distance_t = typename IndexT::distance_t;
    /// reverse direction of the 2-way index (NOTE(review): presumably maps a
    /// user-facing id back to its position in id_map -- TODO confirm)
    std::unordered_map<idx_t, idx_t> rev_map;
    explicit IndexIDMap2Template(IndexT* index);
    /// make the rev_map from scratch
    void construct_rev_map();
    void add_with_ids(idx_t n, const component_t* x, const idx_t* xids)
            override;
    size_t remove_ids(const IDSelector& sel) override;
    /// reconstruction by id, see the struct description
    void reconstruct(idx_t key, component_t* recons) const override;
    ~IndexIDMap2Template() override {}
    /// default constructor; the base default constructor leaves the
    /// sub-index pointer null
    IndexIDMap2Template() {}
 };
 /// IndexIDMap2Template instantiated for the Index family
 using IndexIDMap2 = IndexIDMap2Template<Index>;
 /// IndexIDMap2Template instantiated for the IndexBinary family
 using IndexBinaryIDMap2 = IndexIDMap2Template<IndexBinary>;
 /** splits input vectors in segments and assigns each segment to a sub-index
 * used to distribute a MultiIndexQuantizer
 */
 struct IndexSplitVectors : Index {
    /// NOTE(review): presumably whether the sub-indexes are deleted in the
    /// destructor -- confirm in the implementation
    bool own_fields;
    /// NOTE(review): presumably whether sub-indexes are queried from
    /// separate threads -- confirm in the implementation
    bool threaded;
    /// the sub-indexes, one per segment of the input vectors
    std::vector<Index*> sub_indexes;
    idx_t sum_d; /// sum of dimensions seen so far
    /// @param d        total dimension of the input vectors (presumably)
    /// @param threaded see the member of the same name
    explicit IndexSplitVectors(idx_t d, bool threaded = false);
    /// register one more sub-index
    void add_sub_index(Index*);
    void sync_with_sub_indexes();
    void add(idx_t n, const float* x) override;
    void search(
            idx_t n,
            const float* x,
            idx_t k,
            float* distances,
            idx_t* labels) const override;
    void train(idx_t n, const float* x) override;
    void reset() override;
    ~IndexSplitVectors() override;
 };
 } // namespace faiss
 #endif
--- a/src/3rdlib/faiss/MetricType.h
+++ b/src/3rdlib/faiss/MetricType.h
@ -0,0 +1,36 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #ifndef FAISS_METRIC_TYPE_H
 #define FAISS_METRIC_TYPE_H
 namespace faiss {
 /// The metric space for vector comparison for Faiss indices and algorithms.
 ///
 /// Most algorithms support both inner product and L2, with the flat
 /// (brute-force) indices supporting additional metric types for vector
 /// comparison.
 ///
 /// Enumerators without an explicit value continue from the previous one:
 /// METRIC_L1 = 2, METRIC_Linf = 3, METRIC_Lp = 4, METRIC_BrayCurtis = 21,
 /// METRIC_JensenShannon = 22.
 enum MetricType {
    METRIC_INNER_PRODUCT = 0, ///< maximum inner product search
    METRIC_L2 = 1,            ///< squared L2 search
    METRIC_L1,                ///< L1 (aka cityblock)
    METRIC_Linf,              ///< infinity distance
    METRIC_Lp,                ///< L_p distance, p is given by a faiss::Index
                              /// metric_arg
    /// some additional metrics defined in scipy.spatial.distance
    /// (numbering restarts at 20, leaving a gap after METRIC_Lp)
    METRIC_Canberra = 20,
    METRIC_BrayCurtis,
    METRIC_JensenShannon,
 };
 } // namespace faiss
 #endif
--- a/src/3rdlib/faiss/VectorTransform.h
+++ b/src/3rdlib/faiss/VectorTransform.h
@ -0,0 +1,294 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #ifndef FAISS_VECTOR_TRANSFORM_H
 #define FAISS_VECTOR_TRANSFORM_H
 /** Defines a few objects that apply transformations to a set of
 * vectors Often these are pre-processing steps.
 */
 #include <stdint.h>
 #include <vector>
 #include <faiss/Index.h>
 namespace faiss {
 /** Any transformation applied on a set of vectors */
 struct VectorTransform {
    typedef Index::idx_t idx_t;
    int d_in;  ///! input dimension
    int d_out; ///! output dimension
    /// stores the dimensions and marks the transform as trained; subclasses
    /// that need training are expected to reset is_trained themselves
    /// (NOTE(review): the latter is an assumption -- confirm in subclasses)
    explicit VectorTransform(int d_in = 0, int d_out = 0)
            : d_in(d_in), d_out(d_out), is_trained(true) {}
    /// set if the VectorTransform does not require training, or if
    /// training is done already
    bool is_trained;
    /** Perform training on a representative set of vectors. Does
     * nothing by default.
     *
     * @param n      nb of training vectors
     * @param x      training vectors, size n * d
     */
    virtual void train(idx_t n, const float* x);
    /** apply the transformation, return new allocated matrix
     * (NOTE(review): how the returned buffer must be released is not
     * visible in this header -- check the implementation)
     * @param     x size n * d_in
     * @return    size n * d_out
     */
    float* apply(idx_t n, const float* x) const;
    /// same as apply, but result is pre-allocated
    /// (pure virtual: every concrete transform must implement it)
    virtual void apply_noalloc(idx_t n, const float* x, float* xt) const = 0;
    /// reverse transformation. May not be implemented or may return
    /// approximate result
    virtual void reverse_transform(idx_t n, const float* xt, float* x) const;
    virtual ~VectorTransform() {}
 };
 /** Generic linear transformation, with bias term applied on output
 * y = A * x + b
 */
 struct LinearTransform : VectorTransform {
    bool have_bias; ///! whether to use the bias term
    /// check if matrix A is orthonormal (enables reverse_transform)
    bool is_orthonormal;
    /// Transformation matrix, size d_out * d_in
    std::vector<float> A;
    /// bias vector, size d_out
    std::vector<float> b;
    /// both d_in > d_out and d_in < d_out are supported
    explicit LinearTransform(
            int d_in = 0,
            int d_out = 0,
            bool have_bias = false);
    /// same as apply, but result is pre-allocated
    void apply_noalloc(idx_t n, const float* x, float* xt) const override;
    /// compute x = A^T * (y - b)
    /// is reverse transform if A has orthonormal lines
    void transform_transpose(idx_t n, const float* y, float* x) const;
    /// works only if is_orthonormal
    void reverse_transform(idx_t n, const float* xt, float* x) const override;
    /// compute A^T * A to set the is_orthonormal flag
    void set_is_orthonormal();
    /// verbosity flag (see print_if_verbose)
    bool verbose;
    /// NOTE(review): presumably prints the n * d matrix `mat` labelled with
    /// `name` when verbose is set -- confirm in the implementation
    void print_if_verbose(
            const char* name,
            const std::vector<double>& mat,
            int n,
            int d) const;
    ~LinearTransform() override {}
 };
 /// Randomly rotate a set of vectors
 struct RandomRotationMatrix : LinearTransform {
    /// both d_in > d_out and d_in < d_out are supported
    /// (constructs the base LinearTransform without a bias term)
    RandomRotationMatrix(int d_in, int d_out)
            : LinearTransform(d_in, d_out, false) {}
    /// must be called before the transform is used
    void init(int seed);
    // initializes with an arbitrary seed
    void train(idx_t n, const float* x) override;
    /// default constructor, relies on the LinearTransform defaults
    RandomRotationMatrix() {}
 };
 /** Applies a principal component analysis on a set of vectors,
 *  with optionally whitening and random rotation. */
 struct PCAMatrix : LinearTransform {
    /** after transformation the components are multiplied by
     * eigenvalues^eigen_power
     *
     * =0: no whitening
     * =-0.5: full whitening
     */
    float eigen_power;
    /// random rotation after PCA
    bool random_rotation;
    /// ratio between # training vectors and dimension
    size_t max_points_per_d;
    /// try to distribute output eigenvectors in this many bins
    int balanced_bins;
    /// Mean, size d_in
    std::vector<float> mean;
    /// eigenvalues of covariance matrix (= squared singular values)
    std::vector<float> eigenvalues;
    /// PCA matrix, size d_in * d_in
    std::vector<float> PCAMat;
    // the final matrix is computed after random rotation and/or whitening
    /// @param d_in            input dimension
    /// @param d_out           output dimension
    /// @param eigen_power     see the member of the same name
    /// @param random_rotation see the member of the same name
    explicit PCAMatrix(
            int d_in = 0,
            int d_out = 0,
            float eigen_power = 0,
            bool random_rotation = false);
    /// train on n vectors. If n < d_in then the eigenvector matrix
    /// will be completed with 0s
    void train(idx_t n, const float* x) override;
    /// copy pre-trained PCA matrix
    void copy_from(const PCAMatrix& other);
    /// called after mean, PCAMat and eigenvalues are computed
    /// (NOTE(review): judging by the name, fills the inherited A and b --
    /// confirm in the implementation)
    void prepare_Ab();
 };
 /** ITQ implementation from
 *
 *     Iterative quantization: A procrustean approach to learning binary codes
 *     for large-scale image retrieval,
 *
 * Yunchao Gong, Svetlana Lazebnik, Albert Gordo, Florent Perronnin,
 * PAMI'12.
 */
 struct ITQMatrix : LinearTransform {
    /// NOTE(review): presumably the iteration cap used by train() -- the
    /// default value is set out of line
    int max_iter;
    /// NOTE(review): presumably seeds the random initialization -- confirm
    int seed;
    // force initialization of the rotation (for debugging)
    std::vector<double> init_rotation;
    /// @param d dimension (presumably both input and output -- TODO confirm)
    explicit ITQMatrix(int d = 0);
    void train(idx_t n, const float* x) override;
 };
 /** The full ITQ transform, including normalizations and PCA transformation
 */
 struct ITQTransform : VectorTransform {
    /// NOTE(review): presumably the per-dimension mean removed during
    /// normalization -- confirm in the implementation
    std::vector<float> mean;
    /// whether the PCA step is applied (set from the constructor argument,
    /// presumably)
    bool do_pca;
    /// the ITQ rotation stage
    ITQMatrix itq;
    /// max training points per dimension
    int max_train_per_dim;
    // concatenation of PCA + ITQ transformation
    LinearTransform pca_then_itq;
    explicit ITQTransform(int d_in = 0, int d_out = 0, bool do_pca = false);
    void train(idx_t n, const float* x) override;
    void apply_noalloc(idx_t n, const float* x, float* xt) const override;
 };
 struct ProductQuantizer;
 /** Applies a rotation to align the dimensions with a PQ to minimize
 *  the reconstruction error. Can be used before an IndexPQ or an
 *  IndexIVFPQ. The method is the non-parametric version described in:
 *
 * "Optimized Product Quantization for Approximate Nearest Neighbor Search"
 * Tiezheng Ge, Kaiming He, Qifa Ke, Jian Sun, CVPR'13
 *
 */
 struct OPQMatrix : LinearTransform {
    int M;          ///< nb of subquantizers
    int niter;      ///< Number of outer training iterations
    int niter_pq;   ///< Number of training iterations for the PQ
    int niter_pq_0; ///< same, for the first outer iteration
    /// if there are too many training points, resample
    size_t max_train_points;
    /// NOTE(review): LinearTransform already declares a `verbose` member;
    /// this declaration hides it within OPQMatrix, so the two flags are
    /// distinct objects
    bool verbose;
    /// if non-NULL, use this product quantizer for training
    /// should be constructed with (d_out, M, _)
    ProductQuantizer* pq;
    /// if d2 != -1, output vectors of this dimension
    explicit OPQMatrix(int d = 0, int M = 1, int d2 = -1);
    void train(idx_t n, const float* x) override;
 };
 /** remap dimensions for input vectors, possibly inserting 0s
 * strictly speaking this is also a linear transform but we don't want
 * to compute it with matrix multiplies */
 struct RemapDimensionsTransform : VectorTransform {
    /// map from output dimension to input, size d_out
    /// -1 -> set output to 0
    std::vector<int> map;
    /// build from an explicit mapping (see the `map` member for its format)
    RemapDimensionsTransform(int d_in, int d_out, const int* map);
    /// remap input to output, skipping or inserting dimensions as needed
    /// if uniform: distribute dimensions uniformly
    /// otherwise just take the d_out first ones.
    RemapDimensionsTransform(int d_in, int d_out, bool uniform = true);
    void apply_noalloc(idx_t n, const float* x, float* xt) const override;
    /// reverse transform correct only when the mapping is a permutation
    void reverse_transform(idx_t n, const float* xt, float* x) const override;
    /// default constructor: dimensions default to 0 and the map is empty
    RemapDimensionsTransform() {}
 };
 /** per-vector normalization */
 struct NormalizationTransform : VectorTransform {
    /// which norm to use (2.0 by default in the explicit constructor;
    /// NOTE(review): presumably 2.0 means the L2 norm -- confirm)
    float norm;
    explicit NormalizationTransform(int d, float norm = 2.0);
    NormalizationTransform();
    void apply_noalloc(idx_t n, const float* x, float* xt) const override;
    /// Identity transform since norm is not revertible
    void reverse_transform(idx_t n, const float* xt, float* x) const override;
 };
 /** Subtract the mean of each component from the vectors. */
 struct CenteringTransform : VectorTransform {
    /// Mean, size d_in = d_out
    std::vector<float> mean;
    /// @param d dimension, used for both input and output
    explicit CenteringTransform(int d = 0);
    /// train on n vectors.
    void train(idx_t n, const float* x) override;
    /// subtract the mean
    void apply_noalloc(idx_t n, const float* x, float* xt) const override;
    /// add the mean
    void reverse_transform(idx_t n, const float* xt, float* x) const override;
 };
 } // namespace faiss
 #endif
--- a/src/3rdlib/faiss/clone_index.h
+++ b/src/3rdlib/faiss/clone_index.h
@ -0,0 +1,33 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 // I/O code for indexes
 #pragma once
 namespace faiss {
 struct Index;
 struct IndexIVF;
 struct VectorTransform;
 /* cloning functions */
 /// Returns a clone of the given index.
 /// NOTE(review): ownership of the returned pointer is not stated here;
 /// presumably the caller must delete it -- confirm in the implementation.
 Index* clone_index(const Index*);
 /** Cloner class, useful to override classes with other cloning
 * functions. The cloning function above just calls
 * Cloner::clone_Index. */
 struct Cloner {
    // One virtual hook per object family, so that subclasses can replace
    // the cloning of a single family.
    virtual VectorTransform* clone_VectorTransform(const VectorTransform*);
    virtual Index* clone_Index(const Index*);
    virtual IndexIVF* clone_IndexIVF(const IndexIVF*);
    virtual ~Cloner() {}
 };
 } // namespace faiss
--- a/src/3rdlib/faiss/impl/AdditiveQuantizer.h
+++ b/src/3rdlib/faiss/impl/AdditiveQuantizer.h
@ -0,0 +1,164 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 #pragma once
 #include <cstdint>
 #include <vector>
 #include <faiss/Index.h>
 namespace faiss {
 /** Abstract structure for additive quantizers
 *
 * Different from the product quantizer in which the decoded vector is the
 * concatenation of M sub-vectors, additive quantizers sum M sub-vectors
 * to get the decoded vector.
 *
 * train() and compute_codes() are pure virtual and must be provided by the
 * concrete quantizer.
 */
 struct AdditiveQuantizer {
    size_t d;                     ///< size of the input vectors
    size_t M;                     ///< number of codebooks
    std::vector<size_t> nbits;    ///< bits for each step
    std::vector<float> codebooks; ///< codebooks
    // derived values
    /// NOTE(review): presumably the start offset of each codebook inside
    /// `codebooks` -- confirm in set_derived_values()
    std::vector<uint64_t> codebook_offsets;
    size_t code_size;           ///< code size in bytes
    size_t tot_bits;            ///< total number of bits
    size_t total_codebook_size; ///< size of the codebook in vectors
    bool only_8bit;             ///< are all nbits = 8 (use faster decoder)
    bool verbose;    ///< verbose during training?
    bool is_trained; ///< is trained or not
    /// Encodes how search is performed and how vectors are encoded
    enum Search_type_t {
        ST_decompress,    ///< decompress database vector
        ST_LUT_nonorm,    ///< use a LUT, don't include norms (OK for IP or
                          ///< normalized vectors)
        ST_norm_from_LUT, ///< compute the norms from the look-up tables (cost
                          ///< is in O(M^2))
        ST_norm_float, ///< use a LUT, and store float32 norm with the vectors
        ST_norm_qint8, ///< use a LUT, and store 8bit-quantized norm
        ST_norm_qint4, ///< NOTE(review): presumably the 4-bit counterpart of
                       ///< ST_norm_qint8 -- confirm
    };
    /// @param d           size of the input vectors
    /// @param nbits       bits for each step
    /// @param search_type see Search_type_t, ST_decompress by default
    AdditiveQuantizer(
            size_t d,
            const std::vector<size_t>& nbits,
            Search_type_t search_type = ST_decompress);
    AdditiveQuantizer();
    /// compute derived values when d, M and nbits have been set
    void set_derived_values();
    /// Train the additive quantizer
    virtual void train(size_t n, const float* x) = 0;
    /** Encode a set of vectors
     *
     * @param x      vectors to encode, size n * d
     * @param codes  output codes, size n * code_size
     */
    virtual void compute_codes(const float* x, uint8_t* codes, size_t n)
            const = 0;
    /** pack a series of code to bit-compact format
     *
     * @param n            number of codes to pack
     * @param codes        codes to be packed, size n * code_size
     * @param packed_codes output bit-compact codes
     * @param ld_codes     leading dimension of codes
     * @param norms        norms of the vectors (size n). Will be computed if
     *                     needed but not provided
     */
    void pack_codes(
            size_t n,
            const int32_t* codes,
            uint8_t* packed_codes,
            int64_t ld_codes = -1,
            const float* norms = nullptr) const;
    /** Decode a set of vectors
     *
     * @param codes  codes to decode, size n * code_size
     * @param x      output vectors, size n * d
     */
    void decode(const uint8_t* codes, float* x, size_t n) const;
    /** Decode a set of vectors in non-packed format
     *
     * @param codes  codes to decode, size n * ld_codes
     * @param x      output vectors, size n * d
     */
    void decode_unpacked(
            const int32_t* codes,
            float* x,
            size_t n,
            int64_t ld_codes = -1) const;
    /****************************************************************************
     * Search functions in an external set of codes.
     ****************************************************************************/
    /// Also determines what's in the codes
    Search_type_t search_type;
    /// min/max for quantization of norms
    float norm_min, norm_max;
    /// Distance of one code to the query, given the query's look-up table.
    /// NOTE(review): is_IP presumably selects inner product over L2 --
    /// the template definition is not in this header
    template <bool is_IP, Search_type_t effective_search_type>
    float compute_1_distance_LUT(const uint8_t* codes, const float* LUT) const;
    /*
        float compute_1_L2sqr(const uint8_t* codes, const float* LUT);
    */
    /****************************************************************************
     * Support for exhaustive distance computations with all the centroids.
     * Hence, the number of these centroids should not be too large.
     ****************************************************************************/
    using idx_t = Index::idx_t;
    /// decoding function for a code in a 64-bit word
    void decode_64bit(idx_t n, float* x) const;
    /** Compute inner-product look-up tables. Used in the centroid search
     * functions.
     *
     * @param xq     query vector, size (n, d)
     * @param LUT    look-up table, size (n, total_codebook_size)
     */
    void compute_LUT(size_t n, const float* xq, float* LUT) const;
    /// exact IP search
    void knn_centroids_inner_product(
            idx_t n,
            const float* xq,
            idx_t k,
            float* distances,
            idx_t* labels) const;
    /** For L2 search we need the L2 norms of the centroids
     *
     * @param norms    output norms table, size total_codebook_size
     */
    void compute_centroid_norms(float* norms) const;
    /** Exact L2 search, with precomputed norms */
    void knn_centroids_L2(
            idx_t n,
            const float* xq,
            idx_t k,
            float* distances,
            idx_t* labels,
            const float* centroid_norms) const;
    virtual ~AdditiveQuantizer();
 };
 }; // namespace faiss
--- a/src/3rdlib/faiss/impl/AuxIndexStructures.h
+++ b/src/3rdlib/faiss/impl/AuxIndexStructures.h
@ -0,0 +1,276 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 // Auxiliary index structures, that are used in indexes but that can
 // be forward-declared
 #ifndef FAISS_AUX_INDEX_STRUCTURES_H
 #define FAISS_AUX_INDEX_STRUCTURES_H
 #include <stdint.h>
 #include <cstring>
 #include <memory>
 #include <mutex>
 #include <unordered_set>
 #include <vector>
 #include <faiss/Index.h>
 #include <faiss/impl/platform_macros.h>
 namespace faiss {
 /** The objective is to have a simple result structure while
 *  minimizing the number of mem copies in the result. The method
 *  do_allocation can be overloaded to allocate the result tables in
 *  the matrix type of a scripting language like Lua or Python. */
 struct RangeSearchResult {
    size_t nq;    ///< nb of queries
    size_t* lims; ///< size (nq + 1)
    typedef Index::idx_t idx_t;
    idx_t* labels;    ///< result for query i is labels[lims[i]:lims[i+1]]
    float* distances; ///< corresponding distances (not sorted)
    size_t buffer_size; ///< size of the result buffers used
    /// lims must be allocated on input to range_search.
    /// @param nq         nb of queries
    /// @param alloc_lims NOTE(review): presumably whether the constructor
    ///                   allocates lims itself -- confirm
    explicit RangeSearchResult(idx_t nq, bool alloc_lims = true);
    /// called when lims contains the nb of elements result entries
    /// for each query
    virtual void do_allocation();
    virtual ~RangeSearchResult();
 };
 /** Encapsulates a set of ids to remove.
 *
 * Abstract interface: subclasses implement is_member(). */
 struct IDSelector {
    typedef Index::idx_t idx_t;
    /// @return true if id belongs to the set
    virtual bool is_member(idx_t id) const = 0;
    virtual ~IDSelector() {}
 };
 /** remove ids between [imin, imax) */
 struct IDSelectorRange : IDSelector {
    /// bounds of the half-open interval described above
    idx_t imin, imax;
    IDSelectorRange(idx_t imin, idx_t imax);
    bool is_member(idx_t id) const override;
    ~IDSelectorRange() override {}
 };
 /** simple list of elements to remove
 *
 * this is inefficient in most cases, except for IndexIVF with
 * maintain_direct_map
 */
 struct IDSelectorArray : IDSelector {
    /// number of entries in ids
    size_t n;
    /// the ids to select; held as a raw pointer (NOTE(review): presumably
    /// not copied, so the array must outlive the selector -- confirm)
    const idx_t* ids;
    IDSelectorArray(size_t n, const idx_t* ids);
    bool is_member(idx_t id) const override;
    ~IDSelectorArray() override {}
 };
 /** Remove ids from a set. Repetitions of ids in the indices set
 * passed to the constructor does not hurt performance. The hash
 * function used for the bloom filter and GCC's implementation of
 * unordered_set are just the least significant bits of the id. This
 * works fine for random ids or ids in sequences but will produce many
 * hash collisions if lsb's are always the same */
 struct IDSelectorBatch : IDSelector {
    /// exact set of the ids
    std::unordered_set<idx_t> set;
    // local byte typedef (struct-scoped, same underlying type spelled out)
    typedef unsigned char uint8_t;
    std::vector<uint8_t> bloom; // assumes low bits of id are a good hash value
    // NOTE(review): nbits and mask presumably size the bloom filter and
    // extract the low bits of an id -- confirm in the implementation
    int nbits;
    idx_t mask;
    /// @param n       number of entries in indices
    /// @param indices ids to put in the set (repetitions allowed)
    IDSelectorBatch(size_t n, const idx_t* indices);
    bool is_member(idx_t id) const override;
    ~IDSelectorBatch() override {}
 };
 /****************************************************************
 * Result structures for range search.
 *
 * The main constraint here is that we want to support parallel
 * queries from different threads in various ways: 1 thread per query,
 * several threads per query. We store the actual results in blocks of
 * fixed size rather than exponentially increasing memory. At the end,
 * we copy the block content to a linear result array.
 *****************************************************************/
 /** List of temporary buffers used to store results before they are
 *  copied to the RangeSearchResult object. */
 struct BufferList {
    typedef Index::idx_t idx_t;
    // buffer sizes in # entries
    size_t buffer_size;
    /// one fixed-size block: parallel arrays of ids and distances
    struct Buffer {
        idx_t* ids;
        float* dis;
    };
    std::vector<Buffer> buffers;
    size_t wp; ///< write pointer in the last buffer.
    explicit BufferList(size_t buffer_size);
    ~BufferList();
    /// create a new buffer
    void append_buffer();
    /// add one result, possibly appending a new buffer if needed
    void add(idx_t id, float dis);
    /// copy elements ofs:ofs+n-1 seen as linear data in the buffers to
    /// tables dest_ids, dest_dis
    void copy_range(size_t ofs, size_t n, idx_t* dest_ids, float* dest_dis);
 };
 struct RangeSearchPartialResult;
 /// result structure for a single query
 struct RangeQueryResult {
    using idx_t = Index::idx_t;
    idx_t qno;   ///< id of the query
    size_t nres; ///< nb of results for this query
    /// partial result this query belongs to (NOTE(review): presumably where
    /// add() stores its entries -- confirm in the implementation)
    RangeSearchPartialResult* pres;
    /// called by search function to report a new result
    void add(float dis, idx_t id);
 };
 /// the entries in the buffers are split per query
 struct RangeSearchPartialResult : BufferList {
    /// final destination of the results
    RangeSearchResult* res;
    /// eventually the result will be stored in res_in
    explicit RangeSearchPartialResult(RangeSearchResult* res_in);
    /// query ids + nb of results per query.
    std::vector<RangeQueryResult> queries;
    /// begin a new result
    RangeQueryResult& new_result(idx_t qno);
    /*****************************************
     * functions used at the end of the search to merge the result
     * lists */
    void finalize();
    /// called by range_search before do_allocation
    void set_lims();
    /// called by range_search after do_allocation
    void copy_result(bool incremental = false);
    /// merge a set of PartialResult's into one RangeSearchResult
    /// on output the partialresults are empty!
    /// @param do_delete NOTE(review): presumably whether the partial result
    ///                  objects are deleted after merging -- confirm
    static void merge(
            std::vector<RangeSearchPartialResult*>& partial_results,
            bool do_delete = true);
 };
 /***********************************************************
 * The distance computer maintains a current query and computes
 * distances to elements in an index that supports random access.
 *
 * The DistanceComputer is not intended to be thread-safe (eg. because
 * it maintains counters) so the distance functions are not const,
 * instantiate one from each thread if needed.
 *
 * All three operations are pure virtual.
 ***********************************************************/
 struct DistanceComputer {
    using idx_t = Index::idx_t;
    /// called before computing distances. Pointer x should remain valid
    /// while operator () is called
    virtual void set_query(const float* x) = 0;
    /// compute distance of vector i to current query
    virtual float operator()(idx_t i) = 0;
    /// compute distance between two stored vectors
    virtual float symmetric_dis(idx_t i, idx_t j) = 0;
    virtual ~DistanceComputer() {}
 };
 /***********************************************************
 * Interrupt callback
 *
 * Subclasses implement want_interrupt(); the static members hold the
 * single installed callback and the helpers that consult it.
 ***********************************************************/
 struct FAISS_API InterruptCallback {
    /// @return true to request an interruption
    virtual bool want_interrupt() = 0;
    virtual ~InterruptCallback() {}
    // lock that protects concurrent calls to is_interrupted
    static std::mutex lock;
    /// the installed callback, if any (owned through the unique_ptr)
    static std::unique_ptr<InterruptCallback> instance;
    /// NOTE(review): presumably resets `instance` -- confirm in the .cpp
    static void clear_instance();
    /** check if:
     * - an interrupt callback is set
     * - the callback returns true
     * if this is the case, then throw an exception. Should not be called
     * from multiple threads.
     */
    static void check();
    /// same as check() but return true if is interrupted instead of
    /// throwing. Can be called from multiple threads.
    static bool is_interrupted();
    /** assuming each iteration takes a certain number of flops, what
     * is a reasonable interval to check for interrupts?
     */
    static size_t get_period_hint(size_t flops);
 };
 /// Set of small integers tuned for fast access: one byte per slot holds
 /// the "epoch" (visno) in which the slot was last marked, so clearing the
 /// whole set normally costs a single increment.
 struct VisitedTable {
    std::vector<uint8_t> visited; ///< per-slot epoch stamp, 0 = never marked
    int visno;                    ///< current epoch, always in [1, 249]
    /// create a table of `size` slots, none of them marked
    explicit VisitedTable(int size) : visited(size), visno(1) {}
    /// mark slot #no as visited in the current epoch
    void set(int no) {
        visited[no] = static_cast<uint8_t>(visno);
    }
    /// was slot #no marked in the current epoch?
    bool get(int no) const {
        const int stamp = visited[no];
        return stamp == visno;
    }
    /// unmark every slot by moving on to the next epoch
    void advance() {
        ++visno;
        if (visno != 250) {
            return;
        }
        // Epochs wrap at 250 rather than 255 because some callers use both
        // visno and visno+1; on wrap-around the stamps are wiped for real.
        visited.assign(visited.size(), 0);
        visno = 1;
    }
 };
 } // namespace faiss
 #endif
--- a/src/3rdlib/faiss/impl/FaissAssert.h
+++ b/src/3rdlib/faiss/impl/FaissAssert.h
@ -0,0 +1,111 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #ifndef FAISS_ASSERT_INCLUDED
 #define FAISS_ASSERT_INCLUDED
 #include <faiss/impl/FaissException.h>
 #include <faiss/impl/platform_macros.h>
 #include <cstdio>
 #include <cstdlib>
 #include <string>
 ///
 /// Assertions
 ///
 // FAISS_ASSERT(X): if X evaluates to false, print the stringified
 // expression together with the enclosing function, file and line to stderr,
 // then abort(). Wrapped in do { } while (false) so it behaves as a single
 // statement.
 #define FAISS_ASSERT(X)                                  \
    do {                                                 \
        if (!(X)) {                                      \
            fprintf(stderr,                              \
                    "Faiss assertion '%s' failed in %s " \
                    "at %s:%d\n",                        \
                    #X,                                  \
                    __PRETTY_FUNCTION__,                 \
                    __FILE__,                            \
                    __LINE__);                           \
            abort();                                     \
        }                                                \
    } while (false)
 // FAISS_ASSERT_MSG(X, MSG): same as FAISS_ASSERT but appends MSG to the
 // report. MSG is pasted into the format string by literal concatenation, so
 // it must be a string literal and any '%' in it is interpreted by fprintf.
 #define FAISS_ASSERT_MSG(X, MSG)                         \
    do {                                                 \
        if (!(X)) {                                      \
            fprintf(stderr,                              \
                    "Faiss assertion '%s' failed in %s " \
                    "at %s:%d; details: " MSG "\n",      \
                    #X,                                  \
                    __PRETTY_FUNCTION__,                 \
                    __FILE__,                            \
                    __LINE__);                           \
            abort();                                     \
        }                                                \
    } while (false)
 // FAISS_ASSERT_FMT(X, FMT, ...): same as FAISS_ASSERT_MSG, with FMT being a
 // printf-style string literal whose conversions consume the variadic
 // arguments. __VA_ARGS__ follows a comma, so at least one argument is
 // expected.
 #define FAISS_ASSERT_FMT(X, FMT, ...)                    \
    do {                                                 \
        if (!(X)) {                                      \
            fprintf(stderr,                              \
                    "Faiss assertion '%s' failed in %s " \
                    "at %s:%d; details: " FMT "\n",      \
                    #X,                                  \
                    __PRETTY_FUNCTION__,                 \
                    __FILE__,                            \
                    __LINE__,                            \
                    __VA_ARGS__);                        \
            abort();                                     \
        }                                                \
    } while (false)
 ///
 /// Exceptions for returning user errors
 ///
 // FAISS_THROW_MSG(MSG): throw a faiss::FaissException carrying MSG plus the
 // enclosing function, file and line.
 #define FAISS_THROW_MSG(MSG)                                   \
    do {                                                       \
        throw faiss::FaissException(                           \
                MSG, __PRETTY_FUNCTION__, __FILE__, __LINE__); \
    } while (false)
 // FAISS_THROW_FMT(FMT, ...): printf-style variant of FAISS_THROW_MSG.
 // The message is formatted in two passes: the first snprintf (null buffer,
 // size 0) only measures the output length, the second writes into a string
 // resized to length + 1 so the terminating NUL fits. Note that the string
 // handed to the exception therefore still contains that NUL as its last
 // character. __VA_ARGS__ follows a comma, so at least one argument is
 // expected, and the arguments are evaluated twice.
 #define FAISS_THROW_FMT(FMT, ...)                              \
    do {                                                       \
        std::string __s;                                       \
        int __size = snprintf(nullptr, 0, FMT, __VA_ARGS__);   \
        __s.resize(__size + 1);                                \
        snprintf(&__s[0], __s.size(), FMT, __VA_ARGS__);       \
        throw faiss::FaissException(                           \
                __s, __PRETTY_FUNCTION__, __FILE__, __LINE__); \
    } while (false)
 ///
 /// Exceptions thrown upon a conditional failure
 ///
 // FAISS_THROW_IF_NOT(X): throw (via FAISS_THROW_FMT) when X is false; the
 // message contains the stringified condition.
 #define FAISS_THROW_IF_NOT(X)                          \
    do {                                               \
        if (!(X)) {                                    \
            FAISS_THROW_FMT("Error: '%s' failed", #X); \
        }                                              \
    } while (false)
 // FAISS_THROW_IF_NOT_MSG(X, MSG): same, with MSG appended. MSG is
 // concatenated into the format string, so it must be a string literal and
 // a '%' inside it would be treated as a conversion specifier.
 #define FAISS_THROW_IF_NOT_MSG(X, MSG)                       \
    do {                                                     \
        if (!(X)) {                                          \
            FAISS_THROW_FMT("Error: '%s' failed: " MSG, #X); \
        }                                                    \
    } while (false)
 // FAISS_THROW_IF_NOT_FMT(X, FMT, ...): same, with a printf-style FMT
 // literal whose conversions consume the variadic arguments.
 #define FAISS_THROW_IF_NOT_FMT(X, FMT, ...)                               \
    do {                                                                  \
        if (!(X)) {                                                       \
            FAISS_THROW_FMT("Error: '%s' failed: " FMT, #X, __VA_ARGS__); \
        }                                                                 \
    } while (false)
 #endif
--- a/src/3rdlib/faiss/impl/FaissException.h
+++ b/src/3rdlib/faiss/impl/FaissException.h
@ -0,0 +1,87 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #ifndef FAISS_EXCEPTION_INCLUDED
 #define FAISS_EXCEPTION_INCLUDED
 #include <exception>
 #include <string>
 #include <utility>
 #include <vector>
 namespace faiss {
 /// Base class for Faiss exceptions. Instances are what the FAISS_THROW_*
 /// macros throw.
 class FaissException : public std::exception {
   public:
    /// build an exception from a plain message
    explicit FaissException(const std::string& msg);
    /// build an exception from a message plus the location it was raised at
    /// (function name, source file and line)
    FaissException(
            const std::string& msg,
            const char* funcName,
            const char* file,
            int line);
    /// from std::exception
    const char* what() const noexcept override;
    /// stored message text (presumably what what() exposes -- confirm in the
    /// .cpp)
    std::string msg;
 };
 /// Handle multiple exceptions from worker threads, throwing an appropriate
 /// exception that aggregates the information.
 /// @param exceptions  one entry per captured exception; the int of each pair
 ///                    is the thread that generated the exception
 void handleExceptions(
        std::vector<std::pair<int, std::exception_ptr>>& exceptions);
 /** Bare-bones scope guard for heap arrays: whatever pointer it holds when
 * it is destroyed is released with delete[].
 *
 * The guard is deliberately non-copyable. A copy would leave two guards
 * holding the same pointer and the array would be delete[]d twice.
 */
 template <class T>
 struct ScopeDeleter {
    const T* ptr; ///< guarded array, or nullptr when nothing is guarded
    explicit ScopeDeleter(const T* ptr = nullptr) : ptr(ptr) {}
    ScopeDeleter(const ScopeDeleter&) = delete;
    ScopeDeleter& operator=(const ScopeDeleter&) = delete;
    /// give up the pointer without deleting it
    void release() {
        ptr = nullptr;
    }
    /// guard ptr_in from now on; the previously held pointer is NOT deleted
    void set(const T* ptr_in) {
        ptr = ptr_in;
    }
    /// exchange the guarded pointers of two guards
    void swap(ScopeDeleter<T>& other) {
        std::swap(ptr, other.ptr);
    }
    ~ScopeDeleter() {
        delete[] ptr;
    }
 };
 /** Bare-bones scope guard for a single heap object: whatever pointer it
 * holds when it is destroyed is released with plain delete (least common
 * case; arrays need the delete[] variant).
 *
 * The guard is deliberately non-copyable. A copy would leave two guards
 * holding the same pointer and the object would be deleted twice.
 */
 template <class T>
 struct ScopeDeleter1 {
    const T* ptr; ///< guarded object, or nullptr when nothing is guarded
    explicit ScopeDeleter1(const T* ptr = nullptr) : ptr(ptr) {}
    ScopeDeleter1(const ScopeDeleter1&) = delete;
    ScopeDeleter1& operator=(const ScopeDeleter1&) = delete;
    /// give up the pointer without deleting it
    void release() {
        ptr = nullptr;
    }
    /// guard ptr_in from now on; the previously held pointer is NOT deleted
    void set(const T* ptr_in) {
        ptr = ptr_in;
    }
    /// exchange the guarded pointers of two guards
    void swap(ScopeDeleter1<T>& other) {
        std::swap(ptr, other.ptr);
    }
    ~ScopeDeleter1() {
        delete ptr;
    }
 };
 /// make typeids more readable
 /// @param name  symbol name to convert (e.g. as obtained from typeid)
 /// @return      the converted name as a std::string
 std::string demangle_cpp_symbol(const char* name);
 } // namespace faiss
 #endif
--- a/src/3rdlib/faiss/impl/HNSW.h
+++ b/src/3rdlib/faiss/impl/HNSW.h
@ -0,0 +1,262 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #pragma once
 #include <queue>
 #include <unordered_set>
 #include <vector>
 #include <omp.h>
 #include <faiss/Index.h>
 #include <faiss/impl/FaissAssert.h>
 #include <faiss/impl/platform_macros.h>
 #include <faiss/utils/Heap.h>
 #include <faiss/utils/random.h>
 namespace faiss {
 /** Implementation of the Hierarchical Navigable Small World
 * datastructure.
 *
 * Efficient and robust approximate nearest neighbor search using
 * Hierarchical Navigable Small World graphs
 *
 *  Yu. A. Malkov, D. A. Yashunin, arXiv 2017
 *
 * This implementation is heavily influenced by the NMSlib
 * implementation by Yury Malkov and Leonid Boytsov
 * (https://github.com/searchivarius/nmslib)
 *
 * The HNSW object stores only the neighbor link structure, see
 * IndexHNSW.h for the full index object.
 */
 struct VisitedTable;
 struct DistanceComputer; // from AuxIndexStructures
 struct HNSWStats;
 /// Neighbor-link structure of an HNSW graph together with the routines
 /// that build and search it. Vectors themselves are not stored here; all
 /// distances are obtained through a DistanceComputer.
 struct HNSW {
    /// internal storage of vectors (32 bits: this is expensive)
    typedef int storage_idx_t;
    /// Faiss results are 64-bit
    typedef Index::idx_t idx_t;
    /// (distance, id) pair
    typedef std::pair<float, storage_idx_t> Node;
    /** Heap structure over (id, distance) pairs that supports push, max(),
     * pop_min() and count_below().
     */
    struct MinimaxHeap {
        int n;      ///< size the ids / dis arrays are allocated with
        int k;      ///< starts at 0; maintained by the out-of-line methods
        int nvalid; ///< starts at 0; maintained by the out-of-line methods
        std::vector<storage_idx_t> ids;
        std::vector<float> dis;
        typedef faiss::CMax<float, storage_idx_t> HC;
        explicit MinimaxHeap(int n) : n(n), k(0), nvalid(0), ids(n), dis(n) {}
        void push(storage_idx_t i, float v);
        float max() const;
        int size() const;
        void clear();
        /// @param vmin_out  optional output, presumably the distance of the
        ///                  popped entry -- confirm in the .cpp
        int pop_min(float* vmin_out = nullptr);
        int count_below(float thresh);
    };
    /// to sort pairs of (id, distance) from nearest to farthest or the reverse
    /// NodeDistCloser compares as "less" when its distance is smaller, so a
    /// std::priority_queue of them has the largest distance on top.
    struct NodeDistCloser {
        float d;
        int id;
        NodeDistCloser(float d, int id) : d(d), id(id) {}
        bool operator<(const NodeDistCloser& obj1) const {
            return d < obj1.d;
        }
    };
    /// NodeDistFarther compares as "less" when its distance is larger, so a
    /// std::priority_queue of them has the smallest distance on top.
    struct NodeDistFarther {
        float d;
        int id;
        NodeDistFarther(float d, int id) : d(d), id(id) {}
        bool operator<(const NodeDistFarther& obj1) const {
            return d > obj1.d;
        }
    };
    /// assignment probability to each layer (sum=1)
    std::vector<double> assign_probas;
    /// number of neighbors stored per layer (cumulative), should not
    /// be changed after first add
    std::vector<int> cum_nneighbor_per_level;
    /// level of each vector (base level = 1), size = ntotal
    std::vector<int> levels;
    /// offsets[i] is the offset in the neighbors array where vector i is stored
    /// size ntotal + 1
    std::vector<size_t> offsets;
    /// neighbors[offsets[i]:offsets[i+1]] is the list of neighbors of vector i
    /// for all levels. this is where all storage goes.
    std::vector<storage_idx_t> neighbors;
    /// entry point in the search structure (one of the points with maximum
    /// level)
    storage_idx_t entry_point;
    faiss::RandomGenerator rng;
    /// maximum level
    int max_level;
    /// expansion factor at construction time
    int efConstruction;
    /// expansion factor at search time
    int efSearch;
    /// during search: do we check whether the next best distance is good
    /// enough?
    bool check_relative_distance = true;
    /// number of entry points in levels > 0.
    int upper_beam;
    /// use bounded queue during exploration
    bool search_bounded_queue = true;
    // methods that initialize the tree sizes
    /// initialize the assign_probas and cum_nneighbor_per_level to
    /// have 2*M links on level 0 and M links on levels > 0
    void set_default_probas(int M, float levelMult);
    /// set nb of neighbors for this level (before adding anything)
    void set_nb_neighbors(int level_no, int n);
    // methods that access the tree sizes
    /// nb of neighbors for this level
    int nb_neighbors(int layer_no) const;
    /// cumulative nb up to (and excluding) this level
    int cum_nb_neighbors(int layer_no) const;
    /// range of entries in the neighbors table of vertex no at layer_no
    /// (written to *begin and *end)
    void neighbor_range(idx_t no, int layer_no, size_t* begin, size_t* end)
            const;
    /// only mandatory parameter: nb of neighbors
    explicit HNSW(int M = 32);
    /// pick a random level for a new point
    int random_level();
    /// add n random levels to table (for debugging...)
    void fill_with_random_links(size_t n);
    /// Link pt_id into the graph at the given level, starting the exploration
    /// from `nearest` (at distance d_nearest). `locks` is a raw array of
    /// OpenMP locks, presumably one per vertex -- confirm in the .cpp.
    void add_links_starting_from(
            DistanceComputer& ptdis,
            storage_idx_t pt_id,
            storage_idx_t nearest,
            float d_nearest,
            int level,
            omp_lock_t* locks,
            VisitedTable& vt);
    /** add point pt_id on all levels <= pt_level and build the link
     * structure for them. */
    void add_with_locks(
            DistanceComputer& ptdis,
            int pt_level,
            int pt_id,
            std::vector<omp_lock_t>& locks,
            VisitedTable& vt);
    /// Search one level starting from the entries already in `candidates`,
    /// writing results to I (ids) and D (distances) and updating `stats`.
    /// The meaning of the int return value and of nres_in is defined in the
    /// .cpp (not visible here).
    int search_from_candidates(
            DistanceComputer& qdis,
            int k,
            idx_t* I,
            float* D,
            MinimaxHeap& candidates,
            VisitedTable& vt,
            HNSWStats& stats,
            int level,
            int nres_in = 0) const;
    /// Variant of the exploration that starts from a single node and returns
    /// the results as a std::priority_queue of (distance, id) pairs.
    std::priority_queue<Node> search_from_candidate_unbounded(
            const Node& node,
            DistanceComputer& qdis,
            int ef,
            VisitedTable* vt,
            HNSWStats& stats) const;
    /// search interface
    /// @param qdis  distance computer bound to the query
    /// @param k     number of results requested
    /// @param I     output ids
    /// @param D     output distances
    /// @param vt    visited table used as scratch space
    /// @return      statistics collected during this search
    HNSWStats search(
            DistanceComputer& qdis,
            int k,
            idx_t* I,
            float* D,
            VisitedTable& vt) const;
    void reset();
    void clear_neighbor_tables(int level);
    void print_neighbor_stats(int level) const;
    int prepare_level_tab(size_t n, bool preset_levels = false);
    /// Reduce the candidate list `input` to at most max_size entries,
    /// written to `output`.
    static void shrink_neighbor_list(
            DistanceComputer& qdis,
            std::priority_queue<NodeDistFarther>& input,
            std::vector<NodeDistFarther>& output,
            int max_size);
 };
 /// Plain bundle of counters reported by HNSW searches. Counters from
 /// several searches can be accumulated with combine().
 struct HNSWStats {
    size_t n1;
    size_t n2;
    size_t n3;
    size_t ndis;
    size_t nreorder;
    /// every counter defaults to zero
    HNSWStats(
            size_t n1 = 0,
            size_t n2 = 0,
            size_t n3 = 0,
            size_t ndis = 0,
            size_t nreorder = 0)
            : n1(n1), n2(n2), n3(n3), ndis(ndis), nreorder(nreorder) {}
    /// set every counter back to zero
    void reset() {
        *this = HNSWStats();
    }
    /// add the counters of `other` to this object, field by field
    void combine(const HNSWStats& other) {
        n1 = n1 + other.n1;
        n2 = n2 + other.n2;
        n3 = n3 + other.n3;
        ndis = ndis + other.ndis;
        nreorder = nreorder + other.nreorder;
    }
 };
 // global var that collects them all (declared here, defined in another
 // translation unit; a plain object with no synchronization of its own)
 FAISS_API extern HNSWStats hnsw_stats;
 } // namespace faiss
--- a/src/3rdlib/faiss/impl/LocalSearchQuantizer.h
+++ b/src/3rdlib/faiss/impl/LocalSearchQuantizer.h
@ -0,0 +1,180 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 #pragma once
 #include <stdint.h>
 #include <random>
 #include <string>
 #include <unordered_map>
 #include <vector>
 #include <faiss/impl/AdditiveQuantizer.h>
 #include <faiss/impl/platform_macros.h>
 #include <faiss/utils/utils.h>
 namespace faiss {
 /** Implementation of LSQ/LSQ++ described in the following two papers:
 *
 * Revisiting additive quantization
 * Julieta Martinez, et al. ECCV 2016
 *
 * LSQ++: Lower running time and higher recall in multi-codebook quantization
 * Julieta Martinez, et al. ECCV 2018
 *
 * This implementation is mostly translated from the Julia implementations
 * by Julieta Martinez:
 * (https://github.com/una-dinosauria/local-search-quantization,
 *  https://github.com/una-dinosauria/Rayuela.jl)
 *
 * The trained codes are stored in `codebooks` which is called
 * `centroids` in PQ and RQ.
 */
 /// Additive quantizer trained and encoded with local search (LSQ / LSQ++).
 /// Only the interface is declared here; the implementations live in the
 /// corresponding .cpp file.
 struct LocalSearchQuantizer : AdditiveQuantizer {
    size_t K; ///< number of codes per codebook
    size_t train_iters; ///< number of iterations in training
    size_t encode_ils_iters; ///< iterations of local search in encoding
    size_t train_ils_iters;  ///< iterations of local search in training
    size_t icm_iters;        ///< number of iterations in icm
    float p;     ///< temperature factor
    float lambd; ///< regularization factor
    size_t chunk_size; ///< nb of vectors to encode at a time
    int random_seed; ///< seed for random generator
    size_t nperts;   ///< number of perturbation in each code
    /// presumably selects double precision for the codebook update --
    /// confirm in the .cpp
    bool update_codebooks_with_double = true;
    LocalSearchQuantizer(
            size_t d,     /* dimensionality of the input vectors */
            size_t M,     /* number of subquantizers */
            size_t nbits, /* number of bits per subvector index */
            Search_type_t search_type =
                    ST_decompress /* determines the storage type */
    );
    LocalSearchQuantizer();
    /** Train the local search quantizer
     *
     * @param n  number of training vectors
     * @param x  training vectors, size n * d
     */
    void train(size_t n, const float* x) override;
    /** Encode a set of vectors
     *
     * @param x      vectors to encode, size n * d
     * @param codes  output codes, size n * code_size
     * @param n      number of vectors
     */
    void compute_codes(const float* x, uint8_t* codes, size_t n) const override;
    /** Update codebooks given encodings
     *
     * @param x      training vectors, size n * d
     * @param codes  encoded training vectors, size n * M
     * @param n      number of training vectors
     */
    void update_codebooks(const float* x, const int32_t* codes, size_t n);
    /** Encode vectors given codebooks using iterative conditional mode (icm).
     *
     * @param x      vectors to encode, size n * d
     * @param codes  output codes, size n * M
     * @param n      number of vectors
     * @param ils_iters number of iterations of iterative local search
     * @param gen    random generator used by the search
     */
    void icm_encode(
            const float* x,
            int32_t* codes,
            size_t n,
            size_t ils_iters,
            std::mt19937& gen) const;
    /** Same interface as icm_encode with two extra inputs: `index` and
     * precomputed `binaries` (see compute_binary_terms). Presumably encodes
     * one chunk of the input -- confirm in the .cpp.
     */
    void icm_encode_partial(
            size_t index,
            const float* x,
            int32_t* codes,
            size_t n,
            const float* binaries,
            size_t ils_iters,
            std::mt19937& gen) const;
    /** One icm update of `codes` from precomputed unary and binary terms
     * (see compute_unary_terms / compute_binary_terms for their sizes).
     */
    void icm_encode_step(
            const float* unaries,
            const float* binaries,
            int32_t* codes,
            size_t n) const;
    /** Add some perturbation to codebooks
     *
     * @param T         temperature of simulated annealing
     * @param stddev    standard deviations of each dimension in training data
     * @param gen       random generator used to draw the perturbation
     */
    void perturb_codebooks(
            float T,
            const std::vector<float>& stddev,
            std::mt19937& gen);
    /** Add some perturbation to codes
     *
     * @param codes codes to be perturbed, size n * M
     * @param n     number of encoded vectors
     * @param gen   random generator used to draw the perturbation
     */
    void perturb_codes(int32_t* codes, size_t n, std::mt19937& gen) const;
    /** Compute binary terms
     *
     * @param binaries binary terms, size M * M * K * K
     */
    void compute_binary_terms(float* binaries) const;
    /** Compute unary terms
     *
     * @param x       vectors to encode, size n * d
     * @param unaries unary terms, size n * M * K
     * @param n       number of vectors
     */
    void compute_unary_terms(const float* x, float* unaries, size_t n) const;
    /** Helper function to compute reconstruction error
     *
     * Note the argument order: codes come first, then the vectors.
     *
     * @param codes encoded codes, size n * M
     * @param x     vectors to encode, size n * d
     * @param n     number of vectors
     * @param objs  if it is not null, store reconstruction
                    error of each vector into it, size n
     */
    float evaluate(
            const int32_t* codes,
            const float* x,
            size_t n,
            float* objs = nullptr) const;
 };
 /** A helper struct to count consuming time during training.
 *  It is NOT thread-safe.
 */
 struct LSQTimer {
    // All three maps are keyed by a caller-chosen timer name.
    // presumably: accumulated time per name -- confirm in the .cpp
    std::unordered_map<std::string, double> duration;
    // presumably: timestamp of the pending start() per name -- confirm
    std::unordered_map<std::string, double> t0;
    // presumably: whether a start() is pending per name -- confirm
    std::unordered_map<std::string, bool> started;
    // a freshly constructed timer goes through reset()
    LSQTimer() {
        reset();
    }
    double get(const std::string& name);
    void start(const std::string& name);
    void end(const std::string& name);
    void reset();
 };
 // Declared here, defined in another translation unit.
 FAISS_API extern LSQTimer lsq_timer; ///< timer to count consuming time
 } // namespace faiss
--- a/src/3rdlib/faiss/impl/NNDescent.h
+++ b/src/3rdlib/faiss/impl/NNDescent.h
@ -0,0 +1,154 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #pragma once
 #include <algorithm>
 #include <mutex>
 #include <queue>
 #include <random>
 #include <unordered_set>
 #include <vector>
 #include <omp.h>
 #include <faiss/Index.h>
 #include <faiss/impl/FaissAssert.h>
 #include <faiss/impl/platform_macros.h>
 #include <faiss/utils/Heap.h>
 #include <faiss/utils/random.h>
 namespace faiss {
 /** Implementation of NNDescent which is one of the most popular
 *  KNN graph building algorithms
 *
 * Efficient K-Nearest Neighbor Graph Construction for Generic
 * Similarity Measures
 *
 *  Dong, Wei, Charikar Moses, and Kai Li, WWW 2011
 *
 * This implementation is heavily influenced by the efanna
 * implementation by Cong Fu and the KGraph library by Wei Dong
 * (https://github.com/ZJULearning/efanna_graph)
 * (https://github.com/aaalgo/kgraph)
 *
 * The NNDescent object stores only the neighbor link structure,
 * see IndexNNDescent.h for the full index object.
 */
 struct VisitedTable;
 struct DistanceComputer;
 namespace nndescent {
 /// One candidate neighbor: a node id, its distance and a boolean flag.
 /// Neighbors are ordered by distance only.
 struct Neighbor {
    int id;
    float distance;
    bool flag;
    Neighbor() = default;
    Neighbor(int id, float distance, bool f) {
        this->id = id;
        this->distance = distance;
        this->flag = f;
    }
    /// strict ordering on distance; id and flag do not take part
    bool operator<(const Neighbor& other) const {
        return other.distance > distance;
    }
 };
 /// Per-node neighborhood state used while building the KNN graph.
 struct Nhood {
    std::mutex lock; // guards this neighborhood (usage is in the .cpp)
    std::vector<Neighbor> pool; // candidate pool (a max heap)
    int M;                      // number of new neighbors to be operated
    std::vector<int> nn_old;  // old neighbors
    std::vector<int> nn_new;  // new neighbors
    std::vector<int> rnn_old; // reverse old neighbors
    std::vector<int> rnn_new; // reverse new neighbors
    Nhood() = default;
    // NOTE(review): the meaning of l, s and N is not visible from this
    // header -- see the definition in the .cpp.
    Nhood(int l, int s, std::mt19937& rng, int N);
    // Copy operations are user-provided; std::mutex itself is not copyable,
    // so the implicit ones would be unavailable.
    Nhood& operator=(const Nhood& other);
    Nhood(const Nhood& other);
    // offer (id, dist) to the candidate pool
    void insert(int id, float dist);
    // invoke `callback` during a local join; the exact call pattern is
    // defined with the template body (not visible here)
    template <typename C>
    void join(C callback) const;
 };
 } // namespace nndescent
 /// KNN-graph builder and searcher based on the NNDescent algorithm. Holds
 /// only the graph; distances come from a DistanceComputer.
 struct NNDescent {
    using storage_idx_t = int;
    using idx_t = Index::idx_t;
    using KNNGraph = std::vector<nndescent::Nhood>;
    /// @param d  dimension of the vectors
    /// @param K  K in KNN graph
    explicit NNDescent(const int d, const int K);
    ~NNDescent();
    /// build the graph over n vectors reachable through qdis
    void build(DistanceComputer& qdis, const int n, bool verbose);
    /// search the built graph
    /// @param qdis     distance computer bound to the query
    /// @param topk     number of results requested
    /// @param indices  output ids
    /// @param dists    output distances
    /// @param vt       visited table used as scratch space
    void search(
            DistanceComputer& qdis,
            const int topk,
            idx_t* indices,
            float* dists,
            VisitedTable& vt) const;
    void reset();
    /// Initialize the KNN graph randomly
    void init_graph(DistanceComputer& qdis);
    /// Perform NNDescent algorithm
    void nndescent(DistanceComputer& qdis, bool verbose);
    /// Perform local join on each node
    void join(DistanceComputer& qdis);
    /// Sample new neighbors for each node to perform local join later
    void update();
    /// Sample a small number of points to evaluate the quality of KNNG built
    void generate_eval_set(
            DistanceComputer& qdis,
            std::vector<int>& c,
            std::vector<std::vector<int>>& v,
            int N);
    /// Evaluate the quality of KNNG built
    float eval_recall(
            std::vector<int>& ctrl_points,
            std::vector<std::vector<int>>& acc_eval_set);
    bool has_built;
    int K; // K in KNN graph
    int S; // number of sample neighbors to be updated for each node
    int R; // size of reverse links, 0 means the reverse links will not be used
    int L; // size of the candidate pool in building
    int iter;        // number of iterations to iterate over
    int search_L;    // size of candidate pool in searching
    int random_seed; // random seed for generators
    int d; // dimensions
    int ntotal;
    KNNGraph graph; // per-node neighborhood state (see nndescent::Nhood)
    std::vector<int> final_graph;
 };
 } // namespace faiss
--- a/src/3rdlib/faiss/impl/NSG.h
+++ b/src/3rdlib/faiss/impl/NSG.h
@ -0,0 +1,199 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #pragma once
 #include <memory>
 #include <mutex>
 #include <vector>
 #include <omp.h>
 #include <faiss/Index.h>
 #include <faiss/impl/AuxIndexStructures.h>
 #include <faiss/impl/FaissAssert.h>
 #include <faiss/utils/Heap.h>
 #include <faiss/utils/random.h>
 namespace faiss {
 /** Implementation of the Navigating Spreading-out Graph (NSG)
 * datastructure.
 *
 * Fast Approximate Nearest Neighbor Search With The
 * Navigating Spreading-out Graph
 *
 *  Cong Fu, Chao Xiang, Changxu Wang, Deng Cai, VLDB 2019
 *
 * This implementation is heavily influenced by the NSG
 * implementation by ZJULearning Group
 * (https://github.com/zjulearning/nsg)
 *
 * The NSG object stores only the neighbor link structure, see
 * IndexNSG.h for the full index object.
 */
 struct DistanceComputer; // from AuxIndexStructures
 struct Neighbor;
 struct Node;
 namespace nsg {
 /***********************************************************
 * Graph structure to store a graph.
 *
 * It is represented by an adjacency matrix `data`, where
 * data[i, j] is the j-th neighbor of node i.
 ***********************************************************/
 /// Dense fixed-degree graph stored as a flattened N x K array.
 ///
 /// A Graph either borrows an external buffer (own_fields == false) or owns
 /// a buffer it allocated itself (own_fields == true). Copies, whether made
 /// by construction or by assignment, always own a fresh deep copy, so two
 /// graphs never try to free the same buffer.
 template <class node_t>
 struct Graph {
    node_t* data;    ///< the flattened adjacency matrix
    int K;           ///< nb of neighbors per node
    int N;           ///< total nb of nodes
    bool own_fields; ///< the underlying data owned by itself or not
    // construct from a known graph (the buffer is borrowed, not copied)
    Graph(node_t* data, int N, int K)
            : data(data), K(K), N(N), own_fields(false) {}
    // construct an empty graph
    // NOTE: the newly allocated data needs to be destroyed at destruction time
    Graph(int N, int K) : K(K), N(N), own_fields(true) {
        // widen before multiplying: N * K may not fit in an int
        data = new node_t[static_cast<size_t>(N) * K];
    }
    // copy constructor: deep copy into a newly owned buffer
    Graph(const Graph& g) : Graph(g.N, g.K) {
        const size_t count = static_cast<size_t>(N) * K;
        if (count > 0) {
            memcpy(data, g.data, count * sizeof(node_t));
        }
    }
    // copy assignment: deep copy, mirroring the copy constructor. The
    // implicit version would copy the raw pointer together with own_fields,
    // leaking the old buffer and freeing the shared one twice.
    Graph& operator=(const Graph& g) {
        if (this != &g) {
            const size_t count = static_cast<size_t>(g.N) * g.K;
            // allocate first so *this is untouched if allocation throws
            node_t* fresh = new node_t[count];
            if (count > 0) {
                memcpy(fresh, g.data, count * sizeof(node_t));
            }
            if (own_fields) {
                delete[] data;
            }
            data = fresh;
            K = g.K;
            N = g.N;
            own_fields = true;
        }
        return *this;
    }
    // release the allocated memory if needed
    ~Graph() {
        if (own_fields) {
            delete[] data;
        }
    }
    // access the j-th neighbor of node i
    inline node_t at(int i, int j) const {
        return data[static_cast<size_t>(i) * K + j];
    }
    // access the j-th neighbor of node i by reference
    inline node_t& at(int i, int j) {
        return data[static_cast<size_t>(i) * K + j];
    }
 };
 /// Obtain a DistanceComputer for the vectors held by `storage`.
 /// NOTE(review): a raw pointer is returned; presumably the caller owns it
 /// and must delete it -- confirm against the definition.
 DistanceComputer* storage_distance_computer(const Index* storage);
 } // namespace nsg
 /// Neighbor-link structure of a Navigating Spreading-out Graph together
 /// with the routines that build and search it. Vectors are not stored
 /// here; they are reached through an Index / DistanceComputer.
 struct NSG {
    /// internal storage of vectors (32 bits: this is expensive)
    using storage_idx_t = int;
    /// Faiss results are 64-bit
    using idx_t = Index::idx_t;
    int ntotal; ///< nb of nodes
    /// construction-time parameters
    int R; ///< nb of neighbors per node
    int L; ///< length of the search path at construction time
    int C; ///< candidate pool size at construction time
    // search-time parameters
    int search_L; ///< length of the search path
    int enterpoint; ///< enterpoint
    std::shared_ptr<nsg::Graph<int>> final_graph; ///< NSG graph structure
    bool is_built; ///< NSG is built or not
    RandomGenerator rng; ///< random generator
    /// @param R  nb of neighbors per node
    explicit NSG(int R = 32);
    // build NSG from a KNN graph
    //   storage    index giving access to the vectors
    //   n          number of vectors
    //   knn_graph  input KNN graph (64-bit ids)
    //   verbose    verbosity flag
    void build(
            Index* storage,
            idx_t n,
            const nsg::Graph<idx_t>& knn_graph,
            bool verbose);
    // reset the graph
    void reset();
    // search interface
    //   dis  distance computer bound to the query
    //   k    number of results requested
    //   I    output ids
    //   D    output distances
    //   vt   visited table used as scratch space
    void search(
            DistanceComputer& dis,
            int k,
            idx_t* I,
            float* D,
            VisitedTable& vt) const;
    // Compute the center point
    void init_graph(Index* storage, const nsg::Graph<idx_t>& knn_graph);
    // Search on a built graph.
    // If collect_fullset is true, the visited nodes will be
    // collected in `fullset`.
    //   ep         node the search starts from
    //   pool_size  size of the candidate pool
    //   retset     output candidates
    template <bool collect_fullset, class index_t>
    void search_on_graph(
            const nsg::Graph<index_t>& graph,
            DistanceComputer& dis,
            VisitedTable& vt,
            int ep,
            int pool_size,
            std::vector<Neighbor>& retset,
            std::vector<Node>& fullset) const;
    // Add reverse links
    // (`locks` is a vector of mutexes, presumably one per node -- confirm)
    void add_reverse_links(
            int q,
            std::vector<std::mutex>& locks,
            DistanceComputer& dis,
            nsg::Graph<Node>& graph);
    // Prune the candidate `pool` of node q and store the kept links in
    // `graph` (details are in the .cpp)
    void sync_prune(
            int q,
            std::vector<Node>& pool,
            DistanceComputer& dis,
            VisitedTable& vt,
            const nsg::Graph<idx_t>& knn_graph,
            nsg::Graph<Node>& graph);
    // Build the links of `graph` from the KNN graph
    void link(
            Index* storage,
            const nsg::Graph<idx_t>& knn_graph,
            nsg::Graph<Node>& graph,
            bool verbose);
    // make NSG be fully connected
    int tree_grow(Index* storage, std::vector<int>& degrees);
    // count the size of the connected component
    // using depth first search start by root
    int dfs(VisitedTable& vt, int root, int cnt) const;
    // attach one unlinked node
    int attach_unlinked(
            Index* storage,
            VisitedTable& vt,
            VisitedTable& vt2,
            std::vector<int>& degrees);
    // check the integrity of the NSG built
    void check_graph() const;
 };
 } // namespace faiss
--- a/src/3rdlib/faiss/impl/PolysemousTraining.h
+++ b/src/3rdlib/faiss/impl/PolysemousTraining.h
@ -0,0 +1,155 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #ifndef FAISS_POLYSEMOUS_TRAINING_INCLUDED
 #define FAISS_POLYSEMOUS_TRAINING_INCLUDED
 #include <faiss/impl/ProductQuantizer.h>
 namespace faiss {
 /// parameters used for the simulated annealing method
 /// (plain data; the defaults are assigned by the constructor, which is
 /// defined out of line)
 struct SimulatedAnnealingParameters {
    // optimization parameters
    double init_temperature;  // init probability of accepting a bad swap
    double temperature_decay; // at each iteration the temp is multiplied by
                              // this
    int n_iter;               // nb of iterations
    int n_redo;               // nb of runs of the simulation
    int seed;                 // random seed
    int verbose;              // verbosity (an int, not a bool)
    bool only_bit_flips; // restrict permutation changes to bit flips
    bool init_random;    // initialize with a random permutation (not identity)
    // set reasonable defaults
    SimulatedAnnealingParameters();
 };
 /// abstract class for the loss function
 /// (a cost defined over permutations of n elements)
 struct PermutationObjective {
    int n; // size of the permutation
    // cost of the permutation perm (array of n ints)
    virtual double compute_cost(const int* perm) const = 0;
    // what would the cost update be if iw and jw were swapped?
    // default implementation just computes both and computes the difference
    virtual double cost_update(const int* perm, int iw, int jw) const;
    virtual ~PermutationObjective() {}
 };
 /// Permutation objective that scores how well a table of source distances
 /// reproduces a table of target distances under a permutation.
 struct ReproduceDistancesObjective : PermutationObjective {
    double dis_weight_factor; // parameter of the dis_weight() weighting
    // square of x
    static double sqr(double x) {
        return x * x;
    }
    // weighting of distances: it is more important to reproduce small
    // distances well
    double dis_weight(double x) const;
    std::vector<double> source_dis; ///< "real" corrected distances (size n^2)
    const double* target_dis;       ///< wanted distances (size n^2)
    std::vector<double> weights;    ///< weights for each distance (size n^2)
    // source distance for the pair (i, j)
    double get_source_dis(int i, int j) const;
    // cost = quadratic difference between actual distance and Hamming distance
    double compute_cost(const int* perm) const override;
    // what would the cost update be if iw and jw were swapped?
    // computed in O(n) instead of O(n^2) for the full re-computation
    double cost_update(const int* perm, int iw, int jw) const override;
    // target_dis_in is kept as a raw pointer (see target_dis), so the
    // caller's array presumably has to outlive this object -- confirm.
    ReproduceDistancesObjective(
            int n,
            const double* source_dis_in,
            const double* target_dis_in,
            double dis_weight_factor);
    // mean and standard deviation of the n2 values in tab, returned through
    // mean_out and stddev_out
    static void compute_mean_stdev(
            const double* tab,
            size_t n2,
            double* mean_out,
            double* stddev_out);
    void set_affine_target_dis(const double* source_dis_in);
    ~ReproduceDistancesObjective() override {}
 };
 struct RandomGenerator;
 /// Simulated annealing optimization algorithm for permutations.
 /// Inherits its tuning knobs from SimulatedAnnealingParameters.
 struct SimulatedAnnealingOptimizer : SimulatedAnnealingParameters {
    PermutationObjective* obj; ///< objective being minimized (raw pointer)
    int n;         ///< size of the permutation
    FILE* logfile; /// logs values of the cost function
    SimulatedAnnealingOptimizer(
            PermutationObjective* obj,
            const SimulatedAnnealingParameters& p);
    /// random generator (raw pointer; NOTE(review): presumably created by
    /// the constructor and freed by the destructor -- confirm in the .cpp)
    RandomGenerator* rnd;
    /// remember initial cost of optimization
    double init_cost;
    // main entry point. Perform the optimization loop, starting from
    // and modifying permutation in-place
    double optimize(int* perm);
    // run the optimization and return the best result in best_perm
    double run_optimization(int* best_perm);
    virtual ~SimulatedAnnealingOptimizer();
 };
 /// optimizes the order of indices in a ProductQuantizer
 /// Inherits its annealing knobs from SimulatedAnnealingParameters.
 struct PolysemousTraining : SimulatedAnnealingParameters {
    enum Optimization_type_t {
        OT_None,
        OT_ReproduceDistances_affine, ///< default
        OT_Ranking_weighted_diff ///< ranking-based variant: use rank of y+ -
                                 ///< rank of y- (the "_2" variant the
                                 ///< original note compared against is not
                                 ///< part of this enum)
    };
    Optimization_type_t optimization_type;
    /** use 1/4 of the training points for the optimization, with
     * max. ntrain_permutation. If ntrain_permutation == 0: train on
     * centroids */
    int ntrain_permutation;
    double dis_weight_factor; ///< decay of exp that weights distance loss
    /// refuse to train if it would require more than that amount of RAM
    size_t max_memory;
    // filename pattern for the logging of iterations
    std::string log_pattern;
    // sets default values
    PolysemousTraining();
    /// reorder the centroids so that the Hamming distance becomes a
    /// good approximation of the SDC distance (called by train)
    /// @param pq  product quantizer whose centroids are reordered
    /// @param n   number of training vectors
    /// @param x   training vectors
    void optimize_pq_for_hamming(ProductQuantizer& pq, size_t n, const float* x)
            const;
    /// called by optimize_pq_for_hamming
    void optimize_ranking(ProductQuantizer& pq, size_t n, const float* x) const;
    /// called by optimize_pq_for_hamming
    void optimize_reproduce_distances(ProductQuantizer& pq) const;
    /// make sure we don't blow up the memory
    size_t memory_usage_per_thread(const ProductQuantizer& pq) const;
 };
 } // namespace faiss
 #endif
--- a/src/3rdlib/faiss/impl/ProductQuantizer-inl.h
+++ b/src/3rdlib/faiss/impl/ProductQuantizer-inl.h
@ -0,0 +1,116 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 namespace faiss {
 /// Starts writing nbits-wide indices at bit `offset` of *code.
 /// When the start is not byte-aligned, the bits already stored below
 /// `offset` in the first byte are loaded so they survive the next flush.
 inline PQEncoderGeneric::PQEncoderGeneric(
        uint8_t* code,
        int nbits,
        uint8_t offset)
        : code(code), offset(offset), nbits(nbits), reg(0) {
    assert(nbits <= 64);
    if (offset == 0) {
        // byte-aligned start: nothing to preserve
        return;
    }
    // keep only the `offset` low bits that precede our first field
    const int low_bits_mask = (1 << offset) - 1;
    reg = *code & low_bits_mask;
 }
 /// Append one nbits-wide index to the bit stream.
 ///
 /// `reg` holds the byte currently being assembled and `offset` the number
 /// of bits already used in it. Assumes x fits in nbits bits -- no masking
 /// is applied here (TODO confirm callers guarantee this).
 inline void PQEncoderGeneric::encode(uint64_t x) {
    // merge the low bits of x into the free high bits of the pending byte
    reg |= (uint8_t)(x << offset);
    // drop the bits that were just placed into reg
    x >>= (8 - offset);
    if (offset + nbits >= 8) {
        // the pending byte is now full: flush it
        *code++ = reg;
        // emit every whole byte that remains in x
        for (int i = 0; i < (nbits - (8 - offset)) / 8; ++i) {
            *code++ = (uint8_t)x;
            x >>= 8;
        }
        // new bit position inside the next (partial) byte
        offset += nbits;
        offset &= 7;
        // leftover bits start the next pending byte
        reg = (uint8_t)x;
    } else {
        // field fits entirely inside the pending byte; keep accumulating
        offset += nbits;
    }
 }
 /// Flushes the partially filled byte, if any, to the output on destruction.
 inline PQEncoderGeneric::~PQEncoderGeneric() {
    if (offset == 0) {
        // stream ended on a byte boundary: nothing pending
        return;
    }
    *code = reg;
 }
 /// Byte-per-index encoder; nbits is only checked (must be 8), not stored.
 inline PQEncoder8::PQEncoder8(uint8_t* code, int nbits) : code(code) {
    assert(nbits == 8);
 }
 /// Stores the low 8 bits of x at the write position and advances by one byte.
 inline void PQEncoder8::encode(uint64_t x) {
    *code = static_cast<uint8_t>(x);
    ++code;
 }
 /// Two-bytes-per-index encoder; the byte buffer is reinterpreted as
 /// uint16_t. nbits is only checked (must be 16), not stored.
 inline PQEncoder16::PQEncoder16(uint8_t* code, int nbits)
        : code(reinterpret_cast<uint16_t*>(code)) {
    assert(nbits == 16);
 }
 /// Stores the low 16 bits of x at the write position and advances by one
 /// uint16_t slot.
 inline void PQEncoder16::encode(uint64_t x) {
    *code = static_cast<uint16_t>(x);
    ++code;
 }
 /// Prepare to read nbits-wide indices from `code`, starting byte-aligned.
 ///
 /// @param code   packed bit stream to decode
 /// @param nbits  width of each index in bits (at most 64)
 inline PQDecoderGeneric::PQDecoderGeneric(const uint8_t* code, int nbits)
        : code(code),
          offset(0),
          nbits(nbits),
          // Shifting a 64-bit value by 64 is undefined behaviour, so the
          // full-width mask is special-cased: the assert below explicitly
          // permits nbits == 64.
          mask(nbits >= 64 ? ~0ull : (1ull << nbits) - 1),
          reg(0) {
    assert(nbits <= 64);
 }
 /// Read the next nbits-wide index from the bit stream and return it.
 ///
 /// `reg` caches the byte currently being consumed and `offset` is the
 /// number of bits of that byte already handed out.
 inline uint64_t PQDecoderGeneric::decode() {
    if (offset == 0) {
        // byte-aligned: load a fresh byte
        reg = *code;
    }
    // unread bits of the current byte become the low bits of the result
    uint64_t c = (reg >> offset);
    if (offset + nbits >= 8) {
        // the field reaches (or crosses) the end of the current byte
        uint64_t e = 8 - offset; // number of result bits gathered so far
        ++code;
        // append every whole byte covered by the field
        for (int i = 0; i < (nbits - (8 - offset)) / 8; ++i) {
            c |= ((uint64_t)(*code++) << e);
            e += 8;
        }
        // bit position inside the byte where the next field starts
        offset += nbits;
        offset &= 7;
        if (offset > 0) {
            // the field ends mid-byte: take its top bits from that byte
            // (bits belonging to the next field are removed by the mask)
            reg = *code;
            c |= ((uint64_t)reg << e);
        }
    } else {
        // field lies entirely inside the current byte
        offset += nbits;
    }
    return c & mask;
 }
 /// Byte-per-index decoder; nbits_in is only checked (must be 8).
 inline PQDecoder8::PQDecoder8(const uint8_t* code, int nbits_in) : code(code) {
    assert(nbits_in == 8);
 }
 /// Returns the byte at the read position and advances by one byte.
 inline uint64_t PQDecoder8::decode() {
    const uint8_t value = *code;
    ++code;
    return static_cast<uint64_t>(value);
 }
 /// Two-bytes-per-index decoder; the byte buffer is reinterpreted as
 /// uint16_t. nbits_in is only checked (must be 16).
 inline PQDecoder16::PQDecoder16(const uint8_t* code, int nbits_in)
        : code(reinterpret_cast<const uint16_t*>(code)) {
    assert(nbits_in == 16);
 }
 /// Returns the uint16_t at the read position and advances by one slot.
 inline uint64_t PQDecoder16::decode() {
    const uint16_t value = *code;
    ++code;
    return static_cast<uint64_t>(value);
 }
 } // namespace faiss
--- a/src/3rdlib/faiss/impl/ProductQuantizer.h
+++ b/src/3rdlib/faiss/impl/ProductQuantizer.h
@ -0,0 +1,228 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #ifndef FAISS_PRODUCT_QUANTIZER_H
 #define FAISS_PRODUCT_QUANTIZER_H
 #include <stdint.h>
 #include <vector>
 #include <faiss/Clustering.h>
 #include <faiss/utils/Heap.h>
 namespace faiss {
 /** Product Quantizer. Implemented only for METRIC_L2 */
 struct ProductQuantizer {
    using idx_t = Index::idx_t;
    size_t d;     ///< size of the input vectors
    size_t M;     ///< number of subquantizers
    size_t nbits; ///< number of bits per quantization index
    // values derived from the above
    size_t dsub;      ///< dimensionality of each subvector
    size_t code_size; ///< bytes per indexed vector
    size_t ksub;      ///< number of centroids for each subquantizer
    bool verbose;     ///< verbose during training?
    /// initialization
    enum train_type_t {
        Train_default,
        Train_hot_start,     ///< the centroids are already initialized
        Train_shared,        ///< share dictionary across PQ segments
        Train_hypercube,     ///< initialize centroids with nbits-D hypercube
        Train_hypercube_pca, ///< initialize centroids with nbits-D hypercube
    };
    train_type_t train_type;
    ClusteringParameters cp; ///< parameters used during clustering
    /// if non-NULL, use this index for assignment (should be of size
    /// d / M)
    Index* assign_index;
    /// Centroid table, size M * ksub * dsub
    std::vector<float> centroids;
    /// return a pointer to centroid i of subquantizer m inside the
    /// centroid table (entries are dsub floats apart)
    float* get_centroids(size_t m, size_t i) {
        return &centroids[(m * ksub + i) * dsub];
    }
    /// const overload of get_centroids
    const float* get_centroids(size_t m, size_t i) const {
        return &centroids[(m * ksub + i) * dsub];
    }
    // Train the product quantizer on a set of points. A clustering
    // can be set on input to define non-default clustering parameters
    void train(int n, const float* x);
    ProductQuantizer(
            size_t d,      /* dimensionality of the input vectors */
            size_t M,      /* number of subquantizers */
            size_t nbits); /* number of bits per subvector index */
    ProductQuantizer();
    /// compute derived values when d, M and nbits have been set
    void set_derived_values();
    /// Define the centroids for subquantizer m
    void set_params(const float* centroids, int m);
    /// Quantize one vector with the product quantizer
    void compute_code(const float* x, uint8_t* code) const;
    /// same as compute_code for several vectors
    void compute_codes(const float* x, uint8_t* codes, size_t n) const;
    /// speed up code assignment using assign_index
    /// (non-const because the index is changed)
    void compute_codes_with_assign_index(
            const float* x,
            uint8_t* codes,
            size_t n);
    /// decode a vector from a given code (or n vectors if third argument)
    void decode(const uint8_t* code, float* x) const;
    void decode(const uint8_t* code, float* x, size_t n) const;
    /// If we happen to have the distance tables precomputed, this is
    /// more efficient to compute the codes.
    void compute_code_from_distance_table(const float* tab, uint8_t* code)
            const;
    /** Compute distance table for one vector.
     *
     * The distance table for x = [x_0 x_1 .. x_(M-1)] is a M * ksub
     * matrix that contains
     *
     *   dis_table (m, j) = || x_m - c_(m, j)||^2
     *   for m = 0..M-1 and j = 0 .. ksub - 1
     *
     * where c_(m, j) is the centroid no j of sub-quantizer m.
     *
     * @param x         input vector size d
     * @param dis_table output table, size M * ksub
     */
    void compute_distance_table(const float* x, float* dis_table) const;
    /// same layout as compute_distance_table; by its name it fills the
    /// table with inner products instead of squared L2 distances
    void compute_inner_prod_table(const float* x, float* dis_table) const;
    /** compute distance table for several vectors
     * @param nx         nb of input vectors
     * @param x          input vector size nx * d
     * @param dis_tables output table, size nx * M * ksub
     */
    void compute_distance_tables(size_t nx, const float* x, float* dis_tables)
            const;
    /// inner-product counterpart of compute_distance_tables
    void compute_inner_prod_tables(size_t nx, const float* x, float* dis_tables)
            const;
    /** perform a search (L2 distance)
     * @param x        query vectors, size nx * d
     * @param nx       nb of queries
     * @param codes    database codes, size ncodes * code_size
     * @param ncodes   nb of database vectors
     * @param res      heap array to store results (nh == nx)
     * @param init_finalize_heap  initialize heap (input) and sort (output)?
     */
    void search(
            const float* x,
            size_t nx,
            const uint8_t* codes,
            const size_t ncodes,
            float_maxheap_array_t* res,
            bool init_finalize_heap = true) const;
    /** same as search, but with inner product similarity */
    void search_ip(
            const float* x,
            size_t nx,
            const uint8_t* codes,
            const size_t ncodes,
            float_minheap_array_t* res,
            bool init_finalize_heap = true) const;
    /// Symmetric Distance Table
    std::vector<float> sdc_table;
    // initialize the SDC table from the centroids
    void compute_sdc_table();
    /// search where the queries are given as codes (qcodes, nq of them)
    /// rather than float vectors; other parameters as in search().
    /// NOTE(review): presumably relies on sdc_table being computed first --
    /// confirm against the implementation.
    void search_sdc(
            const uint8_t* qcodes,
            size_t nq,
            const uint8_t* bcodes,
            const size_t ncodes,
            float_maxheap_array_t* res,
            bool init_finalize_heap = true) const;
 };
 /*************************************************
 * Objects to encode / decode strings of bits
 *************************************************/
 /// Writes a sequence of nbits-wide integers into a packed byte buffer.
 struct PQEncoderGeneric {
    uint8_t* code; ///< code for this vector
    uint8_t offset; ///< bit position inside the byte being assembled
    const int nbits; ///< number of bits per subquantizer index
    uint8_t reg; ///< byte being assembled, not yet written to code
    /// @param offset  bit position inside the first byte at which to start
    PQEncoderGeneric(uint8_t* code, int nbits, uint8_t offset = 0);
    /// append one index to the stream
    void encode(uint64_t x);
    /// writes the pending partial byte, if any
    ~PQEncoderGeneric();
 };
 /// Encoder specialised for indices of exactly 8 bits (one byte each).
 struct PQEncoder8 {
    uint8_t* code; ///< current write position
    /// nbits must be 8 (checked by an assert in the definition)
    PQEncoder8(uint8_t* code, int nbits);
    /// store the low byte of x and advance
    void encode(uint64_t x);
 };
 /// Encoder specialised for indices of exactly 16 bits (two bytes each).
 struct PQEncoder16 {
    uint16_t* code; ///< current write position, in 16-bit units
    /// nbits must be 16 (checked by an assert in the definition)
    PQEncoder16(uint8_t* code, int nbits);
    /// store the low 16 bits of x and advance
    void encode(uint64_t x);
 };
 /// Reads a sequence of nbits-wide integers from a packed byte buffer.
 struct PQDecoderGeneric {
    const uint8_t* code; ///< current read position
    uint8_t offset; ///< bits of the current byte already consumed
    const int nbits; ///< number of bits per index
    const uint64_t mask; ///< keeps the low nbits bits of a decoded value
    uint8_t reg; ///< cached copy of the byte being consumed
    PQDecoderGeneric(const uint8_t* code, int nbits);
    /// return the next index of the stream
    uint64_t decode();
 };
 /// Decoder specialised for indices of exactly 8 bits (one byte each).
 struct PQDecoder8 {
    static const int nbits = 8; ///< fixed index width
    const uint8_t* code; ///< current read position
    /// the nbits argument must be 8 (checked by an assert in the definition)
    PQDecoder8(const uint8_t* code, int nbits);
    /// return the next byte and advance
    uint64_t decode();
 };
 /// Decoder specialised for indices of exactly 16 bits (two bytes each).
 struct PQDecoder16 {
    static const int nbits = 16; ///< fixed index width
    const uint16_t* code; ///< current read position, in 16-bit units
    /// the nbits argument must be 16 (checked by an assert in the definition)
    PQDecoder16(const uint8_t* code, int nbits);
    /// return the next 16-bit value and advance
    uint64_t decode();
 };
 } // namespace faiss
 #include <faiss/impl/ProductQuantizer-inl.h>
 #endif
--- a/src/3rdlib/faiss/impl/ResidualQuantizer.h
+++ b/src/3rdlib/faiss/impl/ResidualQuantizer.h
@ -0,0 +1,182 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 #pragma once
 #include <cstdint>
 #include <vector>
 #include <faiss/Clustering.h>
 #include <faiss/impl/AdditiveQuantizer.h>
 namespace faiss {
 /** Residual quantizer with variable number of bits per sub-quantizer
 *
 * The residual centroids are stored in a big cumulative centroid table.
 * The codes are represented either as a non-compact table of size (n, M) or
 * as the compact output (n, code_size).
 */
 struct ResidualQuantizer : AdditiveQuantizer {
    /// initialization
    enum train_type_t {
        Train_default,         ///< regular k-means
        Train_progressive_dim, ///< progressive dim clustering
    };
    train_type_t train_type;
    // set this bit on train_type if beam is to be trained only on the
    // first element of the beam (faster but less accurate)
    static const int Train_top_beam = 1024;
    // set this bit to not automatically compute the codebook tables
    // after training
    static const int Skip_codebook_tables = 2048;
    /// beam size used for training and for encoding
    int max_beam_size;
    /// use LUT for beam search
    int use_beam_LUT;
    /// distance matrices with beam search can get large, so use this
    /// to batch computations at encoding time.
    size_t max_mem_distances;
    /// clustering parameters
    ProgressiveDimClusteringParameters cp;
    /// if non-NULL, use this index for assignment
    ProgressiveDimIndexFactory* assign_index_factory;
    /// construct with an explicit number of bits per sub-quantizer
    /// (one entry of nbits per sub-quantizer)
    ResidualQuantizer(
            size_t d,
            const std::vector<size_t>& nbits,
            Search_type_t search_type = ST_decompress);
    /// construct with M sub-quantizers that all use the same nbits
    ResidualQuantizer(
            size_t d,     /* dimensionality of the input vectors */
            size_t M,     /* number of subquantizers */
            size_t nbits, /* number of bits per subvector index */
            Search_type_t search_type = ST_decompress);
    ResidualQuantizer();
    // Train the residual quantizer
    void train(size_t n, const float* x) override;
    /** Encode a set of vectors
     *
     * @param x      vectors to encode, size n * d
     * @param codes  output codes, size n * code_size
     */
    void compute_codes(const float* x, uint8_t* codes, size_t n) const override;
    /** lower-level encode function
     *
     * @param n              number of vectors to handle
     * @param residuals      vectors to encode, size (n, beam_size, d)
     * @param beam_size      input beam size
     * @param new_beam_size  output beam size (should be <= K * beam_size)
     * @param new_codes      output codes, size (n, new_beam_size, m + 1)
     * @param new_residuals  output residuals, size (n, new_beam_size, d)
     * @param new_distances  output distances, size (n, new_beam_size)
     */
    void refine_beam(
            size_t n,
            size_t beam_size,
            const float* residuals,
            int new_beam_size,
            int32_t* new_codes,
            float* new_residuals = nullptr,
            float* new_distances = nullptr) const;
    /// variant of refine_beam that takes query norms and query/codebook
    /// products (query_cp) instead of residual vectors.
    /// NOTE(review): exact array sizes are not documented in this header --
    /// confirm against the implementation.
    void refine_beam_LUT(
            size_t n,
            const float* query_norms,
            const float* query_cp,
            int new_beam_size,
            int32_t* new_codes,
            float* new_distances = nullptr) const;
    /** Beam search can consume a lot of memory. This function estimates the
     * amount of mem used by refine_beam to adjust the batch size
     *
     * @param beam_size  if != -1, override the beam size
     */
    size_t memory_per_point(int beam_size = -1) const;
    /** Cross products used in codebook tables
     *
     * These are used to keep track of norms of centroids.
     */
    void compute_codebook_tables();
    /// dot products of all codebook vectors with each other
    /// size total_codebook_size * total_codebook_size
    std::vector<float> codebook_cross_products;
    /// norms of all vectors
    std::vector<float> cent_norms;
 };
 /** Encode a residual by sampling from a centroid table.
 *
 * This is a single encoding step of the residual quantizer.
 * It allows low-level access to the encoding function, exposed mainly for unit
 * tests.
 *
 * @param d              dimension of the residual and centroid vectors
 * @param K              number of centroids in cent
 * @param n              number of vectors to handle
 * @param residuals      vectors to encode, size (n, beam_size, d)
 * @param cent           centroids, size (K, d)
 * @param beam_size      input beam size
 * @param m              size of the codes for the previous encoding steps
 * @param codes          code array for the previous steps of the beam (n,
 * beam_size, m)
 * @param new_beam_size  output beam size (should be <= K * beam_size)
 * @param new_codes      output codes, size (n, new_beam_size, m + 1)
 * @param new_residuals  output residuals, size (n, new_beam_size, d)
 * @param new_distances  output distances, size (n, new_beam_size)
 * @param assign_index   if non-NULL, will be used to perform assignment
 */
 void beam_search_encode_step(
        size_t d,
        size_t K,
        const float* cent,
        size_t n,
        size_t beam_size,
        const float* residuals,
        size_t m,
        const int32_t* codes,
        size_t new_beam_size,
        int32_t* new_codes,
        float* new_residuals,
        float* new_distances,
        Index* assign_index = nullptr);
 /** Encode a set of vectors using their dot products with the codebooks
 *
 * Table-based counterpart of beam_search_encode_step: it is given
 * precomputed products and norms instead of residual vectors. The expected
 * size of each array is given by the comment next to the parameter; ldc and
 * ldqc are the row strides of codebook_cross_norms and query_cp.
 */
 void beam_search_encode_step_tab(
        size_t K,
        size_t n,
        size_t beam_size,                  // input sizes
        const float* codebook_cross_norms, // size K * ldc
        size_t ldc,                        // >= K
        const uint64_t* codebook_offsets,  // m
        const float* query_cp,             // size n * ldqc
        size_t ldqc,                       // >= K
        const float* cent_norms_i,         // size K
        size_t m,
        const int32_t* codes,   // n * beam_size * m
        const float* distances, // n * beam_size
        size_t new_beam_size,
        int32_t* new_codes,    // n * new_beam_size * (m + 1)
        float* new_distances); // n * new_beam_size
 }; // namespace faiss
--- a/src/3rdlib/faiss/impl/ResultHandler.h
+++ b/src/3rdlib/faiss/impl/ResultHandler.h
@ -0,0 +1,416 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 /*
 * Structures that collect search results from distance computations
 */
 #pragma once
 #include <faiss/impl/AuxIndexStructures.h>
 #include <faiss/utils/Heap.h>
 #include <faiss/utils/partitioning.h>
 namespace faiss {
 /*****************************************************************
 * Heap based result handler
 *
 * Keeps the k best results of every query in a heap of k entries;
 * the heaps live in caller-provided tables, k entries per query.
 *****************************************************************/
 template <class C>
 struct HeapResultHandler {
    using T = typename C::T;
    using TI = typename C::TI;
    int nq;
    T* heap_dis_tab;
    TI* heap_ids_tab;
    int64_t k; // number of results to keep
    HeapResultHandler(size_t nq, T* heap_dis_tab, TI* heap_ids_tab, size_t k)
            : nq(nq),
              heap_dis_tab(heap_dis_tab),
              heap_ids_tab(heap_ids_tab),
              k(k) {}
    /******************************************************
     * Per-query API: results arrive one at a time (each
     * SingleResultHandler is used by a single thread)
     */
    struct SingleResultHandler {
        HeapResultHandler& hr;
        size_t k;
        T* heap_dis;
        TI* heap_ids;
        T thresh;
        SingleResultHandler(HeapResultHandler& hr) : hr(hr), k(hr.k) {}
        /// point at the heap of query i and reset it
        void begin(size_t i) {
            const size_t first_slot = i * k;
            heap_dis = hr.heap_dis_tab + first_slot;
            heap_ids = hr.heap_ids_tab + first_slot;
            heap_heapify<C>(k, heap_dis, heap_ids);
            thresh = heap_dis[0];
        }
        /// offer one candidate for the current query
        void add_result(T dis, TI idx) {
            if (!C::cmp(heap_dis[0], dis)) {
                // candidate does not beat the current heap top
                return;
            }
            heap_replace_top<C>(k, heap_dis, heap_ids, dis, idx);
            thresh = heap_dis[0];
        }
        /// the current query is complete: sort its heap
        void end() {
            heap_reorder<C>(k, heap_dis, heap_ids);
        }
    };
    /******************************************************
     * Block API: several queries at once (called from 1 thread)
     */
    size_t i0, i1;
    /// start handling queries i0..i1: reset their heaps
    void begin_multiple(size_t i0, size_t i1) {
        this->i0 = i0;
        this->i1 = i1;
        for (size_t q = i0; q < i1; q++) {
            const size_t first_slot = q * k;
            heap_heapify<C>(
                    k, heap_dis_tab + first_slot, heap_ids_tab + first_slot);
        }
    }
    /// add results for query i0..i1 and j0..j1; dis_tab holds one row of
    /// (j1 - j0) values per query
    void add_results(size_t j0, size_t j1, const T* dis_tab) {
        const size_t row_len = j1 - j0;
 #pragma omp parallel for
        for (int64_t i = i0; i < i1; i++) {
            T* heap_dis = heap_dis_tab + i * k;
            TI* heap_ids = heap_ids_tab + i * k;
            const T* row = dis_tab + row_len * (i - i0);
            T thresh = heap_dis[0];
            for (size_t j = j0; j < j1; j++) {
                const T dis = row[j - j0];
                if (!C::cmp(thresh, dis)) {
                    continue;
                }
                heap_replace_top<C>(k, heap_dis, heap_ids, dis, j);
                thresh = heap_dis[0];
            }
        }
    }
    /// queries i0..i1 are complete: sort each heap
    void end_multiple() {
        // maybe parallel for
        for (size_t q = i0; q < i1; q++) {
            const size_t first_slot = q * k;
            heap_reorder<C>(
                    k, heap_dis_tab + first_slot, heap_ids_tab + first_slot);
        }
    }
 };
 /*****************************************************************
 * Reservoir result handler
 *
 * A reservoir is a result array of size capacity > n (number of requested
 * results) all results below a threshold are stored in an arbitrary order. When
 * the capacity is reached, a new threshold is chosen by partitionning the
 * distance array.
 *****************************************************************/
 /// Reservoir for a single query
 ///
 /// Candidates that pass the current threshold are appended to the caller's
 /// vals/ids arrays. When the arrays are full, shrink_fuzzy() partitions
 /// them and tightens the threshold. to_result() turns the reservoir content
 /// into a sorted result list of n entries.
 template <class C>
 struct ReservoirTopN {
    using T = typename C::T;
    using TI = typename C::TI;
    T* vals;
    TI* ids;
    size_t i;        // number of stored elements
    size_t n;        // number of requested elements
    size_t capacity; // size of storage
    T threshold; // current threshold
    /// Empty reservoir with no storage attached. Members are given defined
    /// values so that a default-constructed object can be copied and
    /// inspected safely before a real reservoir is assigned to it.
    ReservoirTopN()
            : vals(nullptr),
              ids(nullptr),
              i(0),
              n(0),
              capacity(0),
              threshold(C::neutral()) {}
    /// @param n         number of results requested
    /// @param capacity  size of the vals / ids arrays, must be > n
    /// @param vals      caller-owned storage for capacity values
    /// @param ids       caller-owned storage for capacity ids
    ReservoirTopN(size_t n, size_t capacity, T* vals, TI* ids)
            : vals(vals), ids(ids), i(0), n(n), capacity(capacity) {
        assert(n < capacity);
        threshold = C::neutral();
    }
    /// store (val, id) if val passes the current threshold
    void add(T val, TI id) {
        if (C::cmp(threshold, val)) {
            if (i == capacity) {
                // storage is full: make room before appending
                shrink_fuzzy();
            }
            vals[i] = val;
            ids[i] = id;
            i++;
        }
    }
    // reduce storage from capacity to anything
    // between n and (capacity + n) / 2
    void shrink_fuzzy() {
        assert(i == capacity);
        threshold = partition_fuzzy<C>(
                vals, ids, capacity, n, (capacity + n) / 2, &i);
    }
    /// write the n best stored results, sorted, to heap_dis / heap_ids
    void to_result(T* heap_dis, TI* heap_ids) const {
        // size_t counter: i and n are size_t, so an int counter would
        // compare signed against unsigned
        const size_t n_initial = std::min(i, n);
        for (size_t j = 0; j < n_initial; j++) {
            heap_push<C>(j + 1, heap_dis, heap_ids, vals[j], ids[j]);
        }
        if (i < n) {
            heap_reorder<C>(i, heap_dis, heap_ids);
            // add empty results
            heap_heapify<C>(n - i, heap_dis + i, heap_ids + i);
        } else {
            // add remaining elements
            heap_addn<C>(n, heap_dis, heap_ids, vals + n, ids + n, i - n);
            heap_reorder<C>(n, heap_dis, heap_ids);
        }
    }
 };
 /// Result handler that collects candidates in one ReservoirTopN per query
 /// and writes the final k results per query into heap_dis_tab /
 /// heap_ids_tab (k entries per query).
 template <class C>
 struct ReservoirResultHandler {
    using T = typename C::T;
    using TI = typename C::TI;
    int nq;
    T* heap_dis_tab;
    TI* heap_ids_tab;
    int64_t k;       // number of results to keep
    size_t capacity; // capacity of the reservoirs
    ReservoirResultHandler(
            size_t nq,
            T* heap_dis_tab,
            TI* heap_ids_tab,
            size_t k)
            : nq(nq),
              heap_dis_tab(heap_dis_tab),
              heap_ids_tab(heap_ids_tab),
              k(k) {
        // double then round up to multiple of 16 (for SIMD alignment)
        capacity = (2 * k + 15) & ~15;
    }
    /******************************************************
     * API for 1 result at a time (each SingleResultHandler is
     * called from 1 thread)
     */
    struct SingleResultHandler {
        ReservoirResultHandler& hr;
        // private reservoir storage of this handler, hr.capacity entries each
        std::vector<T> reservoir_dis;
        std::vector<TI> reservoir_ids;
        // reservoir view over the two vectors above; re-created by begin()
        ReservoirTopN<C> res1;
        SingleResultHandler(ReservoirResultHandler& hr)
                : hr(hr),
                  reservoir_dis(hr.capacity),
                  reservoir_ids(hr.capacity) {}
        size_t i; // query currently being handled (set by begin)
        /// begin results for query # i
        void begin(size_t i) {
            res1 = ReservoirTopN<C>(
                    hr.k,
                    hr.capacity,
                    reservoir_dis.data(),
                    reservoir_ids.data());
            this->i = i;
        }
        /// add one result for query i
        void add_result(T dis, TI idx) {
            res1.add(dis, idx);
        }
        /// series of results for query i is done
        void end() {
            // output slots of query i in the shared result tables
            T* heap_dis = hr.heap_dis_tab + i * hr.k;
            TI* heap_ids = hr.heap_ids_tab + i * hr.k;
            res1.to_result(heap_dis, heap_ids);
        }
    };
    /******************************************************
     * API for multiple results (called from 1 thread)
     */
    size_t i0, i1;
    // storage shared by the reservoirs of queries i0..i1,
    // capacity entries per query
    std::vector<T> reservoir_dis;
    std::vector<TI> reservoir_ids;
    std::vector<ReservoirTopN<C>> reservoirs;
    /// begin
    void begin_multiple(size_t i0, size_t i1) {
        this->i0 = i0;
        this->i1 = i1;
        reservoir_dis.resize((i1 - i0) * capacity);
        reservoir_ids.resize((i1 - i0) * capacity);
        reservoirs.clear();
        for (size_t i = i0; i < i1; i++) {
            // each reservoir gets its own slice of the shared storage
            reservoirs.emplace_back(
                    k,
                    capacity,
                    reservoir_dis.data() + (i - i0) * capacity,
                    reservoir_ids.data() + (i - i0) * capacity);
        }
    }
    /// add results for query i0..i1 and j0..j1
    void add_results(size_t j0, size_t j1, const T* dis_tab) {
        // maybe parallel for
 #pragma omp parallel for
        for (int64_t i = i0; i < i1; i++) {
            ReservoirTopN<C>& reservoir = reservoirs[i - i0];
            // row of query i, shifted by -j0 so it can be indexed with j
            const T* dis_tab_i = dis_tab + (j1 - j0) * (i - i0) - j0;
            for (size_t j = j0; j < j1; j++) {
                T dis = dis_tab_i[j];
                reservoir.add(dis, j);
            }
        }
    }
    /// series of results for queries i0..i1 is done
    void end_multiple() {
        // maybe parallel for
        for (size_t i = i0; i < i1; i++) {
            reservoirs[i - i0].to_result(
                    heap_dis_tab + i * k, heap_ids_tab + i * k);
        }
    }
 };
 /*****************************************************************
 * Result handler for range searches
 *****************************************************************/
 /// Result handler for range search: every candidate for which
 /// C::cmp(radius, dis) holds is recorded in a RangeSearchResult.
 template <class C>
 struct RangeSearchResultHandler {
    using T = typename C::T;
    using TI = typename C::TI;
    RangeSearchResult* res;
    float radius;
    RangeSearchResultHandler(RangeSearchResult* res, float radius)
            : res(res), radius(radius) {}
    /******************************************************
     * API for 1 result at a time (each SingleResultHandler is
     * called from 1 thread)
     ******************************************************/
    struct SingleResultHandler {
        // almost the same interface as RangeSearchResultHandler
        RangeSearchPartialResult pres;
        float radius;
        // result list of the query selected by begin()
        RangeQueryResult* qr = nullptr;
        SingleResultHandler(RangeSearchResultHandler& rh)
                : pres(rh.res), radius(rh.radius) {}
        /// begin results for query # i
        void begin(size_t i) {
            qr = &pres.new_result(i);
        }
        /// add one result for query i
        void add_result(T dis, TI idx) {
            if (C::cmp(radius, dis)) {
                qr->add(dis, idx);
            }
        }
        /// series of results for query i is done
        void end() {}
        /// finalizes the partial result when the handler goes away
        ~SingleResultHandler() {
            pres.finalize();
        }
    };
    /******************************************************
     * API for multiple results (called from 1 thread)
     ******************************************************/
    size_t i0, i1;
    // one partial result per distinct j0 seen so far; j0s[p] is the j0
    // that partial_results[p] belongs to
    std::vector<RangeSearchPartialResult*> partial_results;
    std::vector<size_t> j0s;
    // position in partial_results expected to match the next add_results call
    int pr = 0;
    /// begin
    void begin_multiple(size_t i0, size_t i1) {
        this->i0 = i0;
        this->i1 = i1;
    }
    /// add results for query i0..i1 and j0..j1
    void add_results(size_t j0, size_t j1, const T* dis_tab) {
        RangeSearchPartialResult* pres;
        // there is one RangeSearchPartialResult structure per j0
        // (= block of columns of the large distance matrix)
        // it is a bit tricky to find the proper PartialResult structure
        // because the inner loop is on db not on queries.
        if (pr < j0s.size() && j0 == j0s[pr]) {
            // same column block as on the previous pass: reuse in order
            pres = partial_results[pr];
            pr++;
        } else if (j0 == 0 && j0s.size() > 0) {
            // back at the first column block: restart from the beginning
            pr = 0;
            pres = partial_results[pr];
            pr++;
        } else { // did not find this j0
            pres = new RangeSearchPartialResult(res);
            partial_results.push_back(pres);
            j0s.push_back(j0);
            pr = partial_results.size();
        }
        for (size_t i = i0; i < i1; i++) {
            // row of query i: (j1 - j0) consecutive distances
            const float* ip_line = dis_tab + (i - i0) * (j1 - j0);
            RangeQueryResult& qres = pres->new_result(i);
            for (size_t j = j0; j < j1; j++) {
                float dis = *ip_line++;
                if (C::cmp(radius, dis)) {
                    qres.add(dis, j);
                }
            }
        }
    }
    void end_multiple() {}
    /// merges the accumulated partial results on destruction.
    /// NOTE(review): the pointers in partial_results are allocated with new
    /// in add_results and not deleted here; presumably merge() takes
    /// ownership -- confirm against RangeSearchPartialResult::merge.
    ~RangeSearchResultHandler() {
        if (partial_results.size() > 0) {
            RangeSearchPartialResult::merge(partial_results);
        }
    }
 };
 } // namespace faiss
--- a/src/3rdlib/faiss/impl/ScalarQuantizer.h
+++ b/src/3rdlib/faiss/impl/ScalarQuantizer.h
@ -0,0 +1,128 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #pragma once
 #include <faiss/IndexIVF.h>
 #include <faiss/impl/AuxIndexStructures.h>
 namespace faiss {
 /**
 * The uniform quantizer has a range [vmin, vmax]. The range can be
 * the same for all dimensions (uniform) or specific per dimension
 * (default).
 */
 struct ScalarQuantizer {
    /// encoding used for each vector component
    enum QuantizerType {
        QT_8bit,         ///< 8 bits per component
        QT_4bit,         ///< 4 bits per component
        QT_8bit_uniform, ///< same, shared range for all dimensions
        QT_4bit_uniform,
        QT_fp16,
        QT_8bit_direct, ///< fast indexing of uint8s
        QT_6bit,        ///< 6 bits per component
    };
    QuantizerType qtype;
    /** The uniform encoder can estimate the range of representable
     * values of the uniform encoder using different statistics. Here
     * rs = rangestat_arg */
    // rangestat_arg.
    enum RangeStat {
        RS_minmax,    ///< [min - rs*(max-min), max + rs*(max-min)]
        RS_meanstd,   ///< [mean - std * rs, mean + std * rs]
        RS_quantiles, ///< [Q(rs), Q(1-rs)]
        RS_optim,     ///< alternate optimization of reconstruction error
    };
    RangeStat rangestat;
    float rangestat_arg;
    /// dimension of input vectors
    size_t d;
    /// bits per scalar code
    size_t bits;
    /// bytes per vector
    size_t code_size;
    /// trained values (including the range)
    std::vector<float> trained;
    ScalarQuantizer(size_t d, QuantizerType qtype);
    ScalarQuantizer();
    /// updates internal values based on qtype and d
    void set_derived_sizes();
    /// train on n vectors stored in x (presumably n * d floats -- confirm)
    void train(size_t n, const float* x);
    /// Used by an IVF index to train based on the residuals
    void train_residual(
            size_t n,
            const float* x,
            Index* quantizer,
            bool by_residual,
            bool verbose);
    /** Encode a set of vectors
     *
     * @param x      vectors to encode, size n * d
     * @param codes  output codes, size n * code_size
     */
    void compute_codes(const float* x, uint8_t* codes, size_t n) const;
    /** Decode a set of vectors
     *
     * @param code   codes to decode, size n * code_size
     * @param x      output vectors, size n * d
     */
    void decode(const uint8_t* code, float* x, size_t n) const;
    /*****************************************************
     * Objects that provide methods for encoding/decoding, distance
     * computation and inverted list scanning
     *****************************************************/
    struct Quantizer {
        // encodes one vector. Assumes code is filled with 0s on input!
        virtual void encode_vector(const float* x, uint8_t* code) const = 0;
        // decodes one vector from code into x
        virtual void decode_vector(const uint8_t* code, float* x) const = 0;
        virtual ~Quantizer() {}
    };
    /// returns a Quantizer as a raw pointer.
    /// NOTE(review): presumably matches qtype and is owned by the caller --
    /// confirm against the implementation.
    Quantizer* select_quantizer() const;
    /// distance computer that can also compare the query to a raw code
    struct SQDistanceComputer : DistanceComputer {
        const float* q;
        const uint8_t* codes;
        size_t code_size;
        SQDistanceComputer() : q(nullptr), codes(nullptr), code_size(0) {}
        virtual float query_to_code(const uint8_t* code) const = 0;
    };
    /// returns a distance computer for the given metric (raw pointer;
    /// ownership presumably passes to the caller -- confirm)
    SQDistanceComputer* get_distance_computer(
            MetricType metric = METRIC_L2) const;
    /// returns an inverted-list scanner for the given metric and settings
    /// (raw pointer; ownership presumably passes to the caller -- confirm)
    InvertedListScanner* select_InvertedListScanner(
            MetricType mt,
            const Index* quantizer,
            bool store_pairs,
            bool by_residual = false) const;
 };
 } // namespace faiss
--- a/src/3rdlib/faiss/impl/ThreadedIndex-inl.h
+++ b/src/3rdlib/faiss/impl/ThreadedIndex-inl.h
@ -0,0 +1,190 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 #include <faiss/impl/FaissAssert.h>
 #include <exception>
 #include <iostream>
 namespace faiss {
 /// Builds a ThreadedIndex without a preset dimension by delegating to the
 /// (d, threaded) constructor with d == 0.
 template <typename IndexT>
 ThreadedIndex<IndexT>::ThreadedIndex(bool threaded)
        : ThreadedIndex(/* default dimension */ 0, threaded) {}
 template <typename IndexT>
 ThreadedIndex<IndexT>::ThreadedIndex(int d, bool threaded)
        : IndexT(d), own_fields(false), isThreaded_(threaded) {}
 /// Shuts down every worker thread (threaded mode only) and, when own_fields
 /// is set, deletes the sub-indices.
 template <typename IndexT>
 ThreadedIndex<IndexT>::~ThreadedIndex() {
    for (auto& p : indices_) {
        if (!isThreaded_) {
            // non-threaded mode: no worker may be attached
            FAISS_ASSERT(!(bool)p.second);
        } else {
            // threaded mode: a worker must be attached
            FAISS_ASSERT((bool)p.second);
            // stopping also flushes all pending work
            p.second->stop();
            p.second->waitForThreadExit();
        }
        if (own_fields) {
            delete p.first;
        }
    }
 }
 /// Registers index as a new sub-index.
 ///
 /// The first sub-index defines the dimension when none was set (d == 0).
 /// Every later one must match the dimension and the metric type of the
 /// first, and must not already be registered. In threaded mode a fresh
 /// WorkerThread is attached to the new entry. onAfterAddIndex() is invoked
 /// last.
 template <typename IndexT>
 void ThreadedIndex<IndexT>::addIndex(IndexT* index) {
    // Adopt the dimension of the first sub-index if we have none yet
    if (this->d == 0 && indices_.empty()) {
        this->d = index->d;
    }
    // Dimension check against our (possibly just adopted) dimension
    FAISS_THROW_IF_NOT_FMT(
            this->d == index->d,
            "addIndex: dimension mismatch for "
            "newly added index; expecting dim %d, "
            "new index has dim %d",
            this->d,
            index->d);
    // Metric check against the first sub-index already present
    if (!indices_.empty()) {
        auto& existing = indices_.front().first;
        FAISS_THROW_IF_NOT_MSG(
                index->metric_type == existing->metric_type,
                "addIndex: newly added index is "
                "of different metric type than old index");
    }
    // Reject duplicates (no-op while the collection is still empty)
    for (auto& p : indices_) {
        FAISS_THROW_IF_NOT_MSG(
                p.first != index,
                "addIndex: attempting to add index "
                "that is already in the collection");
    }
    // Only threaded mode attaches a worker; otherwise the slot stays null
    std::unique_ptr<WorkerThread> worker;
    if (isThreaded_) {
        worker.reset(new WorkerThread);
    }
    indices_.emplace_back(index, std::move(worker));
    onAfterAddIndex(index);
 }
 /// Unregisters index from the collection.
 ///
 /// In threaded mode the worker attached to the entry is stopped and joined
 /// first, so that its work has finished before this function returns. The
 /// entry is then erased, onAfterRemoveIndex() is invoked, and the index is
 /// deleted when own_fields is set.
 ///
 /// Reports an error through FAISS_THROW_MSG when index is not part of the
 /// collection. The message names ThreadedIndex (it used to name
 /// IndexReplicas, which is misleading because this function is a member of
 /// ThreadedIndex).
 template <typename IndexT>
 void ThreadedIndex<IndexT>::removeIndex(IndexT* index) {
    for (auto it = indices_.begin(); it != indices_.end(); ++it) {
        if (it->first != index) {
            continue;
        }
        // This is our index; stop the worker thread before removing it,
        // to ensure that it has finished before function exit
        if (isThreaded_) {
            // should have worker thread
            FAISS_ASSERT((bool)it->second);
            it->second->stop();
            it->second->waitForThreadExit();
        } else {
            // should not have worker thread
            FAISS_ASSERT(!(bool)it->second);
        }
        indices_.erase(it);
        onAfterRemoveIndex(index);
        if (own_fields) {
            delete index;
        }
        return;
    }
    // could not find our index
    FAISS_THROW_MSG("ThreadedIndex::removeIndex: index not found");
 }
 /// Invokes f(position, sub-index) for every sub-index.
 ///
 /// Threaded mode: each call is queued on the worker attached to that
 /// sub-index and the resulting futures are awaited via
 /// waitAndHandleFutures(). Non-threaded mode: the calls run inline, one
 /// after the other. In both modes exceptions are collected per position so
 /// that the remaining calls still run.
 template <typename IndexT>
 void ThreadedIndex<IndexT>::runOnIndex(std::function<void(int, IndexT*)> f) {
    if (!isThreaded_) {
        // Several calls may throw; remember each failure together with its
        // position and keep going until every sub-index has been visited
        std::vector<std::pair<int, std::exception_ptr>> exceptions;
        for (int i = 0; static_cast<size_t>(i) < this->indices_.size(); ++i) {
            try {
                f(i, this->indices_[i].first);
            } catch (...) {
                exceptions.emplace_back(i, std::current_exception());
            }
        }
        handleExceptions(exceptions);
        return;
    }
    std::vector<std::future<bool>> v;
    for (int i = 0; static_cast<size_t>(i) < this->indices_.size(); ++i) {
        auto& p = this->indices_[i];
        IndexT* indexPtr = p.first;
        // the task owns copies of f, i and the raw index pointer
        v.emplace_back(p.second->add([f, i, indexPtr]() { f(i, indexPtr); }));
    }
    waitAndHandleFutures(v);
 }
 /// Const overload: forwards to the non-const runOnIndex() through a
 /// const_cast, wrapping f so that it receives each sub-index as a pointer
 /// to const.
 template <typename IndexT>
 void ThreadedIndex<IndexT>::runOnIndex(
        std::function<void(int, const IndexT*)> f) const {
    auto* self = const_cast<ThreadedIndex<IndexT>*>(this);
    self->runOnIndex(std::function<void(int, IndexT*)>(
            [f](int i, IndexT* idx) { f(i, idx); }));
 }
 /// Calls reset() on every sub-index, then clears this object's own
 /// bookkeeping (ntotal back to 0, is_trained back to false).
 template <typename IndexT>
 void ThreadedIndex<IndexT>::reset() {
    runOnIndex([](int /* position */, IndexT* index) { index->reset(); });
    this->is_trained = false;
    this->ntotal = 0;
 }
 /// Hook invoked by addIndex(); this base version does nothing.
 template <typename IndexT>
 void ThreadedIndex<IndexT>::onAfterAddIndex(IndexT* /* index */) {}
 /// Hook invoked by removeIndex(); this base version does nothing.
 template <typename IndexT>
 void ThreadedIndex<IndexT>::onAfterRemoveIndex(IndexT* /* index */) {}
 /// Blocks until every future in v has completed. An exception raised by
 /// get() is recorded together with the position of its future, and the
 /// whole collection is handed to handleExceptions() once all futures have
 /// been awaited.
 template <typename IndexT>
 void ThreadedIndex<IndexT>::waitAndHandleFutures(
        std::vector<std::future<bool>>& v) {
    std::vector<std::pair<int, std::exception_ptr>> exceptions;
    int i = 0;
    for (auto& fut : v) {
        try {
            fut.get();
        } catch (...) {
            exceptions.emplace_back(i, std::current_exception());
        }
        ++i;
    }
    handleExceptions(exceptions);
 }
 } // namespace faiss
--- a/src/3rdlib/faiss/impl/ThreadedIndex.h
+++ b/src/3rdlib/faiss/impl/ThreadedIndex.h
@ -0,0 +1,86 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 #pragma once
 #include <faiss/Index.h>
 #include <faiss/IndexBinary.h>
 #include <faiss/utils/WorkerThread.h>
 #include <memory>
 #include <vector>
 namespace faiss {
 /// Holds a collection of sub-indices, each optionally paired with its own
 /// worker thread.
 /// The interface to this class itself is not thread safe.
 template <typename IndexT>
 class ThreadedIndex : public IndexT {
   public:
    /// @param threaded  whether each sub-index gets a worker thread
    explicit ThreadedIndex(bool threaded);
    /// @param d         dimension of the index
    /// @param threaded  whether each sub-index gets a worker thread
    explicit ThreadedIndex(int d, bool threaded);
    ~ThreadedIndex() override;
    /// Add an index that is then managed by ourselves.
    /// WARNING: once an index is added, it becomes unsafe to touch it from
    /// any thread other than the one managing it, until we are shut down.
    /// Use runOnIndex to perform work on it instead.
    void addIndex(IndexT* index);
    /// Remove an index that is managed by ourselves.
    /// This flushes all pending work on that index, shuts down its managing
    /// thread and then removes the index.
    void removeIndex(IndexT* index);
    /// Run a function on all indices, in the thread that the index is
    /// managed in.
    /// Function arguments are (index in collection, index pointer)
    void runOnIndex(std::function<void(int, IndexT*)> f);
    void runOnIndex(std::function<void(int, const IndexT*)> f) const;
    /// faiss::Index API
    /// All indices receive the same call
    void reset() override;
    /// Number of sub-indices currently held
    int count() const {
        return static_cast<int>(indices_.size());
    }
    /// The i-th sub-index (no bounds check is performed)
    IndexT* at(int i) {
        return indices_[i].first;
    }
    /// The i-th sub-index, const version (no bounds check is performed)
    const IndexT* at(int i) const {
        return indices_[i].first;
    }
    /// Whether or not we are responsible for deleting our contained indices
    bool own_fields;
   protected:
    /// Hook called just after an index is added
    virtual void onAfterAddIndex(IndexT* index);
    /// Hook called just after an index is removed
    virtual void onAfterRemoveIndex(IndexT* index);
    /// Waits for all futures in v and processes the exceptions they raised
    static void waitAndHandleFutures(std::vector<std::future<bool>>& v);
    /// Collection of Index instances, with their managing worker thread if any
    std::vector<std::pair<IndexT*, std::unique_ptr<WorkerThread>>> indices_;
    /// Is this index multi-threaded?
    bool isThreaded_;
 };
 } // namespace faiss
 #include <faiss/impl/ThreadedIndex-inl.h>
--- a/src/3rdlib/faiss/impl/io.h
+++ b/src/3rdlib/faiss/impl/io.h
@ -0,0 +1,145 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 /***********************************************************
 * Abstract I/O objects
 *
 * I/O is always sequential, seek does not need to be supported
 * (indexes could be read or written to a pipe).
 ***********************************************************/
 #pragma once
 #include <cstdio>
 #include <string>
 #include <vector>
 #include <faiss/Index.h>
 namespace faiss {
 /// Abstract sequential input source with an fread-like call operator.
 struct IOReader {
    // name that can be used in error messages
    std::string name;
    // fread. Returns number of items read or 0 in case of EOF.
    // Arguments presumably mirror fread (destination, item size in bytes,
    // item count) -- confirm against the implementations.
    virtual size_t operator()(void* ptr, size_t size, size_t nitems) = 0;
    // return a file number that can be memory-mapped
    // (declared non-pure, so a base implementation is expected out of line)
    virtual int fileno();
    virtual ~IOReader() {}
 };
 /// Abstract sequential output sink with an fwrite-like call operator.
 struct IOWriter {
    // name that can be used in error messages
    std::string name;
    // fwrite. Return number of items written
    // Arguments presumably mirror fwrite (source, item size in bytes,
    // item count) -- confirm against the implementations.
    virtual size_t operator()(const void* ptr, size_t size, size_t nitems) = 0;
    // return a file number that can be memory-mapped
    // (declared non-pure, so a base implementation is expected out of line)
    virtual int fileno();
    // declared noexcept(false): destructors in this hierarchy may throw
    virtual ~IOWriter() noexcept(false) {}
 };
 /// IOReader that holds its input in a byte vector.
 struct VectorIOReader : IOReader {
    std::vector<uint8_t> data;
    // starts at 0; presumably the current read position in data -- confirm
    // against the implementation
    size_t rp = 0;
    size_t operator()(void* ptr, size_t size, size_t nitems) override;
 };
 /// IOWriter that holds its output in a byte vector.
 struct VectorIOWriter : IOWriter {
    // presumably receives the written bytes -- confirm against the
    // implementation
    std::vector<uint8_t> data;
    size_t operator()(const void* ptr, size_t size, size_t nitems) override;
 };
 /// IOReader on top of a C stdio FILE handle.
 /// NOTE(review): the struct is copyable while holding a FILE*; if the
 /// destructor closes f when need_close is set, copies would close it twice
 /// -- confirm against the implementation.
 struct FileIOReader : IOReader {
    FILE* f = nullptr;
    // presumably tells the destructor whether it has to close f -- confirm
    bool need_close = false;
    // construct from an already opened FILE handle
    FileIOReader(FILE* rf);
    // construct from a file name
    FileIOReader(const char* fname);
    ~FileIOReader() override;
    size_t operator()(void* ptr, size_t size, size_t nitems) override;
    int fileno() override;
 };
 /// IOWriter on top of a C stdio FILE handle.
 /// NOTE(review): the struct is copyable while holding a FILE*; if the
 /// destructor closes f when need_close is set, copies would close it twice
 /// -- confirm against the implementation.
 struct FileIOWriter : IOWriter {
    FILE* f = nullptr;
    // presumably tells the destructor whether it has to close f -- confirm
    bool need_close = false;
    // construct from an already opened FILE handle
    FileIOWriter(FILE* wf);
    // construct from a file name
    FileIOWriter(const char* fname);
    ~FileIOWriter() override;
    size_t operator()(const void* ptr, size_t size, size_t nitems) override;
    int fileno() override;
 };
 /*******************************************************
 * Buffered reader + writer
 *
 * They attempt to read and write only buffers of size bsz to the
 * underlying reader or writer. This is done by splitting or merging
 * the read/write functions.
 *******************************************************/
 /** Wraps an IOReader so that the underlying reader only sees reads of the
 * buffer size, which avoids too small reads. */
 struct BufferedIOReader : IOReader {
    IOReader* reader; ///< the wrapped reader (held as a raw pointer)
    size_t bsz;       ///< buffer size in bytes
    size_t ofs;    ///< offset in input stream
    size_t ofs2;   ///< number of bytes returned to caller
    size_t b0, b1; ///< range of available bytes in the buffer
    std::vector<char> buffer;
    /**
     * @param reader reader to wrap
     * @param bsz    buffer size (bytes). Reads will be done by batches of
     *               this size (default 1024 * 1024)
     */
    explicit BufferedIOReader(IOReader* reader, size_t bsz = 1024 * 1024);
    size_t operator()(void* ptr, size_t size, size_t nitems) override;
 };
 /// Write-side counterpart of the buffered reader: wraps an IOWriter and
 /// stages data in a buffer of bsz bytes.
 struct BufferedIOWriter : IOWriter {
    IOWriter* writer; ///< the wrapped writer (held as a raw pointer)
    size_t bsz;       ///< buffer size in bytes
    size_t ofs;
    size_t ofs2; ///< number of bytes received from caller
    size_t b0;   ///< amount of data in buffer
    std::vector<char> buffer;
    /// @param writer writer to wrap
    /// @param bsz    buffer size in bytes (default 1024 * 1024)
    explicit BufferedIOWriter(IOWriter* writer, size_t bsz = 1024 * 1024);
    size_t operator()(const void* ptr, size_t size, size_t nitems) override;
    // flushes
    ~BufferedIOWriter() override;
 };
 /// cast a 4-character string to a uint32_t that can be written and read easily
 /// (two overloads: from a char array and from a std::string)
 uint32_t fourcc(const char sx[4]);
 uint32_t fourcc(const std::string& sx);
 // decoding of fourcc (int32 -> string)
 // The first overload writes into a caller-provided array declared with 5
 // chars (presumably 4 characters plus a terminating NUL -- confirm); the
 // other two return a std::string.
 void fourcc_inv(uint32_t x, char str[5]);
 std::string fourcc_inv(uint32_t x);
 std::string fourcc_inv_printable(uint32_t x);
 } // namespace faiss
--- a/src/3rdlib/faiss/impl/io_macros.h
+++ b/src/3rdlib/faiss/impl/io_macros.h
@ -0,0 +1,68 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 #pragma once
 /*************************************************************
 * I/O macros
 *
 * we use macros so that we have a line number to report in abort
 * (). This makes debugging a lot easier. The IOReader or IOWriter is
 * always called f and thus is not passed in as a macro parameter.
 **************************************************************/
 // Reads n items of sizeof(*(ptr)) bytes each from the reader f into ptr and
 // reports a failure through FAISS_THROW_IF_NOT_FMT unless exactly n items
 // were returned. The message carries f->name, both counts and
 // strerror(errno).
 // NOTE(review): ret and size_t(n) are size_t values printed with %zd; %zu
 // would be the matching conversion -- confirm before touching the message.
 #define READANDCHECK(ptr, n)                         \
    {                                                \
        size_t ret = (*f)(ptr, sizeof(*(ptr)), n);   \
        FAISS_THROW_IF_NOT_FMT(                      \
                ret == (n),                          \
                "read error in %s: %zd != %zd (%s)", \
                f->name.c_str(),                     \
                ret,                                 \
                size_t(n),                           \
                strerror(errno));                    \
    }
 // Reads exactly one item into the lvalue x.
 #define READ1(x) READANDCHECK(&(x), 1)
 // Reads a size_t element count, resizes vec to that count and then reads
 // the elements into vec.data().
 // Counts of 2^40 elements or more are rejected through FAISS_THROW_IF_NOT
 // before the resize takes place.
 // NOTE(review): size is a size_t, so the "size >= 0" half of the check is
 // always true.
 #define READVECTOR(vec)                                              \
    {                                                                \
        size_t size;                                                 \
        READANDCHECK(&size, 1);                                      \
        FAISS_THROW_IF_NOT(size >= 0 && size < (uint64_t{1} << 40)); \
        (vec).resize(size);                                          \
        READANDCHECK((vec).data(), size);                            \
    }
 // Reads a string stored as a size_t length followed by that many bytes
 // (the same layout READVECTOR consumes) into s, which is resized to fit.
 // Lengths of 2^40 or more are rejected before the resize, mirroring
 // READVECTOR.
 //
 // The previous body issued two WRITEANDCHECK calls, i.e. it wrote s instead
 // of reading it and never resized s. It therefore could not be used with a
 // reader at all (s.c_str() is a pointer to const).
 #define READSTRING(s)                                   \
    {                                                   \
        size_t size;                                    \
        READANDCHECK(&size, 1);                         \
        FAISS_THROW_IF_NOT(size < (uint64_t{1} << 40)); \
        (s).resize(size);                               \
        READANDCHECK(&(s)[0], size);                    \
    }
 // Writes n items of sizeof(*(ptr)) bytes each from ptr to the writer f and
 // reports a failure through FAISS_THROW_IF_NOT_FMT unless exactly n items
 // were accepted. The message carries f->name, both counts and
 // strerror(errno).
 // NOTE(review): ret and size_t(n) are size_t values printed with %zd; %zu
 // would be the matching conversion -- confirm before touching the message.
 #define WRITEANDCHECK(ptr, n)                         \
    {                                                 \
        size_t ret = (*f)(ptr, sizeof(*(ptr)), n);    \
        FAISS_THROW_IF_NOT_FMT(                       \
                ret == (n),                           \
                "write error in %s: %zd != %zd (%s)", \
                f->name.c_str(),                      \
                ret,                                  \
                size_t(n),                            \
                strerror(errno));                     \
    }
 // Writes exactly one item taken from the lvalue x.
 #define WRITE1(x) WRITEANDCHECK(&(x), 1)
 // Writes vec as a size_t element count followed by the elements themselves
 // (the layout READVECTOR consumes).
 #define WRITEVECTOR(vec)                   \
    {                                      \
        size_t size = (vec).size();        \
        WRITEANDCHECK(&size, 1);           \
        WRITEANDCHECK((vec).data(), size); \
    }
--- a/src/3rdlib/faiss/impl/lattice_Zn.h
+++ b/src/3rdlib/faiss/impl/lattice_Zn.h
@ -0,0 +1,188 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #ifndef FAISS_LATTICE_ZN_H
 #define FAISS_LATTICE_ZN_H
 #include <stddef.h>
 #include <stdint.h>
 #include <vector>
 namespace faiss {
 /** returns the nearest vertex in the sphere to a query. Returns only
 * the coordinates, not an id.
 *
 * Algorithm: all points are derived from a one atom vector up to a
 * permutation and sign changes. The search function finds the most
 * appropriate atom and transformation.
 */
 struct ZnSphereSearch {
    int dimS, r2;
    int natom;
    /// size dim * natom
    std::vector<float> voc;
    ZnSphereSearch(int dim, int r2);
    /// find nearest centroid. x does not need to be normalized
    /// (c presumably receives the centroid coordinates -- confirm)
    float search(const float* x, float* c) const;
    /// full call. Requires externally-allocated temp space
    float search(
            const float* x,
            float* c,
            float* tmp,   // size 2 *dim
            int* tmp_int, // size dim
            int* ibest_out = nullptr) const;
    // multi-threaded
    // NOTE(review): not declared const, unlike the two search() overloads
    void search_multi(int n, const float* x, float* c_out, float* dp_out);
 };
 /***************************************************************************
 * Support ids as well.
 *
 * Limitations: ids are limited to 64 bit
 ***************************************************************************/
 /// Interface for a finite collection of vectors in which every member is
 /// identified by a 64-bit code.
 struct EnumeratedVectors {
    /// number of vectors in the collection (0 until a subclass sets it)
    uint64_t nv;
    /// dimension handed to the constructor
    int dim;
    explicit EnumeratedVectors(int dim) : nv{0}, dim{dim} {}
    /// code of the vector x, which belongs to the collection
    virtual uint64_t encode(const float* x) const = 0;
    /// write the vector identified by code to c
    virtual void decode(uint64_t code, float* c) const = 0;
    /// encode() applied to nc vectors
    void encode_multi(size_t nc, const float* c, uint64_t* codes) const;
    /// decode() applied to nc codes
    void decode_multi(size_t nc, const uint64_t* codes, float* c) const;
    /// nearest neighbor of each xq among the given codes
    /// (decodes and computes distances)
    void find_nn(
            size_t n,
            const uint64_t* codes,
            size_t nq,
            const float* xq,
            int64_t* idx,
            float* dis);
    virtual ~EnumeratedVectors() = default;
 };
 /// One (value, count) pair; element type of Repeats::repeats.
 /// Neither field has a default initializer.
 struct Repeat {
    float val;
    int n; // presumably the number of occurrences of val -- confirm
 };
 /** Repeats: used to encode a vector that has n occurrences of
 *  val. Encodes the signs and permutation of the vector. Useful for
 *  atoms.
 */
 struct Repeats {
    int dim;
    std::vector<Repeat> repeats;
    // initialize from a template of the atom.
    // Both arguments are defaulted (dim = 0, c = nullptr), so the type is
    // default-constructible.
    Repeats(int dim = 0, const float* c = nullptr);
    // count number of possible codes for this atom
    uint64_t count() const;
    // encode the vector c (see the description of Repeats for what the
    // code covers)
    uint64_t encode(const float* c) const;
    // inverse of encode: write the vector for code to c
    void decode(uint64_t code, float* c) const;
 };
 /** codec that can return ids for the encoded vectors
 *
 * uses the ZnSphereSearch to encode the vector by encoding the
 * permutation and signs. Depends on ZnSphereSearch because it uses
 * the atom numbers */
 struct ZnSphereCodec : ZnSphereSearch, EnumeratedVectors {
    /// A Repeats object extended with code-range bookkeeping.
    struct CodeSegment : Repeats {
        explicit CodeSegment(const Repeats& r) : Repeats(r) {}
        // NOTE: the constructor leaves c0 and signbits uninitialized
        uint64_t c0; // first code assigned to segment
        int signbits;
    };
    std::vector<CodeSegment> code_segments;
    // NOTE(review): EnumeratedVectors already declares a member named nv;
    // this declaration introduces a second one that hides it inside
    // ZnSphereCodec.
    uint64_t nv;
    size_t code_size;
    ZnSphereCodec(int dim, int r2);
    uint64_t search_and_encode(const float* x) const;
    void decode(uint64_t code, float* c) const override;
    /// takes vectors that do not need to be centroids
    uint64_t encode(const float* x) const override;
 };
 /** recursive sphere codec
 *
 * Uses a recursive decomposition on the dimensions to encode
 * centroids found by the ZnSphereSearch. The codes are *not*
 * compatible with the ones of ZnSphereCodec
 */
 struct ZnSphereCodecRec : EnumeratedVectors {
    int r2;
    int log2_dim;
    int code_size;
    ZnSphereCodecRec(int dim, int r2);
    uint64_t encode_centroid(const float* c) const;
    void decode(uint64_t code, float* c) const override;
    /// vectors need to be centroids (does not work on arbitrary
    /// vectors)
    uint64_t encode(const float* x) const override;
    // tables behind get_nv / get_nv_cum / set_nv_cum below (presumably;
    // confirm against the implementation)
    std::vector<uint64_t> all_nv;
    std::vector<uint64_t> all_nv_cum;
    int decode_cache_ld;
    std::vector<std::vector<float>> decode_cache;
    // nb of vectors in the sphere in dim 2^ld with r2 radius
    uint64_t get_nv(int ld, int r2a) const;
    // cumulative version
    uint64_t get_nv_cum(int ld, int r2t, int r2a) const;
    // setter matching get_nv_cum
    void set_nv_cum(int ld, int r2t, int r2a, uint64_t v);
 };
 /** Codec that uses the recursive codec if dim is a power of 2 and
 * the regular one otherwise */
 struct ZnSphereCodecAlt : ZnSphereCodec {
    // presumably true when the recursive codec is in use, i.e. when dim is
    // a power of 2 -- confirm against the constructor
    bool use_rec;
    // the recursive codec, held by value
    ZnSphereCodecRec znc_rec;
    ZnSphereCodecAlt(int dim, int r2);
    uint64_t encode(const float* x) const override;
    void decode(uint64_t code, float* c) const override;
 };
 } // namespace faiss
 #endif
--- a/src/3rdlib/faiss/impl/platform_macros.h
+++ b/src/3rdlib/faiss/impl/platform_macros.h
@ -0,0 +1,89 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 #pragma once
 #ifdef _MSC_VER
 /*******************************************************
 * Windows specific macros
 *******************************************************/
 // FAISS_API selects the DLL linkage of a symbol: dllexport when
 // FAISS_MAIN_LIB is defined (presumably while building the library itself),
 // dllimport otherwise.
 #ifdef FAISS_MAIN_LIB
 #define FAISS_API __declspec(dllexport)
 #else // FAISS_MAIN_LIB
 #define FAISS_API __declspec(dllimport)
 #endif // FAISS_MAIN_LIB
 // GCC-style function-signature macro expressed with the MSVC one
 #define __PRETTY_FUNCTION__ __FUNCSIG__
 // posix_memalign on top of _aligned_malloc: stores the allocation in *p and
 // evaluates to 0 when *p is non-null, to errno otherwise. Note that the
 // argument order of _aligned_malloc is (size, alignment).
 #define posix_memalign(p, a, s) \
    (((*(p)) = _aligned_malloc((s), (a))), *(p) ? 0 : errno)
 // release function for memory obtained through posix_memalign
 #define posix_memalign_free _aligned_free
 // aligned should be in front of the declaration
 #define ALIGNED(x) __declspec(align(x))
 // redefine the GCC intrinsics with Windows equivalents
 #include <intrin.h>
 // MSVC replacement for the GCC builtin of the same name, built on
 // _BitScanForward64.
 // NOTE(review): the return value of _BitScanForward64 is ignored, so the
 // result for x == 0 is whatever the intrinsic leaves in the output --
 // confirm callers never pass 0.
 inline int __builtin_ctzll(uint64_t x) {
    unsigned long bit_index;
    _BitScanForward64(&bit_index, x);
    return static_cast<int>(bit_index);
 }
 // cudatoolkit provides __builtin_ctz for NVCC >= 11.0
 #if !defined(__CUDACC__) || __CUDACC_VER_MAJOR__ < 11
 // MSVC replacement for the GCC builtin of the same name, built on
 // _BitScanForward.
 // NOTE(review): the return value of _BitScanForward is ignored, so the
 // result for x == 0 is whatever the intrinsic leaves in the output --
 // confirm callers never pass 0.
 inline int __builtin_ctz(unsigned long x) {
    unsigned long bit_index;
    _BitScanForward(&bit_index, x);
    return static_cast<int>(bit_index);
 }
 #endif
 // MSVC replacement for the GCC builtin of the same name, forwarding to the
 // __lzcnt64 intrinsic.
 inline int __builtin_clzll(uint64_t x) {
    const auto leading_zeros = __lzcnt64(x);
    return (int)leading_zeros;
 }
 // map the GCC popcount builtins onto the MSVC intrinsics
 #define __builtin_popcount __popcnt
 #define __builtin_popcountl __popcnt64
 // MSVC does not define __SSEx__, and _M_IX86_FP is only defined on 32-bit
 // processors cf.
 // https://docs.microsoft.com/en-us/cpp/preprocessor/predefined-macros
 #ifdef __AVX__
 #define __SSE__ 1
 #define __SSE2__ 1
 #define __SSE3__ 1
 #define __SSE4_1__ 1
 #define __SSE4_2__ 1
 #endif
 // MSVC sets FMA and F16C automatically when using AVX2
 // Ref. FMA (under /arch:AVX2):
 // https://docs.microsoft.com/en-us/cpp/build/reference/arch-x64 Ref. F16C (2nd
 // paragraph): https://walbourn.github.io/directxmath-avx2/
 #ifdef __AVX2__
 #define __FMA__ 1
 #define __F16C__ 1
 #endif
 #else
 /*******************************************************
 * Linux and OSX
 *******************************************************/
 // no import/export decoration outside of MSVC
 #define FAISS_API
 // memory obtained through posix_memalign is released with plain free here
 #define posix_memalign_free free
 // aligned should be *in front* of the declaration, for compatibility with
 // windows
 #define ALIGNED(x) __attribute__((aligned(x)))
 #endif // _MSC_VER
--- a/src/3rdlib/faiss/impl/pq4_fast_scan.h
+++ b/src/3rdlib/faiss/impl/pq4_fast_scan.h
@ -0,0 +1,160 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 #pragma once
 #include <cstdint>
 #include <cstdlib>
 /** PQ4 SIMD packing and accumulation functions
 *
 * The basic kernel accumulates nq query vectors with bbs = nb * 2 * 16 vectors
 * and produces an output matrix for that. It is interesting for nq * nb <= 4,
 * otherwise register spilling becomes too large.
 *
 * The implementation of these functions is spread over 3 cpp files to reduce
 * parallel compile times. Templates are instantiated explicitly.
 */
 namespace faiss {
 /** Pack codes for consumption by the SIMD kernels.
 *  The unused bytes are set to 0.
 *
 * @param codes   input codes, size (ntotal, ceil(M / 2))
 * @param ntotal  number of input codes
 * @param M       number of sub-quantizers before rounding (see M2)
 * @param nb      output number of codes (ntotal rounded up to a multiple of
 *                bbs)
 * @param bbs     size of database blocks (multiple of 32)
 * @param M2      number of sub-quantizers (=M rounded up to a multiple of 2)
 * @param blocks  output array, size nb * nsq / 2 (nsq presumably stands for
 *                M2 here -- confirm against the implementation)
 */
 void pq4_pack_codes(
        const uint8_t* codes,
        size_t ntotal,
        size_t M,
        size_t nb,
        size_t bbs,
        size_t M2,
        uint8_t* blocks);
 /** Same as pq4_pack_codes but write in a given range of the output,
 * leaving the rest untouched. Assumes allocated entries are 0 on input.
 *
 * @param codes   input codes, size (i1 - i0, ceil(M / 2))
 * @param M       number of sub-quantizers before rounding (see M2)
 * @param i0      first output code to write
 * @param i1      last output code to write (the size of codes, i1 - i0,
 *                suggests an exclusive bound -- confirm)
 * @param bbs     size of database blocks
 * @param M2      number of sub-quantizers, presumably M rounded up to a
 *                multiple of 2 as in pq4_pack_codes -- confirm
 * @param blocks  output array, size at least ceil(i1 / bbs) * bbs * nsq / 2
 */
 void pq4_pack_codes_range(
        const uint8_t* codes,
        size_t M,
        size_t i0,
        size_t i1,
        size_t bbs,
        size_t M2,
        uint8_t* blocks);
 /** get a single element from a packed codes table
 *
 * @param data     packed codes table to read from
 * @param bbs      size of database blocks
 * @param nsq      number of sub-quantizers
 * @param i        vector id
 * @param sq       subquantizer (< nsq)
 */
 uint8_t pq4_get_packed_element(
        const uint8_t* data,
        size_t bbs,
        size_t nsq,
        size_t i,
        size_t sq);
 /** Pack Look-up table for consumption by the kernel.
 *
 * @param nq      number of queries
 * @param nsq     number of sub-quantizers (multiple of 2)
 * @param src     input array, size (nq, 16)
 * @param dest    output array, size (nq, 16)
 */
 void pq4_pack_LUT(int nq, int nsq, const uint8_t* src, uint8_t* dest);
 /** Loop over database elements and accumulate results into result handler
 *
 * @param nq      number of queries
 * @param nb      number of database elements
 * @param bbs     size of database blocks (multiple of 32)
 * @param nsq     number of sub-quantizers (multiple of 2)
 * @param codes   packed codes array
 * @param LUT     packed look-up table
 * @param res     result handler that receives the accumulated results
 */
 template <class ResultHandler>
 void pq4_accumulate_loop(
        int nq,
        size_t nb,
        int bbs,
        int nsq,
        const uint8_t* codes,
        const uint8_t* LUT,
        ResultHandler& res);
 /* qbs versions, supported only for bbs=32.
 *
 * The kernel function runs the kernel for *several* query blocks
 * and bbs database vectors. The sizes of the blocks are encoded in qbs as
 * base-16 digits.
 *
 * For example, qbs = 0x1223 means that the kernel will be run 4 times, the
 * first time with 3 query vectors, second time with 2 query vectors, then 2
 * vectors again and finally with 1 query vector. The output block will thus be
 * nq = 3 + 2 + 2 + 1 = 8 queries. For a given total block size, the optimal
 * decomposition into sub-blocks (measured empirically) is given by
 * preferred_qbs().
 */
 /* compute the number of queries from a base-16 decomposition
 * (qbs holds one block size per base-16 digit) */
 int pq4_qbs_to_nq(int qbs);
 /** return the preferred decomposition in blocks for a number of queries nq;
 * the result is a qbs value, i.e. one block size per base-16 digit. */
 int pq4_preferred_qbs(int nq);
 /** Pack Look-up table for consumption by the kernel.
 *
 * @param fqbs    4-bit encoded number of query blocks, the total number of
 *                queries handled (nq) is deduced from it
 * @param nsq     number of sub-quantizers (multiple of 2)
 * @param src     input array, size (nq, 16)
 * @param dest    output array, size (nq, 16)
 * @return nq
 */
 int pq4_pack_LUT_qbs(int fqbs, int nsq, const uint8_t* src, uint8_t* dest);
 /** Same as pq4_pack_LUT_qbs, except the source vectors are remapped with q_map
 *
 * @param qbs     4-bit encoded number of query blocks
 * @param nsq     number of sub-quantizers
 * @param src     input array
 * @param q_map   remapping applied to the source vectors
 * @param dest    output array
 * @return presumably nq, as for pq4_pack_LUT_qbs -- confirm
 */
 int pq4_pack_LUT_qbs_q_map(
        int qbs,
        int nsq,
        const uint8_t* src,
        const int* q_map,
        uint8_t* dest);
 /** Run accumulation loop.
 *
 * @param qbs     4-bit encoded number of queries
 * @param nb      number of database codes (multiple of bbs)
 * @param nsq     number of sub-quantizers
 * @param codes   encoded database vectors (packed)
 * @param LUT     look-up table (packed)
 * @param res     call-back for the results
 */
 template <class ResultHandler>
 void pq4_accumulate_loop_qbs(
        int qbs,
        size_t nb,
        int nsq,
        const uint8_t* codes,
        const uint8_t* LUT,
        ResultHandler& res);
 } // namespace faiss
--- a/src/3rdlib/faiss/impl/simd_result_handlers.h
+++ b/src/3rdlib/faiss/impl/simd_result_handlers.h
@ -0,0 +1,531 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 #pragma once
 #include <algorithm>
 #include <type_traits>
 #include <vector>
 #include <faiss/utils/Heap.h>
 #include <faiss/utils/simdlib.h>
 #include <faiss/impl/platform_macros.h>
 #include <faiss/utils/AlignedTable.h>
 #include <faiss/utils/partitioning.h>
 /** This file contains callbacks for kernels that compute distances.
 *
 * The SIMDResultHandler object is intended to be templated and inlined.
 * Methods:
 * - handle(): called when 32 distances are computed and provided in two
 *   simd16uint16. (q, b) indicate which entry it is in the block.
 * - set_block_origin(): set the sub-matrix that is being computed
 */
 namespace faiss {
 namespace simd_result_handlers {
 /** Placeholder handler: folds every result it receives into a checksum so
 * that the distance computation cannot be optimized away. */
 struct DummyResultHandler {
    /// running checksum
    size_t cs = 0;
    /// the block origin is irrelevant for a checksum, so this is a no-op
    void set_block_origin(size_t, size_t) {}
    /// mix the block coordinates and the first lane of each register into cs
    void handle(size_t q, size_t b, simd16uint16 d0, simd16uint16 d1) {
        const size_t coord_term = q * 123 + b * 789;
        cs += coord_term + d0.get_scalar_0() + d1.get_scalar_0();
    }
 };
 /** Writes every result into a caller-provided nq-by-nb matrix.
 *
 * (i0, j0) is the upper-left corner of the block currently being written.
 */
 struct StoreResultHandler {
    uint16_t* data;
    size_t ld; // total number of columns
    size_t i0 = 0;
    size_t j0 = 0;
    StoreResultHandler(uint16_t* data, size_t ld) : data(data), ld(ld) {}
    /// move the block origin to row i0, column j0
    void set_block_origin(size_t i0, size_t j0) {
        this->i0 = i0;
        this->j0 = j0;
    }
    /// store the 32 values of (d0, d1) at row q + i0, column j0 + 32 * b
    void handle(size_t q, size_t b, simd16uint16 d0, simd16uint16 d1) {
        const size_t row = q + i0;
        uint16_t* dst = data + (row * ld + j0 + b * 32);
        d0.store(dst);
        d1.store(dst + 16);
    }
 };
 /** stores results in fixed-size matrix. */
 template <int NQ, int BB>
 struct FixedStorageHandler {
    simd16uint16 dis[NQ][BB];
    int i0 = 0;
    void handle(int q, int b, simd16uint16 d0, simd16uint16 d1) {
        dis[q + i0][2 * b] = d0;
        dis[q + i0][2 * b + 1] = d1;
    }
    void set_block_origin(size_t i0, size_t j0) {
        this->i0 = i0;
        assert(j0 == 0);
    }
    template <class OtherResultHandler>
    void to_other_handler(OtherResultHandler& other) const {
        for (int q = 0; q < NQ; q++) {
            for (int b = 0; b < BB; b += 2) {
                other.handle(q, b / 2, dis[q][b], dis[q][b + 1]);
            }
        }
    }
 };
 /** Record origin of current block  */
 template <class C, bool with_id_map>
 struct SIMDResultHandler {
    using TI = typename C::TI;
    bool disable = false;
    int64_t i0 = 0; // query origin
    int64_t j0 = 0; // db origin
    size_t ntotal;  // ignore excess elements after ntotal
    /// these fields are used mainly for the IVF variants (with_id_map=true)
    const TI* id_map;      // map offset in invlist to vector id
    const int* q_map;      // map q to global query
    const uint16_t* dbias; // table of biases to add to each query
    explicit SIMDResultHandler(size_t ntotal)
            : ntotal(ntotal), id_map(nullptr), q_map(nullptr), dbias(nullptr) {}
    void set_block_origin(size_t i0, size_t j0) {
        this->i0 = i0;
        this->j0 = j0;
    }
    // adjust handler data for IVF.
    void adjust_with_origin(size_t& q, simd16uint16& d0, simd16uint16& d1) {
        q += i0;
        if (dbias) {
            simd16uint16 dbias16(dbias[q]);
            d0 += dbias16;
            d1 += dbias16;
        }
        if (with_id_map) { // FIXME test on q_map instead
            q = q_map[q];
        }
    }
    // compute and adjust idx
    int64_t adjust_id(size_t b, size_t j) {
        int64_t idx = j0 + 32 * b + j;
        if (with_id_map) {
            idx = id_map[idx];
        }
        return idx;
    }
    /// return binary mask of elements below thr in (d0, d1)
    /// inverse_test returns elements above
    uint32_t get_lt_mask(
            uint16_t thr,
            size_t b,
            simd16uint16 d0,
            simd16uint16 d1) {
        simd16uint16 thr16(thr);
        uint32_t lt_mask;
        constexpr bool keep_min = C::is_max;
        if (keep_min) {
            lt_mask = ~cmp_ge32(d0, d1, thr16);
        } else {
            lt_mask = ~cmp_le32(d0, d1, thr16);
        }
        if (lt_mask == 0) {
            return 0;
        }
        uint64_t idx = j0 + b * 32;
        if (idx + 32 > ntotal) {
            if (idx >= ntotal) {
                return 0;
            }
            int nbit = (ntotal - idx);
            lt_mask &= (uint32_t(1) << nbit) - 1;
        }
        return lt_mask;
    }
    virtual void to_flat_arrays(
            float* distances,
            int64_t* labels,
            const float* normalizers = nullptr) = 0;
    virtual ~SIMDResultHandler() {}
 };
 /** Special version for k=1: keeps only the single best (value, id) pair for
 * each query. */
 template <class C, bool with_id_map = false>
 struct SingleResultHandler : SIMDResultHandler<C, with_id_map> {
    using T = typename C::T;
    using TI = typename C::TI;
    /// best result seen so far for one query
    struct Result {
        T val;
        TI id;
    };
    std::vector<Result> results; ///< one entry per query
    /** @param nq      number of queries (size of results)
     *  @param ntotal  forwarded to the base class
     *
     * Every entry starts as (C::neutral(), -1).
     */
    SingleResultHandler(size_t nq, size_t ntotal)
            : SIMDResultHandler<C, with_id_map>(ntotal),
              results(nq, Result{C::neutral(), -1}) {}
    /// update the best result of query q with the 32 distances in (d0, d1)
    void handle(size_t q, size_t b, simd16uint16 d0, simd16uint16 d1) {
        if (this->disable) {
            return;
        }
        this->adjust_with_origin(q, d0, d1);
        Result& res = results[q];
        uint32_t lt_mask = this->get_lt_mask(res.val, b, d0, d1);
        if (!lt_mask) {
            return;
        }
        ALIGNED(32) uint16_t d32tab[32];
        d0.store(d32tab);
        d1.store(d32tab + 16);
        while (lt_mask) {
            // find first non-zero
            int j = __builtin_ctz(lt_mask);
            // clear the lowest set bit; unlike `1 << j` this never shifts a
            // signed int into its sign bit when j == 31
            lt_mask &= lt_mask - 1;
            T dis = d32tab[j];
            if (C::cmp(res.val, dis)) {
                res.val = dis;
                res.id = this->adjust_id(b, j);
            }
        }
    }
    /** Write one distance and one label per query.
     *
     * @param distances    output, one float per query
     * @param labels       output, one id per query
     * @param normalizers  optional; when given, entries 2*q and 2*q+1 hold a
     *                     and b and the distance is written as b + val / a
     */
    void to_flat_arrays(
            float* distances,
            int64_t* labels,
            const float* normalizers = nullptr) override {
        for (size_t q = 0; q < results.size(); q++) {
            if (!normalizers) {
                distances[q] = results[q].val;
            } else {
                float one_a = 1 / normalizers[2 * q];
                float b = normalizers[2 * q + 1];
                distances[q] = b + results[q].val * one_a;
            }
            labels[q] = results[q].id;
        }
    }
 };
 /** Structure that collects results in a min- or max-heap.
 *
 * The heap storage is owned by the caller: heap_dis_tab and heap_ids_tab are
 * indexed as q * k + j for q < nq and j < k. */
 template <class C, bool with_id_map = false>
 struct HeapHandler : SIMDResultHandler<C, with_id_map> {
    using T = typename C::T;
    using TI = typename C::TI;
    int nq;
    T* heap_dis_tab;
    TI* heap_ids_tab;
    int64_t k; // number of results to keep
    /// heapifies each of the nq heaps of size k
    HeapHandler(
            int nq,
            T* heap_dis_tab,
            TI* heap_ids_tab,
            size_t k,
            size_t ntotal)
            : SIMDResultHandler<C, with_id_map>(ntotal),
              nq(nq),
              heap_dis_tab(heap_dis_tab),
              heap_ids_tab(heap_ids_tab),
              k(k) {
        for (int q = 0; q < nq; q++) {
            T* heap_dis_in = heap_dis_tab + q * k;
            TI* heap_ids_in = heap_ids_tab + q * k;
            heap_heapify<C>(k, heap_dis_in, heap_ids_in);
        }
    }
    /// offer the 32 distances in (d0, d1) to the heap of query q
    void handle(size_t q, size_t b, simd16uint16 d0, simd16uint16 d1) {
        if (this->disable) {
            return;
        }
        this->adjust_with_origin(q, d0, d1);
        T* heap_dis = heap_dis_tab + q * k;
        TI* heap_ids = heap_ids_tab + q * k;
        // the SIMD comparison works on 16-bit values, so clamp the heap top
        uint16_t cur_thresh =
                heap_dis[0] < 65536 ? (uint16_t)(heap_dis[0]) : 0xffff;
        // here we handle the reverse comparison case as well
        uint32_t lt_mask = this->get_lt_mask(cur_thresh, b, d0, d1);
        if (!lt_mask) {
            return;
        }
        ALIGNED(32) uint16_t d32tab[32];
        d0.store(d32tab);
        d1.store(d32tab + 16);
        while (lt_mask) {
            // find first non-zero
            int j = __builtin_ctz(lt_mask);
            // clear the lowest set bit; unlike `1 << j` this never shifts a
            // signed int into its sign bit when j == 31
            lt_mask &= lt_mask - 1;
            T dis = d32tab[j];
            // re-check against the heap top, which may have changed
            if (C::cmp(heap_dis[0], dis)) {
                int64_t idx = this->adjust_id(b, j);
                heap_pop<C>(k, heap_dis, heap_ids);
                heap_push<C>(k, heap_dis, heap_ids, dis, idx);
            }
        }
    }
    /** Reorder each heap and copy it to the flat output arrays.
     *
     * @param distances    output, nq * k floats
     * @param labels       output, nq * k ids
     * @param normalizers  optional; when given, entries 2*q and 2*q+1 hold a
     *                     and b and each distance is written as val / a + b
     */
    void to_flat_arrays(
            float* distances,
            int64_t* labels,
            const float* normalizers = nullptr) override {
        for (int q = 0; q < nq; q++) {
            T* heap_dis_in = heap_dis_tab + q * k;
            TI* heap_ids_in = heap_ids_tab + q * k;
            heap_reorder<C>(k, heap_dis_in, heap_ids_in);
            int64_t* heap_ids = labels + q * k;
            float* heap_dis = distances + q * k;
            float one_a = 1.0, b = 0.0;
            if (normalizers) {
                one_a = 1 / normalizers[2 * q];
                b = normalizers[2 * q + 1];
            }
            // the index has the same type as k, so there is no mixed-width
            // comparison
            for (int64_t j = 0; j < k; j++) {
                heap_ids[j] = heap_ids_in[j];
                heap_dis[j] = heap_dis_in[j] * one_a + b;
            }
        }
    }
 };
 /** Simple top-N implementation using a reservoir.
 *
 * Results are stored when they are below the threshold until the capacity is
 * reached. Then a partition sort is used to update the threshold. */
 namespace {
 /// Cycle counter for micro-benchmarks: reads the time-stamp counter when
 /// MICRO_BENCHMARK is defined, otherwise always returns 0.
 uint64_t get_cy() {
 #ifndef MICRO_BENCHMARK
    return 0;
 #else
    uint32_t lo_word, hi_word;
    asm volatile("rdtsc \n\t" : "=a"(lo_word), "=d"(hi_word));
    const uint64_t upper = (uint64_t)hi_word << 32;
    return upper | lo_word;
 #endif
 }
 } // anonymous namespace
 /** Top-n selection over a caller-provided buffer of `capacity` slots.
 *
 * Values that pass the current threshold are appended; when the buffer is
 * full it is partitioned, which also updates the threshold. */
 template <class C>
 struct ReservoirTopN {
    using T = typename C::T;
    using TI = typename C::TI;
    T* vals;
    TI* ids;
    size_t i;        // number of stored elements
    size_t n;        // number of requested elements
    size_t capacity; // size of storage
    size_t cycles = 0;
    T threshold; // current threshold
    ReservoirTopN(size_t n, size_t capacity, T* vals, TI* ids)
            : vals(vals),
              ids(ids),
              i(0),
              n(n),
              capacity(capacity),
              threshold(C::neutral()) {
        assert(n < capacity);
    }
    /// store (val, id) if val passes the threshold, making room first if full
    void add(T val, TI id) {
        if (!C::cmp(threshold, val)) {
            return;
        }
        if (i == capacity) {
            shrink_fuzzy();
        }
        vals[i] = val;
        ids[i] = id;
        ++i;
    }
    /// shrink number of stored elements to n
    void shrink_xx() {
        const uint64_t started = get_cy();
        qselect(vals, ids, i, n);
        i = n; // forget all elements above i = n
        // recompute the threshold from the n elements that were kept
        T bound = C::Crev::neutral();
        for (const T* p = vals; p != vals + n; ++p) {
            if (C::cmp(*p, bound)) {
                bound = *p;
            }
        }
        threshold = bound;
        cycles += get_cy() - started;
    }
    /// partition down to exactly n elements; the result is the new threshold
    void shrink() {
        const uint64_t started = get_cy();
        threshold = partition<C>(vals, ids, i, n);
        i = n;
        cycles += get_cy() - started;
    }
    /// partition a full buffer down to between n and (capacity + n) / 2
    /// elements; partition_fuzzy writes the resulting count into i
    void shrink_fuzzy() {
        const uint64_t started = get_cy();
        assert(i == capacity);
        const size_t q_max = (capacity + n) / 2;
        threshold = partition_fuzzy<C>(vals, ids, capacity, n, q_max, &i);
        cycles += get_cy() - started;
    }
 };
 /** Handler built from several ReservoirTopN (one per query).
 *
 * All reservoirs share two flat buffers (all_ids, all_vals); reservoir i uses
 * the slice starting at i * capacity. */
 template <class C, bool with_id_map = false>
 struct ReservoirHandler : SIMDResultHandler<C, with_id_map> {
    using T = typename C::T;
    using TI = typename C::TI;
    size_t capacity; // rounded up to multiple of 16
    std::vector<TI> all_ids;
    AlignedTable<T> all_vals;
    std::vector<ReservoirTopN<C>> reservoirs;
    uint64_t times[4]; // cycle counters, see get_cy()
    /** @param nq           number of queries (one reservoir each)
     *  @param ntotal       forwarded to the base class
     *  @param n            number of results requested per query
     *  @param capacity_in  requested reservoir size, rounded up to a
     *                      multiple of 16
     */
    ReservoirHandler(size_t nq, size_t ntotal, size_t n, size_t capacity_in)
            : SIMDResultHandler<C, with_id_map>(ntotal),
              capacity((capacity_in + 15) & ~15),
              all_ids(nq * capacity),
              all_vals(nq * capacity) {
        assert(capacity % 16 == 0);
        reservoirs.reserve(nq); // single allocation for all reservoirs
        for (size_t i = 0; i < nq; i++) {
            reservoirs.emplace_back(
                    n,
                    capacity,
                    all_vals.get() + i * capacity,
                    all_ids.data() + i * capacity);
        }
        times[0] = times[1] = times[2] = times[3] = 0;
    }
    /// offer the 32 distances in (d0, d1) to the reservoir of query q
    void handle(size_t q, size_t b, simd16uint16 d0, simd16uint16 d1) {
        uint64_t t0 = get_cy();
        if (this->disable) {
            return;
        }
        this->adjust_with_origin(q, d0, d1);
        ReservoirTopN<C>& res = reservoirs[q];
        uint32_t lt_mask = this->get_lt_mask(res.threshold, b, d0, d1);
        uint64_t t1 = get_cy();
        times[0] += t1 - t0;
        if (!lt_mask) {
            return;
        }
        ALIGNED(32) uint16_t d32tab[32];
        d0.store(d32tab);
        d1.store(d32tab + 16);
        while (lt_mask) {
            // find first non-zero
            int j = __builtin_ctz(lt_mask);
            // clear the lowest set bit; unlike `1 << j` this never shifts a
            // signed int into its sign bit when j == 31
            lt_mask &= lt_mask - 1;
            T dis = d32tab[j];
            res.add(dis, this->adjust_id(b, j));
        }
        times[1] += get_cy() - t1;
    }
    /** Sort the content of every reservoir and write it to the flat arrays.
     *
     * @param distances    output, n floats per query
     * @param labels       output, n ids per query
     * @param normalizers  optional; when given, entries 2*q and 2*q+1 hold a
     *                     and b and each distance is written as val / a + b
     *
     * Slots beyond the number of stored results are filled by heap_heapify.
     */
    void to_flat_arrays(
            float* distances,
            int64_t* labels,
            const float* normalizers = nullptr) override {
        using Cf = typename std::conditional<
                C::is_max,
                CMax<float, int64_t>,
                CMin<float, int64_t>>::type;
        uint64_t t0 = get_cy();
        uint64_t t3 = 0;
        // every reservoir has the same n; with zero queries there is no
        // reservoirs[0] to read it from
        std::vector<int> perm(reservoirs.empty() ? 0 : reservoirs[0].n);
        for (size_t q = 0; q < reservoirs.size(); q++) {
            ReservoirTopN<C>& res = reservoirs[q];
            size_t n = res.n;
            if (res.i > res.n) {
                res.shrink();
            }
            int64_t* heap_ids = labels + q * n;
            float* heap_dis = distances + q * n;
            float one_a = 1.0, b = 0.0;
            if (normalizers) {
                one_a = 1 / normalizers[2 * q];
                b = normalizers[2 * q + 1];
            }
            // res.i <= n here, so it fits in perm
            const int n_found = static_cast<int>(res.i);
            for (int i = 0; i < n_found; i++) {
                perm[i] = i;
            }
            // indirect sort of result arrays
            std::sort(
                    perm.begin(),
                    perm.begin() + n_found,
                    [&res](int i, int j) {
                        return C::cmp(res.vals[j], res.vals[i]);
                    });
            for (int i = 0; i < n_found; i++) {
                heap_dis[i] = res.vals[perm[i]] * one_a + b;
                heap_ids[i] = res.ids[perm[i]];
            }
            // possibly add empty results
            heap_heapify<Cf>(n - res.i, heap_dis + res.i, heap_ids + res.i);
            t3 += res.cycles;
        }
        times[2] += get_cy() - t0;
        times[3] += t3;
    }
 };
 } // namespace simd_result_handlers
 } // namespace faiss
--- a/src/3rdlib/faiss/index_factory.h
+++ b/src/3rdlib/faiss/index_factory.h
@ -0,0 +1,24 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 #pragma once
 #include <faiss/Index.h>
 #include <faiss/IndexBinary.h>
 namespace faiss {
 /** Build an index with the sequence of processing steps described in
 *  the string.
 *
 * @param d            NOTE(review): presumably the vector dimension; confirm
 * @param description  string describing the processing steps
 * @param metric       metric type, METRIC_L2 when omitted
 * @return             the constructed index
 */
 Index* index_factory(
        int d,
        const char* description,
        MetricType metric = METRIC_L2);
 /** Counterpart of index_factory that returns an IndexBinary.
 *
 * @param d            NOTE(review): presumably the vector dimension; confirm
 *                     whether it is counted in bits
 * @param description  string describing the index to build
 */
 IndexBinary* index_binary_factory(int d, const char* description);
 } // namespace faiss
--- a/src/3rdlib/faiss/index_io.h
+++ b/src/3rdlib/faiss/index_io.h
@ -0,0 +1,79 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 // I/O code for indexes
 #ifndef FAISS_INDEX_IO_H
 #define FAISS_INDEX_IO_H
 #include <cstdio>
 #include <string>
 #include <typeinfo>
 #include <vector>
 /** I/O functions can read/write to a filename, a file handle or to an
 * object that abstracts the medium.
 *
 * The read functions return objects that should be deallocated with
 * delete. All references within these objects are owned by the
 * object.
 */
 namespace faiss {
 struct Index;
 struct IndexBinary;
 struct VectorTransform;
 struct ProductQuantizer;
 struct IOReader;
 struct IOWriter;
 struct InvertedLists;
 /// Write an Index to a file name, an open FILE handle or an IOWriter.
 void write_index(const Index* idx, const char* fname);
 void write_index(const Index* idx, FILE* f);
 void write_index(const Index* idx, IOWriter* writer);
 /// Same three destinations, for an IndexBinary.
 void write_index_binary(const IndexBinary* idx, const char* fname);
 void write_index_binary(const IndexBinary* idx, FILE* f);
 void write_index_binary(const IndexBinary* idx, IOWriter* writer);
 // Flags for the io_flags argument of the read functions declared below.
 // The read_index flags are implemented only for a subset of index types.
 // NOTE(review): the name suggests the index is opened read-only; confirm
 // against the implementation.
 const int IO_FLAG_READ_ONLY = 2;
 // strip directory component from ondisk filename, and assume it's in
 // the same directory as the index file
 const int IO_FLAG_ONDISK_SAME_DIR = 4;
 // don't load IVF data to RAM, only list sizes
 const int IO_FLAG_SKIP_IVF_DATA = 8;
 // try to memmap data (useful to load an ArrayInvertedLists as an
 // OnDiskInvertedLists). Combines IO_FLAG_SKIP_IVF_DATA with the constant
 // 0x646f0000, which occupies the upper 16 bits of the flag word.
 const int IO_FLAG_MMAP = IO_FLAG_SKIP_IVF_DATA | 0x646f0000;
 /// Read an Index from a file name, an open FILE handle or an IOReader.
 /// io_flags is a combination of the IO_FLAG_* constants (0 when omitted).
 /// As stated in the comment at the top of this header, the returned object
 /// should be deallocated with delete.
 Index* read_index(const char* fname, int io_flags = 0);
 Index* read_index(FILE* f, int io_flags = 0);
 Index* read_index(IOReader* reader, int io_flags = 0);
 /// Same three sources, for an IndexBinary.
 IndexBinary* read_index_binary(const char* fname, int io_flags = 0);
 IndexBinary* read_index_binary(FILE* f, int io_flags = 0);
 IndexBinary* read_index_binary(IOReader* reader, int io_flags = 0);
 /// Write / read a VectorTransform to / from a file name.
 void write_VectorTransform(const VectorTransform* vt, const char* fname);
 VectorTransform* read_VectorTransform(const char* fname);
 /// Read / write a ProductQuantizer from / to a file name or an I/O object.
 ProductQuantizer* read_ProductQuantizer(const char* fname);
 ProductQuantizer* read_ProductQuantizer(IOReader* reader);
 void write_ProductQuantizer(const ProductQuantizer* pq, const char* fname);
 void write_ProductQuantizer(const ProductQuantizer* pq, IOWriter* f);
 /// Write / read InvertedLists through an I/O object; io_flags is a
 /// combination of the IO_FLAG_* constants (0 when omitted).
 void write_InvertedLists(const InvertedLists* ils, IOWriter* f);
 InvertedLists* read_InvertedLists(IOReader* reader, int io_flags = 0);
 } // namespace faiss
 #endif
--- a/src/3rdlib/faiss/invlists/BlockInvertedLists.h
+++ b/src/3rdlib/faiss/invlists/BlockInvertedLists.h
@ -0,0 +1,74 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 #pragma once
 #include <faiss/index_io.h>
 #include <faiss/invlists/InvertedLists.h>
 #include <faiss/invlists/InvertedListsIOHook.h>
 #include <faiss/utils/AlignedTable.h>
 namespace faiss {
 /** Inverted Lists that are organized by blocks.
 *
 * Different from the regular inverted lists, the codes are organized by blocks
 * of size block_size bytes that represent a set of n_per_block. Therefore, code
 * allocations are always rounded up to block_size bytes. The codes are also
 * aligned on 32-byte boundaries for use with SIMD.
 *
 * To avoid misinterpretations, the code_size is set to (size_t)(-1), even if
 * arguably the amount of memory consumed by code is block_size / n_per_block.
 *
 * The writing functions add_entries and update_entries operate on block-aligned
 * data.
 */
 struct BlockInvertedLists : InvertedLists {
    size_t n_per_block; // nb of vectors stored per block
    size_t block_size;  // nb bytes per block
    // NOTE(review): presumably one entry per inverted list in both vectors;
    // confirm against the implementation
    std::vector<AlignedTable<uint8_t>> codes;
    std::vector<std::vector<idx_t>> ids;
    /// @param nlist          number of inverted lists
    /// @param vec_per_block  nb of vectors stored per block
    /// @param block_size     nb bytes per block
    BlockInvertedLists(size_t nlist, size_t vec_per_block, size_t block_size);
    BlockInvertedLists();
    size_t list_size(size_t list_no) const override;
    const uint8_t* get_codes(size_t list_no) const override;
    const idx_t* get_ids(size_t list_no) const override;
    // works only on empty BlockInvertedLists
    // the codes should be of size ceil(n_entry / n_per_block) * block_size
    // and padded with 0s
    size_t add_entries(
            size_t list_no,
            size_t n_entry,
            const idx_t* ids,
            const uint8_t* code) override;
    /// not implemented
    void update_entries(
            size_t list_no,
            size_t offset,
            size_t n_entry,
            const idx_t* ids,
            const uint8_t* code) override;
    // also pads new data with 0s
    void resize(size_t list_no, size_t new_size) override;
    ~BlockInvertedLists() override;
 };
 /// I/O hook for BlockInvertedLists: overrides the write and read callbacks
 /// of InvertedListsIOHook.
 struct BlockInvertedListsIOHook : InvertedListsIOHook {
    BlockInvertedListsIOHook();
    /// write ils to f
    void write(const InvertedLists* ils, IOWriter* f) const override;
    /// read inverted lists from f; io_flags is passed through from the caller
    InvertedLists* read(IOReader* f, int io_flags) const override;
 };
 } // namespace faiss
--- a/src/3rdlib/faiss/invlists/DirectMap.h
+++ b/src/3rdlib/faiss/invlists/DirectMap.h
@ -0,0 +1,116 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #ifndef FAISS_DIRECT_MAP_H
 #define FAISS_DIRECT_MAP_H
 #include <faiss/invlists/InvertedLists.h>
 #include <unordered_map>
 namespace faiss {
 // A (list id, offset) pair packed into a single uint64 is called
 // LO = list-offset: the list id sits in the upper 32 bits and the offset is
 // OR-ed into the result.
 inline uint64_t lo_build(uint64_t list_id, uint64_t offset) {
    const uint64_t upper = list_id << 32;
    return upper | offset;
 }
 /// Extract the list number (upper 32 bits) from an LO-encoded value.
 inline uint64_t lo_listno(uint64_t lo) {
    const uint64_t list_no = lo >> 32;
    return list_no;
 }
 /// Extract the offset (lower 32 bits) from an LO-encoded value.
 inline uint64_t lo_offset(uint64_t lo) {
    // truncating to 32 bits keeps exactly the low half
    return static_cast<uint32_t>(lo);
 }
 /**
 * Direct map: a way to map back from ids to inverted lists
 */
 struct DirectMap {
    typedef Index::idx_t idx_t;
    /// which storage backs the map
    enum Type {
        NoMap = 0,    // default
        Array = 1,    // sequential ids (only for add, no add_with_ids)
        Hashtable = 2 // arbitrary ids
    };
    Type type;
    /// map for direct access to the elements. Map ids to LO-encoded entries.
    /// NOTE(review): presumably `array` is used for Type::Array and
    /// `hashtable` for Type::Hashtable; confirm against the implementation.
    std::vector<idx_t> array;
    std::unordered_map<idx_t, idx_t> hashtable;
    DirectMap();
    /// set type and initialize
    void set_type(Type new_type, const InvertedLists* invlists, size_t ntotal);
    /// get an entry
    idx_t get(idx_t id) const;
    /// for quick checks
    bool no() const {
        return type == NoMap;
    }
    /**
     * update the direct_map
     */
    /// throw if Array and ids is not NULL
    void check_can_add(const idx_t* ids);
    /// non thread-safe version
    void add_single_id(idx_t id, idx_t list_no, size_t offset);
    /// remove all entries
    void clear();
    /**
     * operations on inverted lists that require translation with a DirectMap
     */
    /// remove ids from the InvertedLists, possibly using the direct map
    size_t remove_ids(const IDSelector& sel, InvertedLists* invlists);
    /// update entries, using the direct map
    void update_codes(
            InvertedLists* invlists,
            int n,
            const idx_t* ids,
            const idx_t* list_nos,
            const uint8_t* codes);
 };
 /// Thread-safe way of updating the direct_map
 struct DirectMapAdd {
    typedef Index::idx_t idx_t;
    using Type = DirectMap::Type;
    DirectMap& direct_map; ///< the map being updated
    DirectMap::Type type;
    size_t ntotal;
    size_t n;          ///< constructor argument n
    const idx_t* xids; ///< constructor argument xids (id of vector i is xids[i])
    // NOTE(review): presumably buffers the offsets passed to add() until the
    // destructor runs; confirm against the implementation
    std::vector<idx_t> all_ofs;
    DirectMapAdd(DirectMap& direct_map, size_t n, const idx_t* xids);
    /// add vector i (with id xids[i]) at list_no and offset
    void add(size_t i, idx_t list_no, size_t offset);
    ~DirectMapAdd();
 };
 } // namespace faiss
 #endif
--- a/src/3rdlib/faiss/invlists/InvertedLists.h
+++ b/src/3rdlib/faiss/invlists/InvertedLists.h
@ -0,0 +1,366 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #ifndef FAISS_INVERTEDLISTS_IVF_H
 #define FAISS_INVERTEDLISTS_IVF_H
 /**
 * Definition of inverted lists + a few common classes that implement
 * the interface.
 */
 #include <faiss/Index.h>
 #include <vector>
 namespace faiss {
 /** Table of inverted lists
 * multithreading rules:
 * - concurrent read accesses are allowed
 * - concurrent update accesses are allowed
 * - for resize and add_entries, only concurrent access to different lists
 *   are allowed
 */
 struct InvertedLists {
    typedef Index::idx_t idx_t;
    size_t nlist;     ///< number of possible key values
    size_t code_size; ///< code size per vector in bytes
    InvertedLists(size_t nlist, size_t code_size);
    /// used for BlockInvertedLists, where the codes are packed into groups
    /// and the individual code size is meaningless
    static const size_t INVALID_CODE_SIZE = static_cast<size_t>(-1);
    /*************************
     *  Read only functions */
    /// get the size of a list
    virtual size_t list_size(size_t list_no) const = 0;
    /** get the codes for an inverted list
     * must be released by release_codes
     *
     * @return codes    size list_size * code_size
     */
    virtual const uint8_t* get_codes(size_t list_no) const = 0;
    /** get the ids for an inverted list
     * must be released by release_ids
     *
     * @return ids      size list_size
     */
    virtual const idx_t* get_ids(size_t list_no) const = 0;
    /// release codes returned by get_codes (default implementation is nop)
    virtual void release_codes(size_t list_no, const uint8_t* codes) const;
    /// release ids returned by get_ids
    virtual void release_ids(size_t list_no, const idx_t* ids) const;
    /// @return a single id in an inverted list
    virtual idx_t get_single_id(size_t list_no, size_t offset) const;
    /// @return a single code in an inverted list
    /// (should be deallocated with release_codes)
    virtual const uint8_t* get_single_code(size_t list_no, size_t offset) const;
    /// prepare the following lists (default does nothing)
    /// a list can be -1 hence the signed long
    virtual void prefetch_lists(const idx_t* list_nos, int nlist) const;
    /*************************
     * writing functions     */
    /// add one entry to an inverted list
    virtual size_t add_entry(size_t list_no, idx_t theid, const uint8_t* code);
    /// add n_entry entries (ids and codes) to list list_no
    virtual size_t add_entries(
            size_t list_no,
            size_t n_entry,
            const idx_t* ids,
            const uint8_t* code) = 0;
    /// overwrite the entry at position offset of list list_no
    virtual void update_entry(
            size_t list_no,
            size_t offset,
            idx_t id,
            const uint8_t* code);
    /// overwrite n_entry entries of list list_no, starting at offset
    virtual void update_entries(
            size_t list_no,
            size_t offset,
            size_t n_entry,
            const idx_t* ids,
            const uint8_t* code) = 0;
    /// change the size of list list_no to new_size
    virtual void resize(size_t list_no, size_t new_size) = 0;
    virtual void reset();
    /// move all entries from oivf (empty on output)
    void merge_from(InvertedLists* oivf, size_t add_id);
    virtual ~InvertedLists();
    /*************************
     * statistics            */
    /// 1= perfectly balanced, >1: imbalanced
    double imbalance_factor() const;
    /// display some stats about the inverted lists
    void print_stats() const;
    /// sum up list sizes
    size_t compute_ntotal() const;
    /**************************************
     * Scoped inverted lists (for automatic deallocation)
     *
     * instead of writing:
     *
     *     uint8_t * codes = invlists->get_codes (10);
     *     ... use codes
     *     invlists->release_codes(10, codes)
     *
     * write:
     *
     *    ScopedCodes codes (invlists, 10);
     *    ... use codes.get()
     *    // release called automatically when codes goes out of scope
     *
     * the following function call also works:
     *
     *    foo (123, ScopedCodes (invlists, 10).get(), 456);
     *
     * NOTE(review): both helpers are implicitly copyable, and every copy
     * calls release_* on the same pointer in its destructor. That is harmless
     * if release is a nop, but may not be for other implementations; avoid
     * copying them.
     */
    /// holds the ids of one list, released with release_ids on destruction
    struct ScopedIds {
        const InvertedLists* il;
        const idx_t* ids;
        size_t list_no;
        ScopedIds(const InvertedLists* il, size_t list_no)
                : il(il), ids(il->get_ids(list_no)), list_no(list_no) {}
        const idx_t* get() {
            return ids;
        }
        idx_t operator[](size_t i) const {
            return ids[i];
        }
        ~ScopedIds() {
            il->release_ids(list_no, ids);
        }
    };
    /// holds the codes of one list (or a single code of it), released with
    /// release_codes on destruction
    struct ScopedCodes {
        const InvertedLists* il;
        const uint8_t* codes;
        size_t list_no;
        /// all codes of list list_no
        ScopedCodes(const InvertedLists* il, size_t list_no)
                : il(il), codes(il->get_codes(list_no)), list_no(list_no) {}
        /// the single code at position offset of list list_no
        ScopedCodes(const InvertedLists* il, size_t list_no, size_t offset)
                : il(il),
                  codes(il->get_single_code(list_no, offset)),
                  list_no(list_no) {}
        const uint8_t* get() {
            return codes;
        }
        ~ScopedCodes() {
            il->release_codes(list_no, codes);
        }
    };
 };
 /// simple (default) implementation as an array of inverted lists
 struct ArrayInvertedLists : InvertedLists {
    std::vector<std::vector<uint8_t>> codes; // binary codes, size nlist
    std::vector<std::vector<idx_t>> ids;     ///< Inverted lists for indexes
    /// @param nlist      number of inverted lists
    /// @param code_size  code size per vector in bytes
    ArrayInvertedLists(size_t nlist, size_t code_size);
    size_t list_size(size_t list_no) const override;
    const uint8_t* get_codes(size_t list_no) const override;
    const idx_t* get_ids(size_t list_no) const override;
    size_t add_entries(
            size_t list_no,
            size_t n_entry,
            const idx_t* ids,
            const uint8_t* code) override;
    void update_entries(
            size_t list_no,
            size_t offset,
            size_t n_entry,
            const idx_t* ids,
            const uint8_t* code) override;
    void resize(size_t list_no, size_t new_size) override;
    ~ArrayInvertedLists() override;
 };
 /*****************************************************************
 * Meta-inverted lists
 *
 * About terminology: the inverted lists are seen as a sparse matrix,
 * that can be stacked horizontally, vertically and sliced.
 *****************************************************************/
 /// invlists that fail for all write functions
 /// (overrides add_entries, update_entries and resize; the read functions
 /// are left to the subclasses)
 struct ReadOnlyInvertedLists : InvertedLists {
    ReadOnlyInvertedLists(size_t nlist, size_t code_size)
            : InvertedLists(nlist, code_size) {}
    size_t add_entries(
            size_t list_no,
            size_t n_entry,
            const idx_t* ids,
            const uint8_t* code) override;
    void update_entries(
            size_t list_no,
            size_t offset,
            size_t n_entry,
            const idx_t* ids,
            const uint8_t* code) override;
    void resize(size_t list_no, size_t new_size) override;
 };
 /// Horizontal stack of inverted lists
 struct HStackInvertedLists : ReadOnlyInvertedLists {
    /// the stacked inverted lists; stored as const pointers
    /// (NOTE(review): presumably not owned by this object; confirm)
    std::vector<const InvertedLists*> ils;
    /// build InvertedLists by concatenating nil of them
    /// @param nil  number of entries in ils
    /// @param ils  array of nil inverted lists
    HStackInvertedLists(int nil, const InvertedLists** ils);
    size_t list_size(size_t list_no) const override;
    const uint8_t* get_codes(size_t list_no) const override;
    const idx_t* get_ids(size_t list_no) const override;
    void prefetch_lists(const idx_t* list_nos, int nlist) const override;
    void release_codes(size_t list_no, const uint8_t* codes) const override;
    void release_ids(size_t list_no, const idx_t* ids) const override;
    idx_t get_single_id(size_t list_no, size_t offset) const override;
    const uint8_t* get_single_code(size_t list_no, size_t offset)
            const override;
 };
 /// alternative name for HStackInvertedLists
 using ConcatenatedInvertedLists = HStackInvertedLists;
 /// vertical slice of indexes in another InvertedLists
 struct SliceInvertedLists : ReadOnlyInvertedLists {
    const InvertedLists* il; ///< the InvertedLists being sliced
    /// bounds of the slice
    /// (NOTE(review): presumably the list range [i0, i1); confirm)
    idx_t i0, i1;
    SliceInvertedLists(const InvertedLists* il, idx_t i0, idx_t i1);
    size_t list_size(size_t list_no) const override;
    const uint8_t* get_codes(size_t list_no) const override;
    const idx_t* get_ids(size_t list_no) const override;
    void release_codes(size_t list_no, const uint8_t* codes) const override;
    void release_ids(size_t list_no, const idx_t* ids) const override;
    idx_t get_single_id(size_t list_no, size_t offset) const override;
    const uint8_t* get_single_code(size_t list_no, size_t offset)
            const override;
    void prefetch_lists(const idx_t* list_nos, int nlist) const override;
 };
 /// Read-only combination of several InvertedLists
 /// (NOTE(review): by analogy with HStackInvertedLists this is presumably the
 /// vertical stack; confirm against the implementation)
 struct VStackInvertedLists : ReadOnlyInvertedLists {
    std::vector<const InvertedLists*> ils; ///< the stacked inverted lists
    /// NOTE(review): presumably cumulative sizes of the entries of ils;
    /// confirm
    std::vector<idx_t> cumsz;
    /// build InvertedLists by concatenating nil of them
    /// @param nil  number of entries in ils
    /// @param ils  array of nil inverted lists
    VStackInvertedLists(int nil, const InvertedLists** ils);
    size_t list_size(size_t list_no) const override;
    const uint8_t* get_codes(size_t list_no) const override;
    const idx_t* get_ids(size_t list_no) const override;
    void release_codes(size_t list_no, const uint8_t* codes) const override;
    void release_ids(size_t list_no, const idx_t* ids) const override;
    idx_t get_single_id(size_t list_no, size_t offset) const override;
    const uint8_t* get_single_code(size_t list_no, size_t offset)
            const override;
    void prefetch_lists(const idx_t* list_nos, int nlist) const override;
 };
 /** Use the first inverted lists if they are non-empty, otherwise use the
 * second.
 *
 * This is useful if il1 has a few inverted lists that are too long,
 * and il0 has replacement lists for those, with empty lists for
 * the others. */
 struct MaskedInvertedLists : ReadOnlyInvertedLists {
    const InvertedLists* il0; ///< consulted first
    const InvertedLists* il1; ///< fallback
    MaskedInvertedLists(const InvertedLists* il0, const InvertedLists* il1);
    size_t list_size(size_t list_no) const override;
    const uint8_t* get_codes(size_t list_no) const override;
    const idx_t* get_ids(size_t list_no) const override;
    void release_codes(size_t list_no, const uint8_t* codes) const override;
    void release_ids(size_t list_no, const idx_t* ids) const override;
    idx_t get_single_id(size_t list_no, size_t offset) const override;
    const uint8_t* get_single_code(size_t list_no, size_t offset)
            const override;
    void prefetch_lists(const idx_t* list_nos, int nlist) const override;
 };
 /** If the inverted list in il is smaller than maxsize then return it,
 *  otherwise return an empty invlist. */
 struct StopWordsInvertedLists : ReadOnlyInvertedLists {
    const InvertedLists* il0; ///< the wrapped inverted lists
    size_t maxsize;           ///< size threshold described above
    StopWordsInvertedLists(const InvertedLists* il, size_t maxsize);
    size_t list_size(size_t list_no) const override;
    const uint8_t* get_codes(size_t list_no) const override;
    const idx_t* get_ids(size_t list_no) const override;
    void release_codes(size_t list_no, const uint8_t* codes) const override;
    void release_ids(size_t list_no, const idx_t* ids) const override;
    idx_t get_single_id(size_t list_no, size_t offset) const override;
    const uint8_t* get_single_code(size_t list_no, size_t offset)
            const override;
    void prefetch_lists(const idx_t* list_nos, int nlist) const override;
 };
 } // namespace faiss
 #endif
--- a/src/3rdlib/faiss/invlists/InvertedListsIOHook.h
+++ b/src/3rdlib/faiss/invlists/InvertedListsIOHook.h
@ -0,0 +1,62 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 #pragma once
 #include <faiss/impl/io.h>
 #include <faiss/invlists/InvertedLists.h>
 #include <string>
 namespace faiss {
 /** Callbacks to handle other types of InvertedList objects.
 *
 * The callbacks should be registered with add_callback before calling
 * read_index or read_InvertedLists. The callbacks for
 * OnDiskInvertedLists are registered by default. The invlist type is
 * identified by:
 *
 * - the key (a fourcc) at read time
 * - the class name (as given by typeid.name) at write time
 */
 struct InvertedListsIOHook {
    const std::string key;       ///< string version of the fourcc
    const std::string classname; ///< typeid.name
    InvertedListsIOHook(const std::string& key, const std::string& classname);
    /// write the index to the IOWriter (including the fourcc)
    virtual void write(const InvertedLists* ils, IOWriter* f) const = 0;
    /// called when the fourcc matches this class's fourcc
    virtual InvertedLists* read(IOReader* f, int io_flags) const = 0;
    /** read from a ArrayInvertedLists into this invertedlist type.
     * For this to work, the callback has to be enabled and the io_flag has to
     * be set to IO_FLAG_SKIP_IVF_DATA | (16 upper bits of the fourcc)
     *
     * (default implementation fails)
     *
     * NOTE(review): nlist, code_size and sizes presumably describe the
     * ArrayInvertedLists being read (number of lists, bytes per code, size
     * of each list) -- confirm against the implementation.
     */
    virtual InvertedLists* read_ArrayInvertedLists(
            IOReader* f,
            int io_flags,
            size_t nlist,
            size_t code_size,
            const std::vector<size_t>& sizes) const;
    // virtual so that derived hooks can be destroyed through a base pointer
    virtual ~InvertedListsIOHook() {}
    /**************************** Manage the set of callbacks ******/
    // transfers ownership
    static void add_callback(InvertedListsIOHook*);
    static void print_callbacks();
    // NOTE(review): h is presumably the fourcc in integer form -- confirm
    static InvertedListsIOHook* lookup(int h);
    // find a registered hook from the class name (see classname above)
    static InvertedListsIOHook* lookup_classname(const std::string& classname);
 };
 } // namespace faiss
--- a/src/3rdlib/faiss/invlists/OnDiskInvertedLists.h
+++ b/src/3rdlib/faiss/invlists/OnDiskInvertedLists.h
@ -0,0 +1,155 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 #ifndef FAISS_ON_DISK_INVERTED_LISTS_H
 #define FAISS_ON_DISK_INVERTED_LISTS_H
 #include <list>
 #include <typeinfo>
 #include <vector>
 #include <faiss/IndexIVF.h>
 #include <faiss/index_io.h>
 #include <faiss/invlists/InvertedListsIOHook.h>
 namespace faiss {
 struct LockLevels;
 /// Bookkeeping record for one inverted list: its used size, its allocated
 /// capacity and its position in the buffer.
 struct OnDiskOneList {
    size_t size;     // size of inverted list (entries)
    size_t capacity; // allocated size (entries)
    size_t offset;   // offset in buffer (bytes)
    OnDiskOneList();
 };
 /** On-disk storage of inverted lists.
 *
 * The data is stored in a mmapped chunk of memory (base pointer ptr,
 * size totsize). Each list is a range of memory (described by an object
 * List) that contains:
 *
 * - uint8_t codes[capacity * code_size]
 * - followed by idx_t ids[capacity]
 *
 * in each of the arrays, the size <= capacity first elements are
 * used, the rest is not initialized.
 *
 * Addition and resize are supported by:
 * - rounding up the capacity of the lists to a power of two
 * - maintaining a list of empty slots, sorted by size.
 * - resizing the mmapped block as needed.
 *
 * An OnDiskInvertedLists is compact if the size == capacity for all
 * lists and there are no available slots.
 *
 * Addition to the invlists is slow. For incremental add it is better
 * to use a default ArrayInvertedLists object and convert it to an
 * OnDisk with merge_from.
 *
 * When it is known that a set of lists will be accessed, it is useful
 * to call prefetch_lists, that launches a set of threads to read the
 * lists in parallel.
 */
 struct OnDiskInvertedLists : InvertedLists {
    using List = OnDiskOneList;
    // size nlist
    std::vector<List> lists;
    /// a free range of the buffer (see the list of empty slots above)
    struct Slot {
        size_t offset;   // bytes
        size_t capacity; // bytes
        Slot(size_t offset, size_t capacity);
        Slot();
    };
    // size whatever space remains
    std::list<Slot> slots;
    std::string filename;
    size_t totsize; // size of the mmapped chunk
    uint8_t* ptr;   // mmap base pointer
    bool read_only; /// are inverted lists mapped read-only
    OnDiskInvertedLists(size_t nlist, size_t code_size, const char* filename);
    // read accessors overridden from InvertedLists
    size_t list_size(size_t list_no) const override;
    const uint8_t* get_codes(size_t list_no) const override;
    const idx_t* get_ids(size_t list_no) const override;
    // write accessors overridden from InvertedLists
    size_t add_entries(
            size_t list_no,
            size_t n_entry,
            const idx_t* ids,
            const uint8_t* code) override;
    void update_entries(
            size_t list_no,
            size_t offset,
            size_t n_entry,
            const idx_t* ids,
            const uint8_t* code) override;
    void resize(size_t list_no, size_t new_size) override;
    // copy all inverted lists into *this, in compact form (without
    // allocating slots)
    size_t merge_from(
            const InvertedLists** ils,
            int n_il,
            bool verbose = false);
    /// same as merge_from for a single invlist
    size_t merge_from_1(const InvertedLists* il, bool verbose = false);
    /// restrict the inverted lists to l0:l1 without touching the mmapped region
    void crop_invlists(size_t l0, size_t l1);
    void prefetch_lists(const idx_t* list_nos, int nlist) const override;
    ~OnDiskInvertedLists() override;
    // private
    LockLevels* locks;
    // encapsulates the threads that are busy prefetching
    struct OngoingPrefetch;
    OngoingPrefetch* pf;
    int prefetch_nthread;
    void do_mmap();
    void update_totsize(size_t new_totsize);
    // NOTE(review): presumably resize() with the lock already held -- confirm
    void resize_locked(size_t list_no, size_t new_size);
    // management of the empty slots
    size_t allocate_slot(size_t capacity);
    void free_slot(size_t offset, size_t capacity);
    /// override all list sizes and make a packed storage
    void set_all_lists_sizes(const size_t* sizes);
    // empty constructor for the I/O functions
    OnDiskInvertedLists();
 };
 /// InvertedListsIOHook for OnDiskInvertedLists: overrides the write / read
 /// callbacks and read_ArrayInvertedLists of the base hook.
 struct OnDiskInvertedListsIOHook : InvertedListsIOHook {
    OnDiskInvertedListsIOHook();
    void write(const InvertedLists* ils, IOWriter* f) const override;
    InvertedLists* read(IOReader* f, int io_flags) const override;
    InvertedLists* read_ArrayInvertedLists(
            IOReader* f,
            int io_flags,
            size_t nlist,
            size_t code_size,
            const std::vector<size_t>& sizes) const override;
 };
 } // namespace faiss
 #endif
--- a/src/3rdlib/faiss/utils/AlignedTable.h
+++ b/src/3rdlib/faiss/utils/AlignedTable.h
@ -0,0 +1,176 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 #pragma once
 #include <cassert>
 #include <cstdint>
 #include <cstdlib>
 #include <cstring>
 #include <algorithm>
 #include <new>
 #include <faiss/impl/platform_macros.h>
 namespace faiss {
 // Tells whether the address held by x is a multiple of A bytes
 // (A defaults to 32).
 template <int A = 32>
 inline bool is_aligned_pointer(const void* x) {
    const size_t address = reinterpret_cast<size_t>(x);
    const size_t remainder = address % A;
    return remainder == 0;
 }
 // Class that manages suitably aligned arrays for SIMD: it owns a buffer of
 // numel elements of type T obtained from posix_memalign with alignment A.
 // T should be a POD type (elements are only ever moved around with
 // memcpy / memset). The default alignment is 32 for AVX.
 template <class T, int A = 32>
 struct AlignedTableTightAlloc {
    T* ptr;       // aligned buffer; nullptr while the table is empty
    size_t numel; // number of elements in the buffer
    AlignedTableTightAlloc() : ptr(nullptr), numel(0) {}
    explicit AlignedTableTightAlloc(size_t n) : ptr(nullptr), numel(0) {
        resize(n);
    }
    // size of one element, in bytes
    size_t itemsize() const {
        return sizeof(T);
    }
    // Resize to exactly n elements. The first min(numel, n) elements are
    // preserved, any additional element is left uninitialized.
    // Throws std::bad_alloc if the allocation fails or if the byte size does
    // not fit in a size_t.
    void resize(size_t n) {
        if (numel == n) {
            return;
        }
        T* new_ptr = nullptr;
        if (n > 0) {
            // n * sizeof(T) must not wrap around, otherwise a buffer that is
            // too small would be allocated silently
            if (n > static_cast<size_t>(-1) / sizeof(T)) {
                throw std::bad_alloc();
            }
            int ret = posix_memalign((void**)&new_ptr, A, n * sizeof(T));
            if (ret != 0) {
                throw std::bad_alloc();
            }
            if (numel > 0) {
                memcpy(new_ptr, ptr, sizeof(T) * std::min(numel, n));
            }
        }
        numel = n;
        posix_memalign_free(ptr);
        ptr = new_ptr;
    }
    // set all the elements to 0 bytes (the size is unchanged)
    void clear() {
        // an empty table has a null ptr, which must not be passed to memset
        if (numel > 0) {
            memset(ptr, 0, nbytes());
        }
    }
    // number of elements
    size_t size() const {
        return numel;
    }
    // size of the buffer, in bytes
    size_t nbytes() const {
        return numel * sizeof(T);
    }
    // get() and data() both return the start of the buffer
    T* get() {
        return ptr;
    }
    const T* get() const {
        return ptr;
    }
    T* data() {
        return ptr;
    }
    const T* data() const {
        return ptr;
    }
    // unchecked element access; the const version returns a copy
    T& operator[](size_t i) {
        return ptr[i];
    }
    T operator[](size_t i) const {
        return ptr[i];
    }
    ~AlignedTableTightAlloc() {
        posix_memalign_free(ptr);
    }
    // deep copy: *this gets its own buffer with the contents of other
    AlignedTableTightAlloc<T, A>& operator=(
            const AlignedTableTightAlloc<T, A>& other) {
        // self-assignment would memcpy a buffer onto itself
        if (this != &other) {
            resize(other.numel);
            // nothing to copy (and null pointers) when other is empty
            if (numel > 0) {
                memcpy(ptr, other.ptr, sizeof(T) * numel);
            }
        }
        return *this;
    }
    AlignedTableTightAlloc(const AlignedTableTightAlloc<T, A>& other)
            : ptr(nullptr), numel(0) {
        *this = other;
    }
 };
 // Same interface as AlignedTableTightAlloc, but the backing buffer is
 // re-allocated geometrically: the logical size (numel) is tracked separately
 // from the capacity allocated in tab.
 template <class T, int A = 32>
 struct AlignedTable {
    AlignedTableTightAlloc<T, A> tab; // backing buffer, sized to the capacity
    size_t numel = 0;                 // logical number of elements
    // Capacity to allocate for n elements: 0 for an empty table, otherwise
    // 8 * A doubled until it is at least n.
    static size_t round_capacity(size_t n) {
        if (n == 0) {
            return 0;
        }
        size_t capacity = 8 * A;
        for (; capacity < n; capacity *= 2) {
        }
        return capacity;
    }
    AlignedTable() {}
    explicit AlignedTable(size_t n) : tab(round_capacity(n)), numel(n) {}
    // size of one element, in bytes
    size_t itemsize() const {
        return sizeof(T);
    }
    // change the logical size to n, growing or shrinking the backing buffer
    // to the rounded capacity
    void resize(size_t n) {
        const size_t capacity = round_capacity(n);
        tab.resize(capacity);
        numel = n;
    }
    // zero the whole backing buffer (the size is unchanged)
    void clear() {
        tab.clear();
    }
    // logical number of elements
    size_t size() const {
        return numel;
    }
    // logical size in bytes (not the allocated capacity)
    size_t nbytes() const {
        return sizeof(T) * numel;
    }
    // get() and data() both return the start of the backing buffer
    T* get() {
        return tab.ptr;
    }
    const T* get() const {
        return tab.ptr;
    }
    T* data() {
        return tab.ptr;
    }
    const T* data() const {
        return tab.ptr;
    }
    // unchecked element access; the const version returns a copy
    T& operator[](size_t i) {
        return tab[i];
    }
    T operator[](size_t i) const {
        return tab[i];
    }
    // assign and copy constructor should work as expected
 };
 } // namespace faiss
--- a/src/3rdlib/faiss/utils/Heap.h
+++ b/src/3rdlib/faiss/utils/Heap.h
@ -0,0 +1,481 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 /*
 * C++ support for heaps. The set of functions is tailored for efficient
 * similarity search.
 *
 * There is no specific object for a heap, and the functions that operate on a
 * single heap are inlined, because heaps are often small. More complex
 * functions are implemented in Heap.cpp
 *
 * All heap functions rely on a C template class that defines the type of the
 * keys and values and their ordering (increasing with CMax and decreasing with
 * CMin). The C types are defined in ordered_key_value.h
 */
 #ifndef FAISS_Heap_h
 #define FAISS_Heap_h
 #include <climits>
 #include <cmath>
 #include <cstring>
 #include <stdint.h>
 #include <cassert>
 #include <cstdio>
 #include <limits>
 #include <faiss/utils/ordered_key_value.h>
 namespace faiss {
 /*******************************************************************
 * Basic heap ops: push and pop
 *******************************************************************/
 /** Removes the top element of the heap stored in bh_val[0..k-1] and
 * bh_ids[0..k-1]. On output the heap occupies the first k-1 entries and the
 * element at k-1 is undefined.
 *
 * The last element is re-inserted from the root: the hole left by the top
 * moves down towards the child selected with C::cmp until the last element
 * can be stored in it.
 */
 template <class C>
 inline void heap_pop(size_t k, typename C::T* bh_val, typename C::TI* bh_ids) {
    const typename C::T last_val = bh_val[k - 1];
    size_t hole = 0;
    for (;;) {
        /* children of node n (0-based) are 2n + 1 and 2n + 2 */
        const size_t left = 2 * hole + 1;
        const size_t right = left + 1;
        if (left >= k) {
            break;
        }
        /* take the left child when it is the only one, or when C::cmp
         * ranks it before the right one */
        const bool take_left =
                right == k || C::cmp(bh_val[left], bh_val[right]);
        const size_t child = take_left ? left : right;
        if (C::cmp(last_val, bh_val[child])) {
            break;
        }
        bh_val[hole] = bh_val[child];
        bh_ids[hole] = bh_ids[child];
        hole = child;
    }
    bh_val[hole] = bh_val[k - 1];
    bh_ids[hole] = bh_ids[k - 1];
 }
 /** Inserts the element (val, ids) into the heap stored in bh_val[0..k-2] and
 * bh_ids[0..k-2]. On output the heap has k elements, i.e. the entry at k-1
 * is defined.
 *
 * The new element starts in the last slot and its ancestors are shifted down
 * for as long as C::cmp(val, ancestor) holds.
 */
 template <class C>
 inline void heap_push(
        size_t k,
        typename C::T* bh_val,
        typename C::TI* bh_ids,
        typename C::T val,
        typename C::TI ids) {
    /* slots are numbered from 1 so that the father of slot s is s / 2;
     * slot s is stored at array index s - 1 */
    size_t slot = k;
    for (; slot > 1;) {
        const size_t father = slot / 2;
        if (!C::cmp(val, bh_val[father - 1])) {
            break; /* the heap structure is ok */
        }
        bh_val[slot - 1] = bh_val[father - 1];
        bh_ids[slot - 1] = bh_ids[father - 1];
        slot = father;
    }
    bh_val[slot - 1] = val;
    bh_ids[slot - 1] = ids;
 }
 /** Replaces the top element of the heap stored in bh_val[0..k-1] and
 * bh_ids[0..k-1] with (val, ids). The heap keeps its k elements.
 *
 * Same descent as heap_pop, except that the element being placed is the new
 * (val, ids) instead of the last element of the heap.
 */
 template <class C>
 inline void heap_replace_top(
        size_t k,
        typename C::T* bh_val,
        typename C::TI* bh_ids,
        typename C::T val,
        typename C::TI ids) {
    size_t hole = 0;
    for (;;) {
        /* children of node n (0-based) are 2n + 1 and 2n + 2 */
        const size_t left = 2 * hole + 1;
        const size_t right = left + 1;
        if (left >= k) {
            break;
        }
        /* take the left child when it is the only one, or when C::cmp
         * ranks it before the right one */
        const bool take_left =
                right == k || C::cmp(bh_val[left], bh_val[right]);
        const size_t child = take_left ? left : right;
        if (C::cmp(val, bh_val[child])) {
            break;
        }
        bh_val[hole] = bh_val[child];
        bh_ids[hole] = bh_ids[child];
        hole = child;
    }
    bh_val[hole] = val;
    bh_ids[hole] = ids;
 }
 /* Partial instanciation for heaps with TI = int64_t */
 template <typename T>
 inline void minheap_pop(size_t k, T* bh_val, int64_t* bh_ids) {
    heap_pop<CMin<T, int64_t>>(k, bh_val, bh_ids);
 }
 template <typename T>
 inline void minheap_push(
        size_t k,
        T* bh_val,
        int64_t* bh_ids,
        T val,
        int64_t ids) {
    heap_push<CMin<T, int64_t>>(k, bh_val, bh_ids, val, ids);
 }
 template <typename T>
 inline void minheap_replace_top(
        size_t k,
        T* bh_val,
        int64_t* bh_ids,
        T val,
        int64_t ids) {
    heap_replace_top<CMin<T, int64_t>>(k, bh_val, bh_ids, val, ids);
 }
 template <typename T>
 inline void maxheap_pop(size_t k, T* bh_val, int64_t* bh_ids) {
    heap_pop<CMax<T, int64_t>>(k, bh_val, bh_ids);
 }
 template <typename T>
 inline void maxheap_push(
        size_t k,
        T* bh_val,
        int64_t* bh_ids,
        T val,
        int64_t ids) {
    heap_push<CMax<T, int64_t>>(k, bh_val, bh_ids, val, ids);
 }
 template <typename T>
 inline void maxheap_replace_top(
        size_t k,
        T* bh_val,
        int64_t* bh_ids,
        T val,
        int64_t ids) {
    heap_replace_top<CMax<T, int64_t>>(k, bh_val, bh_ids, val, ids);
 }
 /*******************************************************************
 * Heap initialization
 *******************************************************************/
 /* Initialization phase for the heap (with unconditional pushes).
 * Stores the k0 elements of x in a heap that can contain up to k values; the
 * remaining k - k0 entries are filled with (C::neutral(), -1).
 * The ids of the pushed elements are taken from ids, or are the positions
 * 0..k0-1 when ids is null.
 * Note that (bh_val, bh_ids) can be the same as (x, ids) */
 template <class C>
 inline void heap_heapify(
        size_t k,
        typename C::T* bh_val,
        typename C::TI* bh_ids,
        const typename C::T* x = nullptr,
        const typename C::TI* ids = nullptr,
        size_t k0 = 0) {
    if (k0 > 0) {
        assert(x);
    }
    for (size_t pos = 0; pos < k0; ++pos) {
        /* without explicit ids, an element is labelled with its position */
        const typename C::TI label =
                ids ? ids[pos] : static_cast<typename C::TI>(pos);
        heap_push<C>(pos + 1, bh_val, bh_ids, x[pos], label);
    }
    /* mark the unused part of the heap as empty */
    for (size_t pos = k0; pos < k; ++pos) {
        bh_ids[pos] = -1;
        bh_val[pos] = C::neutral();
    }
 }
 template <typename T>
 inline void minheap_heapify(
        size_t k,
        T* bh_val,
        int64_t* bh_ids,
        const T* x = nullptr,
        const int64_t* ids = nullptr,
        size_t k0 = 0) {
    heap_heapify<CMin<T, int64_t>>(k, bh_val, bh_ids, x, ids, k0);
 }
 template <typename T>
 inline void maxheap_heapify(
        size_t k,
        T* bh_val,
        int64_t* bh_ids,
        const T* x = nullptr,
        const int64_t* ids = nullptr,
        size_t k0 = 0) {
    heap_heapify<CMax<T, int64_t>>(k, bh_val, bh_ids, x, ids, k0);
 }
 /*******************************************************************
 * Add n elements to the heap
 *******************************************************************/
 /* Add some elements to the heap: each of the n values of x replaces the
 * current top whenever C::cmp(top, x[i]) holds. The ids of the added
 * elements are taken from ids, or are the positions 0..n-1 when ids is
 * null. */
 template <class C>
 inline void heap_addn(
        size_t k,
        typename C::T* bh_val,
        typename C::TI* bh_ids,
        const typename C::T* x,
        const typename C::TI* ids,
        size_t n) {
    for (size_t pos = 0; pos < n; ++pos) {
        if (!C::cmp(bh_val[0], x[pos])) {
            continue; /* x[pos] does not displace the top */
        }
        const typename C::TI label =
                ids ? ids[pos] : static_cast<typename C::TI>(pos);
        heap_replace_top<C>(k, bh_val, bh_ids, x[pos], label);
    }
 }
 /* Partial instanciation for heaps with TI = int64_t */
 template <typename T>
 inline void minheap_addn(
        size_t k,
        T* bh_val,
        int64_t* bh_ids,
        const T* x,
        const int64_t* ids,
        size_t n) {
    heap_addn<CMin<T, int64_t>>(k, bh_val, bh_ids, x, ids, n);
 }
 template <typename T>
 inline void maxheap_addn(
        size_t k,
        T* bh_val,
        int64_t* bh_ids,
        const T* x,
        const int64_t* ids,
        size_t n) {
    heap_addn<CMax<T, int64_t>>(k, bh_val, bh_ids, x, ids, n);
 }
 /*******************************************************************
 * Heap finalization (reorder elements)
 *******************************************************************/
 /* This function maps a binary heap into a sorted structure: the heap is
   popped k times and the popped elements are stored from the end of the
   arrays backwards, then moved to the front. Entries with id -1 are not
   counted as elements; the unused tail is refilled with (C::neutral(), -1).
   It returns the number of elements whose id is not -1. */
 template <typename C>
 inline size_t heap_reorder(
        size_t k,
        typename C::T* bh_val,
        typename C::TI* bh_ids) {
    size_t n_valid = 0;
    for (size_t n_popped = 0; n_popped < k; ++n_popped) {
        /* top element should be put at the end of the list */
        const typename C::T top_val = bh_val[0];
        const typename C::TI top_id = bh_ids[0];
        heap_pop<C>(k - n_popped, bh_val, bh_ids);
        /* boundary case: this slot is written again by the next iteration
         * if the element is not a true one (n_valid does not advance) */
        const size_t dst = k - n_valid - 1;
        bh_val[dst] = top_val;
        bh_ids[dst] = top_id;
        if (top_id != -1) {
            ++n_valid;
        }
    }
    /* the n_valid true elements sit at the end: bring them to the front */
    memmove(bh_val, bh_val + k - n_valid, n_valid * sizeof(*bh_val));
    memmove(bh_ids, bh_ids + k - n_valid, n_valid * sizeof(*bh_ids));
    for (size_t pos = n_valid; pos < k; ++pos) {
        bh_val[pos] = C::neutral();
        bh_ids[pos] = -1;
    }
    /* number of elements which are effectively returned */
    return n_valid;
 }
 template <typename T>
 inline size_t minheap_reorder(size_t k, T* bh_val, int64_t* bh_ids) {
    return heap_reorder<CMin<T, int64_t>>(k, bh_val, bh_ids);
 }
 template <typename T>
 inline size_t maxheap_reorder(size_t k, T* bh_val, int64_t* bh_ids) {
    return heap_reorder<CMax<T, int64_t>>(k, bh_val, bh_ids);
 }
 /*******************************************************************
 * Operations on heap arrays
 *******************************************************************/
 /** A template structure for a set of [min|max]-heaps. It is tailored
 * so that the actual data of the heaps can just live in compact
 * arrays: heap number key uses the k entries starting at key * k of
 * ids and val.
 */
 template <typename C>
 struct HeapArray {
    using TI = typename C::TI;
    using T = typename C::T;
    size_t nh; ///< number of heaps
    size_t k;  ///< allocated size per heap
    TI* ids;   ///< identifiers (size nh * k)
    T* val;    ///< values (distances or similarities), size nh * k
    /// Return the list of values for a heap
    T* get_val(size_t key) {
        const size_t first = key * k;
        return val + first;
    }
    /// Corresponding identifiers
    TI* get_ids(size_t key) {
        const size_t first = key * k;
        return ids + first;
    }
    /// prepare all the heaps before adding
    void heapify();
    /** add nj elements to heaps i0:i0+ni, with sequential ids
     *
     * @param nj    nb of elements to add to each heap
     * @param vin   elements to add, size ni * nj
     * @param j0    add this to the ids that are added
     * @param i0    first heap to update
     * @param ni    nb of elements to update (-1 = use nh)
     */
    void addn(
            size_t nj,
            const T* vin,
            TI j0 = 0,
            size_t i0 = 0,
            int64_t ni = -1);
    /** same as addn, with explicit ids
     *
     * @param id_in     ids of the elements to add, size ni * nj
     * @param id_stride stride for id_in
     */
    void addn_with_ids(
            size_t nj,
            const T* vin,
            const TI* id_in = nullptr,
            int64_t id_stride = 0,
            size_t i0 = 0,
            int64_t ni = -1);
    /// reorder all the heaps
    void reorder();
    /** this is not really a heap function. It just finds the per-line
     *   extrema of each line of array D
     * @param vals_out    extreme value of each line (size nh, or NULL)
     * @param idx_out     index of extreme value (size nh or NULL)
     */
    void per_line_extrema(T* vals_out, TI* idx_out) const;
 };
 /* Heap arrays for float and int values, all with int64_t identifiers */
 using float_minheap_array_t = HeapArray<CMin<float, int64_t>>;
 using int_minheap_array_t = HeapArray<CMin<int, int64_t>>;
 using float_maxheap_array_t = HeapArray<CMax<float, int64_t>>;
 using int_maxheap_array_t = HeapArray<CMax<int, int64_t>>;
 // The heap templates are instantiated explicitly in Heap.cpp
 /*********************************************************************
 * Indirect heaps: instead of having
 *
 *          node i = (bh_ids[i], bh_val[i]),
 *
 * in indirect heaps,
 *
 *          node i = (bh_ids[i], bh_val[bh_ids[i]]),
 *
 *********************************************************************/
 /** Pops the top element of the indirect heap made of the k identifiers
 * bh_ids[0..k-1]; the value of an identifier id is bh_val[id], and bh_val
 * itself is not modified. On output the heap occupies the first k-1 entries
 * of bh_ids.
 *
 * The identifier of the right child is only read when that child exists
 * (i2 <= k), so that nothing is read past the k entries of bh_ids.
 */
 template <class C>
 inline void indirect_heap_pop(
        size_t k,
        const typename C::T* bh_val,
        typename C::TI* bh_ids) {
    bh_ids--; /* Use 1-based indexing for easier node->child translation */
    typename C::T val = bh_val[bh_ids[k]];
    size_t i = 1;
    while (1) {
        size_t i1 = i << 1;
        size_t i2 = i1 + 1;
        if (i1 > k)
            break;
        typename C::TI id1 = bh_ids[i1];
        /* i2 == k + 1 means that there is no right child: the || below
         * short-circuits before bh_ids[i2] is evaluated */
        if (i2 == k + 1 || C::cmp(bh_val[id1], bh_val[bh_ids[i2]])) {
            if (C::cmp(val, bh_val[id1]))
                break;
            bh_ids[i] = id1;
            i = i1;
        } else {
            typename C::TI id2 = bh_ids[i2];
            if (C::cmp(val, bh_val[id2]))
                break;
            bh_ids[i] = id2;
            i = i2;
        }
    }
    bh_ids[i] = bh_ids[k];
 }
 /** Inserts the identifier id into the indirect heap bh_ids[0..k-2], whose
 * values are looked up in bh_val (the value of id is bh_val[id]). On output
 * the heap has k identifiers.
 */
 template <class C>
 inline void indirect_heap_push(
        size_t k,
        const typename C::T* bh_val,
        typename C::TI* bh_ids,
        typename C::TI id) {
    const typename C::T pushed_val = bh_val[id];
    /* slots are numbered from 1 so that the father of slot s is s / 2;
     * slot s is stored at array index s - 1 */
    size_t slot = k;
    for (; slot > 1;) {
        const size_t father = slot / 2;
        const typename C::TI father_id = bh_ids[father - 1];
        if (!C::cmp(pushed_val, bh_val[father_id])) {
            break;
        }
        bh_ids[slot - 1] = father_id;
        slot = father;
    }
    bh_ids[slot - 1] = id;
 }
 } // namespace faiss
 #endif /* FAISS_Heap_h */
--- a/src/3rdlib/faiss/utils/WorkerThread.h
+++ b/src/3rdlib/faiss/utils/WorkerThread.h
@ -0,0 +1,60 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 #pragma once
 #include <condition_variable>
 #include <deque>
 #include <future>
 #include <thread>
 namespace faiss {
 /// Runs queued lambdas on a dedicated thread (thread_); each queued lambda
 /// comes with a promise that reports whether it was run.
 class WorkerThread {
   public:
    WorkerThread();
    /// Stops and waits for the worker thread to exit, flushing all
    /// pending lambdas
    ~WorkerThread();
    /// Request that the worker thread stop itself
    void stop();
    /// Blocking waits in the current thread for the worker thread to
    /// stop
    void waitForThreadExit();
    /// Adds a lambda to run on the worker thread; returns a future that
    /// can be used to block on its completion.
    /// Future status is `true` if the lambda was run in the worker
    /// thread; `false` if it was not run, because the worker thread is
    /// exiting or has exited.
    std::future<bool> add(std::function<void()> f);
   private:
    // NOTE(review): presumably startThread launches thread_ on threadMain,
    // which in turn runs threadLoop -- confirm in WorkerThread.cpp
    void startThread();
    void threadMain();
    void threadLoop();
    /// Thread that all queued lambdas are run on
    std::thread thread_;
    /// Mutex for the queue and exit status
    std::mutex mutex_;
    /// Monitor for the exit status and the queue
    std::condition_variable monitor_;
    /// Whether or not we want the thread to exit
    bool wantStop_;
    /// Queue of pending lambdas to call, each paired with the promise
    /// behind the future returned by add()
    std::deque<std::pair<std::function<void()>, std::promise<bool>>> queue_;
 };
 } // namespace faiss
--- a/src/3rdlib/faiss/utils/distances.h
+++ b/src/3rdlib/faiss/utils/distances.h
@ -0,0 +1,300 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 /* All distance functions for L2 and IP distances.
 * The actual functions are implemented in distances.cpp and distances_simd.cpp
 */
 #pragma once
 #include <stdint.h>
 #include <faiss/impl/platform_macros.h>
 #include <faiss/utils/Heap.h>
 namespace faiss {
 /*********************************************************
 * Optimized distance/norm/inner prod computations
 *********************************************************/
 /// Squared L2 distance between two vectors x and y of size d
 float fvec_L2sqr(const float* x, const float* y, size_t d);
 /// Inner product of two vectors x and y of size d
 float fvec_inner_product(const float* x, const float* y, size_t d);
 /// L1 distance between two vectors x and y of size d
 float fvec_L1(const float* x, const float* y, size_t d);
 /// Infinity (L-infinity) distance between two vectors x and y of size d
 float fvec_Linf(const float* x, const float* y, size_t d);
 /** Compute pairwise distances between sets of vectors
 *
 * @param d     dimension of the vectors
 * @param nq    nb of query vectors
 * @param nb    nb of database vectors
 * @param xq    query vectors (size nq * d)
 * @param xb    database vectors (size nb * d)
 * @param dis   output distances (size nq * nb)
 * @param ldq,ldb, ldd strides for the matrices
 *
 * NOTE(review): the default stride of -1 presumably selects the compact
 * layout -- confirm in distances.cpp.
 */
 void pairwise_L2sqr(
        int64_t d,
        int64_t nq,
        const float* xq,
        int64_t nb,
        const float* xb,
        float* dis,
        int64_t ldq = -1,
        int64_t ldb = -1,
        int64_t ldd = -1);
 /* compute the inner product between nx vectors x and one y
 *
 * NOTE(review): the signature has ny and no nx, which suggests one vector x
 * against ny contiguous vectors y (as for fvec_L2sqr_ny below) -- confirm
 * in the implementation. */
 void fvec_inner_products_ny(
        float* ip, /* output inner product */
        const float* x,
        const float* y,
        size_t d,
        size_t ny);
 /* compute ny square L2 distance between x and a set of contiguous y vectors */
 void fvec_L2sqr_ny(
        float* dis,
        const float* x,
        const float* y,
        size_t d,
        size_t ny);
 /** squared norm of a vector */
 float fvec_norm_L2sqr(const float* x, size_t d);
 /** compute the L2 norms for a set of vectors
 *
 * @param  norms    output norms, size nx
 * @param  x        set of vectors, size nx * d
 */
 void fvec_norms_L2(float* norms, const float* x, size_t d, size_t nx);
 /// same as fvec_norms_L2, but computes squared norms
 void fvec_norms_L2sqr(float* norms, const float* x, size_t d, size_t nx);
 /* L2-renormalize a set of vectors. Nothing done if the vector is 0-normed */
 void fvec_renorm_L2(size_t d, size_t nx, float* x);
 /* This function exists because the Torch counterpart is extremely slow
   (not multi-threaded + unexpected overhead even in single thread).
   It is here to implement the usual property |x-y|^2=|x|^2+|y|^2-2<x|y>
   NOTE(review): dis presumably holds the n1 * n2 inner products on input and
   the squared distances on output, nr1 / nr2 being the squared norms --
   confirm in distances.cpp. */
 void inner_product_to_L2sqr(
        float* dis,
        const float* nr1,
        const float* nr2,
        size_t n1,
        size_t n2);
 /*********************************************************
 * Vector to vector functions
 *********************************************************/
 /** compute c := a + b for vectors (element-wise sum)
 *
 * c and a can overlap, c and b can overlap
 *
 * @param a size d
 * @param b size d
 * @param c size d
 */
 void fvec_add(size_t d, const float* a, const float* b, float* c);
 /** compute c := a + b for a, c vectors and b a scalar
 * (overload of the function above that takes b by value)
 *
 * c and a can overlap
 *
 * @param a size d
 * @param c size d
 */
 void fvec_add(size_t d, const float* a, float b, float* c);
 /** compute c := a - b for vectors (element-wise difference)
 *
 * c and a can overlap, c and b can overlap
 *
 * @param a size d
 * @param b size d
 * @param c size d
 */
 void fvec_sub(size_t d, const float* a, const float* b, float* c);
 /***************************************************************************
 * Compute a subset of  distances
 ***************************************************************************/
 /* compute the inner product between x and a subset y of ny vectors,
   whose indices are given by ids.
   NOTE(review): with the nx parameter this presumably handles nx query
   vectors, writing nx * ny results to ip -- confirm in distances.cpp. */
 void fvec_inner_products_by_idx(
        float* ip,
        const float* x,
        const float* y,
        const int64_t* ids,
        size_t d,
        size_t nx,
        size_t ny);
 /* same but for squared L2 distances: a subset in y indexed by ids
   (ny vectors in total) */
 void fvec_L2sqr_by_idx(
        float* dis,
        const float* x,
        const float* y,
        const int64_t* ids, /* ids of y vecs */
        size_t d,
        size_t nx,
        size_t ny);
 /** compute dis[j] = L2sqr(x[ix[j]], y[iy[j]]) forall j=0..n-1
 *
 * @param x  size (max(ix) + 1, d)
 * @param y  size (max(iy) + 1, d)
 * @param ix size n
 * @param iy size n
 * @param dis size n
 */
 void pairwise_indexed_L2sqr(
        size_t d,
        size_t n,
        const float* x,
        const int64_t* ix,
        const float* y,
        const int64_t* iy,
        float* dis);
 /* same for inner product: the parameters have the same sizes as for
   pairwise_indexed_L2sqr */
 void pairwise_indexed_inner_product(
        size_t d,
        size_t n,
        const float* x,
        const int64_t* ix,
        const float* y,
        const int64_t* iy,
        float* dis);
 /***************************************************************************
 * KNN functions
 ***************************************************************************/
 // threshold on nx above which we switch to BLAS to compute distances
 FAISS_API extern int distance_compute_blas_threshold;
 // block sizes for BLAS distance computations
 FAISS_API extern int distance_compute_blas_query_bs;
 FAISS_API extern int distance_compute_blas_database_bs;
 // above this number of results we switch to a reservoir to collect results
 // rather than a heap
 FAISS_API extern int distance_compute_min_k_reservoir;
 /** Return the k nearest neighbors of each of the nx vectors x among the ny
 *  vectors y, w.r.t. max inner product
 *
 * @param x    query vectors, size nx * d
 * @param y    database vectors, size ny * d
 * @param res  result array, which also provides k. Sorted on output
 */
 void knn_inner_product(
        const float* x,
        const float* y,
        size_t d,
        size_t nx,
        size_t ny,
        float_minheap_array_t* res);
 /** Same as knn_inner_product, for the L2 distance
 *  (the results are collected in a max-heap array instead of a min-heap one)
 *  @param y_norm2    norms for the y vectors (nullptr or size ny)
 */
 void knn_L2sqr(
        const float* x,
        const float* y,
        size_t d,
        size_t nx,
        size_t ny,
        float_maxheap_array_t* res,
        const float* y_norm2 = nullptr);
 /* Find the nearest neighbors for nx queries in a set of ny vectors
 * indexed by ids. May be useful for re-ranking a pre-selected vector list
 */
 void knn_inner_products_by_idx(
        const float* x,
        const float* y,
        const int64_t* ids,
        size_t d,
        size_t nx,
        size_t ny,
        float_minheap_array_t* res);
 /* L2 counterpart of knn_inner_products_by_idx (max-heap result array) */
 void knn_L2sqr_by_idx(
        const float* x,
        const float* y,
        const int64_t* ids,
        size_t d,
        size_t nx,
        size_t ny,
        float_maxheap_array_t* res);
 /***************************************************************************
 * Range search
 ***************************************************************************/
 /// Forward declaration, see AuxIndexStructures.h
 struct RangeSearchResult;
 /** Range search: for each of the nx vectors x, search the ny vectors y
 *  around it, within the given radius.
 *
 * NOTE(review): the original comment here was a copy of the knn one
 * ("k nearest neighbors ... w.r.t. max inner product"); judging by the
 * function name the radius applies to the squared L2 distance -- confirm
 * in distances.cpp.
 *
 * @param x      query vectors, size nx * d
 * @param y      database vectors, size ny * d
 * @param radius search radius around the x vectors
 * @param result result structure
 */
 void range_search_L2sqr(
        const float* x,
        const float* y,
        size_t d,
        size_t nx,
        size_t ny,
        float radius,
        RangeSearchResult* result);
 /// same as range_search_L2sqr for the inner product similarity
 void range_search_inner_product(
        const float* x,
        const float* y,
        size_t d,
        size_t nx,
        size_t ny,
        float radius,
        RangeSearchResult* result);
 /***************************************************************************
 * PQ tables computations
 ***************************************************************************/
 /// specialized function for PQ2
 /// NOTE(review): going by the name, this computes the PQ distance tables
 /// for sub-vectors of dimension 2 (dsub = 2), using inner products when
 /// is_inner_product is set; the sizes of centroids and dis_tables are not
 /// visible here -- confirm in distances.cpp.
 void compute_PQ_dis_tables_dsub2(
        size_t d,
        size_t ksub,
        const float* centroids,
        size_t nx,
        const float* x,
        bool is_inner_product,
        float* dis_tables);
 /***************************************************************************
 * Templatized versions of distance functions
 ***************************************************************************/
 } // namespace faiss
--- a/src/3rdlib/faiss/utils/extra_distances-inl.h
+++ b/src/3rdlib/faiss/utils/extra_distances-inl.h
@ -0,0 +1,117 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 /** In this file are the implementations of extra metrics beyond L2
 *  and inner product */
 #include <faiss/utils/distances.h>
 #include <type_traits>
 namespace faiss {
 /** Functor computing the distance between two float vectors for metric mt.
 *
 * Only the call operator is declared here; the metrics handled in this file
 * each provide an explicit specialization of it further down.
 */
 template <MetricType mt>
 struct VectorDistance {
    // number of components read from each input vector
    size_t d;
    // extra metric parameter (METRIC_Lp uses it as the exponent)
    float metric_arg;
    // distance between x and y under metric mt
    inline float operator()(const float* x, const float* y) const;
    // heap template to use for this type of metric
    // (CMin for METRIC_INNER_PRODUCT, CMax for every other metric)
    using C = typename std::conditional<
            mt == METRIC_INNER_PRODUCT,
            CMin<float, int64_t>,
            CMax<float, int64_t>>::type;
 };
 /// METRIC_L2: delegates to fvec_L2sqr over the d components of x and y.
 template <>
 inline float VectorDistance<METRIC_L2>::operator()(
        const float* x,
        const float* y) const {
    const float l2sqr = fvec_L2sqr(x, y, d);
    return l2sqr;
 }
 /// METRIC_INNER_PRODUCT: delegates to fvec_inner_product over the d
 /// components of x and y.
 template <>
 inline float VectorDistance<METRIC_INNER_PRODUCT>::operator()(
        const float* x,
        const float* y) const {
    const float ip = fvec_inner_product(x, y, d);
    return ip;
 }
 /// METRIC_L1: delegates to fvec_L1 over the d components of x and y.
 template <>
 inline float VectorDistance<METRIC_L1>::operator()(
        const float* x,
        const float* y) const {
    const float l1 = fvec_L1(x, y, d);
    return l1;
 }
 /// METRIC_Linf: delegates to fvec_Linf over the d components of x and y.
 /// (An earlier hand-written loop taking the largest |x[i] - y[i]| was
 /// replaced by this call.)
 template <>
 inline float VectorDistance<METRIC_Linf>::operator()(
        const float* x,
        const float* y) const {
    const float linf = fvec_Linf(x, y, d);
    return linf;
 }
 /// METRIC_Lp: sum over the d components of |x[i] - y[i]| raised to the
 /// power metric_arg. No final root is applied to the sum.
 template <>
 inline float VectorDistance<METRIC_Lp>::operator()(
        const float* x,
        const float* y) const {
    float total = 0;
    const float* px = x;
    const float* py = y;
    for (size_t left = d; left > 0; --left, ++px, ++py) {
        const float gap = fabs(*px - *py);
        total += powf(gap, metric_arg);
    }
    return total;
 }
 /** METRIC_Canberra: sum over i of |x[i] - y[i]| / (|x[i]| + |y[i]|).
 *
 * A component where both inputs are zero is skipped: its term would be
 * 0 / 0 = NaN and would turn the whole sum into NaN. Counting it as 0 is the
 * usual convention for the Canberra distance. All other components are
 * accumulated exactly as before.
 *
 * @param x  first vector, d components
 * @param y  second vector, d components
 * @return   the accumulated sum
 */
 template <>
 inline float VectorDistance<METRIC_Canberra>::operator()(
        const float* x,
        const float* y) const {
    float accu = 0;
    for (size_t i = 0; i < d; i++) {
        float xi = x[i], yi = y[i];
        if (xi == 0 && yi == 0) {
            continue; // 0 / 0 term: contributes nothing
        }
        accu += fabs(xi - yi) / (fabs(xi) + fabs(yi));
    }
    return accu;
 }
 /// METRIC_BrayCurtis: (sum of |x[i] - y[i]|) divided by
 /// (sum of |x[i] + y[i]|), both sums taken over the d components.
 template <>
 inline float VectorDistance<METRIC_BrayCurtis>::operator()(
        const float* x,
        const float* y) const {
    float diff_sum = 0;
    float abs_sum = 0;
    for (size_t pos = 0; pos != d; ++pos) {
        const float a = x[pos];
        const float b = y[pos];
        diff_sum += fabs(a - b);
        abs_sum += fabs(a + b);
    }
    return diff_sum / abs_sum;
 }
 /** METRIC_JensenShannon: half the sum, over the d components, of
 *  -x[i] * log(m[i] / x[i]) - y[i] * log(m[i] / y[i]), with
 *  m[i] = (x[i] + y[i]) / 2.
 *
 * A term whose own component is zero is taken as 0 (the 0 * log 0 = 0
 * convention). Evaluating it literally gives 0 * log(inf) or log(0 / 0),
 * i.e. NaN, which would make the whole result NaN as soon as either input
 * has a zero entry. Terms with a non-zero component are computed exactly as
 * before.
 *
 * @param x  first vector, d components
 * @param y  second vector, d components
 * @return   0.5 * accumulated sum (no square root is taken)
 */
 template <>
 inline float VectorDistance<METRIC_JensenShannon>::operator()(
        const float* x,
        const float* y) const {
    float accu = 0;
    for (size_t i = 0; i < d; i++) {
        float xi = x[i], yi = y[i];
        float mi = 0.5 * (xi + yi);
        float kl1 = xi != 0 ? -xi * log(mi / xi) : 0;
        float kl2 = yi != 0 ? -yi * log(mi / yi) : 0;
        accu += kl1 + kl2;
    }
    return 0.5 * accu;
 }
 } // namespace faiss
--- a/src/3rdlib/faiss/utils/extra_distances.h
+++ b/src/3rdlib/faiss/utils/extra_distances.h
@ -0,0 +1,55 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 #pragma once
 /** In this file are the implementations of extra metrics beyond L2
 *  and inner product */
 #include <stdint.h>
 #include <faiss/Index.h>
 #include <faiss/utils/Heap.h>
 namespace faiss {
 /** Distances between query vectors xq and database vectors xb under the
 *  metric mt (with parameter metric_arg), written to dis.
 *
 * NOTE(review): from the parameter names, d is presumably the vector
 * dimension, nq / nb the number of query / database vectors, and ldq, ldb,
 * ldd the row strides of xq, xb and dis, the default -1 presumably selecting
 * a dense layout. Confirm against the implementation, which is not part of
 * this header.
 */
 void pairwise_extra_distances(
        int64_t d,
        int64_t nq,
        const float* xq,
        int64_t nb,
        const float* xb,
        MetricType mt,
        float metric_arg,
        float* dis,
        int64_t ldq = -1,
        int64_t ldb = -1,
        int64_t ldd = -1);
 /** Nearest-neighbour search under the metric mt (with parameter
 *  metric_arg); results are collected in the max-heap array res.
 *
 * NOTE(review): presumably x holds nx query vectors and y holds ny database
 * vectors, all of dimension d; confirm against the implementation, which is
 * not part of this header.
 */
 void knn_extra_metrics(
        const float* x,
        const float* y,
        size_t d,
        size_t nx,
        size_t ny,
        MetricType mt,
        float metric_arg,
        float_maxheap_array_t* res);
 /** get a DistanceComputer that refers to this type of distance and
 *  indexes a flat array of size nb */
 DistanceComputer* get_extra_distance_computer(
        size_t d,
        MetricType mt,
        float metric_arg,
        size_t nb,
        const float* xb);
 } // namespace faiss
 #include <faiss/utils/extra_distances-inl.h>
--- a/src/3rdlib/faiss/utils/hamming-inl.h
+++ b/src/3rdlib/faiss/utils/hamming-inl.h
@ -0,0 +1,521 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 namespace faiss {
 extern const uint8_t hamdis_tab_ham_bytes[256];
 /// Attach the writer to the buffer `code` of code_size bytes, clear the
 /// whole buffer and start writing at bit offset 0.
 inline BitstringWriter::BitstringWriter(uint8_t* code, size_t code_size)
        : code(code), code_size(code_size), i(0) {
    memset(code, 0, code_size);
 }
 /** Append the nbit low bits of x at the current bit offset and advance it.
 *
 * Bits are merged into the (pre-zeroed) buffer with |=, least significant
 * bit first.
 *
 * @param x     value to write; bits above position nbit are ignored
 * @param nbit  number of bits to write, 0 to 64; the write must fit in the
 *              buffer (checked by the assert)
 */
 inline void BitstringWriter::write(uint64_t x, int nbit) {
    assert(code_size * 8 >= nbit + i);
    // Keep only the nbit low bits. Without this, stray high bits of x would
    // be OR-ed into the following fields, and the byte loop below could run
    // past the end of the buffer.
    if (nbit < 64) {
        x &= (uint64_t(1) << nbit) - 1;
    }
    // nb of available bits in i / 8
    int na = 8 - (i & 7);
    if (nbit <= na) {
        // everything fits in the current byte
        code[i >> 3] |= x << (i & 7);
        i += nbit;
        return;
    }
    // fill the rest of the current byte, then spill into the next ones
    size_t j = i >> 3;
    code[j++] |= x << (i & 7);
    i += nbit;
    x >>= na;
    while (x != 0) {
        code[j++] |= x;
        x >>= 8;
    }
 }
 /// Attach the reader to the buffer `code` of code_size bytes; reading
 /// starts at bit offset 0.
 inline BitstringReader::BitstringReader(const uint8_t* code, size_t code_size)
        : code(code), code_size(code_size), i(0) {}
 /// Read the next nbit bits (least significant bit first) and advance the
 /// bit offset by nbit. The read must lie inside the buffer (asserted).
 inline uint64_t BitstringReader::read(int nbit) {
    assert(code_size * 8 >= nbit + i);
    const int bit_in_byte = i & 7;
    // nb of bits not yet consumed in the current byte
    const int avail = 8 - bit_in_byte;
    // what is left of the current byte forms the low bits of the result
    uint64_t value = code[i >> 3] >> bit_in_byte;
    size_t next = (i >> 3) + 1;
    i += nbit;
    if (nbit <= avail) {
        value &= (1 << nbit) - 1;
        return value;
    }
    // whole bytes in the middle of the field
    int shift = avail;
    for (nbit -= avail; nbit > 8; nbit -= 8, shift += 8) {
        value |= ((uint64_t)code[next++]) << shift;
    }
    // the last byte only contributes its nbit remaining low bits
    uint64_t tail = code[next];
    tail &= (1 << nbit) - 1;
    return value | (tail << shift);
 }
 /******************************************************************
 * The HammingComputer series of classes compares a single code of
 * size 4 to 32 to incoming codes. They are intended for use as a
 * template class where it would be inefficient to switch on the code
 * size in the inner loop. Hopefully the compiler will inline the
 * hamming() functions and put the a0, a1, ... in registers.
 ******************************************************************/
 /// Hamming computer for codes of exactly 4 bytes: the reference code is
 /// held as one 32-bit word.
 struct HammingComputer4 {
    uint32_t a0;
    HammingComputer4() {}
    HammingComputer4(const uint8_t* a, int code_size) {
        set(a, code_size);
    }
    /// load the reference code (code_size must be 4)
    void set(const uint8_t* a, int code_size) {
        assert(code_size == 4);
        a0 = reinterpret_cast<const uint32_t*>(a)[0];
    }
    /// number of bits that differ between b and the reference code
    inline int hamming(const uint8_t* b) const {
        const uint32_t other = *reinterpret_cast<const uint32_t*>(b);
        return popcount64(other ^ a0);
    }
 };
 /// Hamming computer for codes of exactly 8 bytes: the reference code is
 /// held as one 64-bit word.
 struct HammingComputer8 {
    uint64_t a0;
    HammingComputer8() {}
    HammingComputer8(const uint8_t* a, int code_size) {
        set(a, code_size);
    }
    /// load the reference code (code_size must be 8)
    void set(const uint8_t* a, int code_size) {
        assert(code_size == 8);
        a0 = reinterpret_cast<const uint64_t*>(a)[0];
    }
    /// number of bits that differ between b and the reference code
    inline int hamming(const uint8_t* b) const {
        const uint64_t other = *reinterpret_cast<const uint64_t*>(b);
        return popcount64(other ^ a0);
    }
 };
 /// Hamming computer for codes of exactly 16 bytes: the reference code is
 /// held as two 64-bit words.
 struct HammingComputer16 {
    uint64_t a0, a1;
    HammingComputer16() {}
    HammingComputer16(const uint8_t* a8, int code_size) {
        set(a8, code_size);
    }
    /// load the reference code (code_size must be 16)
    void set(const uint8_t* a8, int code_size) {
        assert(code_size == 16);
        const uint64_t* words = reinterpret_cast<const uint64_t*>(a8);
        a0 = words[0];
        a1 = words[1];
    }
    /// number of bits that differ between b8 and the reference code
    inline int hamming(const uint8_t* b8) const {
        const uint64_t* other = reinterpret_cast<const uint64_t*>(b8);
        int bits = popcount64(other[0] ^ a0);
        bits += popcount64(other[1] ^ a1);
        return bits;
    }
 };
 // Hamming computer for codes of exactly 20 bytes: two 64-bit words followed
 // by one 32-bit word.
 // when applied to an array, 1/2 of the 64-bit accesses are unaligned.
 // This incurs a penalty of ~10% wrt. fully aligned accesses.
 struct HammingComputer20 {
    uint64_t a0, a1;
    uint32_t a2;
    HammingComputer20() {}
    HammingComputer20(const uint8_t* a8, int code_size) {
        set(a8, code_size);
    }
    /// load the reference code (code_size must be 20)
    void set(const uint8_t* a8, int code_size) {
        assert(code_size == 20);
        const uint64_t* a = (uint64_t*)a8;
        a0 = a[0];
        a1 = a[1];
        // Bytes 16..19 only: reading a[2] as a uint64_t would touch bytes
        // 16..23, i.e. 4 bytes past the end of the 20-byte code.
        a2 = *(uint32_t*)(a + 2);
    }
    /// number of bits that differ between b8 and the reference code
    inline int hamming(const uint8_t* b8) const {
        const uint64_t* b = (uint64_t*)b8;
        return popcount64(b[0] ^ a0) + popcount64(b[1] ^ a1) +
                popcount64(*(uint32_t*)(b + 2) ^ a2);
    }
 };
 /// Hamming computer for codes of exactly 32 bytes: the reference code is
 /// held as four 64-bit words.
 struct HammingComputer32 {
    uint64_t a0, a1, a2, a3;
    HammingComputer32() {}
    HammingComputer32(const uint8_t* a8, int code_size) {
        set(a8, code_size);
    }
    /// load the reference code (code_size must be 32)
    void set(const uint8_t* a8, int code_size) {
        assert(code_size == 32);
        const uint64_t* words = reinterpret_cast<const uint64_t*>(a8);
        a0 = words[0];
        a1 = words[1];
        a2 = words[2];
        a3 = words[3];
    }
    /// number of bits that differ between b8 and the reference code
    inline int hamming(const uint8_t* b8) const {
        const uint64_t* other = reinterpret_cast<const uint64_t*>(b8);
        int bits = popcount64(other[0] ^ a0);
        bits += popcount64(other[1] ^ a1);
        bits += popcount64(other[2] ^ a2);
        bits += popcount64(other[3] ^ a3);
        return bits;
    }
 };
 /// Hamming computer for codes of exactly 64 bytes: the reference code is
 /// held as eight 64-bit words.
 struct HammingComputer64 {
    uint64_t a0, a1, a2, a3, a4, a5, a6, a7;
    HammingComputer64() {}
    HammingComputer64(const uint8_t* a8, int code_size) {
        set(a8, code_size);
    }
    /// load the reference code (code_size must be 64)
    void set(const uint8_t* a8, int code_size) {
        assert(code_size == 64);
        const uint64_t* words = reinterpret_cast<const uint64_t*>(a8);
        a0 = words[0];
        a1 = words[1];
        a2 = words[2];
        a3 = words[3];
        a4 = words[4];
        a5 = words[5];
        a6 = words[6];
        a7 = words[7];
    }
    /// number of bits that differ between b8 and the reference code
    inline int hamming(const uint8_t* b8) const {
        const uint64_t* other = reinterpret_cast<const uint64_t*>(b8);
        int bits = popcount64(other[0] ^ a0);
        bits += popcount64(other[1] ^ a1);
        bits += popcount64(other[2] ^ a2);
        bits += popcount64(other[3] ^ a3);
        bits += popcount64(other[4] ^ a4);
        bits += popcount64(other[5] ^ a5);
        bits += popcount64(other[6] ^ a6);
        bits += popcount64(other[7] ^ a7);
        return bits;
    }
 };
 /// Hamming computer for an arbitrary code size. The first quotient8 * 8
 /// bytes are compared as 64-bit words, the remaining remainder8 bytes one
 /// at a time through the hamdis_tab_ham_bytes lookup table.
 struct HammingComputerDefault {
    // reference code; only the pointer is stored, the bytes are not copied
    const uint8_t* a8;
    // number of whole 64-bit words in the code
    int quotient8;
    // number of trailing bytes after the last whole word (0 to 7)
    int remainder8;
    HammingComputerDefault() {}
    HammingComputerDefault(const uint8_t* a8, int code_size) {
        set(a8, code_size);
    }
    /// point at a new reference code of code_size bytes
    void set(const uint8_t* a8, int code_size) {
        this->a8 = a8;
        quotient8 = code_size / 8;
        remainder8 = code_size % 8;
    }
    /// number of bits that differ between b8 and the reference code
    int hamming(const uint8_t* b8) const {
        int accu = 0;
        const uint64_t* a64 = reinterpret_cast<const uint64_t*>(a8);
        const uint64_t* b64 = reinterpret_cast<const uint64_t*>(b8);
        int i = 0, len = quotient8;
        // Loop unrolled by 8 with the switch jumping into the middle of the
        // loop body: the first pass handles the len % 8 leftover words, then
        // every further pass handles 8 words. All case labels fall through
        // on purpose.
        switch (len & 7) {
            default:
                while (len > 7) {
                    len -= 8;
                    accu += popcount64(a64[i] ^ b64[i]);
                    i++;
                    case 7:
                        accu += popcount64(a64[i] ^ b64[i]);
                        i++;
                    case 6:
                        accu += popcount64(a64[i] ^ b64[i]);
                        i++;
                    case 5:
                        accu += popcount64(a64[i] ^ b64[i]);
                        i++;
                    case 4:
                        accu += popcount64(a64[i] ^ b64[i]);
                        i++;
                    case 3:
                        accu += popcount64(a64[i] ^ b64[i]);
                        i++;
                    case 2:
                        accu += popcount64(a64[i] ^ b64[i]);
                        i++;
                    case 1:
                        accu += popcount64(a64[i] ^ b64[i]);
                        i++;
                }
        }
        if (remainder8) {
            // trailing bytes that do not fill a 64-bit word
            const uint8_t* a = a8 + 8 * quotient8;
            const uint8_t* b = b8 + 8 * quotient8;
            // enter at case remainder8 and fall through down to case 1, so
            // that exactly remainder8 bytes are looked up
            switch (remainder8) {
                case 7:
                    accu += hamdis_tab_ham_bytes[a[6] ^ b[6]];
                case 6:
                    accu += hamdis_tab_ham_bytes[a[5] ^ b[5]];
                case 5:
                    accu += hamdis_tab_ham_bytes[a[4] ^ b[4]];
                case 4:
                    accu += hamdis_tab_ham_bytes[a[3] ^ b[3]];
                case 3:
                    accu += hamdis_tab_ham_bytes[a[2] ^ b[2]];
                case 2:
                    accu += hamdis_tab_ham_bytes[a[1] ^ b[1]];
                case 1:
                    accu += hamdis_tab_ham_bytes[a[0] ^ b[0]];
                default:
                    break;
            }
        }
        return accu;
    }
 };
 // Hamming computer for code sizes that are a multiple of 8 bytes; keeps a
 // pointer to the reference code and walks it word by word.
 // more inefficient than HammingComputerDefault (obsolete)
 struct HammingComputerM8 {
    const uint64_t* a;
    int n;
    HammingComputerM8() {}
    HammingComputerM8(const uint8_t* a8, int code_size) {
        set(a8, code_size);
    }
    /// point at a new reference code (code_size must be a multiple of 8)
    void set(const uint8_t* a8, int code_size) {
        assert(code_size % 8 == 0);
        n = code_size / 8;
        a = reinterpret_cast<const uint64_t*>(a8);
    }
    /// number of bits that differ between b8 and the reference code
    int hamming(const uint8_t* b8) const {
        const uint64_t* other = reinterpret_cast<const uint64_t*>(b8);
        int total = 0;
        int w = 0;
        while (w < n) {
            total += popcount64(a[w] ^ other[w]);
            ++w;
        }
        return total;
    }
 };
 // Hamming computer for code sizes that are a multiple of 4 bytes; keeps a
 // pointer to the reference code and walks it 32 bits at a time.
 // more inefficient than HammingComputerDefault (obsolete)
 struct HammingComputerM4 {
    const uint32_t* a;
    int n;
    HammingComputerM4() {}
    HammingComputerM4(const uint8_t* a4, int code_size) {
        set(a4, code_size);
    }
    /// point at a new reference code (code_size must be a multiple of 4)
    void set(const uint8_t* a4, int code_size) {
        assert(code_size % 4 == 0);
        n = code_size / 4;
        a = reinterpret_cast<const uint32_t*>(a4);
    }
    /// number of bits that differ between b8 and the reference code
    int hamming(const uint8_t* b8) const {
        const uint32_t* other = reinterpret_cast<const uint32_t*>(b8);
        int total = 0;
        int w = 0;
        while (w < n) {
            total += popcount64(a[w] ^ other[w]);
            ++w;
        }
        return total;
    }
 };
 /***************************************************************************
 * Equivalence with a template class when code size is known at compile time
 **************************************************************************/
 // default template: any CODE_SIZE without an explicit specialization falls
 // back to HammingComputerDefault, and the code size is passed at run time
 // through the constructor
 template <int CODE_SIZE>
 struct HammingComputer : HammingComputerDefault {
    HammingComputer(const uint8_t* a, int code_size)
            : HammingComputerDefault(a, code_size) {}
 };
 // SPECIALIZED_HC(N) specializes HammingComputer<N> to derive from the
 // fixed-size HammingComputerN. Unlike the default template, the generated
 // constructor takes only the code pointer and passes N as the code size.
 #define SPECIALIZED_HC(CODE_SIZE)                                    \
    template <>                                                      \
    struct HammingComputer<CODE_SIZE> : HammingComputer##CODE_SIZE { \
        HammingComputer(const uint8_t* a)                            \
                : HammingComputer##CODE_SIZE(a, CODE_SIZE) {}        \
    }
 // one specialization per fixed-size computer defined in this file
 SPECIALIZED_HC(4);
 SPECIALIZED_HC(8);
 SPECIALIZED_HC(16);
 SPECIALIZED_HC(20);
 SPECIALIZED_HC(32);
 SPECIALIZED_HC(64);
 #undef SPECIALIZED_HC
 /***************************************************************************
 * generalized Hamming = number of bytes that are different between
 * two codes.
 ***************************************************************************/
 /// Number of non-zero bytes among the 8 bytes of a. Applied to the XOR of
 /// two 8-byte words, this is the number of byte positions where they differ.
 inline int generalized_hamming_64(uint64_t a) {
    // OR every bit of a byte down onto that byte's lowest bit
    a = a | (a >> 1);
    a = a | (a >> 2);
    a = a | (a >> 4);
    // keep one flag bit per byte and count the flags
    const uint64_t byte_flags = a & 0x0101010101010101UL;
    return popcount64(byte_flags);
 }
 /// Generalized Hamming computer for codes of exactly 8 bytes: counts the
 /// byte positions where a code differs from the reference code.
 struct GenHammingComputer8 {
    uint64_t a0;
    GenHammingComputer8(const uint8_t* a, int code_size) {
        assert(code_size == 8);
        a0 = reinterpret_cast<const uint64_t*>(a)[0];
    }
    inline int hamming(const uint8_t* b) const {
        const uint64_t other = *reinterpret_cast<const uint64_t*>(b);
        return generalized_hamming_64(other ^ a0);
    }
 };
 /// Generalized Hamming computer for codes of exactly 16 bytes: counts the
 /// byte positions where a code differs from the reference code.
 struct GenHammingComputer16 {
    uint64_t a0, a1;
    GenHammingComputer16(const uint8_t* a8, int code_size) {
        assert(code_size == 16);
        const uint64_t* words = reinterpret_cast<const uint64_t*>(a8);
        a0 = words[0];
        a1 = words[1];
    }
    inline int hamming(const uint8_t* b8) const {
        const uint64_t* other = reinterpret_cast<const uint64_t*>(b8);
        int bytes = generalized_hamming_64(other[0] ^ a0);
        bytes += generalized_hamming_64(other[1] ^ a1);
        return bytes;
    }
 };
 /// Generalized Hamming computer for codes of exactly 32 bytes: counts the
 /// byte positions where a code differs from the reference code.
 struct GenHammingComputer32 {
    uint64_t a0, a1, a2, a3;
    GenHammingComputer32(const uint8_t* a8, int code_size) {
        assert(code_size == 32);
        const uint64_t* words = reinterpret_cast<const uint64_t*>(a8);
        a0 = words[0];
        a1 = words[1];
        a2 = words[2];
        a3 = words[3];
    }
    inline int hamming(const uint8_t* b8) const {
        const uint64_t* other = reinterpret_cast<const uint64_t*>(b8);
        int bytes = generalized_hamming_64(other[0] ^ a0);
        bytes += generalized_hamming_64(other[1] ^ a1);
        bytes += generalized_hamming_64(other[2] ^ a2);
        bytes += generalized_hamming_64(other[3] ^ a3);
        return bytes;
    }
 };
 /// Generalized Hamming computer for code sizes that are a multiple of 8
 /// bytes; keeps a pointer to the reference code and walks it word by word.
 struct GenHammingComputerM8 {
    const uint64_t* a;
    int n;
    GenHammingComputerM8(const uint8_t* a8, int code_size) {
        assert(code_size % 8 == 0);
        n = code_size / 8;
        a = reinterpret_cast<const uint64_t*>(a8);
    }
    int hamming(const uint8_t* b8) const {
        const uint64_t* other = reinterpret_cast<const uint64_t*>(b8);
        int total = 0;
        int w = 0;
        while (w < n) {
            total += generalized_hamming_64(a[w] ^ other[w]);
            ++w;
        }
        return total;
    }
 };
 /** generalized Hamming distances (= number of code bytes that differ,
    as defined at the top of this section); ordered defaults to true */
 void generalized_hammings_knn_hc(
        int_maxheap_array_t* ha,
        const uint8_t* a,
        const uint8_t* b,
        size_t nb,
        size_t code_size,
        int ordered = true);
 /** This class maintains a list of best distances seen so far.
 *
 * Since the distances are in a limited range (0 to nbit), the
 * object maintains one list per possible distance, and fills
 * in only the n-first lists, such that the sum of sizes of the
 * n lists is below k.
 */
 template <class HammingComputer>
 struct HCounterState {
    // counters[dis]: number of ids currently stored for distance dis
    int* counters;
    // k slots per distance: the ids for distance dis start at dis * k
    int64_t* ids_per_dis;
    // distance computer bound to the query x
    HammingComputer hc;
    // largest distance still accepted; starts at d + 1 and only decreases
    int thres;
    // number of accepted entries with distance < thres
    int count_lt;
    // number of accepted entries with distance == thres
    int count_eq;
    int k;
    HCounterState(
            int* counters,
            int64_t* ids_per_dis,
            const uint8_t* x,
            int d,
            int k)
            : counters(counters),
              ids_per_dis(ids_per_dis),
              hc(x, d / 8),
              thres(d + 1),
              count_lt(0),
              count_eq(0),
              k(k) {}
    /// Offer candidate y with id j: it is recorded if its distance to the
    /// query is below the threshold, or equal to it while fewer than k
    /// entries sit at the threshold.
    void update_counter(const uint8_t* y, size_t j) {
        const int32_t dis = hc.hamming(y);
        if (dis > thres) {
            return; // too far
        }
        if (dis == thres) {
            // at the threshold: accept only while there is room
            if (count_eq < k) {
                ids_per_dis[dis * k + count_eq++] = j;
                counters[dis] = count_eq;
            }
            return;
        }
        // strictly below the threshold: always recorded
        ids_per_dis[dis * k + counters[dis]++] = j;
        ++count_lt;
        // once k entries lie below the threshold, lower it until fewer do
        while (count_lt == k && thres > 0) {
            --thres;
            count_eq = counters[thres];
            count_lt -= count_eq;
        }
    }
 };
 } // namespace faiss
--- a/src/3rdlib/faiss/utils/hamming.h
+++ b/src/3rdlib/faiss/utils/hamming.h
@ -0,0 +1,218 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 /*
 * Hamming distances. The binary vector dimensionality should be a
 * multiple of 8, as the elementary operations operate on bytes. If
 * you need other sizes, just pad with 0s (this is done by function
 * fvecs2bitvecs).
 *
 * User-defined type hamdis_t is used for distances because at this time
 * it is still unclear how we will need to balance
 * - flexibility in vector size (may need 16- or even 8-bit vectors)
 * - memory usage
 * - cache-misses when dealing with large volumes of data (fewer bits is better)
 *
 */
 #ifndef FAISS_hamming_h
 #define FAISS_hamming_h
 #include <stdint.h>
 #include <faiss/impl/platform_macros.h>
 #include <faiss/utils/Heap.h>
 /* The Hamming distance type */
 typedef int32_t hamdis_t;
 namespace faiss {
 /**************************************************
 * General bit vector functions
 **************************************************/
 struct RangeSearchResult;
 void bitvec_print(const uint8_t* b, size_t d);
 /* Functions for casting vectors of regular types to compact bits.
   They assume proper allocation done beforehand, meaning that b
   should be able to receive as many bits as x may produce.  */
 /* Makes an array of bits from the signs of a float array. The length
   of the output array b is rounded up to byte size (allocate
   accordingly) */
 void fvecs2bitvecs(const float* x, uint8_t* b, size_t d, size_t n);
 void bitvecs2fvecs(const uint8_t* b, float* x, size_t d, size_t n);
 void fvec2bitvec(const float* x, uint8_t* b, size_t d);
 /** Shuffle the bits from b(i, j) := a(i, order[j])
 */
 void bitvec_shuffle(
        size_t n,
        size_t da,
        size_t db,
        const int* order,
        const uint8_t* a,
        uint8_t* b);
 /***********************************************
 * Generic reader/writer for bit strings
 ***********************************************/
 /// Sequential bit-level writer over a byte buffer supplied by the caller.
 struct BitstringWriter {
    uint8_t* code; // destination buffer
    size_t code_size; // size of the destination buffer
    size_t i; // current bit offset
    // code_size in bytes
    BitstringWriter(uint8_t* code, size_t code_size);
    // write the nbit low bits of x
    void write(uint64_t x, int nbit);
 };
 /// Sequential bit-level reader over a byte buffer supplied by the caller.
 struct BitstringReader {
    const uint8_t* code; // source buffer
    size_t code_size; // size of the source buffer
    size_t i; // current bit offset
    // code_size in bytes
    BitstringReader(const uint8_t* code, size_t code_size);
    // read nbit bits from the code
    uint64_t read(int nbit);
 };
 /**************************************************
 * Hamming distance computation functions
 **************************************************/
 FAISS_API extern size_t hamming_batch_size;
 /// Number of bits set in x, computed with the __builtin_popcountl builtin.
 /// NOTE(review): that builtin takes an `unsigned long`, so the full 64 bits
 /// are only counted where `long` is 64 bits wide (or where the platform
 /// macros remap the builtin); confirm for every supported target.
 inline int popcount64(uint64_t x) {
    const int bits_set = __builtin_popcountl(x);
    return bits_set;
 }
 /** Compute a set of Hamming distances between na and nb binary vectors
 *
 * @param  a             size na * nbytespercode
 * @param  b             size nb * nbytespercode
 * @param  nbytespercode should be multiple of 8
 * @param  dis           output distances, size na * nb
 */
 void hammings(
        const uint8_t* a,
        const uint8_t* b,
        size_t na,
        size_t nb,
        size_t nbytespercode,
        hamdis_t* dis);
 /** Return the k smallest Hamming distances for a set of binary query vectors,
 * using a max heap.
 * @param a       queries, size ha->nh * ncodes
 * @param b       database, size nb * ncodes
 * @param nb      number of database vectors
 * @param ncodes  size of the binary codes (bytes)
 * @param ordered if != 0: order the results by decreasing distance
 *                (may be bottleneck for k/n > 0.01) */
 void hammings_knn_hc(
        int_maxheap_array_t* ha,
        const uint8_t* a,
        const uint8_t* b,
        size_t nb,
        size_t ncodes,
        int ordered);
 /* Legacy alias to hammings_knn_hc. */
 void hammings_knn(
        int_maxheap_array_t* ha,
        const uint8_t* a,
        const uint8_t* b,
        size_t nb,
        size_t ncodes,
        int ordered);
 /** Return the k smallest Hamming distances for a set of binary query vectors,
 * using counting max.
 * @param a       queries, size na * ncodes
 * @param b       database, size nb * ncodes
 * @param na      number of query vectors
 * @param nb      number of database vectors
 * @param k       number of vectors/distances to return
 * @param ncodes  size of the binary codes (bytes)
 * @param distances output distances from each query vector to its k nearest
 *                neighbors
 * @param labels  output ids of the k nearest neighbors to each query vector
 */
 void hammings_knn_mc(
        const uint8_t* a,
        const uint8_t* b,
        size_t na,
        size_t nb,
        size_t k,
        size_t ncodes,
        int32_t* distances,
        int64_t* labels);
 /** same as hammings_knn except we are doing a range search with radius */
 void hamming_range_search(
        const uint8_t* a,
        const uint8_t* b,
        size_t na,
        size_t nb,
        int radius,
        size_t ncodes,
        RangeSearchResult* result);
 /* Counting the number of matches or of cross-matches (without returning them)
   For use with function that assume pre-allocated memory */
 void hamming_count_thres(
        const uint8_t* bs1,
        const uint8_t* bs2,
        size_t n1,
        size_t n2,
        hamdis_t ht,
        size_t ncodes,
        size_t* nptr);
 /* Return all Hamming distances/index passing a thres. Pre-allocation of output
   is required. Use hamming_count_thres to determine the proper size. */
 size_t match_hamming_thres(
        const uint8_t* bs1,
        const uint8_t* bs2,
        size_t n1,
        size_t n2,
        hamdis_t ht,
        size_t ncodes,
        int64_t* idx,
        hamdis_t* dis);
 /* Cross-matching in a set of vectors */
 void crosshamming_count_thres(
        const uint8_t* dbs,
        size_t n,
        hamdis_t ht,
        size_t ncodes,
        size_t* nptr);
 /* compute the Hamming distances between two codewords of nwords*64 bits */
 hamdis_t hamming(const uint64_t* bs1, const uint64_t* bs2, size_t nwords);
 } // namespace faiss
 // inlined definitions of HammingComputerXX and GenHammingComputerXX
 #include <faiss/utils/hamming-inl.h>
 #endif /* FAISS_hamming_h */
--- a/src/3rdlib/faiss/utils/ordered_key_value.h
+++ b/src/3rdlib/faiss/utils/ordered_key_value.h
@ -0,0 +1,96 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 #pragma once
 #include <climits>
 #include <cmath>
 #include <limits>
 namespace faiss {
 /*******************************************************************
 * C object: uniform handling of min and max heap
 *******************************************************************/
 /** The C object gives the type T of the values of a key-value storage, the type
 *  of the keys, TI and the comparison that is done: CMax for a decreasing
 *  series and CMin for increasing series. In other words, for a given threshold
 *  threshold, an incoming value x is kept if
 *
 *      C::cmp(threshold, x)
 *
 *  is true.
 */
 // CMax is declared ahead of its definition because CMin refers to it as
 // its reverse comparison
 template <typename T_, typename TI_>
 struct CMax;
 // step functions used by CMin::nextafter / CMax::nextafter; only the
 // specializations at the end of this section are defined
 template <typename T>
 inline T cmin_nextafter(T x);
 template <typename T>
 inline T cmax_nextafter(T x);
 // traits of minheaps = heaps where the minimum value is stored on top
 // useful to find the *max* values of an array
 template <typename T_, typename TI_>
 struct CMin {
    using T = T_;
    using TI = TI_;
    // reference to reverse comparison
    using Crev = CMax<T_, TI_>;
    // true when a is strictly below b
    inline static bool cmp(T a, T b) {
        return a < b;
    }
    // lowest finite value of T
    inline static T neutral() {
        return std::numeric_limits<T>::lowest();
    }
    static const bool is_max = false;
    // neighbouring value just below x
    inline static T nextafter(T x) {
        return cmin_nextafter(x);
    }
 };
 // traits of maxheaps: the comparison and the neutral value are the mirror
 // image of CMin
 template <typename T_, typename TI_>
 struct CMax {
    using T = T_;
    using TI = TI_;
    // reference to reverse comparison
    using Crev = CMin<T_, TI_>;
    // true when a is strictly above b
    inline static bool cmp(T a, T b) {
        return a > b;
    }
    // largest finite value of T
    inline static T neutral() {
        return std::numeric_limits<T>::max();
    }
    static const bool is_max = true;
    // neighbouring value just above x
    inline static T nextafter(T x) {
        return cmax_nextafter(x);
    }
 };
 // float: previous representable value, stepping towards -HUGE_VALF
 template <>
 inline float cmin_nextafter<float>(float x) {
    const float toward = -HUGE_VALF;
    return std::nextafterf(x, toward);
 }
 // float: next representable value, stepping towards +HUGE_VALF
 template <>
 inline float cmax_nextafter<float>(float x) {
    const float toward = HUGE_VALF;
    return std::nextafterf(x, toward);
 }
 // uint16_t: previous integer
 template <>
 inline uint16_t cmin_nextafter<uint16_t>(uint16_t x) {
    return x - 1;
 }
 // uint16_t: next integer
 template <>
 inline uint16_t cmax_nextafter<uint16_t>(uint16_t x) {
    return x + 1;
 }
 } // namespace faiss
--- a/src/3rdlib/faiss/utils/partitioning.h
+++ b/src/3rdlib/faiss/utils/partitioning.h
@ -0,0 +1,74 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 #pragma once
 #include <stdint.h>
 #include <stdio.h>
 #include <faiss/impl/platform_macros.h>
 namespace faiss {
 /** partitions the table into 0:q and q:n where all elements above q are >= all
 * elements below q (for C = CMax, for CMin comparisons are reversed)
 *
 * Returns the partition threshold. The elements q:n are destroyed on output.
 */
 template <class C>
 typename C::T partition_fuzzy(
        typename C::T* vals,
        typename C::TI* ids,
        size_t n,
        size_t q_min,
        size_t q_max,
        size_t* q_out);
 /** simplified interface for when the partition is not fuzzy: the lower and
 * upper bounds of the split point are both q, and the position actually
 * chosen is not reported back */
 template <class C>
 inline typename C::T partition(
        typename C::T* vals,
        typename C::TI* ids,
        size_t n,
        size_t q) {
    size_t* const no_q_out = nullptr;
    return partition_fuzzy<C>(vals, ids, n, q, q, no_q_out);
 }
 /** low level SIMD histogramming functions */
 /** 8-bin histogram of (x - min) >> shift
 * values outside the range are ignored.
 * the data table should be aligned on 32 bytes */
 void simd_histogram_8(
        const uint16_t* data,
        int n,
        uint16_t min,
        int shift,
        int* hist);
 /** same for 16-bin histogram */
 void simd_histogram_16(
        const uint16_t* data,
        int n,
        uint16_t min,
        int shift,
        int* hist);
 /// Counters kept for the partitioning code.
 /// NOTE(review): from their names the two fields presumably accumulate
 /// cycle counts of a bisection phase and a compression phase; confirm
 /// against the implementation.
 struct PartitionStats {
    uint64_t bissect_cycles;
    uint64_t compress_cycles;
    // a newly constructed object is reset() right away
    PartitionStats() {
        reset();
    }
    // declared only; the definition is not part of this header
    void reset();
 };
 // global var that collects them all
 FAISS_API extern PartitionStats partition_stats;
 } // namespace faiss
--- a/src/3rdlib/faiss/utils/quantize_lut.h
+++ b/src/3rdlib/faiss/utils/quantize_lut.h
@ -0,0 +1,82 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 #pragma once
 #include <cstdint>
 #include <cstdio>
 namespace faiss {
 /** Functions to quantize PQ floating-point Look Up Tables (LUT) to uint8, and
 * biases to uint16. The accumulation is supposed to take place in uint16.
 * The quantization coefficients are float (a, b) such that
 *
 *      original_value = quantized_value * a / b
 *
 * The hardest part of the quantization is with multiple LUTs that need to be
 * added up together. In that case, coefficient a has to be chosen so that
 * the sum fits in a uint16 accumulator.
 */
 namespace quantize_lut {
 /* affine quantizer, a and b are the affine coefficients, marginalize over d
 *
 * @param tab input/output, size (n, d)
 */
 void round_uint8_per_column(
        float* tab,
        size_t n,
        size_t d,
        float* a_out = nullptr,
        float* b_out = nullptr);
 /* affine quantizer, a and b are the affine coefficients
 *
 * @param tab input/output, size (m, n, d)
 */
 void round_uint8_per_column_multi(
        float* tab,
        size_t m,
        size_t n,
        size_t d,
        float* a_out = nullptr,
        float* b_out = nullptr);
 /** LUT quantization to uint8 and bias to uint16.
 *
 * (nprobe, M, ksub, lut_is_3d) determine the size of the LUT
 *
 *  LUT input:
 *  - 2D size (M, ksub): single matrix per probe (lut_is_3d=false)
 *  - 3D size (nprobe, M, ksub): separate LUT per probe (lut_is_3d=true)
 *  bias input:
 *  - nullptr: bias is 0
 *  - size (nprobe): one bias per probe
 *  Output:
 *  - LUTq uint8 version of the LUT (M size is rounded up to M2)
 *  - biasq (or nullptr): uint16 version of the bias
 *  - a, b: scalars to approximate the true distance
 */
 void quantize_LUT_and_bias(
        size_t nprobe,
        size_t M,
        size_t ksub,
        bool lut_is_3d,
        const float* LUT,
        const float* bias,
        uint8_t* LUTq,
        size_t M2,
        uint16_t* biasq,
        float* a_out = nullptr,
        float* b_out = nullptr);
 } // namespace quantize_lut
 } // namespace faiss
--- a/src/3rdlib/faiss/utils/random.h
+++ b/src/3rdlib/faiss/utils/random.h
@ -0,0 +1,57 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 // -*- c++ -*-
 /* Random generators. Implemented here for speed and to make
 * sequences reproducible.
 */
 #pragma once
 #include <stdint.h>
 #include <random>
 namespace faiss {
 /**************************************************
 * Random data generation functions
 **************************************************/
 /// Random generator that can be used in multithreaded contexts.
 /// Its only data member is the std::mt19937 engine below; presumably each
 /// thread is expected to own its own instance (TODO confirm against the
 /// implementation, which is not part of this header).
 struct RandomGenerator {
    /// underlying Mersenne Twister engine
    std::mt19937 mt;
    /// random positive integer
    int rand_int();
    /// random int64_t
    int64_t rand_int64();
    /// generate random integer between 0 and max-1
    int rand_int(int max);
    /// random float between 0 and 1
    float rand_float();
    /// double-precision counterpart of rand_float(); presumably also between
    /// 0 and 1 (TODO confirm)
    double rand_double();
    /// @param seed  seed for the generator; 1234 when omitted
    explicit RandomGenerator(int64_t seed = 1234);
 };
 /* Generate an array of uniform random floats / multi-threaded implementation.
 *
 * For every function in this group, judging by the parameter names, x is the
 * output array, n its number of elements and seed the random seed. The
 * definitions are not part of this header, so details are to be confirmed
 * there. */
 void float_rand(float* x, size_t n, int64_t seed);
 /* float output like float_rand; the "n" suffix presumably stands for normally
 * distributed values (TODO confirm against the definition) */
 void float_randn(float* x, size_t n, int64_t seed);
 /* int64_t output */
 void int64_rand(int64_t* x, size_t n, int64_t seed);
 /* byte (uint8_t) output */
 void byte_rand(uint8_t* x, size_t n, int64_t seed);
 // max is actually the maximum value + 1
 void int64_rand_max(int64_t* x, size_t n, uint64_t max, int64_t seed);
 /* random permutation; perm has room for n ints */
 void rand_perm(int* perm, size_t n, int64_t seed);
 } // namespace faiss
--- a/src/3rdlib/faiss/utils/simdlib.h
+++ b/src/3rdlib/faiss/utils/simdlib.h
@ -0,0 +1,33 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 #pragma once
 /** Abstractions for 256-bit registers
 *
 * The objective is to separate the different interpretations of the same
 * registers (as a vector of uint8, uint16 or uint32), to provide printing
 * functions.
 */
 #ifdef __AVX2__
 #include <faiss/utils/simdlib_avx2.h>
 #elif defined(__aarch64__)
 #include <faiss/utils/simdlib_neon.h>
 #else
 // emulated = all operations are implemented as scalars
 #include <faiss/utils/simdlib_emulated.h>
 // FIXME: make a SSE version
 // is this ever going to happen? We will probably rather implement AVX512
 #endif
--- a/src/3rdlib/faiss/utils/simdlib_avx2.h
+++ b/src/3rdlib/faiss/utils/simdlib_avx2.h
@ -0,0 +1,464 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 #pragma once
 #include <cstdint>
 #include <string>
 #include <immintrin.h>
 #include <faiss/impl/platform_macros.h>
 namespace faiss {
 /** Simple wrapper around the AVX 256-bit registers
 *
 * The objective is to separate the different interpretations of the same
 * registers (as a vector of uint8, uint16 or uint32), to provide printing
 * functions, and to give more readable names to the AVX intrinsics. It does not
 * pretend to be exhaustive; functions are added as needed.
 */
 /// 256-bit representation without interpretation as a vector
 struct simd256bit {
    union {
        __m256i i;
        __m256 f;
    };
    simd256bit() {}
    explicit simd256bit(__m256i i) : i(i) {}
    explicit simd256bit(__m256 f) : f(f) {}
    explicit simd256bit(const void* x)
            : i(_mm256_load_si256((__m256i const*)x)) {}
    void clear() {
        i = _mm256_setzero_si256();
    }
    void storeu(void* ptr) const {
        _mm256_storeu_si256((__m256i*)ptr, i);
    }
    void loadu(const void* ptr) {
        i = _mm256_loadu_si256((__m256i*)ptr);
    }
    void store(void* ptr) const {
        _mm256_store_si256((__m256i*)ptr, i);
    }
    void bin(char bits[257]) const {
        char bytes[32];
        storeu((void*)bytes);
        for (int i = 0; i < 256; i++) {
            bits[i] = '0' + ((bytes[i / 8] >> (i % 8)) & 1);
        }
        bits[256] = 0;
    }
    std::string bin() const {
        char bits[257];
        bin(bits);
        return std::string(bits);
    }
 };
 /// vector of 16 elements in uint16
 struct simd16uint16 : simd256bit {
    simd16uint16() {}
    explicit simd16uint16(__m256i i) : simd256bit(i) {}
    explicit simd16uint16(int x) : simd256bit(_mm256_set1_epi16(x)) {}
    explicit simd16uint16(uint16_t x) : simd256bit(_mm256_set1_epi16(x)) {}
    explicit simd16uint16(simd256bit x) : simd256bit(x) {}
    explicit simd16uint16(const uint16_t* x) : simd256bit((const void*)x) {}
    std::string elements_to_string(const char* fmt) const {
        uint16_t bytes[16];
        storeu((void*)bytes);
        char res[1000];
        char* ptr = res;
        for (int i = 0; i < 16; i++) {
            ptr += sprintf(ptr, fmt, bytes[i]);
        }
        // strip last ,
        ptr[-1] = 0;
        return std::string(res);
    }
    std::string hex() const {
        return elements_to_string("%02x,");
    }
    std::string dec() const {
        return elements_to_string("%3d,");
    }
    void set1(uint16_t x) {
        i = _mm256_set1_epi16((short)x);
    }
    // shift must be known at compile time
    simd16uint16 operator>>(const int shift) const {
        return simd16uint16(_mm256_srli_epi16(i, shift));
    }
    // shift must be known at compile time
    simd16uint16 operator<<(const int shift) const {
        return simd16uint16(_mm256_slli_epi16(i, shift));
    }
    simd16uint16 operator+=(simd16uint16 other) {
        i = _mm256_add_epi16(i, other.i);
        return *this;
    }
    simd16uint16 operator-=(simd16uint16 other) {
        i = _mm256_sub_epi16(i, other.i);
        return *this;
    }
    simd16uint16 operator+(simd16uint16 other) const {
        return simd16uint16(_mm256_add_epi16(i, other.i));
    }
    simd16uint16 operator-(simd16uint16 other) const {
        return simd16uint16(_mm256_sub_epi16(i, other.i));
    }
    simd16uint16 operator&(simd256bit other) const {
        return simd16uint16(_mm256_and_si256(i, other.i));
    }
    simd16uint16 operator|(simd256bit other) const {
        return simd16uint16(_mm256_or_si256(i, other.i));
    }
    // returns binary masks
    simd16uint16 operator==(simd256bit other) const {
        return simd16uint16(_mm256_cmpeq_epi16(i, other.i));
    }
    simd16uint16 operator~() const {
        return simd16uint16(_mm256_xor_si256(i, _mm256_set1_epi32(-1)));
    }
    // get scalar at index 0
    uint16_t get_scalar_0() const {
        return _mm256_extract_epi16(i, 0);
    }
    // mask of elements where this >= thresh
    // 2 bit per component: 16 * 2 = 32 bit
    uint32_t ge_mask(simd16uint16 thresh) const {
        __m256i j = thresh.i;
        __m256i max = _mm256_max_epu16(i, j);
        __m256i ge = _mm256_cmpeq_epi16(i, max);
        return _mm256_movemask_epi8(ge);
    }
    uint32_t le_mask(simd16uint16 thresh) const {
        return thresh.ge_mask(*this);
    }
    uint32_t gt_mask(simd16uint16 thresh) const {
        return ~le_mask(thresh);
    }
    bool all_gt(simd16uint16 thresh) const {
        return le_mask(thresh) == 0;
    }
    // for debugging only
    uint16_t operator[](int i) const {
        ALIGNED(32) uint16_t tab[16];
        store(tab);
        return tab[i];
    }
    void accu_min(simd16uint16 incoming) {
        i = _mm256_min_epu16(i, incoming.i);
    }
    void accu_max(simd16uint16 incoming) {
        i = _mm256_max_epu16(i, incoming.i);
    }
 };
 // element-wise unsigned minimum of two vectors (not a std::min overload:
 // the result mixes elements of a and b)
 inline simd16uint16 min(simd16uint16 a, simd16uint16 b) {
    const __m256i lowest = _mm256_min_epu16(a.i, b.i);
    return simd16uint16(lowest);
 }
 // element-wise unsigned maximum of two vectors
 inline simd16uint16 max(simd16uint16 a, simd16uint16 b) {
    const __m256i highest = _mm256_max_epu16(a.i, b.i);
    return simd16uint16(highest);
 }
 // decompose in 128-lanes: a = (a0, a1), b = (b0, b1)
 // return (a0 + a1, b0 + b1)
 // TODO find a better name
 inline simd16uint16 combine2x2(simd16uint16 a, simd16uint16 b) {
    // (a1, b0): high lane of a next to low lane of b
    const __m256i crossed = _mm256_permute2f128_si256(a.i, b.i, 0x21);
    // (a0, b1): low lane of a next to high lane of b
    const __m256i straight = _mm256_blend_epi32(a.i, b.i, 0xF0);
    return simd16uint16(_mm256_add_epi16(crossed, straight));
 }
 // compare d0 and d1 to thr (element >= thr), return 32 bits: one bit per
 // element, the 16 bits of d0 first, then the 16 bits of d1
 inline uint32_t cmp_ge32(simd16uint16 d0, simd16uint16 d1, simd16uint16 thr) {
    // x >= thr exactly where max(x, thr) == x (unsigned)
    const __m256i is_ge0 =
            _mm256_cmpeq_epi16(d0.i, _mm256_max_epu16(d0.i, thr.i));
    const __m256i is_ge1 =
            _mm256_cmpeq_epi16(d1.i, _mm256_max_epu16(d1.i, thr.i));
    // narrow the 16-bit masks to bytes; the pack works per 128-bit lane
    __m256i packed = _mm256_packs_epi16(is_ge0, is_ge1);
    // easier than manipulating bit fields afterwards
    packed = _mm256_permute4x64_epi64(
            packed, 0 | (2 << 2) | (1 << 4) | (3 << 6));
    const uint32_t bits = _mm256_movemask_epi8(packed);
    return bits;
 }
 // compare d0 and d1 to thr (element <= thr), return 32 bits: one bit per
 // element, the 16 bits of d0 first, then the 16 bits of d1
 inline uint32_t cmp_le32(simd16uint16 d0, simd16uint16 d1, simd16uint16 thr) {
    // x <= thr exactly where min(x, thr) == x (unsigned)
    const __m256i is_le0 =
            _mm256_cmpeq_epi16(d0.i, _mm256_min_epu16(d0.i, thr.i));
    const __m256i is_le1 =
            _mm256_cmpeq_epi16(d1.i, _mm256_min_epu16(d1.i, thr.i));
    // narrow the 16-bit masks to bytes; the pack works per 128-bit lane
    __m256i packed = _mm256_packs_epi16(is_le0, is_le1);
    // easier than manipulating bit fields afterwards
    packed = _mm256_permute4x64_epi64(
            packed, 0 | (2 << 2) | (1 << 4) | (3 << 6));
    const uint32_t bits = _mm256_movemask_epi8(packed);
    return bits;
 }
 // vector of 32 unsigned 8-bit integers
 struct simd32uint8 : simd256bit {
    simd32uint8() {}
    explicit simd32uint8(__m256i i) : simd256bit(i) {}
    explicit simd32uint8(int x) : simd256bit(_mm256_set1_epi8(x)) {}
    explicit simd32uint8(uint8_t x) : simd256bit(_mm256_set1_epi8(x)) {}
    explicit simd32uint8(simd256bit x) : simd256bit(x) {}
    explicit simd32uint8(const uint8_t* x) : simd256bit((const void*)x) {}
    std::string elements_to_string(const char* fmt) const {
        uint8_t bytes[32];
        storeu((void*)bytes);
        char res[1000];
        char* ptr = res;
        for (int i = 0; i < 32; i++) {
            ptr += sprintf(ptr, fmt, bytes[i]);
        }
        // strip last ,
        ptr[-1] = 0;
        return std::string(res);
    }
    std::string hex() const {
        return elements_to_string("%02x,");
    }
    std::string dec() const {
        return elements_to_string("%3d,");
    }
    void set1(uint8_t x) {
        i = _mm256_set1_epi8((char)x);
    }
    simd32uint8 operator&(simd256bit other) const {
        return simd32uint8(_mm256_and_si256(i, other.i));
    }
    simd32uint8 operator+(simd32uint8 other) const {
        return simd32uint8(_mm256_add_epi8(i, other.i));
    }
    simd32uint8 lookup_2_lanes(simd32uint8 idx) const {
        return simd32uint8(_mm256_shuffle_epi8(i, idx.i));
    }
    // extract + 0-extend lane
    // this operation is slow (3 cycles)
    simd16uint16 lane0_as_uint16() const {
        __m128i x = _mm256_extracti128_si256(i, 0);
        return simd16uint16(_mm256_cvtepu8_epi16(x));
    }
    simd16uint16 lane1_as_uint16() const {
        __m128i x = _mm256_extracti128_si256(i, 1);
        return simd16uint16(_mm256_cvtepu8_epi16(x));
    }
    simd32uint8 operator+=(simd32uint8 other) {
        i = _mm256_add_epi8(i, other.i);
        return *this;
    }
    // for debugging only
    uint8_t operator[](int i) const {
        ALIGNED(32) uint8_t tab[32];
        store(tab);
        return tab[i];
    }
 };
 // convert with saturation
 // careful: this does not cross lanes, so the order is weird
 // NOTE(review): the pack instruction used here saturates as signed values;
 // confirm this is what callers expect for inputs above 127
 inline simd32uint8 uint16_to_uint8_saturate(simd16uint16 a, simd16uint16 b) {
    const __m256i packed = _mm256_packs_epi16(a.i, b.i);
    return simd32uint8(packed);
 }
 /// collect the most significant bit of each of the 32 bytes into a 32-bit
 /// mask
 inline uint32_t get_MSBs(simd32uint8 a) {
    const int top_bits = _mm256_movemask_epi8(a.i);
    return top_bits;
 }
 /// per byte: take the byte of b where the MSB of the mask byte is set,
 /// otherwise the byte of a
 inline simd32uint8 blendv(simd32uint8 a, simd32uint8 b, simd32uint8 mask) {
    const __m256i chosen = _mm256_blendv_epi8(a.i, b.i, mask.i);
    return simd32uint8(chosen);
 }
 /// vector of 8 unsigned 32-bit integers held in one 256-bit AVX2 register
 struct simd8uint32 : simd256bit {
    simd8uint32() {}
    explicit simd8uint32(__m256i i) : simd256bit(i) {}
    // broadcast constructor: every element receives x
    explicit simd8uint32(uint32_t x) : simd256bit(_mm256_set1_epi32(x)) {}
    // reinterpret the bits of another 256-bit value
    explicit simd8uint32(simd256bit x) : simd256bit(x) {}
    // read 32 bytes through the simd256bit pointer constructor
    explicit simd8uint32(const uint8_t* x) : simd256bit((const void*)x) {}
    /// Format each of the 8 elements with the printf-style format fmt and
    /// concatenate the results; the last character produced is dropped.
    /// The elements are passed to sprintf as uint32_t, so fmt must use an
    /// unsigned conversion.
    std::string elements_to_string(const char* fmt) const {
        uint32_t lanes[8];
        storeu((void*)lanes);
        char buf[1000];
        int len = 0;
        for (int k = 0; k < 8; k++) {
            len += sprintf(buf + len, fmt, lanes[k]);
        }
        // strip last ,
        buf[len - 1] = 0;
        return std::string(buf);
    }
    /// comma-separated hexadecimal dump
    std::string hex() const {
        return elements_to_string("%08x,");
    }
    /// comma-separated decimal dump
    std::string dec() const {
        // %u rather than %d: the elements are unsigned, a signed conversion
        // would print values >= 2^31 as negative numbers
        return elements_to_string("%10u,");
    }
    /// broadcast x to all elements
    void set1(uint32_t x) {
        i = _mm256_set1_epi32((int)x);
    }
 };
 /// vector of 8 single-precision floats held in one 256-bit AVX register
 struct simd8float32 : simd256bit {
    simd8float32() {}
    // reinterpret the bits of another 256-bit value
    explicit simd8float32(simd256bit x) : simd256bit(x) {}
    explicit simd8float32(__m256 x) : simd256bit(x) {}
    // broadcast constructor: every element receives x
    explicit simd8float32(float x) : simd256bit(_mm256_set1_ps(x)) {}
    /// Read 8 floats starting at x.
    /// An unaligned load is used so that any float pointer is accepted, as in
    /// the emulated implementation; an aligned load would fault on pointers
    /// that are not 32-byte aligned.
    explicit simd8float32(const float* x) : simd256bit(_mm256_loadu_ps(x)) {}
    /// element-wise product
    simd8float32 operator*(simd8float32 other) const {
        return simd8float32(_mm256_mul_ps(f, other.f));
    }
    /// element-wise sum
    simd8float32 operator+(simd8float32 other) const {
        return simd8float32(_mm256_add_ps(f, other.f));
    }
    /// element-wise difference
    simd8float32 operator-(simd8float32 other) const {
        return simd8float32(_mm256_sub_ps(f, other.f));
    }
    /// comma-separated dump of the 8 elements, each printed with "%g"
    std::string tostring() const {
        float lanes[8];
        storeu((void*)lanes);
        char buf[1000];
        int len = 0;
        for (int k = 0; k < 8; k++) {
            len += sprintf(buf + len, "%g,", lanes[k]);
        }
        // strip last ,
        buf[len - 1] = 0;
        return std::string(buf);
    }
 };
 // horizontal add of adjacent pairs of a and b
 inline simd8float32 hadd(simd8float32 a, simd8float32 b) {
    const __m256 pair_sums = _mm256_hadd_ps(a.f, b.f);
    return simd8float32(pair_sums);
 }
 // interleave the low elements of a and b
 inline simd8float32 unpacklo(simd8float32 a, simd8float32 b) {
    const __m256 mixed = _mm256_unpacklo_ps(a.f, b.f);
    return simd8float32(mixed);
 }
 // interleave the high elements of a and b
 inline simd8float32 unpackhi(simd8float32 a, simd8float32 b) {
    const __m256 mixed = _mm256_unpackhi_ps(a.f, b.f);
    return simd8float32(mixed);
 }
 // element-wise a * b + c, computed with the fused multiply-add intrinsic
 inline simd8float32 fmadd(simd8float32 a, simd8float32 b, simd8float32 c) {
    const __m256 fused = _mm256_fmadd_ps(a.f, b.f, c.f);
    return simd8float32(fused);
 }
 namespace {
 // get even float32's of a and b, interleaved
 inline simd8float32 geteven(simd8float32 a, simd8float32 b) {
    // selector 0x88 == 0 << 0 | 2 << 2 | 0 << 4 | 2 << 6:
    // elements 0 and 2 of a, then elements 0 and 2 of b
    const __m256 evens = _mm256_shuffle_ps(a.f, b.f, 0x88);
    return simd8float32(evens);
 }
 // get odd float32's of a and b, interleaved
 inline simd8float32 getodd(simd8float32 a, simd8float32 b) {
    // selector 0xDD == 1 << 0 | 3 << 2 | 1 << 4 | 3 << 6:
    // elements 1 and 3 of a, then elements 1 and 3 of b
    const __m256 odds = _mm256_shuffle_ps(a.f, b.f, 0xDD);
    return simd8float32(odds);
 }
 // 3 cycles
 // if the lanes are a = [a0 a1] and b = [b0 b1], return [a0 b0]
 inline simd8float32 getlow128(simd8float32 a, simd8float32 b) {
    // selector 0x20 == 0 | 2 << 4
    const __m256 lows = _mm256_permute2f128_ps(a.f, b.f, 0x20);
    return simd8float32(lows);
 }
 // if the lanes are a = [a0 a1] and b = [b0 b1], return [a1 b1]
 inline simd8float32 gethigh128(simd8float32 a, simd8float32 b) {
    // selector 0x31 == 1 | 3 << 4
    const __m256 highs = _mm256_permute2f128_ps(a.f, b.f, 0x31);
    return simd8float32(highs);
 }
 } // namespace
 } // namespace faiss
--- a/src/3rdlib/faiss/utils/simdlib_emulated.h
+++ b/src/3rdlib/faiss/utils/simdlib_emulated.h
@ -0,0 +1,650 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 #pragma once
 #include <algorithm>
 #include <cstdint>
 #include <cstring>
 #include <string>
 namespace faiss {
 /// 32 bytes of storage emulating a 256-bit register with scalar code.
 /// The bytes can be viewed as uint8, uint16, uint32 or float elements.
 struct simd256bit {
    union {
        uint8_t u8[32];
        uint16_t u16[16];
        uint32_t u32[8];
        float f32[8];
    };
    /// leaves the contents uninitialized
    simd256bit() {}
    /// copy 32 bytes from x
    explicit simd256bit(const void* x) {
        memcpy(u8, x, sizeof(u8));
    }
    /// set all 32 bytes to zero
    void clear() {
        memset(u8, 0, sizeof(u8));
    }
    /// copy the 32 bytes to ptr
    void storeu(void* ptr) const {
        memcpy(ptr, u8, sizeof(u8));
    }
    /// copy 32 bytes from ptr
    void loadu(const void* ptr) {
        memcpy(u8, ptr, sizeof(u8));
    }
    /// same as storeu
    void store(void* ptr) const {
        storeu(ptr);
    }
    /// Write the 256 bits as '0'/'1' characters followed by a terminating 0.
    /// Bytes are emitted in order, each one least significant bit first.
    void bin(char bits[257]) const {
        char* out = bits;
        for (int byte = 0; byte < 32; byte++) {
            for (int bit = 0; bit < 8; bit++) {
                *out++ = ((u8[byte] >> bit) & 1) ? '1' : '0';
            }
        }
        *out = 0;
    }
    /// same as bin(char*), returned as a std::string of length 256
    std::string bin() const {
        char text[257];
        bin(text);
        return std::string(text);
    }
 };
 /// vector of 16 elements in uint16, emulated with scalar code
 struct simd16uint16 : simd256bit {
    simd16uint16() {}
    // broadcast constructors: every element receives x
    explicit simd16uint16(int x) {
        set1(x);
    }
    explicit simd16uint16(uint16_t x) {
        set1(x);
    }
    // reinterpret the bytes of another 256-bit value
    explicit simd16uint16(const simd256bit& x) : simd256bit(x) {}
    // copy 16 values from x
    explicit simd16uint16(const uint16_t* x) : simd256bit((const void*)x) {}
    /// Format each of the 16 elements with the printf-style format fmt and
    /// concatenate the results; the last character produced is dropped.
    std::string elements_to_string(const char* fmt) const {
        char res[1000], *ptr = res;
        for (int i = 0; i < 16; i++) {
            ptr += sprintf(ptr, fmt, u16[i]);
        }
        // strip last ,
        ptr[-1] = 0;
        return std::string(res);
    }
    /// comma-separated hexadecimal dump
    std::string hex() const {
        return elements_to_string("%02x,");
    }
    /// comma-separated decimal dump
    std::string dec() const {
        return elements_to_string("%3d,");
    }
    /// apply f to every element of a
    template <typename F>
    static simd16uint16 unary_func(const simd16uint16& a, F&& f) {
        simd16uint16 c;
        for (int j = 0; j < 16; j++) {
            c.u16[j] = f(a.u16[j]);
        }
        return c;
    }
    /// apply f to every pair of corresponding elements of a and b
    template <typename F>
    static simd16uint16 binary_func(
            const simd16uint16& a,
            const simd16uint16& b,
            F&& f) {
        simd16uint16 c;
        for (int j = 0; j < 16; j++) {
            c.u16[j] = f(a.u16[j], b.u16[j]);
        }
        return c;
    }
    /// broadcast x to all elements
    void set1(uint16_t x) {
        for (int i = 0; i < 16; i++) {
            u16[i] = x;
        }
    }
    // shift must be known at compile time
    simd16uint16 operator>>(const int shift) const {
        return unary_func(*this, [shift](uint16_t a) { return a >> shift; });
    }
    // shift must be known at compile time
    simd16uint16 operator<<(const int shift) const {
        return unary_func(*this, [shift](uint16_t a) { return a << shift; });
    }
    /// element-wise add (results are truncated to 16 bits), in place
    simd16uint16 operator+=(const simd16uint16& other) {
        *this = *this + other;
        return *this;
    }
    /// element-wise subtract (results are truncated to 16 bits), in place
    simd16uint16 operator-=(const simd16uint16& other) {
        *this = *this - other;
        return *this;
    }
    simd16uint16 operator+(const simd16uint16& other) const {
        return binary_func(
                *this, other, [](uint16_t a, uint16_t b) { return a + b; });
    }
    simd16uint16 operator-(const simd16uint16& other) const {
        return binary_func(
                *this, other, [](uint16_t a, uint16_t b) { return a - b; });
    }
    /// element-wise bitwise AND
    simd16uint16 operator&(const simd256bit& other) const {
        return binary_func(
                *this, simd16uint16(other), [](uint16_t a, uint16_t b) {
                    return a & b;
                });
    }
    /// element-wise bitwise OR
    simd16uint16 operator|(const simd256bit& other) const {
        return binary_func(
                *this, simd16uint16(other), [](uint16_t a, uint16_t b) {
                    return a | b;
                });
    }
    // returns binary masks
    simd16uint16 operator==(const simd16uint16& other) const {
        return binary_func(*this, other, [](uint16_t a, uint16_t b) {
            return a == b ? 0xffff : 0;
        });
    }
    /// element-wise bitwise NOT
    simd16uint16 operator~() const {
        return unary_func(*this, [](uint16_t a) { return ~a; });
    }
    // get scalar at index 0
    uint16_t get_scalar_0() const {
        return u16[0];
    }
    // mask of elements where this >= thresh
    // 2 bit per component: 16 * 2 = 32 bit
    uint32_t ge_mask(const simd16uint16& thresh) const {
        uint32_t gem = 0;
        for (int j = 0; j < 16; j++) {
            if (u16[j] >= thresh.u16[j]) {
                // shift an unsigned value: for j == 15 a signed 3 << 30
                // would shift into the sign bit of int
                gem |= uint32_t(3) << (j * 2);
            }
        }
        return gem;
    }
    /// mask of elements where this <= thresh (same layout as ge_mask)
    uint32_t le_mask(const simd16uint16& thresh) const {
        return thresh.ge_mask(*this);
    }
    /// mask of elements where this > thresh (complement of le_mask)
    uint32_t gt_mask(const simd16uint16& thresh) const {
        return ~le_mask(thresh);
    }
    /// true when every element is strictly greater than thresh
    bool all_gt(const simd16uint16& thresh) const {
        return le_mask(thresh) == 0;
    }
    // for debugging only
    uint16_t operator[](int i) const {
        return u16[i];
    }
    /// keep the element-wise minimum of this and incoming
    void accu_min(const simd16uint16& incoming) {
        for (int j = 0; j < 16; j++) {
            if (incoming.u16[j] < u16[j]) {
                u16[j] = incoming.u16[j];
            }
        }
    }
    /// keep the element-wise maximum of this and incoming
    void accu_max(const simd16uint16& incoming) {
        for (int j = 0; j < 16; j++) {
            if (incoming.u16[j] > u16[j]) {
                u16[j] = incoming.u16[j];
            }
        }
    }
 };
 // element-wise minimum of two vectors (not a std::min overload: the result
 // mixes elements of av and bv)
 inline simd16uint16 min(const simd16uint16& av, const simd16uint16& bv) {
    auto smaller = [](uint16_t x, uint16_t y) -> uint16_t {
        return y < x ? y : x;
    };
    return simd16uint16::binary_func(av, bv, smaller);
 }
 // element-wise maximum of two vectors
 inline simd16uint16 max(const simd16uint16& av, const simd16uint16& bv) {
    auto larger = [](uint16_t x, uint16_t y) -> uint16_t {
        return x < y ? y : x;
    };
    return simd16uint16::binary_func(av, bv, larger);
 }
 // decompose in 128-lanes: a = (a0, a1), b = (b0, b1)
 // return (a0 + a1, b0 + b1)
 // TODO find a better name
 inline simd16uint16 combine2x2(const simd16uint16& a, const simd16uint16& b) {
    simd16uint16 folded;
    uint16_t* from_a = folded.u16;     // receives a0 + a1
    uint16_t* from_b = folded.u16 + 8; // receives b0 + b1
    for (int k = 0; k < 8; k++) {
        // sums wrap around at 16 bits
        from_a[k] = static_cast<uint16_t>(a.u16[k] + a.u16[k + 8]);
        from_b[k] = static_cast<uint16_t>(b.u16[k] + b.u16[k + 8]);
    }
    return folded;
 }
 // compare d0 and d1 to thr (element >= thr), return 32 bits: bit j is set
 // when d0[j] >= thr[j], bit 16 + j when d1[j] >= thr[j]
 inline uint32_t cmp_ge32(
        const simd16uint16& d0,
        const simd16uint16& d1,
        const simd16uint16& thr) {
    uint32_t gem = 0;
    for (int j = 0; j < 16; j++) {
        // the shifts are done on an unsigned value: a signed 1 << 31 (j == 15
        // in the second test) would shift into the sign bit of int
        if (d0.u16[j] >= thr.u16[j]) {
            gem |= uint32_t(1) << j;
        }
        if (d1.u16[j] >= thr.u16[j]) {
            gem |= uint32_t(1) << (j + 16);
        }
    }
    return gem;
 }
 // compare d0 and d1 to thr (element <= thr), return 32 bits: bit j is set
 // when d0[j] <= thr[j], bit 16 + j when d1[j] <= thr[j]
 inline uint32_t cmp_le32(
        const simd16uint16& d0,
        const simd16uint16& d1,
        const simd16uint16& thr) {
    uint32_t lem = 0;
    for (int j = 0; j < 16; j++) {
        // the shifts are done on an unsigned value: a signed 1 << 31 (j == 15
        // in the second test) would shift into the sign bit of int
        if (d0.u16[j] <= thr.u16[j]) {
            lem |= uint32_t(1) << j;
        }
        if (d1.u16[j] <= thr.u16[j]) {
            lem |= uint32_t(1) << (j + 16);
        }
    }
    return lem;
 }
 // vector of 32 unsigned 8-bit integers
 struct simd32uint8 : simd256bit {
    simd32uint8() {}
    explicit simd32uint8(int x) {
        set1(x);
    }
    explicit simd32uint8(uint8_t x) {
        set1(x);
    }
    explicit simd32uint8(const simd256bit& x) : simd256bit(x) {}
    explicit simd32uint8(const uint8_t* x) : simd256bit((const void*)x) {}
    std::string elements_to_string(const char* fmt) const {
        char res[1000], *ptr = res;
        for (int i = 0; i < 32; i++) {
            ptr += sprintf(ptr, fmt, u8[i]);
        }
        // strip last ,
        ptr[-1] = 0;
        return std::string(res);
    }
    std::string hex() const {
        return elements_to_string("%02x,");
    }
    std::string dec() const {
        return elements_to_string("%3d,");
    }
    void set1(uint8_t x) {
        for (int j = 0; j < 32; j++) {
            u8[j] = x;
        }
    }
    template <typename F>
    static simd32uint8 binary_func(
            const simd32uint8& a,
            const simd32uint8& b,
            F&& f) {
        simd32uint8 c;
        for (int j = 0; j < 32; j++) {
            c.u8[j] = f(a.u8[j], b.u8[j]);
        }
        return c;
    }
    simd32uint8 operator&(const simd256bit& other) const {
        return binary_func(*this, simd32uint8(other), [](uint8_t a, uint8_t b) {
            return a & b;
        });
    }
    simd32uint8 operator+(const simd32uint8& other) const {
        return binary_func(
                *this, other, [](uint8_t a, uint8_t b) { return a + b; });
    }
    // The very important operation that everything relies on
    simd32uint8 lookup_2_lanes(const simd32uint8& idx) const {
        simd32uint8 c;
        for (int j = 0; j < 32; j++) {
            if (idx.u8[j] & 0x80) {
                c.u8[j] = 0;
            } else {
                uint8_t i = idx.u8[j] & 15;
                if (j < 16) {
                    c.u8[j] = u8[i];
                } else {
                    c.u8[j] = u8[16 + i];
                }
            }
        }
        return c;
    }
    // extract + 0-extend lane
    // this operation is slow (3 cycles)
    simd32uint8 operator+=(const simd32uint8& other) {
        *this = *this + other;
        return *this;
    }
    // for debugging only
    uint8_t operator[](int i) const {
        return u8[i];
    }
 };
 // convert with saturation: values of 256 and above become 0xff
 // careful: this does not cross lanes, so the order is weird
 // (output bytes 0-7: a[0..7], 8-15: b[0..7], 16-23: a[8..15], 24-31: b[8..15])
 inline simd32uint8 uint16_to_uint8_saturate(
        const simd16uint16& a,
        const simd16uint16& b) {
    simd32uint8 packed;
    for (int k = 0; k < 8; k++) {
        // the four sources feeding output positions k, 8 + k, 16 + k, 24 + k
        const uint16_t src[4] = {
                a.u16[k], b.u16[k], a.u16[8 + k], b.u16[8 + k]};
        for (int q = 0; q < 4; q++) {
            packed.u8[8 * q + k] = src[q] >= 256 ? 0xff : src[q];
        }
    }
    return packed;
 }
 /// get most significant bit of each byte: bit i of the result is the MSB of
 /// byte i
 inline uint32_t get_MSBs(const simd32uint8& a) {
    uint32_t res = 0;
    for (int i = 0; i < 32; i++) {
        if (a.u8[i] & 0x80) {
            // unsigned shift: a signed 1 << 31 would shift into the sign bit
            res |= uint32_t(1) << i;
        }
    }
    return res;
 }
 /// per byte: take the byte of b where the MSB of the mask byte is set,
 /// otherwise the byte of a
 inline simd32uint8 blendv(
        const simd32uint8& a,
        const simd32uint8& b,
        const simd32uint8& mask) {
    simd32uint8 chosen;
    for (int k = 0; k < 32; k++) {
        const bool take_b = (mask.u8[k] & 0x80) != 0;
        chosen.u8[k] = take_b ? b.u8[k] : a.u8[k];
    }
    return chosen;
 }
 /// vector of 8 unsigned 32-bit integers, emulated with scalar code
 struct simd8uint32 : simd256bit {
    simd8uint32() {}
    // broadcast constructor: every element receives x
    explicit simd8uint32(uint32_t x) {
        set1(x);
    }
    // reinterpret the bytes of another 256-bit value
    explicit simd8uint32(const simd256bit& x) : simd256bit(x) {}
    // copy 32 bytes from x
    explicit simd8uint32(const uint8_t* x) : simd256bit((const void*)x) {}
    /// Format each of the 8 elements with the printf-style format fmt and
    /// concatenate the results; the last character produced is dropped.
    /// The elements are passed to sprintf as uint32_t, so fmt must use an
    /// unsigned conversion.
    std::string elements_to_string(const char* fmt) const {
        char res[1000], *ptr = res;
        for (int i = 0; i < 8; i++) {
            ptr += sprintf(ptr, fmt, u32[i]);
        }
        // strip last ,
        ptr[-1] = 0;
        return std::string(res);
    }
    /// comma-separated hexadecimal dump
    std::string hex() const {
        return elements_to_string("%08x,");
    }
    /// comma-separated decimal dump
    std::string dec() const {
        // %u rather than %d: the elements are unsigned, a signed conversion
        // would print values >= 2^31 as negative numbers
        return elements_to_string("%10u,");
    }
    /// broadcast x to all elements
    void set1(uint32_t x) {
        for (int i = 0; i < 8; i++) {
            u32[i] = x;
        }
    }
 };
 /// vector of 8 single-precision floats, emulated with scalar code
 struct simd8float32 : simd256bit {
    simd8float32() {}
    // reinterpret the bytes of another 256-bit value
    explicit simd8float32(const simd256bit& x) : simd256bit(x) {}
    // broadcast constructor: every element receives x
    explicit simd8float32(float x) {
        set1(x);
    }
    // copy 8 floats from x
    explicit simd8float32(const float* x) {
        loadu(static_cast<const void*>(x));
    }
    /// broadcast x to all elements
    void set1(float x) {
        for (float& lane : f32) {
            lane = x;
        }
    }
    /// apply f to every pair of corresponding elements of a and b
    template <typename F>
    static simd8float32 binary_func(
            const simd8float32& a,
            const simd8float32& b,
            F&& f) {
        simd8float32 out;
        for (int k = 0; k < 8; k++) {
            out.f32[k] = f(a.f32[k], b.f32[k]);
        }
        return out;
    }
    /// element-wise product
    simd8float32 operator*(const simd8float32& other) const {
        auto mul = [](float x, float y) { return x * y; };
        return binary_func(*this, other, mul);
    }
    /// element-wise sum
    simd8float32 operator+(const simd8float32& other) const {
        auto add = [](float x, float y) { return x + y; };
        return binary_func(*this, other, add);
    }
    /// element-wise difference
    simd8float32 operator-(const simd8float32& other) const {
        auto sub = [](float x, float y) { return x - y; };
        return binary_func(*this, other, sub);
    }
    /// comma-separated dump of the 8 elements, each printed with "%g"
    std::string tostring() const {
        char buf[1000];
        int len = 0;
        for (int k = 0; k < 8; k++) {
            len += sprintf(buf + len, "%g,", f32[k]);
        }
        // strip last ,
        buf[len - 1] = 0;
        return std::string(buf);
    }
 };
 // hadd does not cross lanes: within each group of 4 floats the result holds
 // the two pair sums of a followed by the two pair sums of b
 inline simd8float32 hadd(const simd8float32& a, const simd8float32& b) {
    simd8float32 sums;
    for (int base = 0; base < 8; base += 4) {
        sums.f32[base + 0] = a.f32[base + 0] + a.f32[base + 1];
        sums.f32[base + 1] = a.f32[base + 2] + a.f32[base + 3];
        sums.f32[base + 2] = b.f32[base + 0] + b.f32[base + 1];
        sums.f32[base + 3] = b.f32[base + 2] + b.f32[base + 3];
    }
    return sums;
 }
 // within each group of 4 floats, interleave the first two elements of a
 // and b: (a0, b0, a1, b1)
 inline simd8float32 unpacklo(const simd8float32& a, const simd8float32& b) {
    simd8float32 mixed;
    for (int base = 0; base < 8; base += 4) {
        for (int k = 0; k < 2; k++) {
            mixed.f32[base + 2 * k] = a.f32[base + k];
            mixed.f32[base + 2 * k + 1] = b.f32[base + k];
        }
    }
    return mixed;
 }
 // within each group of 4 floats, interleave the last two elements of a
 // and b: (a2, b2, a3, b3)
 inline simd8float32 unpackhi(const simd8float32& a, const simd8float32& b) {
    simd8float32 mixed;
    for (int base = 0; base < 8; base += 4) {
        for (int k = 0; k < 2; k++) {
            mixed.f32[base + 2 * k] = a.f32[base + 2 + k];
            mixed.f32[base + 2 * k + 1] = b.f32[base + 2 + k];
        }
    }
    return mixed;
 }
 // element-wise a * b + c
 inline simd8float32 fmadd(
        const simd8float32& a,
        const simd8float32& b,
        const simd8float32& c) {
    simd8float32 out;
    int k = 0;
    while (k < 8) {
        // kept as one expression, exactly as a multiply followed by an add
        out.f32[k] = a.f32[k] * b.f32[k] + c.f32[k];
        ++k;
    }
    return out;
 }
 namespace {
 // The helpers below are marked inline, like their AVX2 counterparts, so that
 // a translation unit that includes this header without using them does not
 // get unused-function warnings.

 // get even float32's of a and b, interleaved
 // (per group of 4 floats: a0, a2, b0, b2)
 inline simd8float32 geteven(const simd8float32& a, const simd8float32& b) {
    simd8float32 c;
    for (int base = 0; base < 8; base += 4) {
        c.f32[base + 0] = a.f32[base + 0];
        c.f32[base + 1] = a.f32[base + 2];
        c.f32[base + 2] = b.f32[base + 0];
        c.f32[base + 3] = b.f32[base + 2];
    }
    return c;
 }
 // get odd float32's of a and b, interleaved
 // (per group of 4 floats: a1, a3, b1, b3)
 inline simd8float32 getodd(const simd8float32& a, const simd8float32& b) {
    simd8float32 c;
    for (int base = 0; base < 8; base += 4) {
        c.f32[base + 0] = a.f32[base + 1];
        c.f32[base + 1] = a.f32[base + 3];
        c.f32[base + 2] = b.f32[base + 1];
        c.f32[base + 3] = b.f32[base + 3];
    }
    return c;
 }
 // if the lanes are a = [a0 a1] and b = [b0 b1], return [a0 b0]
 inline simd8float32 getlow128(const simd8float32& a, const simd8float32& b) {
    simd8float32 c;
    for (int k = 0; k < 4; k++) {
        c.f32[k] = a.f32[k];
        c.f32[4 + k] = b.f32[k];
    }
    return c;
 }
 // if the lanes are a = [a0 a1] and b = [b0 b1], return [a1 b1]
 inline simd8float32 gethigh128(const simd8float32& a, const simd8float32& b) {
    simd8float32 c;
    for (int k = 0; k < 4; k++) {
        c.f32[k] = a.f32[4 + k];
        c.f32[4 + k] = b.f32[4 + k];
    }
    return c;
 }
 } // namespace
 } // namespace faiss
--- a/src/3rdlib/faiss/utils/simdlib_neon.h
+++ b/src/3rdlib/faiss/utils/simdlib_neon.h
@ -0,0 +1,832 @@
 /**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
 #pragma once
 // TODO: Support big endian (currently supporting only little endian)
 #include <algorithm>
 #include <cstddef>
 #include <cstdint>
 #include <cstdio>
 #include <cstring>
 #include <string>
 #include <type_traits>
 #include <arm_neon.h>
 namespace faiss {
 namespace detail {
 namespace simdlib {
 // View a pair of 128-bit registers as 2 x uint8x16_t. Both halves are
 // reinterpreted with the vreinterpretq_* intrinsics; no value conversion.
 static inline uint8x16x2_t reinterpret_u8(const uint8x16x2_t& v) {
    // already the requested type
    return v;
 }
 static inline uint8x16x2_t reinterpret_u8(const uint16x8x2_t& v) {
    uint8x16x2_t out;
    out.val[0] = vreinterpretq_u8_u16(v.val[0]);
    out.val[1] = vreinterpretq_u8_u16(v.val[1]);
    return out;
 }
 static inline uint8x16x2_t reinterpret_u8(const uint32x4x2_t& v) {
    uint8x16x2_t out;
    out.val[0] = vreinterpretq_u8_u32(v.val[0]);
    out.val[1] = vreinterpretq_u8_u32(v.val[1]);
    return out;
 }
 static inline uint8x16x2_t reinterpret_u8(const float32x4x2_t& v) {
    uint8x16x2_t out;
    out.val[0] = vreinterpretq_u8_f32(v.val[0]);
    out.val[1] = vreinterpretq_u8_f32(v.val[1]);
    return out;
 }
 // View a pair of 128-bit registers as 2 x uint16x8_t. Both halves are
 // reinterpreted with the vreinterpretq_* intrinsics; no value conversion.
 static inline uint16x8x2_t reinterpret_u16(const uint8x16x2_t& v) {
    uint16x8x2_t out;
    out.val[0] = vreinterpretq_u16_u8(v.val[0]);
    out.val[1] = vreinterpretq_u16_u8(v.val[1]);
    return out;
 }
 static inline uint16x8x2_t reinterpret_u16(const uint16x8x2_t& v) {
    // already the requested type
    return v;
 }
 static inline uint16x8x2_t reinterpret_u16(const uint32x4x2_t& v) {
    uint16x8x2_t out;
    out.val[0] = vreinterpretq_u16_u32(v.val[0]);
    out.val[1] = vreinterpretq_u16_u32(v.val[1]);
    return out;
 }
 static inline uint16x8x2_t reinterpret_u16(const float32x4x2_t& v) {
    uint16x8x2_t out;
    out.val[0] = vreinterpretq_u16_f32(v.val[0]);
    out.val[1] = vreinterpretq_u16_f32(v.val[1]);
    return out;
 }
 // View a pair of 128-bit registers as 2 x uint32x4_t. Both halves are
 // reinterpreted with the vreinterpretq_* intrinsics; no value conversion.
 static inline uint32x4x2_t reinterpret_u32(const uint8x16x2_t& v) {
    uint32x4x2_t out;
    out.val[0] = vreinterpretq_u32_u8(v.val[0]);
    out.val[1] = vreinterpretq_u32_u8(v.val[1]);
    return out;
 }
 static inline uint32x4x2_t reinterpret_u32(const uint16x8x2_t& v) {
    uint32x4x2_t out;
    out.val[0] = vreinterpretq_u32_u16(v.val[0]);
    out.val[1] = vreinterpretq_u32_u16(v.val[1]);
    return out;
 }
 static inline uint32x4x2_t reinterpret_u32(const uint32x4x2_t& v) {
    // already the requested type
    return v;
 }
 static inline uint32x4x2_t reinterpret_u32(const float32x4x2_t& v) {
    uint32x4x2_t out;
    out.val[0] = vreinterpretq_u32_f32(v.val[0]);
    out.val[1] = vreinterpretq_u32_f32(v.val[1]);
    return out;
 }
 // View a pair of 128-bit registers as 2 x float32x4_t. Both halves are
 // reinterpreted with the vreinterpretq_* intrinsics; no value conversion.
 static inline float32x4x2_t reinterpret_f32(const uint8x16x2_t& v) {
    float32x4x2_t out;
    out.val[0] = vreinterpretq_f32_u8(v.val[0]);
    out.val[1] = vreinterpretq_f32_u8(v.val[1]);
    return out;
 }
 static inline float32x4x2_t reinterpret_f32(const uint16x8x2_t& v) {
    float32x4x2_t out;
    out.val[0] = vreinterpretq_f32_u16(v.val[0]);
    out.val[1] = vreinterpretq_f32_u16(v.val[1]);
    return out;
 }
 static inline float32x4x2_t reinterpret_f32(const uint32x4x2_t& v) {
    float32x4x2_t out;
    out.val[0] = vreinterpretq_f32_u32(v.val[0]);
    out.val[1] = vreinterpretq_f32_u32(v.val[1]);
    return out;
 }
 static inline float32x4x2_t reinterpret_f32(const float32x4x2_t& v) {
    // already the requested type
    return v;
 }
 // Trait: true when T has a `data` member that reinterpret_u8 turns into a
 // uint8x16x2_t. Used with enable_if to restrict the converting constructors
 // and bitwise operators of the simd wrapper structs.
 template <
        typename T,
        typename U = decltype(reinterpret_u8(std::declval<T>().data))>
 struct is_simd256bit
        : std::integral_constant<
                  bool,
                  std::is_same<U, uint8x16x2_t>::value> {};
 // Expand 32 bytes into 256 characters '0'/'1' followed by a terminating NUL.
 // Bits are emitted least-significant first within each byte, bytes in array
 // order, so bits[8 * k + j] is bit j of bytes[k].
 static inline void bin(const char (&bytes)[32], char bits[257]) {
    char* out = bits;
    for (const char byte : bytes) {
        for (int bit = 0; bit < 8; ++bit) {
            *out++ = '0' + ((byte >> bit) & 1);
        }
    }
    *out = 0;
 }
 // Render the 256 bits held by `simd` as '0'/'1' characters into `bits`
 // (256 characters plus a terminating NUL).
 //   T, N : element type and element count that simd.store() writes;
 //          together they must cover exactly 32 bytes.
 //   simd : object exposing `void store(T*) const`.
 template <typename T, size_t N, typename S>
 static inline void bin(const S& simd, char bits[257]) {
    static_assert(
            std::is_same<void (S::*)(T*) const, decltype(&S::store)>::value,
            "invalid T");
    // The copy below always moves 32 bytes; reject element layouts of any
    // other size at compile time instead of reading past (or short of) ds.
    static_assert(sizeof(T) * N == 32, "T[N] must be exactly 32 bytes");
    T ds[N];
    simd.store(ds);
    char bytes[32];
    std::memcpy(bytes, ds, sizeof(bytes));
    bin(bytes, bits);
 }
 // Convenience wrapper: asks `simd` to write its bit pattern through
 // simd.bin(char*) and returns the NUL-terminated text as a std::string.
 template <typename S>
 static inline std::string bin(const S& simd) {
    std::string text(257, '\0');
    simd.bin(&text[0]);
    // keep only the characters before the terminating NUL
    text.resize(std::strlen(text.c_str()));
    return text;
 }
 // Fill both halves of `d` with the value produced by f(t); f is invoked once.
 template <typename D, typename F, typename T>
 static inline void set1(D& d, F&& f, T t) {
    d.val[0] = f(t);
    d.val[1] = d.val[0];
 }
 // Format the N elements of `simd` with the printf-style `fmt`, one conversion
 // per element, and return the concatenation with its final character removed
 // (the formats used by callers end with ',').
 //   T, N : element type and count written by simd.store().
 //   fmt  : format consuming exactly one argument of (promoted) type T.
 // The text is assembled in a 1000-byte buffer. Output that would not fit is
 // truncated instead of overrunning the buffer, and an empty result no longer
 // writes before the start of the buffer.
 template <typename T, size_t N, typename S>
 static inline std::string elements_to_string(const char* fmt, const S& simd) {
    static_assert(
            std::is_same<void (S::*)(T*) const, decltype(&S::store)>::value,
            "invalid T");
    T elems[N];
    simd.store(elems);
    char res[1000];
    res[0] = 0;
    size_t used = 0;
    for (size_t i = 0; i < N && used + 1 < sizeof(res); ++i) {
        const int n =
                std::snprintf(res + used, sizeof(res) - used, fmt, elems[i]);
        if (n < 0) {
            // encoding error reported by snprintf: keep what we have so far
            break;
        }
        // snprintf returns the untruncated length; clamp to what was stored
        const size_t room = sizeof(res) - used - 1;
        used += static_cast<size_t>(n) < room ? static_cast<size_t>(n) : room;
    }
    // strip last ,
    if (used > 0) {
        res[used - 1] = 0;
    }
    return std::string(res);
 }
 // Apply f independently to each of the two halves of `a` and return the
 // pair of results.
 template <typename T, typename F>
 static inline T unary_func(const T& a, F&& f) {
    T result;
    for (int half = 0; half < 2; ++half) {
        result.val[half] = f(a.val[half]);
    }
    return result;
 }
 // Apply f to the matching halves of `a` and `b` (first with first, second
 // with second) and return the pair of results.
 template <typename T, typename F>
 static inline T binary_func(const T& a, const T& b, F&& f) {
    T result;
    for (int half = 0; half < 2; ++half) {
        result.val[half] = f(a.val[half], b.val[half]);
    }
    return result;
 }
 // Gather the most significant bit of each of the 16 bytes of v into a 16-bit
 // mask: bit i of the result is the top bit of byte i.
 // The bits are funnelled together by repeated "shift right and accumulate":
 // every step views the register with lanes twice as wide and adds the upper
 // half of each lane, shifted down, next to the bits already collected in the
 // lower half. Reading the result through d[0] / d[8] relies on the
 // little-endian layout this header targets.
 static inline uint16_t vmovmask_u8(const uint8x16_t& v) {
    uint8_t d[16];
    // each byte becomes 0 or 1 (its former MSB)
    const auto v2 = vreinterpretq_u16_u8(vshrq_n_u8(v, 7));
    // per 16-bit lane: bit 8 is added at bit 1 -> 2 mask bits in the low byte
    const auto v3 = vreinterpretq_u32_u16(vsraq_n_u16(v2, v2, 7));
    // per 32-bit lane: bits 16-17 are added at bits 2-3 -> 4 mask bits
    const auto v4 = vreinterpretq_u64_u32(vsraq_n_u32(v3, v3, 14));
    // per 64-bit lane: bits 32-35 are added at bits 4-7 -> 8 mask bits in
    // the lowest byte of each 64-bit half
    vst1q_u8(d, vreinterpretq_u8_u64(vsraq_n_u64(v4, v4, 28)));
    // d[0] covers bytes 0..7, d[8] covers bytes 8..15
    return d[0] | static_cast<uint16_t>(d[8]) << 8u;
 }
 // Compare the 16 elements of d0 and the 16 elements of d1 against thr using
 // the lane-wise comparison F, and pack the outcome into 32 bits: bits 0..15
 // come from d0, bits 16..31 from d1, one bit per element.
 template <uint16x8_t (*F)(uint16x8_t, uint16x8_t)>
 static inline uint32_t cmp_xe32(
        const uint16x8x2_t& d0,
        const uint16x8x2_t& d1,
        const uint16x8x2_t& thr) {
    // Narrow each 16-bit comparison result to one byte, so that a single
    // 128-bit register carries all 16 results of one operand.
    const uint8x16_t bytes0 = vmovn_high_u16(
            vmovn_u16(F(d0.val[0], thr.val[0])), F(d0.val[1], thr.val[1]));
    const uint8x16_t bytes1 = vmovn_high_u16(
            vmovn_u16(F(d1.val[0], thr.val[0])), F(d1.val[1], thr.val[1]));
    const uint32_t low = vmovmask_u8(bytes0);
    const uint32_t high = vmovmask_u8(bytes1);
    return low | (high << 16);
 }
 // Shift every 16-bit lane of vec left by the compile-time constant Shift.
 // The template turns the immediate-operand intrinsic into an ordinary
 // function per shift amount, which simd16uint16::operator<< hands to
 // unary_func.
 template <std::uint8_t Shift>
 static inline uint16x8_t vshlq(uint16x8_t vec) {
    return vshlq_n_u16(vec, Shift);
 }
 // Shift every 16-bit lane of vec right by the compile-time constant Shift.
 // The template turns the immediate-operand intrinsic into an ordinary
 // function per shift amount, which simd16uint16::operator>> hands to
 // unary_func.
 template <std::uint8_t Shift>
 static inline uint16x8_t vshrq(uint16x8_t vec) {
    return vshrq_n_u16(vec, Shift);
 }
 } // namespace simdlib
 } // namespace detail
 /// 256-bit value seen as 16 unsigned 16-bit elements. It is held in two
 /// 128-bit NEON registers: data.val[0] carries elements 0..7 and data.val[1]
 /// carries elements 8..15.
 struct simd16uint16 {
    uint16x8x2_t data;
    simd16uint16() = default;
    // broadcast x to all 16 elements
    explicit simd16uint16(int x) : data{vdupq_n_u16(x), vdupq_n_u16(x)} {}
    explicit simd16uint16(uint16_t x) : data{vdupq_n_u16(x), vdupq_n_u16(x)} {}
    // wrap an existing register pair
    explicit simd16uint16(const uint16x8x2_t& v) : data{v} {}
    // bit-for-bit reinterpretation of another 256-bit simd wrapper
    template <
            typename T,
            typename std::enable_if<
                    detail::simdlib::is_simd256bit<T>::value,
                    std::nullptr_t>::type = nullptr>
    explicit simd16uint16(const T& x)
            : data{detail::simdlib::reinterpret_u16(x.data)} {}
    // load 16 consecutive values starting at x
    explicit simd16uint16(const uint16_t* x)
            : data{vld1q_u16(x), vld1q_u16(x + 8)} {}
    // set every element to zero
    void clear() {
        data.val[0] = vdupq_n_u16(static_cast<uint16_t>(0));
        data.val[1] = data.val[0];
    }
    // write the 16 elements to ptr[0..15]
    void storeu(uint16_t* ptr) const {
        for (int half = 0; half < 2; ++half) {
            vst1q_u16(ptr + 8 * half, data.val[half]);
        }
    }
    // read the 16 elements from ptr[0..15]
    void loadu(const uint16_t* ptr) {
        for (int half = 0; half < 2; ++half) {
            data.val[half] = vld1q_u16(ptr + 8 * half);
        }
    }
    // same as storeu in this implementation
    void store(uint16_t* ptr) const {
        storeu(ptr);
    }
    // write the 256 bits as '0'/'1' characters plus a terminating NUL
    void bin(char bits[257]) const {
        namespace sl = detail::simdlib;
        sl::bin<uint16_t, 16u>(*this, bits);
    }
    std::string bin() const {
        namespace sl = detail::simdlib;
        return sl::bin(*this);
    }
    // print each element with the printf-style fmt and concatenate
    std::string elements_to_string(const char* fmt) const {
        namespace sl = detail::simdlib;
        return sl::elements_to_string<uint16_t, 16u>(fmt, *this);
    }
    std::string hex() const {
        return elements_to_string("%02x,");
    }
    std::string dec() const {
        return elements_to_string("%3d,");
    }
    // broadcast x to all 16 elements
    void set1(uint16_t x) {
        data.val[0] = vdupq_n_u16(x);
        data.val[1] = data.val[0];
    }
    // Logical right shift of every element. The shift intrinsic needs an
    // immediate, hence one case per amount; only 0..15 are accepted, any
    // other value is reported through FAISS_THROW_FMT.
    simd16uint16 operator>>(const int shift) const {
        namespace sl = detail::simdlib;
        switch (shift) {
            case 0:
                return *this;
            case 1:
                return simd16uint16{sl::unary_func(data, sl::vshrq<1>)};
            case 2:
                return simd16uint16{sl::unary_func(data, sl::vshrq<2>)};
            case 3:
                return simd16uint16{sl::unary_func(data, sl::vshrq<3>)};
            case 4:
                return simd16uint16{sl::unary_func(data, sl::vshrq<4>)};
            case 5:
                return simd16uint16{sl::unary_func(data, sl::vshrq<5>)};
            case 6:
                return simd16uint16{sl::unary_func(data, sl::vshrq<6>)};
            case 7:
                return simd16uint16{sl::unary_func(data, sl::vshrq<7>)};
            case 8:
                return simd16uint16{sl::unary_func(data, sl::vshrq<8>)};
            case 9:
                return simd16uint16{sl::unary_func(data, sl::vshrq<9>)};
            case 10:
                return simd16uint16{sl::unary_func(data, sl::vshrq<10>)};
            case 11:
                return simd16uint16{sl::unary_func(data, sl::vshrq<11>)};
            case 12:
                return simd16uint16{sl::unary_func(data, sl::vshrq<12>)};
            case 13:
                return simd16uint16{sl::unary_func(data, sl::vshrq<13>)};
            case 14:
                return simd16uint16{sl::unary_func(data, sl::vshrq<14>)};
            case 15:
                return simd16uint16{sl::unary_func(data, sl::vshrq<15>)};
            default:
                FAISS_THROW_FMT("Invalid shift %d", shift);
        }
    }
    // Left shift of every element. Same dispatch scheme and accepted range
    // (0..15) as operator>>.
    simd16uint16 operator<<(const int shift) const {
        namespace sl = detail::simdlib;
        switch (shift) {
            case 0:
                return *this;
            case 1:
                return simd16uint16{sl::unary_func(data, sl::vshlq<1>)};
            case 2:
                return simd16uint16{sl::unary_func(data, sl::vshlq<2>)};
            case 3:
                return simd16uint16{sl::unary_func(data, sl::vshlq<3>)};
            case 4:
                return simd16uint16{sl::unary_func(data, sl::vshlq<4>)};
            case 5:
                return simd16uint16{sl::unary_func(data, sl::vshlq<5>)};
            case 6:
                return simd16uint16{sl::unary_func(data, sl::vshlq<6>)};
            case 7:
                return simd16uint16{sl::unary_func(data, sl::vshlq<7>)};
            case 8:
                return simd16uint16{sl::unary_func(data, sl::vshlq<8>)};
            case 9:
                return simd16uint16{sl::unary_func(data, sl::vshlq<9>)};
            case 10:
                return simd16uint16{sl::unary_func(data, sl::vshlq<10>)};
            case 11:
                return simd16uint16{sl::unary_func(data, sl::vshlq<11>)};
            case 12:
                return simd16uint16{sl::unary_func(data, sl::vshlq<12>)};
            case 13:
                return simd16uint16{sl::unary_func(data, sl::vshlq<13>)};
            case 14:
                return simd16uint16{sl::unary_func(data, sl::vshlq<14>)};
            case 15:
                return simd16uint16{sl::unary_func(data, sl::vshlq<15>)};
            default:
                FAISS_THROW_FMT("Invalid shift %d", shift);
        }
    }
    // in-place element-wise add / subtract; the updated value is returned
    simd16uint16 operator+=(const simd16uint16& other) {
        data = (*this + other).data;
        return *this;
    }
    simd16uint16 operator-=(const simd16uint16& other) {
        data = (*this - other).data;
        return *this;
    }
    // element-wise add
    simd16uint16 operator+(const simd16uint16& other) const {
        return simd16uint16{uint16x8x2_t{
                vaddq_u16(data.val[0], other.data.val[0]),
                vaddq_u16(data.val[1], other.data.val[1])}};
    }
    // element-wise subtract
    simd16uint16 operator-(const simd16uint16& other) const {
        return simd16uint16{uint16x8x2_t{
                vsubq_u16(data.val[0], other.data.val[0]),
                vsubq_u16(data.val[1], other.data.val[1])}};
    }
    // bitwise AND with any 256-bit simd wrapper
    template <
            typename T,
            typename std::enable_if<
                    detail::simdlib::is_simd256bit<T>::value,
                    std::nullptr_t>::type = nullptr>
    simd16uint16 operator&(const T& other) const {
        const uint16x8x2_t rhs = detail::simdlib::reinterpret_u16(other.data);
        return simd16uint16{uint16x8x2_t{
                vandq_u16(data.val[0], rhs.val[0]),
                vandq_u16(data.val[1], rhs.val[1])}};
    }
    // bitwise OR with any 256-bit simd wrapper
    template <
            typename T,
            typename std::enable_if<
                    detail::simdlib::is_simd256bit<T>::value,
                    std::nullptr_t>::type = nullptr>
    simd16uint16 operator|(const T& other) const {
        const uint16x8x2_t rhs = detail::simdlib::reinterpret_u16(other.data);
        return simd16uint16{uint16x8x2_t{
                vorrq_u16(data.val[0], rhs.val[0]),
                vorrq_u16(data.val[1], rhs.val[1])}};
    }
    // element-wise equality; returns per-element binary masks, not a bool
    simd16uint16 operator==(const simd16uint16& other) const {
        return simd16uint16{uint16x8x2_t{
                vceqq_u16(data.val[0], other.data.val[0]),
                vceqq_u16(data.val[1], other.data.val[1])}};
    }
    // bitwise NOT
    simd16uint16 operator~() const {
        return simd16uint16{
                uint16x8x2_t{vmvnq_u16(data.val[0]), vmvnq_u16(data.val[1])}};
    }
    // get scalar at index 0
    uint16_t get_scalar_0() const {
        return vgetq_lane_u16(data.val[0], 0);
    }
    // Mask of elements where this >= thresh, two identical bits per element
    // (16 * 2 = 32 bits): bits 2k and 2k+1 describe element k.
    uint32_t ge_mask(const simd16uint16& thresh) const {
        const uint16x8x2_t cmp = uint16x8x2_t{
                vcgeq_u16(data.val[0], thresh.data.val[0]),
                vcgeq_u16(data.val[1], thresh.data.val[1])};
        // Reduce the 8 comparison lanes of one register to 16 bits by keeping
        // the top two bits of each lane and folding neighbouring lanes
        // together with shift-right-and-accumulate.
        const auto pack_pairs = [](uint16x8_t lanes) -> uint16_t {
            uint16_t folded[8];
            const auto as32 = vreinterpretq_u32_u16(vshrq_n_u16(lanes, 14));
            const auto as64 = vreinterpretq_u64_u32(vsraq_n_u32(as32, as32, 14));
            vst1q_u16(
                    folded,
                    vreinterpretq_u16_u64(vsraq_n_u64(as64, as64, 28)));
            return folded[0] | folded[4] << 8u;
        };
        const uint32_t low = pack_pairs(cmp.val[0]);
        const uint32_t high = pack_pairs(cmp.val[1]);
        return (high << 16u) | low;
    }
    // mask of elements where this <= thresh (same layout as ge_mask)
    uint32_t le_mask(const simd16uint16& thresh) const {
        return thresh.ge_mask(*this);
    }
    // mask of elements where this > thresh: complement of le_mask
    uint32_t gt_mask(const simd16uint16& thresh) const {
        return ~le_mask(thresh);
    }
    // true when no element is <= thresh
    bool all_gt(const simd16uint16& thresh) const {
        return !le_mask(thresh);
    }
    // for debugging only
    uint16_t operator[](int i) const {
        const int half = i >= 8 ? 1 : 0;
        uint16_t lanes[8];
        vst1q_u16(lanes, data.val[half]);
        return lanes[i - 8 * half];
    }
    // keep the element-wise minimum of the current and the incoming value
    void accu_min(const simd16uint16& incoming) {
        data.val[0] = vminq_u16(incoming.data.val[0], data.val[0]);
        data.val[1] = vminq_u16(incoming.data.val[1], data.val[1]);
    }
    // keep the element-wise maximum of the current and the incoming value
    void accu_max(const simd16uint16& incoming) {
        data.val[0] = vmaxq_u16(incoming.data.val[0], data.val[0]);
        data.val[1] = vmaxq_u16(incoming.data.val[1], data.val[1]);
    }
 };
 // Element-wise minimum of two vectors (unlike std::min, which would pick one
 // whole operand).
 inline simd16uint16 min(const simd16uint16& av, const simd16uint16& bv) {
    uint16x8x2_t lanes;
    lanes.val[0] = vminq_u16(av.data.val[0], bv.data.val[0]);
    lanes.val[1] = vminq_u16(av.data.val[1], bv.data.val[1]);
    return simd16uint16{lanes};
 }
 // Element-wise maximum of two vectors.
 inline simd16uint16 max(const simd16uint16& av, const simd16uint16& bv) {
    uint16x8x2_t lanes;
    lanes.val[0] = vmaxq_u16(av.data.val[0], bv.data.val[0]);
    lanes.val[1] = vmaxq_u16(av.data.val[1], bv.data.val[1]);
    return simd16uint16{lanes};
 }
 // Split each operand into its two 128-bit halves, a = (a0, a1) and
 // b = (b0, b1), and return (a0 + a1, b0 + b1) with element-wise sums.
 // TODO find a better name
 inline simd16uint16 combine2x2(const simd16uint16& a, const simd16uint16& b) {
    uint16x8x2_t sums;
    sums.val[0] = vaddq_u16(a.data.val[0], a.data.val[1]);
    sums.val[1] = vaddq_u16(b.data.val[0], b.data.val[1]);
    return simd16uint16{sums};
 }
 // Test d0 >= thr and d1 >= thr element by element and return the 32 result
 // bits: the 16 bits for d0 in the low half, the 16 bits for d1 in the high
 // half.
 inline uint32_t cmp_ge32(
        const simd16uint16& d0,
        const simd16uint16& d1,
        const simd16uint16& thr) {
    namespace sl = detail::simdlib;
    const uint16x8x2_t& first = d0.data;
    const uint16x8x2_t& second = d1.data;
    return sl::cmp_xe32<&vcgeq_u16>(first, second, thr.data);
 }
 // Test d0 <= thr and d1 <= thr element by element and return the 32 result
 // bits: the 16 bits for d0 in the low half, the 16 bits for d1 in the high
 // half.
 inline uint32_t cmp_le32(
        const simd16uint16& d0,
        const simd16uint16& d1,
        const simd16uint16& thr) {
    namespace sl = detail::simdlib;
    const uint16x8x2_t& first = d0.data;
    const uint16x8x2_t& second = d1.data;
    return sl::cmp_xe32<&vcleq_u16>(first, second, thr.data);
 }
 // 256-bit value seen as 32 unsigned 8-bit elements. It is held in two 128-bit
 // NEON registers: data.val[0] carries elements 0..15 and data.val[1] carries
 // elements 16..31.
 struct simd32uint8 {
    uint8x16x2_t data;
    simd32uint8() = default;
    // broadcast x to all 32 elements
    explicit simd32uint8(int x) : data{vdupq_n_u8(x), vdupq_n_u8(x)} {}
    explicit simd32uint8(uint8_t x) : data{vdupq_n_u8(x), vdupq_n_u8(x)} {}
    // wrap an existing register pair
    explicit simd32uint8(const uint8x16x2_t& v) : data{v} {}
    // bit-for-bit reinterpretation of another 256-bit simd wrapper
    template <
            typename T,
            typename std::enable_if<
                    detail::simdlib::is_simd256bit<T>::value,
                    std::nullptr_t>::type = nullptr>
    explicit simd32uint8(const T& x)
            : data{detail::simdlib::reinterpret_u8(x.data)} {}
    // load 32 consecutive bytes starting at x
    explicit simd32uint8(const uint8_t* x)
            : data{vld1q_u8(x), vld1q_u8(x + 16)} {}
    // set every element to zero
    void clear() {
        data.val[0] = vdupq_n_u8(static_cast<uint8_t>(0));
        data.val[1] = data.val[0];
    }
    // write the 32 elements to ptr[0..31]
    void storeu(uint8_t* ptr) const {
        for (int half = 0; half < 2; ++half) {
            vst1q_u8(ptr + 16 * half, data.val[half]);
        }
    }
    // read the 32 elements from ptr[0..31]
    void loadu(const uint8_t* ptr) {
        for (int half = 0; half < 2; ++half) {
            data.val[half] = vld1q_u8(ptr + 16 * half);
        }
    }
    // same as storeu in this implementation
    void store(uint8_t* ptr) const {
        storeu(ptr);
    }
    // write the 256 bits as '0'/'1' characters plus a terminating NUL
    void bin(char bits[257]) const {
        uint8_t raw[32];
        store(raw);
        // the byte-array overload of bin takes chars: view the buffer as such
        const char(&as_chars)[32] = reinterpret_cast<const char(&)[32]>(raw);
        detail::simdlib::bin(as_chars, bits);
    }
    std::string bin() const {
        namespace sl = detail::simdlib;
        return sl::bin(*this);
    }
    // print each element with the printf-style fmt and concatenate
    std::string elements_to_string(const char* fmt) const {
        namespace sl = detail::simdlib;
        return sl::elements_to_string<uint8_t, 32u>(fmt, *this);
    }
    std::string hex() const {
        return elements_to_string("%02x,");
    }
    std::string dec() const {
        return elements_to_string("%3d,");
    }
    // broadcast x to all 32 elements
    void set1(uint8_t x) {
        data.val[0] = vdupq_n_u8(x);
        data.val[1] = data.val[0];
    }
    // bitwise AND with any 256-bit simd wrapper
    template <
            typename T,
            typename std::enable_if<
                    detail::simdlib::is_simd256bit<T>::value,
                    std::nullptr_t>::type = nullptr>
    simd32uint8 operator&(const T& other) const {
        const uint8x16x2_t rhs = detail::simdlib::reinterpret_u8(other.data);
        return simd32uint8{uint8x16x2_t{
                vandq_u8(data.val[0], rhs.val[0]),
                vandq_u8(data.val[1], rhs.val[1])}};
    }
    // element-wise add
    simd32uint8 operator+(const simd32uint8& other) const {
        return simd32uint8{uint8x16x2_t{
                vaddq_u8(data.val[0], other.data.val[0]),
                vaddq_u8(data.val[1], other.data.val[1])}};
    }
    // The very important operation that everything relies on:
    // each 128-bit half of this vector serves as a 16-entry byte table that
    // is indexed by the matching half of idx (vqtbl1q_u8 per half).
    simd32uint8 lookup_2_lanes(const simd32uint8& idx) const {
        uint8x16x2_t picked;
        picked.val[0] = vqtbl1q_u8(data.val[0], idx.data.val[0]);
        picked.val[1] = vqtbl1q_u8(data.val[1], idx.data.val[1]);
        return simd32uint8{picked};
    }
    // in-place element-wise add; the updated value is returned
    simd32uint8 operator+=(const simd32uint8& other) {
        data = (*this + other).data;
        return *this;
    }
    // for debugging only
    uint8_t operator[](int i) const {
        const int half = i >= 16 ? 1 : 0;
        uint8_t lanes[16];
        vst1q_u8(lanes, data.val[half]);
        return lanes[i - 16 * half];
    }
 };
 // Narrow two vectors of 16-bit elements to one vector of bytes with unsigned
 // saturation.
 // careful: this does not cross lanes, so the order is weird. The first
 // 128-bit half of the result is [a elements 0..7, b elements 0..7], the
 // second half is [a elements 8..15, b elements 8..15].
 inline simd32uint8 uint16_to_uint8_saturate(
        const simd16uint16& a,
        const simd16uint16& b) {
    const uint8x16_t first_half =
            vqmovn_high_u16(vqmovn_u16(a.data.val[0]), b.data.val[0]);
    const uint8x16_t second_half =
            vqmovn_high_u16(vqmovn_u16(a.data.val[1]), b.data.val[1]);
    return simd32uint8{uint8x16x2_t{first_half, second_half}};
 }
 /// Collect the most significant bit of each of the 32 bytes: bits 0..15 of
 /// the result come from the first 128-bit half, bits 16..31 from the second.
 inline uint32_t get_MSBs(const simd32uint8& a) {
    const uint32_t low = detail::simdlib::vmovmask_u8(a.data.val[0]);
    const uint32_t high = detail::simdlib::vmovmask_u8(a.data.val[1]);
    return low | (high << 16u);
 }
 /// Use the MSB of each byte of mask to select a byte between a and b:
 /// the result byte is taken from b where the mask byte has its top bit set
 /// and from a otherwise. This is the selection rule of
 /// _mm256_blendv_epi8(a, b, mask), which this function mirrors; the operands
 /// of vbslq_u8 were previously passed the other way round, which inverted
 /// the selection.
 inline simd32uint8 blendv(
        const simd32uint8& a,
        const simd32uint8& b,
        const simd32uint8& mask) {
    const auto msb = vdupq_n_u8(0x80);
    // all-ones in every byte whose mask MSB is set, zero elsewhere
    const uint8x16x2_t msb_mask = {
            vtstq_u8(mask.data.val[0], msb), vtstq_u8(mask.data.val[1], msb)};
    // vbslq_u8(m, x, y) yields x where m is set and y where it is clear,
    // so b has to be the first data operand
    const uint8x16x2_t selected = {
            vbslq_u8(msb_mask.val[0], b.data.val[0], a.data.val[0]),
            vbslq_u8(msb_mask.val[1], b.data.val[1], a.data.val[1])};
    return simd32uint8{selected};
 }
 /// 256-bit value seen as 8 unsigned 32-bit elements. It is held in two
 /// 128-bit NEON registers: data.val[0] carries elements 0..3 and data.val[1]
 /// carries elements 4..7.
 struct simd8uint32 {
    uint32x4x2_t data;
    simd8uint32() = default;
    // broadcast x to all 8 elements
    explicit simd8uint32(uint32_t x) : data{vdupq_n_u32(x), vdupq_n_u32(x)} {}
    // wrap an existing register pair
    explicit simd8uint32(const uint32x4x2_t& v) : data{v} {}
    // bit-for-bit reinterpretation of another 256-bit simd wrapper
    template <
            typename T,
            typename std::enable_if<
                    detail::simdlib::is_simd256bit<T>::value,
                    std::nullptr_t>::type = nullptr>
    explicit simd8uint32(const T& x)
            : data{detail::simdlib::reinterpret_u32(x.data)} {}
    // load 32 raw bytes starting at x and view them as 8 x uint32
    explicit simd8uint32(const uint8_t* x) : simd8uint32(simd32uint8(x)) {}
    // set every element to zero
    void clear() {
        detail::simdlib::set1(data, &vdupq_n_u32, static_cast<uint32_t>(0));
    }
    // write the 8 elements to ptr[0..7]
    void storeu(uint32_t* ptr) const {
        vst1q_u32(ptr, data.val[0]);
        vst1q_u32(ptr + 4, data.val[1]);
    }
    // read the 8 elements from ptr[0..7]
    void loadu(const uint32_t* ptr) {
        data.val[0] = vld1q_u32(ptr);
        data.val[1] = vld1q_u32(ptr + 4);
    }
    // same as storeu in this implementation
    void store(uint32_t* ptr) const {
        storeu(ptr);
    }
    // write the 256 bits as '0'/'1' characters plus a terminating NUL
    void bin(char bits[257]) const {
        detail::simdlib::bin<uint32_t, 8u>(*this, bits);
    }
    std::string bin() const {
        return detail::simdlib::bin(*this);
    }
    // print each element with the printf-style fmt and concatenate
    std::string elements_to_string(const char* fmt) const {
        return detail::simdlib::elements_to_string<uint32_t, 8u>(fmt, *this);
    }
    std::string hex() const {
        return elements_to_string("%08x,");
    }
    // Decimal dump. The elements are unsigned, so they are printed with %u:
    // with %d any value above INT_MAX came out as a negative number.
    std::string dec() const {
        return elements_to_string("%10u,");
    }
    // broadcast x to all 8 elements
    void set1(uint32_t x) {
        detail::simdlib::set1(data, &vdupq_n_u32, x);
    }
 };
 // 256-bit value seen as 8 single-precision floats. It is held in two 128-bit
 // NEON registers: data.val[0] carries elements 0..3 and data.val[1] carries
 // elements 4..7.
 struct simd8float32 {
    float32x4x2_t data;
    simd8float32() = default;
    // broadcast x to all 8 elements
    explicit simd8float32(float x) : data{vdupq_n_f32(x), vdupq_n_f32(x)} {}
    // wrap an existing register pair
    explicit simd8float32(const float32x4x2_t& v) : data{v} {}
    // bit-for-bit reinterpretation of another 256-bit simd wrapper
    template <
            typename T,
            typename std::enable_if<
                    detail::simdlib::is_simd256bit<T>::value,
                    std::nullptr_t>::type = nullptr>
    explicit simd8float32(const T& x)
            : data{detail::simdlib::reinterpret_f32(x.data)} {}
    // load 8 consecutive floats starting at x
    explicit simd8float32(const float* x)
            : data{vld1q_f32(x), vld1q_f32(x + 4)} {}
    // set every element to zero
    void clear() {
        data.val[0] = vdupq_n_f32(0.f);
        data.val[1] = data.val[0];
    }
    // write the 8 elements to ptr[0..7]
    void storeu(float* ptr) const {
        for (int half = 0; half < 2; ++half) {
            vst1q_f32(ptr + 4 * half, data.val[half]);
        }
    }
    // read the 8 elements from ptr[0..7]
    void loadu(const float* ptr) {
        for (int half = 0; half < 2; ++half) {
            data.val[half] = vld1q_f32(ptr + 4 * half);
        }
    }
    // same as storeu in this implementation
    void store(float* ptr) const {
        storeu(ptr);
    }
    // write the 256 bits as '0'/'1' characters plus a terminating NUL
    void bin(char bits[257]) const {
        namespace sl = detail::simdlib;
        sl::bin<float, 8u>(*this, bits);
    }
    std::string bin() const {
        namespace sl = detail::simdlib;
        return sl::bin(*this);
    }
    // element-wise product
    simd8float32 operator*(const simd8float32& other) const {
        return simd8float32{float32x4x2_t{
                vmulq_f32(data.val[0], other.data.val[0]),
                vmulq_f32(data.val[1], other.data.val[1])}};
    }
    // element-wise sum
    simd8float32 operator+(const simd8float32& other) const {
        return simd8float32{float32x4x2_t{
                vaddq_f32(data.val[0], other.data.val[0]),
                vaddq_f32(data.val[1], other.data.val[1])}};
    }
    // element-wise difference
    simd8float32 operator-(const simd8float32& other) const {
        return simd8float32{float32x4x2_t{
                vsubq_f32(data.val[0], other.data.val[0]),
                vsubq_f32(data.val[1], other.data.val[1])}};
    }
    // comma-separated "%g" rendering of the 8 elements
    std::string tostring() const {
        namespace sl = detail::simdlib;
        return sl::elements_to_string<float, 8u>("%g,", *this);
    }
 };
 // Pairwise (horizontal) add, done separately on each 128-bit half, so hadd
 // does not cross lanes: half k of the result is vpaddq_f32 of half k of a and
 // half k of b.
 inline simd8float32 hadd(const simd8float32& a, const simd8float32& b) {
    float32x4x2_t sums;
    sums.val[0] = vpaddq_f32(a.data.val[0], b.data.val[0]);
    sums.val[1] = vpaddq_f32(a.data.val[1], b.data.val[1]);
    return simd8float32{sums};
 }
 // Within each 128-bit half, apply vzip1q_f32 to the matching halves of a
 // and b.
 inline simd8float32 unpacklo(const simd8float32& a, const simd8float32& b) {
    float32x4x2_t zipped;
    zipped.val[0] = vzip1q_f32(a.data.val[0], b.data.val[0]);
    zipped.val[1] = vzip1q_f32(a.data.val[1], b.data.val[1]);
    return simd8float32{zipped};
 }
 // Within each 128-bit half, apply vzip2q_f32 to the matching halves of a
 // and b.
 inline simd8float32 unpackhi(const simd8float32& a, const simd8float32& b) {
    float32x4x2_t zipped;
    zipped.val[0] = vzip2q_f32(a.data.val[0], b.data.val[0]);
    zipped.val[1] = vzip2q_f32(a.data.val[1], b.data.val[1]);
    return simd8float32{zipped};
 }
 // Element-wise a * b + c, evaluated with vfmaq_f32 on each 128-bit half
 // (the accumulator c is the first operand of the intrinsic).
 inline simd8float32 fmadd(
        const simd8float32& a,
        const simd8float32& b,
        const simd8float32& c) {
    float32x4x2_t acc;
    acc.val[0] = vfmaq_f32(c.data.val[0], a.data.val[0], b.data.val[0]);
    acc.val[1] = vfmaq_f32(c.data.val[1], a.data.val[1], b.data.val[1]);
    return simd8float32{acc};
 }
 namespace {
 // Collect the even-indexed floats of a and b, half by half: each 128-bit half
 // of the result is vuzp1q_f32 of the matching halves of a and b.
 simd8float32 geteven(const simd8float32& a, const simd8float32& b) {
    float32x4x2_t evens;
    evens.val[0] = vuzp1q_f32(a.data.val[0], b.data.val[0]);
    evens.val[1] = vuzp1q_f32(a.data.val[1], b.data.val[1]);
    return simd8float32{evens};
 }
 // Collect the odd-indexed floats of a and b, half by half: each 128-bit half
 // of the result is vuzp2q_f32 of the matching halves of a and b.
 simd8float32 getodd(const simd8float32& a, const simd8float32& b) {
    float32x4x2_t odds;
    odds.val[0] = vuzp2q_f32(a.data.val[0], b.data.val[0]);
    odds.val[1] = vuzp2q_f32(a.data.val[1], b.data.val[1]);
    return simd8float32{odds};
 }
 // Concatenate the low halves: if the lanes are a = [a0 a1] and b = [b0 b1],
 // return [a0 b0]. Here this is only a regrouping of the two registers.
 simd8float32 getlow128(const simd8float32& a, const simd8float32& b) {
    float32x4x2_t halves;
    halves.val[0] = a.data.val[0];
    halves.val[1] = b.data.val[0];
    return simd8float32{halves};
 }
 // Concatenate the high halves: if the lanes are a = [a0 a1] and b = [b0 b1],
 // return [a1 b1].
 simd8float32 gethigh128(const simd8float32& a, const simd8float32& b) {
    float32x4x2_t halves;
    halves.val[0] = a.data.val[1];
    halves.val[1] = b.data.val[1];
    return simd8float32{halves};
 }
 } // namespace
 } // namespace faiss
--- a/Show More
+++ b/Show More