Rest Client操作向量索引示例
更新时间:2024-02-27
本文基于Java High Level REST Client 7.x 和 Java Low Level REST Client 7.x 版本,为您介绍Elasticsearch Java API的用法。
准备工作
- 安装Java,要求JDK版本为1.8及以上。
- 创建Baidu Elasticsearch实例,版本7.10.2。
本文采用 High Level Client 执行创建索引、写入数据、refresh 以及删除索引操作,采用 Low Level Client 执行检索数据操作。
注意 High Level Client能够向上兼容,但为了保证最大程度地使用最新版客户端的特性,推荐High Level Client版本与集群版本一致。
注意 Low Level Client能够与任何版本的Elasticsearch兼容,因此客户端版本可以为任何版本,本文以7.10.2版本为例。
- 创建Java Maven工程,并将如下的pom依赖添加到Java工程的pom.xml文件中。
pom依赖
<!-- Elasticsearch High Level REST Client; keep the version aligned with the cluster version. -->
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-high-level-client</artifactId>
<version>7.10.2</version>
</dependency>
<!-- Log4j 2.x releases before 2.15.0 (including 2.7) are affected by CVE-2021-44228 (Log4Shell); -->
<!-- use 2.17.1 or later. log4j-core and log4j-api must always share the same version. -->
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.17.1</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
<version>2.17.1</version>
</dependency>
<!-- Elasticsearch Low Level REST Client, used here for the vector search request. -->
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-client</artifactId>
<version>7.10.2</version>
</dependency>
<!-- Apache HttpComponents libraries used by the REST clients. -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpasyncclient</artifactId>
<version>4.1.4</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpcore-nio</artifactId>
<version>4.4.13</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.6</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpcore</artifactId>
<version>4.4.12</version>
</dependency>
示例
以下示例采用 High Level Client 执行创建索引操作、写入数据、refresh 操作以及删除索引操作, 采用 Low Level Client 执行检索数据操作,可供参考。
import org.apache.http.HttpHost;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.entity.ContentType;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.apache.http.impl.nio.client.HttpAsyncClientBuilder;
import org.apache.http.nio.entity.NStringEntity;
import org.apache.http.util.EntityUtils;
import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
import org.elasticsearch.action.admin.indices.refresh.RefreshRequest;
import org.elasticsearch.action.admin.indices.refresh.RefreshResponse;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestClientBuilder;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.Request;
import org.elasticsearch.client.Response;
import org.elasticsearch.client.indices.CreateIndexRequest;
import org.elasticsearch.client.indices.CreateIndexResponse;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import java.io.IOException;
import java.util.Random;
/**
 * End-to-end example against an Elasticsearch 7.10.2 cluster with vector index support.
 *
 * <p>The High Level REST Client is used to create the index, bulk-write documents, refresh
 * and delete the index; the Low Level REST Client is used to send the knn search request.
 * Values written as <code>{...}</code> are placeholders that must be replaced before running.
 */
public class RestClientTest710 {

    /** Name of the demo index that is created, queried and finally deleted. */
    private static final String INDEX_NAME = "my_index_vec";

    /** Vector dimension; must equal the "dims" value in the mapping and the query vector length. */
    private static final int VECTOR_DIMS = 8;

    /** Number of random documents written by the bulk request. */
    private static final int DOC_COUNT = 100;

    /** Shared generator for the random demo vectors. */
    private static final Random RANDOM = new Random();

    /**
     * Runs the whole demo: create index, bulk write, refresh, knn search, delete index.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        final CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
        // The user name and password are the ones set when the Elasticsearch instance was created.
        credentialsProvider.setCredentials(AuthScope.ANY, new UsernamePasswordCredentials("{访问用户名}", "{访问密码}"));

        // try-with-resources closes both clients even if closing one of them throws;
        // resources are closed in reverse order (high level client first, then the low level one).
        try (RestClient restClient = newClientBuilder(credentialsProvider).build();
             RestHighLevelClient client = new RestHighLevelClient(newClientBuilder(credentialsProvider))) {

            // Create the index.
            CreateIndexRequest createRequest = new CreateIndexRequest(INDEX_NAME);
            // Index-level settings associated with the new index.
            createRequest.settings(Settings.builder()
                    .put("index.number_of_shards", 2)
                    .put("index.number_of_replicas", 1)
                    .put("index.refresh_interval", "10s")
                    .put("knn", true)
            );
            // Field mapping supplied at index creation time.
            createRequest.mapping(
                    "{\n" +
                    " \"properties\": {\n" +
                    " \"id\": {\n" +
                    " \"type\": \"long\"\n" +
                    " },\n" +
                    " \"field_hnsw\": {\n" +
                    " \"type\": \"bpack_vector\",\n" +
                    " \"index_type\": \"hnsw\",\n" +
                    " \"dims\": 8,\n" +
                    " \"space_type\": \"l2\",\n" +
                    " \"parameters\": {\n" +
                    " \"m\": 32,\n" +
                    " \"ef_construction\": 256\n" +
                    " }\n" +
                    " }\n" +
                    " }\n" +
                    "}",
                    XContentType.JSON);
            // Create the index synchronously.
            CreateIndexResponse createIndexResponse = client.indices().create(createRequest, RequestOptions.DEFAULT);
            System.out.println("create index successfully! response : " + createIndexResponse.toString());

            // Write the documents in one bulk request.
            BulkRequest bulkRequest = new BulkRequest();
            for (int i = 0; i < DOC_COUNT; i++) {
                // The document source is provided as an XContentBuilder, which produces the JSON.
                XContentBuilder jsonBuilder = XContentFactory.jsonBuilder();
                jsonBuilder.startObject();
                {
                    jsonBuilder.field("id", i);
                    jsonBuilder.field("field_hnsw", RandomVector(VECTOR_DIMS));
                }
                jsonBuilder.endObject();
                bulkRequest.add(new IndexRequest(INDEX_NAME).source(jsonBuilder));
            }
            BulkResponse bulkResponse = client.bulk(bulkRequest, RequestOptions.DEFAULT);
            // A bulk call can return normally while individual items failed, so check explicitly
            // instead of reporting success unconditionally.
            if (bulkResponse.hasFailures()) {
                System.out.println("bulk finished with failures : " + bulkResponse.buildFailureMessage());
            } else {
                System.out.println("bulk success ! response : " + bulkResponse.toString());
            }

            // Written data only becomes searchable after a refresh (configured above as every 10s);
            // refresh manually so the search below can see the documents immediately.
            RefreshRequest refreshRequest = new RefreshRequest(INDEX_NAME);
            RefreshResponse refreshResponse = client.indices().refresh(refreshRequest, RequestOptions.DEFAULT);
            System.out.println("refresh success ! response : " + refreshResponse.toString());

            // Search the vector data through the low level client.
            Request request = new Request(
                    "POST",
                    "/" + INDEX_NAME + "/_search");
            // Request body: knn query on field_hnsw; the query vector has VECTOR_DIMS elements.
            request.setEntity(new NStringEntity("{\n" +
                    " \"size\": 5, \n" +
                    " \"_source\": [\"id\"], \n" +
                    " \"query\": {\n" +
                    " \"knn\": { \n" +
                    " \"field_hnsw\": { \n" +
                    " \"vector\": [0.2, 0.3, 0.5, 0.6, 0.7, 0.8, 0.9 ,0.1], \n" +
                    " \"k\": 10, \n" +
                    " \"ef\": 256 \n" +
                    " }\n" +
                    " }\n" +
                    " }\n" +
                    "}", ContentType.APPLICATION_JSON));
            // Send the search request and print the raw JSON response.
            Response response = restClient.performRequest(request);
            System.out.println(EntityUtils.toString(response.getEntity()));

            // Delete the demo index.
            DeleteIndexRequest deleteIndexRequest = new DeleteIndexRequest(INDEX_NAME);
            AcknowledgedResponse deleteIndexResponse = client.indices().delete(deleteIndexRequest, RequestOptions.DEFAULT);
            System.out.println("delete index successfully! response : " + deleteIndexResponse.toString());
        } catch (IOException ioException) {
            // Covers I/O failures of any request as well as failures while closing the clients.
            ioException.printStackTrace();
        }
    }

    /**
     * Builds a REST client builder for the cluster endpoint with basic authentication configured.
     * The ES HTTP URL can be looked up in the Baidu Elasticsearch console.
     *
     * @param credentialsProvider provider holding the user name and password
     * @return a builder usable for both the low level and the high level client
     */
    private static RestClientBuilder newClientBuilder(final CredentialsProvider credentialsProvider) {
        return RestClient.builder(new HttpHost("{ES HTTP URL}", 8200))
                .setHttpClientConfigCallback(
                        httpClientBuilder -> httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider));
    }

    /**
     * Generates a random vector of the given dimension.
     *
     * @param dim number of elements in the vector
     * @return a new array of {@code dim} floats, each in the range [0, 1)
     */
    public static float[] RandomVector(int dim) {
        float[] vec = new float[dim];
        for (int i = 0; i < dim; i++) {
            // nextFloat() is available on JDK 8+; the bounded nextFloat(float) overload needs Java 17.
            vec[i] = RANDOM.nextFloat();
        }
        return vec;
    }
}
以上示例代码中带{}的参数需要替换为您具体业务的参数,详情请参见代码注释。
更多Java High Level REST Client的使用特性,请参见Java High Level REST Client官方文档。 更多Java Low Level REST Client的使用特性,请参见Java Low Level REST Client官方文档。