过滤功能。
在查询数据的时候可以在searchRequest中指定一些参数,实现过滤、分页、排序、高亮等功能。
首先看一下如何在查询的时候指定过滤条件。Java代码实现如下:
package com.simoniu.db_elasticsearch.filter;
import org.apache.http.HttpHost;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.builder.SearchSourceBuilder;
/**
 * A detailed walkthrough of Search: demonstrates the filter (query) conditions that
 * can be attached to a {@code SearchRequest}.
 * Created by simoniu
 */
public class EsFilterOperateDemo {

    /**
     * Connects to the cluster nodes {@code master}, {@code slave1} and {@code slave2},
     * runs a multi-match query for "tom" over the {@code name} and {@code tag} fields of
     * the {@code myschool} index, then prints the total hit count and the JSON source of
     * every returned hit.
     *
     * <p>The commented-out statements are alternative queries; enable one at a time,
     * because a later {@code query(...)} call overwrites an earlier one.
     *
     * @param args unused
     * @throws Exception if the search request fails or the client cannot be closed
     */
    public static void main(String[] args) throws Exception {
        // Obtain the RestClient connection. try-with-resources guarantees that the client
        // is closed even when the search below throws, so connections are never leaked.
        try (RestHighLevelClient client = new RestHighLevelClient(
                RestClient.builder(
                        new HttpHost("master", 9200, "http"),
                        new HttpHost("slave1", 9200, "http"),
                        new HttpHost("slave2", 9200, "http")))) {

            SearchRequest searchRequest = new SearchRequest();
            // Specify the index: one or more names are supported, as are wildcards, e.g. user*
            searchRequest.indices("myschool");

            // Set the search type
            //searchRequest.searchType(SearchType.DEFAULT);
            //searchRequest.searchType(SearchType.DFS_QUERY_THEN_FETCH);

            // Specify the query conditions
            SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();

            // Match everything; may be omitted, since the default is to query all data in the index
            //searchSourceBuilder.query(QueryBuilders.matchAllQuery());

            // Filter on the value of a given field. Note: the query text is analyzed (tokenized).
            // If several queries are set, a later query overwrites the earlier one.
            // Query on string content; wildcards are not supported
            //searchSourceBuilder.query(QueryBuilders.matchQuery("name","tom"));
            //searchSourceBuilder.query(QueryBuilders.matchQuery("age","17"));// for age, either a string or a number may be given

            // Query on string content with wildcard support; performance is poor, think of it as a full scan
            //searchSourceBuilder.query(QueryBuilders.wildcardQuery("name","t*"));

            // Range query, mainly for numeric types; use from+to, or gt/gte + lt/lte
            //searchSourceBuilder.query(QueryBuilders.rangeQuery("age").from(0).to(20));
            //searchSourceBuilder.query(QueryBuilders.rangeQuery("age").gte(0).lte(20));
            // To leave a bound open, pass null
            //searchSourceBuilder.query(QueryBuilders.rangeQuery("age").from(0).to(null));

            // Several conditions at once; they can be combined with and (must) or or (should)
            //searchSourceBuilder.query(QueryBuilders.boolQuery().should(QueryBuilders.matchQuery("name","tom")).should(QueryBuilders.matchQuery("age",19)));
            // When combining conditions, a boost can be set per condition so that documents
            // matching the higher-boosted condition are ranked first in the result list
            //searchSourceBuilder.query(QueryBuilders.boolQuery().should(QueryBuilders.matchQuery("name","tom").boost(1.0f)).should(QueryBuilders.matchQuery("age",19).boost(5.0f)));

            // Filter on the values of several fields. Note: the fields must share the same data
            // type, otherwise an error is raised; a field that does not exist does not cause an error
            searchSourceBuilder.query(QueryBuilders.multiMatchQuery("tom", "name", "tag"));

            // queryStringQuery supports the native Lucene query syntax and is more flexible.
            // Note: keywords such as AND, OR and TO must be upper case
            //searchSourceBuilder.query(QueryBuilders.queryStringQuery("name:tom AND age:[15 TO 30]"));
            //searchSourceBuilder.query(QueryBuilders.boolQuery().must(QueryBuilders.matchQuery("name","tom")).must(QueryBuilders.rangeQuery("age").from(15).to(30)));
            // queryStringQuery supports wildcards, but performance is also rather poor
            //searchSourceBuilder.query(QueryBuilders.queryStringQuery("name:t*"));

            // Exact query: the query text is not analyzed. Fields such as person names, phone
            // numbers, host names and e-mail addresses usually do not need analysis.
            // Given a test document with name=刘德华: by default 刘德华 is split into the three
            // terms 刘, 德, 华 at index time, so the exact (term) query below finds nothing,
            // whereas matchQuery does find it
            //searchSourceBuilder.query(QueryBuilders.matchQuery("name","刘德华"));
            //searchSourceBuilder.query(QueryBuilders.termQuery("name","刘德华"));

            // Normally a field that should be searched exactly with termQuery must not be analyzed.
            // Sometimes, however, a field has already been indexed with analysis and an exact
            // query is wanted later, while rebuilding the index is not realistic. What then?
            //searchSourceBuilder.query(QueryBuilders.queryStringQuery("name:\"刘德华\""));

            // By default matchQuery ORs the analyzed terms: documents matching any one term are returned
            //searchSourceBuilder.query(QueryBuilders.matchQuery("name","刘德华"));
            // To AND the analyzed terms of a matchQuery instead, set the operator.
            // This also addresses the "already analyzed but exact query wanted" case (indirectly:
            // it actually returns documents that contain all three terms 刘, 德 and 华).
            // Requires: import org.elasticsearch.index.query.Operator;
            //searchSourceBuilder.query(QueryBuilders.matchQuery("name","刘德华").operator(Operator.AND));

            // Attach the SearchSourceBuilder to the request
            searchRequest.source(searchSourceBuilder);

            // Execute the query
            SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);

            // Get the returned results
            SearchHits hits = searchResponse.getHits();

            // Get the total number of matching documents
            long numHits = hits.getTotalHits().value;
            System.out.println("数据总数:" + numHits);

            // Get the concrete hits and print each one's source as a JSON string
            SearchHit[] searchHits = hits.getHits();
            for (SearchHit hit : searchHits) {
                String sourceAsString = hit.getSourceAsString();
                System.out.println(sourceAsString);
            }
        } // the client connection is closed here
    }
}
默认情况下,ES会对"刘德华"这个词语进行分词,效果如下(这里使用的是默认分词器):
[es@master cerebro-0.9.4]$ curl -H "Content-Type: application/json" -XPOST 'http://master:9200/myschool/_analyze?pretty' -d '{"text":"刘德华"}'
{
"tokens" : [
{
"token" : "刘",
"start_offset" : 0,
"end_offset" : 1,
"type" : "<IDEOGRAPHIC>",
"position" : 0
},
{
"token" : "德",
"start_offset" : 1,
"end_offset" : 2,
"type" : "<IDEOGRAPHIC>",
"position" : 1
},
{
"token" : "华",
"start_offset" : 2,
"end_offset" : 3,
"type" : "<IDEOGRAPHIC>",
"position" : 2
}
]
}
添加两条测试数据,用来测试如何精确查找"刘德华"。
curl -H "Content-Type: application/json" -XPOST 'http://master:9200/myschool/_doc/16' -d '{"name":"刘德华","age":59}'
curl -H "Content-Type: application/json" -XPOST 'http://master:9200/myschool/_doc/17' -d '{"name":"刘老虎","age":47}'
结合上面的代码注释,我们发现要想精确查找"刘德华",可以采用以下三种方案(第一种查询的是不分词的name.keyword子字段,前提是name字段带有keyword子字段,例如由ES动态映射自动生成;第三种是间接实现,实际匹配的是同时包含刘、德、华这三个词的文档):
searchSourceBuilder.query(QueryBuilders.matchQuery("name.keyword","刘德华"));
searchSourceBuilder.query(QueryBuilders.queryStringQuery("name:\"刘德华\""));
searchSourceBuilder.query(QueryBuilders.matchQuery("name","刘德华").operator(Operator.AND));
运行结果:
数据总数:1
{"name":"刘德华","age":59}