HBase 提供了种类丰富的过滤器(filter)来提高数据处理的效率,用户可以通过内置或自定义的过滤器来对数据进行过滤,所有的过滤器都在服务端生效,即谓词下推(predicate push down)。这样可以保证过滤掉的数据不会被传送到客户端,从而减轻网络传输和客户端处理的压力。
1.HBase过滤器 HBase为筛选数据提供了一组过滤器,通过过滤器可以在HBase中数据的多个维度(行、列、数据版本)上对数据进行筛选操作。
在HBase的shell中,通过show_filters指令,可以查看到HBase中内置的一些过滤器。
hbase(main):007:0> show_filters
DependentColumnFilter
KeyOnlyFilter
ColumnCountGetFilter
SingleColumnValueFilter
PrefixFilter
SingleColumnValueExcludeFilter
FirstKeyOnlyFilter
ColumnRangeFilter
TimestampsFilter
FamilyFilter
QualifierFilter
ColumnPrefixFilter
RowFilter
MultipleColumnPrefixFilter
InclusiveStopFilter
PageFilter
ValueFilter
ColumnPaginationFilter
这些过滤器可以分为以下类型:
2.过滤器实例
import java.util.Arrays;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.ColumnPrefixFilter;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.FilterList.Operator;
import org.apache.hadoop.hbase.filter.KeyOnlyFilter;
import org.apache.hadoop.hbase.filter.PageFilter;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;
/**
 * Demonstrates several built-in HBase filters against a small sample table named
 * {@code FileTable}.
 *
 * <p>Each filter test builds one filter, wraps it in a {@link FilterList}, obtains a scanner
 * from {@code HBaseUtil.getScanner} for the row-key arguments {@code "rowkey1"} and
 * {@code "rowkey3"}, and prints what the scan returns to standard output. The filter tests
 * expect the table to have been created by {@link #createTable()} and filled by
 * {@link #addFileDetails()} beforehand.
 *
 * <p>Created by jixin on 18-2-25.
 */
public class HBaseFilterTest {

  /** Name of the sample table used by every test. */
  private static final String TABLE_NAME = "FileTable";

  /** Column family holding the file's name, type and size. */
  private static final String FILE_INFO_FAMILY = "fileInfo";

  /** Column family holding the file's creator. */
  private static final String SAVE_INFO_FAMILY = "saveInfo";

  /** First row-key argument handed to {@code HBaseUtil.getScanner}. */
  private static final String START_ROW = "rowkey1";

  /** Second row-key argument handed to {@code HBaseUtil.getScanner}. */
  private static final String STOP_ROW = "rowkey3";

  /** Creates the sample table with the {@code fileInfo} and {@code saveInfo} column families. */
  @Test
  public void createTable() {
    HBaseUtil.createTable(TABLE_NAME, new String[]{FILE_INFO_FAMILY, SAVE_INFO_FAMILY});
  }

  /** Inserts three sample rows ({@code rowkey1} .. {@code rowkey3}) describing files. */
  @Test
  public void addFileDetails() {
    putFileRow("rowkey1", "file1.txt", "txt");
    putFileRow("rowkey2", "file2.jpg", "jpg");
    putFileRow("rowkey3", "file3.jpg", "jpg");
  }

  /** Scans with a {@link RowFilter} that accepts only the row whose key equals {@code rowkey1}. */
  @Test
  public void rowFilterTest() {
    Filter filter = new RowFilter(CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("rowkey1")));
    FilterList filterList = new FilterList(Operator.MUST_PASS_ONE, Arrays.asList(filter));
    scanAndPrint(filterList, false);
  }

  /** Scans with a {@link PrefixFilter} that accepts row keys starting with {@code rowkey2}. */
  @Test
  public void prefixFilterTest() {
    Filter filter = new PrefixFilter(Bytes.toBytes("rowkey2"));
    FilterList filterList = new FilterList(Operator.MUST_PASS_ALL, Arrays.asList(filter));
    scanAndPrint(filterList, false);
  }

  /**
   * Scans with a {@link KeyOnlyFilter} constructed with {@code true}.
   *
   * <p>NOTE(review): {@code KeyOnlyFilter(true)} is documented to replace each cell value with
   * its length, so the printed {@code fileName} is presumably not the stored text - confirm
   * against the HBase version in use.
   */
  @Test
  public void keyOnlyFilterTest() {
    Filter filter = new KeyOnlyFilter(true);
    FilterList filterList = new FilterList(Operator.MUST_PASS_ALL, Arrays.asList(filter));
    scanAndPrint(filterList, false);
  }

  /**
   * Scans with a {@link ColumnPrefixFilter} for the qualifier prefix {@code nam} and prints
   * both the {@code name} and {@code type} columns.
   *
   * <p>NOTE(review): the {@code type} qualifier does not start with {@code nam}, so
   * {@code fileType} is expected to print as {@code null} - confirm.
   */
  @Test
  public void columnPrefixFilterTest() {
    Filter filter = new ColumnPrefixFilter(Bytes.toBytes("nam"));
    FilterList filterList = new FilterList(Operator.MUST_PASS_ALL, Arrays.asList(filter));
    scanAndPrint(filterList, true);
  }

  /**
   * Writes the four sample cells (name, type, size, creator) for one file row.
   *
   * @param rowKey row key to write to
   * @param fileName value stored in {@code fileInfo:name}
   * @param fileType value stored in {@code fileInfo:type}
   */
  private static void putFileRow(String rowKey, String fileName, String fileType) {
    HBaseUtil.putRow(TABLE_NAME, rowKey, FILE_INFO_FAMILY, "name", fileName);
    HBaseUtil.putRow(TABLE_NAME, rowKey, FILE_INFO_FAMILY, "type", fileType);
    HBaseUtil.putRow(TABLE_NAME, rowKey, FILE_INFO_FAMILY, "size", "1024");
    HBaseUtil.putRow(TABLE_NAME, rowKey, SAVE_INFO_FAMILY, "creator", "jixin");
  }

  /**
   * Runs a scan with the given filters and prints the row key and {@code fileInfo:name} of
   * every returned row, optionally followed by {@code fileInfo:type}.
   *
   * <p>The scanner is managed by try-with-resources so that it is closed even when iterating
   * or printing throws; a {@code null} scanner is tolerated and simply produces no output.
   *
   * @param filterList filters to apply to the scan
   * @param printFileType whether to also print the {@code fileInfo:type} column
   */
  private static void scanAndPrint(FilterList filterList, boolean printFileType) {
    try (ResultScanner scanner =
        HBaseUtil.getScanner(TABLE_NAME, START_ROW, STOP_ROW, filterList)) {
      if (scanner == null) {
        return;
      }
      scanner.forEach(result -> {
        System.out.println("rowkey=" + Bytes.toString(result.getRow()));
        System.out.println("fileName=" + Bytes
            .toString(result.getValue(Bytes.toBytes(FILE_INFO_FAMILY), Bytes.toBytes("name"))));
        if (printFileType) {
          System.out.println("fileType=" + Bytes
              .toString(result.getValue(Bytes.toBytes(FILE_INFO_FAMILY), Bytes.toBytes("type"))));
        }
      });
    }
  }
}