if (this.reader != null) {
  throw new IllegalAccessError("Already open");
}
// Open the StoreFile.Reader
this.reader = fileInfo.open(this.fs, this.cacheConf, canUseDropBehind);
// Load up indices and fileinfo. This also loads Bloom filter type.
metadataMap = Collections.unmodifiableMap(this.reader.loadFileInfo());
// Read in our metadata.
byte[] b = metadataMap.get(MAX_SEQ_ID_KEY);
if (b != null) {
  // By convention, if halfhfile, top half has a sequence number > bottom
  // half. That's why we add one in below. It's done in case the two halves
  // are ever merged back together -- rare. Without it, on open of store,
  // since store files are distinguished by sequence id, the one half would
  // subsume the other.
  this.sequenceid = Bytes.toLong(b);
  if (fileInfo.isTopReference()) {
    this.sequenceid += 1;
  }
}
if (isBulkLoadResult()) {
  // generate the sequenceId from the fileName
  // fileName is of the form <randomName>_SeqId_<id-when-loaded>_
  String fileName = this.getPath().getName();
  // Use lastIndexOf() to get the last, most recent bulk load seqId.
  int startPos = fileName.lastIndexOf("SeqId_");
  if (startPos != -1) {
    this.sequenceid = Long.parseLong(fileName.substring(startPos + 6,
        fileName.indexOf('_', startPos + 6)));
    // Handle reference files as done above.
    if (fileInfo.isTopReference()) {
      this.sequenceid += 1;
    }
  }
  this.reader.setBulkLoaded(true);
}
this.reader.setSequenceID(this.sequenceid);
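The file-name convention above is easy to exercise in isolation. Below is a minimal standalone sketch (not HBase code; the example file name is made up) that pulls the sequence id out of a name of the form <randomName>_SeqId_<id-when-loaded>_:

// Standalone sketch, assuming a hypothetical bulk-loaded file name.
public class BulkLoadSeqIdSketch {
  static long parseSeqId(String fileName) {
    // Mirrors the lastIndexOf("SeqId_") logic shown above.
    int startPos = fileName.lastIndexOf("SeqId_");
    if (startPos == -1) {
      return -1L; // not a bulk-loaded file name
    }
    int idStart = startPos + "SeqId_".length();
    int idEnd = fileName.indexOf('_', idStart);
    return Long.parseLong(fileName.substring(idStart, idEnd));
  }

  public static void main(String[] args) {
    // hypothetical name of the form <randomName>_SeqId_<id-when-loaded>_
    System.out.println(parseSeqId("3f2e1abc_SeqId_42_")); // prints 42
  }
}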
b = metadataMap.get(HFileWriterV2.MAX_MEMSTORE_TS_KEY);
if (b != null) {
  this.maxMemstoreTS = Bytes.toLong(b);
}
b = metadataMap.get(MAJOR_COMPACTION_KEY);
if (b != null) {
  boolean mc = Bytes.toBoolean(b);
  if (this.majorCompaction == null) {
    this.majorCompaction = new AtomicBoolean(mc);
  } else {
    this.majorCompaction.set(mc);
  }
} else {
  // Presume it is not major compacted if it doesn't explicitly say so.
  // HFileOutputFormat explicitly sets the major compacted key.
  this.majorCompaction = new AtomicBoolean(false);
}
b = metadataMap.get(EXCLUDE_FROM_MINOR_COMPACTION_KEY);
this.excludeFromMinorCompaction = (b != null && Bytes.toBoolean(b));
// The Bloom filter type is read here.
BloomType hfileBloomType = reader.getBloomFilterType();
if (cfBloomType != BloomType.NONE) {
  reader.loadBloomfilter(BlockType.GENERAL_BLOOM_META);
  if (hfileBloomType != cfBloomType) {
    LOG.info("HFile Bloom filter type for "
        + reader.getHFileReader().getName() + ": " + hfileBloomType
        + ", but " + cfBloomType + " specified in column family "
        + "configuration");
  }
} else if (hfileBloomType != BloomType.NONE) {
  LOG.info("Bloom filter turned off by CF config for "
      + reader.getHFileReader().getName());
}
// load delete family bloom filter
reader.loadBloomfilter(BlockType.DELETE_FAMILY_BLOOM_META);

try {
  this.reader.timeRange = TimeRangeTracker.getTimeRange(metadataMap.get(TIMERANGE_KEY));
} catch (IllegalArgumentException e) {
  LOG.error("Error reading timestamp range data from meta -- "
      + "proceeding without", e);
  this.reader.timeRange = null;
}
return this.reader;
When deciding whether a store file needs to be read at all, the Bloom filter is consulted by the following call inside boolean org.apache.hadoop.hbase.regionserver.StoreFileScanner.shouldUseScanner(Scan scan, SortedSet<byte[]> columns, long oldestUnexpiredTS).
// The Bloom filter is applied here. Inside this method, bloomFilter.contains is
// eventually called, and contains first uses the block index to locate the right chunk.
reader.passesBloomFilter(scan, columns)
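Before looking at the chunk lookup, it helps to recall what contains-style semantics mean for the call above: a negative answer is definitive and lets the scan skip the file, while a positive answer only means the key might be present. The toy filter below is my own illustration of that behavior, not HBase's Bloom filter implementation:

// Toy illustration of no-false-negative / possible-false-positive semantics.
import java.util.BitSet;

public class ToyBloomFilter {
  private final BitSet bits = new BitSet(1 << 16);

  void put(byte[] key) {
    for (int seed = 1; seed <= 3; seed++) {
      bits.set(hash(key, seed));
    }
  }

  boolean mightContain(byte[] key) {
    for (int seed = 1; seed <= 3; seed++) {
      if (!bits.get(hash(key, seed))) {
        return false; // definitely absent -> the file can be skipped
      }
    }
    return true; // possibly present -> the block still has to be read
  }

  private int hash(byte[] key, int seed) {
    int h = seed;
    for (byte b : key) {
      h = h * 31 + (b & 0xff);
    }
    return (h & 0x7fffffff) % (1 << 16);
  }

  public static void main(String[] args) {
    ToyBloomFilter bf = new ToyBloomFilter();
    bf.put("row-1".getBytes());
    System.out.println(bf.mightContain("row-1".getBytes())); // true
    System.out.println(bf.mightContain("row-x".getBytes())); // almost certainly false
  }
}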
// Decide which block to read; the blockKeys inside rootBlockContainingKey hold one entry per chunk.
// The index itself is loaded from the Bloom meta, obtained via:
// DataInput bloomMeta = reader.getGeneralBloomFilterMetadata();
int block = index.rootBlockContainingKey(key, keyOffset, keyLength);
if (block < 0) {
  result = false; // This key is not in the file.
} else {
  HFileBlock bloomBlock;
  try {
    // We cache the block and use a positional read.
    // Read the Bloom filter chunk that covers this key.
    bloomBlock = reader.readBlock(index.getRootBlockOffset(block),
        index.getRootBlockDataSize(block), true, true, false, true,
        BlockType.BLOOM_CHUNK);
  } catch (IOException ex) {
    // The Bloom filter is broken, turn it off.
    throw new IllegalArgumentException(
        "Failed to load Bloom block for key "
        + Bytes.toStringBinary(key, keyOffset, keyLength), ex);
  }
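rootBlockContainingKey is essentially a binary search over the first keys of the root index entries, returning the entry whose range covers the key, or a negative value when the key sorts before every entry. A rough self-contained sketch of that idea (names and layout are hypothetical, not the HFileBlockIndex implementation):

// Sketch: locate the chunk that may contain a key by binary-searching the
// chunks' first keys. blockKeys plays the role of the root index entries.
import java.util.Arrays;

public class RootBlockLookupSketch {
  static int rootBlockContainingKey(byte[][] blockKeys, byte[] key) {
    int pos = Arrays.binarySearch(blockKeys, key, RootBlockLookupSketch::compareBytes);
    if (pos >= 0) {
      return pos;              // key equals some block's first key
    }
    int insertion = -pos - 1;  // index of the first key greater than `key`
    return insertion - 1;      // -1 means the key sorts before every block
  }

  static int compareBytes(byte[] a, byte[] b) {
    int len = Math.min(a.length, b.length);
    for (int i = 0; i < len; i++) {
      int cmp = (a[i] & 0xff) - (b[i] & 0xff);
      if (cmp != 0) return cmp;
    }
    return a.length - b.length;
  }

  public static void main(String[] args) {
    byte[][] blockKeys = { "a".getBytes(), "m".getBytes(), "t".getBytes() };
    System.out.println(rootBlockContainingKey(blockKeys, "c".getBytes())); // 0
    System.out.println(rootBlockContainingKey(blockKeys, "z".getBytes())); // 2
    System.out.println(rootBlockContainingKey(blockKeys, "A".getBytes())); // -1 -> not in file
  }
}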