Class SimpleTextBKDWriter

  • All Implemented Interfaces:
    java.io.Closeable, java.lang.AutoCloseable

    final class SimpleTextBKDWriter
    extends java.lang.Object
    implements java.io.Closeable
    Forked from BKDWriter and simplified/specialized for SimpleText's usage
    • Field Detail

      • VERSION_COMPRESSED_DOC_IDS

        public static final int VERSION_COMPRESSED_DOC_IDS
        See Also:
        Constant Field Values
      • VERSION_COMPRESSED_VALUES

        public static final int VERSION_COMPRESSED_VALUES
        See Also:
        Constant Field Values
      • VERSION_IMPLICIT_SPLIT_DIM_1D

        public static final int VERSION_IMPLICIT_SPLIT_DIM_1D
        See Also:
        Constant Field Values
      • DEFAULT_MAX_MB_SORT_IN_HEAP

        public static final float DEFAULT_MAX_MB_SORT_IN_HEAP
        Default maximum heap to use (in MB), before spilling to (slower) disk
        See Also:
        Constant Field Values
      • config

        protected final BKDConfig config
        The BKD configuration, including how many dimensions we are storing at the leaf (data) nodes
      • tempFileNamePrefix

        final java.lang.String tempFileNamePrefix
      • maxMBSortInHeap

        final double maxMBSortInHeap
      • scratchDiff

        final byte[] scratchDiff
      • scratch1

        final byte[] scratch1
      • scratch2

        final byte[] scratch2
      • scratchBytesRef1

        final BytesRef scratchBytesRef1
      • scratchBytesRef2

        final BytesRef scratchBytesRef2
      • commonPrefixLengths

        final int[] commonPrefixLengths
      • finished

        private boolean finished
      • maxPointsSortInHeap

        private final int maxPointsSortInHeap
      • minPackedValue

        protected final byte[] minPackedValue
        Minimum per-dim values, packed
      • maxPackedValue

        protected final byte[] maxPackedValue
        Maximum per-dim values, packed
      • pointCount

        protected long pointCount
      • totalPointCount

        private final long totalPointCount
        An upper bound on how many points the caller will add (includes deletions)
      • maxDoc

        private final int maxDoc
    • Constructor Detail

      • SimpleTextBKDWriter

        public SimpleTextBKDWriter​(int maxDoc,
                                   Directory tempDir,
                                   java.lang.String tempFileNamePrefix,
                                   BKDConfig config,
                                   double maxMBSortInHeap,
                                   long totalPointCount)
                            throws java.io.IOException
        Throws:
        java.io.IOException
    • Method Detail

      • verifyParams

        public static void verifyParams​(double maxMBSortInHeap,
                                        long totalPointCount)
      • add

        public void add​(byte[] packedValue,
                        int docID)
                 throws java.io.IOException
        Throws:
        java.io.IOException
      • getPointCount

        public long getPointCount()
        How many points have been added so far
      • writeField

        public long writeField​(IndexOutput out,
                               java.lang.String fieldName,
                               MutablePointTree reader)
                        throws java.io.IOException
        Writes a field from a MutablePointTree. This way of writing points is faster than regular writes with BKDWriter.add(byte[], int) since there is an opportunity to reorder points before writing them to disk. This method does not use transient disk in order to reorder points.
        Throws:
        java.io.IOException
      • writeFieldNDims

        private long writeFieldNDims​(IndexOutput out,
                                     java.lang.String fieldName,
                                     MutablePointTree values)
                              throws java.io.IOException
        Throws:
        java.io.IOException
      • writeField1Dim

        private long writeField1Dim​(IndexOutput out,
                                    java.lang.String fieldName,
                                    MutablePointTree reader)
                             throws java.io.IOException
        Throws:
        java.io.IOException
      • rotateToTree

        private void rotateToTree​(int nodeID,
                                  int offset,
                                  int count,
                                  byte[] index,
                                  java.util.List<byte[]> leafBlockStartValues)
      • checkMaxLeafNodeCount

        private void checkMaxLeafNodeCount​(int numLeaves)
      • finish

        public long finish​(IndexOutput out)
                    throws java.io.IOException
        Writes the BKD tree to the provided IndexOutput and returns the file offset where the index was written.
        Throws:
        java.io.IOException
      • writeIndex

        private void writeIndex​(IndexOutput out,
                                long[] leafBlockFPs,
                                byte[] splitPackedValues,
                                int maxPointsInLeafNode)
                         throws java.io.IOException
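        Writes the index to the provided IndexOutput. Note: this method is private and the class is final, so a subclass cannot change how the index is written.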
        Throws:
        java.io.IOException
      • writeLeafBlockDocs

        protected void writeLeafBlockDocs​(IndexOutput out,
                                          int[] docIDs,
                                          int start,
                                          int count)
                                   throws java.io.IOException
        Throws:
        java.io.IOException
      • writeLeafBlockPackedValues

        protected void writeLeafBlockPackedValues​(IndexOutput out,
                                                  int[] commonPrefixLengths,
                                                  int count,
                                                  int sortedDim,
                                                  java.util.function.IntFunction<BytesRef> packedValues)
                                           throws java.io.IOException
        Throws:
        java.io.IOException
      • close

        public void close()
                   throws java.io.IOException
        Specified by:
        close in interface java.lang.AutoCloseable
        Specified by:
        close in interface java.io.Closeable
        Throws:
        java.io.IOException
      • verifyChecksum

        private java.lang.Error verifyChecksum​(java.lang.Throwable priorException,
                                               PointWriter writer)
                                        throws java.io.IOException
        Called on exception, to check whether the checksum is also corrupt in this source, and to add that information (checksum matched or didn't) as a suppressed exception.
        Throws:
        java.io.IOException
      • valueInBounds

        private boolean valueInBounds​(BytesRef packedValue,
                                      byte[] minPackedValue,
                                      byte[] maxPackedValue)
        Called only in assert
      • split

        protected int split​(byte[] minPackedValue,
                            byte[] maxPackedValue)
      • switchToHeap

        private HeapPointWriter switchToHeap​(PointWriter source)
                                      throws java.io.IOException
        Pull a partition back into heap once the point count is low enough while recursing.
        Throws:
        java.io.IOException
      • build

        private void build​(int nodeID,
                           int leafNodeOffset,
                           MutablePointTree reader,
                           int from,
                           int to,
                           IndexOutput out,
                           byte[] minPackedValue,
                           byte[] maxPackedValue,
                           byte[] splitPackedValues,
                           long[] leafBlockFPs,
                           int[] spareDocIds)
                    throws java.io.IOException
        Throws:
        java.io.IOException
      • build

        private void build​(int nodeID,
                           int leafNodeOffset,
                           BKDRadixSelector.PathSlice points,
                           IndexOutput out,
                           BKDRadixSelector radixSelector,
                           byte[] minPackedValue,
                           byte[] maxPackedValue,
                           byte[] splitPackedValues,
                           long[] leafBlockFPs,
                           int[] spareDocIds)
                    throws java.io.IOException
        The points PathSlice describes the cell we have currently recursed to.
        Throws:
        java.io.IOException
      • computeCommonPrefixLength

        private void computeCommonPrefixLength​(HeapPointWriter heapPointWriter,
                                               byte[] commonPrefix)
      • valuesInOrderAndBounds

        private boolean valuesInOrderAndBounds​(int count,
                                               int sortedDim,
                                               byte[] minPackedValue,
                                               byte[] maxPackedValue,
                                               java.util.function.IntFunction<BytesRef> values,
                                               int[] docs,
                                               int docsOffset)
                                        throws java.io.IOException
        Throws:
        java.io.IOException
      • valueInOrder

        private boolean valueInOrder​(long ord,
                                     int sortedDim,
                                     byte[] lastPackedValue,
                                     byte[] packedValue,
                                     int packedValueOffset,
                                     int doc,
                                     int lastDoc)
      • write

        private void write​(IndexOutput out,
                           java.lang.String s)
                    throws java.io.IOException
        Throws:
        java.io.IOException
      • writeInt

        private void writeInt​(IndexOutput out,
                              int x)
                       throws java.io.IOException
        Throws:
        java.io.IOException
      • writeLong

        private void writeLong​(IndexOutput out,
                               long x)
                        throws java.io.IOException
        Throws:
        java.io.IOException
      • write

        private void write​(IndexOutput out,
                           BytesRef b)
                    throws java.io.IOException
        Throws:
        java.io.IOException
      • newline

        private void newline​(IndexOutput out)
                      throws java.io.IOException
        Throws:
        java.io.IOException