class PDF::Reader::TextRun
A value object that represents one or more consecutive characters on a page.
Attributes
font_size[R]
origin[R]
text[R]
to_s[R]
width[R]
Public Class Methods
new(x, y, width, font_size, text)
click to toggle source
# File lib/pdf/reader/text_run.rb, line 14
# Builds a run from the coordinates of its origin, its width, its font size
# and the text it holds. The x/y pair is wrapped in a PDF::Reader::Point and
# kept as the run's origin.
def initialize(x, y, width, font_size, text)
  @text = text
  @font_size = font_size
  @width = width
  @origin = PDF::Reader::Point.new(x, y)
end
Public Instance Methods
+(other)
click to toggle source
# File lib/pdf/reader/text_run.rb, line 61
# Merges this run with +other+ and returns the result as a new TextRun that
# starts at this run's origin and ends where +other+ ends.
#
# Raises ArgumentError unless mergable?(other) holds.
#
# When the horizontal gap between the end of this run and the start of
# +other+ is below 20% of the font size the two texts are joined directly;
# otherwise a single space is placed between them.
def +(other)
  unless mergable?(other)
    raise ArgumentError, "#{other} cannot be merged with this run"
  end

  gap = other.x - endx
  merged_text = if gap < (font_size * 0.2)
                  text + other.text
                else
                  "#{text} #{other.text}"
                end

  TextRun.new(x, y, other.endx - x, font_size, merged_text)
end
<=>(other)
click to toggle source
Allows collections of TextRun
objects to be sorted. They are sorted by their position on a Cartesian plane, from top left to bottom right.
# File lib/pdf/reader/text_run.rb, line 23
# Orders two runs by position. Runs with identical x and y compare as equal.
# Otherwise the run with the greater y sorts first, and for equal y the run
# with the smaller x sorts first. Returns nil when none of the comparisons
# hold.
def <=>(other)
  return 0 if x == other.x && y == other.y

  # Vertical position takes precedence: a greater y sorts earlier.
  return 1 if y < other.y
  return -1 if y > other.y

  # Same y: a smaller x sorts earlier.
  return -1 if x < other.x
  return 1 if x > other.x

  nil
end
endx()
click to toggle source
# File lib/pdf/reader/text_run.rb, line 45
# The x coordinate where the run ends: the origin's x plus the run's width.
# The value is computed on first use and cached.
def endx
  return @endx if @endx

  @endx = @origin.x + width
end
endy()
click to toggle source
# File lib/pdf/reader/text_run.rb, line 49
# The y coordinate of the run's far edge: the origin's y plus the font size.
# The value is computed on first use and cached.
def endy
  return @endy if @endy

  @endy = @origin.y + font_size
end
inspect()
click to toggle source
# File lib/pdf/reader/text_run.rb, line 71
# A compact one-line description of the run: its text, width, font size and
# origin coordinates.
def inspect
  position = "#{x},#{y}"
  "#{text} w:#{width} f:#{font_size} @#{position}"
end
intersect?(other_run)
click to toggle source
# File lib/pdf/reader/text_run.rb, line 75
# True when the rectangle spanned by this run (x..endx by y..endy) overlaps
# the rectangle of +other_run+. The comparisons are inclusive, so rectangles
# that merely touch along an edge count as intersecting.
def intersect?(other_run)
  # Horizontal extents must overlap before the vertical check matters.
  return false unless x <= other_run.endx && endx >= other_run.x

  endy >= other_run.y && y <= other_run.endy
end
intersection_area_percent(other_run)
click to toggle source
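Returns the proportion of this text run's area that is overlapped by another run, expressed as a ratio of the overlapping area to this run's area rather than as a 0–100 percentage.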
# File lib/pdf/reader/text_run.rb, line 81
# Returns the share of this run's area that is covered by +other_run+, as the
# ratio of the overlapping area to this run's own area.
#
# Returns 0 when the two runs do not intersect. Also returns 0 when this run
# has no area (zero width or zero font size): intersect? uses inclusive
# comparisons, so such a run can still "intersect" another one, and dividing
# by its zero area would otherwise produce NaN.
def intersection_area_percent(other_run)
  return 0 unless intersect?(other_run)

  own_area = area
  return 0 if own_area.zero?

  # Overlap rectangle: the innermost pair of edges on each axis.
  dx = [endx, other_run.endx].min - [x, other_run.x].max
  dy = [endy, other_run.endy].min - [y, other_run.y].max

  (dx * dy).to_f / own_area
end
mean_character_width()
click to toggle source
# File lib/pdf/reader/text_run.rb, line 53
# The run's width divided by the number of characters in its text.
# NOTE(review): an empty text makes the divisor 0.0, so the result is
# Infinity or NaN rather than a number usable for layout — confirm callers
# never pass empty runs.
def mean_character_width
  characters = character_count
  @width / characters
end
mergable?(other)
click to toggle source
# File lib/pdf/reader/text_run.rb, line 57
# True when +other+ can be merged onto the end of this run: both runs share
# the same integer-truncated y, have the same font size, and +other+ starts
# inside this run's mergable range.
def mergable?(other)
  return false unless y.to_i == other.y.to_i
  return false unless font_size == other.font_size

  mergable_range.include?(other.x)
end
x()
click to toggle source
# File lib/pdf/reader/text_run.rb, line 37
# The x coordinate of the run's origin point.
def x
  @origin.x
end
y()
click to toggle source
# File lib/pdf/reader/text_run.rb, line 41
# The y coordinate of the run's origin point.
def y
  @origin.y
end
Private Instance Methods
area()
click to toggle source
# File lib/pdf/reader/text_run.rb, line 93
# The area of the rectangle spanned by the run: its horizontal extent
# (endx - x) multiplied by its vertical extent (endy - y).
def area
  horizontal_extent = endx - x
  vertical_extent = endy - y
  horizontal_extent * vertical_extent
end
character_count()
click to toggle source
Assumes the string's encoding is marked correctly, so that String#size can be trusted to return a character count.
# File lib/pdf/reader/text_run.rb, line 103
# The number of characters in the run's text, as a Float so that it can be
# used as a divisor without integer truncation. Relies on String#size, which
# counts characters according to the string's marked encoding.
def character_count
  characters = @text.size
  characters.to_f
end
mergable_range()
click to toggle source
# File lib/pdf/reader/text_run.rb, line 97
# The inclusive range of x positions at which a following run may start and
# still be merged with this one: from 3 below endx up to endx plus the font
# size. The range is built on first use and cached.
def mergable_range
  return @mergable_range if @mergable_range

  @mergable_range = (endx - 3)..(endx + font_size)
end