This library helps split text into its constituent sentences, based on regular expressions (packed into the SplitPattern class) and a list of abbreviations.
Example usage
# Load the library and mix the Sentence module into the top-level scope;
# SplitPattern is referenced below without a namespace prefix.
require 'sentence'
include Sentence

# Sample input: a headline with a bracketed byline, followed by prose that
# contains abbreviations, decimal numbers and an ellipsis.
sample_text = %(THE BIG RIPOFF [by someone else]
Mr. John B. Smith bought cheapsite.com for 1.5 million dollars, i.e. he paid
far too much for it. Did he mind? Adam Jones Jr. thinks he didn't. In any case,
this isn't true... Well, with a probability of .9 it isn't. )

# Additional pattern named :marked whose regex matches bracketed text.
# NOTE(review): presumably the third argument is the replacement (the captured
# text followed by the library's end-of-sentence marker) — confirm against
# SplitPattern.
marked_pattern = SplitPattern.new(:marked, /\[(.+)\]/, '\1' + Sentence::EOS)
extra_patterns = [marked_pattern]

# Build an English splitter with the extra pattern, then print every sentence
# with a 1-based counter.
splitter = Sentence.EN.patterns(extra_patterns)
found = splitter.get_sentences(sample_text)
found.each_with_index do |sentence, position|
  number = position + 1
  puts "#{number} -> #{sentence.inspect}"
end
Example RSpec test
# Example spec covering the basic behaviour of the Sentence module.
# NOTE(review): `sentences` and `between_sentences` are not defined in this
# snippet — presumably project spec helpers that process the string returned
# by the block using the given :lang and compare the result with :count;
# confirm against the spec helper file.
describe Sentence, " base functionality" do
  # A single short sentence.
  sentences(:count => 1, :lang => :EN) { "Hello world!" }

  # Headline followed by prose with abbreviations, a domain name, decimal
  # numbers and an ellipsis. Adjacent literals are joined into one string.
  sentences(:count => 6, :lang => :EN) do
    "THE BIG RIPOFF [by someone else]\n\n" \
    "Mr. John B. Smith bought cheapsite.com for 1.5 million dollars, i.e. he paid far too much for it. " \
    "Did he mind? " \
    "Adam Jones Jr. thinks he didn't. " \
    "In any case, this isn't true... " \
    "Well, with a probability of .9 it isn't."
  end

  # An arithmetic expression containing several decimal points.
  between_sentences(:count => 1, :lang => :EN) { "1.2 + 1.0 = 2.2" }
end