Rubyの文字列検索いろいろ

単純な文字列検索ならString#include?が速い。正規表現なら余分なMatchDataを生成しないRegexp#match?が速い。MatchDataが生成されると遅くなる。String#=~は何らかの最適化が入っているようで、検索後にMatchDataにアクセスしなければRegexp#match?の次に速い。

通常はString#=~を使って、速度が気になるときはRegexp#match?が使えるか検討するとよいかな。

ベンチマークスクリプト:

#!/usr/local/bin/ruby
# -*- coding: utf-8; frozen_string_literal: true -*-

require 'benchmark'

# Sample text that every search variant in this script runs against.
s = '12345 foo 67890'

# Show what each search API returns for a successful match, so the values
# being compared in the benchmark are visible up front. Each entry prints its
# label on one line and the inspected return value on the next.
{
  'String#include?' => s.include?('foo'),
  'Regexp#match?' => /foo/.match?(s),
  'Regexp#match' => /foo/.match(s),
  'String#=~' => (s =~ /foo/)
}.each do |label, result|
  puts label
  p result
end

# Time each search API against the same string `s`, first with a pattern that
# matches ('foo') and then with one that does not ('bar'). Every report runs
# the expression named in its label `iterations` times; 35 is the width of the
# label column in the printed table.
Benchmark.bm(35) do |bm|
  iterations = 1_000_000

  # Baseline: the bare loop with nothing in it, showing the loop's own cost.
  bm.report('empty') do
    iterations.times {}
  end

  # ---- pattern is present in the string ----
  bm.report('[match] String#include?') do
    iterations.times { s.include?('foo') }
  end
  bm.report('[match] Regexp#match?') do
    iterations.times { /foo/.match?(s) }
  end
  bm.report('[match] Regexp#match') do
    iterations.times { /foo/.match(s) }
  end
  bm.report('[match] String#=~') do
    iterations.times { s =~ /foo/ }
  end
  # NOTE(review): only the trailing `$'` is in value position here; `$`` and
  # `$&` are in void context and may be dropped by the compiler, so this row
  # possibly measures a single back-reference read. Confirm with
  # RubyVM::InstructionSequence before drawing conclusions from it.
  bm.report("[match] String#=~; $`; $&; $'") do
    iterations.times { s =~ /foo/; $`; $&; $' }
  end
  # Same match, followed by a read of the last-match global `$~`.
  bm.report('[match] String#=~; $~') do
    iterations.times { s =~ /foo/; $~ }
  end

  # ---- pattern is absent from the string ----
  bm.report('[no match] String#include?') do
    iterations.times { s.include?('bar') }
  end
  bm.report('[no match] Regexp#match?') do
    iterations.times { /bar/.match?(s) }
  end
  bm.report('[no match] Regexp#match') do
    iterations.times { /bar/.match(s) }
  end
  bm.report('[no match] String#=~') do
    iterations.times { s =~ /bar/ }
  end
  # NOTE(review): same void-context caveat as the matching `$`; $&; $'` row.
  bm.report("[no match] String#=~; $`; $&; $'") do
    iterations.times { s =~ /bar/; $`; $&; $' }
  end
  bm.report('[no match] String#=~; $~') do
    iterations.times { s =~ /bar/; $~ }
  end
end

実行結果:

String#include?
true
Regexp#match?
true
Regexp#match
#<MatchData "foo">
String#=~
6
                                          user     system      total        real
empty                                 0.109375   0.000000   0.109375 (  0.109012)
[match] String#include?               0.250000   0.000000   0.250000 (  0.256767)
[match] Regexp#match?                 0.312500   0.000000   0.312500 (  0.301961)
[match] Regexp#match                  0.890625   0.000000   0.890625 (  0.897493)
[match] String#=~                     0.421875   0.000000   0.421875 (  0.423258)
[match] String#=~; $`; $&; $'         0.531250   0.000000   0.531250 (  0.550842)
[match] String#=~; $~                 0.921875   0.000000   0.921875 (  0.933610)
[no match] String#include?            0.187500   0.000000   0.187500 (  0.183050)
[no match] Regexp#match?              0.265625   0.000000   0.265625 (  0.279306)
[no match] Regexp#match               0.406250   0.000000   0.406250 (  0.413185)
[no match] String#=~                  0.328125   0.000000   0.328125 (  0.324284)
[no match] String#=~; $`; $&; $'      0.359375   0.000000   0.359375 (  0.357800)
[no match] String#=~; $~              0.343750   0.000000   0.343750 (  0.371355)