Skip to content

Commit

Permalink
[backport] SI-7710 fix memory performance of RegexParsers in jdk7u6+
Browse files Browse the repository at this point in the history
Backport of scala/scala-parser-combinators@91584dc.

---

Starting with 1.7.0_06 [1], String.substring no longer reuses the internal
char array of the String but make a copy instead. Since we call
subSequence twice for *every* input character, this results in horrible
parse performance and GC.

With the benchmark from the (duplicate) ticket SI-8542, I get:

BEFORE:
    parseAll(new StringReader(String))
    For 100 items: 49 ms
    For 500 items: 97 ms
    For 1000 items: 155 ms
    For 5000 items: 113 ms
    For 10000 items: 188 ms
    For 50000 items: 1437 ms
    ===
    parseAll(String)
    For 100 items: 4 ms
    For 500 items: 67 ms
    For 1000 items: 372 ms
    For 5000 items: 5693 ms
    For 10000 items: 23126 ms
    For 50000 items: 657665 ms

AFTER:
    parseAll(new StringReader(String))
    For 100 items: 43 ms
    For 500 items: 118 ms
    For 1000 items: 217 ms
    For 5000 items: 192 ms
    For 10000 items: 196 ms
    For 50000 items: 1424 ms
    ===
    parseAll(String)
    For 100 items: 2 ms
    For 500 items: 8 ms
    For 1000 items: 16 ms
    For 5000 items: 79 ms
    For 10000 items: 161 ms
    For 50000 items: 636 ms

[1] http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6924259
  • Loading branch information
gourlaysama committed Aug 12, 2014
1 parent 300db2a commit fceae70
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 3 deletions.
6 changes: 5 additions & 1 deletion bincompat-forward.whitelist.conf
Expand Up @@ -177,7 +177,11 @@ filter {
{
matchName="scala.reflect.runtime.JavaMirrors#JavaMirror.scala$reflect$runtime$JavaMirrors$JavaMirror$$followStatic"
problemName=MissingMethodProblem
},
{
# only accessible from util.parsing.combinator package
matchName="scala.util.parsing.combinator.SubSequence"
problemName=MissingClassProblem
}

]
}
4 changes: 2 additions & 2 deletions src/library/scala/util/parsing/combinator/RegexParsers.scala
Expand Up @@ -72,7 +72,7 @@ trait RegexParsers extends Parsers {
*/
protected def handleWhiteSpace(source: java.lang.CharSequence, offset: Int): Int =
if (skipWhitespace)
(whiteSpace findPrefixMatchOf (source.subSequence(offset, source.length))) match {
(whiteSpace findPrefixMatchOf (new SubSequence(source, offset))) match {
case Some(matched) => offset + matched.end
case None => offset
}
Expand Down Expand Up @@ -106,7 +106,7 @@ trait RegexParsers extends Parsers {
val source = in.source
val offset = in.offset
val start = handleWhiteSpace(source, offset)
(r findPrefixMatchOf (source.subSequence(start, source.length))) match {
(r findPrefixMatchOf (new SubSequence(source, start))) match {
case Some(matched) =>
Success(source.subSequence(start, start + matched.end).toString,
in.drop(start + matched.end - offset))
Expand Down
32 changes: 32 additions & 0 deletions src/library/scala/util/parsing/combinator/SubSequence.scala
@@ -0,0 +1,32 @@
/* __ *\
** ________ ___ / / ___ Scala API **
** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL **
** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
** /____/\___/_/ |_/____/_/ | | **
** |/ **
\* */


package scala
package util.parsing.combinator

// A shallow wrapper over another CharSequence (usually a String)
//
// See SI-7710: in jdk7u6 String.subSequence stopped sharing the char array of the original
// string and began copying it.
// RegexParsers calls subSequence twice per input character: that's a lot of array copying!
private[combinator] class SubSequence(s: CharSequence, start: Int, val length: Int) extends CharSequence {
def this(s: CharSequence, start: Int) = this(s, start, s.length - start)

def charAt(i: Int) =
if (i >= 0 && i < length) s.charAt(start + i) else throw new IndexOutOfBoundsException(s"index: $i, length: $length")

def subSequence(_start: Int, _end: Int) = {
if (_start < 0 || _end < 0 || _end > length || _start > _end)
throw new IndexOutOfBoundsException(s"start: ${_start}, end: ${_end}, length: $length")

new SubSequence(s, start + _start, _end - _start)
}

override def toString = s.subSequence(start, start + length).toString
}

0 comments on commit fceae70

Please sign in to comment.