Skip to content

Commit

Permalink
[backport] SI-9060 Backpatch fifth-edition names
Browse files Browse the repository at this point in the history
Because the compiler and the library share some code in this
version, the compiler must exclude XML tag names that look like
Scala operators, such as `<:`.

This is an upstream port of:
scala-xml/commit/968f7bd94e934c781c19e25847ab09ac98cfbaf6
  • Loading branch information
som-snytt committed Feb 16, 2015
1 parent fe7867f commit b4e3bec
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 39 deletions.
4 changes: 3 additions & 1 deletion src/compiler/scala/tools/nsc/ast/parser/Scanners.scala
Expand Up @@ -367,7 +367,9 @@ trait Scanners extends ScannersCommon {
val last = if (charOffset >= 2) buf(charOffset - 2) else ' '
nextChar()
last match {
case ' ' | '\t' | '\n' | '{' | '(' | '>' if isNameStart(ch) || ch == '!' || ch == '?' =>
// exclude valid xml names that happen to be Scala operator chars
case ' ' | '\t' | '\n' | '{' | '(' | '>' if (isNameStart(ch) && ch != ':' && !isSpecial(ch))
|| ch == '!' || ch == '?' =>
token = XMLSTART
case _ =>
// Console.println("found '<', but last is '"+in.last+"'"); // DEBUG
Expand Down
56 changes: 19 additions & 37 deletions src/library/scala/xml/parsing/TokenTests.scala
Expand Up @@ -30,43 +30,25 @@ trait TokenTests {
def isAlpha(c: Char) = {
  // True only for the ASCII letters A-Z and a-z; non-ASCII letters are rejected.
  val isUpper = 'A' <= c && c <= 'Z'
  val isLower = 'a' <= c && c <= 'z'
  isUpper || isLower
}
def isAlphaDigit(c: Char) = {
  // True for an ASCII letter (as decided by isAlpha) or an ASCII decimal digit 0-9.
  val isAsciiDigit = '0' <= c && c <= '9'
  isAlpha(c) || isAsciiDigit
}

/** Tests whether `ch` may appear inside an XML name.
 *
 *  {{{
 *  NameChar ::= Letter | Digit | '.' | '-' | '_' | ':'
 *             | CombiningChar | Extender
 *  }}}
 *  A character qualifies if it is a name-start character, if its Unicode
 *  general category is one of Mc, Me, Mn, Lm or Nd, or if it is one of
 *  `.`, `-`, `:`.
 *
 *  See [4] and Appendix B of XML 1.0 specification.
 */
def isNameChar(ch: Char) = {
  import java.lang.Character._

  // Categories Mc, Me, Mn, Lm and Nd; evaluated only when the name-start test fails.
  def inExtraCategory = {
    val category = getType(ch).toByte
    category == COMBINING_SPACING_MARK ||
    category == ENCLOSING_MARK ||
    category == NON_SPACING_MARK ||
    category == MODIFIER_LETTER ||
    category == DECIMAL_DIGIT_NUMBER
  }

  isNameStart(ch) || inExtraCategory || ch == '.' || ch == '-' || ch == ':'
}

/** Tests whether `ch` may begin an XML name.
 *
 *  {{{
 *  NameStart ::= ( Letter | '_' )
 *  }}}
 *  Here Letter means a character whose Unicode general category is one
 *  of `{ Ll, Lu, Lo, Lt, Nl }`.
 *
 *  A leading `:` is not accepted.
 *  See [3] and Appendix B of XML 1.0 specification
 */
def isNameStart(ch: Char) = {
  import java.lang.Character._

  if (ch == '_') true
  else {
    val category = getType(ch).toByte
    category == LOWERCASE_LETTER ||
    category == UPPERCASE_LETTER ||
    category == OTHER_LETTER ||
    category == TITLECASE_LETTER ||
    category == LETTER_NUMBER
  }
}
/** Tests whether `c` may appear inside an XML name.
 *
 *  Accepts every name-start character plus the ASCII digits, `-`, `.`,
 *  U+00B7, U+0300..U+036F and U+203F..U+2040.
 */
def isNameChar(c: Char): Boolean =
  if (isNameStart(c)) true
  else {
    val code = c.toInt
    val isAsciiDigit    = code >= '0' && code <= '9'
    val isHyphenOrDot   = code == '-' || code == '.'
    val isMiddleDot     = code == 0xB7
    val isCombiningMark = code >= 0x300 && code <= 0x36F
    val isTie           = code >= 0x203F && code <= 0x2040
    isAsciiDigit || isHyphenOrDot || isMiddleDot || isCombiningMark || isTie
  }
/** Tests whether `c` may begin an XML name.
 *
 *  Accepted characters, by range:
 *  {{{
 *  ':' | [A-Z] | '_' | [a-z]
 *  | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF]
 *  | [#x370-#x37D] | [#x37F-#x1FFF]
 *  | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF]
 *  | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD]
 *  }}}
 *
 *  The argument is a single 16-bit `Char`, so code points at or above
 *  0x10000 can never reach this method; surrogate halves (0xD800-0xDFFF)
 *  are rejected.
 *
 *  @param c the character to classify
 *  @return `true` if `c` lies in one of the ranges listed above
 */
def isNameStart(c: Char): Boolean = (
  if (c < 0x00C0) isAlpha(c) || c == ':' || c == '_'
  else if (c < 0x0300) c != 0xD7 && c != 0xF7
  else if (c < 0x2000) c >= 0x370 && c != 0x37E
  else if (c < 0x3001)
    // Plain comparisons rather than `a to b contains c`, which builds a Range on every call.
    c == 0x200C || c == 0x200D ||
    (c >= 0x2070 && c <= 0x218F) ||
    (c >= 0x2C00 && c <= 0x2FEF)
  else if (c < 0xD800) true
  // A Char never exceeds 0xFFFF, so this is the last reachable case.
  else (c >= 0xF900 && c <= 0xFDCF) || (c >= 0xFDF0 && c <= 0xFFFD)
)

/** {{{
* Name ::= ( Letter | '_' ) (NameChar)*
Expand Down
4 changes: 3 additions & 1 deletion test/files/jvm/unittest_xml.scala
Expand Up @@ -62,7 +62,9 @@ object Test {
object UtilityTest {
def run() {
assert(Utility.isNameStart('b'))
assert(!Utility.isNameStart(':'))

// no longer: this was a convenience for the implementation, not to spec.
//assert(!Utility.isNameStart(':'))

val x = <foo>
<toomuchws/>
Expand Down

0 comments on commit b4e3bec

Please sign in to comment.