forgejo/vendor/github.com/dlclark/regexp2/syntax/parser.go

package syntax

import (
	"fmt"
	"math"
	"os"
	"sort"
	"strconv"
	"unicode"
)

type RegexOptions int32

const (
	IgnoreCase              RegexOptions = 0x0001 // "i"
	Multiline                            = 0x0002 // "m"
	ExplicitCapture                      = 0x0004 // "n"
	Compiled                             = 0x0008 // "c"
	Singleline                           = 0x0010 // "s"
	IgnorePatternWhitespace              = 0x0020 // "x"
	RightToLeft                          = 0x0040 // "r"
	Debug                                = 0x0080 // "d"
	ECMAScript                           = 0x0100 // "e"
	RE2                                  = 0x0200 // RE2 compat mode
)

func optionFromCode(ch rune) RegexOptions {
	// case-insensitive
	switch ch {
	case 'i', 'I':
		return IgnoreCase
	case 'r', 'R':
		return RightToLeft
	case 'm', 'M':
		return Multiline
	case 'n', 'N':
		return ExplicitCapture
	case 's', 'S':
		return Singleline
	case 'x', 'X':
		return IgnorePatternWhitespace
	case 'd', 'D':
		return Debug
	case 'e', 'E':
		return ECMAScript
	default:
		return 0
	}
}

// An Error describes a failure to parse a regular expression
// and gives the offending expression.
type Error struct {
	Code ErrorCode
	Expr string
	Args []interface{}
}

func (e *Error) Error() string {
	if len(e.Args) == 0 {
		return "error parsing regexp: " + e.Code.String() + " in `" + e.Expr + "`"
	}
	return "error parsing regexp: " + fmt.Sprintf(e.Code.String(), e.Args...) + " in `" + e.Expr + "`"
}

// An ErrorCode describes a failure to parse a regular expression.
type ErrorCode string

const (
	// internal issue
	ErrInternalError ErrorCode = "regexp/syntax: internal error"
	// Parser errors
	ErrUnterminatedComment        = "unterminated comment"
	ErrInvalidCharRange           = "invalid character class range"
	ErrInvalidRepeatSize          = "invalid repeat count"
	ErrInvalidUTF8                = "invalid UTF-8"
	ErrCaptureGroupOutOfRange     = "capture group number out of range"
	ErrUnexpectedParen            = "unexpected )"
	ErrMissingParen               = "missing closing )"
	ErrMissingBrace               = "missing closing }"
	ErrInvalidRepeatOp            = "invalid nested repetition operator"
	ErrMissingRepeatArgument      = "missing argument to repetition operator"
	ErrConditionalExpression      = "illegal conditional (?(...)) expression"
	ErrTooManyAlternates          = "too many | in (?()|)"
	ErrUnrecognizedGrouping       = "unrecognized grouping construct: (%v"
	ErrInvalidGroupName           = "invalid group name: group names must begin with a word character and have a matching terminator"
	ErrCapNumNotZero              = "capture number cannot be zero"
	ErrUndefinedBackRef           = "reference to undefined group number %v"
	ErrUndefinedNameRef           = "reference to undefined group name %v"
	ErrAlternationCantCapture     = "alternation conditions do not capture and cannot be named"
	ErrAlternationCantHaveComment = "alternation conditions cannot be comments"
	ErrMalformedReference         = "(?(%v) ) malformed"
	ErrUndefinedReference         = "(?(%v) ) reference to undefined group"
	ErrIllegalEndEscape           = "illegal \\ at end of pattern"
	ErrMalformedSlashP            = "malformed \\p{X} character escape"
	ErrIncompleteSlashP           = "incomplete \\p{X} character escape"
	ErrUnknownSlashP              = "unknown unicode category, script, or property '%v'"
	ErrUnrecognizedEscape         = "unrecognized escape sequence \\%v"
	ErrMissingControl             = "missing control character"
	ErrUnrecognizedControl        = "unrecognized control character"
	ErrTooFewHex                  = "insufficient hexadecimal digits"
	ErrInvalidHex                 = "hex values may not be larger than 0x10FFFF"
	ErrMalformedNameRef           = "malformed \\k<...> named back reference"
	ErrBadClassInCharRange        = "cannot include class \\%v in character range"
	ErrUnterminatedBracket        = "unterminated [] set"
	ErrSubtractionMustBeLast      = "a subtraction must be the last element in a character class"
	ErrReversedCharRange          = "[x-y] range in reverse order"
)

func (e ErrorCode) String() string {
	return string(e)
}

type parser struct {
	stack         *regexNode
	group         *regexNode
	alternation   *regexNode
	concatenation *regexNode
	unit          *regexNode

	patternRaw string
	pattern    []rune

	currentPos  int
	specialCase *unicode.SpecialCase

	autocap  int
	capcount int
	captop   int
	capsize  int

	caps     map[int]int
	capnames map[string]int

	capnumlist  []int
	capnamelist []string

	options         RegexOptions
	optionsStack    []RegexOptions
	ignoreNextParen bool
}

const (
	maxValueDiv10 int = math.MaxInt32 / 10
	maxValueMod10     = math.MaxInt32 % 10
)

// Parse converts a regex string into a parse tree
func Parse(re string, op RegexOptions) (*RegexTree, error) {
	p := parser{
		options: op,
		caps:    make(map[int]int),
	}
	p.setPattern(re)

	if err := p.countCaptures(); err != nil {
		return nil, err
	}

	p.reset(op)
	root, err := p.scanRegex()

	if err != nil {
		return nil, err
	}
	tree := &RegexTree{
		root:       root,
		caps:       p.caps,
		capnumlist: p.capnumlist,
		captop:     p.captop,
		Capnames:   p.capnames,
		Caplist:    p.capnamelist,
		options:    op,
	}

	if tree.options&Debug > 0 {
		os.Stdout.WriteString(tree.Dump())
	}

	return tree, nil
}

func (p *parser) setPattern(pattern string) {
	p.patternRaw = pattern
	p.pattern = make([]rune, 0, len(pattern))

	//populate our rune array to handle utf8 encoding
	for _, r := range pattern {
		p.pattern = append(p.pattern, r)
	}
}
func (p *parser) getErr(code ErrorCode, args ...interface{}) error {
	return &Error{Code: code, Expr: p.patternRaw, Args: args}
}

func (p *parser) noteCaptureSlot(i, pos int) {
	if _, ok := p.caps[i]; !ok {
		// the rhs of the hashtable isn't used in the parser
		p.caps[i] = pos
		p.capcount++

		if p.captop <= i {
			if i == math.MaxInt32 {
				p.captop = i
			} else {
				p.captop = i + 1
			}
		}
	}
}

func (p *parser) noteCaptureName(name string, pos int) {
	if p.capnames == nil {
		p.capnames = make(map[string]int)
	}

	if _, ok := p.capnames[name]; !ok {
		p.capnames[name] = pos
		p.capnamelist = append(p.capnamelist, name)
	}
}

func (p *parser) assignNameSlots() {
	if p.capnames != nil {
		for _, name := range p.capnamelist {
			for p.isCaptureSlot(p.autocap) {
				p.autocap++
			}
			pos := p.capnames[name]
			p.capnames[name] = p.autocap
			p.noteCaptureSlot(p.autocap, pos)

			p.autocap++
		}
	}

	// if the caps array has at least one gap, construct the list of used slots
	if p.capcount < p.captop {
		p.capnumlist = make([]int, p.capcount)
		i := 0

		for k := range p.caps {
			p.capnumlist[i] = k
			i++
		}

		sort.Ints(p.capnumlist)
	}

	// merge capsnumlist into capnamelist
	if p.capnames != nil || p.capnumlist != nil {
		var oldcapnamelist []string
		var next int
		var k int

		if p.capnames == nil {
			oldcapnamelist = nil
			p.capnames = make(map[string]int)
			p.capnamelist = []string{}
			next = -1
		} else {
			oldcapnamelist = p.capnamelist
			p.capnamelist = []string{}
			next = p.capnames[oldcapnamelist[0]]
		}

		for i := 0; i < p.capcount; i++ {
			j := i
			if p.capnumlist != nil {
				j = p.capnumlist[i]
			}

			if next == j {
				p.capnamelist = append(p.capnamelist, oldcapnamelist[k])
				k++

				if k == len(oldcapnamelist) {
					next = -1
				} else {
					next = p.capnames[oldcapnamelist[k]]
				}

			} else {
				//feature: culture?
				str := strconv.Itoa(j)
				p.capnamelist = append(p.capnamelist, str)
				p.capnames[str] = j
			}
		}
	}
}

func (p *parser) consumeAutocap() int {
	r := p.autocap
	p.autocap++
	return r
}

// CountCaptures is a prescanner for deducing the slots used for
// captures by doing a partial tokenization of the pattern.
func (p *parser) countCaptures() error {
	var ch rune

	p.noteCaptureSlot(0, 0)

	p.autocap = 1

	for p.charsRight() > 0 {
		pos := p.textpos()
		ch = p.moveRightGetChar()
		switch ch {
		case '\\':
			if p.charsRight() > 0 {
				p.scanBackslash(true)
			}

		case '#':
			if p.useOptionX() {
				p.moveLeft()
				p.scanBlank()
			}

		case '[':
			p.scanCharSet(false, true)

		case ')':
			if !p.emptyOptionsStack() {
				p.popOptions()
			}

		case '(':
			if p.charsRight() >= 2 && p.rightChar(1) == '#' && p.rightChar(0) == '?' {
				p.moveLeft()
				p.scanBlank()
			} else {
				p.pushOptions()
				if p.charsRight() > 0 && p.rightChar(0) == '?' {
					// we have (?...
					p.moveRight(1)

					if p.charsRight() > 1 && (p.rightChar(0) == '<' || p.rightChar(0) == '\'') {
						// named group: (?<... or (?'...

						p.moveRight(1)
						ch = p.rightChar(0)

						if ch != '0' && IsWordChar(ch) {
							if ch >= '1' && ch <= '9' {
								dec, err := p.scanDecimal()
								if err != nil {
									return err
								}
								p.noteCaptureSlot(dec, pos)
							} else {
								p.noteCaptureName(p.scanCapname(), pos)
							}
						}
					} else if p.useRE2() && p.charsRight() > 2 && (p.rightChar(0) == 'P' && p.rightChar(1) == '<') {
						// RE2-compat (?P<)
						p.moveRight(2)
						ch = p.rightChar(0)
						if IsWordChar(ch) {
							p.noteCaptureName(p.scanCapname(), pos)
						}

					} else {
						// (?...

						// get the options if it's an option construct (?cimsx-cimsx...)
						p.scanOptions()

						if p.charsRight() > 0 {
							if p.rightChar(0) == ')' {
								// (?cimsx-cimsx)
								p.moveRight(1)
								p.popKeepOptions()
							} else if p.rightChar(0) == '(' {
								// alternation construct: (?(foo)yes|no)
								// ignore the next paren so we don't capture the condition
								p.ignoreNextParen = true

								// break from here so we don't reset ignoreNextParen
								continue
							}
						}
					}
				} else {
					if !p.useOptionN() && !p.ignoreNextParen {
						p.noteCaptureSlot(p.consumeAutocap(), pos)
					}
				}
			}

			p.ignoreNextParen = false

		}
	}

	p.assignNameSlots()
	return nil
}

func (p *parser) reset(topopts RegexOptions) {
	p.currentPos = 0
	p.autocap = 1
	p.ignoreNextParen = false

	if len(p.optionsStack) > 0 {
		p.optionsStack = p.optionsStack[:0]
	}

	p.options = topopts
	p.stack = nil
}

func (p *parser) scanRegex() (*regexNode, error) {
	ch := '@' // nonspecial ch, means at beginning
	isQuant := false

	p.startGroup(newRegexNodeMN(ntCapture, p.options, 0, -1))

	for p.charsRight() > 0 {
		wasPrevQuantifier := isQuant
		isQuant = false

		if err := p.scanBlank(); err != nil {
			return nil, err
		}

		startpos := p.textpos()

		// move past all of the normal characters.  We'll stop when we hit some kind of control character,
		// or if IgnorePatternWhiteSpace is on, we'll stop when we see some whitespace.
		if p.useOptionX() {
			for p.charsRight() > 0 {
				ch = p.rightChar(0)
				//UGLY: clean up, this is ugly
				if !(!isStopperX(ch) || (ch == '{' && !p.isTrueQuantifier())) {
					break
				}
				p.moveRight(1)
			}
		} else {
			for p.charsRight() > 0 {
				ch = p.rightChar(0)
				if !(!isSpecial(ch) || ch == '{' && !p.isTrueQuantifier()) {
					break
				}
				p.moveRight(1)
			}
		}

		endpos := p.textpos()

		p.scanBlank()

		if p.charsRight() == 0 {
			ch = '!' // nonspecial, means at end
		} else if ch = p.rightChar(0); isSpecial(ch) {
			isQuant = isQuantifier(ch)
			p.moveRight(1)
		} else {
			ch = ' ' // nonspecial, means at ordinary char
		}

		if startpos < endpos {
			cchUnquantified := endpos - startpos
			if isQuant {
				cchUnquantified--
			}
			wasPrevQuantifier = false

			if cchUnquantified > 0 {
				p.addToConcatenate(startpos, cchUnquantified, false)
			}

			if isQuant {
				p.addUnitOne(p.charAt(endpos - 1))
			}
		}

		switch ch {
		case '!':
			goto BreakOuterScan

		case ' ':
			goto ContinueOuterScan

		case '[':
			cc, err := p.scanCharSet(p.useOptionI(), false)
			if err != nil {
				return nil, err
			}
			p.addUnitSet(cc)

		case '(':
			p.pushOptions()

			if grouper, err := p.scanGroupOpen(); err != nil {
				return nil, err
			} else if grouper == nil {
				p.popKeepOptions()
			} else {
				p.pushGroup()
				p.startGroup(grouper)
			}

			continue

		case '|':
			p.addAlternate()
			goto ContinueOuterScan

		case ')':
			if p.emptyStack() {
				return nil, p.getErr(ErrUnexpectedParen)
			}

			if err := p.addGroup(); err != nil {
				return nil, err
			}
			if err := p.popGroup(); err != nil {
				return nil, err
			}
			p.popOptions()

			if p.unit == nil {
				goto ContinueOuterScan
			}

		case '\\':
			n, err := p.scanBackslash(false)
			if err != nil {
				return nil, err
			}
			p.addUnitNode(n)

		case '^':
			if p.useOptionM() {
				p.addUnitType(ntBol)
			} else {
				p.addUnitType(ntBeginning)
			}

		case '$':
			if p.useOptionM() {
				p.addUnitType(ntEol)
			} else {
				p.addUnitType(ntEndZ)
			}

		case '.':
			if p.useOptionE() {
				p.addUnitSet(ECMAAnyClass())
			} else if p.useOptionS() {
				p.addUnitSet(AnyClass())
			} else {
				p.addUnitNotone('\n')
			}

		case '{', '*', '+', '?':
			if p.unit == nil {
				if wasPrevQuantifier {
					return nil, p.getErr(ErrInvalidRepeatOp)
				} else {
					return nil, p.getErr(ErrMissingRepeatArgument)
				}
			}
			p.moveLeft()

		default:
			return nil, p.getErr(ErrInternalError)
		}

		if err := p.scanBlank(); err != nil {
			return nil, err
		}

		if p.charsRight() > 0 {
			isQuant = p.isTrueQuantifier()
		}
		if p.charsRight() == 0 || !isQuant {
			//maintain odd C# assignment order -- not sure if required, could clean up?
			p.addConcatenate()
			goto ContinueOuterScan
		}

		ch = p.moveRightGetChar()

		// Handle quantifiers
		for p.unit != nil {
			var min, max int
			var lazy bool

			switch ch {
			case '*':
				min = 0
				max = math.MaxInt32

			case '?':
				min = 0
				max = 1

			case '+':
				min = 1
				max = math.MaxInt32

			case '{':
				{
					var err error
					startpos = p.textpos()
					if min, err = p.scanDecimal(); err != nil {
						return nil, err
					}
					max = min
					if startpos < p.textpos() {
						if p.charsRight() > 0 && p.rightChar(0) == ',' {
							p.moveRight(1)
							if p.charsRight() == 0 || p.rightChar(0) == '}' {
								max = math.MaxInt32
							} else {
								if max, err = p.scanDecimal(); err != nil {
									return nil, err
								}
							}
						}
					}

					if startpos == p.textpos() || p.charsRight() == 0 || p.moveRightGetChar() != '}' {
						p.addConcatenate()
						p.textto(startpos - 1)
						goto ContinueOuterScan
					}
				}

			default:
				return nil, p.getErr(ErrInternalError)
			}

			if err := p.scanBlank(); err != nil {
				return nil, err
			}

			if p.charsRight() == 0 || p.rightChar(0) != '?' {
				lazy = false
			} else {
				p.moveRight(1)
				lazy = true
			}

			if min > max {
				return nil, p.getErr(ErrInvalidRepeatSize)
			}

			p.addConcatenate3(lazy, min, max)
		}

	ContinueOuterScan:
	}

BreakOuterScan:
	;

	if !p.emptyStack() {
		return nil, p.getErr(ErrMissingParen)
	}

	if err := p.addGroup(); err != nil {
		return nil, err
	}

	return p.unit, nil

}

/*
 * Simple parsing for replacement patterns
 */
func (p *parser) scanReplacement() (*regexNode, error) {
	var c, startpos int

	p.concatenation = newRegexNode(ntConcatenate, p.options)

	for {
		c = p.charsRight()
		if c == 0 {
			break
		}

		startpos = p.textpos()

		for c > 0 && p.rightChar(0) != '$' {
			p.moveRight(1)
			c--
		}

		p.addToConcatenate(startpos, p.textpos()-startpos, true)

		if c > 0 {
			if p.moveRightGetChar() == '$' {
				n, err := p.scanDollar()
				if err != nil {
					return nil, err
				}
				p.addUnitNode(n)
			}
			p.addConcatenate()
		}
	}

	return p.concatenation, nil
}

/*
 * Scans $ patterns recognized within replacement patterns
 */
func (p *parser) scanDollar() (*regexNode, error) {
	if p.charsRight() == 0 {
		return newRegexNodeCh(ntOne, p.options, '$'), nil
	}

	ch := p.rightChar(0)
	angled := false
	backpos := p.textpos()
	lastEndPos := backpos

	// Note angle

	if ch == '{' && p.charsRight() > 1 {
		angled = true
		p.moveRight(1)
		ch = p.rightChar(0)
	}

	// Try to parse backreference: \1 or \{1} or \{cap}

	if ch >= '0' && ch <= '9' {
		if !angled && p.useOptionE() {
			capnum := -1
			newcapnum := int(ch - '0')
			p.moveRight(1)
			if p.isCaptureSlot(newcapnum) {
				capnum = newcapnum
				lastEndPos = p.textpos()
			}

			for p.charsRight() > 0 {
				ch = p.rightChar(0)
				if ch < '0' || ch > '9' {
					break
				}
				digit := int(ch - '0')
				if newcapnum > maxValueDiv10 || (newcapnum == maxValueDiv10 && digit > maxValueMod10) {
					return nil, p.getErr(ErrCaptureGroupOutOfRange)
				}

				newcapnum = newcapnum*10 + digit

				p.moveRight(1)
				if p.isCaptureSlot(newcapnum) {
					capnum = newcapnum
					lastEndPos = p.textpos()
				}
			}
			p.textto(lastEndPos)
			if capnum >= 0 {
				return newRegexNodeM(ntRef, p.options, capnum), nil
			}
		} else {
			capnum, err := p.scanDecimal()
			if err != nil {
				return nil, err
			}
			if !angled || p.charsRight() > 0 && p.moveRightGetChar() == '}' {
				if p.isCaptureSlot(capnum) {
					return newRegexNodeM(ntRef, p.options, capnum), nil
				}
			}
		}
	} else if angled && IsWordChar(ch) {
		capname := p.scanCapname()

		if p.charsRight() > 0 && p.moveRightGetChar() == '}' {
			if p.isCaptureName(capname) {
				return newRegexNodeM(ntRef, p.options, p.captureSlotFromName(capname)), nil
			}
		}
	} else if !angled {
		capnum := 1

		switch ch {
		case '$':
			p.moveRight(1)
			return newRegexNodeCh(ntOne, p.options, '$'), nil
		case '&':
			capnum = 0
		case '`':
			capnum = replaceLeftPortion
		case '\'':
			capnum = replaceRightPortion
		case '+':
			capnum = replaceLastGroup
		case '_':
			capnum = replaceWholeString
		}

		if capnum != 1 {
			p.moveRight(1)
			return newRegexNodeM(ntRef, p.options, capnum), nil
		}
	}

	// unrecognized $: literalize

	p.textto(backpos)
	return newRegexNodeCh(ntOne, p.options, '$'), nil
}

// scanGroupOpen scans chars following a '(' (not counting the '('), and returns
// a RegexNode for the type of group scanned, or nil if the group
// simply changed options (?cimsx-cimsx) or was a comment (#...).
func (p *parser) scanGroupOpen() (*regexNode, error) {
	var ch rune
	var nt nodeType
	var err error
	close := '>'
	start := p.textpos()

	// just return a RegexNode if we have:
	// 1. "(" followed by nothing
	// 2. "(x" where x != ?
	// 3. "(?)"
	if p.charsRight() == 0 || p.rightChar(0) != '?' || (p.rightChar(0) == '?' && (p.charsRight() > 1 && p.rightChar(1) == ')')) {
		if p.useOptionN() || p.ignoreNextParen {
			p.ignoreNextParen = false
			return newRegexNode(ntGroup, p.options), nil
		}
		return newRegexNodeMN(ntCapture, p.options, p.consumeAutocap(), -1), nil
	}

	p.moveRight(1)

	for {
		if p.charsRight() == 0 {
			break
		}

		switch ch = p.moveRightGetChar(); ch {
		case ':':
			nt = ntGroup

		case '=':
			p.options &= ^RightToLeft
			nt = ntRequire

		case '!':
			p.options &= ^RightToLeft
			nt = ntPrevent

		case '>':
			nt = ntGreedy

		case '\'':
			close = '\''
			fallthrough

		case '<':
			if p.charsRight() == 0 {
				goto BreakRecognize
			}

			switch ch = p.moveRightGetChar(); ch {
			case '=':
				if close == '\'' {
					goto BreakRecognize
				}

				p.options |= RightToLeft
				nt = ntRequire

			case '!':
				if close == '\'' {
					goto BreakRecognize
				}

				p.options |= RightToLeft
				nt = ntPrevent

			default:
				p.moveLeft()
				capnum := -1
				uncapnum := -1
				proceed := false

				// grab part before -

				if ch >= '0' && ch <= '9' {
					if capnum, err = p.scanDecimal(); err != nil {
						return nil, err
					}

					if !p.isCaptureSlot(capnum) {
						capnum = -1
					}

					// check if we have bogus characters after the number
					if p.charsRight() > 0 && !(p.rightChar(0) == close || p.rightChar(0) == '-') {
						return nil, p.getErr(ErrInvalidGroupName)
					}
					if capnum == 0 {
						return nil, p.getErr(ErrCapNumNotZero)
					}
				} else if IsWordChar(ch) {
					capname := p.scanCapname()

					if p.isCaptureName(capname) {
						capnum = p.captureSlotFromName(capname)
					}

					// check if we have bogus character after the name
					if p.charsRight() > 0 && !(p.rightChar(0) == close || p.rightChar(0) == '-') {
						return nil, p.getErr(ErrInvalidGroupName)
					}
				} else if ch == '-' {
					proceed = true
				} else {
					// bad group name - starts with something other than a word character and isn't a number
					return nil, p.getErr(ErrInvalidGroupName)
				}

				// grab part after - if any

				if (capnum != -1 || proceed == true) && p.charsRight() > 0 && p.rightChar(0) == '-' {
					p.moveRight(1)

					//no more chars left, no closing char, etc
					if p.charsRight() == 0 {
						return nil, p.getErr(ErrInvalidGroupName)
					}

					ch = p.rightChar(0)
					if ch >= '0' && ch <= '9' {
						if uncapnum, err = p.scanDecimal(); err != nil {
							return nil, err
						}

						if !p.isCaptureSlot(uncapnum) {
							return nil, p.getErr(ErrUndefinedBackRef, uncapnum)
						}

						// check if we have bogus characters after the number
						if p.charsRight() > 0 && p.rightChar(0) != close {
							return nil, p.getErr(ErrInvalidGroupName)
						}
					} else if IsWordChar(ch) {
						uncapname := p.scanCapname()

						if !p.isCaptureName(uncapname) {
							return nil, p.getErr(ErrUndefinedNameRef, uncapname)
						}
						uncapnum = p.captureSlotFromName(uncapname)

						// check if we have bogus character after the name
						if p.charsRight() > 0 && p.rightChar(0) != close {
							return nil, p.getErr(ErrInvalidGroupName)
						}
					} else {
						// bad group name - starts with something other than a word character and isn't a number
						return nil, p.getErr(ErrInvalidGroupName)
					}
				}

				// actually make the node

				if (capnum != -1 || uncapnum != -1) && p.charsRight() > 0 && p.moveRightGetChar() == close {
					return newRegexNodeMN(ntCapture, p.options, capnum, uncapnum), nil
				}
				goto BreakRecognize
			}

		case '(':
			// alternation construct (?(...) | )

			parenPos := p.textpos()
			if p.charsRight() > 0 {
				ch = p.rightChar(0)

				// check if the alternation condition is a backref
				if ch >= '0' && ch <= '9' {
					var capnum int
					if capnum, err = p.scanDecimal(); err != nil {
						return nil, err
					}
					if p.charsRight() > 0 && p.moveRightGetChar() == ')' {
						if p.isCaptureSlot(capnum) {
							return newRegexNodeM(ntTestref, p.options, capnum), nil
						}
						return nil, p.getErr(ErrUndefinedReference, capnum)
					}

					return nil, p.getErr(ErrMalformedReference, capnum)

				} else if IsWordChar(ch) {
					capname := p.scanCapname()

					if p.isCaptureName(capname) && p.charsRight() > 0 && p.moveRightGetChar() == ')' {
						return newRegexNodeM(ntTestref, p.options, p.captureSlotFromName(capname)), nil
					}
				}
			}
			// not a backref
			nt = ntTestgroup
			p.textto(parenPos - 1)   // jump to the start of the parentheses
			p.ignoreNextParen = true // but make sure we don't try to capture the insides

			charsRight := p.charsRight()
			if charsRight >= 3 && p.rightChar(1) == '?' {
				rightchar2 := p.rightChar(2)
				// disallow comments in the condition
				if rightchar2 == '#' {
					return nil, p.getErr(ErrAlternationCantHaveComment)
				}

				// disallow named capture group (?<..>..) in the condition
				if rightchar2 == '\'' {
					return nil, p.getErr(ErrAlternationCantCapture)
				}

				if charsRight >= 4 && (rightchar2 == '<' && p.rightChar(3) != '!' && p.rightChar(3) != '=') {
					return nil, p.getErr(ErrAlternationCantCapture)
				}
			}

		case 'P':
			if p.useRE2() {
				// support for P<name> syntax
				if p.charsRight() < 3 {
					goto BreakRecognize
				}

				ch = p.moveRightGetChar()
				if ch != '<' {
					goto BreakRecognize
				}

				ch = p.moveRightGetChar()
				p.moveLeft()

				if IsWordChar(ch) {
					capnum := -1
					capname := p.scanCapname()

					if p.isCaptureName(capname) {
						capnum = p.captureSlotFromName(capname)
					}

					// check if we have bogus character after the name
					if p.charsRight() > 0 && p.rightChar(0) != '>' {
						return nil, p.getErr(ErrInvalidGroupName)
					}

					// actually make the node

					if capnum != -1 && p.charsRight() > 0 && p.moveRightGetChar() == '>' {
						return newRegexNodeMN(ntCapture, p.options, capnum, -1), nil
					}
					goto BreakRecognize

				} else {
					// bad group name - starts with something other than a word character and isn't a number
					return nil, p.getErr(ErrInvalidGroupName)
				}
			}
			// if we're not using RE2 compat mode then
			// we just behave like normal
			fallthrough

		default:
			p.moveLeft()

			nt = ntGroup
			// disallow options in the children of a testgroup node
			if p.group.t != ntTestgroup {
				p.scanOptions()
			}
			if p.charsRight() == 0 {
				goto BreakRecognize
			}

			if ch = p.moveRightGetChar(); ch == ')' {
				return nil, nil
			}

			if ch != ':' {
				goto BreakRecognize
			}

		}

		return newRegexNode(nt, p.options), nil
	}

BreakRecognize:

	// break Recognize comes here

	return nil, p.getErr(ErrUnrecognizedGrouping, string(p.pattern[start:p.textpos()]))
}

// scans backslash specials and basics
func (p *parser) scanBackslash(scanOnly bool) (*regexNode, error) {

	if p.charsRight() == 0 {
		return nil, p.getErr(ErrIllegalEndEscape)
	}

	switch ch := p.rightChar(0); ch {
	case 'b', 'B', 'A', 'G', 'Z', 'z':
		p.moveRight(1)
		return newRegexNode(p.typeFromCode(ch), p.options), nil

	case 'w':
		p.moveRight(1)
		if p.useOptionE() {
			return newRegexNodeSet(ntSet, p.options, ECMAWordClass()), nil
		}
		return newRegexNodeSet(ntSet, p.options, WordClass()), nil

	case 'W':
		p.moveRight(1)
		if p.useOptionE() {
			return newRegexNodeSet(ntSet, p.options, NotECMAWordClass()), nil
		}
		return newRegexNodeSet(ntSet, p.options, NotWordClass()), nil

	case 's':
		p.moveRight(1)
		if p.useOptionE() {
			return newRegexNodeSet(ntSet, p.options, ECMASpaceClass()), nil
		}
		return newRegexNodeSet(ntSet, p.options, SpaceClass()), nil

	case 'S':
		p.moveRight(1)
		if p.useOptionE() {
			return newRegexNodeSet(ntSet, p.options, NotECMASpaceClass()), nil
		}
		return newRegexNodeSet(ntSet, p.options, NotSpaceClass()), nil

	case 'd':
		p.moveRight(1)
		if p.useOptionE() {
			return newRegexNodeSet(ntSet, p.options, ECMADigitClass()), nil
		}
		return newRegexNodeSet(ntSet, p.options, DigitClass()), nil

	case 'D':
		p.moveRight(1)
		if p.useOptionE() {
			return newRegexNodeSet(ntSet, p.options, NotECMADigitClass()), nil
		}
		return newRegexNodeSet(ntSet, p.options, NotDigitClass()), nil

	case 'p', 'P':
		p.moveRight(1)
		prop, err := p.parseProperty()
		if err != nil {
			return nil, err
		}
		cc := &CharSet{}
		cc.addCategory(prop, (ch != 'p'), p.useOptionI(), p.patternRaw)
		if p.useOptionI() {
			cc.addLowercase()
		}

		return newRegexNodeSet(ntSet, p.options, cc), nil

	default:
		return p.scanBasicBackslash(scanOnly)
	}
}

// Scans \-style backreferences and character escapes
func (p *parser) scanBasicBackslash(scanOnly bool) (*regexNode, error) {
	if p.charsRight() == 0 {
		return nil, p.getErr(ErrIllegalEndEscape)
	}
	angled := false
	close := '\x00'

	backpos := p.textpos()
	ch := p.rightChar(0)

	// allow \k<foo> instead of \<foo>, which is now deprecated

	if ch == 'k' {
		if p.charsRight() >= 2 {
			p.moveRight(1)
			ch = p.moveRightGetChar()

			if ch == '<' || ch == '\'' {
				angled = true
				if ch == '\'' {
					close = '\''
				} else {
					close = '>'
				}
			}
		}

		if !angled || p.charsRight() <= 0 {
			return nil, p.getErr(ErrMalformedNameRef)
		}

		ch = p.rightChar(0)

	} else if (ch == '<' || ch == '\'') && p.charsRight() > 1 { // Note angle without \g
		angled = true
		if ch == '\'' {
			close = '\''
		} else {
			close = '>'
		}

		p.moveRight(1)
		ch = p.rightChar(0)
	}

	// Try to parse backreference: \<1> or \<cap>

	if angled && ch >= '0' && ch <= '9' {
		capnum, err := p.scanDecimal()
		if err != nil {
			return nil, err
		}

		if p.charsRight() > 0 && p.moveRightGetChar() == close {
			if p.isCaptureSlot(capnum) {
				return newRegexNodeM(ntRef, p.options, capnum), nil
			}
			return nil, p.getErr(ErrUndefinedBackRef, capnum)
		}
	} else if !angled && ch >= '1' && ch <= '9' { // Try to parse backreference or octal: \1
		capnum, err := p.scanDecimal()
		if err != nil {
			return nil, err
		}

		if scanOnly {
			return nil, nil
		}

		if p.isCaptureSlot(capnum) {
			return newRegexNodeM(ntRef, p.options, capnum), nil
		}
		if capnum <= 9 && !p.useOptionE() {
			return nil, p.getErr(ErrUndefinedBackRef, capnum)
		}

	} else if angled && IsWordChar(ch) {
		capname := p.scanCapname()

		if p.charsRight() > 0 && p.moveRightGetChar() == close {
			if p.isCaptureName(capname) {
				return newRegexNodeM(ntRef, p.options, p.captureSlotFromName(capname)), nil
			}
			return nil, p.getErr(ErrUndefinedNameRef, capname)
		}
	}

	// Not backreference: must be char code

	p.textto(backpos)
	ch, err := p.scanCharEscape()
	if err != nil {
		return nil, err
	}

	if p.useOptionI() {
		ch = unicode.ToLower(ch)
	}

	return newRegexNodeCh(ntOne, p.options, ch), nil
}

// Scans X for \p{X} or \P{X}
func (p *parser) parseProperty() (string, error) {
	if p.charsRight() < 3 {
		return "", p.getErr(ErrIncompleteSlashP)
	}
	ch := p.moveRightGetChar()
	if ch != '{' {
		return "", p.getErr(ErrMalformedSlashP)
	}

	startpos := p.textpos()
	for p.charsRight() > 0 {
		ch = p.moveRightGetChar()
		if !(IsWordChar(ch) || ch == '-') {
			p.moveLeft()
			break
		}
	}
	capname := string(p.pattern[startpos:p.textpos()])

	if p.charsRight() == 0 || p.moveRightGetChar() != '}' {
		return "", p.getErr(ErrIncompleteSlashP)
	}

	if !isValidUnicodeCat(capname) {
		return "", p.getErr(ErrUnknownSlashP, capname)
	}

	return capname, nil
}

// Returns ReNode type for zero-length assertions with a \ code.
func (p *parser) typeFromCode(ch rune) nodeType {
	switch ch {
	case 'b':
		if p.useOptionE() {
			return ntECMABoundary
		}
		return ntBoundary
	case 'B':
		if p.useOptionE() {
			return ntNonECMABoundary
		}
		return ntNonboundary
	case 'A':
		return ntBeginning
	case 'G':
		return ntStart
	case 'Z':
		return ntEndZ
	case 'z':
		return ntEnd
	default:
		return ntNothing
	}
}

// Scans whitespace or x-mode comments.
func (p *parser) scanBlank() error {
	if p.useOptionX() {
		for {
			for p.charsRight() > 0 && isSpace(p.rightChar(0)) {
				p.moveRight(1)
			}

			if p.charsRight() == 0 {
				break
			}

			if p.rightChar(0) == '#' {
				for p.charsRight() > 0 && p.rightChar(0) != '\n' {
					p.moveRight(1)
				}
			} else if p.charsRight() >= 3 && p.rightChar(2) == '#' &&
				p.rightChar(1) == '?' && p.rightChar(0) == '(' {
				for p.charsRight() > 0 && p.rightChar(0) != ')' {
					p.moveRight(1)
				}
				if p.charsRight() == 0 {
					return p.getErr(ErrUnterminatedComment)
				}
				p.moveRight(1)
			} else {
				break
			}
		}
	} else {
		for {
			if p.charsRight() < 3 || p.rightChar(2) != '#' ||
				p.rightChar(1) != '?' || p.rightChar(0) != '(' {
				return nil
			}

			for p.charsRight() > 0 && p.rightChar(0) != ')' {
				p.moveRight(1)
			}
			if p.charsRight() == 0 {
				return p.getErr(ErrUnterminatedComment)
			}
			p.moveRight(1)
		}
	}
	return nil
}

func (p *parser) scanCapname() string {
	startpos := p.textpos()

	for p.charsRight() > 0 {
		if !IsWordChar(p.moveRightGetChar()) {
			p.moveLeft()
			break
		}
	}

	return string(p.pattern[startpos:p.textpos()])
}

//Scans contents of [] (not including []'s), and converts to a set.
func (p *parser) scanCharSet(caseInsensitive, scanOnly bool) (*CharSet, error) {
	ch := '\x00'
	chPrev := '\x00'
	inRange := false
	firstChar := true
	closed := false

	var cc *CharSet
	if !scanOnly {
		cc = &CharSet{}
	}

	if p.charsRight() > 0 && p.rightChar(0) == '^' {
		p.moveRight(1)
		if !scanOnly {
			cc.negate = true
		}
	}

	for ; p.charsRight() > 0; firstChar = false {
		fTranslatedChar := false
		ch = p.moveRightGetChar()
		if ch == ']' {
			if !firstChar {
				closed = true
				break
			} else if p.useOptionE() {
				if !scanOnly {
					cc.addRanges(NoneClass().ranges)
				}
				closed = true
				break
			}

		} else if ch == '\\' && p.charsRight() > 0 {
			switch ch = p.moveRightGetChar(); ch {
			case 'D', 'd':
				if !scanOnly {
					if inRange {
						return nil, p.getErr(ErrBadClassInCharRange, ch)
					}
					cc.addDigit(p.useOptionE(), ch == 'D', p.patternRaw)
				}
				continue

			case 'S', 's':
				if !scanOnly {
					if inRange {
						return nil, p.getErr(ErrBadClassInCharRange, ch)
					}
					cc.addSpace(p.useOptionE(), ch == 'S')
				}
				continue

			case 'W', 'w':
				if !scanOnly {
					if inRange {
						return nil, p.getErr(ErrBadClassInCharRange, ch)
					}

					cc.addWord(p.useOptionE(), ch == 'W')
				}
				continue

			case 'p', 'P':
				if !scanOnly {
					if inRange {
						return nil, p.getErr(ErrBadClassInCharRange, ch)
					}
					prop, err := p.parseProperty()
					if err != nil {
						return nil, err
					}
					cc.addCategory(prop, (ch != 'p'), caseInsensitive, p.patternRaw)
				} else {
					p.parseProperty()
				}

				continue

			case '-':
				if !scanOnly {
					cc.addRange(ch, ch)
				}
				continue

			default:
				p.moveLeft()
				var err error
				ch, err = p.scanCharEscape() // non-literal character
				if err != nil {
					return nil, err
				}
				fTranslatedChar = true
				break // this break will only break out of the switch
			}
		} else if ch == '[' {
			// This is code for Posix style properties - [:Ll:] or [:IsTibetan:].
			// It currently doesn't do anything other than skip the whole thing!
			if p.charsRight() > 0 && p.rightChar(0) == ':' && !inRange {
				savePos := p.textpos()

				p.moveRight(1)
				negate := false
				if p.charsRight() > 1 && p.rightChar(0) == '^' {
					negate = true
					p.moveRight(1)
				}

				nm := p.scanCapname() // snag the name
				if !scanOnly && p.useRE2() {
					// look up the name since these are valid for RE2
					// add the group based on the name
					if ok := cc.addNamedASCII(nm, negate); !ok {
						return nil, p.getErr(ErrInvalidCharRange)
					}
				}
				if p.charsRight() < 2 || p.moveRightGetChar() != ':' || p.moveRightGetChar() != ']' {
					p.textto(savePos)
				} else if p.useRE2() {
					// move on
					continue
				}
			}
		}

		if inRange {
			inRange = false
			if !scanOnly {
				if ch == '[' && !fTranslatedChar && !firstChar {
					// We thought we were in a range, but we're actually starting a subtraction.
					// In that case, we'll add chPrev to our char class, skip the opening [, and
					// scan the new character class recursively.
					cc.addChar(chPrev)
					sub, err := p.scanCharSet(caseInsensitive, false)
					if err != nil {
						return nil, err
					}
					cc.addSubtraction(sub)

					if p.charsRight() > 0 && p.rightChar(0) != ']' {
						return nil, p.getErr(ErrSubtractionMustBeLast)
					}
				} else {
					// a regular range, like a-z
					if chPrev > ch {
						return nil, p.getErr(ErrReversedCharRange)
					}
					cc.addRange(chPrev, ch)
				}
			}
		} else if p.charsRight() >= 2 && p.rightChar(0) == '-' && p.rightChar(1) != ']' {
			// this could be the start of a range
			chPrev = ch
			inRange = true
			p.moveRight(1)
		} else if p.charsRight() >= 1 && ch == '-' && !fTranslatedChar && p.rightChar(0) == '[' && !firstChar {
			// we aren't in a range, and now there is a subtraction.  Usually this happens
			// only when a subtraction follows a range, like [a-z-[b]]
			if !scanOnly {
				p.moveRight(1)
				sub, err := p.scanCharSet(caseInsensitive, false)
				if err != nil {
					return nil, err
				}
				cc.addSubtraction(sub)

				if p.charsRight() > 0 && p.rightChar(0) != ']' {
					return nil, p.getErr(ErrSubtractionMustBeLast)
				}
			} else {
				p.moveRight(1)
				p.scanCharSet(caseInsensitive, true)
			}
		} else {
			if !scanOnly {
				cc.addRange(ch, ch)
			}
		}
	}

	if !closed {
		return nil, p.getErr(ErrUnterminatedBracket)
	}

	if !scanOnly && caseInsensitive {
		cc.addLowercase()
	}

	return cc, nil
}

// Scans any number of decimal digits (pegs value at 2^31-1 if too large)
func (p *parser) scanDecimal() (int, error) {
	i := 0
	var d int

	for p.charsRight() > 0 {
		d = int(p.rightChar(0) - '0')
		if d < 0 || d > 9 {
			break
		}
		p.moveRight(1)

		if i > maxValueDiv10 || (i == maxValueDiv10 && d > maxValueMod10) {
			return 0, p.getErr(ErrCaptureGroupOutOfRange)
		}

		i *= 10
		i += d
	}

	return int(i), nil
}

// Returns true for options allowed only at the top level
func isOnlyTopOption(option RegexOptions) bool {
	return option == RightToLeft || option == ECMAScript || option == RE2
}

// Scans cimsx-cimsx option string, stops at the first unrecognized char.
func (p *parser) scanOptions() {

	for off := false; p.charsRight() > 0; p.moveRight(1) {
		ch := p.rightChar(0)

		if ch == '-' {
			off = true
		} else if ch == '+' {
			off = false
		} else {
			option := optionFromCode(ch)
			if option == 0 || isOnlyTopOption(option) {
				return
			}

			if off {
				p.options &= ^option
			} else {
				p.options |= option
			}
		}
	}
}

// Scans \ code for escape codes that map to single unicode chars.
func (p *parser) scanCharEscape() (rune, error) {

	ch := p.moveRightGetChar()

	if ch >= '0' && ch <= '7' {
		p.moveLeft()
		return p.scanOctal(), nil
	}

	switch ch {
	case 'x':
		// support for \x{HEX} syntax from Perl and PCRE
		if p.charsRight() > 0 && p.rightChar(0) == '{' {
			p.moveRight(1)
			return p.scanHexUntilBrace()
		}
		return p.scanHex(2)
	case 'u':
		return p.scanHex(4)
	case 'a':
		return '\u0007', nil
	case 'b':
		return '\b', nil
	case 'e':
		return '\u001B', nil
	case 'f':
		return '\f', nil
	case 'n':
		return '\n', nil
	case 'r':
		return '\r', nil
	case 't':
		return '\t', nil
	case 'v':
		return '\u000B', nil
	case 'c':
		return p.scanControl()
	default:
		if !p.useOptionE() && IsWordChar(ch) {
			return 0, p.getErr(ErrUnrecognizedEscape, string(ch))
		}
		return ch, nil
	}
}

// Grabs and converts an ascii control character
func (p *parser) scanControl() (rune, error) {
	if p.charsRight() <= 0 {
		return 0, p.getErr(ErrMissingControl)
	}

	ch := p.moveRightGetChar()

	// \ca interpreted as \cA

	if ch >= 'a' && ch <= 'z' {
		ch = (ch - ('a' - 'A'))
	}
	ch = (ch - '@')
	if ch >= 0 && ch < ' ' {
		return ch, nil
	}

	return 0, p.getErr(ErrUnrecognizedControl)

}

// Scan hex digits until we hit a closing brace.
// Non-hex digits, hex value too large for UTF-8, or running out of chars are errors
func (p *parser) scanHexUntilBrace() (rune, error) {
	// PCRE spec reads like unlimited hex digits are allowed, but unicode has a limit
	// so we can enforce that
	i := 0
	hasContent := false

	for p.charsRight() > 0 {
		ch := p.moveRightGetChar()
		if ch == '}' {
			// hit our close brace, we're done here
			// prevent \x{}
			if !hasContent {
				return 0, p.getErr(ErrTooFewHex)
			}
			return rune(i), nil
		}
		hasContent = true
		// no brace needs to be hex digit
		d := hexDigit(ch)
		if d < 0 {
			return 0, p.getErr(ErrMissingBrace)
		}

		i *= 0x10
		i += d

		if i > unicode.MaxRune {
			return 0, p.getErr(ErrInvalidHex)
		}
	}

	// we only make it here if we run out of digits without finding the brace
	return 0, p.getErr(ErrMissingBrace)
}

// Scans exactly c hex digits (c=2 for \xFF, c=4 for \uFFFF)
func (p *parser) scanHex(c int) (rune, error) {

	i := 0

	if p.charsRight() >= c {
		for c > 0 {
			d := hexDigit(p.moveRightGetChar())
			if d < 0 {
				break
			}
			i *= 0x10
			i += d
			c--
		}
	}

	if c > 0 {
		return 0, p.getErr(ErrTooFewHex)
	}

	return rune(i), nil
}

// Returns n <= 0xF for a hex digit.
func hexDigit(ch rune) int {

	if d := uint(ch - '0'); d <= 9 {
		return int(d)
	}

	if d := uint(ch - 'a'); d <= 5 {
		return int(d + 0xa)
	}

	if d := uint(ch - 'A'); d <= 5 {
		return int(d + 0xa)
	}

	return -1
}

// Scans up to three octal digits (stops before exceeding 0377).
func (p *parser) scanOctal() rune {
	// Consume octal chars only up to 3 digits and value 0377

	c := 3

	if c > p.charsRight() {
		c = p.charsRight()
	}

	//we know the first char is good because the caller had to check
	i := 0
	d := int(p.rightChar(0) - '0')
	for c > 0 && d <= 7 {
		if i >= 0x20 && p.useOptionE() {
			break
		}
		i *= 8
		i += d
		c--

		p.moveRight(1)
		if !p.rightMost() {
			d = int(p.rightChar(0) - '0')
		}
	}

	// Octal codes only go up to 255.  Any larger and the behavior that Perl follows
	// is simply to truncate the high bits.
	i &= 0xFF

	return rune(i)
}

// Returns the current parsing position.
func (p *parser) textpos() int {
	return p.currentPos
}

// Zaps to a specific parsing position.
func (p *parser) textto(pos int) {
	p.currentPos = pos
}

// Returns the char at the right of the current parsing position and advances to the right.
func (p *parser) moveRightGetChar() rune {
	ch := p.pattern[p.currentPos]
	p.currentPos++
	return ch
}

// Moves the current position to the right.
func (p *parser) moveRight(i int) {
	// default would be 1
	p.currentPos += i
}

// Moves the current parsing position one to the left.
func (p *parser) moveLeft() {
	p.currentPos--
}

// Returns the char left of the current parsing position.
func (p *parser) charAt(i int) rune {
	return p.pattern[i]
}

// Returns the char i chars right of the current parsing position.
func (p *parser) rightChar(i int) rune {
	// default would be 0
	return p.pattern[p.currentPos+i]
}

// Number of characters to the right of the current parsing position.
func (p *parser) charsRight() int {
	return len(p.pattern) - p.currentPos
}

func (p *parser) rightMost() bool {
	return p.currentPos == len(p.pattern)
}

// Looks up the slot number for a given name
func (p *parser) captureSlotFromName(capname string) int {
	return p.capnames[capname]
}

// True if the capture slot was noted
func (p *parser) isCaptureSlot(i int) bool {
	if p.caps != nil {
		_, ok := p.caps[i]
		return ok
	}

	return (i >= 0 && i < p.capsize)
}

// Looks up the slot number for a given name
func (p *parser) isCaptureName(capname string) bool {
	if p.capnames == nil {
		return false
	}

	_, ok := p.capnames[capname]
	return ok
}

// option shortcuts

// True if N option disabling '(' autocapture is on.
func (p *parser) useOptionN() bool {
	return (p.options & ExplicitCapture) != 0
}

// True if I option enabling case-insensitivity is on.
func (p *parser) useOptionI() bool {
	return (p.options & IgnoreCase) != 0
}

// True if M option altering meaning of $ and ^ is on.
func (p *parser) useOptionM() bool {
	return (p.options & Multiline) != 0
}

// True if S option altering meaning of . is on.
func (p *parser) useOptionS() bool {
	return (p.options & Singleline) != 0
}

// True if X option enabling whitespace/comment mode is on.
func (p *parser) useOptionX() bool {
	return (p.options & IgnorePatternWhitespace) != 0
}

// True if E option enabling ECMAScript behavior on.
func (p *parser) useOptionE() bool {
	return (p.options & ECMAScript) != 0
}

// true to use RE2 compatibility parsing behavior.
func (p *parser) useRE2() bool {
	return (p.options & RE2) != 0
}

// True if options stack is empty.
func (p *parser) emptyOptionsStack() bool {
	return len(p.optionsStack) == 0
}

// Finish the current quantifiable (when a quantifier is not found or is not possible)
func (p *parser) addConcatenate() {
	// The first (| inside a Testgroup group goes directly to the group
	p.concatenation.addChild(p.unit)
	p.unit = nil
}

// Finish the current quantifiable (when a quantifier is found)
func (p *parser) addConcatenate3(lazy bool, min, max int) {
	p.concatenation.addChild(p.unit.makeQuantifier(lazy, min, max))
	p.unit = nil
}

// Sets the current unit to a single char node
func (p *parser) addUnitOne(ch rune) {
	if p.useOptionI() {
		ch = unicode.ToLower(ch)
	}

	p.unit = newRegexNodeCh(ntOne, p.options, ch)
}

// Sets the current unit to a single inverse-char node
func (p *parser) addUnitNotone(ch rune) {
	if p.useOptionI() {
		ch = unicode.ToLower(ch)
	}

	p.unit = newRegexNodeCh(ntNotone, p.options, ch)
}

// Sets the current unit to a single set node
func (p *parser) addUnitSet(set *CharSet) {
	p.unit = newRegexNodeSet(ntSet, p.options, set)
}

// Sets the current unit to a subtree
func (p *parser) addUnitNode(node *regexNode) {
	p.unit = node
}

// Sets the current unit to an assertion of the specified type
func (p *parser) addUnitType(t nodeType) {
	p.unit = newRegexNode(t, p.options)
}

// Finish the current group (in response to a ')' or end)
func (p *parser) addGroup() error {
	if p.group.t == ntTestgroup || p.group.t == ntTestref {
		p.group.addChild(p.concatenation.reverseLeft())
		if (p.group.t == ntTestref && len(p.group.children) > 2) || len(p.group.children) > 3 {
			return p.getErr(ErrTooManyAlternates)
		}
	} else {
		p.alternation.addChild(p.concatenation.reverseLeft())
		p.group.addChild(p.alternation)
	}

	p.unit = p.group
	return nil
}

// Pops the option stack, but keeps the current options unchanged.
func (p *parser) popKeepOptions() {
	lastIdx := len(p.optionsStack) - 1
	p.optionsStack = p.optionsStack[:lastIdx]
}

// Recalls options from the stack.
func (p *parser) popOptions() {
	lastIdx := len(p.optionsStack) - 1
	// get the last item on the stack and then remove it by reslicing
	p.options = p.optionsStack[lastIdx]
	p.optionsStack = p.optionsStack[:lastIdx]
}

// Saves options on a stack.
func (p *parser) pushOptions() {
	p.optionsStack = append(p.optionsStack, p.options)
}

// Add a string to the last concatenate.
func (p *parser) addToConcatenate(pos, cch int, isReplacement bool) {
	var node *regexNode

	if cch == 0 {
		return
	}

	if cch > 1 {
		str := p.pattern[pos : pos+cch]

		if p.useOptionI() && !isReplacement {
			// We do the ToLower character by character for consistency.  With surrogate chars, doing
			// a ToLower on the entire string could actually change the surrogate pair.  This is more correct
			// linguistically, but since Regex doesn't support surrogates, it's more important to be
			// consistent.
			for i := 0; i < len(str); i++ {
				str[i] = unicode.ToLower(str[i])
			}
		}

		node = newRegexNodeStr(ntMulti, p.options, str)
	} else {
		ch := p.charAt(pos)

		if p.useOptionI() && !isReplacement {
			ch = unicode.ToLower(ch)
		}

		node = newRegexNodeCh(ntOne, p.options, ch)
	}

	p.concatenation.addChild(node)
}

// Push the parser state (in response to an open paren)
func (p *parser) pushGroup() {
	p.group.next = p.stack
	p.alternation.next = p.group
	p.concatenation.next = p.alternation
	p.stack = p.concatenation
}

// Remember the pushed state (in response to a ')')
func (p *parser) popGroup() error {
	p.concatenation = p.stack
	p.alternation = p.concatenation.next
	p.group = p.alternation.next
	p.stack = p.group.next

	// The first () inside a Testgroup group goes directly to the group
	if p.group.t == ntTestgroup && len(p.group.children) == 0 {
		if p.unit == nil {
			return p.getErr(ErrConditionalExpression)
		}

		p.group.addChild(p.unit)
		p.unit = nil
	}
	return nil
}

// True if the group stack is empty.
func (p *parser) emptyStack() bool {
	return p.stack == nil
}

// Start a new round for the parser state (in response to an open paren or string start)
func (p *parser) startGroup(openGroup *regexNode) {
	p.group = openGroup
	p.alternation = newRegexNode(ntAlternate, p.options)
	p.concatenation = newRegexNode(ntConcatenate, p.options)
}

// Finish the current concatenation (in response to a |)
func (p *parser) addAlternate() {
	// The | parts inside a Testgroup group go directly to the group

	if p.group.t == ntTestgroup || p.group.t == ntTestref {
		p.group.addChild(p.concatenation.reverseLeft())
	} else {
		p.alternation.addChild(p.concatenation.reverseLeft())
	}

	p.concatenation = newRegexNode(ntConcatenate, p.options)
}

// For categorizing ascii characters.

const (
	Q byte = 5 // quantifier
	S      = 4 // ordinary stopper
	Z      = 3 // ScanBlank stopper
	X      = 2 // whitespace
	E      = 1 // should be escaped
)

var _category = []byte{
	//01  2  3  4  5  6  7  8  9  A  B  C  D  E  F  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
	0, 0, 0, 0, 0, 0, 0, 0, 0, X, X, X, X, X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	// !  "  #  $  %  &  '  (  )  *  +  ,  -  .  /  0  1  2  3  4  5  6  7  8  9  :  ;  <  =  >  ?
	X, 0, 0, Z, S, 0, 0, 0, S, S, Q, Q, 0, 0, S, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, Q,
	//@A  B  C  D  E  F  G  H  I  J  K  L  M  N  O  P  Q  R  S  T  U  V  W  X  Y  Z  [  \  ]  ^  _
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, S, 0,
	//'a  b  c  d  e  f  g  h  i  j  k  l  m  n  o  p  q  r  s  t  u  v  w  x  y  z  {  |  }  ~
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, Q, S, 0, 0, 0,
}

func isSpace(ch rune) bool {
	return (ch <= ' ' && _category[ch] == X)
}

// Returns true for those characters that terminate a string of ordinary chars.
func isSpecial(ch rune) bool {
	return (ch <= '|' && _category[ch] >= S)
}

// Returns true for those characters that terminate a string of ordinary chars.
func isStopperX(ch rune) bool {
	return (ch <= '|' && _category[ch] >= X)
}

// Returns true for those characters that begin a quantifier.
func isQuantifier(ch rune) bool {
	return (ch <= '{' && _category[ch] >= Q)
}

func (p *parser) isTrueQuantifier() bool {
	nChars := p.charsRight()
	if nChars == 0 {
		return false
	}

	startpos := p.textpos()
	ch := p.charAt(startpos)
	if ch != '{' {
		return ch <= '{' && _category[ch] >= Q
	}

	//UGLY: this is ugly -- the original code was ugly too
	pos := startpos
	for {
		nChars--
		if nChars <= 0 {
			break
		}
		pos++
		ch = p.charAt(pos)
		if ch < '0' || ch > '9' {
			break
		}
	}

	if nChars == 0 || pos-startpos == 1 {
		return false
	}
	if ch == '}' {
		return true
	}
	if ch != ',' {
		return false
	}
	for {
		nChars--
		if nChars <= 0 {
			break
		}
		pos++
		ch = p.charAt(pos)
		if ch < '0' || ch > '9' {
			break
		}
	}

	return nChars > 0 && ch == '}'
}