red / red

dockimbel FIX: issue #4730 (Using decimal pair leads to invalid values for pair…

Latest commit 1d936bd

Nov 27, 2020

… parts)

6 contributors

Users who have contributed to this file

973 lines (864 sloc) 25.7 KB

Raw Blame

	REBOL [
	Title: "Red Lexical Scanner"
	Author: "Nenad Rakocevic"
	File: %lexer.r
	Tabs: 4
	Rights: "Copyright (C) 2011-2018 Red Foundation. All rights reserved."
	License: "BSD-3 - https://github.com/red/red/blob/master/BSD-3-License.txt"
	]

	;-- Patch NEW-LINE and NEW-LINE? natives to accept paren! --
	append first find third :new-line block! [path! paren!]
	append first find third :new-line? block! paren!


	lexer: context [
	verbose: 0

	old-line: none
	line: none ;-- source code lines counter
	count?: yes ;-- if TRUE, lines counter is enabled
	cnt: none ;-- counts nested {} in multi-line strings
	pos: none ;-- source input position (error reporting)
	mark: none ;-- use for keeping input cursor at same position
	path: none ;-- path input position (error reporting)
	in-path?: no ;-- flag for path items
	s: none ;-- mark start position of new value
	e: none ;-- mark end position of new value
	series: none ;-- temporary hold last stack series
	value: none ;-- new value
	value2: none ;-- secondary new value
	fail?: none ;-- used for failing some parsing rules
	type: none ;-- define the type of the new value
	rs?: no ;-- if TRUE, do lexing for Red/System
	neg?: no ;-- if TRUE, denotes a negative number value
	base: 16 ;-- binary base
	otag: none
	ot: none
	ct: none
	sep: none
	year: none
	month: none
	day: none
	hour: none
	mn: none
	sec: none
	date: none
	ee: none

	;====== Parsing rules ======

	four: charset "01234"
	half: charset "012345"
	non-zero: charset "123456789"
	digit: union non-zero charset "0"
	dot: #"."
	comma: #","

	byte: [
	"25" half
	\| #"2" four digit
	\| #"1" digit digit
	\| opt #"0" non-zero digit
	\| 0 2 #"0" digit
	\| 1 2 #"0"
	]

	hexa: union digit charset "ABCDEF"
	hexa-char: union hexa charset "abcdef"
	alpha: charset [#"A" - #"Z" #"a" - #"z"]
	base64-char: union digit union alpha charset "+/="

	;-- UTF-8 encoding rules from: http://tools.ietf.org/html/rfc3629#section-4
	UTF-8-BOM: #{EFBBBF}
	ws-ASCII: charset " ^-^M" ;-- ASCII common whitespaces
	ws-U+2k: charset [#"^(80)" - #"^(8A)"] ;-- Unicode spaces in the U+2000-U+200A range
	UTF8-tail: charset [#"^(80)" - #"^(BF)"]
	UTF8-1: charset [#"^(00)" - #"^(7F)"]

	UTF8-2: reduce [
	charset [#"^(C2)" - #"^(DF)"]
	UTF8-tail
	]

	UTF8-3: reduce [
	#{E0} charset [#"^(A0)" - #"^(BF)"] UTF8-tail
	'\| charset [#"^(E1)" - #"^(EC)"] 2 UTF8-tail
	'\| #{ED} charset [#"^(80)" - #"^(9F)"] UTF8-tail
	'\| charset [#"^(EE)" - #"^(EF)"] 2 UTF8-tail
	]

	UTF8-4: reduce [
	#{F0} charset [#"^(90)" - #"^(BF)"] 2 UTF8-tail
	'\| charset [#"^(F1)" - #"^(F3)"] 3 UTF8-tail
	'\| #{F4} charset [#"^(80)" - #"^(8F)"] 2 UTF8-tail
	]

	UTF8-char: [pos: UTF8-1 \| UTF8-2 \| UTF8-3 \| UTF8-4]

	not-word-char: charset {/\,[](){}"#%$@:;}
	not-word-1st: union union not-word-char digit charset {'}
	not-file-char: charset {[](){}"@:;}
	not-url-char: charset {[](){}";}
	not-email-char: union not-file-char union ws-ASCII charset "<^/"
	not-ref-char: union ws-ASCII charset {#$@'",;=\^^^/<>[](){}}
	not-str-char: #"^""
	not-mstr-char: #"}"
	not-tag-1st: complement union ws-ASCII charset "=><[](){};^""
	not-tag-char: complement charset ">"
	tag-char: charset "<>"
	caret-Uchar: charset [#"^(40)" - #"^(5F)"]
	caret-Lchar: charset [#"^(61)" - #"^(7A)"]
	non-printable-char: charset [#"^(00)" - #"^(1F)"]
	pair-end: charset {^{"[]();:/}
	integer-end: charset {^{"[]();:xX</}
	path-end: charset {^{"[]();}
	file-end: charset {^{[]();}
	date-sep: charset "/-"
	time-sep: charset "T/"
	stop: none

	control-char: reduce [ ;-- Control characters
	charset [#"^(00)" - #"^(1F)"] ;-- C0 control codes
	'\| #"^(C2)" charset [#"^(80)" - #"^(9F)"] ;-- C1 control codes (UTF-8 encoded)
	]

	UTF8-filtered-char: [
	[pos: stop :pos (fail?: [end skip]) \| UTF8-char e: (fail?: none)]
	fail?
	]

	UTF8-printable: [
	[non-printable-char \| not-str-char (fail?: [end skip]) \| UTF8-char (fail?: none)]
	fail?
	]

	;-- Whitespaces list from: http://en.wikipedia.org/wiki/Whitespace_character
	ws: [
	#"^/" (
	if count? [
	line: line + 1
	stack/nl?: yes
	]
	)
	\| ws-ASCII ;-- only the common whitespaces are matched
	\| #{C2} [
	#{85} ;-- U+0085 (Newline)
	\| #{A0} ;-- U+00A0 (No-break space)
	]
	;\| #{E1} [
	; #{9A80} ;-- U+1680 (Ogham space mark)
	; \| #{A08E} ;-- U+180E (Mongolian vowel separator)
	;]
	;\| #{E2} [
	; #{80} [
	; ws-U+2k ;-- U+2000-U+200A range
	; \| #{A8} ;-- U+2028 (Line separator)
	; \| #{A9} ;-- U+2029 (Paragraph separator)
	; \| #{AF} ;-- U+202F (Narrow no-break space)
	; ]
	; \| #{819F} ;-- U+205F (Medium mathematical space)
	;]
	;\| #{E38080} ;-- U+3000 (Ideographic space)
	]

	newline-char: [
	#"^/" ;-- LF
	\| #"^M" ;-- CR
	\| #{C285} ;-- U+0085 (Newline)
	\| #{E280} [
	#{A8} ;-- U+2028 (Line separator)
	\| #{A9} ;-- U+2029 (Paragraph separator)
	]
	]

	counted-newline: [pos: #"^/" (line: line + 1)]

	ws-no-count: [(count?: no) ws (count?: yes)]

	any-ws: [pos: any ws]

	symbol-rule: [
	(stop: [not-word-char \| ws-no-count \| control-char \| tag-char] otag: #"<" ot: none)
	some [
	otag ot: [#"/" (otag: [end skip] ot: back ot) :ot \| none] ;-- a</b>
	\| #">" ct: (if ot [otag: [end skip] ct: back ot]) :ct ;-- a<b>
	\| UTF8-filtered-char
	] e:
	]

	begin-symbol-rule: [ ;-- 1st char in symbols is restricted
	(stop: [not-word-1st \| ws-no-count \| control-char])
	UTF8-filtered-char
	opt symbol-rule
	]

	by-value: [paren! string! integer! pair! char! decimal! issue!]

	path-rule: [
	pos: slash :pos ( ;-- path detection barrier
	stack/allocate block! 4
	stack/push to type copy/part s e ;-- push 1st path element
	)
	some [
	slash
	s: [(in-path?: yes)
	integer-rule
	\| begin-symbol-rule (type: word!)
	\| paren-rule (type: paren!)
	\| #":" s: begin-symbol-rule (type: get-word!)
	\| line-string (value: load-string s e)
	\| char-rule (value: decode-UTF8-char value)
	] (
	stack/push either find by-value to word! type [ ;-- append path element
	value
	][
	to type copy/part s e
	]
	type: path!
	in-path?: no
	)
	]
	opt [#":" (type: set-path!)]
	e: [path-end \| ws-no-count \| end \| (pos: path throw-error)] :e ;-- detect invalid tail characters
	(value: stack/pop type)
	]

	word-rule: [
	(type: word!)
	#"%" [ws-no-count \| pos: file-end :pos \| end] (value: "%") ;-- special case for remainder op!
	\| path: s: begin-symbol-rule [
	url-rule
	\| path-rule ;-- path matched
	\| (value: copy/part s e) ;-- word matched
	opt [#":" (type: set-word!)]
	]
	]

	get-word-rule: [
	#":" (type: get-word!) s: begin-symbol-rule [
	path-rule (
	if set-path? :value [throw-error]
	value/1: to get-word! value/1 ;-- workaround missing get-path! in R2
	)
	\| (
	type: get-word!
	value: copy/part s e ;-- word matched
	)
	]
	]

	lit-word-rule: [
	#"'" (type: word!) [
	s: some #"/" e: (type: lit-word! value: copy/part s e)
	\| s: begin-symbol-rule [
	path-rule (
	if set-path? :value [throw-error]
	type: lit-path! ;-- path matched
	)
	\| (
	type: lit-word!
	value: copy/part s e ;-- word matched
	)
	]
	][s: #":" :s (throw-error) \| none]
	]

	map-rule: [
	"#(" (stack/allocate block! 10) any-value #")" (
	stack/prefix #!map!
	value: stack/pop block!
	)
	]

	issue-rule: [#"#" (type: issue!) s: any [symbol-rule \| #":"] e:]

	ref-rule: [(stop: [not-ref-char]) #"@" s: any UTF8-filtered-char e:]

	refinement-rule: [slash (type: refinement!) s: some [symbol-rule \| #":"] e:]

	slash-rule: [
	[[#":" (type: get-word!) \| #"'" (type: lit-word!)] \| none (type: word!)]
	s: some slash e: opt [#":" (if find ":'" s/-1 [throw-error] type: set-word!)]
	]

	hexa-rule: [2 8 hexa e: #"h" pos: [integer-end \| ws-no-count \| end ] :pos (type: integer!)]

	sticky-word-rule: [ ;-- protect from sticky words typos
	mark: [integer-end \| ws-no-count \| end \| (pos: s throw-error)] :mark
	]

	tuple-rule: [(type: tuple!) byte dot byte 1 12 [dot byte] e: sticky-word-rule]

	time-rule: [
	s: positive-integer-rule [
	decimal-number-rule (value: as-time 0 value load-number copy/part s e neg?) ;-- mm:ss.dd
	\| (value2: load-number copy/part s e) [
	#":" s: positive-integer-rule opt decimal-number-rule
	(value: as-time value value2 load-number copy/part s e neg?) ;-- hh:mm:ss[.dd]
	\| (value: as-time value value2 0 neg?) ;-- hh:mm
	]
	] (type: time!)
	]

	month-rule: [(m: none)
	"January" (m: 1)
	\| "February" (m: 2)
	\| "March" (m: 3)
	\| "April" (m: 4)
	\| "May" (m: 5)
	\| "June" (m: 6)
	\| "July" (m: 7)
	\| "August" (m: 8)
	\| "September" (m: 9)
	\| "October" (m: 10)
	\| "November" (m: 11)
	\| "December" (m: 12)
	]
	mon-rule: [(m: none)
	"Jan" (m: 1)
	\| "Feb" (m: 2)
	\| "Mar" (m: 3)
	\| "Apr" (m: 4)
	\| "May" (m: 5)
	\| "Jun" (m: 6)
	\| "Jul" (m: 7)
	\| "Aug" (m: 8)
	\| "Sep" (m: 9)
	\| "Oct" (m: 10)
	\| "Nov" (m: 11)
	\| "Dec" (m: 12)
	]

	day-year-rule: [
	(neg?: no) opt [#"-" (neg?: yes)]
	s: 3 4 digit e: (year: load-number copy/part s e if neg? [year: 65536 - year])
	\| 1 2 digit e: (
	value: load-number copy/part s e no
	either day [year: value + pick [2000 1900] 50 > value][day: value]
	)
	]

	date-rule: [
	pos: [opt #"-" 1 4 digit date-sep \| 8 digit #"T"] :pos [ ;-- quick lookhead
	s: 8 digit ee: #"T" ( ;-- yyyymmddT
	year: load-number copy/part s 4
	month: load-number copy/part skip s 4 2
	day: load-number copy/part skip s 6 2
	date: make date! reduce [day month year]
	) :ee
	\| day-year-rule sep: date-sep (sep: sep/1) [
	s: 1 2 digit e: (month: load-number copy/part s e no)
	\| some alpha e: (
	fail?: either all [parse/all copy/part s e [month-rule \| mon-rule] m][month: m none][[end skip]]
	) fail?
	]
	sep day-year-rule (
	fail?: either all [day month year][
	date: make date! reduce [day month year]
	none
	][[end skip]]
	) fail?
	\| s: 4 digit #"-" (
	year: load-number copy/part s 4
	date: make date! reduce [1 1 year]
	)[
	"W" s: 2 digit (ee: none) opt [#"-" ee: non-zero] ( ;-- yyyy-Www
	date: to-iso-week date load-number copy/part s 2
	if ee [date: to-weekday date to integer! s/4 - #"0"] ;-- yyyy-Www-d
	)
	\| s: 3 digit (date: date + (load-number copy/part s 3) - 1) ;-- yyyy-ddd
	] (month: -1)
	](
	type: date!
	if all [
	month <> -1 any [date/year <> year date/month <> month date/day <> day]
	][throw-error]
	day: month: year: none
	) opt [
	time-sep (ee: no) [
	s: 6 digit opt [#"." 1 9 digit ee:] ( ;-- Thhmmss[.sss]
	hour: load-number copy/part s 2
	mn: load-number copy/part skip s 2 2
	sec: load-number either ee [copy/part skip s 4 ee][copy/part skip s 4 2]
	date/time: as-time hour mn sec no
	)
	\| 4 digit ( ;-- Thhmm
	hour: load-number copy/part s 2
	mn: load-number copy/part skip s 2 2
	date/time: as-time hour mn 0 no
	)
	\| s: positive-integer-rule (value: load-number copy/part s e)
	#":" [(neg?: no) time-rule (date/time: value) \| (throw-error)]
	]
	opt [
	#"Z" \| [#"-" (neg?: yes) \| #"+" (neg?: no)][
	s: 4 digit ( ;-- +/-hhmm
	hour: load-number copy/part s e: skip s 2
	mn: load-number copy/part e e: skip e 2
	)
	\| 1 2 digit e: (hour: load-number copy/part s e mn: none) ;-- +/-h, +/-hh
	opt [#":" s: 2 digit e: (mn: load-number copy/part s e)]
	](
	either all [mn find [15 45] mn: round/floor/to mn 15][
	date: reduce [#!date! date as-time hour mn 0 neg?] ;-- special encoding for 15/45
	][
	date/zone: as-time hour any [mn 0] 0 neg?
	]
	)
	]
	] sticky-word-rule (value: date)
	]

	positive-integer-rule: [digit any digit e: (type: integer!)]

	integer-number-rule: [
	opt [#"-" (neg?: yes) \| #"+" (neg?: no)] digit any [digit \| #"'" digit] e:
	(type: integer!)
	]

	integer-rule: [
	pos: decimal-special e: ;-- escape path for NaN, INFs
	(type: issue! value: load-number copy/part s e)
	\| (neg?: no) integer-number-rule
	opt [decimal-number-rule \| decimal-exp-rule e: (type: decimal!)]
	opt [#"%" e: (type: issue!)]
	sticky-word-rule
	(value: load-number copy/part s e)
	opt [
	[#"x" \| #"X"] (
	type: pair!
	value2: to pair! reduce [value 0]
	)
	[s: integer-number-rule \| (type: pair! throw-error)]
	mark: [pair-end \| ws-no-count \| end \| (type: pair! throw-error)] :mark (
	value2/2: load-number copy/part s e
	if any [decimal? value decimal? value2][type: pair! throw-error]
	value: value2
	)
	]
	e: opt [#":" [time-rule \| (unless in-path? [throw-error]) :e]]
	]

	decimal-special: [
	s: "-0.0" pos: [integer-end \| ws-no-count \| end ] :pos
	\| (neg?: no) opt [#"-" (neg?: yes)] opt #"+" "1.#" s: [
	[[#"N" \| #"n"] [#"a" \| #"A"] [#"N" \| #"n"]]
	\| [[#"I" \| #"i"] [#"N" \| #"n"] [#"F" \| #"f"]]
	]
	]

	decimal-exp-rule: [
	[[#"e" \| #"E"] opt [#"-" \| #"+"] 1 3 digit]
	]

	decimal-number-rule: [
	[dot \| comma] digit any [digit \| #"'" digit]
	opt decimal-exp-rule e: (type: decimal!)
	]

	decimal-rule: [
	decimal-number-rule opt [#"%" e: (type: issue!)]
	sticky-word-rule
	]

	money-rule: [
	(neg?: no) opt [#"-" (neg?: yes) \| #"+"]
	s: opt [3 alpha] #"$" digit any [digit \| #"'" digit] opt [[dot \| comma] some digit]
	e: (type: money!)
	]

	block-rule: [#"[" (stack/allocate block! 10) any-value #"]" (value: stack/pop block!)]

	paren-rule: [#"(" (stack/allocate paren! 10) any-value #")" (value: stack/pop paren!)]

	escaped-char: [
	"^^(" [
	[ ;-- special case first
	"null" (value: #"^(00)")
	\| "back" (value: #"^(08)")
	\| "tab" (value: #"^(09)")
	\| "line" (value: #"^(0A)")
	\| "page" (value: #"^(0C)")
	\| "esc" (value: #"^(1B)")
	\| "del" (value: #"^~")
	]
	\| pos: [1 6 hexa-char] e: ( ;-- Unicode values allowed up to 10FFFFh
	if e/1 <> #")" [throw-error] ;-- more than 6 hexadecimal digits
	value: either rs? [
	to-char to-integer to-issue copy/part pos e
	][
	encode-UTF8-char pos e
	]
	)
	\| (throw-error) ;-- invalid syntax
	] #")"
	\| #"^^" [
	[
	#"/" (value: #"^/")
	\| #"-" (value: #"^-")
	\| #"~" (value: #"^(del)")
	\| #"^^" (value: #"^^") ;-- caret escaping case
	\| #"{" (value: #"{")
	\| #"}" (value: #"}")
	\| #"^"" (value: #"^"")
	]
	\| pos: caret-Uchar (value: pos/1 - 64)
	\| pos: caret-Lchar (value: pos/1 - 96)
	]
	]

	char-rule: [
	{#"} (type: char!) [
	s: escaped-char
	\| copy value UTF8-printable (value: as-binary value)
	\| #"^-" (value: s/1)
	] {"}
	]

	line-string: [
	{"} s: (type: string! stop: [not-str-char \| newline-char])
	any [{^^"} \| escaped-char \| UTF8-filtered-char]
	e: {"}
	]

	nested-curly-braces: [
	(cnt: 1 fail?: none)
	any [[
	counted-newline
	\| "^^^^"
	\| "^^{"
	\| "^^}"
	\| #"{" (cnt: cnt + 1)
	\| e: #"}" (if zero? cnt: cnt - 1 [fail?: [end skip]])
	\| UTF8-char
	] fail?
	]
	#"}" (old-line: line)
	]

	multiline-string: [#"{" s: (type: string!) nested-curly-braces]

	string-rule: [line-string \| multiline-string]

	tag-rule: [
	#"<" s: not-tag-1st (type: tag!)
	any [#"^"" thru #"^"" \| #"'" thru #"'" \| not-tag-char] e: #">"
	]

	email-rule: [
	(stop: [not-email-char])
	s: some UTF8-filtered-char #"@" (type: email!)
	any UTF8-filtered-char e: (value: dehex copy/part s e)
	]

	base-2-rule: [
	"2#{" (type: binary!) [
	s: any [counted-newline \| 8 [any [ws-no-count \| comment-rule][#"0" \| #"1" ]] \| ws-no-count \| comment-rule]
	e: #"}" (base: 2)
	\| (pos: skip s -3 throw-error)
	]
	]

	base-16-rule: [
	opt "16" "#{" (type: binary!) [
	s: any [counted-newline \| 2 hexa-char \| ws-no-count \| comment-rule]
	e: #"}" (base: 16)
	\| (pos: skip s -2 throw-error)
	]
	]

	base-64-rule: [
	"64#{" (type: binary!) [
	s: any [counted-newline \| base64-char \| ws-no-count \| comment-rule]
	e: #"}" (
	cnt: offset? s e
	if all [0 < cnt cnt < 4][pos: skip s -4 throw-error]
	base: 64
	)
	\| (pos: skip s -4 throw-error)
	]
	]

	binary-rule: [[base-16-rule \| base-64-rule \| base-2-rule] (old-line: line)]

	file-rule: [
	pos: #"%" (type: file! stop: [not-file-char \| ws-no-count]) [
	#"{" (throw-error)
	\| line-string e: (value: to file! load-string s e)
	\| s: any UTF8-filtered-char e: (value: to file! dehex copy/part s e)
	]
	]

	rawstr-rule: [
	pos: (type: string! cnt: 0 value: none) some [#"%" (cnt: cnt + 1)] #"{" s:
	some [e: #"}" cnt #"%" (value: copy/part s e) break \| skip]
	(unless value [throw-error])
	]

	url-rule: [
	#":" (type: url! stop: [not-url-char \| ws-no-count])
	some UTF8-filtered-char e: (value: dehex copy/part s e)
	]

	escaped-rule: [
	"#[" any-ws [
	"true" (value: true)
	\| "false" (value: false)
	\| s: [
	"none!" \| "logic!" \| "block!" \| "integer!" \| "word!"
	\| "set-word!" \| "get-word!" \| "lit-word!" \| "refinement!"
	\| "binary!" \| "string!" \| "char!" \| "bitset!" \| "path!"
	\| "set-path!" \| "lit-path!" \| "native!" \| "action!"
	\| "issue!" \| "paren!" \| "function!"
	] e: (value: get to word! copy/part s e)
	\| "none" (value: none)
	] any-ws #"]"
	]

	comment-rule: [#";" [to #"^/" \| to end]]

	wrong-end: [(
	ending: either all [1 < length? stack/stk not empty? stack/stk/1][
	value: switch type?/word stack/top [
	block! [#"]"]
	paren! [#")"]
	]
	first [(throw-error/with ["missing" mold value "character"])]
	][none]
	)
	ending
	]

	literal-value: [
	pos: (e: none) s: [
	comment-rule
	\| escaped-rule (stack/push value)
	\| tuple-rule (stack/push load-tuple copy/part s e)
	\| hexa-rule (stack/push decode-hexa copy/part s e)
	\| binary-rule (stack/push load-binary s e base)
	\| email-rule (stack/push to email! value)
	\| date-rule (stack/push value)
	\| integer-rule (stack/push value)
	\| decimal-rule (stack/push load-decimal copy/part s e)
	\| tag-rule (stack/push to tag! copy/part s e)
	\| rawstr-rule (stack/push value)
	\| money-rule (stack/push load-money s e neg?)
	\| word-rule (stack/push to type value)
	\| lit-word-rule (stack/push to type value)
	\| get-word-rule (stack/push to type value)
	\| refinement-rule (stack/push to refinement! copy/part s e)
	\| slash-rule (stack/push to type copy/part s e)
	\| file-rule (stack/push value)
	\| char-rule (stack/push decode-UTF8-char value)
	\| block-rule (stack/push value)
	\| paren-rule (stack/push value)
	\| string-rule (stack/push load-string s e)
	\| map-rule (stack/push value)
	\| issue-rule (stack/push to issue! copy/part s e)
	\| ref-rule (stack/push load-ref copy/part s e)
	]
	]

	any-value: [pos: any [literal-value \| ws]]

	program: [
	block-rule (if rs? [stack/push 'Red/System] stack/push value)
	any-value
	opt wrong-end
	]

	;====== Helper functions ======

	stack: context [
	stk: []
	nl?: no

	allocate: func [type [datatype!] size [integer!] /local new pos][
	pos: insert/only tail stk new: make type size
	if nl? [new-line back pos yes nl?: no]
	new
	]

	prefix: func [value][insert/only last stk :value]

	push: func [value][
	value: insert/only tail last stk :value
	if nl? [new-line back value yes nl?: no]
	value
	]

	pop: func [type [datatype!] /local pos][
	pos: back tail stk
	nl?: new-line? pos

	either any [type = path! type = set-path!][
	change/only pos to type pos/1
	][
	if type <> type? pos/1 [
	throw-error/with ["invalid" mold type "closing delimiter"]
	]
	]
	also pos/1 remove pos
	]

	top: does [last stk]

	reset: does [clear stk]

	clean-up: does [
	unless empty? stk [
	clear next stk ;-- keep root block in stk
	clear first stk ;-- clear root block
	]
	nl?: no
	]
	]

	throw-error: func [/with msg [string! block!]][
	print rejoin [
	"*** Syntax Error: " either with [
	uppercase/part reform msg 1
	][
	reform ["Invalid" mold type "value"]
	]
	any [
	all [
	value? 'red
	object? red
	red/script-name
	join "^/*** in file: " to-local-file red/script-name
	]
	""
	]
	"^/*** line: " line
	"^/*** at: " mold copy/part pos 40
	]
	stack/clean-up
	either encap? [quit][halt]
	]


	pad-head: func [s [string!]][
	head insert/dup s #"0" 8 - length? s
	]

	encode-UTF8-char: func [s [string!] e [string!] /local c code new][
	c: debase/base pad-head copy/part s e 16
	while [c/1 = 0][c: next c] ;-- trim heading zeros
	code: to integer! c

	case [
	code <= 127 [
	new: to char! code ;-- c <= 7Fh
	]
	code <= 2047 [ ;-- c <= 07FFh
	new: (shift/left (shift code 6) or #"^(C0)" 8)
	or (code and #"^(3F)") or #"^(80)"
	]
	code <= 65535 [ ;-- c <= FFFFh
	new: (shift/left (shift code 12) or #"^(E0)" 16)
	or (shift/left (shift code 6) and #"^(3F)" or #"^(80)" 8)
	or (code and #"^(3F)") or #"^(80)"
	]
	code <= 1114111 [ ;-- c <= 10FFFFh
	new: (shift/left (shift code 18) or #"^(F0)" 24)
	or (shift/left (shift code 12) and #"^(3F)" or #"^(80)" 16)
	or (shift/left (shift code 6) and #"^(3F)" or #"^(80)" 8)
	or (code and #"^(3F)") or #"^(80)"
	]
	'else [
	throw-error/with "Codepoints above U+10FFFF are not supported"
	]
	]
	if integer? new [
	new: debase/base to-hex new 16
	remove-each byte new [byte = #"^(null)"]
	]
	new
	]

	decode-UTF8-char: func [value /redbin][
	if all [not redbin char? value][return encode-char to integer! value]

	value: switch/default length? value [
	1 [value]
	2 [
	value: value and #{1F3F}
	value: add shift/left value/1 6 value/2
	]
	3 [
	value: value and #{0F3F3F}
	value: add add
	shift/left value/1 12
	shift/left value/2 6
	value/3
	]
	4 [
	value: value and #{073F3F3F}
	value: add add add
	shift/left value/1 18
	shift/left value/2 12
	shift/left value/3 6
	value/4
	]
	][
	throw-error/with "Unsupported or invalid UTF-8 encoding"
	]

	either redbin [value][
	encode-char to integer! value ;-- special encoding for Unicode char!
	]
	]

	decode-UTF8-string: func [str [string!] /local new s e][
	new: make string! length? str
	parse/all str [
	some [
	s: UTF8-char e: (
	append new debase/base skip decode-UTF8-char as-binary copy/part s e 7 16
	)
	]
	]
	head change/part str new tail str
	]

	encode-char: func [value [integer!]][
	head insert to-hex value #"'"
	]

	decode-hexa: func [s [string!]][
	to integer! to issue! s
	]

	as-time: func [h [integer!] m [integer!] s [integer! decimal!] neg? [logic!] /local t][
	if any [all [h <> 0 m < 0] all [s s < 0]][type: time! throw-error]
	t: to time! reduce [abs h abs m abs s]
	either neg? [negate t][t]
	]

	to-weekday: func [d [date!] wd [integer!]][
	if negative? wd: wd - d/weekday [wd: 7 + wd]
	d + wd
	]

	to-iso-week: func [d [date!] w [integer!] /local wd d1][
	d1: make date! reduce [1 1 d/year]
	wd: d1/weekday
	d1 + (w - 1 * 7 + (either wd < 5 [1][8]) - wd)
	]

	load-ref: func [s [string!]][
	append join make issue! 1 + length? s #"@" s
	]

	load-money: func [s [string!] e [string!] neg? [logic!] /local cur dec pos][
	if all [s/1 <> #"$" s/4 = #"$"][
	cur: uppercase copy/part s 3
	s: skip s 3
	]
	s: copy/part next s e
	remove-each c s [c = #"'"]
	dec: either pos: find s dot [
	remove pos
	if 5 < length? pos [clear skip pos 5]
	length? pos
	][0]
	insert/dup tail s #"0" 5 - dec
	if 22 < length? s [throw-error]
	insert/dup s #"0" 22 - length? s
	insert s pick "-+" neg?
	insert s any [cur "..."]
	append join make issue! 1 + length? s #"$" s
	]

	load-tuple: func [s [string!] /local new byte p e][
	new: join make issue! 1 + length? s #"~"
	byte: [p: 1 3 digit e: (append new skip to-hex load copy/part p e 6)]
	unless parse s [byte 2 11 [dot byte]][throw-error]
	new
	]

	load-number: func [s [string!]][
	switch/default type [
	#[datatype! decimal!][s: load-decimal s]
	#[datatype! issue! ][
	if s = "-0.0" [s: "0-"] ;-- re-encoded for consistency
	either #"%" = last s [s: to issue! s][
	s: to issue! join "." s
	if neg? [append s #"-"]
	]
	]
	][
	if any [
	not find [integer! decimal!] type?/word s: to integer! s
	all [rs? type <> type? s]
	][
	throw-error
	]
	]
	s
	]

	load-decimal: func [s [string!]][
	unless attempt [s: to decimal! s][throw-error]
	s
	]

	load-string: func [s [string!] e [string!] /local new filter][
	new: make string! offset? s e ;-- allocated size close to final size
	filter: get pick [UTF8-char UTF8-filtered-char] s/-1 = #"{"

	parse/all/case copy/part s e [
	any [
	escaped-char (insert tail new value)
	\| #"^^"
	\| s: filter e: (insert/part tail new s e)
	] ;-- exit on matching " or }
	]
	new
	]

	load-binary: func [s [string!] e [string!] base [integer!] /local new str][
	new: make string! offset? s e ;-- allocated size above final size

	parse/all/case s [
	some [
	copy str some base64-char (insert tail new str)
	\| ws \| comment-rule
	\| #"}" end skip
	]
	]
	new: debase/base new base
	if none? new [throw-error]
	new
	]

	load-file: func [s [string!]][
	parse s [any [#"%" [2 hexa \| (pos: skip pos negate 1 + length? s throw-error)] \| skip]]
	to file! replace/all dehex s #"\" #"/"
	]

	identify-header: func [src /local p ws found?][
	ws: charset " ^-^M^/"
	rs?: no
	pos: src
	until [
	pos: any [
	find/tail pos "Red" ;-- don't set pos to none before throw-error
	throw-error/with "Invalid Red program"
	]
	if find/match pos "/System" [rs?: yes pos: skip pos 7]
	pos: any [
	find pos negate ws
	pos
	]
	found?: pos/1 = #"["
	]
	unless found? [throw-error/with "Invalid Red program"]
	pos
	]

	process: func [src [string! binary!] /local blk][
	old-line: line: 1
	count?: yes
	stack/clean-up
	blk: stack/allocate block! 100 ;-- root block
	src: identify-header src

	unless parse/all/case src program [throw-error]
	stack/reset
	blk
	]
	]

Jun	JUL	Aug
	16
2020	2021	2022

red / red

red/lexer.r

Users who have contributed to this file