Document double-quoted string escape sequences

This commit is contained in:
Kurtis Rader 2020-07-22 20:11:25 -07:00 committed by Qi Xiao
parent fbd86925a9
commit 082052b5a8
3 changed files with 93 additions and 38 deletions

View File

@ -53,20 +53,19 @@ func ParseAs(src Source, n Node, w io.Writer) error {
// Errors.
var (
errShouldBeForm = newError("", "form")
errBadLHS = errors.New("bad assignment LHS")
errBadRedirSign = newError("bad redir sign", "'<'", "'>'", "'>>'", "'<>'")
errShouldBeFD = newError("", "a composite term representing fd")
errShouldBeFilename = newError("", "a composite term representing filename")
errShouldBeArray = newError("", "spaced")
errStringUnterminated = newError("string not terminated")
errChainedAssignment = newError("chained assignment not yet supported")
errInvalidEscape = newError("invalid escape sequence")
errInvalidEscapeOct = newError("invalid escape sequence", "octal digit")
errInvalidEscapeHex = newError("invalid escape sequence", "hex digit")
errInvalidEscapeControl = newError("invalid control sequence", "a rune between @ (0x40) and _(0x5F)")
errShouldBePrimary = newError("",
"single-quoted string", "double-quoted string", "bareword")
errShouldBeForm = newError("", "form")
errBadLHS = errors.New("bad assignment LHS")
errBadRedirSign = newError("bad redir sign", "'<'", "'>'", "'>>'", "'<>'")
errShouldBeFD = newError("", "a composite term representing fd")
errShouldBeFilename = newError("", "a composite term representing filename")
errShouldBeArray = newError("", "spaced")
errStringUnterminated = newError("string not terminated")
errChainedAssignment = newError("chained assignment not yet supported")
errInvalidEscape = newError("invalid escape sequence")
errInvalidEscapeOct = newError("invalid escape sequence", "octal digit")
errInvalidEscapeHex = newError("invalid escape sequence", "hex digit")
errInvalidEscapeControl = newError("invalid control sequence", "a codepoint between 0x3F and 0x5F")
errShouldBePrimary = newError("", "single-quoted string", "double-quoted string", "bareword")
errShouldBeVariableName = newError("", "variable name")
errShouldBeRBracket = newError("", "']'")
errShouldBeRBrace = newError("", "'}'")
@ -585,16 +584,19 @@ func (pn *Primary) doubleQuoted(ps *parser) {
return
case '\\':
switch r := ps.next(); r {
case 'c', '^':
// Control sequence
case 'c', '^': // control sequence
r := ps.next()
if r < 0x40 || r >= 0x60 {
if r < 0x3F || r > 0x5F {
ps.backup()
ps.error(errInvalidEscapeControl)
ps.next()
}
buf.WriteByte(byte(r - 0x40))
case 'x', 'u', 'U':
if byte(r) == '?' { // special-case: \c? => del
buf.WriteByte(byte(0x7F))
} else {
buf.WriteByte(byte(r - 0x40))
}
case 'x', 'u', 'U': // two, four, or eight hex digits
var n int
switch r {
case 'x':
@ -615,8 +617,7 @@ func (pn *Primary) doubleQuoted(ps *parser) {
rr = rr*16 + d
}
buf.WriteRune(rr)
case '0', '1', '2', '3', '4', '5', '6', '7':
// 2 more octal digits
case '0', '1', '2', '3', '4', '5', '6', '7': // three octal digits
rr := r - '0'
for i := 0; i < 2; i++ {
r := ps.next()

View File

@ -94,7 +94,19 @@ var goodCases = []struct {
"Type": SingleQuoted, "Value": "'x'y'",
}})},
// Double quote
{`a "b\^[\x1b\u548c\U0002CE23\123\n\t\\"`,
{`a "[\c?\c@\cI\^I\^[]"`, // control char sequences
a(ast{"Compound/Indexing/Primary", fs{
"Type": DoubleQuoted,
"Value": "[\x7f\x00\t\t\x1b]",
}})},
{`a "[\n\t\a\v\\\"]"`, // single char sequences
a(ast{"Compound/Indexing/Primary", fs{
"Type": DoubleQuoted,
"Value": "[\n\t\a\v\\\"]",
}})},
{`a "b\^[\x1b\u548c\U0002CE23\123\n\t\\"`, // numeric sequences
a(ast{"Compound/Indexing/Primary", fs{
"Type": DoubleQuoted,
"Value": "b\x1b\x1b\u548c\U0002CE23\123\n\t\\",
@ -364,7 +376,8 @@ var parseErrorTests = []struct {
{src: "'a", errAtEnd: true, errMsg: "string not terminated"},
{src: `"a`, errAtEnd: true, errMsg: "string not terminated"},
// Bad escape sequence.
{src: `a "\^0"`, errPart: "0", errMsg: "invalid control sequence, should be a rune between @ (0x40) and _(0x5F)"},
{src: `a "\^` + "\t", errPart: "\t",
errMsg: "invalid control sequence, should be a codepoint between 0x3F and 0x5F"},
{src: `a "\xQQ"`, errPart: "Q", errMsg: "invalid escape sequence, should be hex digit"},
{src: `a "\1ab"`, errPart: "a", errMsg: "invalid escape sequence, should be octal digit"},
{src: `a "\i"`, errPart: "i", errMsg: "invalid escape sequence"},

View File

@ -55,30 +55,71 @@ some values. (The traditional terms for the two levels are "commands" and
## String
The most common data structure in shells is the string. String literals can be
quoted or unquoted (barewords).
quoted or unquoted (barewords). There are two types of quoted strings in Elvish:
single-quoted strings and double-quoted strings.
### Quoted
### Single-Quoted String
There are two types of quoted strings in Elvish, single-quoted strings and
double-quoted strings.
In single-quoted strings, all characters represent themselves, except single
In single-quoted strings all characters represent themselves, except single
quotes, which need to be doubled. For instance, `'*\'` evaluates to `*\`, and
`'it''s'` evaluates to `it's`.
In double-quoted strings, the backslash `\` introduces a **escape sequence**.
For instance, `"\n"` evaluates to a newline; `"\\"` evaluates to a backslash;
invalid escape sequences like `"\*"` result in a syntax error.
### Double-Quoted String
**TODO**: Document the full list of supported escape sequences.
In double-quoted strings the backslash, `\`, introduces an **escape sequence**.
For instance, `\n` evaluates to a newline and `\\` evaluates to a backslash.
Invalid escape sequences like `\*` result in a syntax error when the program is
compiled.
Unlike most other shells, double-quoted strings do not support interpolation.
For instance, `"$USER"` simply evaluates to the string `$USER`. To get a similar
effect, simply concatenate strings: instead of `"my name is $name"`, write
`"my name is "$name`. Under the hood this is a
Unlike most other shells, double-quoted strings in Elvish do not support
interpolation. For instance, `"$name"` simply evaluates to the string `$name`.
To get a similar effect, simply concatenate strings: instead of
`"my name is $name"`, write `"my name is "$name`. Under the hood this is a
[compound expression](#compound-expression-and-braced-lists).
### Barewords
The following escape sequences are recognized in double-quoted strings:
- `\cX`, where _X_ is a character with codepoint between 0x40 and 0x5F,
represents the codepoint that is 0x40 lower than _X_. For example, `\cI` is
the tab character: 0x49 (`I`) - 0x40 = 0x09 (tab). There is one special
case: A question-mark is converted to del; i.e., `\c?` or `\^?` is
equivalent to `\x7F`.
- `\^X` is the same as `\cX`.
- `\[0..7][0..7][0..7]` is a byte written as an octal value. There must be
three octal digits following the backslash. For example, `\000` is the nul
character, and `\101` is the same as `A`, but `\0` is an invalid escape
sequence (too few digits).
- `\x..` is a Unicode code point represented by two hexadecimal digits.
- `\u....` is a Unicode code point represented by four hexadecimal digits.
- `\U........` is a Unicode code point represented by eight hexadecimal digits.
- The following single-character escape sequences are also recognized:
- `\a` is the "bel" character, equivalent to `\007` or `\x07`.
- `\b` is the "backspace" character, equivalent to `\010` or `\x08`.
- `\f` is the "formfeed" (aka "np") character, equivalent to `\014` or
`\x0c`.
- `\n` is the "nl" character, equivalent to `\012` or `\x0a`.
- `\r` is the "cr" character, equivalent to `\015` or `\x0d`.
- `\t` is the "tab" character, equivalent to `\011` or `\x09`.
- `\v` is the "vt" character, equivalent to `\013` or `\x0b`.
- `\\` is the "backslash" character, equivalent to `\134` or `\x5c`.
- `\"` is the "double-quote" character, equivalent to `\042` or `\x22`.
### Bareword String
If a string only consists of bareword characters, it can be written without any
quote; this is called a **bareword**. Examples are `a.txt`, `long-bareword`, and