mirror of
https://github.com/go-sylixos/elvish.git
synced 2024-12-13 18:07:51 +08:00
Document double-quoted string escape sequences
This commit is contained in:
parent
fbd86925a9
commit
082052b5a8
|
@ -53,20 +53,19 @@ func ParseAs(src Source, n Node, w io.Writer) error {
|
|||
|
||||
// Errors.
|
||||
var (
|
||||
errShouldBeForm = newError("", "form")
|
||||
errBadLHS = errors.New("bad assignment LHS")
|
||||
errBadRedirSign = newError("bad redir sign", "'<'", "'>'", "'>>'", "'<>'")
|
||||
errShouldBeFD = newError("", "a composite term representing fd")
|
||||
errShouldBeFilename = newError("", "a composite term representing filename")
|
||||
errShouldBeArray = newError("", "spaced")
|
||||
errStringUnterminated = newError("string not terminated")
|
||||
errChainedAssignment = newError("chained assignment not yet supported")
|
||||
errInvalidEscape = newError("invalid escape sequence")
|
||||
errInvalidEscapeOct = newError("invalid escape sequence", "octal digit")
|
||||
errInvalidEscapeHex = newError("invalid escape sequence", "hex digit")
|
||||
errInvalidEscapeControl = newError("invalid control sequence", "a rune between @ (0x40) and _(0x5F)")
|
||||
errShouldBePrimary = newError("",
|
||||
"single-quoted string", "double-quoted string", "bareword")
|
||||
errShouldBeForm = newError("", "form")
|
||||
errBadLHS = errors.New("bad assignment LHS")
|
||||
errBadRedirSign = newError("bad redir sign", "'<'", "'>'", "'>>'", "'<>'")
|
||||
errShouldBeFD = newError("", "a composite term representing fd")
|
||||
errShouldBeFilename = newError("", "a composite term representing filename")
|
||||
errShouldBeArray = newError("", "spaced")
|
||||
errStringUnterminated = newError("string not terminated")
|
||||
errChainedAssignment = newError("chained assignment not yet supported")
|
||||
errInvalidEscape = newError("invalid escape sequence")
|
||||
errInvalidEscapeOct = newError("invalid escape sequence", "octal digit")
|
||||
errInvalidEscapeHex = newError("invalid escape sequence", "hex digit")
|
||||
errInvalidEscapeControl = newError("invalid control sequence", "a codepoint between 0x3F and 0x5F")
|
||||
errShouldBePrimary = newError("", "single-quoted string", "double-quoted string", "bareword")
|
||||
errShouldBeVariableName = newError("", "variable name")
|
||||
errShouldBeRBracket = newError("", "']'")
|
||||
errShouldBeRBrace = newError("", "'}'")
|
||||
|
@ -585,16 +584,19 @@ func (pn *Primary) doubleQuoted(ps *parser) {
|
|||
return
|
||||
case '\\':
|
||||
switch r := ps.next(); r {
|
||||
case 'c', '^':
|
||||
// Control sequence
|
||||
case 'c', '^': // control sequence
|
||||
r := ps.next()
|
||||
if r < 0x40 || r >= 0x60 {
|
||||
if r < 0x3F || r > 0x5F {
|
||||
ps.backup()
|
||||
ps.error(errInvalidEscapeControl)
|
||||
ps.next()
|
||||
}
|
||||
buf.WriteByte(byte(r - 0x40))
|
||||
case 'x', 'u', 'U':
|
||||
if byte(r) == '?' { // special-case: \c? => del
|
||||
buf.WriteByte(byte(0x7F))
|
||||
} else {
|
||||
buf.WriteByte(byte(r - 0x40))
|
||||
}
|
||||
case 'x', 'u', 'U': // two, four, or eight hex digits
|
||||
var n int
|
||||
switch r {
|
||||
case 'x':
|
||||
|
@ -615,8 +617,7 @@ func (pn *Primary) doubleQuoted(ps *parser) {
|
|||
rr = rr*16 + d
|
||||
}
|
||||
buf.WriteRune(rr)
|
||||
case '0', '1', '2', '3', '4', '5', '6', '7':
|
||||
// 2 more octal digits
|
||||
case '0', '1', '2', '3', '4', '5', '6', '7': // three octal digits
|
||||
rr := r - '0'
|
||||
for i := 0; i < 2; i++ {
|
||||
r := ps.next()
|
||||
|
|
|
@ -94,7 +94,19 @@ var goodCases = []struct {
|
|||
"Type": SingleQuoted, "Value": "'x'y'",
|
||||
}})},
|
||||
// Double quote
|
||||
{`a "b\^[\x1b\u548c\U0002CE23\123\n\t\\"`,
|
||||
{`a "[\c?\c@\cI\^I\^[]"`, // control char sequences
|
||||
a(ast{"Compound/Indexing/Primary", fs{
|
||||
"Type": DoubleQuoted,
|
||||
"Value": "[\x7f\x00\t\t\x1b]",
|
||||
}})},
|
||||
|
||||
{`a "[\n\t\a\v\\\"]"`, // single char sequences
|
||||
a(ast{"Compound/Indexing/Primary", fs{
|
||||
"Type": DoubleQuoted,
|
||||
"Value": "[\n\t\a\v\\\"]",
|
||||
}})},
|
||||
|
||||
{`a "b\^[\x1b\u548c\U0002CE23\123\n\t\\"`, // numeric sequences
|
||||
a(ast{"Compound/Indexing/Primary", fs{
|
||||
"Type": DoubleQuoted,
|
||||
"Value": "b\x1b\x1b\u548c\U0002CE23\123\n\t\\",
|
||||
|
@ -364,7 +376,8 @@ var parseErrorTests = []struct {
|
|||
{src: "'a", errAtEnd: true, errMsg: "string not terminated"},
|
||||
{src: `"a`, errAtEnd: true, errMsg: "string not terminated"},
|
||||
// Bad escape sequence.
|
||||
{src: `a "\^0"`, errPart: "0", errMsg: "invalid control sequence, should be a rune between @ (0x40) and _(0x5F)"},
|
||||
{src: `a "\^` + "\t", errPart: "\t",
|
||||
errMsg: "invalid control sequence, should be a codepoint between 0x3F and 0x5F"},
|
||||
{src: `a "\xQQ"`, errPart: "Q", errMsg: "invalid escape sequence, should be hex digit"},
|
||||
{src: `a "\1ab"`, errPart: "a", errMsg: "invalid escape sequence, should be octal digit"},
|
||||
{src: `a "\i"`, errPart: "i", errMsg: "invalid escape sequence"},
|
||||
|
|
|
@ -55,30 +55,71 @@ some values. (The traditional terms for the two levels are "commands" and
|
|||
## String
|
||||
|
||||
The most common data structure in shells is the string. String literals can be
|
||||
quoted or unquoted (barewords).
|
||||
quoted or unquoted (barewords). There are two types of quoted strings in Elvish:
|
||||
single-quoted strings and double-quoted strings.
|
||||
|
||||
### Quoted
|
||||
### Single-Quoted String
|
||||
|
||||
There are two types of quoted strings in Elvish, single-quoted strings and
|
||||
double-quoted strings.
|
||||
|
||||
In single-quoted strings, all characters represent themselves, except single
|
||||
In single-quoted strings all characters represent themselves, except single
|
||||
quotes, which need to be doubled. For instance, `'*\'` evaluates to `*\`, and
|
||||
`'it''s'` evaluates to `it's`.
|
||||
|
||||
In double-quoted strings, the backslash `\` introduces a **escape sequence**.
|
||||
For instance, `"\n"` evaluates to a newline; `"\\"` evaluates to a backslash;
|
||||
invalid escape sequences like `"\*"` result in a syntax error.
|
||||
### Double-Quoted String
|
||||
|
||||
**TODO**: Document the full list of supported escape sequences.
|
||||
In double-quoted strings the backslash, `\`, introduces an **escape sequence**.
|
||||
For instance, `\n` evaluates to a newline and `\\` evaluates to a backslash.
|
||||
Invalid escape sequences like `\*` result in a syntax error when the program is
|
||||
compiled.
|
||||
|
||||
Unlike most other shells, double-quoted strings do not support interpolation.
|
||||
For instance, `"$USER"` simply evaluates to the string `$USER`. To get a similar
|
||||
effect, simply concatenate strings: instead of `"my name is $name"`, write
|
||||
`"my name is "$name`. Under the hood this is a
|
||||
Unlike most other shells, double-quoted strings in Elvish do not support
|
||||
interpolation. For instance, `"$name"` simply evaluates to the string `$name`.
|
||||
To get a similar effect, concatenate strings: instead of
|
||||
`"my name is $name"`, write `"my name is "$name`. Under the hood this is a
|
||||
[compound expression](#compound-expression-and-braced-lists).
|
||||
|
||||
### Barewords
|
||||
The following escape sequences are recognized in double-quoted strings:
|
||||
|
||||
- `\cX`, where _X_ is a character with codepoint between 0x40 and 0x5F,
|
||||
represents the codepoint that is 0x40 lower than _X_. For example, `\cI` is
|
||||
the tab character: 0x49 (`I`) - 0x40 = 0x09 (tab). There is one special
|
||||
case: a question mark is converted to del; i.e., `\c?` or `\^?` is
|
||||
equivalent to `\x7F`.
|
||||
|
||||
- `\^X` is the same as `\cX`.
|
||||
|
||||
- `\[0..7][0..7][0..7]` is a byte written as an octal value. There must be
|
||||
three octal digits following the backslash. For example, `\000` is the nul
|
||||
character, and `\101` is the same as `A`, but `\0` is an invalid escape
|
||||
sequence (too few digits).
|
||||
|
||||
- `\x..` is a Unicode code point represented by two hexadecimal digits.
|
||||
|
||||
- `\u....` is a Unicode code point represented by four hexadecimal digits.
|
||||
|
||||
- `\U........` is a Unicode code point represented by eight hexadecimal digits.
|
||||
|
||||
- The following single-character escape sequences are also recognized:
|
||||
|
||||
- `\a` is the "bel" character, equivalent to `\007` or `\x07`.
|
||||
|
||||
- `\b` is the "backspace" character, equivalent to `\010` or `\x08`.
|
||||
|
||||
- `\f` is the "formfeed" (aka "np") character, equivalent to `\014` or
|
||||
`\x0c`.
|
||||
|
||||
- `\n` is the "nl" character, equivalent to `\012` or `\x0a`.
|
||||
|
||||
- `\r` is the "cr" character, equivalent to `\015` or `\x0d`.
|
||||
|
||||
- `\t` is the "tab" character, equivalent to `\011` or `\x09`.
|
||||
|
||||
- `\v` is the "vt" character, equivalent to `\013` or `\x0b`.
|
||||
|
||||
- `\\` is the "backslash" character, equivalent to `\134` or `\x5c`.
|
||||
|
||||
- `\"` is the "double-quote" character, equivalent to `\042` or `\x22`.
|
||||
|
||||
### Bareword String
|
||||
|
||||
If a string only consists of bareword characters, it can be written without any
|
||||
quote; this is called a **bareword**. Examples are `a.txt`, `long-bareword`, and
|
||||
|
|
Loading…
Reference in New Issue
Block a user