cli/cli/connhelper/internal/syntax/quote.go at v29.3.0 · docker/cli · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
// Copyright (c) 2021, Daniel Martí <mvdan@mvdan.cc>
// See LICENSE for licensing information

package syntax

import (
	"fmt"
	"strings"
	"unicode"
	"unicode/utf8"
)

// QuoteError describes why a string could not be quoted.
// Quote returns it as a *QuoteError.
type QuoteError struct {
	// ByteOffset is the byte index into the input string of the
	// character that could not be quoted.
	ByteOffset int

	// Message is the reason that character cannot be quoted.
	Message string
}

// Error implements the error interface. The text names the offending
// byte offset, followed by the reason held in Message.
func (e QuoteError) Error() string {
	return fmt.Sprint("cannot quote character at byte ", e.ByteOffset, ": ", e.Message)
}

// Reasons stored in QuoteError.Message when Quote rejects its input:
//
//   - quoteErrNull: the input contains a null byte.
//   - quoteErrPOSIX: the variant is LangPOSIX and the input contains
//     something that Quote would need an escape sequence for.
//   - quoteErrRange: a decoded rune is above utf8.MaxRune.
//   - quoteErrMksh: the variant is LangMirBSDKorn and a rune that has to
//     be escaped is above 0xFFFD.
const (
	quoteErrNull  = "shell strings cannot contain null bytes"
	quoteErrPOSIX = "POSIX shell lacks escape sequences"
	quoteErrRange = "rune out of range"
	quoteErrMksh  = "mksh cannot escape codepoints above 16 bits"
)

// Quote returns a quoted version of the input string,
// so that the quoted version is expanded or interpreted
// as the original string in the given language variant.
//
// Quoting is necessary when using arbitrary literal strings
// as words in a shell script or command.
// Without quoting, one can run into syntax errors,
// as well as the possibility of running unintended code.
//
// An error is returned when a string cannot be quoted for a variant.
// For instance, POSIX lacks escape sequences for non-printable characters,
// and no language variant can represent a string containing null bytes.
// In such cases, the returned error type will be *QuoteError.
//
// Only bytes that fail to decode as UTF-8 are treated as invalid input.
// A correctly encoded U+FFFD replacement character is an ordinary
// printable rune and needs no escaping.
//
// The quoting strategy is chosen on a best-effort basis,
// to minimize the amount of extra bytes necessary:
// the string is returned unchanged if nothing needs quoting,
// $'...' is used if any rune must be escaped,
// '...' is used if the string has no single quotes,
// and "..." is used otherwise.
//
// Some strings do not require any quoting and are returned unchanged.
// Those strings can be directly surrounded in single quotes as well.
//
//nolint:gocyclo // ignore "cyclomatic complexity 35 of func `Quote` is high (> 16) (gocyclo)"
func Quote(s string, lang LangVariant) (string, error) {
	if s == "" {
		// Special case; an empty string must always be quoted,
		// as otherwise it expands to zero fields.
		return "''", nil
	}

	// First pass: classify the input without allocating,
	// and reject anything that cannot be represented at all.
	shellChars := false
	nonPrintable := false
	offs := 0
	for rem := s; len(rem) > 0; {
		r, size := utf8.DecodeRuneInString(rem)
		switch r {
		// Like regOps; token characters.
		case ';', '"', '\'', '(', ')', '$', '|', '&', '>', '<', '`',
			// Whitespace; might result in multiple fields.
			' ', '\t', '\r', '\n',
			// Escape sequences would be expanded.
			'\\',
			// Would start a comment unless quoted.
			'#',
			// Might result in brace expansion.
			'{',
			// Might result in tilde expansion.
			'~',
			// Might result in globbing.
			'*', '?', '[',
			// Might result in an assignment.
			'=':
			shellChars = true
		case '\x00':
			return "", &QuoteError{ByteOffset: offs, Message: quoteErrNull}
		}
		// An invalid encoding decodes as (RuneError, 1).
		// A well-formed U+FFFD decodes with a larger size and is a
		// regular printable rune, so it must not be mistaken for
		// invalid input.
		invalid := r == utf8.RuneError && size == 1
		if invalid || !unicode.IsPrint(r) {
			if lang == LangPOSIX {
				return "", &QuoteError{ByteOffset: offs, Message: quoteErrPOSIX}
			}
			nonPrintable = true
		}
		rem = rem[size:]
		offs += size
	}
	if !shellChars && !nonPrintable && !IsKeyword(s) {
		// Nothing to quote; avoid allocating.
		return s, nil
	}

	// Single quotes are usually best,
	// as they don't require any escaping of characters.
	// If we have any invalid utf8 or non-printable runes,
	// use $'' so that we can escape them.
	// Note that we can't use double quotes for those.
	var b strings.Builder
	if nonPrintable {
		b.WriteString("$'")
		// lastRequoteIfHex records that the previous rune was written as
		// \xXX for mksh, so a following hex digit must not be glued to it.
		lastRequoteIfHex := false
		offs = 0
		for rem := s; len(rem) > 0; {
			nextRequoteIfHex := false
			r, size := utf8.DecodeRuneInString(rem)
			// Same test as in the first pass: only a failed decode counts
			// as invalid, never a correctly encoded U+FFFD.
			invalid := r == utf8.RuneError && size == 1
			switch {
			case r == '\'', r == '\\':
				// These two would end the string or start an escape.
				b.WriteByte('\\')
				b.WriteRune(r)
			case unicode.IsPrint(r) && !invalid:
				if lastRequoteIfHex && isHex(r) {
					// Close and reopen the quoting so that this digit
					// cannot be read as part of the preceding \x escape.
					b.WriteString("'$'")
				}
				b.WriteRune(r)
			case r == '\a':
				b.WriteString(`\a`)
			case r == '\b':
				b.WriteString(`\b`)
			case r == '\f':
				b.WriteString(`\f`)
			case r == '\n':
				b.WriteString(`\n`)
			case r == '\r':
				b.WriteString(`\r`)
			case r == '\t':
				b.WriteString(`\t`)
			case r == '\v':
				b.WriteString(`\v`)
			case r < utf8.RuneSelf, invalid:
				// \xXX, fixed at two hexadecimal characters.
				// For an invalid encoding this emits the raw byte.
				fmt.Fprintf(&b, "\\x%02x", rem[0])
				// Unfortunately, mksh allows \x to consume more hex characters.
				// Ensure that we don't allow it to read more than two.
				if lang == LangMirBSDKorn {
					nextRequoteIfHex = true
				}
			case r > utf8.MaxRune:
				// Not a valid Unicode code point?
				// Defensive check; not expected from utf8.DecodeRuneInString.
				return "", &QuoteError{ByteOffset: offs, Message: quoteErrRange}
			case lang == LangMirBSDKorn && r > 0xFFFD:
				// From the CAVEATS section in R59's man page:
				//
				// mksh currently uses OPTU-16 internally, which is the same as
				// UTF-8 and CESU-8 with 0000..FFFD being valid codepoints.
				return "", &QuoteError{ByteOffset: offs, Message: quoteErrMksh}
			case r < 0x10000:
				// \uXXXX, fixed at four hexadecimal characters.
				fmt.Fprintf(&b, "\\u%04x", r)
			default:
				// \UXXXXXXXX, fixed at eight hexadecimal characters.
				fmt.Fprintf(&b, "\\U%08x", r)
			}
			rem = rem[size:]
			lastRequoteIfHex = nextRequoteIfHex
			offs += size
		}
		b.WriteString("'")
		return b.String(), nil
	}

	// Single quotes without any need for escaping.
	if !strings.Contains(s, "'") {
		return "'" + s + "'", nil
	}

	// The string contains single quotes,
	// so fall back to double quotes.
	// Only the characters that stay special inside double quotes
	// need a backslash.
	b.WriteByte('"')
	for _, r := range s {
		switch r {
		case '"', '\\', '`', '$':
			b.WriteByte('\\')
		}
		b.WriteRune(r)
	}
	b.WriteByte('"')
	return b.String(), nil
}

// isHex reports whether r is an ASCII hexadecimal digit:
// 0-9, a-f, or A-F.
func isHex(r rune) bool {
	switch {
	case '0' <= r && r <= '9':
		return true
	case 'a' <= r && r <= 'f':
		return true
	case 'A' <= r && r <= 'F':
		return true
	}
	return false
}