1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
|
<h1> Package utf8 </h1> <ul id="short-nav">
<li><code>import "unicode/utf8"</code></li>
<li><a href="#pkg-overview" class="overviewLink">Overview</a></li>
<li><a href="#pkg-index" class="indexLink">Index</a></li>
<li><a href="#pkg-examples" class="examplesLink">Examples</a></li>
</ul> <h2 id="pkg-overview">Overview </h2> <p>Package utf8 implements functions and constants to support text encoded in UTF-8. It includes functions to translate between runes and UTF-8 byte sequences. See <a href="https://en.wikipedia.org/wiki/UTF-8">https://en.wikipedia.org/wiki/UTF-8</a> </p> <h2 id="pkg-index">Index </h2> <ul id="manual-nav">
<li><a href="#pkg-constants">Constants</a></li>
<li><a href="#AppendRune">func AppendRune(p []byte, r rune) []byte</a></li>
<li><a href="#DecodeLastRune">func DecodeLastRune(p []byte) (r rune, size int)</a></li>
<li><a href="#DecodeLastRuneInString">func DecodeLastRuneInString(s string) (r rune, size int)</a></li>
<li><a href="#DecodeRune">func DecodeRune(p []byte) (r rune, size int)</a></li>
<li><a href="#DecodeRuneInString">func DecodeRuneInString(s string) (r rune, size int)</a></li>
<li><a href="#EncodeRune">func EncodeRune(p []byte, r rune) int</a></li>
<li><a href="#FullRune">func FullRune(p []byte) bool</a></li>
<li><a href="#FullRuneInString">func FullRuneInString(s string) bool</a></li>
<li><a href="#RuneCount">func RuneCount(p []byte) int</a></li>
<li><a href="#RuneCountInString">func RuneCountInString(s string) (n int)</a></li>
<li><a href="#RuneLen">func RuneLen(r rune) int</a></li>
<li><a href="#RuneStart">func RuneStart(b byte) bool</a></li>
<li><a href="#Valid">func Valid(p []byte) bool</a></li>
<li><a href="#ValidRune">func ValidRune(r rune) bool</a></li>
<li><a href="#ValidString">func ValidString(s string) bool</a></li>
</ul> <div id="pkg-examples"> <h3>Examples</h3> <dl> <dd><a class="exampleLink" href="#example_AppendRune">AppendRune</a></dd> <dd><a class="exampleLink" href="#example_DecodeLastRune">DecodeLastRune</a></dd> <dd><a class="exampleLink" href="#example_DecodeLastRuneInString">DecodeLastRuneInString</a></dd> <dd><a class="exampleLink" href="#example_DecodeRune">DecodeRune</a></dd> <dd><a class="exampleLink" href="#example_DecodeRuneInString">DecodeRuneInString</a></dd> <dd><a class="exampleLink" href="#example_EncodeRune">EncodeRune</a></dd> <dd><a class="exampleLink" href="#example_EncodeRune_outOfRange">EncodeRune (OutOfRange)</a></dd> <dd><a class="exampleLink" href="#example_FullRune">FullRune</a></dd> <dd><a class="exampleLink" href="#example_FullRuneInString">FullRuneInString</a></dd> <dd><a class="exampleLink" href="#example_RuneCount">RuneCount</a></dd> <dd><a class="exampleLink" href="#example_RuneCountInString">RuneCountInString</a></dd> <dd><a class="exampleLink" href="#example_RuneLen">RuneLen</a></dd> <dd><a class="exampleLink" href="#example_RuneStart">RuneStart</a></dd> <dd><a class="exampleLink" href="#example_Valid">Valid</a></dd> <dd><a class="exampleLink" href="#example_ValidRune">ValidRune</a></dd> <dd><a class="exampleLink" href="#example_ValidString">ValidString</a></dd> </dl> </div> <h3>Package files</h3> <p> <span>utf8.go</span> </p> <h2 id="pkg-constants">Constants</h2> <p>Numbers fundamental to the encoding. </p>
<pre data-language="go">const (
RuneError = '\uFFFD' // the "error" Rune or "Unicode replacement character"
RuneSelf = 0x80 // characters below RuneSelf are represented as themselves in a single byte.
MaxRune = '\U0010FFFF' // Maximum valid Unicode code point.
UTFMax = 4 // maximum number of bytes of a UTF-8 encoded Unicode character.
)</pre> <h2 id="AppendRune">func <span>AppendRune</span> <span title="Added in Go 1.18">1.18</span> </h2> <pre data-language="go">func AppendRune(p []byte, r rune) []byte</pre> <p>AppendRune appends the UTF-8 encoding of r to the end of p and returns the extended buffer. If the rune is out of range, it appends the encoding of <a href="#RuneError">RuneError</a>. </p> <h4 id="example_AppendRune"> <span class="text">Example</span>
</h4> <p>Code:</p> <pre class="code" data-language="go">buf1 := utf8.AppendRune(nil, 0x10000)
buf2 := utf8.AppendRune([]byte("init"), 0x10000)
fmt.Println(string(buf1))
fmt.Println(string(buf2))
</pre> <p>Output:</p> <pre class="output" data-language="go">𐀀
init𐀀
</pre> <h2 id="DecodeLastRune">func <span>DecodeLastRune</span> </h2> <pre data-language="go">func DecodeLastRune(p []byte) (r rune, size int)</pre> <p>DecodeLastRune unpacks the last UTF-8 encoding in p and returns the rune and its width in bytes. If p is empty it returns (<a href="#RuneError">RuneError</a>, 0). Otherwise, if the encoding is invalid, it returns (RuneError, 1). Both are impossible results for correct, non-empty UTF-8. </p>
<p>An encoding is invalid if it is incorrect UTF-8, encodes a rune that is out of range, or is not the shortest possible UTF-8 encoding for the value. No other validation is performed. </p> <h4 id="example_DecodeLastRune"> <span class="text">Example</span>
</h4> <p>Code:</p> <pre class="code" data-language="go">b := []byte("Hello, 世界")
for len(b) > 0 {
r, size := utf8.DecodeLastRune(b)
fmt.Printf("%c %v\n", r, size)
b = b[:len(b)-size]
}
</pre> <p>Output:</p> <pre class="output" data-language="go">界 3
世 3
1
, 1
o 1
l 1
l 1
e 1
H 1
</pre> <h2 id="DecodeLastRuneInString">func <span>DecodeLastRuneInString</span> </h2> <pre data-language="go">func DecodeLastRuneInString(s string) (r rune, size int)</pre> <p>DecodeLastRuneInString is like <a href="#DecodeLastRune">DecodeLastRune</a> but its input is a string. If s is empty it returns (<a href="#RuneError">RuneError</a>, 0). Otherwise, if the encoding is invalid, it returns (RuneError, 1). Both are impossible results for correct, non-empty UTF-8. </p>
<p>An encoding is invalid if it is incorrect UTF-8, encodes a rune that is out of range, or is not the shortest possible UTF-8 encoding for the value. No other validation is performed. </p> <h4 id="example_DecodeLastRuneInString"> <span class="text">Example</span>
</h4> <p>Code:</p> <pre class="code" data-language="go">str := "Hello, 世界"
for len(str) > 0 {
r, size := utf8.DecodeLastRuneInString(str)
fmt.Printf("%c %v\n", r, size)
str = str[:len(str)-size]
}
</pre> <p>Output:</p> <pre class="output" data-language="go">界 3
世 3
1
, 1
o 1
l 1
l 1
e 1
H 1
</pre> <h2 id="DecodeRune">func <span>DecodeRune</span> </h2> <pre data-language="go">func DecodeRune(p []byte) (r rune, size int)</pre> <p>DecodeRune unpacks the first UTF-8 encoding in p and returns the rune and its width in bytes. If p is empty it returns (<a href="#RuneError">RuneError</a>, 0). Otherwise, if the encoding is invalid, it returns (RuneError, 1). Both are impossible results for correct, non-empty UTF-8. </p>
<p>An encoding is invalid if it is incorrect UTF-8, encodes a rune that is out of range, or is not the shortest possible UTF-8 encoding for the value. No other validation is performed. </p> <h4 id="example_DecodeRune"> <span class="text">Example</span>
</h4> <p>Code:</p> <pre class="code" data-language="go">b := []byte("Hello, 世界")
for len(b) > 0 {
r, size := utf8.DecodeRune(b)
fmt.Printf("%c %v\n", r, size)
b = b[size:]
}
</pre> <p>Output:</p> <pre class="output" data-language="go">H 1
e 1
l 1
l 1
o 1
, 1
1
世 3
界 3
</pre> <h2 id="DecodeRuneInString">func <span>DecodeRuneInString</span> </h2> <pre data-language="go">func DecodeRuneInString(s string) (r rune, size int)</pre> <p>DecodeRuneInString is like <a href="#DecodeRune">DecodeRune</a> but its input is a string. If s is empty it returns (<a href="#RuneError">RuneError</a>, 0). Otherwise, if the encoding is invalid, it returns (RuneError, 1). Both are impossible results for correct, non-empty UTF-8. </p>
<p>An encoding is invalid if it is incorrect UTF-8, encodes a rune that is out of range, or is not the shortest possible UTF-8 encoding for the value. No other validation is performed. </p> <h4 id="example_DecodeRuneInString"> <span class="text">Example</span>
</h4> <p>Code:</p> <pre class="code" data-language="go">str := "Hello, 世界"
for len(str) > 0 {
r, size := utf8.DecodeRuneInString(str)
fmt.Printf("%c %v\n", r, size)
str = str[size:]
}
</pre> <p>Output:</p> <pre class="output" data-language="go">H 1
e 1
l 1
l 1
o 1
, 1
1
世 3
界 3
</pre> <h2 id="EncodeRune">func <span>EncodeRune</span> </h2> <pre data-language="go">func EncodeRune(p []byte, r rune) int</pre> <p>EncodeRune writes into p (which must be large enough) the UTF-8 encoding of the rune. If the rune is out of range, it writes the encoding of <a href="#RuneError">RuneError</a>. It returns the number of bytes written. </p> <h4 id="example_EncodeRune"> <span class="text">Example</span>
</h4> <p>Code:</p> <pre class="code" data-language="go">r := '世'
buf := make([]byte, 3)
n := utf8.EncodeRune(buf, r)
fmt.Println(buf)
fmt.Println(n)
</pre> <p>Output:</p> <pre class="output" data-language="go">[228 184 150]
3
</pre> <h4 id="example_EncodeRune_outOfRange"> <span class="text">Example (OutOfRange)</span>
</h4> <p>Code:</p> <pre class="code" data-language="go">runes := []rune{
// Less than 0, out of range.
-1,
// Greater than 0x10FFFF, out of range.
0x110000,
// The Unicode replacement character.
utf8.RuneError,
}
for i, c := range runes {
buf := make([]byte, 3)
size := utf8.EncodeRune(buf, c)
fmt.Printf("%d: %d %[2]s %d\n", i, buf, size)
}
</pre> <p>Output:</p> <pre class="output" data-language="go">0: [239 191 189] ๏ฟฝ 3
1: [239 191 189] ๏ฟฝ 3
2: [239 191 189] ๏ฟฝ 3
</pre> <h2 id="FullRune">func <span>FullRune</span> </h2> <pre data-language="go">func FullRune(p []byte) bool</pre> <p>FullRune reports whether the bytes in p begin with a full UTF-8 encoding of a rune. An invalid encoding is considered a full Rune since it will convert as a width-1 error rune. </p> <h4 id="example_FullRune"> <span class="text">Example</span>
</h4> <p>Code:</p> <pre class="code" data-language="go">buf := []byte{228, 184, 150} // 世
fmt.Println(utf8.FullRune(buf))
fmt.Println(utf8.FullRune(buf[:2]))
</pre> <p>Output:</p> <pre class="output" data-language="go">true
false
</pre> <h2 id="FullRuneInString">func <span>FullRuneInString</span> </h2> <pre data-language="go">func FullRuneInString(s string) bool</pre> <p>FullRuneInString is like FullRune but its input is a string. </p> <h4 id="example_FullRuneInString"> <span class="text">Example</span>
</h4> <p>Code:</p> <pre class="code" data-language="go">str := "世"
fmt.Println(utf8.FullRuneInString(str))
fmt.Println(utf8.FullRuneInString(str[:2]))
</pre> <p>Output:</p> <pre class="output" data-language="go">true
false
</pre> <h2 id="RuneCount">func <span>RuneCount</span> </h2> <pre data-language="go">func RuneCount(p []byte) int</pre> <p>RuneCount returns the number of runes in p. Erroneous and short encodings are treated as single runes of width 1 byte. </p> <h4 id="example_RuneCount"> <span class="text">Example</span>
</h4> <p>Code:</p> <pre class="code" data-language="go">buf := []byte("Hello, 世界")
fmt.Println("bytes =", len(buf))
fmt.Println("runes =", utf8.RuneCount(buf))
</pre> <p>Output:</p> <pre class="output" data-language="go">bytes = 13
runes = 9
</pre> <h2 id="RuneCountInString">func <span>RuneCountInString</span> </h2> <pre data-language="go">func RuneCountInString(s string) (n int)</pre> <p>RuneCountInString is like <a href="#RuneCount">RuneCount</a> but its input is a string. </p> <h4 id="example_RuneCountInString"> <span class="text">Example</span>
</h4> <p>Code:</p> <pre class="code" data-language="go">str := "Hello, 世界"
fmt.Println("bytes =", len(str))
fmt.Println("runes =", utf8.RuneCountInString(str))
</pre> <p>Output:</p> <pre class="output" data-language="go">bytes = 13
runes = 9
</pre> <h2 id="RuneLen">func <span>RuneLen</span> </h2> <pre data-language="go">func RuneLen(r rune) int</pre> <p>RuneLen returns the number of bytes required to encode the rune. It returns -1 if the rune is not a valid value to encode in UTF-8. </p> <h4 id="example_RuneLen"> <span class="text">Example</span>
</h4> <p>Code:</p> <pre class="code" data-language="go">fmt.Println(utf8.RuneLen('a'))
fmt.Println(utf8.RuneLen('界'))
</pre> <p>Output:</p> <pre class="output" data-language="go">1
3
</pre> <h2 id="RuneStart">func <span>RuneStart</span> </h2> <pre data-language="go">func RuneStart(b byte) bool</pre> <p>RuneStart reports whether the byte could be the first byte of an encoded, possibly invalid rune. Second and subsequent bytes always have the top two bits set to 10. </p> <h4 id="example_RuneStart"> <span class="text">Example</span>
</h4> <p>Code:</p> <pre class="code" data-language="go">buf := []byte("a界")
fmt.Println(utf8.RuneStart(buf[0]))
fmt.Println(utf8.RuneStart(buf[1]))
fmt.Println(utf8.RuneStart(buf[2]))
</pre> <p>Output:</p> <pre class="output" data-language="go">true
true
false
</pre> <h2 id="Valid">func <span>Valid</span> </h2> <pre data-language="go">func Valid(p []byte) bool</pre> <p>Valid reports whether p consists entirely of valid UTF-8-encoded runes. </p> <h4 id="example_Valid"> <span class="text">Example</span>
</h4> <p>Code:</p> <pre class="code" data-language="go">valid := []byte("Hello, 世界")
invalid := []byte{0xff, 0xfe, 0xfd}
fmt.Println(utf8.Valid(valid))
fmt.Println(utf8.Valid(invalid))
</pre> <p>Output:</p> <pre class="output" data-language="go">true
false
</pre> <h2 id="ValidRune">func <span>ValidRune</span> <span title="Added in Go 1.1">1.1</span> </h2> <pre data-language="go">func ValidRune(r rune) bool</pre> <p>ValidRune reports whether r can be legally encoded as UTF-8. Code points that are out of range or a surrogate half are illegal. </p> <h4 id="example_ValidRune"> <span class="text">Example</span>
</h4> <p>Code:</p> <pre class="code" data-language="go">valid := 'a'
invalid := rune(0xfffffff)
fmt.Println(utf8.ValidRune(valid))
fmt.Println(utf8.ValidRune(invalid))
</pre> <p>Output:</p> <pre class="output" data-language="go">true
false
</pre> <h2 id="ValidString">func <span>ValidString</span> </h2> <pre data-language="go">func ValidString(s string) bool</pre> <p>ValidString reports whether s consists entirely of valid UTF-8-encoded runes. </p> <h4 id="example_ValidString"> <span class="text">Example</span>
</h4> <p>Code:</p> <pre class="code" data-language="go">valid := "Hello, 世界"
invalid := string([]byte{0xff, 0xfe, 0xfd})
fmt.Println(utf8.ValidString(valid))
fmt.Println(utf8.ValidString(invalid))
</pre> <p>Output:</p> <pre class="output" data-language="go">true
false
</pre><div class="_attribution">
<p class="_attribution-p">
© Google, Inc.<br>Licensed under the Creative Commons Attribution License 3.0.<br>
<a href="http://golang.org/pkg/unicode/utf8/" class="_attribution-link">http://golang.org/pkg/unicode/utf8/</a>
</p>
</div>
|