diff options
Diffstat (limited to 'devdocs/go/unicode%2Futf8%2Findex.html')
| -rw-r--r-- | devdocs/go/unicode%2Futf8%2Findex.html | 205 |
1 files changed, 205 insertions, 0 deletions
diff --git a/devdocs/go/unicode%2Futf8%2Findex.html b/devdocs/go/unicode%2Futf8%2Findex.html new file mode 100644 index 00000000..b27d81f9 --- /dev/null +++ b/devdocs/go/unicode%2Futf8%2Findex.html @@ -0,0 +1,205 @@ +<h1> Package utf8 </h1> <ul id="short-nav"> +<li><code>import "unicode/utf8"</code></li> +<li><a href="#pkg-overview" class="overviewLink">Overview</a></li> +<li><a href="#pkg-index" class="indexLink">Index</a></li> +<li><a href="#pkg-examples" class="examplesLink">Examples</a></li> +</ul> <h2 id="pkg-overview">Overview </h2> <p>Package utf8 implements functions and constants to support text encoded in UTF-8. It includes functions to translate between runes and UTF-8 byte sequences. See <a href="https://en.wikipedia.org/wiki/UTF-8">https://en.wikipedia.org/wiki/UTF-8</a> </p> <h2 id="pkg-index">Index </h2> <ul id="manual-nav"> +<li><a href="#pkg-constants">Constants</a></li> +<li><a href="#AppendRune">func AppendRune(p []byte, r rune) []byte</a></li> +<li><a href="#DecodeLastRune">func DecodeLastRune(p []byte) (r rune, size int)</a></li> +<li><a href="#DecodeLastRuneInString">func DecodeLastRuneInString(s string) (r rune, size int)</a></li> +<li><a href="#DecodeRune">func DecodeRune(p []byte) (r rune, size int)</a></li> +<li><a href="#DecodeRuneInString">func DecodeRuneInString(s string) (r rune, size int)</a></li> +<li><a href="#EncodeRune">func EncodeRune(p []byte, r rune) int</a></li> +<li><a href="#FullRune">func FullRune(p []byte) bool</a></li> +<li><a href="#FullRuneInString">func FullRuneInString(s string) bool</a></li> +<li><a href="#RuneCount">func RuneCount(p []byte) int</a></li> +<li><a href="#RuneCountInString">func RuneCountInString(s string) (n int)</a></li> +<li><a href="#RuneLen">func RuneLen(r rune) int</a></li> +<li><a href="#RuneStart">func RuneStart(b byte) bool</a></li> +<li><a href="#Valid">func Valid(p []byte) bool</a></li> +<li><a href="#ValidRune">func ValidRune(r rune) bool</a></li> +<li><a href="#ValidString">func ValidString(s string) bool</a></li> +</ul> <div id="pkg-examples"> <h3>Examples</h3> <dl> <dd><a class="exampleLink" href="#example_AppendRune">AppendRune</a></dd> <dd><a class="exampleLink" href="#example_DecodeLastRune">DecodeLastRune</a></dd> <dd><a class="exampleLink" href="#example_DecodeLastRuneInString">DecodeLastRuneInString</a></dd> <dd><a class="exampleLink" href="#example_DecodeRune">DecodeRune</a></dd> <dd><a class="exampleLink" href="#example_DecodeRuneInString">DecodeRuneInString</a></dd> <dd><a class="exampleLink" href="#example_EncodeRune">EncodeRune</a></dd> <dd><a class="exampleLink" href="#example_EncodeRune_outOfRange">EncodeRune (OutOfRange)</a></dd> <dd><a class="exampleLink" href="#example_FullRune">FullRune</a></dd> <dd><a class="exampleLink" href="#example_FullRuneInString">FullRuneInString</a></dd> <dd><a class="exampleLink" href="#example_RuneCount">RuneCount</a></dd> <dd><a class="exampleLink" href="#example_RuneCountInString">RuneCountInString</a></dd> <dd><a class="exampleLink" href="#example_RuneLen">RuneLen</a></dd> <dd><a class="exampleLink" href="#example_RuneStart">RuneStart</a></dd> <dd><a class="exampleLink" href="#example_Valid">Valid</a></dd> <dd><a class="exampleLink" href="#example_ValidRune">ValidRune</a></dd> <dd><a class="exampleLink" href="#example_ValidString">ValidString</a></dd> </dl> </div> <h3>Package files</h3> <p> <span>utf8.go</span> </p> <h2 id="pkg-constants">Constants</h2> <p>Numbers fundamental to the encoding. </p> +<pre data-language="go">const ( + RuneError = '\uFFFD' // the "error" Rune or "Unicode replacement character" + RuneSelf = 0x80 // characters below RuneSelf are represented as themselves in a single byte. + MaxRune = '\U0010FFFF' // Maximum valid Unicode code point. + UTFMax = 4 // maximum number of bytes of a UTF-8 encoded Unicode character. +)</pre> <h2 id="AppendRune">func <span>AppendRune</span> <span title="Added in Go 1.18">1.18</span> </h2> <pre data-language="go">func AppendRune(p []byte, r rune) []byte</pre> <p>AppendRune appends the UTF-8 encoding of r to the end of p and returns the extended buffer. If the rune is out of range, it appends the encoding of <a href="#RuneError">RuneError</a>. </p> <h4 id="example_AppendRune"> <span class="text">Example</span> +</h4> <p>Code:</p> <pre class="code" data-language="go">buf1 := utf8.AppendRune(nil, 0x10000) +buf2 := utf8.AppendRune([]byte("init"), 0x10000) +fmt.Println(string(buf1)) +fmt.Println(string(buf2)) +</pre> <p>Output:</p> <pre class="output" data-language="go">๐ +init๐ +</pre> <h2 id="DecodeLastRune">func <span>DecodeLastRune</span> </h2> <pre data-language="go">func DecodeLastRune(p []byte) (r rune, size int)</pre> <p>DecodeLastRune unpacks the last UTF-8 encoding in p and returns the rune and its width in bytes. If p is empty it returns (<a href="#RuneError">RuneError</a>, 0). Otherwise, if the encoding is invalid, it returns (RuneError, 1). Both are impossible results for correct, non-empty UTF-8. </p> +<p>An encoding is invalid if it is incorrect UTF-8, encodes a rune that is out of range, or is not the shortest possible UTF-8 encoding for the value. No other validation is performed. </p> <h4 id="example_DecodeLastRune"> <span class="text">Example</span> +</h4> <p>Code:</p> <pre class="code" data-language="go">b := []byte("Hello, ไธ็") + +for len(b) > 0 { + r, size := utf8.DecodeLastRune(b) + fmt.Printf("%c %v\n", r, size) + + b = b[:len(b)-size] +} +</pre> <p>Output:</p> <pre class="output" data-language="go">็ 3 +ไธ 3 + 1 +, 1 +o 1 +l 1 +l 1 +e 1 +H 1 +</pre> <h2 id="DecodeLastRuneInString">func <span>DecodeLastRuneInString</span> </h2> <pre data-language="go">func DecodeLastRuneInString(s string) (r rune, size int)</pre> <p>DecodeLastRuneInString is like <a href="#DecodeLastRune">DecodeLastRune</a> but its input is a string. If s is empty it returns (<a href="#RuneError">RuneError</a>, 0). Otherwise, if the encoding is invalid, it returns (RuneError, 1). Both are impossible results for correct, non-empty UTF-8. </p> +<p>An encoding is invalid if it is incorrect UTF-8, encodes a rune that is out of range, or is not the shortest possible UTF-8 encoding for the value. No other validation is performed. </p> <h4 id="example_DecodeLastRuneInString"> <span class="text">Example</span> +</h4> <p>Code:</p> <pre class="code" data-language="go">str := "Hello, ไธ็" + +for len(str) > 0 { + r, size := utf8.DecodeLastRuneInString(str) + fmt.Printf("%c %v\n", r, size) + + str = str[:len(str)-size] +} +</pre> <p>Output:</p> <pre class="output" data-language="go">็ 3 +ไธ 3 + 1 +, 1 +o 1 +l 1 +l 1 +e 1 +H 1 +</pre> <h2 id="DecodeRune">func <span>DecodeRune</span> </h2> <pre data-language="go">func DecodeRune(p []byte) (r rune, size int)</pre> <p>DecodeRune unpacks the first UTF-8 encoding in p and returns the rune and its width in bytes. If p is empty it returns (<a href="#RuneError">RuneError</a>, 0). Otherwise, if the encoding is invalid, it returns (RuneError, 1). Both are impossible results for correct, non-empty UTF-8. </p> +<p>An encoding is invalid if it is incorrect UTF-8, encodes a rune that is out of range, or is not the shortest possible UTF-8 encoding for the value. No other validation is performed. </p> <h4 id="example_DecodeRune"> <span class="text">Example</span> +</h4> <p>Code:</p> <pre class="code" data-language="go">b := []byte("Hello, ไธ็") + +for len(b) > 0 { + r, size := utf8.DecodeRune(b) + fmt.Printf("%c %v\n", r, size) + + b = b[size:] +} +</pre> <p>Output:</p> <pre class="output" data-language="go">H 1 +e 1 +l 1 +l 1 +o 1 +, 1 + 1 +ไธ 3 +็ 3 +</pre> <h2 id="DecodeRuneInString">func <span>DecodeRuneInString</span> </h2> <pre data-language="go">func DecodeRuneInString(s string) (r rune, size int)</pre> <p>DecodeRuneInString is like <a href="#DecodeRune">DecodeRune</a> but its input is a string. If s is empty it returns (<a href="#RuneError">RuneError</a>, 0). Otherwise, if the encoding is invalid, it returns (RuneError, 1). Both are impossible results for correct, non-empty UTF-8. </p> +<p>An encoding is invalid if it is incorrect UTF-8, encodes a rune that is out of range, or is not the shortest possible UTF-8 encoding for the value. No other validation is performed. </p> <h4 id="example_DecodeRuneInString"> <span class="text">Example</span> +</h4> <p>Code:</p> <pre class="code" data-language="go">str := "Hello, ไธ็" + +for len(str) > 0 { + r, size := utf8.DecodeRuneInString(str) + fmt.Printf("%c %v\n", r, size) + + str = str[size:] +} +</pre> <p>Output:</p> <pre class="output" data-language="go">H 1 +e 1 +l 1 +l 1 +o 1 +, 1 + 1 +ไธ 3 +็ 3 +</pre> <h2 id="EncodeRune">func <span>EncodeRune</span> </h2> <pre data-language="go">func EncodeRune(p []byte, r rune) int</pre> <p>EncodeRune writes into p (which must be large enough) the UTF-8 encoding of the rune. If the rune is out of range, it writes the encoding of <a href="#RuneError">RuneError</a>. It returns the number of bytes written. </p> <h4 id="example_EncodeRune"> <span class="text">Example</span> +</h4> <p>Code:</p> <pre class="code" data-language="go">r := 'ไธ' +buf := make([]byte, 3) + +n := utf8.EncodeRune(buf, r) + +fmt.Println(buf) +fmt.Println(n) +</pre> <p>Output:</p> <pre class="output" data-language="go">[228 184 150] +3 +</pre> <h4 id="example_EncodeRune_outOfRange"> <span class="text">Example (OutOfRange)</span> +</h4> <p>Code:</p> <pre class="code" data-language="go">runes := []rune{ + // Less than 0, out of range. + -1, + // Greater than 0x10FFFF, out of range. + 0x110000, + // The Unicode replacement character. + utf8.RuneError, +} +for i, c := range runes { + buf := make([]byte, 3) + size := utf8.EncodeRune(buf, c) + fmt.Printf("%d: %d %[2]s %d\n", i, buf, size) +} +</pre> <p>Output:</p> <pre class="output" data-language="go">0: [239 191 189] ๏ฟฝ 3 +1: [239 191 189] ๏ฟฝ 3 +2: [239 191 189] ๏ฟฝ 3 +</pre> <h2 id="FullRune">func <span>FullRune</span> </h2> <pre data-language="go">func FullRune(p []byte) bool</pre> <p>FullRune reports whether the bytes in p begin with a full UTF-8 encoding of a rune. An invalid encoding is considered a full Rune since it will convert as a width-1 error rune. </p> <h4 id="example_FullRune"> <span class="text">Example</span> +</h4> <p>Code:</p> <pre class="code" data-language="go">buf := []byte{228, 184, 150} // ไธ +fmt.Println(utf8.FullRune(buf)) +fmt.Println(utf8.FullRune(buf[:2])) +</pre> <p>Output:</p> <pre class="output" data-language="go">true +false +</pre> <h2 id="FullRuneInString">func <span>FullRuneInString</span> </h2> <pre data-language="go">func FullRuneInString(s string) bool</pre> <p>FullRuneInString is like FullRune but its input is a string. </p> <h4 id="example_FullRuneInString"> <span class="text">Example</span> +</h4> <p>Code:</p> <pre class="code" data-language="go">str := "ไธ" +fmt.Println(utf8.FullRuneInString(str)) +fmt.Println(utf8.FullRuneInString(str[:2])) +</pre> <p>Output:</p> <pre class="output" data-language="go">true +false +</pre> <h2 id="RuneCount">func <span>RuneCount</span> </h2> <pre data-language="go">func RuneCount(p []byte) int</pre> <p>RuneCount returns the number of runes in p. Erroneous and short encodings are treated as single runes of width 1 byte. </p> <h4 id="example_RuneCount"> <span class="text">Example</span> +</h4> <p>Code:</p> <pre class="code" data-language="go">buf := []byte("Hello, ไธ็") +fmt.Println("bytes =", len(buf)) +fmt.Println("runes =", utf8.RuneCount(buf)) +</pre> <p>Output:</p> <pre class="output" data-language="go">bytes = 13 +runes = 9 +</pre> <h2 id="RuneCountInString">func <span>RuneCountInString</span> </h2> <pre data-language="go">func RuneCountInString(s string) (n int)</pre> <p>RuneCountInString is like <a href="#RuneCount">RuneCount</a> but its input is a string. </p> <h4 id="example_RuneCountInString"> <span class="text">Example</span> +</h4> <p>Code:</p> <pre class="code" data-language="go">str := "Hello, ไธ็" +fmt.Println("bytes =", len(str)) +fmt.Println("runes =", utf8.RuneCountInString(str)) +</pre> <p>Output:</p> <pre class="output" data-language="go">bytes = 13 +runes = 9 +</pre> <h2 id="RuneLen">func <span>RuneLen</span> </h2> <pre data-language="go">func RuneLen(r rune) int</pre> <p>RuneLen returns the number of bytes required to encode the rune. It returns -1 if the rune is not a valid value to encode in UTF-8. </p> <h4 id="example_RuneLen"> <span class="text">Example</span> +</h4> <p>Code:</p> <pre class="code" data-language="go">fmt.Println(utf8.RuneLen('a')) +fmt.Println(utf8.RuneLen('็')) +</pre> <p>Output:</p> <pre class="output" data-language="go">1 +3 +</pre> <h2 id="RuneStart">func <span>RuneStart</span> </h2> <pre data-language="go">func RuneStart(b byte) bool</pre> <p>RuneStart reports whether the byte could be the first byte of an encoded, possibly invalid rune. Second and subsequent bytes always have the top two bits set to 10. </p> <h4 id="example_RuneStart"> <span class="text">Example</span> +</h4> <p>Code:</p> <pre class="code" data-language="go">buf := []byte("a็") +fmt.Println(utf8.RuneStart(buf[0])) +fmt.Println(utf8.RuneStart(buf[1])) +fmt.Println(utf8.RuneStart(buf[2])) +</pre> <p>Output:</p> <pre class="output" data-language="go">true +true +false +</pre> <h2 id="Valid">func <span>Valid</span> </h2> <pre data-language="go">func Valid(p []byte) bool</pre> <p>Valid reports whether p consists entirely of valid UTF-8-encoded runes. </p> <h4 id="example_Valid"> <span class="text">Example</span> +</h4> <p>Code:</p> <pre class="code" data-language="go">valid := []byte("Hello, ไธ็") +invalid := []byte{0xff, 0xfe, 0xfd} + +fmt.Println(utf8.Valid(valid)) +fmt.Println(utf8.Valid(invalid)) +</pre> <p>Output:</p> <pre class="output" data-language="go">true +false +</pre> <h2 id="ValidRune">func <span>ValidRune</span> <span title="Added in Go 1.1">1.1</span> </h2> <pre data-language="go">func ValidRune(r rune) bool</pre> <p>ValidRune reports whether r can be legally encoded as UTF-8. Code points that are out of range or a surrogate half are illegal. </p> <h4 id="example_ValidRune"> <span class="text">Example</span> +</h4> <p>Code:</p> <pre class="code" data-language="go">valid := 'a' +invalid := rune(0xfffffff) + +fmt.Println(utf8.ValidRune(valid)) +fmt.Println(utf8.ValidRune(invalid)) +</pre> <p>Output:</p> <pre class="output" data-language="go">true +false +</pre> <h2 id="ValidString">func <span>ValidString</span> </h2> <pre data-language="go">func ValidString(s string) bool</pre> <p>ValidString reports whether s consists entirely of valid UTF-8-encoded runes. </p> <h4 id="example_ValidString"> <span class="text">Example</span> +</h4> <p>Code:</p> <pre class="code" data-language="go">valid := "Hello, ไธ็" +invalid := string([]byte{0xff, 0xfe, 0xfd}) + +fmt.Println(utf8.ValidString(valid)) +fmt.Println(utf8.ValidString(invalid)) +</pre> <p>Output:</p> <pre class="output" data-language="go">true +false +</pre><div class="_attribution"> + <p class="_attribution-p"> + © Google, Inc.<br>Licensed under the Creative Commons Attribution License 3.0.<br> + <a href="http://golang.org/pkg/unicode/utf8/" class="_attribution-link">http://golang.org/pkg/unicode/utf8/</a> + </p> +</div> |
