summaryrefslogtreecommitdiff
path: root/devdocs/go/unicode%2Futf8%2Findex.html
diff options
context:
space:
mode:
Diffstat (limited to 'devdocs/go/unicode%2Futf8%2Findex.html')
-rw-r--r--devdocs/go/unicode%2Futf8%2Findex.html205
1 files changed, 205 insertions, 0 deletions
diff --git a/devdocs/go/unicode%2Futf8%2Findex.html b/devdocs/go/unicode%2Futf8%2Findex.html
new file mode 100644
index 00000000..b27d81f9
--- /dev/null
+++ b/devdocs/go/unicode%2Futf8%2Findex.html
@@ -0,0 +1,205 @@
+<h1> Package utf8 </h1> <ul id="short-nav">
+<li><code>import "unicode/utf8"</code></li>
+<li><a href="#pkg-overview" class="overviewLink">Overview</a></li>
+<li><a href="#pkg-index" class="indexLink">Index</a></li>
+<li><a href="#pkg-examples" class="examplesLink">Examples</a></li>
+</ul> <h2 id="pkg-overview">Overview </h2> <p>Package utf8 implements functions and constants to support text encoded in UTF-8. It includes functions to translate between runes and UTF-8 byte sequences. See <a href="https://en.wikipedia.org/wiki/UTF-8">https://en.wikipedia.org/wiki/UTF-8</a> </p> <h2 id="pkg-index">Index </h2> <ul id="manual-nav">
+<li><a href="#pkg-constants">Constants</a></li>
+<li><a href="#AppendRune">func AppendRune(p []byte, r rune) []byte</a></li>
+<li><a href="#DecodeLastRune">func DecodeLastRune(p []byte) (r rune, size int)</a></li>
+<li><a href="#DecodeLastRuneInString">func DecodeLastRuneInString(s string) (r rune, size int)</a></li>
+<li><a href="#DecodeRune">func DecodeRune(p []byte) (r rune, size int)</a></li>
+<li><a href="#DecodeRuneInString">func DecodeRuneInString(s string) (r rune, size int)</a></li>
+<li><a href="#EncodeRune">func EncodeRune(p []byte, r rune) int</a></li>
+<li><a href="#FullRune">func FullRune(p []byte) bool</a></li>
+<li><a href="#FullRuneInString">func FullRuneInString(s string) bool</a></li>
+<li><a href="#RuneCount">func RuneCount(p []byte) int</a></li>
+<li><a href="#RuneCountInString">func RuneCountInString(s string) (n int)</a></li>
+<li><a href="#RuneLen">func RuneLen(r rune) int</a></li>
+<li><a href="#RuneStart">func RuneStart(b byte) bool</a></li>
+<li><a href="#Valid">func Valid(p []byte) bool</a></li>
+<li><a href="#ValidRune">func ValidRune(r rune) bool</a></li>
+<li><a href="#ValidString">func ValidString(s string) bool</a></li>
+</ul> <div id="pkg-examples"> <h3>Examples</h3> <dl> <dd><a class="exampleLink" href="#example_AppendRune">AppendRune</a></dd> <dd><a class="exampleLink" href="#example_DecodeLastRune">DecodeLastRune</a></dd> <dd><a class="exampleLink" href="#example_DecodeLastRuneInString">DecodeLastRuneInString</a></dd> <dd><a class="exampleLink" href="#example_DecodeRune">DecodeRune</a></dd> <dd><a class="exampleLink" href="#example_DecodeRuneInString">DecodeRuneInString</a></dd> <dd><a class="exampleLink" href="#example_EncodeRune">EncodeRune</a></dd> <dd><a class="exampleLink" href="#example_EncodeRune_outOfRange">EncodeRune (OutOfRange)</a></dd> <dd><a class="exampleLink" href="#example_FullRune">FullRune</a></dd> <dd><a class="exampleLink" href="#example_FullRuneInString">FullRuneInString</a></dd> <dd><a class="exampleLink" href="#example_RuneCount">RuneCount</a></dd> <dd><a class="exampleLink" href="#example_RuneCountInString">RuneCountInString</a></dd> <dd><a class="exampleLink" href="#example_RuneLen">RuneLen</a></dd> <dd><a class="exampleLink" href="#example_RuneStart">RuneStart</a></dd> <dd><a class="exampleLink" href="#example_Valid">Valid</a></dd> <dd><a class="exampleLink" href="#example_ValidRune">ValidRune</a></dd> <dd><a class="exampleLink" href="#example_ValidString">ValidString</a></dd> </dl> </div> <h3>Package files</h3> <p> <span>utf8.go</span> </p> <h2 id="pkg-constants">Constants</h2> <p>Numbers fundamental to the encoding. </p>
+<pre data-language="go">const (
+ RuneError = '\uFFFD' // the "error" Rune or "Unicode replacement character"
+ RuneSelf = 0x80 // characters below RuneSelf are represented as themselves in a single byte.
+ MaxRune = '\U0010FFFF' // Maximum valid Unicode code point.
+ UTFMax = 4 // maximum number of bytes of a UTF-8 encoded Unicode character.
+)</pre> <h2 id="AppendRune">func <span>AppendRune</span> <span title="Added in Go 1.18">1.18</span> </h2> <pre data-language="go">func AppendRune(p []byte, r rune) []byte</pre> <p>AppendRune appends the UTF-8 encoding of r to the end of p and returns the extended buffer. If the rune is out of range, it appends the encoding of <a href="#RuneError">RuneError</a>. </p> <h4 id="example_AppendRune"> <span class="text">Example</span>
+</h4> <p>Code:</p> <pre class="code" data-language="go">buf1 := utf8.AppendRune(nil, 0x10000)
+buf2 := utf8.AppendRune([]byte("init"), 0x10000)
+fmt.Println(string(buf1))
+fmt.Println(string(buf2))
+</pre> <p>Output:</p> <pre class="output" data-language="go">๐€€
+init๐€€
+</pre> <h2 id="DecodeLastRune">func <span>DecodeLastRune</span> </h2> <pre data-language="go">func DecodeLastRune(p []byte) (r rune, size int)</pre> <p>DecodeLastRune unpacks the last UTF-8 encoding in p and returns the rune and its width in bytes. If p is empty it returns (<a href="#RuneError">RuneError</a>, 0). Otherwise, if the encoding is invalid, it returns (RuneError, 1). Both are impossible results for correct, non-empty UTF-8. </p>
+<p>An encoding is invalid if it is incorrect UTF-8, encodes a rune that is out of range, or is not the shortest possible UTF-8 encoding for the value. No other validation is performed. </p> <h4 id="example_DecodeLastRune"> <span class="text">Example</span>
+</h4> <p>Code:</p> <pre class="code" data-language="go">b := []byte("Hello, ไธ–็•Œ")
+
+for len(b) &gt; 0 {
+ r, size := utf8.DecodeLastRune(b)
+ fmt.Printf("%c %v\n", r, size)
+
+ b = b[:len(b)-size]
+}
+</pre> <p>Output:</p> <pre class="output" data-language="go">็•Œ 3
+ไธ– 3
+ 1
+, 1
+o 1
+l 1
+l 1
+e 1
+H 1
+</pre> <h2 id="DecodeLastRuneInString">func <span>DecodeLastRuneInString</span> </h2> <pre data-language="go">func DecodeLastRuneInString(s string) (r rune, size int)</pre> <p>DecodeLastRuneInString is like <a href="#DecodeLastRune">DecodeLastRune</a> but its input is a string. If s is empty it returns (<a href="#RuneError">RuneError</a>, 0). Otherwise, if the encoding is invalid, it returns (RuneError, 1). Both are impossible results for correct, non-empty UTF-8. </p>
+<p>An encoding is invalid if it is incorrect UTF-8, encodes a rune that is out of range, or is not the shortest possible UTF-8 encoding for the value. No other validation is performed. </p> <h4 id="example_DecodeLastRuneInString"> <span class="text">Example</span>
+</h4> <p>Code:</p> <pre class="code" data-language="go">str := "Hello, ไธ–็•Œ"
+
+for len(str) &gt; 0 {
+ r, size := utf8.DecodeLastRuneInString(str)
+ fmt.Printf("%c %v\n", r, size)
+
+ str = str[:len(str)-size]
+}
+</pre> <p>Output:</p> <pre class="output" data-language="go">็•Œ 3
+ไธ– 3
+ 1
+, 1
+o 1
+l 1
+l 1
+e 1
+H 1
+</pre> <h2 id="DecodeRune">func <span>DecodeRune</span> </h2> <pre data-language="go">func DecodeRune(p []byte) (r rune, size int)</pre> <p>DecodeRune unpacks the first UTF-8 encoding in p and returns the rune and its width in bytes. If p is empty it returns (<a href="#RuneError">RuneError</a>, 0). Otherwise, if the encoding is invalid, it returns (RuneError, 1). Both are impossible results for correct, non-empty UTF-8. </p>
+<p>An encoding is invalid if it is incorrect UTF-8, encodes a rune that is out of range, or is not the shortest possible UTF-8 encoding for the value. No other validation is performed. </p> <h4 id="example_DecodeRune"> <span class="text">Example</span>
+</h4> <p>Code:</p> <pre class="code" data-language="go">b := []byte("Hello, ไธ–็•Œ")
+
+for len(b) &gt; 0 {
+ r, size := utf8.DecodeRune(b)
+ fmt.Printf("%c %v\n", r, size)
+
+ b = b[size:]
+}
+</pre> <p>Output:</p> <pre class="output" data-language="go">H 1
+e 1
+l 1
+l 1
+o 1
+, 1
+ 1
+ไธ– 3
+็•Œ 3
+</pre> <h2 id="DecodeRuneInString">func <span>DecodeRuneInString</span> </h2> <pre data-language="go">func DecodeRuneInString(s string) (r rune, size int)</pre> <p>DecodeRuneInString is like <a href="#DecodeRune">DecodeRune</a> but its input is a string. If s is empty it returns (<a href="#RuneError">RuneError</a>, 0). Otherwise, if the encoding is invalid, it returns (RuneError, 1). Both are impossible results for correct, non-empty UTF-8. </p>
+<p>An encoding is invalid if it is incorrect UTF-8, encodes a rune that is out of range, or is not the shortest possible UTF-8 encoding for the value. No other validation is performed. </p> <h4 id="example_DecodeRuneInString"> <span class="text">Example</span>
+</h4> <p>Code:</p> <pre class="code" data-language="go">str := "Hello, ไธ–็•Œ"
+
+for len(str) &gt; 0 {
+ r, size := utf8.DecodeRuneInString(str)
+ fmt.Printf("%c %v\n", r, size)
+
+ str = str[size:]
+}
+</pre> <p>Output:</p> <pre class="output" data-language="go">H 1
+e 1
+l 1
+l 1
+o 1
+, 1
+ 1
+ไธ– 3
+็•Œ 3
+</pre> <h2 id="EncodeRune">func <span>EncodeRune</span> </h2> <pre data-language="go">func EncodeRune(p []byte, r rune) int</pre> <p>EncodeRune writes into p (which must be large enough) the UTF-8 encoding of the rune. If the rune is out of range, it writes the encoding of <a href="#RuneError">RuneError</a>. It returns the number of bytes written. </p> <h4 id="example_EncodeRune"> <span class="text">Example</span>
+</h4> <p>Code:</p> <pre class="code" data-language="go">r := 'ไธ–'
+buf := make([]byte, 3)
+
+n := utf8.EncodeRune(buf, r)
+
+fmt.Println(buf)
+fmt.Println(n)
+</pre> <p>Output:</p> <pre class="output" data-language="go">[228 184 150]
+3
+</pre> <h4 id="example_EncodeRune_outOfRange"> <span class="text">Example (OutOfRange)</span>
+</h4> <p>Code:</p> <pre class="code" data-language="go">runes := []rune{
+ // Less than 0, out of range.
+ -1,
+ // Greater than 0x10FFFF, out of range.
+ 0x110000,
+ // The Unicode replacement character.
+ utf8.RuneError,
+}
+for i, c := range runes {
+ buf := make([]byte, 3)
+ size := utf8.EncodeRune(buf, c)
+ fmt.Printf("%d: %d %[2]s %d\n", i, buf, size)
+}
+</pre> <p>Output:</p> <pre class="output" data-language="go">0: [239 191 189] ๏ฟฝ 3
+1: [239 191 189] ๏ฟฝ 3
+2: [239 191 189] ๏ฟฝ 3
+</pre> <h2 id="FullRune">func <span>FullRune</span> </h2> <pre data-language="go">func FullRune(p []byte) bool</pre> <p>FullRune reports whether the bytes in p begin with a full UTF-8 encoding of a rune. An invalid encoding is considered a full Rune since it will convert as a width-1 error rune. </p> <h4 id="example_FullRune"> <span class="text">Example</span>
+</h4> <p>Code:</p> <pre class="code" data-language="go">buf := []byte{228, 184, 150} // ไธ–
+fmt.Println(utf8.FullRune(buf))
+fmt.Println(utf8.FullRune(buf[:2]))
+</pre> <p>Output:</p> <pre class="output" data-language="go">true
+false
+</pre> <h2 id="FullRuneInString">func <span>FullRuneInString</span> </h2> <pre data-language="go">func FullRuneInString(s string) bool</pre> <p>FullRuneInString is like FullRune but its input is a string. </p> <h4 id="example_FullRuneInString"> <span class="text">Example</span>
+</h4> <p>Code:</p> <pre class="code" data-language="go">str := "ไธ–"
+fmt.Println(utf8.FullRuneInString(str))
+fmt.Println(utf8.FullRuneInString(str[:2]))
+</pre> <p>Output:</p> <pre class="output" data-language="go">true
+false
+</pre> <h2 id="RuneCount">func <span>RuneCount</span> </h2> <pre data-language="go">func RuneCount(p []byte) int</pre> <p>RuneCount returns the number of runes in p. Erroneous and short encodings are treated as single runes of width 1 byte. </p> <h4 id="example_RuneCount"> <span class="text">Example</span>
+</h4> <p>Code:</p> <pre class="code" data-language="go">buf := []byte("Hello, ไธ–็•Œ")
+fmt.Println("bytes =", len(buf))
+fmt.Println("runes =", utf8.RuneCount(buf))
+</pre> <p>Output:</p> <pre class="output" data-language="go">bytes = 13
+runes = 9
+</pre> <h2 id="RuneCountInString">func <span>RuneCountInString</span> </h2> <pre data-language="go">func RuneCountInString(s string) (n int)</pre> <p>RuneCountInString is like <a href="#RuneCount">RuneCount</a> but its input is a string. </p> <h4 id="example_RuneCountInString"> <span class="text">Example</span>
+</h4> <p>Code:</p> <pre class="code" data-language="go">str := "Hello, ไธ–็•Œ"
+fmt.Println("bytes =", len(str))
+fmt.Println("runes =", utf8.RuneCountInString(str))
+</pre> <p>Output:</p> <pre class="output" data-language="go">bytes = 13
+runes = 9
+</pre> <h2 id="RuneLen">func <span>RuneLen</span> </h2> <pre data-language="go">func RuneLen(r rune) int</pre> <p>RuneLen returns the number of bytes required to encode the rune. It returns -1 if the rune is not a valid value to encode in UTF-8. </p> <h4 id="example_RuneLen"> <span class="text">Example</span>
+</h4> <p>Code:</p> <pre class="code" data-language="go">fmt.Println(utf8.RuneLen('a'))
+fmt.Println(utf8.RuneLen('็•Œ'))
+</pre> <p>Output:</p> <pre class="output" data-language="go">1
+3
+</pre> <h2 id="RuneStart">func <span>RuneStart</span> </h2> <pre data-language="go">func RuneStart(b byte) bool</pre> <p>RuneStart reports whether the byte could be the first byte of an encoded, possibly invalid rune. Second and subsequent bytes always have the top two bits set to 10. </p> <h4 id="example_RuneStart"> <span class="text">Example</span>
+</h4> <p>Code:</p> <pre class="code" data-language="go">buf := []byte("a็•Œ")
+fmt.Println(utf8.RuneStart(buf[0]))
+fmt.Println(utf8.RuneStart(buf[1]))
+fmt.Println(utf8.RuneStart(buf[2]))
+</pre> <p>Output:</p> <pre class="output" data-language="go">true
+true
+false
+</pre> <h2 id="Valid">func <span>Valid</span> </h2> <pre data-language="go">func Valid(p []byte) bool</pre> <p>Valid reports whether p consists entirely of valid UTF-8-encoded runes. </p> <h4 id="example_Valid"> <span class="text">Example</span>
+</h4> <p>Code:</p> <pre class="code" data-language="go">valid := []byte("Hello, ไธ–็•Œ")
+invalid := []byte{0xff, 0xfe, 0xfd}
+
+fmt.Println(utf8.Valid(valid))
+fmt.Println(utf8.Valid(invalid))
+</pre> <p>Output:</p> <pre class="output" data-language="go">true
+false
+</pre> <h2 id="ValidRune">func <span>ValidRune</span> <span title="Added in Go 1.1">1.1</span> </h2> <pre data-language="go">func ValidRune(r rune) bool</pre> <p>ValidRune reports whether r can be legally encoded as UTF-8. Code points that are out of range or a surrogate half are illegal. </p> <h4 id="example_ValidRune"> <span class="text">Example</span>
+</h4> <p>Code:</p> <pre class="code" data-language="go">valid := 'a'
+invalid := rune(0xfffffff)
+
+fmt.Println(utf8.ValidRune(valid))
+fmt.Println(utf8.ValidRune(invalid))
+</pre> <p>Output:</p> <pre class="output" data-language="go">true
+false
+</pre> <h2 id="ValidString">func <span>ValidString</span> </h2> <pre data-language="go">func ValidString(s string) bool</pre> <p>ValidString reports whether s consists entirely of valid UTF-8-encoded runes. </p> <h4 id="example_ValidString"> <span class="text">Example</span>
+</h4> <p>Code:</p> <pre class="code" data-language="go">valid := "Hello, ไธ–็•Œ"
+invalid := string([]byte{0xff, 0xfe, 0xfd})
+
+fmt.Println(utf8.ValidString(valid))
+fmt.Println(utf8.ValidString(invalid))
+</pre> <p>Output:</p> <pre class="output" data-language="go">true
+false
+</pre><div class="_attribution">
+ <p class="_attribution-p">
+ &copy; Google, Inc.<br>Licensed under the Creative Commons Attribution License 3.0.<br>
+ <a href="http://golang.org/pkg/unicode/utf8/" class="_attribution-link">http://golang.org/pkg/unicode/utf8/</a>
+ </p>
+</div>