Go语言中string和[]byte的转换原理

byte类型

我们先看一下官方对 byte 的定义:
// byte is an alias for uint8 and is equivalent to uint8 in all ways. It is
// used, by convention, to distinguish byte values from 8-bit unsigned
// integer values.
type byte = uint8
可以看到,byte 就是 uint8 的别名,按照惯例用来区分字节值和 8 位无符号整数值。
其实可以把 byte 当作一个 ASCII 码字符来使用。

示例:

// The three declarations below are alternatives that all give ch the same
// value (65 == 0x41 == 'A'); they redeclare ch, so use only one at a time.

// Decimal integer literal.
var ch byte = 65
// Hexadecimal escape inside a character literal.
var ch byte = '\x41'
// Plain character literal.
var ch byte = 'A'
[]byte类型

[]byte 就是一个 byte 类型的切片,切片本质上也是一个结构体,定义如下:
// src/runtime/slice.go

// slice is the runtime's in-memory representation of a slice value.
type slice struct {
    // array points to the first element of the backing array.
    array unsafe.Pointer
    // len is the number of elements currently in the slice.
    len   int
    // cap is the number of elements the backing array can hold.
    cap   int
}
这里简单说明一下这几个字段:array 是指向底层数组的指针,len 是切片的长度,cap 是切片的容量。看一个简单的示例:
// main builds a two-byte slice and prints it.
func main() {
	// Length 0, capacity 2: the backing array already has room for both bytes.
	buf := make([]byte, 0, 2)
	// Both bytes fit in the existing capacity, so no reallocation happens.
	buf = append(buf, 'A', 'B')
	// A []byte is printed as its numeric element values.
	fmt.Println(buf)
}

根据这个例子我们可以画一个图:

string类型

先来看一下 string 的官方定义:
// string is the set of all strings of 8-bit bytes, conventionally but not
// necessarily representing UTF-8-encoded text. A string may be empty, but
// not nil. Values of string type are immutable.
type string string
string 是 8 位字节的集合,通常(但不一定)表示 UTF-8 编码的文本。string 可以为空,但不能为 nil;string 的值是不可变的。

看一个简单的例子:

// main declares a string variable and prints it.
func main() {
	var str = "asong"
	fmt.Println(str)
}
string 类型本质上也是一个结构体,定义如下:
// stringStruct is the runtime's in-memory representation of a string value.
type stringStruct struct {
    // str points to the first byte of the string's data.
    str unsafe.Pointer
    // len is the length of the string in bytes.
    len int
}
stringStruct 和 slice 很相似:str 指针指向某个数组的首地址,len 表示该数组的长度。既然和 slice 这么像,它底层指向的是什么数组呢?我们看看它在实例化时调用的方法:
// gostringnocopy builds a string from the byte pointer str without copying
// the bytes it points to.
//
// The length comes from findnull(str). findnull is defined outside this
// snippet; presumably it scans for a terminating NUL byte (confirm against
// runtime/string.go).
//
//go:nosplit
func gostringnocopy(str *byte) string {
	// Fill in a string header by hand: data pointer plus length.
	ss := stringStruct{str: unsafe.Pointer(str), len: findnull(str)}
	// Reinterpret the header as a string value; no byte data is copied.
	s := *(*string)(unsafe.Pointer(&ss))
	return s
}
入参是一个 byte 类型的指针,由此可以看出 string 的底层结构就是一个 byte 数组,所以可以画出这样一张图:

(原文此处为示意图,图片转存失败:stringStruct 的 str 指针指向底层 byte 数组的首地址,len 记录该数组的长度。)

string和[]byte有什么区别

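上面我们分析了 string 类型,它的底层本质上就是一个 byte 类型的数组。那么问题来了:string 类型为什么还要在数组的基础上再做一层封装呢?
这是因为在 Go 语言中 string 类型被设计为不可变的(不仅是 Go,很多其他语言中的 string 也是如此)。这样做的好处是:在并发场景下,可以在不加锁的情况下多次使用同一个字符串,既能高效共享,又不用担心安全问题。
string 类型虽然不能被修改,但是可以被替换:stringStruct 中的 str 指针可以指向新的地址,只是指针所指向的内容不能被修改。看个例子: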
// main prints the address of the byte slice obtained by converting str,
// once before and once after str is reassigned.
//
// With %p a slice is printed as the address of its first element, and each
// []byte(str) conversion here yields a separate copy of the string's bytes,
// so the two lines show the addresses of those copies.
func main() {
	str := "song"
	before := []byte(str)
	fmt.Printf("%p\n", before)

	str = "asong"
	after := []byte(str)
	fmt.Printf("%p\n", after)
}
// 运行结果
0xc00001a090
0xc00001a098
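可以看到两次打印出来的地址不同。需要注意的是,[]byte(str) 每次转换都会拷贝出一块新的内存,这里打印的是拷贝得到的切片的底层数组地址。对 str 重新赋值时,改变的是 stringStruct 中的 str 指针,让它指向新的字符串数据,原字符串的内容并没有被修改;如果字符串的数据是在运行时动态分配的,那么旧的数据在不再被引用之后会被 gc 回收。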

string和[]byte标准转换

Go 语言中提供了标准方式对 string 和 []byte 进行转换:
// main round-trips a string through []byte using the built-in conversions
// and prints the result.
func main() {
	str := "asong"

	// string -> []byte
	by := []byte(str)

	// []byte -> string
	str1 := string(by)

	fmt.Println(str1)
}

标准转换用起来还是比较简单的,那你知道他们内部是怎样实现转换的吗?我们来分析一下:

string类型转换到[]byte类型

对上面的代码执行 go tool compile -N -l -S ./string_to_byte/string.go,可以看到调用的是 runtime.stringtoslicebyte:
// runtime/string.go go 1.15.7

// tmpStringBufSize is the size, in bytes, of the temporary buffer type tmpBuf.
const tmpStringBufSize = 32

// tmpBuf is a fixed-size byte array that callers may pass in as scratch
// space for short conversions.
type tmpBuf [tmpStringBufSize]byte

// stringtoslicebyte returns a []byte holding a copy of the bytes of s.
//
// If buf is non-nil and s fits in it, the result is a slice of buf;
// otherwise a new slice is obtained from rawbyteslice. Either way the
// string's bytes are copied into the result.
func stringtoslicebyte(buf *tmpBuf, s string) []byte {
	var b []byte
	if buf != nil && len(s) <= len(buf) {
		// Zero the whole buffer before reusing it, then take the first
		// len(s) bytes as the result slice.
		*buf = tmpBuf{}
		b = buf[:len(s)]
	} else {
		// No buffer supplied, or s is longer than the buffer.
		b = rawbyteslice(len(s))
	}
	// The conversion always copies the string data.
	copy(b, s)
	return b
}
// rawbyteslice allocates a new byte slice. The byte slice is not zeroed.
//
// The returned slice has length size. Its capacity is the value that
// roundupsize reports for size; roundupsize is defined elsewhere, and
// presumably rounds up to an allocator size class (confirm against the
// runtime source).
func rawbyteslice(size int) (b []byte) {
	cap := roundupsize(uintptr(size))
	// NOTE(review): the final false argument presumably means "do not zero",
	// matching the comment above — confirm against mallocgc's signature.
	p := mallocgc(cap, nil, false)
	if cap != uintptr(size) {
		// Clear only the tail beyond the requested length (bytes size..cap).
		memclrNoHeapPointers(add(p, uintptr(size)), cap-uintptr(size))
	}

	// Write the slice header (pointer, len, cap) directly into b.
	*(*slice)(unsafe.Pointer(&b)) = slice{p, size, int(cap)}
	return
}
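这里分两种情况:如果调用方传入了非空的 buf,并且字符串的长度不超过 32(tmpStringBufSize),就直接使用这块长度为 32 的临时缓冲区作为 []byte 的底层数组;否则(buf 为空,或者字符串长度超过 32)就调用 rawbyteslice 重新申请一块内存。
最后通过 copy 函数把字符串的内容拷贝到 []byte 中,copy 对应的实现是 src/runtime/slice.go 中的 slicestringcopy 方法。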
[]byte类型转换到string类型

[]byte 类型转换到 string 类型,本质上调用的是 runtime.slicebytetostring:
// (Code not relevant to this discussion has been omitted from the snippet below.)

// slicebytetostring builds a string from the n bytes starting at ptr,
// copying the data except in the zero- and one-byte cases.
//
// buf, when non-nil and large enough, is used as the string's storage
// instead of a fresh allocation.
func slicebytetostring(buf *tmpBuf, ptr *byte, n int) (str string) {
	if n == 0 {
		return ""
	}
	if n == 1 {
		// Single byte: point the string at the staticuint64s entry indexed
		// by the byte's value, so this branch neither allocates nor copies.
		p := unsafe.Pointer(&staticuint64s[*ptr])
		if sys.BigEndian {
			// NOTE(review): presumably skips to the low-order byte of an
			// 8-byte entry on big-endian machines — confirm against the
			// definition of staticuint64s.
			p = add(p, 7)
		}
		stringStructOf(&str).str = p
		stringStructOf(&str).len = 1
		return
	}

	var p unsafe.Pointer
	if buf != nil && n <= len(buf) {
		// The caller-supplied buffer is big enough; use it as storage.
		p = unsafe.Pointer(buf)
	} else {
		p = mallocgc(uintptr(n), nil, false)
	}
	// Fill in the string header, then copy the n bytes into its storage.
	stringStructOf(&str).str = p
	stringStructOf(&str).len = n
	memmove(p, unsafe.Pointer(ptr), uintptr(n))
	return
}
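从这段代码可以看出:长度为 0 时直接返回空字符串;长度为 1 时直接指向 staticuint64s 中的静态数据;其余情况会根据 []byte 的长度决定是使用 buf 还是重新申请内存,最后通过 memmove 把数据拷贝到字符串中。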

string和[]byte强转换

标准的转换方法都会发生内存拷贝,所以为了减少内存拷贝和内存申请,我们可以使用强转换的方式对两者进行转换。在标准库(runtime)中就有这两种转换的实现:

// runtime/string.go

// slicebytetostringtmp returns a string whose data pointer is ptr and whose
// length is n. Nothing is copied: the header fields are set directly, so the
// string refers to the same bytes that ptr points to.
func slicebytetostringtmp(ptr *byte, n int) (str string) {
	stringStructOf(&str).str = unsafe.Pointer(ptr)
	stringStructOf(&str).len = n
	return
}

// stringtoslicebytetmp returns a []byte that refers to the same bytes as s.
// Nothing is copied: a slice header is built from the string's data pointer
// and length, with cap set equal to len.
func stringtoslicebytetmp(s string) []byte {
    // View the string value through its header layout.
    str := (*stringStruct)(unsafe.Pointer(&s))
    ret := slice{array: unsafe.Pointer(str.str), len: str.len, cap: str.len}
    // Reinterpret the hand-built header as a []byte value.
    return *(*[]byte)(unsafe.Pointer(&ret))
}
通过以上代码可以看出,强转换主要是借助 unsafe.Pointer 做指针类型替换来完成的。之所以可以这样做,是因为 string 和 slice 的结构字段是相似的:
// stringStruct is the header layout of a string: a data pointer followed by
// a length.
type stringStruct struct {
    // str is the data pointer (first field).
    str unsafe.Pointer
    // len is the length in bytes (second field).
    len int
}
// slice is the header layout of a slice: a data pointer and a length,
// followed by a capacity.
type slice struct {
    // array is the data pointer (first field).
    array unsafe.Pointer
    // len is the element count (second field).
    len   int
    // cap is the capacity (third field).
    cap   int
}
两者唯一的不同是 slice 多了一个 cap 字段;array 和 str 相对应,len 也相对应,所以它们在内存布局上是对齐的,可以直接通过 unsafe.Pointer 做指针替换。

两种转换如何取舍

当然是推荐大家使用标准转换方式了,毕竟标准转换方式是更安全的!但是如果你是在高性能场景下使用,是可以考虑使用强转换的方式的,不过要注意强转换的使用方式,它不是安全的,这里举个例子:

// stringtoslicebytetmp returns a []byte that shares the underlying bytes of s
// instead of copying them. The result has len and cap equal to len(s).
//
// The returned slice must be treated as read-only: it aliases the string's
// data, which the language defines as immutable.
func stringtoslicebytetmp(s string) []byte {
	var b []byte
	// Overlay the header types on a real string and a real slice variable
	// rather than building a standalone reflect.SliceHeader value: the
	// unsafe.Pointer rules state that these header structs must not be
	// declared as plain variables, because their Data field is a bare uintptr
	// that does not keep the referenced memory alive.
	src := (*reflect.StringHeader)(unsafe.Pointer(&s))
	dst := (*reflect.SliceHeader)(unsafe.Pointer(&b))
	dst.Data = src.Data
	dst.Len = src.Len
	dst.Cap = src.Len
	return b
}

// main obtains a byte slice that aliases a string's data via
// stringtoslicebytetmp and then writes to its first element.
func main() {
	str := "hello"
	by := stringtoslicebytetmp(str)
	// by was produced without a copy, so this store targets the bytes that
	// back str itself.
	by[0] = 'H'
}

运行结果:

unexpected fault address 0x109d65f
fatal error: fault
[signal SIGBUS: bus error code=0x2 addr=0x109d65f pc=0x107eabc]
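可以看到程序直接发生了严重错误,即使使用 defer + recover 也无法捕获。
原因是:前面介绍过 string 类型是不可变的,也就是它的底层数据不能被修改。这里使用的是强转换的方式,by 直接指向了 str 的底层数组,对这个数组中的元素进行修改就会触发上面的错误,导致整个程序 down 掉。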

总结

本文我们一起分析了 byte 和 string 类型的基本定义,也分析了 []byte 和 string 之间的两种转换方式:标准转换会发生内存拷贝,更加安全;强转换没有内存拷贝,性能更高,但使用不当会导致程序崩溃。