Python如何读取16进制byte数据_Python

如何读取16进制byte数据

小弟最近在做网络编程的时候，遇到了一些byte数据需要储存，但是不是常见的str字符对应的byte，而是类似于b'\x00\xff\xfe\x01'这样的数据。查找资料后发现，这是用\xnn十六进制转义来表示的bytes数据。用str()可以直接把它转成字符串（得到的是它的字面量表示形式），但是再用bytes()转回去时，反斜杠会被当作普通字符重新编码，于是出现莫名其妙的双斜杠，很是头疼。

				?

									a = b'\x00\xef\xa2\xa0\xb3\x8b\x9d\x1e\xf8\x98\x19\x39\xd9\x9d\xfdABCDabcd'

									b = str(a)

									print(b)

									>>> b'\x00\xef\xa2\xa0\xb3\x8b\x9d\x1e\xf8\x98\x199\xd9\x9d\xfdABCDabcd'

									print(bytes(b,'utf8'))

									>>> b"b'\\x00\\xef\\xa2\\xa0\\xb3\\x8b\\x9d\\x1e\\xf8\\x98\\x199\\xd9\\x9d\\xfdABCDabcd'"

尝试写入文件，再读取也是如此，因为写进去的形式就是str字符

				?

									# 写入data.txt

									a = b'\x00\xef\xa2\xa0\xb3\x8b\x9d\x1e\xf8\x98\x19\x39\xd9\x9d\xfdABCDabcd'

									with open('data.txt','w') as p:

									    p.write(str(a))

									# 读取data.txt

									with open('data.txt','r') as p:

									    line = p.readline()

									print(line, type(line) == str)

									>>> b'\x00\xef\xa2\xa0\xb3\x8b\x9d\x1e\xf8\x98\x199\xd9\x9d\xfdABCDabcd\\' True

									print(bytes(line,'utf8'))

									>>> b"b'\\x00\\xef\\xa2\\xa0\\xb3\\x8b\\x9d\\x1e\\xf8\\x98\\x199\\xd9\\x9d\\xfdABCDabcd\\\\'"

观察了一下ASCII码，发现主要还是因为\xnn这样的转义序列在转成str之后，被当成了反斜杠、x和两个十六进制数字这四个普通字符来储存，原本的一个字节就变成了四个字符。

这样解码的时候就被拆开逐个字符处理了，但是\xnn这种形式应该整体看作一个字节（值为十六进制的nn），于是我写了个转义的逻辑进行读取：

				?

									def readbytetxt(filename):
									    r"""Recover the bytes object saved as ``str(some_bytes)`` in a text file.
									
									    Only the first line of the file is decoded.  That line is expected to
									    hold a bytes literal such as ``b'\x00\xefABCD'``, i.e. exactly what
									    ``str()``/``repr()`` produces for a bytes object.
									
									    Args:
									        filename: Path of the text file to read.
									
									    Returns:
									        The decoded bytes, ``b''`` if the first line is blank, or ``None``
									        if the file is empty.
									
									    Raises:
									        ValueError: If the first line is not a valid bytes literal.
									    """
									    # Function-scope import keeps this snippet self-contained.
									    import ast
									    with open(filename, 'r') as p:
									        line = p.readline()
									    if not line:
									        # Empty file: there is nothing to decode.
									        return None
									    text = line.strip()
									    if not text:
									        return b''
									    # literal_eval parses every escape that repr() can emit (\xnn, \\, \n,
									    # \t, \', ...) in either hex case, and unlike eval() it never runs code.
									    try:
									        value = ast.literal_eval(text)
									    except (SyntaxError, ValueError) as err:
									        raise ValueError('not a bytes literal: %r' % text) from err
									    if not isinstance(value, bytes):
									        raise ValueError('not a bytes literal: %r' % text)
									    return value

									print(readbytetxt('data.txt'))

									>>> b'\x00\xef\xa2\xa0\xb3\x8b\x9d\x1e\xf8\x98\x19\x39\xd9\x9d\xfdABCDabcd'

问题解决了！基本就是写了个遍历，然后遇到\x就把16进制转成十进制的int，然后解码成bytes，这样常见的十六进制格式基本都能调用了。

后来发现除了\x还有其他的转义字符，比如\\，\n，如果不添加转变逻辑的话，依然会出现不识别的问题，于是重写了一下函数，支持了常见的大部分转义字符，并且写成了生成器输出。

				?

									def readbytetxt2(filename):
									    r"""Yield the bytes object stored on each line of a text file.
									
									    Every line is expected to hold a bytes literal as produced by
									    ``str(some_bytes)``, for example ``b'\x00\xefthe first line\n'``.
									
									    Args:
									        filename: Path of the text file to read.
									
									    Yields:
									        One bytes object per line, in file order.  A blank line yields
									        ``b''``.
									
									    Raises:
									        ValueError: If a non-blank line is not a valid bytes literal.
									    """
									    # Function-scope import keeps this snippet self-contained.
									    import ast
									    with open(filename, 'r') as p:
									        for lineno, line in enumerate(p, 1):
									            text = line.strip()
									            if not text:
									                yield b''
									                continue
									            # A hand-written escape table is easy to get wrong (a missing
									            # \t, a wrong skip width for octal escapes).  literal_eval
									            # implements the full bytes-literal grammar and never runs code.
									            try:
									                value = ast.literal_eval(text)
									            except (SyntaxError, ValueError) as err:
									                raise ValueError(
									                    'line %d is not a bytes literal: %r' % (lineno, text)
									                ) from err
									            if not isinstance(value, bytes):
									                raise ValueError(
									                    'line %d is not a bytes literal: %r' % (lineno, text)
									                )
									            yield value

									a = b'\x00\xef\xa2\xa0\xb3\x8b\x9d\x1e\xf8\x98\x19\x39\xd9\x9d\xfdthe first line\n\r\a\b\t\\\f\'\"\v\b\n\000'

									b = b'\xa0\xdf\xa2\xa0\xb3\x8b\x9d\x1e\xf8\x98\x19\x39\xd9\x9d\xfdthe second line\nn'

									c = b'\xe0\xaf\xa2\xa0\xb3\x8b\x9d\x1e\xf8\x98\x19\x39\xd9\x9d\xfdthe third line\\'

									with open('data.txt','w') as p:

									    p.write(str(a)+'\n')

									    p.write(str(b)+'\n')

									    p.write(str(c))

									line = readbytetxt2('data.txt')

									print([a for a in line])

									>>> [b'\x00\xef\xa2\xa0\xb3\x8b\x9d\x1e\xf8\x98\x199\xd9\x9d\xfdthe first line\n\r\x07\x08\\t\\\x0c\'"\x0b\x08\n\x00', b'\xa0\xdf\xa2\xa0\xb3\x8b\x9d\x1e\xf8\x98\x199\xd9\x9d\xfdthe second line\nn', b'\xe0\xaf\xa2\xa0\xb3\x8b\x9d\x1e\xf8\x98\x199\xd9\x9d\xfdthe third line\\']

基本上至此为止，大部分编码形式都可以搞定了。

但是……其实还有一个更简单的方式！因为万恶之源就是bytes转成str之后，各种转义字符不清不楚，而我想要的只是把bytes存进文件，再原样以bytes读出来。bytes里的每个字节本来就是一个0到255之间的整数，说到底只要能存数字就可以了！所以写了个更简单的方法：直接把每个字节转成数字，存成数字列表就好！

				?

									L = []

									a = b'\x00\xef\xa2\xa0\xb3\x8b\x9d\x1e\xf8\x98\x19\x39\xd9\x9d\xfdthe first line\n\r\a\b\t\\\f\'\"\v\b\n\000'

									print(a)

									for each in a:

									    L.append(int(each))

									with open('data.txt','w') as p:

									    p.write(str(L))

									print(L)

									>>> [0, 239, 162, 160, 179, 139, 157, 30, 248, 152, 25, 57, 217, 157, 253, 116, 104, 101, 32, 102, 105, 114, 115, 116, 32, 108, 105, 110, 101, 10, 13, 7, 8, 9, 92, 12, 39, 34, 11, 8, 10, 0]

									with open('data.txt','r') as p:

									    line = p.readline()

									print(b''.join([bytes([int(i)]) for i in line[1:-1].split(',')]))

									>>> b'\x00\xef\xa2\xa0\xb3\x8b\x9d\x1e\xf8\x98\x199\xd9\x9d\xfdthe first line\n\r\x07\x08\t\\\x0c\'"\x0b\x08\n\x00'