\uxxxx 转换为中文 及 反向转换
Posted: 06 Jul 2014, 21:18
缘起: 网页(尤其是 JSON)中常有这种被转义(escape)过的中文,需要还原成中文。不知道论坛中是否已有人发过相同功能的脚本。
功能: 将 "\u7231\u5c14\u5170\u4e4b\u72d0" 这样的字符串转换为 "爱尔兰之狐",写得粗糙,欢迎拍砖
集结: 一开始只是分享一下准备自己用的脚本,没想到引出了两位大牛。经过改进及简单测试,选出了在 L 版(AutoHotkey_L)下速度最快的两个函数:
"\u7231\u5c14\u5170\u4e4b\u72d0" -> "爱尔兰之狐"
"爱尔兰之狐" -> "\u7231\u5c14\u5170\u4e4b\u72d0"
下面是最开始的版本,只支持从 \uXXXX 转换为文本,效率不高,唯一的优点是可以同时适用于原版和 L/H 版:
功能: 将 "\u7231\u5c14\u5170\u4e4b\u72d0" 这样的字符串转换为 "爱尔兰之狐",写得粗糙,欢迎拍砖
集结: 一开始只是分享一下准备自己用的脚本,没想引出两位大牛,经过改进及简单测试,选出在L版下速度最快的两种函数
"\u7231\u5c14\u5170\u4e4b\u72d0" -> "爱尔兰之狐"
Code: Select all
; Demo: decode a \uXXXX-escaped string and show the result in a message box.
escapedText := "\u7231\u5c14\u5170\u4e4b\u72d0"
MsgBox, % uXXXX2CN(escapedText)
return
uXXXX2CN(uXXXX) ; in: "\u7231\u5c14\u5170\u4e4b\u72d0" out: "爱尔兰之狐"
{ ; by RobertL
    ; Decodes every \uXXXX escape (backslash, lowercase u, exactly four hex
    ; digits) found in uXXXX into the character with that code, and returns
    ; the resulting string.
    ;
    ; Text that is not part of an escape is copied through unchanged, so mixed
    ; input such as JSON ({"name":"\u7231"}) is handled. Splitting the input on
    ; every "u" would instead feed ordinary text to Chr("0x...") and corrupt it.
    ;
    ; Returns "" for empty input. A doubled backslash ("\\u7231") is not treated
    ; specially: the "\u7231" part is still decoded.
    retStr := ""
    scanPos := 1
    ; In the regex, "\\u" is a literal backslash followed by "u"; the capture
    ; group holds the four hex digits and is exposed as esc1.
    while (hitPos := RegExMatch(uXXXX, "\\u([0-9A-Fa-f]{4})", esc, scanPos))
    {
        ; Copy the plain text between the previous escape and this one, then
        ; append the decoded character ("0x" prefix makes Chr read it as hex).
        retStr .= SubStr(uXXXX, scanPos, hitPos - scanPos) . Chr("0x" . esc1)
        scanPos := hitPos + 6 ; skip "\u" plus four digits
    }
    ; Append whatever plain text follows the last escape.
    return retStr . SubStr(uXXXX, scanPos)
}
Code: Select all
; Demo: encode a Chinese string as \uXXXX escapes and show the result.
plainText := "爱尔兰之狐"
MsgBox, % CN2uXXXX(plainText)
return
CN2uXXXX(cnStr) ; in: "爱尔兰之狐" out: "\u7231\u5C14\u5170\u4E4B\u72D0"
{ ; from tmplinshi
    ; Encodes every character of cnStr as a \uXXXX escape and returns the
    ; concatenated result ("" for empty input).
    ;
    ; The integer format is switched to hex for the duration of the loop so
    ; that Asc() yields "0x...." text; the caller's format is restored before
    ; returning.
    ;
    ; Each code is left-padded with zeros to four hex digits. Without the
    ; padding, characters below 0x1000 produce invalid escapes such as "\u61".
    out := ""
    OldFormat := A_FormatInteger
    SetFormat, Integer, Hex
    Loop, Parse, cnStr
    {
        ; Drop the leading "0x" from the formatted character code.
        hexDigits := SubStr(Asc(A_LoopField), 3)
        ; A StartingPos of -3 selects the last four characters of the string.
        out .= "\u" . SubStr("000" . hexDigits, -3)
    }
    SetFormat, Integer, %OldFormat%
    Return out
}
Code: Select all
#NoEnv
; Timing demo: decode one escaped string, measure the elapsed tick count
; around the call, and show both the duration and the decoded text.
escapedText := "\u7231\u5c14\u5170\u4e4b\u72d0"
startTick := A_TickCount
decodedText := uXXXX2CN(escapedText)
elapsedMs := A_TickCount - startTick
MsgBox, % "耗时: " . elapsedMs . " ms`n" . decodedText
return
uXXXX2CN(uXXXX) ; in: "\u7231\u5c14\u5170\u4e4b\u72d0" out: "爱尔兰之狐"
{
    ; Decodes a string made of \uXXXX escapes by writing each code unit
    ; directly into a UTF-16 buffer.
    ;
    ; On Unicode builds the buffer itself is returned as the string. On ANSI
    ; builds it is converted with Unicode2Ansi() using code page 0 first.
    ;
    ; Every "\u" is replaced by "#" so the input can be split on one
    ; character; UseErrorLevel replaces all occurrences and leaves the number
    ; of replacements in ErrorLevel.
    StringReplace, uXXXX, uXXXX, \u, #, UseErrorLevel
    escCount := ErrorLevel

    ; Splitting on "#" yields at most escCount + 1 non-empty fields. One extra
    ; 16-bit slot is reserved so the zero-filled buffer always ends in a wide
    ; NUL, which the ANSI conversion relies on to find the end of the string.
    VarSetCapacity(UUU, (escCount + 2) * 2, 0)

    byteOffset := 0
    Loop, Parse, uXXXX, #
    {
        if (A_LoopField = "")
            continue
        ; "UShort" writes exactly one 16-bit code unit. NumPut's default type
        ; (UInt) would write four bytes and spill past the current slot.
        NumPut("0x" . A_LoopField, &UUU, byteOffset, "UShort")
        byteOffset += 2
    }

    if (A_IsUnicode)
    {
        ; The buffer was filled behind the variable's back; -1 makes AutoHotkey
        ; recompute the stored string length from the actual contents.
        VarSetCapacity(UUU, -1)
        return UUU
    }
    Unicode2Ansi(UUU, rUUU, 0)
    return rUUU
}
Unicode2Ansi(ByRef wString, ByRef sString, CP = 0) ; this function was copied from the forum
{
    ; Converts the NUL-terminated UTF-16 text stored in wString to a
    ; multi-byte string in code page CP via the Win32 WideCharToMultiByte API,
    ; and stores the result in sString.
    ;
    ; wString - variable whose buffer holds the wide-character text.
    ; sString - output variable; set to "" when the size query fails.
    ; CP      - code page identifier passed straight to the API (default 0).
    ;
    ; Pointer-sized arguments use "Ptr" where the interpreter defines
    ; A_PtrSize, so addresses are not truncated on 64-bit builds; interpreters
    ; without A_PtrSize fall back to "UInt".
    ptrType := A_PtrSize ? "Ptr" : "UInt"

    ; First call: output buffer pointer and size are 0, which asks the API for
    ; the required size in bytes (terminator included, since the input length
    ; is given as -1).
    nSize := DllCall("WideCharToMultiByte", "UInt", CP, "UInt", 0, ptrType, &wString, "Int", -1, ptrType, 0, "Int", 0, ptrType, 0, ptrType, 0)
    if !nSize
    {
        sString := ""
        return
    }

    ; Second call: perform the conversion into the freshly sized buffer.
    VarSetCapacity(sString, nSize)
    DllCall("WideCharToMultiByte", "UInt", CP, "UInt", 0, ptrType, &wString, "Int", -1, "Str", sString, "Int", nSize, ptrType, 0, ptrType, 0)
}