ruby for js unescape
最近由于项目接触到js escape后的字符串,需要解析,google了很多都没有查到具体的,于是便写了解析的方法
注:
escape() will not encode: @*/+
encodeURI() will not encode: ~!@#$&*()=:/,;?+'
encodeURIComponent() will not encode: ~!*()'
废话不多说,奉上代码:
1.ruby版本
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
require 'rubygems'
require 'cgi'
# NOTE: `require 'iconv'` was dropped on purpose. Iconv was removed from the
# Ruby standard library in 2.0; String#encode does the transcoding instead.

# Decodes a string produced by JavaScript's escape() into UTF-8 text.
#
# Two escape forms are recognised:
# * "%uXXXX" - one UTF-16 code unit written as four hex digits. Consecutive
#   units are decoded together so that surrogate pairs (characters outside
#   the Basic Multilingual Plane) come out as a single character. A lone
#   surrogate is replaced with U+FFFD instead of raising.
# * "%XX"    - one code point in the range 0x00..0xFF (Latin-1).
#
# Anything that is not a well-formed escape (for example a trailing "%" or
# "%zz") is copied through unchanged, which mirrors JavaScript's unescape().
#
# @param string [String] the escaped text
# @return [String] a new string with every escape sequence decoded
def utf8RawUrlDecode(string)
  string.gsub(/((?:%u[0-9A-Fa-f]{4})+)|%([0-9A-Fa-f]{2})/) do
    # Read both captures right away: the scan below overwrites the last match.
    unicode_run, byte_hex = Regexp.last_match.captures

    if unicode_run
      code_units = unicode_run.scan(/[0-9A-Fa-f]{4}/).map(&:hex)
      # Pack as big-endian 16-bit units and let the transcoder pair surrogates.
      code_units.pack('n*')
                .force_encoding(Encoding::UTF_16BE)
                .encode(Encoding::UTF_8, invalid: :replace, undef: :replace)
    else
      # %XX names a code point, not a raw byte, so build a UTF-8 character.
      byte_hex.hex.chr(Encoding::UTF_8)
    end
  end
end
2.php版本
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
/**
 * Decodes a string produced by JavaScript's escape() into UTF-8 text.
 *
 * Two escape forms are recognised:
 *  - "%uXXXX": one UTF-16 code unit written as four hex digits. Consecutive
 *    units are decoded together so that surrogate pairs combine into a single
 *    character; a lone surrogate becomes U+FFFD.
 *  - "%XX": one code point in the range 0x00..0xFF (Latin-1).
 *
 * Anything that is not a well-formed escape is copied through unchanged,
 * which mirrors JavaScript's unescape(). The function produces no output of
 * its own; the decoded text is only returned.
 *
 * @param string $source The escaped text.
 * @return string The decoded text, encoded as UTF-8.
 */
function utf8RawUrlDecode($source)
{
    // Encode a single Unicode code point as a UTF-8 byte sequence.
    $encodeCodePoint = function ($codePoint) {
        if ($codePoint < 0x80) {
            return chr($codePoint);
        }
        if ($codePoint < 0x800) {
            return chr(0xC0 | ($codePoint >> 6))
                . chr(0x80 | ($codePoint & 0x3F));
        }
        if ($codePoint < 0x10000) {
            return chr(0xE0 | ($codePoint >> 12))
                . chr(0x80 | (($codePoint >> 6) & 0x3F))
                . chr(0x80 | ($codePoint & 0x3F));
        }
        return chr(0xF0 | ($codePoint >> 18))
            . chr(0x80 | (($codePoint >> 12) & 0x3F))
            . chr(0x80 | (($codePoint >> 6) & 0x3F))
            . chr(0x80 | ($codePoint & 0x3F));
    };

    return preg_replace_callback(
        '/((?:%u[0-9A-Fa-f]{4})+)|%([0-9A-Fa-f]{2})/',
        function ($match) use ($encodeCodePoint) {
            // Group 1 is empty when the "%XX" alternative matched.
            if ($match[1] === '') {
                return $encodeCodePoint(hexdec($match[2]));
            }

            // Strip the "%u" markers, leaving a flat run of 4-digit hex units.
            $units = array_map('hexdec', str_split(str_replace('%u', '', $match[1]), 4));
            $count = count($units);
            $decoded = '';

            for ($i = 0; $i < $count; $i++) {
                $codePoint = $units[$i];
                $isHigh = $codePoint >= 0xD800 && $codePoint <= 0xDBFF;
                $nextIsLow = $i + 1 < $count
                    && $units[$i + 1] >= 0xDC00
                    && $units[$i + 1] <= 0xDFFF;

                if ($isHigh && $nextIsLow) {
                    // Combine the surrogate pair into one supplementary code point.
                    $codePoint = 0x10000
                        + (($codePoint - 0xD800) << 10)
                        + ($units[$i + 1] - 0xDC00);
                    $i++;
                } elseif ($codePoint >= 0xD800 && $codePoint <= 0xDFFF) {
                    // A lone surrogate has no UTF-8 form; use the replacement character.
                    $codePoint = 0xFFFD;
                }

                $decoded .= $encodeCodePoint($codePoint);
            }

            return $decoded;
        },
        (string) $source
    );
}