unicode 与 utf8 互转

发布时间: 2023-06-21 09:40:18 作者: 万物有序
  1 <!DOCTYPE html>
  2 <html lang="en">
  3   <head>
  4     <meta charset="UTF-8" />
  5     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
  6     <title>Document</title>
  7   </head>
  8   <body>
  9     <script>
 10       function binaryToHex(binary) {
 11         if (/[^01]/.test(binary)) return;
 12         const bytes = binary.match(/\d+?(?=(?:\d{8})*$)/g);
 13         const hex = bytes.map((binary) => (+("0b" + binary)).toString(16));
 14         return hex.join("");
 15       }
 16 
 17       function hexToBinary(hex) {
 18         if (/[^0-9a-fA-F]/.test(hex)) return;
 19         const bytes = hex.match(/\w+?(?=(?:\w{2})*$)/g);
 20         const binary = bytes.map((hex) =>
 21           (+("0x" + hex)).toString(2).padStart(8, "0")
 22         );
 23         return binary.join("").replace(/^0+/, "");
 24       }
 25 
 26       let binary = "强".charCodeAt(0).toString(2),
 27         hex = escape("强").replace(/u|%/g, "").toLowerCase();
 28 
 29       console.log(binaryToHex(binary) === hex);
 30       console.log(hexToBinary(hex) === binary);
 31 
 32       function intCompare(a, b) {
 33         const isBinary = /^[01]+$/,
 34           notHex = /[^0-9a-f-A-F]/;
 35         if (isBinary.test(a)) a = binaryToHex(a);
 36         if (isBinary.test(b)) b = binaryToHex(b);
 37         if (notHex.test(a) || notHex.test(b)) return;
 38         if (a === b) return true;
 39         const aLength = a.length,
 40           bLength = b.length;
 41         if (aLength !== bLength) return aLength > bLength ? true : false;
 42         for (let n = 0; n < aLength; n++) {
 43           /* 字符串比较的是 unicode */
 44           if (a[n] > b[n]) return true;
 45           if (b[n] > a[n]) return false;
 46         }
 47       }
 48 
 49       function unicodeToUtf8(unicode) {
 50         if (
 51           typeof unicode !== "string" ||
 52           /[^0-9a-fA-F]/.test(utf8) ||
 53           unicode.length > 4 ||
 54           /* 这里是不可打印字符 */
 55           utf8.length == 1 ||
 56           (utf8.length == 2 && intCompare("1f", utf8))
 57         )
 58           return;
 59         const ranges = ["7f", "7ff", "ffff" /* '10ffff' */];
 60         if (intCompare(ranges[0], unicode)) return unicode;
 61         const utf8Bytes = intCompare(ranges[1], unicode)
 62           ? 2
 63           : intCompare(ranges[2], unicode)
 64           ? 3
 65           : 4;
 66 
 67         let unicodeBytes = hexToBinary(unicode).match(/\d+?(?=(?:\d{6})*$)/g);
 68 
 69         let firstBinary =
 70           "1".repeat(utf8Bytes) + unicodeBytes[0].padStart(8 - utf8Bytes, "0");
 71 
 72         let restBinary = unicodeBytes
 73           .slice(1)
 74           .map((binary) => "10" + binary)
 75           .join("");
 76 
 77         return binaryToHex(firstBinary + restBinary);
 78       }
 79 
 80       function utf8ToUnicode(utf8) {
 81         if (
 82           typeof utf8 !== "string" ||
 83           /[^0-9a-fA-F]/.test(utf8) ||
 84           utf8.length > 8 ||
 85           /* 这里是不可打印字符 */
 86           utf8.length == 1 ||
 87           (utf8.length == 2 && intCompare("1f", utf8))
 88         )
 89           return;
 90         if (utf8.length === 2) return utf8;
 91         const utf8Bytes = utf8.match(/\w+?(?=(?:\w{2})*$)/g);
 92         const firstBinary = hexToBinary(utf8Bytes[0])
 93           .slice(utf8Bytes.length + 1)
 94           .replace(/^0+/, "");
 95 
 96         const restBinary = utf8Bytes
 97           .slice(1)
 98           .map((hex) => {
 99             return hexToBinary(hex).slice(2).replace(/^0+/, "");
100           })
101           .join("");
102 
103         return binaryToHex(firstBinary + restBinary);
104       }
105 
106       const utf8 = encodeURI("强").replace(/%/g, "").toLowerCase(),
107         unicode = "强".charCodeAt(0).toString(16);
108 
109       console.log(unicodeToUtf8(unicode) === utf8);
110       console.log(utf8ToUnicode(utf8) === unicode);
111     </script>
112   </body>
113 </html>