C# 读取文件并自动判断文件的编码

发布时间: 2023-12-21 17:11:32 | 作者: 懒人境界
using System;
using System.IO;
using System.Text;

/// <summary>
/// Reads a file as raw bytes, chooses a text encoding from the file's
/// byte-order mark (BOM), and prints the decoded text to the console.
/// </summary>
class Program
{
    /// <summary>
    /// Entry point: loads the file, detects its encoding and writes the
    /// decoded content to standard output.
    /// </summary>
    static void Main()
    {
        string filePath = "path_to_your_file"; // replace with the path of your file

        // Read the raw bytes of the file.
        byte[] fileBytes = File.ReadAllBytes(filePath);

        // Choose the encoding from the BOM, if there is one.
        Encoding encoding = DetectFileEncoding(fileBytes);

        // Decode the content without the BOM, otherwise the BOM would show up
        // as a leading U+FEFF character in the resulting string.
        string fileContent = DecodeSkippingBom(fileBytes, encoding);

        Console.WriteLine(fileContent);
    }

    /// <summary>
    /// Determines the text encoding of a byte buffer by inspecting its BOM.
    /// </summary>
    /// <param name="fileBytes">The raw bytes of the file.</param>
    /// <returns>
    /// UTF-8, UTF-32 (little or big endian) or UTF-16 (little or big endian)
    /// when the matching BOM is present; otherwise <see cref="Encoding.Default"/>.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="fileBytes"/> is <c>null</c>.
    /// </exception>
    static Encoding DetectFileEncoding(byte[] fileBytes)
    {
        if (fileBytes == null)
        {
            throw new ArgumentNullException(nameof(fileBytes));
        }

        // UTF-8 BOM (EF BB BF)
        if (StartsWith(fileBytes, 0xEF, 0xBB, 0xBF))
        {
            return Encoding.UTF8;
        }

        // UTF-32 BE BOM (00 00 FE FF). Encoding.UTF32 is little-endian, so the
        // big-endian variant has to be constructed explicitly.
        if (StartsWith(fileBytes, 0x00, 0x00, 0xFE, 0xFF))
        {
            return new UTF32Encoding(bigEndian: true, byteOrderMark: true);
        }

        // UTF-32 LE BOM (FF FE 00 00). This must be tested before UTF-16 LE
        // because the UTF-16 LE BOM (FF FE) is a prefix of it.
        if (StartsWith(fileBytes, 0xFF, 0xFE, 0x00, 0x00))
        {
            return Encoding.UTF32;
        }

        // UTF-16 BE BOM (FE FF)
        if (StartsWith(fileBytes, 0xFE, 0xFF))
        {
            return Encoding.BigEndianUnicode;
        }

        // UTF-16 LE BOM (FF FE)
        if (StartsWith(fileBytes, 0xFF, 0xFE))
        {
            return Encoding.Unicode;
        }

        // No recognised BOM: fall back to the default encoding.
        return Encoding.Default;
    }

    /// <summary>
    /// Decodes <paramref name="fileBytes"/> with <paramref name="encoding"/>,
    /// leaving out the encoding's preamble (BOM) when the buffer starts with it.
    /// </summary>
    static string DecodeSkippingBom(byte[] fileBytes, Encoding encoding)
    {
        byte[] preamble = encoding.GetPreamble();

        // Only skip bytes that really are the BOM; a fallback encoding may
        // declare a preamble that the file itself does not contain.
        int offset = preamble.Length > 0 && StartsWith(fileBytes, preamble) ? preamble.Length : 0;

        return encoding.GetString(fileBytes, offset, fileBytes.Length - offset);
    }

    /// <summary>
    /// Returns <c>true</c> when <paramref name="data"/> begins with the bytes
    /// in <paramref name="prefix"/>.
    /// </summary>
    static bool StartsWith(byte[] data, params byte[] prefix)
    {
        if (data.Length < prefix.Length)
        {
            return false;
        }

        for (int i = 0; i < prefix.Length; i++)
        {
            if (data[i] != prefix[i])
            {
                return false;
            }
        }

        return true;
    }
}