2023年11月23日发(作者:)

根据⽂件头数据判断⽂件类型

现有⼀⽂件,其扩展名未知或标记错误。假设它是⼀个正常的、⾮空的⽂件,且将扩展名更正后可以正常使⽤,那么,如何判断它是哪种类型的⽂件?

对于后缀未知或后缀被修改过的文件,依然可以通过文件头来判断它究竟是什么类型。我们可以使用文本编辑工具(如 UltraEdit)以 16 进制模式打开文件,查看文件开头的若干字节。

以下是常见文件类型的文件头(16 进制),希望对你有帮助:

JPEG (jpg),⽂件头:FFD8FF

PNG (png),⽂件头:89504E47

GIF (gif),⽂件头:47494638

TIFF (tif),⽂件头:49492A00

Windows Bitmap (bmp),⽂件头:424D

CAD (dwg),⽂件头:41433130

Adobe Photoshop (psd),⽂件头:38425053

Rich Text Format (rtf),⽂件头:7B5C727466

XML (xml),⽂件头:3C3F786D6C

HTML (html),⽂件头:68746D6C3E

Email [thorough only] (eml),⽂件头:44656C69766572792D646174653A

Outlook Express (dbx),⽂件头:CFAD12FEC5FD746F

Outlook (pst),⽂件头:2142444E

MS Word/Excel (xls 或 doc),文件头:D0CF11E0

MS Access (mdb),⽂件头:5374616E64617264204A

WordPerfect (wpd),⽂件头:FF575043

Postscript (eps 或 ps),文件头:252150532D41646F6265

Adobe Acrobat (pdf),⽂件头:255044462D312E

Quicken (qdf),⽂件头:AC9EBD8F

Windows Password (pwl),⽂件头:E3828596

ZIP Archive (zip),⽂件头:504B0304

RAR Archive (rar),⽂件头:52617221

Wave (wav),文件头:57415645(注:这是位于偏移 8 处的 "WAVE" 标识;文件的前 4 个字节是 "RIFF",即 52494646)

AVI (avi),文件头:41564920(注:这是位于偏移 8 处的 "AVI " 标识;文件的前 4 个字节是 "RIFF",即 52494646)

Real Audio (ram),⽂件头:2E7261FD

Real Media (rm),⽂件头:2E524D46

MPEG (mpg),⽂件头:000001BA

MPEG (mpg),⽂件头:000001B3

Quicktime (mov),⽂件头:6D6F6F76

Windows Media (asf),⽂件头:3026B2758E66CF11

MIDI (mid),⽂件头:4D546864

下面再提供一个网上用 Java 编写的、根据文件头判断文件类型的示例:

package com;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

public class FileType {

/**
 * Signature table: key is the hex encoding of a file's leading bytes
 * (mixed case occurs in the table), value is the type name reported for
 * files that start with those bytes. Filled once by the static initializer.
 */
public final static Map<String, String> FILE_TYPE_MAP = new HashMap<String, String>();

/** Utility class with static members only; not meant to be instantiated. */
private FileType(){}

static{
    getAllFileType(); // populate the signature table when the class is loaded
}

/**
 * Registers the known file signatures in {@code FILE_TYPE_MAP}.
 *
 * <p>Each key is the hex encoding of the bytes a file of that type starts
 * with; each value is the type name to report. Many keys are the first ten
 * bytes of one particular sample file, so they are narrower than the
 * format's real magic number. Signatures that could never match a real file
 * of the stated type have been corrected against the published formats.
 *
 * <p>Where the same key is registered more than once, the later {@code put}
 * replaces the earlier one.
 */
private static void getAllFileType()
{
    FILE_TYPE_MAP.put("ffd8ffe000104a464946", "jpg"); // JPEG with JFIF header (jpg)
    FILE_TYPE_MAP.put("89504e470d0a1a0a0000", "png"); // PNG (png)
    FILE_TYPE_MAP.put("47494638396126026f01", "gif"); // GIF89a (gif); trailing bytes come from one sample's dimensions
    FILE_TYPE_MAP.put("49492a00", "tif"); // TIFF, little-endian: "II" + 42 stored as 2a 00
    FILE_TYPE_MAP.put("424d228c", "bmp"); // 16-color bitmap (bmp): "BM" + low bytes of one sample's file size
    FILE_TYPE_MAP.put("424d8240", "bmp"); // 24-bit bitmap (bmp): sample-specific, as above
    FILE_TYPE_MAP.put("424d8e1b", "bmp"); // 256-color bitmap (bmp): sample-specific, as above
    FILE_TYPE_MAP.put("41433130", "dwg"); // CAD (dwg), ASCII "AC10"
    FILE_TYPE_MAP.put("3c21444f435459504520", "html"); // HTML (html), "<!DOCTYPE "
    FILE_TYPE_MAP.put("3c21646f637479706520", "htm"); // HTM (htm), "<!doctype "
    FILE_TYPE_MAP.put("48544d4c207b0d0a0942", "css"); // css (taken from one sample file)
    FILE_TYPE_MAP.put("696b2e71623d696b2e71", "js"); // js (taken from one sample file)
    FILE_TYPE_MAP.put("7b5c727466315c616e73", "rtf"); // Rich Text Format (rtf), "{\rtf1\ans"
    FILE_TYPE_MAP.put("38425053", "psd"); // Photoshop (psd), ASCII "8BPS"
    FILE_TYPE_MAP.put("46726f6d3a203d3f6762", "eml"); // Email [Outlook Express 6] (eml)
    // The next two entries and the "wps" entry further down share one key
    // (Word, Excel, MSI, Visio and WPS files all start with these bytes).
    // The last put wins, so a lookup of this key yields "wps".
    FILE_TYPE_MAP.put("d0cf11e0a1b11ae10000", "doc"); // MS Word/Excel/MSI
    FILE_TYPE_MAP.put("d0cf11e0a1b11ae10000", "vsd"); // Visio drawing
    FILE_TYPE_MAP.put("5374616E64617264204A", "mdb"); // MS Access (mdb)
    FILE_TYPE_MAP.put("252150532D41646F6265", "ps"); // PostScript, "%!PS-Adobe"
    FILE_TYPE_MAP.put("255044462d312e350d0a", "pdf"); // Adobe Acrobat (pdf), "%PDF-1.5" + CRLF only
    FILE_TYPE_MAP.put("2e524d46", "rmvb"); // rmvb; rm files start the same way
    FILE_TYPE_MAP.put("464c5601", "flv"); // flv ("FLV" + version 1); f4v is treated the same
    // MP4 starts with a 4-byte box size followed by "ftyp"; this key assumes
    // size 0x20 and a brand beginning with "mp".
    // NOTE(review): files whose ftyp box has a different size will not match.
    FILE_TYPE_MAP.put("00000020667479706d70", "mp4");
    FILE_TYPE_MAP.put("494433", "mp3"); // MP3 with an ID3v2 tag, ASCII "ID3"
    FILE_TYPE_MAP.put("000001ba21", "mpg"); // MPEG program stream pack header
    FILE_TYPE_MAP.put("3026b2758e66cf11a6d9", "wmv"); // wmv; asf files start the same way
    FILE_TYPE_MAP.put("52494646e27807005741", "wav"); // Wave (wav): "RIFF" + one sample's size + "WA"
    FILE_TYPE_MAP.put("52494646d07d60074156", "avi"); // AVI: "RIFF" + one sample's size + "AV"
    FILE_TYPE_MAP.put("4d546864", "mid"); // MIDI (mid), ASCII "MThd"
    // ZIP-based formats: "PK\3\4", then version-needed, flags and method.
    // zip, docx and jar are told apart only by those header fields, which is
    // a heuristic and not a guarantee.
    FILE_TYPE_MAP.put("504b0304140000000800", "zip");
    FILE_TYPE_MAP.put("526172211a0700cf9073", "rar");
    FILE_TYPE_MAP.put("235468697320636f6e66", "ini"); // "#This conf" (taken from one sample file)
    FILE_TYPE_MAP.put("504b03040a", "jar");
    FILE_TYPE_MAP.put("4d5a", "exe"); // executable, ASCII "MZ"
    FILE_TYPE_MAP.put("3c25406c", "jsp"); // jsp file, "<%@l"
    FILE_TYPE_MAP.put("4d616e69666573742d56", "mf"); // MF file, "Manifest-V"
    FILE_TYPE_MAP.put("3c3f786d6c2076657273", "xml"); // xml file, "<?xml vers"
    FILE_TYPE_MAP.put("494e5345525420494e54", "sql"); // sql file, "INSERT INT"
    FILE_TYPE_MAP.put("7061636b61676520", "java"); // java source file, "package "
    FILE_TYPE_MAP.put("406563686f206f66660d", "bat"); // bat file, "@echo off" + CR
    FILE_TYPE_MAP.put("1f8b08", "gz"); // gz file: gzip magic + deflate method byte
    FILE_TYPE_MAP.put("6c6f67346a2e726f6f74", "properties"); // properties file, "log4j.root"
    FILE_TYPE_MAP.put("cafebabe0000002e0041", "class"); // class file (one specific class-file version)
    FILE_TYPE_MAP.put("49545346", "chm"); // chm file, ASCII "ITSF"
    // NOTE(review): this signature looks truncated; verify against a sample file.
    FILE_TYPE_MAP.put("00001300", "mxp"); // mxp file
    FILE_TYPE_MAP.put("504b0304140006000800", "docx"); // docx file
    FILE_TYPE_MAP.put("d0cf11e0a1b11ae10000", "wps"); // WPS Writer (wps); Spreadsheets (et) and Presentation (dps) share it
    FILE_TYPE_MAP.put("6431303a637265617465", "torrent"); // "d10:create"
    FILE_TYPE_MAP.put("6D6F6F76", "mov"); // Quicktime (mov), ASCII "moov"
    FILE_TYPE_MAP.put("FF575043", "wpd"); // WordPerfect (wpd)
    FILE_TYPE_MAP.put("CFAD12FEC5FD746F", "dbx"); // Outlook Express (dbx)
    FILE_TYPE_MAP.put("2142444E", "pst"); // Outlook (pst)
    FILE_TYPE_MAP.put("AC9EBD8F", "qdf"); // Quicken (qdf)
    FILE_TYPE_MAP.put("E3828596", "pwl"); // Windows Password (pwl)
    FILE_TYPE_MAP.put("2E7261FD", "ram"); // Real Audio (ram)
}

/**

* 得到上传⽂件的⽂件头

* @param src

* @return

*/

public static String bytesToHexString(byte[] src) {

StringBuilder stringBuilder = new StringBuilder();

if (src == null || <= 0) {

return null;

}

for (int i = 0; i < ; i++) {

int v = src[i] & 0xFF;

String hv = tring(v);

if (() < 2) {

(0);

}

(hv);

}

return ng();

}

/**

* 根据制定⽂件的⽂件头判断其⽂件类型

* @param filePaht

* @return

*/

public static String getFileType(String filePaht){

String res = null;

try {

FileInputStream is = new FileInputStream(filePaht);

byte[] b = new byte[10];

(b, 0, );

String fileCode = bytesToHexString(b);

n(fileCode);

//这种⽅法在字典的头代码不够位数的时候可以⽤但是速度相对慢⼀点

Iterator keyIter = FILE_TYPE_().iterator();

while(t()){

String key = ();

if(rCase().startsWith(rCase()) || rCase().startsWith(rCase())){

res = FILE_TYPE_(key);

break;

}

}

} catch (FileNotFoundException e) {

tackTrace();

} catch (IOException e) {

tackTrace();

}

return res;

}