我建立了一个JVMTest工程,用于测试。在这个工程中编写和测试一些utilities。每个功能可以单独测试。
关于常量池的解析已初步完成,代码如下。
JVMTest.cpp
#include <stdio.h> #include <io.h> #include <fcntl.h> #include "ClassFileParser.h" #ifdef WIN32 #include <windows.h> #else //unix,linux #include <sys/types.h> #include <unistd.h> #define _open open #define _read read #define _write write #define _lseek lseek //linux下暂时用lseek(lseek64似乎不都支持) #define _close close #endif //将一个class文件读入缓存 //返回:缓存指针,调用者释放 char * loadClassToBuffer(const char *file, int *bufferLength) { int fd; fd = _open(file, O_RDONLY | O_BINARY); if (fd == -1) { printf("can not open file"); return NULL; } long length = _lseek(fd, 0, SEEK_END); if (length == -1) { _close(fd); printf("can not seek file"); return NULL; } char *buffer = new char [length]; if (buffer == NULL) { _close(fd); printf("out of memory"); return NULL; } _lseek(fd, 0, SEEK_SET); int bytesRead = _read(fd, buffer, length); if (bytesRead != length) { _close(fd); delete [] buffer; printf("read failed"); return NULL; } *bufferLength = length; return buffer; } int main(int argc, char *argv[]) { printf("JVM Test, JVM Utilities test...\n"); if (argc < 2) { printf("Usage: JVMTest classfile\n"); return 0; } const char *classFileName = argv[1]; int bufferLength; char *classBuffer = loadClassToBuffer(classFileName, &bufferLength); if (classBuffer == NULL) { printf("\nload class file failed"); return -1; } ClassFileParser parser((const u1 *)classBuffer, bufferLength); try { parser.parseVersion(); parser.parseConstantPool(); parser.printSummary(); } catch (Exception e) { printf("parse class file failed: %s", e.getMessage()); } //test //Exception e("abcdef"); return 0; }
ClassFileParser.h
#pragma once #include "util.h" typedef unsigned int u4; typedef unsigned short u2; typedef unsigned char u1; struct cp_info; struct field_info; struct method_info; struct attribute_info; #pragma pack(1) struct ClassFileFormat { u4 magic; u2 minor_verison; u2 major_version; u2 constant_pool_count; //按照JVM规范,此值等于cp_info的记录数+1 cp_info *constant_pool; //[constant_pool_count-1] u2 access_flags; u2 this_class; u2 super_class; u2 interfaces_count; u2 *interfaces; //[interfaces_count] u2 fields_count; field_info *fields; //[fields_count] u2 methods_count; method_info *methods; //[methods_count]; u2 attributes_count; attribute_info *attributes; //[attributes_count]; }; struct cp_info { u1 tag; u1 *info; }; enum { CONSTANT_Utf8 = 1, CONSTANT_Unicode, CONSTANT_Integer, CONSTANT_Float, CONSTANT_Long, CONSTANT_Double, CONSTANT_Class, CONSTANT_String, CONSTANT_Fieldref, CONSTANT_Methodref, CONSTANT_InterfaceMethodref, CONSTANT_NameAndType, }; struct CONSTANT_Class_info { u1 tag; u2 name_index; //常量池索引,该索引处的常量项必须是一个CONSTANT_Utf8_info }; struct CONSTANT_Fieldref_info { u1 tag; u2 class_index; //常量池索引,必须指向一个CONSTANT_Class_info u2 name_and_type_index; //常量池索引,必须指向一个CONSTANT_NameAndType_info }; struct CONSTANT_Methodref_info { u1 tag; u2 class_index; //常量池索引,必须指向一个CONSTANT_Class_info u2 name_and_type_index; //常量池索引,必须指向一个CONSTANT_NameAndType_info }; struct CONSTANT_InterfaceMethodref_info { u1 tag; u2 class_index; //常量池索引,必须指向一个CONSTANT_Class_info u2 name_and_type_index; //常量池索引,必须指向一个CONSTANT_NameAndType_info }; struct CONSTANT_String_info { u1 tag; u2 string_index; //常量池索引,必须指向一个CONSTANT_Utf8_info }; struct CONSTANT_Integer_info { u1 tag; u4 bytes; //4字节整数,高字节在前 }; struct CONSTANT_Float_info { u1 tag; u4 bytes; //4字节浮点数,IEEE 754格式,高字节在前 }; struct CONSTANT_Long_info { u1 tag; u4 high_bytes; //8字节整数,高字节在前 u4 low_bytes; }; struct CONSTANT_Double_info { u1 tag; u4 high_bytes; //双精度浮点数,IEEE 754格式,高字节在前 u4 low_bytes; }; struct CONSTANT_NameAndType_info { u1 tag; u2 name_index; 
//field或method的简单名字。该索引必须指向一个CONSTANT_Utf8_info u2 descriptor_index; //field或method的描述符。该索引必须指向一个CONSTANT_Utf8_info }; struct CONSTANT_Utf8_info { u1 tag; u2 length; u1 *bytes; //bytes的长度为length }; #pragma pack() class ClassFileParser { public: ClassFileParser(const u1 *classInBuffer, int length); virtual ~ClassFileParser(void); void parseVersion() throw (Exception); void parseConstantPool() throw (Exception); void printSummary(); private: const u1 * _class_buffer; int _buffer_length; bool valid_cp_index(int index, int cp_count) { return (index >= 1 && index < cp_count); } bool is_supported_version(u2 majorVersion, u2 minorVersion); bool check_utf8_string(const u1 *bytes, int length); void saveConstantPool() throw (Exception); private: u2 _major_version; u2 _minor_version; int _cp_count; int _cp_length; u1 ** _cp_index; u1 * _cp_data; u1 _tag_0; }; class ClassBufferInput { public: ClassBufferInput(const u1* buffer, int length); const u1* buffer() const { return _buffer_start; } int length() const { return _buffer_end - _buffer_start; } const u1* current() const { return _current; } u1 read_u1() throw (Exception); u2 read_u2() throw (Exception); u4 read_u4() throw (Exception); void skip_u1() throw (Exception); void skip_u2() throw (Exception); void skip_u4() throw (Exception); void skip_n(int n) throw (Exception); //java class文件中u2,u4的存储都是高字节在前,低字节在后 static u2 read_java_u2(const u1 *buffer); static u4 read_java_u4(const u1 *buffer); private: const u1* _buffer_start; const u1* _buffer_end; const u1* _current; void guarantee_size(int size) throw (Exception); }; class ClassBufferOutput { public: ClassBufferOutput(u1* buffer, int length); u1* buffer() const { return _buffer_start; } int length() const { return _buffer_end - _buffer_start; } u1* current() const { return _current; } void write_u1(u1 c) throw (Exception); void write_u2(u2 x) throw (Exception); void write_u4(u4 x) throw (Exception); void write_bytes(const u1 *bytes, int n) throw (Exception); private: u1* 
_buffer_start; u1* _buffer_end; u1* _current; void guarantee_size(int size) throw (Exception); };
ClassFileParser.cpp
// ClassFileParser.cpp - class-file version / constant-pool parsing and the
// bounds-checked buffer reader and writer it relies on.
//
// NOTE: the dynamic exception specifications (throw (Exception)) are kept
// because they must match the declarations in ClassFileParser.h.
#include "ClassFileParser.h"
#include <stdio.h>
#include <memory.h>

#define CLASS_FILE_MAGIC_U4 0xCAFEBABE

// Returns the tag byte of the constant-pool entry that the 1-based `index`
// refers to; throws Exception when the index is outside 1 .. cp_count-1.
static u1 referenced_tag(u1 * const *cp_index, int cp_count, u2 index)
{
    assert_exception(index >= 1 && index < cp_count, "bad constant pool index");
    return *(cp_index[index - 1]);
}

// Remembers the caller's buffer (not copied) and resets all parse results.
ClassFileParser::ClassFileParser(const u1 *classInBuffer, int length)
{
    _class_buffer = classInBuffer;
    _buffer_length = length;
    _major_version = 0;
    _minor_version = 0;
    _cp_count = 0;
    _cp_length = -1;
    _cp_data = NULL;
    _cp_index = NULL;
    _tag_0 = 0;
}

// Releases the arrays allocated by saveConstantPool().
// NOTE(review): the class declares no copy constructor or assignment, so
// copying a parser that has parsed a constant pool would free these twice.
ClassFileParser::~ClassFileParser(void)
{
    delete [] _cp_index;
    delete [] _cp_data;
}

// Checks the magic number and reads the minor/major version.
// Throws Exception on truncated input, bad magic or unsupported version.
void ClassFileParser::parseVersion() throw (Exception)
{
    ClassBufferInput in(_class_buffer, _buffer_length);

    u4 magic = in.read_u4();
    assert_exception(magic == CLASS_FILE_MAGIC_U4, "bad magic value");

    u2 minorVersion = in.read_u2();
    u2 majorVersion = in.read_u2();
    assert_exception(is_supported_version(majorVersion, minorVersion), "unsupported class version");

    _major_version = majorVersion;
    _minor_version = minorVersion;
}

// Version numbers of real Java virtual machines such as SUN's Hotspot are
// puzzling: for release 1.5 one would guess major=1, minor=5, yet Hotspot
// rejects that. Its source shows the smallest major version is 45, so 45 is
// accepted here as well. `minor` is currently not examined.
bool ClassFileParser::is_supported_version(u2 major, u2 minor)
{
    (void)minor;
    return major >= 45 && major <= 51;
}

// First pass over the constant pool: checks every entry, range-checks the
// indices stored in it and totals the pool's byte length, then hands over to
// saveConstantPool(). Prints one trace line per entry.
// Throws Exception on truncated or malformed input.
void ClassFileParser::parseConstantPool() throw (Exception)
{
    const int magic_version_length = sizeof(u4) + sizeof(u2) + sizeof(u2);
    // Do not form a pointer past the end of a buffer too short for the header.
    assert_exception(_buffer_length >= magic_version_length, "unexpected end of file");
    ClassBufferInput in(_class_buffer + magic_version_length, _buffer_length - magic_version_length);

    u2 cp_count = in.read_u2(); // number of constant-pool entries + 1
    assert_exception(cp_count >= 1, "bad constant pool size");

    // Walk the pool once to total its length in bytes, running some checks on the way.
    int cp_length = 0;
    for (int index = 1; index < cp_count; index++) {
        int cp_info_length = -1;
        u1 tag = in.read_u1();
        printf("index:%d, tag: %d, ", index, tag);

        switch (tag) {
        case CONSTANT_Utf8:
            {
                u2 length = in.read_u2();
                const u1 *bytes = in.current();
                // Bounds-check the string before the checker looks at its bytes.
                in.skip_n(length);
                bool isUtf8 = check_utf8_string(bytes, length);
                assert_exception(isUtf8, "bad utf8 string");
                cp_info_length = 2 + length;
            }
            break;
        case CONSTANT_Integer:
        case CONSTANT_Float:
            {
                in.skip_u4();
                cp_info_length = 4;
            }
            break;
        case CONSTANT_Long:
        case CONSTANT_Double:
            {
                // JVM specification: an 8-byte constant occupies two pool
                // slots, so the slot after it must still be inside the pool.
                assert_exception(index + 1 < cp_count, "bad constant pool size");
                in.skip_n(8);
                cp_info_length = 8;
                index++;
            }
            break;
        case CONSTANT_Class:
            {
                u2 name_index = in.read_u2();
                assert_exception(valid_cp_index(name_index, cp_count), "bad constant pool index");
                cp_info_length = 2;
            }
            break;
        case CONSTANT_String:
            {
                u2 string_index = in.read_u2();
                assert_exception(valid_cp_index(string_index, cp_count), "bad constant pool index");
                cp_info_length = 2;
            }
            break;
        case CONSTANT_Fieldref:
        case CONSTANT_Methodref:
        case CONSTANT_InterfaceMethodref:
            {
                u2 class_index = in.read_u2();
                u2 name_and_type_index = in.read_u2();
                assert_exception(valid_cp_index(class_index, cp_count), "bad constant pool index");
                assert_exception(valid_cp_index(name_and_type_index, cp_count), "bad constant pool index");
                cp_info_length = 4;
            }
            break;
        case CONSTANT_NameAndType:
            {
                u2 name_index = in.read_u2();
                u2 descriptor_index = in.read_u2();
                assert_exception(valid_cp_index(name_index, cp_count), "bad constant pool index");
                assert_exception(valid_cp_index(descriptor_index, cp_count), "bad constant pool index");
                cp_info_length = 4;
            }
            break;
        default:
            {
                char msg[30]; // "unknown tag: 255" needs 17 bytes including the terminator
                sprintf(msg, "unknown tag: %d", tag);
                assert_exception(false, msg);
            }
            break;
        }

        assert_exception(cp_info_length != -1, "internal error");
        printf("length: %d\n", cp_info_length);
        cp_length += (cp_info_length + 1); // +1 for the tag byte
    }

    _cp_count = cp_count;
    _cp_length = cp_length;

    saveConstantPool();
}

// Placeholder: every byte sequence is currently accepted.
bool ClassFileParser::check_utf8_string(const u1 *bytes, int length)
{
    // TODO: check utf8 string
    (void)bytes;
    (void)length;
    return true;
}

// Second pass: copies the constant pool into _cp_data, records a pointer to
// each entry in _cp_index, then checks that entries referring to other
// entries point at the expected kind.
// Relies on _cp_length having been computed by parseConstantPool().
// Throws Exception on malformed input.
void ClassFileParser::saveConstantPool() throw (Exception)
{
    const int magic_version_length = sizeof(u4) + sizeof(u2) + sizeof(u2);
    ClassBufferInput in(_class_buffer + magic_version_length, _buffer_length - magic_version_length);

    u2 cp_count = in.read_u2(); // number of constant-pool entries + 1
    assert_exception(cp_count >= 1, "bad constant pool size");

    // Drop the arrays of an earlier parse so parsing twice does not leak.
    delete [] _cp_index;
    _cp_index = NULL;
    delete [] _cp_data;
    _cp_data = NULL;

    // The pool length is known now: keep the pool in one array (for the time
    // being the run-time constant pool is identical to the original one!)
    // and keep the position of every entry in a second array.
    _cp_data = new u1 [_cp_length];
    assert_exception(_cp_data != NULL, "out of memory");
    _cp_index = new u1 * [cp_count - 1];
    assert_exception(_cp_index != NULL, "out of memory");

    ClassBufferOutput out(_cp_data, _cp_length);
    const int slot_count = cp_count - 1;

    for (int index = 0; index < slot_count; index++) {
        u1 tag = in.read_u1();
        switch (tag) {
        case CONSTANT_Utf8:
            {
                u2 length = in.read_u2();
                const u1 *bytes = in.current();
                in.skip_n(length); // bounds-check before copying
                _cp_index[index] = out.current();
                out.write_u1(tag);
                out.write_u2(length);
                out.write_bytes(bytes, length);
            }
            break;
        case CONSTANT_Integer:
        case CONSTANT_Float:
            {
                u4 bytes = in.read_u4();
                _cp_index[index] = out.current();
                out.write_u1(tag);
                out.write_u4(bytes);
            }
            break;
        case CONSTANT_Long:
        case CONSTANT_Double:
            {
                // The unusable second slot is written below; make sure it exists.
                assert_exception(index + 1 < slot_count, "bad constant pool size");
                u4 high_bytes = in.read_u4();
                u4 low_bytes = in.read_u4();
                _cp_index[index] = out.current();
                out.write_u1(tag);
                out.write_u4(high_bytes);
                out.write_u4(low_bytes);
                index++; // JVM specification: an 8-byte constant counts as two entries
                _cp_index[index] = &_tag_0;
            }
            break;
        case CONSTANT_Class:
            {
                u2 name_index = in.read_u2();
                _cp_index[index] = out.current();
                out.write_u1(tag);
                out.write_u2(name_index);
            }
            break;
        case CONSTANT_String:
            {
                u2 string_index = in.read_u2();
                _cp_index[index] = out.current();
                out.write_u1(tag);
                out.write_u2(string_index);
            }
            break;
        case CONSTANT_Fieldref:
        case CONSTANT_Methodref:
        case CONSTANT_InterfaceMethodref:
            {
                u2 class_index = in.read_u2();
                u2 name_and_type_index = in.read_u2();
                _cp_index[index] = out.current();
                out.write_u1(tag);
                out.write_u2(class_index);
                out.write_u2(name_and_type_index);
            }
            break;
        case CONSTANT_NameAndType:
            {
                u2 name_index = in.read_u2();
                u2 descriptor_index = in.read_u2();
                _cp_index[index] = out.current();
                out.write_u1(tag);
                out.write_u2(name_index);
                out.write_u2(descriptor_index);
            }
            break;
        default:
            {
                char msg[30]; // "unknown tag: 255" needs 17 bytes including the terminator
                sprintf(msg, "unknown tag: %d", tag);
                assert_exception(false, msg);
            }
            break;
        }
    }

    // Verify equality:
    // const u1 * p1 = _class_buffer + magic_version_length + 2;
    // int cmp = memcmp(p1, _cp_data, _cp_length);

    // Check that the references between constant-pool entries are correct.
    for (int index = 1; index < cp_count; index++) {
        const u1 *entry = _cp_index[index - 1];
        u1 tag = *entry++;
        switch (tag) {
        case CONSTANT_Long:
        case CONSTANT_Double:
            index++; // skip the unusable second slot
            break;
        case CONSTANT_Class:
            {
                u2 name_index = ClassBufferInput::read_java_u2(entry);
                u1 ref_tag = referenced_tag(_cp_index, cp_count, name_index);
                assert_exception(ref_tag == CONSTANT_Utf8, "name_index refered is not CONSTANT_Utf8");
            }
            break;
        case CONSTANT_String:
            {
                u2 string_index = ClassBufferInput::read_java_u2(entry);
                u1 ref_tag = referenced_tag(_cp_index, cp_count, string_index);
                assert_exception(ref_tag == CONSTANT_Utf8, "string_index refered is not CONSTANT_Utf8");
            }
            break;
        case CONSTANT_Fieldref:
        case CONSTANT_Methodref:
        case CONSTANT_InterfaceMethodref:
            {
                u2 class_index = ClassBufferInput::read_java_u2(entry);
                u2 name_and_type_index = ClassBufferInput::read_java_u2(entry + 2);
                u1 ref_tag1 = referenced_tag(_cp_index, cp_count, class_index);
                assert_exception(ref_tag1 == CONSTANT_Class, "class_index refered is not CONSTANT_Class");
                u1 ref_tag2 = referenced_tag(_cp_index, cp_count, name_and_type_index);
                assert_exception(ref_tag2 == CONSTANT_NameAndType, "name_and_type_index refered is not CONSTANT_NameAndType");
            }
            break;
        case CONSTANT_NameAndType:
            {
                u2 name_index = ClassBufferInput::read_java_u2(entry);
                u2 descriptor_index = ClassBufferInput::read_java_u2(entry + 2);
                u1 ref_tag1 = referenced_tag(_cp_index, cp_count, name_index);
                assert_exception(ref_tag1 == CONSTANT_Utf8, "name_index refered is not CONSTANT_Utf8");
                u1 ref_tag2 = referenced_tag(_cp_index, cp_count, descriptor_index);
                assert_exception(ref_tag2 == CONSTANT_Utf8, "descriptor_index refered is not CONSTANT_Utf8");
            }
            break;
        default:
            // Utf8, Integer, Float and the zero placeholder tag refer to nothing.
            break;
        }
    }
}

// Prints the parsed version, the constant-pool count and its byte length.
void ClassFileParser::printSummary()
{
    printf("class version: %d.%d, cp count: %d, cp length: %d\n",
        _major_version, _minor_version, _cp_count, _cp_length);
}

// ClassBufferInput

// Starts reading at the beginning of `buffer`, which holds `length` bytes.
ClassBufferInput::ClassBufferInput(const u1* buffer, int length)
{
    _buffer_start = buffer;
    _buffer_end = buffer + length;
    _current = buffer;
}

// Throws Exception unless `size` more bytes can be read from the cursor.
// The check is against the bytes remaining, not the total buffer size.
void ClassBufferInput::guarantee_size(int size) throw (Exception)
{
    if (size < 0 || size > (_buffer_end - _current)) {
        throw Exception("unexpected end of file");
    }
}

u1 ClassBufferInput::read_u1() throw (Exception)
{
    guarantee_size(1);
    return *_current++;
}

u2 ClassBufferInput::read_u2() throw (Exception)
{
    guarantee_size(2);
    u2 result = read_java_u2(_current);
    _current += 2;
    return result;
}

u4 ClassBufferInput::read_u4() throw (Exception)
{
    guarantee_size(4);
    u4 result = read_java_u4(_current);
    _current += 4;
    return result;
}

// Decodes a big-endian u2 from buffer[0..1]; performs no bounds check.
u2 ClassBufferInput::read_java_u2(const u1 *buffer)
{
    return static_cast<u2>((buffer[0] << 8) | buffer[1]);
}

// Decodes a big-endian u4 from buffer[0..3]; performs no bounds check.
u4 ClassBufferInput::read_java_u4(const u1 *buffer)
{
    return (static_cast<u4>(buffer[0]) << 24)
         | (static_cast<u4>(buffer[1]) << 16)
         | (static_cast<u4>(buffer[2]) << 8)
         |  static_cast<u4>(buffer[3]);
}

void ClassBufferInput::skip_u1() throw (Exception)
{
    guarantee_size(1);
    _current++;
}

void ClassBufferInput::skip_u2() throw (Exception)
{
    guarantee_size(2);
    _current += 2;
}

void ClassBufferInput::skip_u4() throw (Exception)
{
    guarantee_size(4);
    _current += 4;
}

void ClassBufferInput::skip_n(int n) throw (Exception)
{
    guarantee_size(n);
    _current += n;
}

// ClassBufferOutput

// Starts writing at the beginning of `buffer`, which has room for `length` bytes.
ClassBufferOutput::ClassBufferOutput(u1* buffer, int length)
{
    _buffer_start = buffer;
    _buffer_end = buffer + length;
    _current = buffer;
}

// Throws Exception unless `size` more bytes fit between the cursor and the
// end of the buffer.
void ClassBufferOutput::guarantee_size(int size) throw (Exception)
{
    if (size < 0 || size > (_buffer_end - _current)) {
        throw Exception("insufficient buffer");
    }
}

void ClassBufferOutput::write_u1(u1 c) throw (Exception)
{
    guarantee_size(1);
    *_current++ = c;
}

// Writes x high byte first.
void ClassBufferOutput::write_u2(u2 x) throw (Exception)
{
    guarantee_size(2);
    *_current++ = static_cast<u1>(x >> 8);
    *_current++ = static_cast<u1>(x);
}

// Writes x high byte first.
void ClassBufferOutput::write_u4(u4 x) throw (Exception)
{
    guarantee_size(4);
    *_current++ = static_cast<u1>(x >> 24);
    *_current++ = static_cast<u1>(x >> 16);
    *_current++ = static_cast<u1>(x >> 8);
    *_current++ = static_cast<u1>(x);
}

// Copies n bytes from `bytes` to the cursor.
void ClassBufferOutput::write_bytes(const u1 *bytes, int n) throw (Exception)
{
    guarantee_size(n);
    for (int i = 0; i < n; i++) {
        *_current++ = *bytes++;
    }
}
util.h
#ifndef _util_h #define _util_h class Exception { public: Exception(); Exception(const char *msg); const char * getMessage() const { return _msg; } private: static const int MAX_MSG_SIZE = 256; char _msg[MAX_MSG_SIZE+1]; }; void assert_exception(bool b) throw (Exception); void assert_exception(bool b, const char *msg) throw (Exception); #endif
util.cpp
#include "util.h"
#include <string.h>

// Creates an exception with an empty message; the whole buffer is zeroed.
Exception::Exception()
{
    memset(_msg, 0, sizeof(_msg));
}

// Creates an exception holding a copy of msg.
// Messages longer than MAX_MSG_SIZE characters are truncated; a NULL msg is
// treated as an empty message instead of being dereferenced.
Exception::Exception(const char *msg)
{
    if (msg == NULL) {
        msg = "";
    }
    // strncpy zero-pads short messages, so the first MAX_MSG_SIZE bytes are
    // always initialised; the extra byte holds the terminator needed when
    // the message fills the buffer.
    strncpy(_msg, msg, MAX_MSG_SIZE);
    _msg[MAX_MSG_SIZE] = '\0';
}

// Throws an Exception with an empty message when b is false.
void assert_exception(bool b) throw (Exception)
{
    if (!b) {
        throw Exception();
    }
}

// Throws an Exception carrying msg when b is false.
void assert_exception(bool b, const char *msg) throw (Exception)
{
    if (!b) {
        throw Exception(msg);
    }
}
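这个JVMTest工程可以用Visual Studio编译,或者在Linux上编译。我用的是cygwin,编译命令:g++ -g ClassFileParser.cpp JVMTest.cpp util.cpp(注意:代码中使用了动态异常规范 throw (Exception),该特性已在C++17中被移除;如果编译器默认使用C++17或更新的标准,需要加上 -std=c++14 或更早的标准选项才能编译。)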