现在的位置: 首页 > 综合 > 正文

测试解析class常量池

2013年12月11日 ⁄ 综合 ⁄ 共 15122字 ⁄ 字号 评论关闭

我建立了一个JVMTest工程,用于测试。在这个工程中编写和测试一些utilities。每个功能可以单独测试。

关于常量池的解析已初步完成,代码如下。

JVMTest.cpp

#include <stdio.h>
#include <io.h>
#include <fcntl.h>

#include "ClassFileParser.h"

// Platform shim: on non-Windows builds the underscore-prefixed I/O names used
// by loadClassToBuffer are mapped onto the plain POSIX functions.
#ifdef WIN32
#include <windows.h>
#else   // unix, linux
#include <sys/types.h>
#include <unistd.h>
#define _open   open
#define _read   read
#define _write  write
#define _lseek   lseek   // plain lseek for now on Linux (lseek64 does not seem to be supported everywhere)
#define _close  close
#endif

//将一个class文件读入缓存
//返回:缓存指针,调用者释放
// Reads a whole class file into a newly allocated buffer.
//
// file:         path of the file to read.
// bufferLength: receives the number of bytes in the buffer; written only on success.
// Returns the buffer, allocated with new[] (the caller must delete[] it), or
// NULL after printing a diagnostic when the file cannot be opened, measured
// or read completely. The file descriptor is closed on every path.
char * loadClassToBuffer(const char *file, int *bufferLength)
{
    int fd = _open(file, O_RDONLY | O_BINARY);
    if (fd == -1) {
        printf("can not open file");
        return NULL;
    }

    // Seek to the end to learn the size, then rewind; a failure of either
    // seek is reported the same way.
    long length = _lseek(fd, 0, SEEK_END);
    if (length == -1 || _lseek(fd, 0, SEEK_SET) == -1) {
        _close(fd);
        printf("can not seek file");
        return NULL;
    }

    char *buffer = new char [length];
    if (buffer == NULL) {
        // Only reachable on toolchains whose operator new returns NULL
        // instead of throwing.
        _close(fd);
        printf("out of memory");
        return NULL;
    }

    long bytesRead = _read(fd, buffer, length);
    // The descriptor is not needed any more, whether or not the read worked.
    _close(fd);
    if (bytesRead != length) {
        delete [] buffer;
        printf("read failed");
        return NULL;
    }

    *bufferLength = length;
    return buffer;
}

int main(int argc, char *argv[])
{
    printf("JVM Test, JVM Utilities test...\n");
    if (argc < 2) {
        printf("Usage: JVMTest classfile\n");
        return 0;
    }
    const char *classFileName = argv[1];
    int bufferLength;
    char *classBuffer = loadClassToBuffer(classFileName, &bufferLength);
    if (classBuffer == NULL) {
        printf("\nload class file failed");
        return -1;
    }

    ClassFileParser parser((const u1 *)classBuffer, bufferLength);
    try {
        parser.parseVersion();
        parser.parseConstantPool();
        parser.printSummary();
    } catch (Exception e) {
        printf("parse class file failed: %s", e.getMessage());
    }

    //test
    //Exception e("abcdef");

    return 0;
}

ClassFileParser.h

#pragma once

#include "util.h"

// Unsigned aliases named after the u1/u2/u4 items of the class file format.
// unsigned short / unsigned int are assumed to be 2 and 4 bytes wide on the
// platforms this is built for.
typedef unsigned char  u1;
typedef unsigned short u2;
typedef unsigned int   u4;

// Table entry types referenced through pointers by ClassFileFormat.
struct attribute_info;
struct method_info;
struct field_info;
struct cp_info;

#pragma pack(1)

// Top-level fields of a class file, in file order. Each variable-length
// table is represented by its count followed by a pointer to the entries.
struct ClassFileFormat
{
    u4 magic;
    u2 minor_verison;               // (sic) spelling kept so existing users keep compiling
    u2 major_version;

    // Per the JVM specification this equals the number of cp_info records + 1.
    u2 constant_pool_count;
    cp_info *constant_pool;         // [constant_pool_count-1]

    u2 access_flags;
    u2 this_class;
    u2 super_class;

    u2 interfaces_count;
    u2 *interfaces;                 // [interfaces_count]

    u2 fields_count;
    field_info *fields;             // [fields_count]

    u2 methods_count;
    method_info *methods;           // [methods_count]

    u2 attributes_count;
    attribute_info *attributes;     // [attributes_count]
};

// Generic constant pool entry: a one-byte tag followed by tag-specific data,
// which is referenced here through a pointer.
struct cp_info
{
    u1 tag;
    u1 *info;
};

// Constant pool tag values. The numbers are spelled out explicitly; they are
// the same values the sequential enumeration starting at 1 produces.
enum
{
    CONSTANT_Utf8               = 1,
    CONSTANT_Unicode            = 2,
    CONSTANT_Integer            = 3,
    CONSTANT_Float              = 4,
    CONSTANT_Long               = 5,
    CONSTANT_Double             = 6,
    CONSTANT_Class              = 7,
    CONSTANT_String             = 8,
    CONSTANT_Fieldref           = 9,
    CONSTANT_Methodref          = 10,
    CONSTANT_InterfaceMethodref = 11,
    CONSTANT_NameAndType        = 12
};

// Constant pool entry describing a class.
struct CONSTANT_Class_info
{
    u1 tag;
    // Constant pool index; the entry at that index must be a CONSTANT_Utf8_info.
    u2 name_index;
};

// Constant pool entry referring to a field.
struct CONSTANT_Fieldref_info
{
    u1 tag;
    // Constant pool index; must point to a CONSTANT_Class_info.
    u2 class_index;
    // Constant pool index; must point to a CONSTANT_NameAndType_info.
    u2 name_and_type_index;
};

// Constant pool entry referring to a method.
struct CONSTANT_Methodref_info
{
    u1 tag;
    // Constant pool index; must point to a CONSTANT_Class_info.
    u2 class_index;
    // Constant pool index; must point to a CONSTANT_NameAndType_info.
    u2 name_and_type_index;
};

// Constant pool entry referring to an interface method.
struct CONSTANT_InterfaceMethodref_info
{
    u1 tag;
    // Constant pool index; must point to a CONSTANT_Class_info.
    u2 class_index;
    // Constant pool index; must point to a CONSTANT_NameAndType_info.
    u2 name_and_type_index;
};

// Constant pool entry for a string constant.
struct CONSTANT_String_info
{
    u1 tag;
    // Constant pool index; must point to a CONSTANT_Utf8_info.
    u2 string_index;
};

// Constant pool entry for an integer constant.
struct CONSTANT_Integer_info
{
    u1 tag;
    // 4-byte integer, high byte first.
    u4 bytes;
};

// Constant pool entry for a float constant.
struct CONSTANT_Float_info
{
    u1 tag;
    // 4-byte floating point number in IEEE 754 format, high byte first.
    u4 bytes;
};

// Constant pool entry for a long constant: an 8-byte integer, high byte
// first, split into two 4-byte halves.
struct CONSTANT_Long_info
{
    u1 tag;
    u4 high_bytes;
    u4 low_bytes;
};

// Constant pool entry for a double constant: a double precision floating
// point number in IEEE 754 format, high byte first, split into two halves.
struct CONSTANT_Double_info
{
    u1 tag;
    u4 high_bytes;
    u4 low_bytes;
};

// Constant pool entry pairing a member name with its descriptor.
struct CONSTANT_NameAndType_info
{
    u1 tag;
    // Simple name of the field or method; must point to a CONSTANT_Utf8_info.
    u2 name_index;
    // Descriptor of the field or method; must point to a CONSTANT_Utf8_info.
    u2 descriptor_index;
};

// Constant pool entry holding a UTF-8 encoded string.
struct CONSTANT_Utf8_info
{
    u1 tag;
    u2 length;
    // Points at `length` bytes of string data.
    u1 *bytes;
};

#pragma pack()

// Parses the beginning of an in-memory Java class file: the magic number and
// version, then the constant pool. The input buffer is borrowed; this class
// neither copies it nor frees it.
class ClassFileParser
{
public:
    // classInBuffer: start of the class file image; length: its size in bytes.
    ClassFileParser(const u1 *classInBuffer, int length);
    virtual ~ClassFileParser(void);

    // Checks the magic number and records the major/minor version.
    // Throws Exception when the magic or the version is not accepted.
    void parseVersion() throw (Exception);

    // Walks the constant pool, validates it and stores a copy of it.
    // Throws Exception on malformed input.
    void parseConstantPool() throw (Exception);

    // Prints the version and the constant pool statistics to stdout.
    void printSummary();

private:
    const u1 * _class_buffer;   // borrowed class file image
    int _buffer_length;         // size of _class_buffer in bytes

    // True when index is a usable constant pool index, i.e. 1 <= index < cp_count.
    bool valid_cp_index(int index, int cp_count) {
        if (index < 1) {
            return false;
        }
        return index < cp_count;
    }

    bool is_supported_version(u2 majorVersion, u2 minorVersion);
    bool check_utf8_string(const u1 *bytes, int length);
    void saveConstantPool() throw (Exception);

private:
    u2 _major_version;
    u2 _minor_version;

    int _cp_count;      // constant_pool_count as read from the file
    int _cp_length;     // total size in bytes of all constant pool entries
    u1 ** _cp_index;    // per-slot pointers into _cp_data
    u1 * _cp_data;      // stored copy of the constant pool entries

    u1 _tag_0;          // zero tag that unusable slots point at
};

// Sequential reader over a borrowed byte buffer holding class file data.
// The read_* and skip_* members advance an internal cursor and may throw
// Exception.
class ClassBufferInput
{
public:
    ClassBufferInput(const u1* buffer, int length);

    // Start of the wrapped buffer.
    const u1* buffer() const
    {
        return _buffer_start;
    }

    // Total size of the wrapped buffer in bytes.
    int length() const
    {
        return _buffer_end - _buffer_start;
    }

    // Current cursor position.
    const u1* current() const
    {
        return _current;
    }

    u1 read_u1() throw (Exception);
    u2 read_u2() throw (Exception);
    u4 read_u4() throw (Exception);

    void skip_u1() throw (Exception);
    void skip_u2() throw (Exception);
    void skip_u4() throw (Exception);
    void skip_n(int n) throw (Exception);

    // u2 and u4 values in a Java class file are stored high byte first,
    // low byte last; these decode one value at the given address.
    static u2 read_java_u2(const u1 *buffer);
    static u4 read_java_u4(const u1 *buffer);

private:
    const u1* _buffer_start;
    const u1* _buffer_end;
    const u1* _current;

    void guarantee_size(int size) throw (Exception);
};


// Sequential writer into a caller-supplied byte buffer. The write_* members
// advance an internal cursor and may throw Exception.
class ClassBufferOutput
{
public:
    ClassBufferOutput(u1* buffer, int length);

    // Start of the wrapped buffer.
    u1* buffer() const
    {
        return _buffer_start;
    }

    // Total size of the wrapped buffer in bytes.
    int length() const
    {
        return _buffer_end - _buffer_start;
    }

    // Position the next write goes to.
    u1* current() const
    {
        return _current;
    }

    void write_u1(u1 c) throw (Exception);
    void write_u2(u2 x) throw (Exception);
    void write_u4(u4 x) throw (Exception);
    void write_bytes(const u1 *bytes, int n) throw (Exception);

private:
    u1* _buffer_start;
    u1* _buffer_end;
    u1* _current;

    void guarantee_size(int size) throw (Exception);
};

ClassFileParser.cpp

#include "ClassFileParser.h"
#include <stdio.h>
#include <memory.h>
#define CLASS_FILE_MAGIC_U4 0xCAFEBABE

// Remembers the borrowed class file image and puts every parse result into
// its "nothing parsed yet" state (_cp_length starts at -1, pointers at NULL).
ClassFileParser::ClassFileParser(const u1 *classInBuffer, int length)
    : _class_buffer(classInBuffer),
      _buffer_length(length),
      _major_version(0),
      _minor_version(0),
      _cp_count(0),
      _cp_length(-1),
      _cp_index(NULL),
      _cp_data(NULL),
      _tag_0(0)
{
}

// Releases the constant pool copy and its slot table, both allocated with
// new[] by saveConstantPool. They are NULL when nothing was parsed, and
// delete[] on NULL does nothing. The borrowed class file buffer is not freed.
// NOTE(review): the class has no user-defined copy operations, so a copied
// parser would free the same arrays twice; nothing in this file copies one.
ClassFileParser::~ClassFileParser(void)
{
    delete [] _cp_index;
    _cp_index = NULL;
    delete [] _cp_data;
    _cp_data = NULL;
}

void ClassFileParser::parseVersion() throw (Exception)
{
    ClassBufferInput in(_class_buffer, _buffer_length);
    u4 magic = in.read_u4();
    assert_exception(magic == CLASS_FILE_MAGIC_U4, "bad magic value");
    u2 minorVersion = in.read_u2();
    u2 majorVersion = in.read_u2();
    assert_exception(is_supported_version(majorVersion, minorVersion), 
        "unsupported class version");
    _major_version = majorVersion;
    _minor_version = minorVersion;
}

// Returns true when the class file major version is one this parser accepts,
// i.e. 45 through 51 inclusive. The minor version is not examined.
//
// Class file versions do not follow the Java release number: a 1.5 class is
// not stored as major=1, minor=5. In the HotSpot sources the lowest accepted
// major version is 45, so 45 itself has to be accepted here.
bool ClassFileParser::is_supported_version(u2 major, u2 minor)
{
    (void)minor;    // unused for now
    return major >= 45 && major <= 51;
}

void ClassFileParser::parseConstantPool() throw (Exception)
{
    int magic_version_length = sizeof(u4) + sizeof(u2) + sizeof(u2);
    ClassBufferInput in(_class_buffer + magic_version_length, 
                        _buffer_length - magic_version_length);
    u2 cp_count = in.read_u2();  //常量池项目数 + 1
    assert_exception(cp_count >= 1, "bad constant pool size");
    //下面遍历一遍常量池,为了统计常量池的长度(字节数),顺便执行一些检查
    int cp_length = 0;
    int cp_info_length;
    for (int index = 1; index < cp_count; index++)
    {
        cp_info_length = -1;
        u1 tag = in.read_u1();
        printf("index:%d, tag: %d, ", index, tag);
        switch (tag)
        {
        case CONSTANT_Utf8:
            {
                u2 length = in.read_u2();
                //检查utf8字符串
                bool isUtf8 = check_utf8_string(in.current(), length);
                assert_exception(isUtf8, "bad utf8 string");
                in.skip_n(length);
                cp_info_length = 2 + length;
            }
            break;
        case CONSTANT_Integer:
            {
                in.skip_u4();
                cp_info_length = 4;
            }
            break;
        case CONSTANT_Float:
            {
                in.skip_u4();
                cp_info_length = 4;
            }
            break;
        case CONSTANT_Long:
            {
                in.skip_n(8);
                cp_info_length = 8;
                index++;    //JVM规范:8字节的常量池项在计数上占两个
            }
            break;
        case CONSTANT_Double:
            {
                in.skip_n(8);
                cp_info_length = 8;
                index++;    //JVM规范:8字节的常量池项在计数上占两个
            }
            break;
        case CONSTANT_Class:
            {
                u2 name_and_index = in.read_u2();
                assert_exception(valid_cp_index(name_and_index, cp_count), 
                    "bad constant pool index");
                cp_info_length = 2;
            }
            break;
        case CONSTANT_String:
            {
                u2 string_index = in.read_u2();
                assert_exception(valid_cp_index(string_index, cp_count), 
                    "bad constant pool index");
                cp_info_length = 2;
            }
            break;
        case CONSTANT_Fieldref:
        case CONSTANT_Methodref:
        case CONSTANT_InterfaceMethodref:
            {
                u2 class_index = in.read_u2();
                u2 name_and_type_index = in.read_u2();
                assert_exception(valid_cp_index(class_index, cp_count),
                    "bad constant pool index");
                assert_exception(valid_cp_index(name_and_type_index, cp_count),
                    "bad constant pool index");
                cp_info_length = 4;
            }
            break;
        case CONSTANT_NameAndType:
            {
                u2 name_index = in.read_u2();
                u2 descriptor_index = in.read_u2();
                assert_exception(valid_cp_index(name_index, cp_count),
                    "bad constant pool index");
                assert_exception(valid_cp_index(descriptor_index, cp_count),
                    "bad constant pool index");
                cp_info_length = 4;
            }
            break;
        default:
            {
                char msg[30];
                sprintf(msg, "unknown tag: %d", tag);
                assert_exception(false, msg);
            }
            break;
        }
        assert_exception(cp_info_length != -1, "internal error");
        printf("length: %d\n", cp_info_length);
        cp_length += (cp_info_length + 1);
    }

    _cp_count = cp_count;
    _cp_length = cp_length;

    saveConstantPool();

}

// Placeholder for UTF-8 validation of a constant pool string: no check is
// implemented yet, so every input is reported as valid.
bool ClassFileParser::check_utf8_string(const u1 *bytes, int length)
{
    //Todo: check utf8 string
    const bool accepted = true;
    return accepted;
}

void ClassFileParser::saveConstantPool() throw (Exception)
{
    int magic_version_length = sizeof(u4) + sizeof(u2) + sizeof(u2);
    ClassBufferInput in(_class_buffer + magic_version_length, 
                        _buffer_length - magic_version_length);
    u2 cp_count = in.read_u2();  //常量池项目数 + 1

    //现在已知常量池的长度,保存到一个数组中(目前运行时常量池与原始常量池完全相同!)
    //另用一个数组保存各个常量项的索引
    _cp_data = new u1  [_cp_length];
    assert_exception(_cp_data != NULL, "out of memory");

    _cp_index = new u1 * [cp_count - 1];
    assert_exception(_cp_index != NULL, "out of memory");

    ClassBufferOutput out(_cp_data, _cp_length);
    for (int index = 0; index < cp_count - 1; index++)
    {
        u1 tag = in.read_u1();
        switch (tag)
        {
        case CONSTANT_Utf8:
            {
                u2 length = in.read_u2();

                _cp_index[index] = out.current();
                out.write_u1(tag);
                out.write_u2(length);
                out.write_bytes(in.current(), length);
                in.skip_n(length);
            }
            break;
        case CONSTANT_Integer:
            {
                u4 bytes = in.read_u4();

                _cp_index[index] = out.current();
                out.write_u1(tag);
                out.write_u4(bytes);
            }
            break;
        case CONSTANT_Float:
            {
                u4 bytes = in.read_u4();

                _cp_index[index] = out.current();
                out.write_u1(tag);
                out.write_u4(bytes);
            }
            break;
        case CONSTANT_Long:
            {
                u4 high_bytes = in.read_u4();
                u4 low_bytes = in.read_u4();

                _cp_index[index] = out.current();
                out.write_u1(tag);
                out.write_u4(high_bytes);
                out.write_u4(low_bytes);
                index++;    //JVM规范:8字节的常量池项在计数上占两个
                _cp_index[index] = &_tag_0;
            }
            break;
        case CONSTANT_Double:
            {
                u4 high_bytes = in.read_u4();
                u4 low_bytes = in.read_u4();

                _cp_index[index] = out.current();
                out.write_u1(tag);
                out.write_u4(high_bytes);
                out.write_u4(low_bytes);
                index++;    //JVM规范:8字节的常量池项在计数上占两个
                _cp_index[index] = &_tag_0;
            }
            break;
        case CONSTANT_Class:
            {
                u2 name_and_index = in.read_u2();

                _cp_index[index] = out.current();
                out.write_u1(tag);
                out.write_u2(name_and_index);
            }
            break;
        case CONSTANT_String:
            {
                u2 string_index = in.read_u2();

                _cp_index[index] = out.current();
                out.write_u1(tag);
                out.write_u2(string_index);
            }
            break;
        case CONSTANT_Fieldref:
        case CONSTANT_Methodref:
        case CONSTANT_InterfaceMethodref:
            {
                u2 class_index = in.read_u2();
                u2 name_and_type_index = in.read_u2();

                _cp_index[index] = out.current();
                out.write_u1(tag);
                out.write_u2(class_index);
                out.write_u2(name_and_type_index);
            }
            break;
        case CONSTANT_NameAndType:
            {
                u2 name_index = in.read_u2();
                u2 descriptor_index = in.read_u2();

                _cp_index[index] = out.current();
                out.write_u1(tag);
                out.write_u2(name_index);
                out.write_u2(descriptor_index);
            }
            break;
        default:
            {
                char msg[30];
                sprintf(msg, "unknown tag: %d", tag);
                assert_exception(false, msg);
            }
            break;
        }
    }

    //验证相等
    //const u1 * p1 = _class_buffer + magic_version_length + 2;
    //int cmp = memcmp(p1, _cp_data, _cp_length);

    //检查常量项相互之间的引用是否正确
    for (int index = 1; index < cp_count; index++)
    {
        u1 * pCurrent = _cp_index[index - 1];
        u1 tag = *pCurrent++;
        switch (tag)
        {
        case CONSTANT_Utf8:
            break;
        case CONSTANT_Integer:
            break;
        case CONSTANT_Float:
            break;
        case CONSTANT_Long:
            {
                index++;
            }
            break;
        case CONSTANT_Double:
            {
                index++;
            }
            break;
        case CONSTANT_Class:
            {
                u2 name_index = ClassBufferInput::read_java_u2(pCurrent);
                u1 ref_tag = *(_cp_index[name_index - 1]);
                assert_exception(ref_tag == CONSTANT_Utf8, 
                    "name_index refered is not CONSTANT_Utf8");
            }
            break;
        case CONSTANT_String:
            {
                u2 string_index = ClassBufferInput::read_java_u2(pCurrent);
                u1 ref_tag = *(_cp_index[string_index - 1]);
                assert_exception(ref_tag == CONSTANT_Utf8,
                    "string_index refered is not CONSTANT_Utf8");
            }
            break;
        case CONSTANT_Fieldref:
        case CONSTANT_Methodref:
        case CONSTANT_InterfaceMethodref:
            {
                u2 class_index = ClassBufferInput::read_java_u2(pCurrent);
                pCurrent += 2;
                u2 name_and_type_index = ClassBufferInput::read_java_u2(pCurrent);
                u1 ref_tag1 = *(_cp_index[class_index - 1]);
                assert_exception(ref_tag1 == CONSTANT_Class,
                    "class_index refered is not CONSTANT_Class");
                u1 ref_tag2 = *(_cp_index[name_and_type_index - 1]);
                assert_exception(ref_tag2 == CONSTANT_NameAndType,
                    "name_and_type_index refered is not CONSTANT_NameAndType");
            }
            break;
        case CONSTANT_NameAndType:
            {
                u2 name_index = ClassBufferInput::read_java_u2(pCurrent);
                pCurrent += 2;
                u2 descriptor_index = ClassBufferInput::read_java_u2(pCurrent);
                u1 ref_tag1 = *(_cp_index[name_index - 1]);
                assert_exception(ref_tag1 == CONSTANT_Utf8,
                    "name_index refered is not CONSTANT_Utf8");
                u1 ref_tag2 = *(_cp_index[descriptor_index - 1]);
                assert_exception(ref_tag2 == CONSTANT_Utf8,
                    "descriptor_index refered is not CONSTANT_Utf8");
            }
            break;
        }
    }
}

// Prints one line to stdout with the stored class file version, the
// constant pool count and the constant pool size in bytes.
void ClassFileParser::printSummary()
{
    const int major = _major_version;
    const int minor = _minor_version;
    printf("class version: %d.%d, cp count: %d, cp length: %d\n",
        major, minor, _cp_count, _cp_length);
}


//ClassBufferInput

// Wraps `length` bytes starting at `buffer`; the cursor starts at the first byte.
ClassBufferInput::ClassBufferInput(const u1* buffer, int length)
    : _buffer_start(buffer),
      _buffer_end(buffer + length),
      _current(buffer)
{
}

void ClassBufferInput::guarantee_size(int size) throw (Exception)
{
    if (size > (_buffer_end - _buffer_start)) {
        throw Exception("unexpected end of file");
    }
}

// Returns the byte at the cursor and advances the cursor by one.
u1 ClassBufferInput::read_u1() throw (Exception)
{
    guarantee_size(1);
    return *_current++;
}

// Decodes the high-byte-first u2 at the cursor and advances the cursor by two.
u2 ClassBufferInput::read_u2() throw (Exception)
{
    guarantee_size(2);
    const u1 *at = _current;
    _current = at + 2;
    return read_java_u2(at);
}

// Decodes the high-byte-first u4 at the cursor and advances the cursor by four.
u4 ClassBufferInput::read_u4() throw (Exception)
{
    guarantee_size(4);
    const u1 *at = _current;
    _current = at + 4;
    return read_java_u4(at);
}

// Combines buffer[0] (high byte) and buffer[1] (low byte) into a u2.
// No bounds checking is done here.
u2 ClassBufferInput::read_java_u2(const u1 *buffer)
{
    const u2 high = buffer[0];
    const u2 low = buffer[1];
    return (u2)((high << 8) | low);
}

// Combines buffer[0..3], most significant byte first, into a u4.
// No bounds checking is done here.
u4 ClassBufferInput::read_java_u4(const u1 *buffer)
{
    u4 value = 0;
    for (int i = 0; i < 4; i++) {
        // Make room for the next, less significant byte.
        value = (value << 8) | (u4)buffer[i];
    }
    return value;
}

// Advances the cursor past one byte after the size check.
void ClassBufferInput::skip_u1() throw (Exception)
{
    guarantee_size(1);
    _current += 1;
}

// Advances the cursor past a u2 (two bytes) after the size check.
void ClassBufferInput::skip_u2() throw (Exception)
{
    const int width = 2;
    guarantee_size(width);
    _current = _current + width;
}

// Advances the cursor past a u4 (four bytes) after the size check.
void ClassBufferInput::skip_u4() throw (Exception)
{
    const int width = 4;
    guarantee_size(width);
    _current = _current + width;
}

// Advances the cursor by n bytes after the size check.
void ClassBufferInput::skip_n(int n) throw (Exception)
{
    guarantee_size(n);
    _current = _current + n;
}


//ClassBufferOutput

// Wraps `length` writable bytes starting at `buffer`; writing starts at the
// first byte.
ClassBufferOutput::ClassBufferOutput(u1* buffer, int length)
    : _buffer_start(buffer),
      _buffer_end(buffer + length),
      _current(buffer)
{
}

void ClassBufferOutput::guarantee_size(int size) throw (Exception)
{
    if (size > (_buffer_end - _buffer_start)) {
        throw Exception("insufficient buffer");
    }
}

// Stores one byte at the cursor and advances the cursor by one.
void ClassBufferOutput::write_u1(u1 c) throw (Exception)
{
    guarantee_size(1);
    *_current = c;
    ++_current;
}

// Stores x as two bytes, high byte first, and advances the cursor by two.
void ClassBufferOutput::write_u2(u2 x) throw (Exception)
{
    guarantee_size(2);
    _current[0] = (u1)(x >> 8);
    _current[1] = (u1)x;
    _current += 2;
}

// Stores x as four bytes, most significant byte first, and advances the
// cursor by four.
void ClassBufferOutput::write_u4(u4 x) throw (Exception)
{
    guarantee_size(4);
    for (int shift = 24; shift >= 0; shift -= 8) {
        *_current++ = (u1)(x >> shift);
    }
}

// Copies n bytes from `bytes` to the cursor, one at a time, advancing the
// cursor by n. Nothing is copied when n is not positive.
void ClassBufferOutput::write_bytes(const u1 *bytes, int n) throw (Exception)
{
    guarantee_size(n);
    int remaining = n;
    while (remaining-- > 0) {
        *_current = *bytes;
        ++_current;
        ++bytes;
    }
}

util.h

#ifndef _util_h
#define _util_h

// Simple exception type carrying a message in a fixed-size internal buffer.
class Exception
{
public:
    // Creates an exception with an empty message.
    Exception();

    // Creates an exception holding a copy of msg.
    Exception(const char *msg);

    // Returns the stored, NUL-terminated message; it points into this object.
    const char * getMessage() const
    {
        return _msg;
    }

private:
    static const int MAX_MSG_SIZE = 256;
    char _msg[MAX_MSG_SIZE+1];  // one extra byte beyond MAX_MSG_SIZE
};

// Throws a default-constructed Exception when b is false; otherwise does nothing.
void assert_exception(bool b) throw (Exception);
// Throws Exception(msg) when b is false; otherwise does nothing.
void assert_exception(bool b, const char *msg) throw (Exception);

#endif

util.cpp

#include "util.h"
#include <string.h>

// Creates an exception with an empty message. The whole buffer is cleared,
// including its final byte, so no part of the object is left uninitialized.
Exception::Exception()
{
    memset(_msg, 0, sizeof(_msg));
}

// Creates an exception holding a copy of msg, truncated to MAX_MSG_SIZE
// characters. A NULL msg yields an empty message.
Exception::Exception(const char *msg)
{
    // Clearing first guarantees termination: strncpy below writes at most
    // MAX_MSG_SIZE bytes, so _msg[MAX_MSG_SIZE] always stays '\0'.
    memset(_msg, 0, sizeof(_msg));
    if (msg) {
        strncpy(_msg, msg, MAX_MSG_SIZE);
    }
}

// Throws an Exception with an empty message unless b holds.
void assert_exception(bool b) throw (Exception)
{
    if (b) {
        return;
    }
    throw Exception();
}

// Throws an Exception carrying msg unless b holds.
void assert_exception(bool b, const char *msg) throw (Exception)
{
    if (b) {
        return;
    }
    throw Exception(msg);
}

这个JVMTest工程可以用Visual Studio编译,或者在Linux上编译。我用的是cygwin,编译命令:g++ -g ClassFileParser.cpp JVMTest.cpp util.cpp

 

抱歉!评论已关闭.