STEP 2——记录/恢复线程运行上下文
首先说明一下什么是“陷阱帧”。当一个在用户模式执行的线程请求一个系统调用,在x86平台上,也就是执行一条INT指令,或者sysenter指令,随即就会通过一个软件中断陷入到内核。这时CPU的工作状态由之前的用户态切换到内核态(也就是驱动开发中常说的ring3到ring0)。在这个切换过程中,系统要做一些前期的工作。因为系统服务例程是占用调用者线程的上下文来执行的,所以有必要保存调用者被中断时的状态,以便系统服务例程结束后,调用者可以继续运行。由于一个线程有两个栈,一个是用户模式栈,一个是内核模式栈,在用户态使用的是用户栈,在内核态使用的是内核栈,所以陷入到内核时并不需要保存用户栈的内容,需要保存的仅仅是一些寄存器的状态。这就是线程的“硬件上下文”。为此,操作系统定义了一个名为_KTRAP_FRAME的结构体,这个结构体就代表了“硬件上下文”,也就是所谓的“陷阱帧”。_KTRAP_FRAME定义如下:
typedef struct
_KTRAP_FRAME {
//
// Following 4 values are only used and defined
for DBG systems,
// but are always allocated to make switching
from DBG to non-DBG
// and back quicker. They are not DEVL because they have a non-0
// performance impact.
//
ULONG
DbgEbp; // Copy of User
EBP set up so KB will work.
ULONG
DbgEip; // EIP of caller
to system call, again, for KB.
ULONG
DbgArgMark; // Marker to show
no args here.
ULONG
DbgArgPointer; // Pointer to the
actual args
//
// Temporary values used when frames are edited.
//
//
// NOTE:
Any code that want's ESP must materialize it, since it
// is not stored in the frame for kernel
mode callers.
//
// And code that sets ESP in a KERNEL
mode frame, must put
// the new value in TempEsp, make sure
that TempSegCs holds
// the real SegCs value, and put a
special marker value into SegCs.
//
ULONG
TempSegCs;
ULONG
TempEsp;
//
// Debug registers.
//
ULONG
Dr0;
ULONG
Dr1;
ULONG
Dr2;
ULONG
Dr3;
ULONG
Dr6;
ULONG
Dr7;
//
// Segment registers
//
ULONG
SegGs;
ULONG
SegEs;
ULONG
SegDs;
//
// Volatile registers
//
ULONG
Edx;
ULONG
Ecx;
ULONG
Eax;
//
// Nesting state, not part of context record
//
ULONG
PreviousPreviousMode;
PEXCEPTION_REGISTRATION_RECORD
ExceptionList;
//
Trash if caller was user mode.
//
Saved exception list if caller
//
was kernel mode or we're in
// an interrupt.
//
// FS is TIB/PCR pointer, is here to make save
sequence easy
//
ULONG
SegFs;
//
// Non-volatile registers
//
ULONG
Edi;
ULONG
Esi;
ULONG
Ebx;
ULONG
Ebp;
//
// Control registers
//
ULONG
ErrCode;
ULONG
Eip;
ULONG
SegCs;
ULONG
EFlags;
ULONG
HardwareEsp; // WARNING -
segSS:esp are only here for stacks
ULONG
HardwareSegSs; // that involve a
ring transition.
ULONG
V86Es; // these will be
present for all transitions from
ULONG
V86Ds; // V86 mode
ULONG
V86Fs;
ULONG
V86Gs;
} KTRAP_FRAME;
在陷入内核时,系统首先会在内核栈的栈底建立一个“陷阱帧”,等到系统调用结束返回时,再根据陷阱帧中记录的内容,设置好寄存器状态,使得用户线程继续运行。
要明确的一点是,我们的系统调用也是系统调用,其执行过程和一般的系统调用没有任何区别。所以,当我们的这个系统调用执行时,“陷阱帧”已经被系统创建好了。当系统调用结束时,系统自己就会根据“陷阱帧”的内容设置好寄存器状态。这些完全不用我们来操心。我们仅仅需要把“陷阱帧”记录下来,替换回去,这样就能达到目的了。
这里我先把我的这个系统调用的代码列出来:
NTSTATUS
NtRecordOrRecoveryTrapFrame(
)
{
ULONG nByte;
PKTHREAD CurrentThread;
NTSTATUS
ntStatus = STATUS_SUCCESS;
UNICODE_STRING szPath;
IO_STATUS_BLOCK IoStatusBlock;
HANDLE
FileHandle= 0;
PKTRAP_FRAME TrapFrame;
PVOID Reader;
PAGED_CODE();
DbgPrint("****Record or Recovery Trap
Frame....../n/n");
RtlInitUnicodeString(&szPath,
L"//SystemRoot//TrapFrame");
CurrentThread = KeGetCurrentThread ();
TrapFrame=CurrentThread->TrapFrame;
DbgPrint("TrapFrame Value is
0x%X/n/n",TrapFrame);
nByte=sizeof(KTRAP_FRAME);
DbgPrint("nByte is
%d(0x%X)/n/n",nByte,nByte);
PspRecordOrCopyData(&szPath, TrapFrame,
nByte);
return STATUS_SUCCESS;
}
NtRecordOrRecoveryTrapFrame就是系统调用对应的内部例程的名称了。这个函数很简单,最核心的就是TrapFrame=CurrentThread->TrapFrame;这句代码。这句代码获得了TrapFrame的首地址。TrapFrame就是我们关心的“陷阱帧”,它的首地址是保存在Tcb中的。Tcb是KTHREAD的一个实例。KTHREAD定义如下:
typedef struct _KTHREAD {
//
// The
dispatcher header and mutant listhead are fairly infrequently
// referenced.
//
DISPATCHER_HEADER Header;
LIST_ENTRY
MutantListHead;
//
// The
following fields are referenced during context switches and wait
// operatings.
They have been carefully laid out to get the best cache
// hit ratios.
//
PVOID
InitialStack;
PVOID
StackLimit;
PVOID
KernelStack;
KSPIN_LOCK
ThreadLock;
union {
KAPC_STATE
ApcState;
struct {
UCHAR
ApcStateFill[KAPC_STATE_ACTUAL_LENGTH];
BOOLEAN ApcQueueable;
volatile UCHAR NextProcessor;
volatile UCHAR DeferredProcessor;
UCHAR
AdjustReason;
SCHAR
AdjustIncrement;
};
};
KSPIN_LOCK
ApcQueueLock;
#if !defined(_AMD64_)
ULONG
ContextSwitches;
volatile UCHAR
State;
UCHAR
NpxState;
KIRQL
WaitIrql;
KPROCESSOR_MODE WaitMode;
#endif
LONG_PTR
WaitStatus;
union {
PKWAIT_BLOCK WaitBlockList;
PKGATE
GateObject;
};
BOOLEAN
Alertable;
BOOLEAN
WaitNext;
UCHAR
WaitReason;
SCHAR
Priority;
UCHAR
EnableStackSwap;
volatile UCHAR
SwapBusy;
BOOLEAN
Alerted[MaximumMode];
union {
LIST_ENTRY
WaitListEntry;
SINGLE_LIST_ENTRY SwapListEntry;
};
PRKQUEUE
Queue;
#if !defined(_AMD64_)
ULONG
WaitTime;
union {
struct {
SHORT
KernelApcDisable;
SHORT
SpecialApcDisable;
};
ULONG
CombinedApcDisable;
};
#endif
PVOID Teb;
union {
KTIMER
Timer;
struct {
UCHAR
TimerFill[KTIMER_ACTUAL_LENGTH];
//
//
N.B. The following bit number definitions must match the
// following bit field.
//
//
N.B. These bits can only be written with interlocked
// operations.
//
#define KTHREAD_AUTO_ALIGNMENT_BIT 0
#define KTHREAD_DISABLE_BOOST_BIT 1
union
{
struct {
LONG AutoAlignment : 1;
LONG DisableBoost : 1;
LONG ReservedFlags : 30;
};
LONG ThreadFlags;
};
};
};
union {
KWAIT_BLOCK WaitBlock[THREAD_WAIT_OBJECTS + 1];
struct {
UCHAR
WaitBlockFill0[KWAIT_BLOCK_OFFSET_TO_BYTE0];
BOOLEAN SystemAffinityActive;
};
struct {
UCHAR
WaitBlockFill1[KWAIT_BLOCK_OFFSET_TO_BYTE1];
CCHAR
PreviousMode;
};
struct {
UCHAR
WaitBlockFill2[KWAIT_BLOCK_OFFSET_TO_BYTE2];
UCHAR
ResourceIndex;
};
struct {
UCHAR
WaitBlockFill3[KWAIT_BLOCK_OFFSET_TO_BYTE3];
UCHAR
LargeStack;
};
#if defined(_AMD64_)
struct {
UCHAR
WaitBlockFill4[KWAIT_BLOCK_OFFSET_TO_LONG0];
ULONG
ContextSwitches;
};
struct {
UCHAR
WaitBlockFill5[KWAIT_BLOCK_OFFSET_TO_LONG1];
volatile UCHAR State;
UCHAR
NpxState;
KIRQL
WaitIrql;
KPROCESSOR_MODE WaitMode;
};
struct {
UCHAR
WaitBlockFill6[KWAIT_BLOCK_OFFSET_TO_LONG2];
ULONG
WaitTime;
};
struct {
UCHAR
WaitBlockFill7[KWAIT_BLOCK_OFFSET_TO_LONG3];
union
{
struct {
SHORT KernelApcDisable;
SHORT SpecialApcDisable;
};
ULONG CombinedApcDisable;
};
};
#endif
};
LIST_ENTRY
QueueListEntry;
//
// The
following fields are accessed during system service dispatch.
//
PKTRAP_FRAME
TrapFrame;
PVOID
CallbackStack;
PVOID
ServiceTable;
#if defined(_AMD64_)
ULONG
KernelLimit;
#endif
//
// The
following fields are referenced during ready thread and wait
// completion.
//
UCHAR
ApcStateIndex;
UCHAR
IdealProcessor;
BOOLEAN
Preempted;
BOOLEAN
ProcessReadyQueue;
#if defined(_AMD64_)
PVOID
Win32kTable;
ULONG
Win32kLimit;
#endif
BOOLEAN
KernelStackResident;
SCHAR
BasePriority;
SCHAR
PriorityDecrement;
CHAR
Saturation;
KAFFINITY
UserAffinity;
PKPROCESS
Process;
KAFFINITY
Affinity;
//
// The below
fields are infrequently referenced.
//
PKAPC_STATE
ApcStatePointer[2];
union {
KAPC_STATE
SavedApcState;
struct {
UCHAR
SavedApcStateFill[KAPC_STATE_ACTUAL_LENGTH];
CCHAR
FreezeCount;
CCHAR SuspendCount;
UCHAR
UserIdealProcessor;
UCHAR
CalloutActive;
#if defined(_AMD64_)
BOOLEAN CodePatchInProgress;
#elif defined(_X86_)
UCHAR
Iopl;
#else
UCHAR
OtherPlatformFill;
#endif
};
};
PVOID
Win32Thread;
PVOID
StackBase;
union {
KAPC
SuspendApc;
struct {
UCHAR
SuspendApcFill0[KAPC_OFFSET_TO_SPARE_BYTE0];
SCHAR
Quantum;
};
struct {
UCHAR
SuspendApcFill1[KAPC_OFFSET_TO_SPARE_BYTE1];
UCHAR
QuantumReset;
};
struct {
UCHAR
SuspendApcFill2[KAPC_OFFSET_TO_SPARE_LONG];
ULONG
KernelTime;
};
struct {
UCHAR
SuspendApcFill3[KAPC_OFFSET_TO_SYSTEMARGUMENT1];
PVOID
TlsArray;
};
struct {
UCHAR
SuspendApcFill4[KAPC_OFFSET_TO_SYSTEMARGUMENT2];
PVOID
BBTData;
};
struct {
UCHAR
SuspendApcFill5[KAPC_ACTUAL_LENGTH];
UCHAR
PowerState;
ULONG
UserTime;
};
};