O API的时候你考虑到了吗?

在Driver中调用I/O API的时候你考虑到了吗?

去年学习了之后就给忘了,现在又花了半天时间熟悉了这块的知识,防止自己再忘记先记录下来警醒自己。

本文的主题是指出在驱动中调用I/O函数时存在的问题:轻者卡死,重者BSOD。

1. 前言

驱动中我们经常使用一些I/O函数来查询文件、设备的信息,比如用IoQueryFileDosDeviceName获取文件的DOS路径(常用来取得进程映像的完整路径),用IoVolumeDeviceToDosName获取卷的DOS名称等等。使用这些函数的场景一般无外乎LoadImage回调、CreateProcess回调、微过滤器(minifilter)/传统文件过滤驱动注册的callback,甚至是一些内核Hook点。在没有做线上产品、用户不多的时候,很多问题不会被察觉,自己测试时也基本无法复现;但是用户一多,就可能遇到各种奇奇怪怪的反馈。那么到底哪里容易出问题呢?

2. I/O API的特殊性

Windows的I/O管理器提供的API大多数是异步完成的,其内部大量使用APC,因此很多I/O函数对调用环境有很高的要求,例如:

IoVolumeDeviceToDosName

Starting with Windows Vista, you must ensure that APCs are not

disabled before calling this routine. The KeAreAllApcsDisabled routine

can be used to verify that APCs are not disabled.

意思很明确:这个API内部需要用到Kernel APC,调用时必须确保当前线程的APC可以被投递执行,可以用KeAreAllApcsDisabled这个API做判断。

你无法预知当前代码执行时的环境是怎样的。比如在LoadImage回调中,你无法确保当前的IRQL一定是PASSIVE_LEVEL,也无法确保当前不在某个内核临界区范围内。说到内核的临界区,现在常用的有两种:

KeEnterCriticalRegion与KeEnterGuardedRegion。

//
// Enters a Guarded Region on the given thread.
//
// Decrements _Thread->SpecialApcDisable. _Thread must be the current thread
// and the IRQL must be <= APC_LEVEL (both are asserted). The body is wrapped
// in do { } while (0) so the macro behaves as a single statement, e.g. in an
// unbraced if/else. _Thread is evaluated more than once, so pass an
// expression without side effects.
//
#define KeEnterGuardedRegionThread(_Thread) \
    do \
    { \
        /* Sanity checks */ \
        ASSERT(KeGetCurrentIrql() <= APC_LEVEL); \
        ASSERT((_Thread) == KeGetCurrentThread()); \
        ASSERT(((_Thread)->SpecialApcDisable <= 0) && \
               ((_Thread)->SpecialApcDisable != -32768)); \
        \
        /* Disable Special APCs */ \
        (_Thread)->SpecialApcDisable--; \
    } while (0)

/*
 * Enters a Guarded Region on the current thread: fetches the current thread
 * and forwards to KeEnterGuardedRegionThread. Wrapped in do { } while (0) so
 * that "KeEnterGuardedRegion();" is a single statement.
 */
#define KeEnterGuardedRegion() \
    do \
    { \
        PKTHREAD _Thread = KeGetCurrentThread(); \
        KeEnterGuardedRegionThread(_Thread); \
    } while (0)

//
// Leaves a Guarded Region on the given thread.
//
// Increments _Thread->SpecialApcDisable; when the counter reaches zero and
// the thread's kernel-mode APC list is not empty, calls
// KiCheckForKernelApcDelivery(). _Thread must be the current thread, must be
// inside a guarded region (counter < 0) and the IRQL must be <= APC_LEVEL
// (all asserted). Wrapped in do { } while (0) so it is a single statement.
// _Thread is evaluated more than once.
//
#define KeLeaveGuardedRegionThread(_Thread) \
    do \
    { \
        /* Sanity checks */ \
        ASSERT(KeGetCurrentIrql() <= APC_LEVEL); \
        ASSERT((_Thread) == KeGetCurrentThread()); \
        ASSERT((_Thread)->SpecialApcDisable < 0); \
        \
        /* Leave region and check if APCs are OK now */ \
        if (!(++(_Thread)->SpecialApcDisable)) \
        { \
            /* Check for Kernel APCs on the list */ \
            if (!IsListEmpty(&(_Thread)->ApcState.ApcListHead[KernelMode])) \
            { \
                /* Check for APC Delivery */ \
                KiCheckForKernelApcDelivery(); \
            } \
        } \
    } while (0)

/*
 * Leaves a Guarded Region on the current thread: fetches the current thread
 * and forwards to KeLeaveGuardedRegionThread. Wrapped in do { } while (0) so
 * that "KeLeaveGuardedRegion();" is a single statement.
 */
#define KeLeaveGuardedRegion() \
    do \
    { \
        PKTHREAD _Thread = KeGetCurrentThread(); \
        KeLeaveGuardedRegionThread(_Thread); \
    } while (0)

//
// Enters a Critical Region on the given thread.
//
// Decrements _Thread->KernelApcDisable. _Thread must be the current thread
// (asserted). Wrapped in do { } while (0) so the macro is a single
// statement. _Thread is evaluated more than once.
//
#define KeEnterCriticalRegionThread(_Thread) \
    do \
    { \
        /* Sanity checks */ \
        ASSERT((_Thread) == KeGetCurrentThread()); \
        ASSERT(((_Thread)->KernelApcDisable <= 0) && \
               ((_Thread)->KernelApcDisable != -32768)); \
        \
        /* Disable Kernel APCs */ \
        (_Thread)->KernelApcDisable--; \
    } while (0)

/*
 * Enters a Critical Region on the current thread: fetches the current thread
 * and forwards to KeEnterCriticalRegionThread. Wrapped in do { } while (0) so
 * that "KeEnterCriticalRegion();" is a single statement.
 */
#define KeEnterCriticalRegion() \
    do \
    { \
        PKTHREAD _Thread = KeGetCurrentThread(); \
        KeEnterCriticalRegionThread(_Thread); \
    } while (0)

//
// Leaves a Critical Region on the given thread.
//
// Increments _Thread->KernelApcDisable; when the counter reaches zero, the
// kernel-mode APC list is not empty and SpecialApcDisable is zero, calls
// KiCheckForKernelApcDelivery(). _Thread must be the current thread and must
// be inside a critical region (counter < 0); both are asserted. Wrapped in
// do { } while (0) so it is a single statement. _Thread is evaluated more
// than once.
//
#define KeLeaveCriticalRegionThread(_Thread) \
    do \
    { \
        /* Sanity checks */ \
        ASSERT((_Thread) == KeGetCurrentThread()); \
        ASSERT((_Thread)->KernelApcDisable < 0); \
        \
        /* Enable Kernel APCs */ \
        (_Thread)->KernelApcDisable++; \
        \
        /* Check if Kernel APCs are now enabled */ \
        if (!((_Thread)->KernelApcDisable)) \
        { \
            /* Check if we need to request an APC Delivery */ \
            if (!(IsListEmpty(&(_Thread)->ApcState.ApcListHead[KernelMode])) && \
                !((_Thread)->SpecialApcDisable)) \
            { \
                /* Check for the right environment */ \
                KiCheckForKernelApcDelivery(); \
            } \
        } \
    } while (0)

/*
 * Leaves a Critical Region on the current thread: fetches the current thread
 * and forwards to KeLeaveCriticalRegionThread. Wrapped in do { } while (0) so
 * that "KeLeaveCriticalRegion();" is a single statement.
 */
#define KeLeaveCriticalRegion() \
    do \
    { \
        PKTHREAD _Thread = KeGetCurrentThread(); \
        KeLeaveCriticalRegionThread(_Thread); \
    } while (0)

我们看到,调用KeEnterCriticalRegion之后,当前线程的KernelApcDisable是非零值;而调用KeEnterGuardedRegion之后,当前线程的SpecialApcDisable是非零值。

这两个是什么玩意呢??

我们接着来看Apc分发的时候如何使用这两个值的

/*
 * KiDeliverApc - delivers the pending APCs of the current thread.
 *
 * DeliveryMode   - user APCs are only considered when this is UserMode.
 * ExceptionFrame - passed through to KiInitializeUserApc for a user APC.
 * TrapFrame      - installed as Thread->TrapFrame for the duration of the
 *                  call and passed to KiInitializeUserApc.
 *
 * Must be entered at APC_LEVEL (asserted). Order of work:
 *   1. If SpecialApcDisable is non-zero, nothing is delivered at all.
 *   2. The kernel-mode APC list is drained. An APC with a NULL NormalRoutine
 *      (special APC) only has its KernelRoutine called. An APC with a
 *      NormalRoutine (normal APC) is skipped, ending delivery, while
 *      KernelApcInProgress or KernelApcDisable is set; otherwise its
 *      KernelRoutine is called and then its NormalRoutine at PASSIVE_LEVEL.
 *   3. For UserMode delivery, at most one user APC is dequeued and set up.
 * Bugchecks if a KernelRoutine returns at a different IRQL, or if the thread
 * ends up attached to a different process than at entry.
 */
VOID

NTAPI

KiDeliverApc(IN KPROCESSOR_MODE DeliveryMode,

IN PKEXCEPTION_FRAME ExceptionFrame,

IN PKTRAP_FRAME TrapFrame)

{

PKTHREAD Thread = KeGetCurrentThread();

PKPROCESS Process = Thread->ApcState.Process;

PKTRAP_FRAME OldTrapFrame;

PLIST_ENTRY ApcListEntry;

PKAPC Apc;

KLOCK_QUEUE_HANDLE ApcLock;

PKKERNEL_ROUTINE KernelRoutine;

PVOID NormalContext;

PKNORMAL_ROUTINE NormalRoutine;

PVOID SystemArgument1;

PVOID SystemArgument2;

ASSERT_IRQL_EQUAL(APC_LEVEL);

/* Save the old trap frame and set current one */

OldTrapFrame = Thread->TrapFrame;

Thread->TrapFrame = TrapFrame;

/* Clear Kernel APC Pending */

Thread->ApcState.KernelApcPending = FALSE;

/* Check if Special APCs are disabled */

if (Thread->SpecialApcDisable) goto Quickie; // Master switch: while SpecialApcDisable is non-zero, none of this thread's APCs are delivered

/* Do the Kernel APCs first */

while (!IsListEmpty(&Thread->ApcState.ApcListHead[KernelMode]))

{

/* Lock the APC Queue */

KiAcquireApcLockAtApcLevel(Thread, &ApcLock);

/* Check if the list became empty now */

if (IsListEmpty(&Thread->ApcState.ApcListHead[KernelMode]))

{

/* It is, release the lock and break out */

KiReleaseApcLock(&ApcLock);

break;

}

/* Kernel APC is not pending anymore */

Thread->ApcState.KernelApcPending = FALSE;

/* Get the next Entry */

ApcListEntry = Thread->ApcState.ApcListHead[KernelMode].Flink;

Apc = CONTAINING_RECORD(ApcListEntry, KAPC, ApcListEntry);

/* Save Parameters so that it's safe to free the Object in the Kernel Routine*/

NormalRoutine = Apc->NormalRoutine;

KernelRoutine = Apc->KernelRoutine;

NormalContext = Apc->NormalContext;

SystemArgument1 = Apc->SystemArgument1;

SystemArgument2 = Apc->SystemArgument2;

/* Special APC */

if (!NormalRoutine)

{

/* Remove the APC from the list */

RemoveEntryList(ApcListEntry);

Apc->Inserted = FALSE;

/* Release the APC lock */

KiReleaseApcLock(&ApcLock);

/* Call the Special APC */

KernelRoutine(Apc,

&NormalRoutine,

&NormalContext,

&SystemArgument1,

&SystemArgument2);

/* Make sure it returned correctly */

if (KeGetCurrentIrql() != ApcLock.OldIrql)

{

KeBugCheckEx(IRQL_UNEXPECTED_VALUE,

(KeGetCurrentIrql() << 16) |

(ApcLock.OldIrql << 8),

(ULONG_PTR)KernelRoutine,

(ULONG_PTR)Apc,

(ULONG_PTR)NormalRoutine);

}

}

else

{

/* Normal Kernel APC, make sure it's safe to deliver */

if ((Thread->ApcState.KernelApcInProgress) ||

(Thread->KernelApcDisable)) // Sub-switch: decides whether normal kernel APCs may run

{

/* Release lock and return */

KiReleaseApcLock(&ApcLock);

goto Quickie;

}

/* Dequeue the APC */

RemoveEntryList(ApcListEntry);

Apc->Inserted = FALSE;

/* Go back to APC_LEVEL */

KiReleaseApcLock(&ApcLock);

/* Call the Kernel APC */

KernelRoutine(Apc,

&NormalRoutine,

&NormalContext,

&SystemArgument1,

&SystemArgument2);

/* Make sure it returned correctly */

if (KeGetCurrentIrql() != ApcLock.OldIrql)

{

KeBugCheckEx(IRQL_UNEXPECTED_VALUE,

(KeGetCurrentIrql() << 16) |

(ApcLock.OldIrql << 8),

(ULONG_PTR)KernelRoutine,

(ULONG_PTR)Apc,

(ULONG_PTR)NormalRoutine);

}

/* Check if there still is a Normal Routine */

if (NormalRoutine)

{

/* At Passive Level, an APC can be preempted by a Special APC */

Thread->ApcState.KernelApcInProgress = TRUE;

KeLowerIrql(PASSIVE_LEVEL);

/* Call and Raise IRQL back to APC_LEVEL */

NormalRoutine(NormalContext, SystemArgument1, SystemArgument2);

KeRaiseIrql(APC_LEVEL, &ApcLock.OldIrql);

}

/* Set Kernel APC in progress to false and loop again */

Thread->ApcState.KernelApcInProgress = FALSE;

}

}

/* Now we do the User APCs */

if ((DeliveryMode == UserMode) &&

!(IsListEmpty(&Thread->ApcState.ApcListHead[UserMode])) &&

(Thread->ApcState.UserApcPending))

{

/* Lock the APC Queue */

KiAcquireApcLockAtApcLevel(Thread, &ApcLock);

/* It's not pending anymore */

Thread->ApcState.UserApcPending = FALSE;

/* Check if the list became empty now */

if (IsListEmpty(&Thread->ApcState.ApcListHead[UserMode]))

{

/* It is, release the lock and break out */

KiReleaseApcLock(&ApcLock);

goto Quickie;

}

/* Get the actual APC object */

ApcListEntry = Thread->ApcState.ApcListHead[UserMode].Flink;

Apc = CONTAINING_RECORD(ApcListEntry, KAPC, ApcListEntry);

/* Save Parameters so that it's safe to free the Object in the Kernel Routine*/

NormalRoutine = Apc->NormalRoutine;

KernelRoutine = Apc->KernelRoutine;

NormalContext = Apc->NormalContext;

SystemArgument1 = Apc->SystemArgument1;

SystemArgument2 = Apc->SystemArgument2;

/* Remove the APC from Queue, and release the lock */

RemoveEntryList(ApcListEntry);

Apc->Inserted = FALSE;

KiReleaseApcLock(&ApcLock);

/* Call the kernel routine */

KernelRoutine(Apc,

&NormalRoutine,

&NormalContext,

&SystemArgument1,

&SystemArgument2);

/* Check if there's no normal routine */

if (!NormalRoutine)

{

/* Check if more User APCs are Pending */

KeTestAlertThread(UserMode);

}

else

{

/* Set up the Trap Frame and prepare for Execution in NTDLL.DLL */

KiInitializeUserApc(ExceptionFrame,

TrapFrame,

NormalRoutine,

NormalContext,

SystemArgument1,

SystemArgument2);

}

}

Quickie:

/* Make sure we're still in the same process */

if (Process != Thread->ApcState.Process)

{

/* Erm, we got attached or something! BAD! */

KeBugCheckEx(INVALID_PROCESS_ATTACH_ATTEMPT,

(ULONG_PTR)Process,

(ULONG_PTR)Thread->ApcState.Process,

Thread->ApcStateIndex,

KeGetCurrentPrcb()->DpcRoutineActive);

}

/* Restore the trap frame */

Thread->TrapFrame = OldTrapFrame;

}

An asynchronous procedure call (APC) is a function that executes asynchronously. APCs are similar to deferred procedure calls (DPCs), but unlike DPCs, APCs execute within the context of a particular thread. Drivers (other than file systems and file-system filter drivers) do not use APCs directly, but other parts of the operating system do, so you need to be aware of how APCs work.

The Windows operating system uses three kinds of APCs:

1. User APCs run strictly in user mode and only when the current thread is in an alertable wait state. The operating system uses user APCs to implement mechanisms such as overlapped I/O and the QueueUserAPC Win32 routine.

2. Normal kernel APCs run in kernel

mode at IRQL = PASSIVE_LEVEL. A normal kernel APC preempts all user-mode

code, including user APCs. Normal kernel APCs are generally used by

file systems and file-system filter drivers.

3. Special kernel APCs run in kernel mode at IRQL = APC_LEVEL. A special kernel APC preempts user-mode code and kernel-mode code that executes at IRQL = PASSIVE_LEVEL, including both user APCs and normal kernel APCs. The operating system uses special kernel APCs to handle operations such as I/O request completion.

其实总结一下很简单,一个用户模式的Apc,一个内核模式的Apc(分为NormalRoutine为NULL的SpecialKernelApc和不为NULL的NormalKernelApc)

区别在于:SpecialKernelApc只执行KernelRoutine,执行时IRQL为APC_LEVEL;而NormalKernelApc不仅执行KernelRoutine,还会在PASSIVE_LEVEL下执行NormalRoutine。

但是并非每个I/O API都说明了它内部用的是哪种类型的Kernel APC(一般MSDN会说明),拿不准的时候稳妥一点,就判断总开关SpecialApcDisable。

3. KeAreApcsDisabled/KeAreAllApcsDisabled

/*
 * Reports whether the current thread's CombinedApcDisable field is non-zero.
 *
 * Returns TRUE when it is non-zero, FALSE otherwise.
 */
BOOLEAN
NTAPI
KeAreApcsDisabled(VOID)
{
    /* Any non-zero combined value counts as "disabled" */
    if (KeGetCurrentThread()->CombinedApcDisable != 0)
    {
        return TRUE;
    }

    return FALSE;
}

/*
 * Reports whether the current thread has SpecialApcDisable set or is
 * running at IRQL >= APC_LEVEL.
 *
 * Returns TRUE when either condition holds, FALSE otherwise.
 */
BOOLEAN
NTAPI
KeAreAllApcsDisabled(VOID)
{
    /* A non-zero special-APC counter decides the answer on its own */
    if (KeGetCurrentThread()->SpecialApcDisable)
    {
        return TRUE;
    }

    /* Otherwise the answer depends on the current IRQL */
    if (KeGetCurrentIrql() >= APC_LEVEL)
    {
        return TRUE;
    }

    return FALSE;
}

/*
 * Abridged excerpt of KTHREAD ("......" marks elided members). Only the APC
 * disable counters are shown: two SHORT counters share storage with one
 * ULONG through an anonymous union, so CombinedApcDisable is non-zero
 * whenever either counter is non-zero.
 */
typedef struct _KTHREAD

{

......

union

{

struct

{

SHORT KernelApcDisable; /* changed by the KeEnter/LeaveCriticalRegion macros */

SHORT SpecialApcDisable; /* changed by the KeEnter/LeaveGuardedRegion macros */

};

ULONG CombinedApcDisable; /* both counters viewed as a single value */

};

......

};

主要看这两个API有什么区别,可能很多人看不出来什么区别。。。

我也理解了很久,

KeAreApcsDisabled是说只要当前在内核临界区内就是Disable状态,即子开关KernelApcDisable和总开关SpecialApcDisable至少有一个非零:如果是SpecialApcDisable非零,则所有APC都处于无效状态(与KeAreAllApcsDisabled的判断相同);如果只是KernelApcDisable非零,则只有Normal Kernel APC失效。用KeEnterCriticalRegion进入的临界区只能用这个函数检查。一般用这个函数就可以了,当然最好写成 if ( KeAreApcsDisabled() || __readcr8() == APC_LEVEL )(__readcr8仅适用于x64,也可以用KeGetCurrentIrql()代替)。

而KeAreAllApcsDisabled则是真正意义上的所有APC都无效,但是对于用KeEnterCriticalRegion进入的临界区,这个API是无法判断的。

4. 解决办法

当处于无法直接调用I/O API的环境时,建议把调用转交给系统工作线程(worker thread)执行:工作线程的执行环境比较稳定,而且即使同步等待它完成,耗时也不会很久。例如:

/*
 * Work item plus the call context handed to QueryProcessFileNameWorkItem.
 * GetProcessImageFileNameSafeIrql fills it in, queues WorkQueueItem and
 * waits on CompleteEvent for the result.
 */
typedef struct tag_FyWorkQueueItem

{

WORK_QUEUE_ITEM WorkQueueItem; /* item passed to ExQueueWorkItem */

PVOID lpParameter1; /* process id (HANDLE) */

PVOID lpParameter2; /* output buffer (WCHAR*) */

PVOID lpParameter3; /* uMaxSize of the output buffer, stored in a pointer */

KEVENT CompleteEvent; /* set by the worker routine when it is done */

BOOL bStatus; /* result of GetProcessImageFileName */

} FyWorkQueueItem, *PFyWorkQueueItem;

/*
 * QueryProcessObjectName
 *
 * Returns the image file name of the process identified by ProcessId, as
 * reported by ZwQueryInformationProcess(ProcessImageFileName).
 *
 * ProcessId - id of the process to query.
 *
 * Returns a pool-allocated UNICODE_STRING on success, or NULL on any
 * failure (IRQL above PASSIVE_LEVEL, process not found, handle or query
 * failure, out of memory). The caller owns the returned buffer and releases
 * it with ExFreePool.
 */
PUNICODE_STRING QueryProcessObjectName(IN HANDLE ProcessId)
{
    NTSTATUS Status;
    PEPROCESS EProcess = NULL;
    HANDLE hProcess = NULL;
    ULONG ulRealSize = 0;
    ULONG ulBufferSize;
    PUNICODE_STRING lpuniImageFileName = NULL;

    /* The Zw* routines used below are documented for PASSIVE_LEVEL only */
    if (KeGetCurrentIrql() != PASSIVE_LEVEL)
    {
        return NULL;
    }

    Status = PsLookupProcessByProcessId(ProcessId, &EProcess);
    if (!NT_SUCCESS(Status) || !EProcess)
    {
        return NULL;
    }

    Status = ObOpenObjectByPointer((PVOID)EProcess, OBJ_KERNEL_HANDLE, NULL,
                                   PROCESS_ALL_ACCESS, NULL, KernelMode, &hProcess);
    if (!NT_SUCCESS(Status))
    {
        goto DereferenceProcess;
    }

    /* First call passes no buffer: it only reports the required size */
    Status = ZwQueryInformationProcess(hProcess, ProcessImageFileName, NULL, 0, &ulRealSize);
    if (Status != STATUS_INFO_LENGTH_MISMATCH)
    {
        goto CloseProcessHandle;
    }

    ulBufferSize = ulRealSize + sizeof(UNICODE_STRING);
    lpuniImageFileName = (PUNICODE_STRING)ExAllocatePoolWithTag(NonPagedPool,
                                                                ulBufferSize, 'hiti');
    if (!lpuniImageFileName)
    {
        goto CloseProcessHandle;
    }

    memset(lpuniImageFileName, 0, ulBufferSize);

    Status = ZwQueryInformationProcess(hProcess, ProcessImageFileName,
                                       lpuniImageFileName, ulBufferSize, &ulRealSize);
    if (!NT_SUCCESS(Status))
    {
        /*
         * Free only here, where a buffer really exists; the other failure
         * paths never allocated one and must not pass NULL to ExFreePool.
         */
        ExFreePool(lpuniImageFileName);
        lpuniImageFileName = NULL;
    }

CloseProcessHandle:
    ZwClose(hProcess);

DereferenceProcess:
    ObDereferenceObject(EProcess);

    return lpuniImageFileName;
}

/*
 * GetProcessImageFileName
 *
 * Resolves the DOS path of a process image: obtains the image name through
 * QueryProcessObjectName, opens that file, and asks
 * IoQueryFileDosDeviceName for its DOS device name.
 *
 * ProcessId         - id of the process to query.
 * lpwzImageFileName - receives the path as a NUL-terminated wide string;
 *                     written only on success.
 * uMaxSize          - capacity of lpwzImageFileName in WCHARs, including
 *                     room for the terminating NUL.
 *
 * Returns TRUE on success. Returns FALSE if the output buffer is NULL or
 * empty, if called above PASSIVE_LEVEL, if any step fails, or if the path
 * plus its terminator does not fit in the buffer.
 */
BOOL GetProcessImageFileName(
    IN HANDLE ProcessId,
    OUT WCHAR* lpwzImageFileName,
    IN ULONG uMaxSize)
{
    NTSTATUS Status;
    HANDLE FileHandle = NULL;
    IO_STATUS_BLOCK IoStatusBlock = { 0 };
    PUNICODE_STRING lpuniProcessObjectName = NULL;
    OBJECT_ATTRIBUTES oa = { 0 };
    PFILE_OBJECT FileObject = NULL;
    POBJECT_NAME_INFORMATION ObjectNameInformation = NULL;
    /* Computed in SIZE_T so a large uMaxSize cannot wrap a 32-bit product */
    SIZE_T cbOutput = (SIZE_T)uMaxSize * sizeof(WCHAR);
    BOOL bStatus = FALSE;

    if (!lpwzImageFileName || uMaxSize == 0)
    {
        return FALSE;
    }

    if (KeGetCurrentIrql() > PASSIVE_LEVEL)
    {
        return FALSE;
    }

    lpuniProcessObjectName = QueryProcessObjectName(ProcessId);
    if (!lpuniProcessObjectName)
    {
        return FALSE;
    }

    InitializeObjectAttributes(&oa, lpuniProcessObjectName,
                               OBJ_KERNEL_HANDLE | OBJ_CASE_INSENSITIVE, NULL, NULL);

    Status = IoCreateFile(
        &FileHandle,
        FILE_READ_ATTRIBUTES,
        &oa,
        &IoStatusBlock,
        NULL,
        FILE_ATTRIBUTE_NORMAL,
        FILE_SHARE_READ | FILE_SHARE_WRITE,
        FILE_OPEN,
        FILE_NON_DIRECTORY_FILE,
        NULL,
        0,
        CreateFileTypeNone,
        NULL,
        IO_NO_PARAMETER_CHECKING);
    if (!NT_SUCCESS(Status))
    {
        goto FreeObjectName;
    }

    Status = ObReferenceObjectByHandle(FileHandle, FILE_ANY_ACCESS, *IoFileObjectType,
                                       KernelMode, (PVOID*)&FileObject, NULL);
    if (!NT_SUCCESS(Status) || !FileObject)
    {
        goto CloseFile;
    }

    Status = IoQueryFileDosDeviceName(FileObject, &ObjectNameInformation);
    if (NT_SUCCESS(Status) && ObjectNameInformation)
    {
        /*
         * Name.Length excludes any terminator, so one extra WCHAR must be
         * left over; an exact fit would leave the output unterminated.
         */
        if ((SIZE_T)ObjectNameInformation->Name.Length + sizeof(WCHAR) <= cbOutput)
        {
            memset(lpwzImageFileName, 0, cbOutput);
            memcpy(lpwzImageFileName, ObjectNameInformation->Name.Buffer,
                   ObjectNameInformation->Name.Length);
            bStatus = TRUE;
        }

        ExFreePool(ObjectNameInformation);
        ObjectNameInformation = NULL;
    }

    ObDereferenceObject(FileObject);

CloseFile:
    ObCloseHandle(FileHandle, KernelMode);
    FileHandle = NULL;

FreeObjectName:
    ExFreePool(lpuniProcessObjectName);

    return bStatus;
}

/*
 * QueryProcessFileNameWorkItem
 *
 * Worker routine for a queued FyWorkQueueItem: unpacks the three parameters,
 * runs GetProcessImageFileName, stores the result in bStatus and then sets
 * CompleteEvent so the waiting requester can continue.
 *
 * lpFyWorkQueueItem - the request; lpParameter1 is the process id,
 *                     lpParameter2 the output buffer and lpParameter3 the
 *                     buffer capacity stored in a pointer-sized slot.
 */
VOID QueryProcessFileNameWorkItem(IN PFyWorkQueueItem lpFyWorkQueueItem)
{
    /*
     * Go through ULONG_PTR: converting a pointer straight to ULONG is a
     * pointer-truncation diagnostic on 64-bit builds.
     */
    ULONG uMaxSize = (ULONG)(ULONG_PTR)lpFyWorkQueueItem->lpParameter3;

    lpFyWorkQueueItem->bStatus = GetProcessImageFileName(
        (HANDLE)lpFyWorkQueueItem->lpParameter1,
        (WCHAR*)lpFyWorkQueueItem->lpParameter2,
        uMaxSize);

    /* Always signal, success or not, so the requester never waits forever */
    KeSetEvent(&lpFyWorkQueueItem->CompleteEvent, IO_NO_INCREMENT, FALSE);
}

/*
 * GetProcessImageFileNameSafeIrql
 *
 * Front end for GetProcessImageFileName that tolerates APC-restricted
 * callers. At PASSIVE_LEVEL with APCs enabled it calls the routine
 * directly. If KeAreApcsDisabled() reports TRUE or the IRQL is APC_LEVEL,
 * it queues the request to the delayed work queue and waits for the worker
 * routine to finish. Above APC_LEVEL it fails immediately.
 *
 * ProcessId         - id of the process to query.
 * lpwzImageFileName - receives the path; must stay valid until return.
 * uMaxSize          - capacity of lpwzImageFileName in WCHARs.
 *
 * Returns the result of GetProcessImageFileName, or FALSE above APC_LEVEL.
 */
BOOL GetProcessImageFileNameSafeIrql(
    IN HANDLE ProcessId,
    OUT WCHAR* lpwzImageFileName,
    IN ULONG uMaxSize)
{
    FyWorkQueueItem WorkItem;

    if (KeGetCurrentIrql() > APC_LEVEL)
    {
        return FALSE;
    }

    /* APCs can be delivered here: the I/O routines are safe to call inline */
    if (!KeAreApcsDisabled() && KeGetCurrentIrql() != APC_LEVEL)
    {
        return GetProcessImageFileName(ProcessId, lpwzImageFileName, uMaxSize);
    }

    /*
     * The request lives on this stack frame; that is only valid because we
     * block below until the worker routine has set CompleteEvent.
     */
    memset(&WorkItem, 0, sizeof(WorkItem));
    KeInitializeEvent(&WorkItem.CompleteEvent, NotificationEvent, FALSE);
    WorkItem.bStatus = FALSE;

    WorkItem.lpParameter1 = (PVOID)ProcessId;
    WorkItem.lpParameter2 = (PVOID)lpwzImageFileName;
    /* Widen through ULONG_PTR: ULONG to PVOID directly is a size-mismatch cast on 64-bit */
    WorkItem.lpParameter3 = (PVOID)(ULONG_PTR)uMaxSize;

    WorkItem.WorkQueueItem.List.Flink = NULL;
    WorkItem.WorkQueueItem.WorkerRoutine = (PWORKER_THREAD_ROUTINE)QueryProcessFileNameWorkItem;
    WorkItem.WorkQueueItem.Parameter = &WorkItem;

    ExQueueWorkItem(&WorkItem.WorkQueueItem, DelayedWorkQueue);

    KeWaitForSingleObject(&WorkItem.CompleteEvent, Executive, KernelMode, FALSE, NULL);

    return WorkItem.bStatus;
}


分享到:


相關文章: