Author Topic: VK_KHR_performance_query: New Vulkan Extension to Expose Performance Metrics  (Read 1155 times)

0 Members and 1 Guest are viewing this topic.

JeGX

  • Global Moderator
  • Hero Member
  • *****
  • Posts: 2198
    • Geeks3D.com
VK_KHR_performance_query is a new extension included in Vulkan 1.1.128 specifications:

Quote
The Vulkan Working Group has just released the VK_KHR_performance_query extension, which provides a cross-vendor common mechanism to expose performance metrics. These may be used to obtain data from a Vulkan device, typically a graphics card or SoC, to measure the workload demand and assess the impact of application modifications and optimizations. GPU vendor tools often provide such hardware counters, with the information tailored to their specific implementation. VK_KHR_performance_query provides a cross-vendor common mechanism to expose such counters, enabling third-party tools and in-engine profilers access in a standardized manner. Annotations are made in-line with existing Vulkan commands, so that it is possible to correlate specific bundles of work to the counter data captured – with the exact granularity dependent on hardware capabilities.

The effort to produce this extension was a collaborative effort within the Vulkan Working Group, with multiple contributors from across GPU vendors and game developers. The Vulkan Working Group particularly wishes to to call out the support of the LPGPU2 project with collaborators Codeplay, Samsung Electronics, and Think Silicon, that helped drive the effort. An Intel/Mesa implementation that exposes the extension for all their microarchitectures with Vulkan support is under review. A pull request is open to add support to RenderDoc, displaying the sampled counter data for each event in a frame capture. We expect more implementations to follow, with Arm intending to support this extension in future Vulkan drivers for the Mali GPUs; Qualcomm in future Vulkan drivers for Snapdragon; Samsung Mobile supporting it on its Galaxy devices soon; and Intel Windows Vulkan drivers for all supported platforms shortly.

Links:
- source
- VK_KHR_performance_query extension


JeGX

  • Global Moderator
  • Hero Member
  • *****
  • Posts: 2198
    • Geeks3D.com
A code sample from VK_KHR_performance_query spec:

Code: [Select]
// A previously created physical device
VkPhysicalDevice physicalDevice;

// One of the queue families our device supports
uint32_t queueFamilyIndex;

uint32_t counterCount;

// Get the count of counters supported
vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
  physicalDevice,
  queueFamilyIndex,
  &counterCount,
  NULL,
  NULL);

VkPerformanceCounterKHR* counters =
  malloc(sizeof(VkPerformanceCounterKHR) * counterCount);
VkPerformanceCounterDescriptionKHR* counterDescriptions =
  malloc(sizeof(VkPerformanceCounterDescriptionKHR) * counterCount);

// Get the counters supported
vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
  physicalDevice,
  queueFamilyIndex,
  &counterCount,
  counters,
  counterDescriptions);

// Try to enable the first 8 counters
uint32_t enabledCounters[8];

const uint32_t enabledCounterCount = min(counterCount, 8));

for (uint32_t i = 0; i < enabledCounterCount; i++) {
  enabledCounters[i] = i;
}

// A previously created device that had the performanceCounterQueryPools feature
// set to VK_TRUE
VkDevice device;

VkQueryPoolPerformanceCreateInfoKHR performanceQueryCreateInfo = {
  VK_STRUCTURE_TYPE_QUERY_POOL_PERFORMANCE_CREATE_INFO_KHR,
  NULL,

  // Specify the queue family that this performance query is performed on
  queueFamilyIndex,

  // The number of counters to enable
  enabledCounterCount,

  // The array of indices of counters to enable
  enabledCounters
};


// Get the number of passes our counters will require.
uint32_t numPasses;

vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(
  physicalDevice,
  &performanceQueryCreateInfo,
  &numPasses);

VkQueryPoolCreateInfo queryPoolCreateInfo = {
  VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
  &performanceQueryCreateInfo,
  0,

  // Using our new query type here
  VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR,

  1,

  0
};

VkQueryPool queryPool;

VkResult result = vkCreateQueryPool(
  device,
  &queryPoolCreateInfo,
  NULL,
  &queryPool);

assert(VK_SUCCESS == result);

// A queue from queueFamilyIndex
VkQueue queue;

// A command buffer we want to record counters on
VkCommandBuffer commandBuffer;

VkCommandBufferBeginInfo commandBufferBeginInfo = {
  VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
  NULL,
  0,
  NULL
};

VkAcquireProfilingLockInfoKHR lockInfo = {
  VK_STRUCTURE_TYPE_ACQUIRE_PROFILING_LOCK_INFO_KHR,
  NULL,
  0,
  UINT64_MAX // Wait forever for the lock
};

// Acquire the profiling lock before we record command buffers
// that will use performance queries

result = vkAcquireProfilingLockKHR(device, &lockInfo);

assert(VK_SUCCESS == result);

result = vkBeginCommandBuffer(commandBuffer, &commandBufferBeginInfo);

assert(VK_SUCCESS == result);

vkCmdResetQueryPool(
  commandBuffer,
  queryPool,
  0,
  1);

vkCmdBeginQuery(
  commandBuffer,
  queryPool,
  0,
  0);

// Perform the commands you want to get performance information on
// ...

// Perform a barrier to ensure all previous commands were complete before
// ending the query
vkCmdPipelineBarrier(commandBuffer,
  VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
  VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
  0,
  0,
  NULL,
  0,
  NULL,
  0,
  NULL);

vkCmdEndQuery(
  commandBuffer,
  queryPool,
  0);

result = vkEndCommandBuffer(commandBuffer);

assert(VK_SUCCESS == result);

for (uint32_t counterPass = 0; counterPass < numPasses; counterPass++) {

  VkPerformanceQuerySubmitInfoKHR performanceQuerySubmitInfo = {
    VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR,
    NULL,
    counterPass
  };


  // Submit the command buffer and wait for its completion
  // ...
}

// Release the profiling lock after the command buffer is no longer in the
// pending state.
vkReleaseProfilingLockKHR(device);

result = vkResetCommandBuffer(commandBuffer, 0);

assert(VK_SUCCESS == result);

// Create an array to hold the results of all counters
VkPerformanceCounterResultKHR* recordedCounters = malloc(
  sizeof(VkPerformanceCounterResultKHR) * enabledCounterCount);

result = vkGetQueryPoolResults(
  device,
  queryPool,
  0,
  1,
  sizeof(VkPerformanceCounterResultKHR) * enabledCounterCount,
  recordedCounters,
  sizeof(VkPerformanceCounterResultKHR),
  NULL);

// recordedCounters is filled with our counters, we'll look at one for posterity
switch (counters[0].storage) {
  case VK_PERFORMANCE_COUNTER_STORAGE_INT32:
    // use recordCounters[0].int32 to get at the counter result!
    break;
  case VK_PERFORMANCE_COUNTER_STORAGE_INT64:
    // use recordCounters[0].int64 to get at the counter result!
    break;
  case VK_PERFORMANCE_COUNTER_STORAGE_UINT32:
    // use recordCounters[0].uint32 to get at the counter result!
    break;
  case VK_PERFORMANCE_COUNTER_STORAGE_UINT64:
    // use recordCounters[0].uint64 to get at the counter result!
    break;
  case VK_PERFORMANCE_COUNTER_STORAGE_FLOAT32:
    // use recordCounters[0].float32 to get at the counter result!
    break;
  case VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64:
    // use recordCounters[0].float64 to get at the counter result!
    break;
}