drm/amd/display: Optimizations for DML math
[why] Conditionals in the DML basic math functions significantly impact mode enumeration. [how] Remove conditionals for floor/ceil operations which are used frequently in DML and add an assertion for invalid callers using zero granularity. Fix existing callers that rely on 0 granularity. Tested-by: Daniel Wheeler <daniel.wheeler@amd.com> Reviewed-by: Nevenko Stupar <Nevenko.Stupar@amd.com> Acked-by: Pavle Kotarac <Pavle.Kotarac@amd.com> Signed-off-by: Aric Cyr <aric.cyr@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
6da15a236c
commit
17529ea2ac
@ -526,10 +526,10 @@ void mode_support_and_system_configuration(struct dcn_bw_internal_vars *v)
|
||||
}
|
||||
if (v->max_swath_height_c[k] > 0.0) {
|
||||
v->swath_width_granularity_c = 256.0 /dcn_bw_ceil2(v->byte_per_pixel_in_detc[k], 2.0) / v->max_swath_height_c[k];
|
||||
}
|
||||
v->rounded_up_max_swath_size_bytes_c = (dcn_bw_ceil2(v->swath_width_yper_state[i][j][k] / 2.0 - 1.0, v->swath_width_granularity_c) + v->swath_width_granularity_c) * v->byte_per_pixel_in_detc[k] * v->max_swath_height_c[k];
|
||||
if (v->source_pixel_format[k] == dcn_bw_yuv420_sub_10) {
|
||||
v->rounded_up_max_swath_size_bytes_c =dcn_bw_ceil2(v->rounded_up_max_swath_size_bytes_c, 256.0) + 256;
|
||||
v->rounded_up_max_swath_size_bytes_c = (dcn_bw_ceil2(v->swath_width_yper_state[i][j][k] / 2.0 - 1.0, v->swath_width_granularity_c) + v->swath_width_granularity_c) * v->byte_per_pixel_in_detc[k] * v->max_swath_height_c[k];
|
||||
if (v->source_pixel_format[k] == dcn_bw_yuv420_sub_10) {
|
||||
v->rounded_up_max_swath_size_bytes_c = dcn_bw_ceil2(v->rounded_up_max_swath_size_bytes_c, 256.0) + 256;
|
||||
}
|
||||
}
|
||||
if (v->rounded_up_max_swath_size_bytes_y + v->rounded_up_max_swath_size_bytes_c <= v->det_buffer_size_in_kbyte * 1024.0 / 2.0) {
|
||||
v->swath_height_yper_state[i][j][k] = v->max_swath_height_y[k];
|
||||
@ -552,14 +552,14 @@ void mode_support_and_system_configuration(struct dcn_bw_internal_vars *v)
|
||||
v->lines_in_det_chroma = v->det_buffer_size_in_kbyte * 1024.0 / 3.0 / v->byte_per_pixel_in_dety[k] / (v->swath_width_yper_state[i][j][k] / 2.0);
|
||||
}
|
||||
v->effective_lb_latency_hiding_source_lines_luma =dcn_bw_min2(v->max_line_buffer_lines,dcn_bw_floor2(v->line_buffer_size / v->lb_bit_per_pixel[k] / (v->swath_width_yper_state[i][j][k] /dcn_bw_max2(v->h_ratio[k], 1.0)), 1.0)) - (v->vtaps[k] - 1.0);
|
||||
v->effective_lb_latency_hiding_source_lines_chroma =dcn_bw_min2(v->max_line_buffer_lines,dcn_bw_floor2(v->line_buffer_size / v->lb_bit_per_pixel[k] / (v->swath_width_yper_state[i][j][k] / 2.0 /dcn_bw_max2(v->h_ratio[k] / 2.0, 1.0)), 1.0)) - (v->vta_pschroma[k] - 1.0);
|
||||
v->effective_detlb_lines_luma =dcn_bw_floor2(v->lines_in_det_luma +dcn_bw_min2(v->lines_in_det_luma * v->required_dispclk[i][j] * v->byte_per_pixel_in_dety[k] * v->pscl_factor[k] / v->return_bw_per_state[i], v->effective_lb_latency_hiding_source_lines_luma), v->swath_height_yper_state[i][j][k]);
|
||||
v->effective_detlb_lines_chroma =dcn_bw_floor2(v->lines_in_det_chroma +dcn_bw_min2(v->lines_in_det_chroma * v->required_dispclk[i][j] * v->byte_per_pixel_in_detc[k] * v->pscl_factor_chroma[k] / v->return_bw_per_state[i], v->effective_lb_latency_hiding_source_lines_chroma), v->swath_height_cper_state[i][j][k]);
|
||||
if (v->byte_per_pixel_in_detc[k] == 0.0) {
|
||||
v->urgent_latency_support_us_per_state[i][j][k] = v->effective_detlb_lines_luma * (v->htotal[k] / v->pixel_clock[k]) / v->v_ratio[k] - v->effective_detlb_lines_luma * v->swath_width_yper_state[i][j][k] *dcn_bw_ceil2(v->byte_per_pixel_in_dety[k], 1.0) / (v->return_bw_per_state[i] / v->no_of_dpp[i][j][k]);
|
||||
}
|
||||
else {
|
||||
v->urgent_latency_support_us_per_state[i][j][k] =dcn_bw_min2(v->effective_detlb_lines_luma * (v->htotal[k] / v->pixel_clock[k]) / v->v_ratio[k] - v->effective_detlb_lines_luma * v->swath_width_yper_state[i][j][k] *dcn_bw_ceil2(v->byte_per_pixel_in_dety[k], 1.0) / (v->return_bw_per_state[i] / v->no_of_dpp[i][j][k]), v->effective_detlb_lines_chroma * (v->htotal[k] / v->pixel_clock[k]) / (v->v_ratio[k] / 2.0) - v->effective_detlb_lines_chroma * v->swath_width_yper_state[i][j][k] / 2.0 *dcn_bw_ceil2(v->byte_per_pixel_in_detc[k], 2.0) / (v->return_bw_per_state[i] / v->no_of_dpp[i][j][k]));
|
||||
v->effective_lb_latency_hiding_source_lines_chroma = dcn_bw_min2(v->max_line_buffer_lines, dcn_bw_floor2(v->line_buffer_size / v->lb_bit_per_pixel[k] / (v->swath_width_yper_state[i][j][k] / 2.0 / dcn_bw_max2(v->h_ratio[k] / 2.0, 1.0)), 1.0)) - (v->vta_pschroma[k] - 1.0);
|
||||
v->effective_detlb_lines_chroma = dcn_bw_floor2(v->lines_in_det_chroma + dcn_bw_min2(v->lines_in_det_chroma * v->required_dispclk[i][j] * v->byte_per_pixel_in_detc[k] * v->pscl_factor_chroma[k] / v->return_bw_per_state[i], v->effective_lb_latency_hiding_source_lines_chroma), v->swath_height_cper_state[i][j][k]);
|
||||
v->urgent_latency_support_us_per_state[i][j][k] = dcn_bw_min2(v->effective_detlb_lines_luma * (v->htotal[k] / v->pixel_clock[k]) / v->v_ratio[k] - v->effective_detlb_lines_luma * v->swath_width_yper_state[i][j][k] * dcn_bw_ceil2(v->byte_per_pixel_in_dety[k], 1.0) / (v->return_bw_per_state[i] / v->no_of_dpp[i][j][k]), v->effective_detlb_lines_chroma * (v->htotal[k] / v->pixel_clock[k]) / (v->v_ratio[k] / 2.0) - v->effective_detlb_lines_chroma * v->swath_width_yper_state[i][j][k] / 2.0 * dcn_bw_ceil2(v->byte_per_pixel_in_detc[k], 2.0) / (v->return_bw_per_state[i] / v->no_of_dpp[i][j][k]));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1146,10 +1146,10 @@ void display_pipe_configuration(struct dcn_bw_internal_vars *v)
|
||||
}
|
||||
if (v->maximum_swath_height_c > 0.0) {
|
||||
v->swath_width_granularity_c = 256.0 /dcn_bw_ceil2(v->byte_per_pix_detc, 2.0) / v->maximum_swath_height_c;
|
||||
}
|
||||
v->rounded_up_max_swath_size_bytes_c = (dcn_bw_ceil2(v->swath_width / 2.0 - 1.0, v->swath_width_granularity_c) + v->swath_width_granularity_c) * v->byte_per_pix_detc * v->maximum_swath_height_c;
|
||||
if (v->source_pixel_format[k] == dcn_bw_yuv420_sub_10) {
|
||||
v->rounded_up_max_swath_size_bytes_c =dcn_bw_ceil2(v->rounded_up_max_swath_size_bytes_c, 256.0) + 256;
|
||||
v->rounded_up_max_swath_size_bytes_c = (dcn_bw_ceil2(v->swath_width / 2.0 - 1.0, v->swath_width_granularity_c) + v->swath_width_granularity_c) * v->byte_per_pix_detc * v->maximum_swath_height_c;
|
||||
if (v->source_pixel_format[k] == dcn_bw_yuv420_sub_10) {
|
||||
v->rounded_up_max_swath_size_bytes_c = dcn_bw_ceil2(v->rounded_up_max_swath_size_bytes_c, 256.0) + 256;
|
||||
}
|
||||
}
|
||||
if (v->rounded_up_max_swath_size_bytes_y + v->rounded_up_max_swath_size_bytes_c <= v->det_buffer_size_in_kbyte * 1024.0 / 2.0) {
|
||||
v->swath_height_y[k] = v->maximum_swath_height_y;
|
||||
|
@ -23,6 +23,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "os_types.h"
|
||||
#include "dcn_calc_math.h"
|
||||
|
||||
#define isNaN(number) ((number) != (number))
|
||||
@ -69,8 +70,8 @@ float dcn_bw_max2(const float arg1, const float arg2)
|
||||
|
||||
float dcn_bw_floor2(const float arg, const float significance)
|
||||
{
|
||||
if (significance == 0)
|
||||
return 0;
|
||||
ASSERT(significance != 0);
|
||||
|
||||
return ((int) (arg / significance)) * significance;
|
||||
}
|
||||
float dcn_bw_floor(const float arg)
|
||||
@ -80,17 +81,14 @@ float dcn_bw_floor(const float arg)
|
||||
|
||||
float dcn_bw_ceil(const float arg)
|
||||
{
|
||||
float flr = dcn_bw_floor2(arg, 1);
|
||||
|
||||
return flr + 0.00001 >= arg ? arg : flr + 1;
|
||||
return (int) (arg + 0.99999);
|
||||
}
|
||||
|
||||
float dcn_bw_ceil2(const float arg, const float significance)
|
||||
{
|
||||
float flr = dcn_bw_floor2(arg, significance);
|
||||
if (significance == 0)
|
||||
return 0;
|
||||
return flr + 0.00001 >= arg ? arg : flr + significance;
|
||||
ASSERT(significance != 0);
|
||||
|
||||
return ((int) (arg / significance + 0.99999)) * significance;
|
||||
}
|
||||
|
||||
float dcn_bw_max3(float v1, float v2, float v3)
|
||||
|
@ -4478,17 +4478,17 @@ void dml20v2_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode
|
||||
locals->EffectiveLBLatencyHidingSourceLinesLuma),
|
||||
locals->SwathHeightYPerState[i][j][k]);
|
||||
|
||||
locals->EffectiveDETLBLinesChroma = dml_floor(locals->LinesInDETChroma + dml_min(
|
||||
locals->LinesInDETChroma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETC[k] *
|
||||
locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i][0],
|
||||
locals->EffectiveLBLatencyHidingSourceLinesChroma),
|
||||
locals->SwathHeightCPerState[i][j][k]);
|
||||
|
||||
if (locals->BytePerPixelInDETC[k] == 0) {
|
||||
locals->UrgentLatencySupportUsPerState[i][j][k] = locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k])
|
||||
/ locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] *
|
||||
dml_ceil(locals->BytePerPixelInDETY[k], 1) / (locals->ReturnBWPerState[i][0] / locals->NoOfDPP[i][j][k]);
|
||||
} else {
|
||||
locals->EffectiveDETLBLinesChroma = dml_floor(locals->LinesInDETChroma + dml_min(
|
||||
locals->LinesInDETChroma * locals->RequiredDISPCLK[i][j] * locals->BytePerPixelInDETC[k] *
|
||||
locals->PSCL_FACTOR_CHROMA[k] / locals->ReturnBWPerState[i][0],
|
||||
locals->EffectiveLBLatencyHidingSourceLinesChroma),
|
||||
locals->SwathHeightCPerState[i][j][k]);
|
||||
locals->UrgentLatencySupportUsPerState[i][j][k] = dml_min(
|
||||
locals->EffectiveDETLBLinesLuma * (locals->HTotal[k] / locals->PixelClock[k])
|
||||
/ locals->VRatio[k] - locals->EffectiveDETLBLinesLuma * locals->SwathWidthYPerState[i][j][k] *
|
||||
|
@ -806,10 +806,12 @@ static bool CalculatePrefetchSchedule(
|
||||
|
||||
if (myPipe->SourceScan == dm_horz) {
|
||||
*swath_width_luma_ub = dml_ceil(SwathWidthY - 1, myPipe->BlockWidth256BytesY) + myPipe->BlockWidth256BytesY;
|
||||
*swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, myPipe->BlockWidth256BytesC) + myPipe->BlockWidth256BytesC;
|
||||
if (myPipe->BlockWidth256BytesC > 0)
|
||||
*swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, myPipe->BlockWidth256BytesC) + myPipe->BlockWidth256BytesC;
|
||||
} else {
|
||||
*swath_width_luma_ub = dml_ceil(SwathWidthY - 1, myPipe->BlockHeight256BytesY) + myPipe->BlockHeight256BytesY;
|
||||
*swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, myPipe->BlockHeight256BytesC) + myPipe->BlockHeight256BytesC;
|
||||
if (myPipe->BlockWidth256BytesC > 0)
|
||||
*swath_width_chroma_ub = dml_ceil(SwathWidthY / 2 - 1, myPipe->BlockHeight256BytesC) + myPipe->BlockHeight256BytesC;
|
||||
}
|
||||
|
||||
prefetch_bw_oto = (PrefetchSourceLinesY * *swath_width_luma_ub * dml_ceil(BytePerPixelDETY, 1) + PrefetchSourceLinesC * *swath_width_chroma_ub * dml_ceil(BytePerPixelDETC, 2)) / Tsw_oto;
|
||||
|
@ -6322,10 +6322,6 @@ static void CalculateSwathWidth(
|
||||
|
||||
for (k = 0; k < NumberOfActivePlanes; ++k) {
|
||||
enum odm_combine_mode MainPlaneODMCombine = 0;
|
||||
surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
|
||||
surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
|
||||
surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
|
||||
surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
|
||||
|
||||
if (SourceScan[k] != dm_vert) {
|
||||
SwathWidthSingleDPPY[k] = ViewportWidth[k];
|
||||
@ -6365,8 +6361,6 @@ static void CalculateSwathWidth(
|
||||
|
||||
surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
|
||||
surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
|
||||
surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
|
||||
surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
|
||||
|
||||
if (SourceScan[k] != dm_vert) {
|
||||
MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
|
||||
@ -6374,6 +6368,7 @@ static void CalculateSwathWidth(
|
||||
swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (long) dml_ceil(SwathWidthY[k] - 1,
|
||||
Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
|
||||
if (BytePerPixC[k] > 0) {
|
||||
surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
|
||||
swath_width_chroma_ub[k] = dml_min(surface_width_ub_c, (long) dml_ceil(SwathWidthC[k] - 1,
|
||||
Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
|
||||
} else {
|
||||
@ -6385,6 +6380,7 @@ static void CalculateSwathWidth(
|
||||
swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (long) dml_ceil(SwathWidthY[k] - 1,
|
||||
Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
|
||||
if (BytePerPixC[k] > 0) {
|
||||
surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
|
||||
swath_width_chroma_ub[k] = dml_min(surface_height_ub_c, (long) dml_ceil(SwathWidthC[k] - 1,
|
||||
Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
|
||||
} else {
|
||||
|
@ -6933,8 +6933,6 @@ static void CalculateSwathWidth(
|
||||
{
|
||||
int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
|
||||
int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
|
||||
int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
|
||||
int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
|
||||
|
||||
#ifdef __DML_VBA_DEBUG__
|
||||
dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
|
||||
@ -6945,6 +6943,8 @@ static void CalculateSwathWidth(
|
||||
MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
|
||||
swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
|
||||
if (BytePerPixC[k] > 0) {
|
||||
int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
|
||||
|
||||
swath_width_chroma_ub[k] = dml_min(
|
||||
surface_width_ub_c,
|
||||
(int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
|
||||
@ -6956,6 +6956,8 @@ static void CalculateSwathWidth(
|
||||
MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
|
||||
swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
|
||||
if (BytePerPixC[k] > 0) {
|
||||
int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
|
||||
|
||||
swath_width_chroma_ub[k] = dml_min(
|
||||
surface_height_ub_c,
|
||||
(int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
|
||||
|
@ -7049,8 +7049,6 @@ static void CalculateSwathWidth(
|
||||
{
|
||||
int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
|
||||
int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
|
||||
int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
|
||||
int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
|
||||
|
||||
#ifdef __DML_VBA_DEBUG__
|
||||
dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
|
||||
@ -7061,6 +7059,8 @@ static void CalculateSwathWidth(
|
||||
MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
|
||||
swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
|
||||
if (BytePerPixC[k] > 0) {
|
||||
int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
|
||||
|
||||
swath_width_chroma_ub[k] = dml_min(
|
||||
surface_width_ub_c,
|
||||
(int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
|
||||
@ -7072,6 +7072,8 @@ static void CalculateSwathWidth(
|
||||
MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
|
||||
swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
|
||||
if (BytePerPixC[k] > 0) {
|
||||
int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
|
||||
|
||||
swath_width_chroma_ub[k] = dml_min(
|
||||
surface_height_ub_c,
|
||||
(int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
|
||||
|
@ -719,8 +719,8 @@ void dml32_CalculateSwathWidth(
|
||||
|
||||
unsigned int surface_width_ub_l;
|
||||
unsigned int surface_height_ub_l;
|
||||
unsigned int surface_width_ub_c;
|
||||
unsigned int surface_height_ub_c;
|
||||
unsigned int surface_width_ub_c = 0;
|
||||
unsigned int surface_height_ub_c = 0;
|
||||
|
||||
#ifdef __DML_VBA_DEBUG__
|
||||
dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
|
||||
@ -784,21 +784,6 @@ void dml32_CalculateSwathWidth(
|
||||
|
||||
surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
|
||||
surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
|
||||
surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
|
||||
surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
|
||||
|
||||
#ifdef __DML_VBA_DEBUG__
|
||||
dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
|
||||
dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l);
|
||||
dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c);
|
||||
dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c);
|
||||
dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]);
|
||||
dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]);
|
||||
dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]);
|
||||
dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]);
|
||||
dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]);
|
||||
dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]);
|
||||
#endif
|
||||
|
||||
if (!IsVertical(SourceRotation[k])) {
|
||||
MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
|
||||
@ -818,6 +803,7 @@ void dml32_CalculateSwathWidth(
|
||||
Read256BytesBlockWidthY[k]);
|
||||
}
|
||||
if (BytePerPixC[k] > 0) {
|
||||
surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
|
||||
if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
|
||||
swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
|
||||
dml_floor(ViewportXStartC[k] + SwathWidthC[k] +
|
||||
@ -848,6 +834,7 @@ void dml32_CalculateSwathWidth(
|
||||
Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
|
||||
}
|
||||
if (BytePerPixC[k] > 0) {
|
||||
surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
|
||||
if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
|
||||
swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
|
||||
dml_floor(ViewportYStartC[k] + SwathWidthC[k] +
|
||||
@ -866,6 +853,16 @@ void dml32_CalculateSwathWidth(
|
||||
}
|
||||
|
||||
#ifdef __DML_VBA_DEBUG__
|
||||
dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
|
||||
dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l);
|
||||
dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c);
|
||||
dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c);
|
||||
dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]);
|
||||
dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]);
|
||||
dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]);
|
||||
dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]);
|
||||
dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]);
|
||||
dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]);
|
||||
dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]);
|
||||
dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]);
|
||||
dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]);
|
||||
|
@ -76,14 +76,9 @@ static inline double dml_floor(double a, double granularity)
|
||||
|
||||
static inline double dml_round(double a)
|
||||
{
|
||||
double round_pt = 0.5;
|
||||
double ceil = dml_ceil(a, 1);
|
||||
double floor = dml_floor(a, 1);
|
||||
const double round_pt = 0.5;
|
||||
|
||||
if (a - floor >= round_pt)
|
||||
return ceil;
|
||||
else
|
||||
return floor;
|
||||
return dml_floor(a + round_pt, 1);
|
||||
}
|
||||
|
||||
/* float
|
||||
|
Loading…
Reference in New Issue
Block a user