Optimize DrawHalfTransparentBlendedRectTo
`DrawHalfTransparentBlendedRectTo` takes up a significant chunk of time when rendering store and quest dialogs. Optimize the function to read 2 pixels at a time and write 4 pixels at a time.
This commit is contained in:
parent
335b0b9349
commit
b7beb2fbe9
3 changed files with 87 additions and 7 deletions
|
|
@ -12,6 +12,8 @@
|
|||
*/
|
||||
|
||||
#include <array>
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
|
||||
#include "engine/render/common_impl.h"
|
||||
#include "lighting.h"
|
||||
|
|
@ -21,19 +23,71 @@
|
|||
namespace devilution {
|
||||
namespace {
|
||||
|
||||
void DrawHalfTransparentBlendedRectTo(const Surface &out, int sx, int sy, int width, int height)
|
||||
// Expects everything to be 4-byte aligned.
|
||||
void DrawHalfTransparentAligned32BlendedRectTo(const Surface &out, unsigned sx, unsigned sy, unsigned width, unsigned height)
|
||||
{
|
||||
BYTE *pix = out.at(sx, sy);
|
||||
assert(out.pitch() % 4 == 0);
|
||||
|
||||
for (int row = 0; row < height; row++) {
|
||||
for (int col = 0; col < width; col++) {
|
||||
*pix = paletteTransparencyLookup[0][*pix];
|
||||
pix++;
|
||||
auto *pix = reinterpret_cast<uint32_t *>(out.at(static_cast<int>(sx), static_cast<int>(sy)));
|
||||
assert(reinterpret_cast<intptr_t>(pix) % 4 == 0);
|
||||
|
||||
const uint16_t *lookupTable = paletteTransparencyLookupBlack16;
|
||||
|
||||
const unsigned skipX = (out.pitch() - width) / 4;
|
||||
width /= 4;
|
||||
while (height-- > 0) {
|
||||
for (unsigned i = 0; i < width; ++i, ++pix) {
|
||||
const uint32_t v = *pix;
|
||||
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
|
||||
*pix = lookupTable[v & 0xFFFF] | (lookupTable[(v >> 16) & 0xFFFF] << 16);
|
||||
#else
|
||||
*pix = lookupTable[(v >> 16) & 0xFFFF] | (lookupTable[v & 0xFFFF] << 16);
|
||||
#endif
|
||||
}
|
||||
pix += out.pitch() - width;
|
||||
pix += skipX;
|
||||
}
|
||||
}
|
||||
|
||||
void DrawHalfTransparentUnalignedBlendedRectTo(const Surface &out, unsigned sx, unsigned sy, unsigned width, unsigned height)
|
||||
{
|
||||
uint8_t *pix = out.at(static_cast<int>(sx), static_cast<int>(sy));
|
||||
const uint8_t *lookupTable = paletteTransparencyLookup[0];
|
||||
const unsigned skipX = out.pitch() - width;
|
||||
for (unsigned y = 0; y < height; ++y) {
|
||||
for (unsigned x = 0; x < width; ++x, ++pix) {
|
||||
*pix = lookupTable[*pix];
|
||||
}
|
||||
pix += skipX;
|
||||
}
|
||||
}
|
||||
|
||||
void DrawHalfTransparentBlendedRectTo(const Surface &out, unsigned sx, unsigned sy, unsigned width, unsigned height)
|
||||
{
|
||||
// All SDL surfaces are 4-byte aligned and divisible by 4.
|
||||
// However, our coordinates and widths may not be.
|
||||
|
||||
// First, draw the leading unaligned part.
|
||||
if (sx % 4 != 0) {
|
||||
const unsigned w = 4 - sx % 4;
|
||||
DrawHalfTransparentUnalignedBlendedRectTo(out, sx, sy, w, height);
|
||||
sx += w;
|
||||
width -= w;
|
||||
}
|
||||
|
||||
if (static_cast<int>(sx + width) == out.w()) {
|
||||
// The pitch is 4-byte aligned, so we can simply extend the width to the pitch.
|
||||
width = out.pitch() - sx;
|
||||
} else if (width % 4 != 0) {
|
||||
// Draw the trailing unaligned part.
|
||||
const unsigned w = width % 4;
|
||||
DrawHalfTransparentUnalignedBlendedRectTo(out, sx + (width / 4) * 4, sy, w, height);
|
||||
width -= w;
|
||||
}
|
||||
|
||||
// Now everything is divisible by 4. Draw the aligned part.
|
||||
DrawHalfTransparentAligned32BlendedRectTo(out, sx, sy, width, height);
|
||||
}
|
||||
|
||||
void DrawHalfTransparentStippledRectTo(const Surface &out, int sx, int sy, int width, int height)
|
||||
{
|
||||
BYTE *pix = out.at(sx, sy);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue