Skip to content

Commit

Permalink
Optimize floor drawing
Browse files Browse the repository at this point in the history
  • Loading branch information
glebm committed Aug 11, 2024
1 parent 5b076e2 commit c17adfa
Show file tree
Hide file tree
Showing 5 changed files with 230 additions and 98 deletions.
14 changes: 4 additions & 10 deletions Source/engine/render/blit_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,6 @@

namespace devilution {

#if __cpp_lib_execution >= 201902L
#define DEVILUTIONX_BLIT_EXECUTION_POLICY std::execution::unseq,
#else
#define DEVILUTIONX_BLIT_EXECUTION_POLICY
#endif

DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void BlitFillDirect(uint8_t *dst, unsigned length, uint8_t color)
{
DVL_ASSUME(length != 0);
Expand Down Expand Up @@ -48,7 +42,7 @@ DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void BlitFillWithMap(uint8_t *dst, unsigned
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void BlitPixelsWithMap(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, unsigned length, const uint8_t *DVL_RESTRICT colorMap)
{
DVL_ASSUME(length != 0);
std::transform(DEVILUTIONX_BLIT_EXECUTION_POLICY src, src + length, dst, [colorMap](uint8_t srcColor) { return colorMap[srcColor]; });
std::transform(DVL_EXECUTION_UNSEQ src, src + length, dst, [colorMap](uint8_t srcColor) { return colorMap[srcColor]; });
}

struct BlitWithMap {
Expand All @@ -67,15 +61,15 @@ struct BlitWithMap {
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void BlitFillBlended(uint8_t *dst, unsigned length, uint8_t color)
{
DVL_ASSUME(length != 0);
std::for_each(DEVILUTIONX_BLIT_EXECUTION_POLICY dst, dst + length, [tbl = paletteTransparencyLookup[color]](uint8_t &dstColor) {
std::for_each(DVL_EXECUTION_UNSEQ dst, dst + length, [tbl = paletteTransparencyLookup[color]](uint8_t &dstColor) {
dstColor = tbl[dstColor];
});
}

DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void BlitPixelsBlended(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, unsigned length)
{
DVL_ASSUME(length != 0);
std::transform(DEVILUTIONX_BLIT_EXECUTION_POLICY src, src + length, dst, dst, [pal = paletteTransparencyLookup](uint8_t srcColor, uint8_t dstColor) {
std::transform(DVL_EXECUTION_UNSEQ src, src + length, dst, dst, [pal = paletteTransparencyLookup](uint8_t srcColor, uint8_t dstColor) {
return pal[srcColor][dstColor];
});
}
Expand All @@ -94,7 +88,7 @@ struct BlitBlended {
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void BlitPixelsBlendedWithMap(uint8_t *DVL_RESTRICT dst, const uint8_t *DVL_RESTRICT src, unsigned length, const uint8_t *DVL_RESTRICT colorMap)
{
DVL_ASSUME(length != 0);
std::transform(DEVILUTIONX_BLIT_EXECUTION_POLICY src, src + length, dst, dst, [colorMap, pal = paletteTransparencyLookup](uint8_t srcColor, uint8_t dstColor) {
std::transform(DVL_EXECUTION_UNSEQ src, src + length, dst, dst, [colorMap, pal = paletteTransparencyLookup](uint8_t srcColor, uint8_t dstColor) {
return pal[dstColor][colorMap[srcColor]];
});
}
Expand Down
124 changes: 90 additions & 34 deletions Source/engine/render/dun_render.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,12 @@

#include <SDL_endian.h>

#include <algorithm>
#include <climits>
#include <cstdint>
#include <cstring>

#include "engine/render/blit_impl.hpp"
#include "levels/dun_tile.hpp"
#include "lighting.h"
#include "options.h"
#include "utils/attributes.h"
#ifdef DEBUG_STR
#include "engine/render/text_render.hpp"
Expand Down Expand Up @@ -282,16 +280,6 @@ DVL_ALWAYS_INLINE Clip CalculateClip(int_fast16_t x, int_fast16_t y, int_fast16_
return clip;
}

/**
 * @brief Returns whether `tbl` is the fully dark light table.
 *
 * The check compares the pointer against `FullyDarkLightTable`; the table contents are not inspected.
 */
DVL_ALWAYS_INLINE bool IsFullyDark(const uint8_t *DVL_RESTRICT tbl)
{
return tbl == FullyDarkLightTable;
}

/**
 * @brief Returns whether `tbl` is the fully lit light table.
 *
 * The check compares the pointer against `FullyLitLightTable`; the table contents are not inspected.
 */
DVL_ALWAYS_INLINE bool IsFullyLit(const uint8_t *DVL_RESTRICT tbl)
{
return tbl == FullyLitLightTable;
}

template <LightType Light, bool Transparent>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderSquareFull(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl)
{
Expand Down Expand Up @@ -906,7 +894,7 @@ DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTrapezoid(uint8_t *DVL_RESTR
}

template <LightType Light, bool Transparent>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTileType(TileType tile, uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
void RenderTileType(TileType tile, uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
{
switch (tile) {
case TileType::Square:
Expand Down Expand Up @@ -997,7 +985,7 @@ DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderRightTrapezoidOrTransparentSquare
}

template <bool Transparent>
DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTileDispatch(TileType tile, uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
void RenderTileDispatch(TileType tile, uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, const uint8_t *DVL_RESTRICT src, const uint8_t *DVL_RESTRICT tbl, Clip clip)
{
if (IsFullyDark(tbl)) {
RenderTileType<LightType::FullyDark, Transparent>(tile, dst, dstPitch, src, tbl, clip);
Expand All @@ -1009,7 +997,7 @@ DVL_ALWAYS_INLINE DVL_ATTRIBUTE_HOT void RenderTileDispatch(TileType tile, uint8
}

// Blit with left and vertical clipping.
void RenderBlackTileClipLeftAndVertical(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, int sx, DiamondClipY clipY)
void RenderSingleColorTileClipLeftAndVertical(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, int sx, DiamondClipY clipY, uint8_t color)
{
dst += XStep * (LowerHeight - clipY.lowerBottom - 1);
// Lower triangle (drawn bottom to top):
Expand All @@ -1018,9 +1006,9 @@ void RenderBlackTileClipLeftAndVertical(uint8_t *DVL_RESTRICT dst, uint16_t dstP
const auto w = 2 * XStep * i;
const auto curX = sx + TILE_WIDTH / 2 - XStep * i;
if (curX >= 0) {
memset(dst, 0, w);
memset(dst, color, w);
} else if (-curX <= w) {
memset(dst - curX, 0, w + curX);
memset(dst - curX, color, w + curX);
}
}
dst += 2 * XStep + XStep * clipY.upperBottom;
Expand All @@ -1030,17 +1018,17 @@ void RenderBlackTileClipLeftAndVertical(uint8_t *DVL_RESTRICT dst, uint16_t dstP
const auto w = 2 * XStep * (TriangleUpperHeight - i);
const auto curX = sx + TILE_WIDTH / 2 - XStep * (TriangleUpperHeight - i);
if (curX >= 0) {
memset(dst, 0, w);
memset(dst, color, w);
} else if (-curX <= w) {
memset(dst - curX, 0, w + curX);
memset(dst - curX, color, w + curX);
} else {
break;
}
}
}

// Blit with right and vertical clipping.
void RenderBlackTileClipRightAndVertical(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, int_fast16_t maxWidth, DiamondClipY clipY)
void RenderSingleColorTileClipRightAndVertical(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, int_fast16_t maxWidth, DiamondClipY clipY, uint8_t color)
{
dst += XStep * (LowerHeight - clipY.lowerBottom - 1);
// Lower triangle (drawn bottom to top):
Expand All @@ -1050,7 +1038,7 @@ void RenderBlackTileClipRightAndVertical(uint8_t *DVL_RESTRICT dst, uint16_t dst
const auto endX = TILE_WIDTH / 2 + XStep * i;
const auto skip = endX > maxWidth ? endX - maxWidth : 0;
if (width > skip)
memset(dst, 0, width - skip);
memset(dst, color, width - skip);
}
dst += 2 * XStep + XStep * clipY.upperBottom;
// Upper triangle (drawn bottom to top):
Expand All @@ -1066,35 +1054,35 @@ void RenderBlackTileClipRightAndVertical(uint8_t *DVL_RESTRICT dst, uint16_t dst
}

// Blit with vertical clipping only.
void RenderBlackTileClipY(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, DiamondClipY clipY)
void RenderSingleColorTileClipY(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, DiamondClipY clipY, uint8_t color)
{
dst += XStep * (LowerHeight - clipY.lowerBottom - 1);
// Lower triangle (drawn bottom to top):
const auto lowerMax = LowerHeight - clipY.lowerTop;
for (auto i = 1 + clipY.lowerBottom; i <= lowerMax; ++i, dst -= dstPitch + XStep) {
memset(dst, 0, 2 * XStep * i);
memset(dst, color, 2 * XStep * i);
}
dst += 2 * XStep + XStep * clipY.upperBottom;
// Upper triangle (drawn bottom to top):
const auto upperMax = TriangleUpperHeight - clipY.upperTop;
for (auto i = 1 + clipY.upperBottom; i <= upperMax; ++i, dst -= dstPitch - XStep) {
memset(dst, 0, TILE_WIDTH - 2 * XStep * i);
memset(dst, color, TILE_WIDTH - 2 * XStep * i);
}
}

// Blit a black tile without clipping (must be fully in bounds).
void RenderBlackTileFull(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch)
// Blit a single color tile without clipping (must be fully in bounds).
void RenderSingleColorTileFull(uint8_t *DVL_RESTRICT dst, uint16_t dstPitch, uint8_t color)
{
dst += XStep * (LowerHeight - 1);
// Tile is fully in bounds, can use constant loop boundaries.
// Lower triangle (drawn bottom to top):
for (unsigned i = 1; i <= LowerHeight; ++i, dst -= dstPitch + XStep) {
memset(dst, 0, 2 * XStep * i);
memset(dst, color, 2 * XStep * i);
}
dst += 2 * XStep;
// Upper triangle (drawn bottom to top):
for (unsigned i = 1; i <= TriangleUpperHeight; ++i, dst -= dstPitch - XStep) {
memset(dst, 0, TILE_WIDTH - 2 * XStep * i);
memset(dst, color, TILE_WIDTH - 2 * XStep * i);
}
}

Expand Down Expand Up @@ -1189,7 +1177,16 @@ void RenderTile(const Surface &out, Point position,
#endif
}

void world_draw_black_tile(const Surface &out, int sx, int sy)
/**
 * @brief Renders the frame of `levelCelBlock` into `out` without clipping, masking or transparency.
 *
 * The destination is addressed with a pitch of `DunFrameWidth`, and the light
 * handling is selected from `tbl` by `RenderTileDispatch`.
 *
 * @param levelCelBlock Tile whose frame is read from `pDungeonCels`
 * @param tbl Light table passed through to the tile renderer
 * @param out Destination pointer handed to the tile renderer as-is
 *            (NOTE(review): presumably must hold a whole tile at `DunFrameWidth` pitch - confirm with callers)
 */
void BlitFloorTileToBuffer(LevelCelBlock levelCelBlock, const uint8_t *tbl, uint8_t *out)
{
	const TileType tileType = levelCelBlock.type();

	// `pDungeonCels` begins with a table of little-endian 32-bit offsets, indexed by frame.
	const auto *frameOffsets = reinterpret_cast<const uint32_t *>(pDungeonCels.get());
	const auto *framePixels = reinterpret_cast<const uint8_t *>(&pDungeonCels[SDL_SwapLE32(frameOffsets[levelCelBlock.frame()])]);

	// Nothing is clipped: the full tile width and the full height for this tile type are drawn.
	const Clip unclipped {
		.top = 0,
		.bottom = 0,
		.left = 0,
		.right = 0,
		.width = Width,
		.height = GetTileHeight(tileType)
	};
	RenderTileDispatch</*Transparent=*/false>(tileType, out, DunFrameWidth, framePixels, tbl, unclipped);
}

void RenderSingleColorTile(const Surface &out, int sx, int sy, uint8_t color)
{
#ifdef DEBUG_RENDER_OFFSET_X
sx += DEBUG_RENDER_OFFSET_X;
Expand All @@ -1205,16 +1202,75 @@ void world_draw_black_tile(const Surface &out, int sx, int sy)
uint8_t *dst = out.at(sx, static_cast<int>(sy - clip.bottom));
if (clip.width == TILE_WIDTH) {
if (clip.height == TriangleHeight) {
RenderBlackTileFull(dst, out.pitch());
RenderSingleColorTileFull(dst, out.pitch(), color);
} else {
RenderBlackTileClipY(dst, out.pitch(), clipY);
RenderSingleColorTileClipY(dst, out.pitch(), clipY, color);
}
} else {
if (clip.right == 0) {
RenderBlackTileClipLeftAndVertical(dst, out.pitch(), sx, clipY);
RenderSingleColorTileClipLeftAndVertical(dst, out.pitch(), sx, clipY, color);
} else {
RenderBlackTileClipRightAndVertical(dst, out.pitch(), clip.width, clipY);
RenderSingleColorTileClipRightAndVertical(dst, out.pitch(), clip.width, clipY, color);
}
}
}

void RenderOpaqueTile(const Surface &out, Point position, LevelCelBlock levelCelBlock, const uint8_t *tbl)
{
const TileType tile = levelCelBlock.type();
const Clip clip = CalculateClip(position.x, position.y, Width, GetTileHeight(tile), out);
if (clip.width <= 0 || clip.height <= 0) return;
const auto *pFrameTable = reinterpret_cast<const uint32_t *>(pDungeonCels.get());
const auto *src = reinterpret_cast<const uint8_t *>(&pDungeonCels[SDL_SwapLE32(pFrameTable[levelCelBlock.frame()])]);
uint8_t *dst = out.at(static_cast<int>(position.x + clip.left), static_cast<int>(position.y - clip.bottom));
const uint16_t dstPitch = out.pitch();
RenderTileDispatch</*Transparent=*/false>(tile, dst, dstPitch, src, tbl, clip);
}

/**
 * @brief Renders already-resolved tile data to `out` without masking and without a light table.
 *
 * Always uses the `LightType::FullyLit`, non-transparent renderer and passes
 * `nullptr` as the light table. Returns without drawing when the clip computed
 * by `CalculateClip` has no positive width or height.
 *
 * @param tile Type of the tile stored at `src`
 * @param out Target surface
 * @param position Tile position on the target surface
 * @param src Tile frame data to draw
 */
void RenderFullyLitOpaqueTile(TileType tile, const Surface &out, Point position, const uint8_t *DVL_RESTRICT src)
{
	const Clip clip = CalculateClip(position.x, position.y, Width, GetTileHeight(tile), out);
	const bool hasVisibleArea = clip.width > 0 && clip.height > 0;
	if (!hasVisibleArea)
		return;

	// Start at the first unclipped column, on the lowest unclipped row.
	const int dstX = static_cast<int>(position.x + clip.left);
	const int dstY = static_cast<int>(position.y - clip.bottom);
	uint8_t *dst = out.at(dstX, dstY);
	const uint16_t pitch = out.pitch();

	RenderTileType<LightType::FullyLit, /*Transparent=*/false>(tile, dst, pitch, src, nullptr, clip);
}

/**
 * @brief Writes the frame of `levelCelBlock` to `dst` with its pixel bytes mapped through `tbl`.
 *
 * The output keeps the layout of the source frame. For `TransparentSquare`
 * tiles the signed run control bytes are copied unchanged and only the pixel
 * runs that follow a positive control byte are remapped.
 *
 * @param levelCelBlock Tile whose frame is read from `pDungeonCels`
 * @param dst Output buffer (NOTE(review): its size is not checked here - callers must provide room for the whole frame)
 * @param tbl Lookup table indexed by source pixel value
 */
void DunTileApplyTrans(LevelCelBlock levelCelBlock, uint8_t *dst, const uint8_t *tbl)
{
	// NOTE(review): presumably the byte sizes of an encoded triangle / trapezoid frame - confirm against the tile format.
	constexpr unsigned TriangleFrameBytes = 544;
	constexpr unsigned TrapezoidFrameBytes = 800;

	const TileType tileType = levelCelBlock.type();

	// `pDungeonCels` begins with a table of little-endian 32-bit offsets, indexed by frame.
	const auto *frameOffsets = reinterpret_cast<const uint32_t *>(pDungeonCels.get());
	const auto *src = reinterpret_cast<const uint8_t *>(&pDungeonCels[SDL_SwapLE32(frameOffsets[levelCelBlock.frame()])]);

	switch (tileType) {
	case TileType::Square:
		BlitPixelsWithMap(dst, src, Width * Height, tbl);
		break;
	case TileType::TransparentSquare:
		for (size_t row = 0; row < Height; ++row) {
			uint_fast8_t remaining = Width;
			while (remaining > 0) {
				// Every run starts with a signed control byte, which is copied to the output as-is.
				auto run = static_cast<int8_t>(*src++);
				*dst++ = run;
				if (run <= 0) {
					// Non-positive control byte: no pixel bytes follow, the row advances by `-run`.
					run = static_cast<int8_t>(-run);
				} else {
					// Positive control byte: `run` pixel bytes follow and are remapped.
					BlitPixelsWithMap(dst, src, run, tbl);
					src += run;
					dst += run;
				}
				remaining -= run;
			}
		}
		break;
	case TileType::LeftTriangle:
	case TileType::RightTriangle:
		BlitPixelsWithMap(dst, src, TriangleFrameBytes, tbl);
		break;
	case TileType::LeftTrapezoid:
	case TileType::RightTrapezoid:
		BlitPixelsWithMap(dst, src, TrapezoidFrameBytes, tbl);
		break;
	}
}

Expand Down
33 changes: 29 additions & 4 deletions Source/engine/render/dun_render.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,10 @@

#include <cstdint>

#include <SDL_endian.h>

#include "engine/point.hpp"
#include "engine/surface.hpp"
#include "levels/dun_tile.hpp"
#include "lighting.h"

// #define DUN_RENDER_STATS
#ifdef DUN_RENDER_STATS
Expand Down Expand Up @@ -175,11 +174,37 @@ void RenderTile(const Surface &out, Point position,
LevelCelBlock levelCelBlock, MaskType maskType, const uint8_t *tbl);

/**
* @brief Render a black 64x31 tile ◆
* @brief Render a single color 64x31 tile ◆
* @param out Target buffer
* @param sx Target buffer coordinate (left corner of the tile)
* @param sy Target buffer coordinate (bottom corner of the tile)
* @param color Color index
*/
void RenderSingleColorTile(const Surface &out, int sx, int sy, uint8_t color = 0);

/**
 * @brief Returns whether `tbl` is the fully dark light table.
 *
 * The check compares the pointer against `FullyDarkLightTable`; the table contents are not inspected.
 *
 * @param tbl Light table pointer to test
 */
inline bool IsFullyDark(const uint8_t *DVL_RESTRICT tbl)
{
return tbl == FullyDarkLightTable;
}

/**
 * @brief Returns whether `tbl` is the fully lit light table.
 *
 * The check compares the pointer against `FullyLitLightTable`; the table contents are not inspected.
 *
 * @param tbl Light table pointer to test
 */
inline bool IsFullyLit(const uint8_t *DVL_RESTRICT tbl)
{
return tbl == FullyLitLightTable;
}

/**
* @brief Renders a tile without masking.
*/
void RenderOpaqueTile(const Surface &out, Point position, LevelCelBlock levelCelBlock, const uint8_t *tbl);

/**
* @brief Renders a tile without masking and without lighting.
*/
void RenderFullyLitOpaqueTile(TileType tile, const Surface &out, Point position, const uint8_t *DVL_RESTRICT src);

/**
* @brief Writes a tile with the color swaps from `tbl` to `dst`.
*/
void world_draw_black_tile(const Surface &out, int sx, int sy);
void DunTileApplyTrans(LevelCelBlock levelCelBlock, uint8_t *DVL_RESTRICT dst, const uint8_t *tbl);

} // namespace devilution
Loading

0 comments on commit c17adfa

Please sign in to comment.