#include <Windows.h>
#include <sys\timeb.h>
#include <SDL.h>
#include <stdio.h>
#include <winnt.h>

#include "Constants.h"
#include "Timing.h"
#include "Io.h"
#include "ULABus.h"
#include "Memory.h"
#include "Cpu.h"
#include "Log.h"
#include "Sound.h"
#include "TapUi.h"
#include "TapLoader.h"
#include "PokeUi.h"
#include "Notification.h"
#include "SaveStates.h"
#include "Screenshots.h"
#include "Video.h"
#include "Sna.h"
#include "Z80.h"
#include "Rewind.h"
#include "GameController.h"
#include "Contention.h"

// selects which signal decides whether sound uses dynamic or static sampling
enum videoSoundSwitch {
    VideoSoundSwitch_OnLag = 0,         // decide based on whether video is currently lagging
    VideoSoundSwitch_OnFrameskip = 1    // decide based on the frameskip percentage
};

static SDL_Surface* _surface;
static SDL_Renderer* _renderer;
static SDL_Window* _window;
static enum videoDisplayMode _displayMode;
static SDL_DisplayMode _sdlDisplayMode;

static int _windowWidth;
static int _windowHeight;

// whether any effect is active
static int _effect = 0;
static int _scanline_effect_amount;

static Uint32 _videoSurfacePixelWidth;
static Uint32 _videoSurfaceWidthZoomedBytes;

static BOOL _videoIsInitialized = FALSE;
SDL_Rect _entireWindowRectangle;

static Uint32 _rendererFlags = SDL_RENDERER_SOFTWARE;
enum frameRenderMode { ForceSkipPixels, DrawPixels };

HANDLE _callbackTimerHandle;
HANDLE _videoFrameMutex;

// we free all pixel cache pointers upon shutdown, and sometimes
// a frame is still drawing
// this mutex thus isolates screen drawing from the pointers freeing
HANDLE _pixelCacheMutex;

static volatile char _isDestroying;
enum videoMode _videoMode;
enum videoSoundSwitch _videoSoundSwitch = VideoSoundSwitch_OnLag;

static volatile char _timerCallbackDestroyed;
static SDL_TimerID _timer;

static double _lagAllowanceBeforeFrameskip = 1.05f;

struct timeb _videoStart, _videoEnd;
struct timeb _currentFrameStart;

static Uint64 _framesAtNormalSpeedSinceNormalSpeed = 0;
static Uint64 _framesSkipped = 0;
struct timeb _videoStartSinceNormalSpeed;
static Uint8 _lastFrameWasNormalSpeed;

static Uint8 _frameskipIsEnabled = 1;
static Uint64 _consecutiveSkippedFrames = 0;

static Uint64 _frames = 0;
static Uint64 _framesAtNormalSpeed = 0;
static Uint64 _tstates = 0;
static Uint8 _zoom = 1;
static Uint32 _zoomTimesEight = 8;
static Uint32 _zoomTimesThirtyTwo = 32;
static Uint32 _visibleBorderWidthSidesTimesZoomTimesFour;

static Uint32 _zoomedVisibleSpectrumLeftBorderWidth;
static Uint32 _zoomedVisibleSpectrumRightBorderWidth;
static Uint32 _zoomedVisibleSpectrumScreenWidth;
static Uint32 _zoomedVisibleSpectrumScreenWidthTimesFour;

static Uint8 _isFlashInverse = 0;
static Uint8 _wasFlashInverse = 0;
static Uint8 _isFlashInverseAttributeMask;

static Uint8 _timingWasFasterLastFrame = 0;

#define _DEBUG_BUFFER_SIZE 256
static char _debugBuffer[_DEBUG_BUFFER_SIZE];

static Uint8 _palette[16*3] = {
    0x00, 0x00, 0x00,   // black
    0x00, 0x22, 0xc7,   // blue
    0xd6, 0x28, 0x16,   // red
    0xd4, 0x33, 0xc7,   // magenta
    0x00, 0xc5, 0x25,   // green
    0x00, 0xc7, 0xc9,   // cyan
    0xcc, 0xc8, 0x2a,   // yellow
    0xca, 0xca, 0xca,   // white

    // bright versions
    0x00, 0x00, 0x00,   // black
    0x00, 0x2b, 0xfb,   // blue
    0xff, 0x33, 0x1c,   // red
    0xff, 0x40, 0xfc,   // magenta
    0x00, 0xf9, 0x2f,   // green
    0x00, 0xfb, 0xfe,   // cyan
    0xff, 0xfc, 0x36,   // yellow
    0xff, 0xff, 0xff,   // white
};

// stores pre-computed values so we only convert RGB -> RGBA once
static Uint32 _rgbaPalette[16];

// total count of 8-pixel segments (each represented by 1 byte in video memory) that have been drawn
static Uint64 _renderedSegmentsCount = 0;

// stores pre-computed pointers to screen addresses of the start of each ZX Spectrum paper scan line
static Uint32* _paperScanlineStartPointers[VISIBLE_SPECTRUM_SCREEN_HEIGHT];

// stores pre-computed pointers to screen addresses of the start of each ZX Spectrum visible screen
static Uint32* _visibleScreenScanlineStartPointers[VISIBLE_SPECTRUM_SCREEN_HEIGHT];
// stores pre-computed pointers to screen addresses of the start of each ZX Spectrum border to the right of paper
static Uint32* _visibleScreenScanlineRightBorderStartPointers[VISIBLE_SPECTRUM_SCREEN_HEIGHT];

// stores pre-computed addresses of attribute entries for each 8x8 pixel ZX Spectrum square
static int _attributeAreaAddresses[SPECTRUM_PAPER_HEIGHT][SPECTRUM_PAPER_WIDTH/8];

// stores pre-rendered zoomed ZX Spectrum screen data bytes, according to pixel data, ink colour, paper colour, inverse status
// format:
//     [screen data byte] [attribute byte] --> RGBA data buffer
static Uint32* _zoomedRGBAByteRenderedPixels[256][256];
static Uint64 _prerenderedDataByteCount = 0;
static Uint8 _pixelCacheIsActive = 1;
static Uint8 _warnedPixelCacheIsNotActive = 0;
// holds all zeroes to serve as the stand-in if we're still rendering after the pixel cache has been cleared
static Uint32 _pixelCacheInactiveBuffer[8 * MAX_ZOOM];

static Uint32 _zoomedPaperSizeRGBA;
static Uint32 _zoomedVisibleSpectrumScanLineSizeRGBA;

// the reason for not having a value of 1 is because the border can change late in current frame
//     and a value of 1 would cause the border be redrawn only until the end of current frame
// thus, a value higher than 1 guarantees that the entire screen will be redrawn even if the border
//     changes late in a frame
#define BORDER_RENDERING_THIS_MANY_FRAMES_AFTER_CHANGE 2
static Uint8 _forceBorderRenderingForThisManyMoreFrames = BORDER_RENDERING_THIS_MANY_FRAMES_AFTER_CHANGE;
static Uint8 _lastBorderColour = 0;

// scanline Y ----> video RAM address of the start of that ZX Spectrum scan line
static Uint32 _rowstartPointers[SPECTRUM_SCREEN_HEIGHT];

#define ALL_SEGMENTS_DIRTY_ON_SCANLINE (0xffffffff)
// for each paper scanline, it stores bit flags telling which 8-pixel segments are dirty
// e.g.:
//       left-most 8-pixel segment on the scan line is            bit 0
//       second 8-pixel segment from the left on the scan line is bit 1
//       third 8-pixel segment from the left on the scan line is  bit 2
// Y of scanline ----> dirty flags
static Uint32 _videoScanlineDirtyFlags[SPECTRUM_PAPER_HEIGHT];

// video memory offset ----> Y of scanline
static Uint8 _videoVideoOffsetToScanlineYCoord[SPECTRUM_PAPER_WIDTH * SPECTRUM_PAPER_HEIGHT];

// attribute memory offset ----> Y of all scanlines affected by that attribute byte
static Uint8 _videoAttributeOffsetToScanlineYCoord[SPECTRUM_VIDEO_RAM_ATTRIBUTE_AREA_LENGTH][8];

// tracks dirty scanlines: flags the 8-pixel segment(s) affected by a write to video RAM
//
// address - address which was written to
//
// a write to the pixel area dirties one segment on one scanline
// a write to the attribute area dirties the same segment on all 8 scanlines
// covered by that attribute byte
//
// addresses outside of video RAM (pixel area + attribute area) are ignored
void video_handle_memory_write(Uint16 address) {
    if (address < SPECTRUM_VIDEO_RAM_START) {
        // before video RAM; the offset computations below would underflow
        return;
    }

    if (address >= SPECTRUM_VIDEO_RAM_START + SPECTRUM_VIDEO_RAM_LENGTH) {
        // past attributes area
        return;
    }

    // NOTE: the flag is built from an unsigned 32-bit 1, because the right-most segment
    //       lands on bit 31, and shifting a signed int into its sign bit is undefined behaviour

    if (address >= SPECTRUM_VIDEO_RAM_ATTRIBUTE_AREA_START) {
        // value is being written to video attribute area
        Uint16 attributeAreaOffset = address - SPECTRUM_VIDEO_RAM_ATTRIBUTE_AREA_START;
        Uint32 segmentFlag = (Uint32)1 << (attributeAreaOffset % 32);
        for (Uint8 scanline = 0; scanline < 8; scanline++) {
            Uint8 yOfScanline = _videoAttributeOffsetToScanlineYCoord[attributeAreaOffset][scanline];
            // mark bit flag of respective dirty 8-pixel segment
            _videoScanlineDirtyFlags[yOfScanline] |= segmentFlag;
        }
        return;
    }

    // value is being written to video pixel area
    Uint16 videoOffset = address - SPECTRUM_VIDEO_RAM_START;
    Uint8 yOfScanline = _videoVideoOffsetToScanlineYCoord[videoOffset];

    // mark bit flag of respective dirty 8-pixel segment
    _videoScanlineDirtyFlags[yOfScanline] |= (Uint32)1 << (videoOffset % 32);
}

// precomputes, for each paper scan line, the video RAM address at which its pixel data starts
//
// the paper is processed as three thirds (top, middle, bottom); within a third, the address
// of the k-th scan line depends on k % 8 and k / 8, so the same formula is applied per third
// with a different base address
void _video_initialize_rowstart_pointers() {
    const int scanlinesPerThird = SPECTRUM_PAPER_HEIGHT / 3;
    const int bytesPerThird = (SPECTRUM_PAPER_WIDTH / 8) * (SPECTRUM_PAPER_HEIGHT / 3);

    // index into _rowstartPointers of the first scan line of the current third
    int firstScanlineOfThird = HIDE_TOP_BORDER_COUNT;
    firstScanlineOfThird += VISIBLE_SPECTRUM_TOP_BORDER_COUNT;

    // video RAM address at which the current third starts
    int thirdBaseAddress = SPECTRUM_VIDEO_RAM_START;

    for (int third = 0; third < 3; third++) {
        for (int line = 0; line < scanlinesPerThird; line++) {
            int lineModEight = line % 8;
            int lineDivEight = line / 8;

            _rowstartPointers[firstScanlineOfThird + line] = thirdBaseAddress
                + (lineModEight * SPECTRUM_PAPER_HEIGHT / 24) * (SPECTRUM_PAPER_WIDTH / 8)
                + (lineDivEight * SPECTRUM_PAPER_WIDTH / 8);
        }

        // advance to the next third
        firstScanlineOfThird += scanlinesPerThird;
        thirdBaseAddress += bytesPerThird;
    }
}

void _video_initialize_scanline_dirty_flags() {
    for (int y = 0; y < SPECTRUM_PAPER_HEIGHT; y++) {
        _videoScanlineDirtyFlags[y] = ALL_SEGMENTS_DIRTY_ON_SCANLINE;
    }
}

// requests a complete redraw: the border is forced to render for the next
// BORDER_RENDERING_THIS_MANY_FRAMES_AFTER_CHANGE frames, and all paper segments become dirty
void video_force_next_frame_full_render() {
    // border
    _forceBorderRenderingForThisManyMoreFrames = BORDER_RENDERING_THIS_MANY_FRAMES_AFTER_CHANGE;

    // paper
    _video_initialize_scanline_dirty_flags();
}

void _video_initialize_attribute_area_to_scanline_Y_coord_map() {
    for (Uint16 attributeOffset = 0; attributeOffset < SPECTRUM_VIDEO_RAM_ATTRIBUTE_AREA_LENGTH; attributeOffset++) {
        Uint8 firstAffectedScanline = (attributeOffset / (SPECTRUM_PAPER_WIDTH/8)) * 8;
        for (Uint8 y = firstAffectedScanline; y < firstAffectedScanline + 8; y++) {
            Uint8 scanlineIndex = y - firstAffectedScanline;
            _videoAttributeOffsetToScanlineYCoord[attributeOffset][scanlineIndex] = y;
        }
    }
}

// precomputes, for each byte offset within the video pixel area, the Y coordinate
// (relative to the top of the paper) of the scanline that byte belongs to
//
// the paper is processed as three thirds (top, middle, bottom), each using the same
// address formula with a different base address
void _video_initialize_video_data_area_to_scanline_Y_coord_map() {
    const int scanlinesPerThird = SPECTRUM_PAPER_HEIGHT / 3;
    const int bytesPerScanline = SPECTRUM_PAPER_WIDTH / 8;
    const int bytesPerThird = (SPECTRUM_PAPER_WIDTH / 8) * (SPECTRUM_PAPER_HEIGHT / 3);

    // skip the border before (above) paper
    int y = HIDE_TOP_BORDER_COUNT;
    y += VISIBLE_SPECTRUM_TOP_BORDER_COUNT;

    // video RAM address at which the current third starts
    int thirdBaseAddress = SPECTRUM_VIDEO_RAM_START;

    for (int third = 0; third < 3; third++) {
        for (int line = 0; line < scanlinesPerThird; line++) {
            int lineModEight = line % 8;
            int lineDivEight = line / 8;
            int scanlineStartAddress = thirdBaseAddress
                + (lineModEight * SPECTRUM_PAPER_HEIGHT / 24) * (SPECTRUM_PAPER_WIDTH / 8)
                + (lineDivEight * SPECTRUM_PAPER_WIDTH / 8);

            // the same Y applies to every byte of this scanline
            Uint8 paperY = y - HIDE_TOP_BORDER_COUNT - VISIBLE_SPECTRUM_TOP_BORDER_COUNT + line;

            for (int column = 0; column < bytesPerScanline; column++) {
                Uint16 videoOffset = scanlineStartAddress + column - SPECTRUM_VIDEO_RAM_START;
                _videoVideoOffsetToScanlineYCoord[videoOffset] = paperY;
            }
        }

        // advance to the next third
        y += scanlinesPerThird;
        thirdBaseAddress += bytesPerThird;
    }
}


// writes to the log which kind of SDL renderer is selected by _rendererFlags
void _video_log_renderer_choice() {
    // used when the flags match neither known renderer kind
    const char* message = "Using video renderer: UNKNOWN";

    if (_rendererFlags == SDL_RENDERER_SOFTWARE) {
        message = "Using video renderer: software";
    }
    else if (_rendererFlags == SDL_RENDERER_ACCELERATED) {
        message = "Using video renderer: accelerated";
    }

    sprintf_s(_debugBuffer, _DEBUG_BUFFER_SIZE - 10, "%s", message);
    log_write(_debugBuffer);
}

// returns skipped frames as a percentage of frames rendered at normal speed
//
// returns 0 while no frames at normal speed have been counted, instead of
// dividing by zero (which would yield NaN or infinity)
double _video_get_frameskip_percentage() {
    if (_framesAtNormalSpeed == 0) {
        return 0.0;
    }

    double percentage = ((double)_framesSkipped / (double)_framesAtNormalSpeed) * 100.0;
    return percentage;
}

Uint64 _video_get_runtime_ms() {
    struct timeb now;
    ftime(&now);

    Uint64 runTimeMs = (Uint64)(1000.0 * (now.time - _videoStart.time)
        + (now.millitm - _videoStart.millitm)) - (Uint32)VIDEO_DELAY_TIMER_START_MS;
    return runTimeMs;
}

// returns the average real time, in milliseconds, spent per frame at normal speed
//
// the time considered is the total runtime, less the time spent while the CPU was
// running faster, less the total idle time
//
// returns 0 while no frames at normal speed have been counted, instead of
// dividing by zero (which would yield NaN or infinity)
double _video_get_actual_average_milliseconds_per_frame() {
    Uint64 timeSpentWhileCPUFasterMs = timing_get_total_real_time_spent_while_faster();
    Uint64 totalTimeAllMs = _video_get_runtime_ms();

    Uint64 idleTimeAllMs = timing_get_total_real_idle_time();
    Uint64 idleTimeWhileFasterMs = timing_get_total_real_idle_time_spent_while_faster();
    // NOTE(review): idleTimeMs is computed but never used; the subtraction below uses
    //               idleTimeAllMs - confirm which of the two was intended
    Uint64 idleTimeMs = idleTimeAllMs - idleTimeWhileFasterMs;

    Uint64 runTimeMs = totalTimeAllMs - timeSpentWhileCPUFasterMs - idleTimeAllMs;

    if (_framesAtNormalSpeed == 0) {
        // no frames to average over yet
        return 0.0;
    }

    double result = (double)runTimeMs / (double)_framesAtNormalSpeed;
    return result;
}

void _video_handle_flash() {
    _wasFlashInverse = _isFlashInverse;
    _isFlashInverse = _frames & 0x10;

    if (_wasFlashInverse != _isFlashInverse) {
        // flashing is inverting this frame
        for (Uint16 attributeOffset = 0; attributeOffset < SPECTRUM_VIDEO_RAM_ATTRIBUTE_AREA_LENGTH; attributeOffset++) {
            // flag as dirty all scanlines whose attribute byte has the FLASH bit set
            Uint8 xOfEightPixels = attributeOffset % 32;
            if (memory_read8(SPECTRUM_VIDEO_RAM_ATTRIBUTE_AREA_START + attributeOffset) & 0x80) {
                for (Uint8 i = 0; i < 8; i++) {
                    // flag as dirty the scanlines affected by this attribute byte
                    Uint8 y = _videoAttributeOffsetToScanlineYCoord[attributeOffset][i];
                    _videoScanlineDirtyFlags[y] |= 1 << xOfEightPixels;
                }
            }
        }
    }

    if (_isFlashInverse) {
        _isFlashInverseAttributeMask = 0x80 | 0x7F;
    }
    else {
        _isFlashInverseAttributeMask = 0;
    }
    _isFlashInverseAttributeMask |= 0x7F;
}

// see declaration of _rgbaPalette for how this works
//
// maps each of the 16 palette entries (3 bytes each: red, green, blue) to the
// pixel format of the specified surface, fully opaque
void _video_compute_rgba_palette(SDL_Surface* surface) {
    const Uint8* rgb = _palette;

    for (int entry = 0; entry < 16; entry++, rgb += 3) {
        _rgbaPalette[entry] = SDL_MapRGBA(surface->format, rgb[0], rgb[1], rgb[2], 255);
    }
}

// see declaration of _paperScanlineStartPointers for how this works
//
// for each visible scan line, stores the address within the surface's pixels at which
// the paper portion of that line starts, taking zoom and display coordinates into account
void _video_compute_scanline_pointers(SDL_Surface* surface) {
    int displayLeft, displayTop;
    video_get_display_coordinates(&displayLeft, &displayTop);

    Uint32* const surfacePixels = (Uint32*)surface->pixels;

    // how far into the surface the display area begins
    const Uint64 displayOffset = displayLeft + displayTop * (Uint64)_videoSurfacePixelWidth;

    // how far into a zoomed line the paper begins
    const int paperLeftInset = ((SPECTRUM_BORDER_WIDTH - HIDE_LEFT_BORDER_COUNT) * _zoom);

    for (int y = HIDE_TOP_BORDER_COUNT; y < VISIBLE_SPECTRUM_SCREEN_HEIGHT + HIDE_TOP_BORDER_COUNT; y++) {
        int offset = ((y - HIDE_TOP_BORDER_COUNT) * _zoom) * _videoSurfacePixelWidth + paperLeftInset;

        Uint32* paperStart = surfacePixels + offset;
        paperStart += displayOffset;

        _paperScanlineStartPointers[y - HIDE_TOP_BORDER_COUNT] = paperStart;
    }
}

// see declaration of _visibleScreenScanlineStartPointers for how this works
// see declaration of _visibleScreenScanlineRightBorderStartPointers for how this works
//
void _video_compute_visible_screen_scanline_pointers() {
    int displayLeft, displayTop;
    video_get_display_coordinates(&displayLeft, &displayTop);

    for (int y = HIDE_TOP_BORDER_COUNT; y < VISIBLE_SPECTRUM_SCREEN_HEIGHT + HIDE_TOP_BORDER_COUNT; y++) {
        int offset = ((y - HIDE_TOP_BORDER_COUNT) * _zoom) * _videoSurfacePixelWidth;
        Uint32* pixelPtr = (Uint32*)_surface->pixels + offset;

        pixelPtr += displayLeft + displayTop * (Uint64)_videoSurfacePixelWidth;

        _visibleScreenScanlineStartPointers[y - HIDE_TOP_BORDER_COUNT] = pixelPtr;
        _visibleScreenScanlineRightBorderStartPointers[y - HIDE_TOP_BORDER_COUNT] = 
            pixelPtr + (VISIBLE_SPECTRUM_LEFT_BORDER_WIDTH + SPECTRUM_PAPER_WIDTH) * (Uint64)_zoom;
    }
}


// see declaration of _attributeAreaAddresses for how this works
//
void _video_compute_attribute_addresses() {
    for (int y = 0; y < SPECTRUM_PAPER_HEIGHT; y++) {
        for (int i = 0; i < SPECTRUM_PAPER_WIDTH; i += 8) {
            int attributeRowNumber = y / 8;
            int attributeRowOffset = attributeRowNumber * (SPECTRUM_PAPER_WIDTH / 8);
            int attributeByteOffset = attributeRowOffset + i / 8;
            int attributeAddress = SPECTRUM_VIDEO_RAM_ATTRIBUTE_AREA_START + attributeByteOffset;

            _attributeAreaAddresses[y][i / 8] = attributeAddress;
        }
    }
}

// see declaration of _zoomedRGBAByteRenderedPixels for how this works
// returns a rendered data byte (8 pixels wide ZX Spectrum line), as 8 * zoom RGBA entries
//
// pixelByte     - the 8 pixels, left-most pixel in the most significant bit
// attributeByte - attribute byte, whose bit 7 has already been converted to mean "inverse"
//
// caches the rendered data byte for future use
// once the pixel cache is no longer active, a shared stand-in buffer is returned instead
// returns NULL when a new buffer could not be allocated
//
// consumer does NOT own the return pointer and MUST NOT free it
Uint32* _video_get_rendered_data_byte(Uint8 pixelByte, Uint8 attributeByte) {
    Uint32** cacheEntry = &_zoomedRGBAByteRenderedPixels[pixelByte][attributeByte];
    if (*cacheEntry != NULL) {
        // this combination of pixel data, ink, paper, inverse, bright was rendered before
        return *cacheEntry;
    }

    // built-in Visual Studio static code analysis complains that the allocated
    // buffers may be of size 0 if _zoom is 0, hence the lower limit of 1
    Uint8 checkedZoom = _zoom;
    if (!checkedZoom) {
        checkedZoom = 1;
    }

    if (!_pixelCacheIsActive) {
        // the cache memory has been freed, so nothing new gets allocated
        if (!_warnedPixelCacheIsNotActive) {
            _warnedPixelCacheIsNotActive = 1;
            log_write("Warning: pixel cache was access after it became inactive; returning hardcoded buffer");
        }
        return _pixelCacheInactiveBuffer;
    }

    Uint32* rendered = (Uint32*)malloc(8 * (Uint64)checkedZoom * sizeof(Uint32));
    if (rendered == NULL) {
        log_write("Error: could not allocate pre-rendered data byte");
        return NULL;
    }

    // attribute bits, after FLASH was converted to INVERSE:
    // bits:       7        6     5   4   3      2   1   0
    //       INVERSE   BRIGHT    P2  P1  P0     I2  I1  I0
    Uint8 inkEntry = attributeByte & 0x07;
    Uint8 paperEntry = (attributeByte >> 3) & 0x07;

    Uint32 inkNormal = _rgbaPalette[inkEntry];
    Uint32 paperNormal = _rgbaPalette[paperEntry];
    Uint32 inkBright = _rgbaPalette[inkEntry + SPECTRUM_COLOUR_COUNT];
    Uint32 paperBright = _rgbaPalette[paperEntry + SPECTRUM_COLOUR_COUNT];

    // BRIGHT selects which variant of both colours is used
    Uint32 ink = (attributeByte & 0x40) ? inkBright : inkNormal;
    Uint32 paper = (attributeByte & 0x40) ? paperBright : paperNormal;

    // INVERSE swaps which colour is used for set pixels and which for reset pixels
    Uint32 setPixelRGBA = (attributeByte & 0x80) ? paper : ink;
    Uint32 resetPixelRGBA = (attributeByte & 0x80) ? ink : paper;

    // walk the pixels left to right, writing each one zoom times
    Uint32* out = rendered;
    for (Uint8 pixelMask = 0x80; pixelMask != 0; pixelMask >>= 1) {
        Uint32 rgba = (pixelByte & pixelMask) ? setPixelRGBA : resetPixelRGBA;

        for (int copy = 0; copy < checkedZoom; copy++) {
            *out = rgba;
            out++;
        }
    }

    *cacheEntry = rendered;
    _prerenderedDataByteCount++;
    return rendered;
}

// frees every cached pre-rendered data byte, clears its cache entry,
// and then marks the pixel cache as inactive
void _video_destroy_prerendered_data_bytes() {
    for (int pixelByte = 0; pixelByte < 256; pixelByte++) {
        Uint32** entriesForPixelByte = _zoomedRGBAByteRenderedPixels[pixelByte];

        for (int attributeByte = 0; attributeByte < 256; attributeByte++) {
            // freeing a NULL entry does nothing
            free(entriesForPixelByte[attributeByte]);
            entriesForPixelByte[attributeByte] = NULL;
        }
    }

    _pixelCacheIsActive = 0;
}

// frees the pixel cache while holding _pixelCacheMutex
//
// waits indefinitely for the mutex
// whenever the wait grants ownership of the mutex to this thread, the mutex is released
// before returning, so that other threads waiting on it are not blocked forever
void _video_destroy_prerendered_data_bytes__wrapper() {
    DWORD dwWaitResult = WaitForSingleObject(_pixelCacheMutex, INFINITE);

    switch (dwWaitResult) {
    case WAIT_OBJECT_0:
        // this thread got ownership of the mutex
        __try {
            _video_destroy_prerendered_data_bytes();
        }

        __finally {
            if (!ReleaseMutex(_pixelCacheMutex))
            {
                // an error happened
            }
        }
        break;

    case WAIT_ABANDONED:
        // this thread got ownership of an abandoned mutex
        // the cache is left untouched, but ownership was still granted to this thread,
        // so the mutex must be released
        if (!ReleaseMutex(_pixelCacheMutex))
        {
            // an error happened
        }
        break;

    default:
        // the wait did not grant ownership of the mutex, so there is nothing to release
        break;
    }
}

// returns whether the average real time per frame, measured since _videoStartSinceNormalSpeed,
// exceeds the target time per frame by more than the lag allowance
BOOL _video_is_lagging() {
    if (_framesAtNormalSpeedSinceNormalSpeed == 0) {
        // nothing to average over yet; treat as not lagging, which also avoids a divide by 0
        return FALSE;
    }

    struct timeb currentTime;
    ftime(&currentTime);

    Uint64 elapsedMs = (Uint64)(1000.0 * (currentTime.time - _videoStartSinceNormalSpeed.time)
        + (currentTime.millitm - _videoStartSinceNormalSpeed.millitm));

    double actualMsPerFrame = (double)elapsedMs / (double)_framesAtNormalSpeedSinceNormalSpeed;
    double allowedMsPerFrame =
        (double)timing_get_target_milliseconds_per_video_frame() * _lagAllowanceBeforeFrameskip;

    return actualMsPerFrame > allowedMsPerFrame;
}

// sets how much slower than target a frame may be, on average, before it counts as lagging
//
// percent - allowance as a percentage of the target time per frame (e.g. 5 results in 1.05)
void video_set_lag_allowance_before_frameskip(Uint8 percent) {
    double allowanceFraction = (double)percent / 100.0;
    _lagAllowanceBeforeFrameskip = 1.0 + allowanceFraction;
}

// runs the CPU for one scanline's allocation of tstates, then checks for a border colour change
//
// isPaperScanline - non-zero when the scanline is a paper scanline
//
// contention is orchestrated only when the scanline is a paper scanline and tape is not running
// the reason for excluding contention delays when tape is running is that contention delay
// calculation is stochastic (based on averages), as opposed to deterministic
// this causes fluctuations which result in unwanted audio drift
void _video_run_cpu_for_scanline(Uint8 isPaperScanline) {
    Uint32 allocation = timing_get_states_per_scanline_allocation();

    struct tapRunState tapeState = taploader_get_state();
    Uint8 tapeIsRunning = tapeState.type != Stopped && tapeState.type != Silence;

    Uint64 tstatesConsumed = (isPaperScanline && !tapeIsRunning)
        ? cpu_run_many(allocation, contention_before_cpu_instruction_executed, contention_finalize_instruction)
        : cpu_run_many(allocation, NULL, NULL);

    if (!timing_is_faster()) {
        // statistics and timing feedback only matter at normal speed
        _tstates += tstatesConsumed;
        timing_report_actual_states_last_scanline((Uint32)tstatesConsumed);
    }

    Uint8 previousBorderColour = _lastBorderColour;
    _lastBorderColour = io_read8_border_colour();
    if (_lastBorderColour != previousBorderColour) {
        // border changed, so we force rendering of the border for next few frames
        _forceBorderRenderingForThisManyMoreFrames = BORDER_RENDERING_THIS_MANY_FRAMES_AFTER_CHANGE;
    }
}

// fills the left border portion of scan line y with the current border colour
// the portion is written on as many consecutive surface lines as the zoom
void _video_write_left_border_line(int y) {
    Uint8 borderColour = io_read8_border_colour();
    const Uint32 borderRGBA = _rgbaPalette[borderColour];

    Uint32* surfaceLine = _visibleScreenScanlineStartPointers[y - HIDE_TOP_BORDER_COUNT];

    // after each surface line, move a line down on screen
    for (Uint8 line = 0; line < _zoom; line++, surfaceLine += _videoSurfacePixelWidth) {
        SDL_memset4(surfaceLine, borderRGBA, _zoomedVisibleSpectrumLeftBorderWidth);
    }
}

// fills the right border portion of scan line y with the current border colour
// the portion is written on as many consecutive surface lines as the zoom
void _video_write_right_border_line(int y) {
    Uint8 borderColour = io_read8_border_colour();
    const Uint32 borderRGBA = _rgbaPalette[borderColour];

    Uint32* surfaceLine = _visibleScreenScanlineRightBorderStartPointers[y - HIDE_TOP_BORDER_COUNT];

    // after each surface line, move a line down on screen
    for (Uint8 line = 0; line < _zoom; line++, surfaceLine += _videoSurfacePixelWidth) {
        SDL_memset4(surfaceLine, borderRGBA, _zoomedVisibleSpectrumRightBorderWidth);
    }
}

// fills the entire visible width of scan line y with the current border colour
// the scan line is written on as many consecutive surface lines as the zoom
void _video_write_border_scan_line(int y) {
    Uint8 borderColour = io_read8_border_colour();
    const Uint32 borderRGBA = _rgbaPalette[borderColour];

    Uint32* surfaceLine = _visibleScreenScanlineStartPointers[y - HIDE_TOP_BORDER_COUNT];

    // after each surface line, move a line down on screen
    for (Uint8 line = 0; line < _zoom; line++, surfaceLine += _videoSurfacePixelWidth) {
        SDL_memset4(surfaceLine, borderRGBA, _zoomedVisibleSpectrumScreenWidth);
    }
}

// ASSUMES paper scan line is dirty
void _video_write_paper_scan_line(int y, int rowStartAddress) {
    Uint32 yMinusLinesBeforePaper = y - SPECTRUM_BORDER_LINES_BEFORE_PAPER;

    // ============ LEFT BORDER ===============
    if (_forceBorderRenderingForThisManyMoreFrames) {
        _video_write_left_border_line(y);
    }

    // ============ PAPER ===============
    Uint32* pixelPtr = _paperScanlineStartPointers[y - HIDE_TOP_BORDER_COUNT];

    Uint32 dirtyFlags = _videoScanlineDirtyFlags[yMinusLinesBeforePaper];

    // a single horizontal 1-pixel thick line as wide as the paper
    for (int i = 0; i < SPECTRUM_PAPER_WIDTH / 8; i += 1) {
        // i iterates through each 8-pixel wide segment

        if (!(dirtyFlags & 1)) {
            // we're not drawing this segment
            dirtyFlags = dirtyFlags >> 1;       // next flag
            pixelPtr += _zoomTimesEight;        // advance pointer as if we did
            continue;
        }

        Uint8 pixelByte = memory_read8(rowStartAddress + i);
        ula_bus_write8(pixelByte);

        int attributeAddress = _attributeAreaAddresses[yMinusLinesBeforePaper][i];
        Uint8 attributeByte = memory_read8(attributeAddress);

        // convert FLASH bit to "inverse"
        attributeByte &= _isFlashInverseAttributeMask;
        // have we seen this segment/ink/paper/inverse/bright combination before?
        Uint32* renderedRGBAPtr = _zoomedRGBAByteRenderedPixels[pixelByte][attributeByte];
        if (renderedRGBAPtr == NULL) {
            // we have not yet seen this combination of pixel data and attribute, so we render it
            // to an 8-pixel zoomed screen line
            renderedRGBAPtr = _video_get_rendered_data_byte(pixelByte, attributeByte);
        }

        Uint32* destination = pixelPtr;
        for (Uint8 dup = 0; dup < _zoom; dup++) {
            SDL_memcpy4(destination, renderedRGBAPtr, _zoomTimesEight);
            // move a line down on screen
            destination += _videoSurfacePixelWidth;
        }

        _renderedSegmentsCount++;
        dirtyFlags = dirtyFlags >> 1;       // next flag
        pixelPtr += _zoomTimesEight;        // 8 zoomed RGBA pixels at    (ptr is 32bit) 4 bytes each
    }

    // ============ RIGHT BORDER ===============
    if (_forceBorderRenderingForThisManyMoreFrames) {
        _video_write_right_border_line(y);
    }

    // we've drawn everything we needed to draw on this scanline
    _videoScanlineDirtyFlags[yMinusLinesBeforePaper] = 0;
}

inline Uint8 _video_get_must_render_pixels() {
    if (timing_is_faster()) {
        // turbo mode is on, so rarely draw pixels ...
        if ((_frames & 0x03) == 0x03) {
            // ... but when we do, it must be guaranteed to take place
            return 1;
        }

        return 0;
    }
    
    // due to how "is lagging" is computed, without this the frameskip will be infinite
    // when breakpointing during debugging
    // 
    // EXPERIMENTALLY DETERMINED
    Uint64 maxConsecutiveSkippedFrames = 4;
    if (_frameskipIsEnabled && (_consecutiveSkippedFrames >= maxConsecutiveSkippedFrames)) {
        // we're not skipping this frame, because we're at the limit of
        // allowed consecutive skipped frames
        _consecutiveSkippedFrames = 0;
        return 1;
    }

    if (_frameskipIsEnabled && _video_is_lagging()) {
        return 0;
    }

    return 1;
}

void _video__preframe() {
    if (_isDestroying || _timerCallbackDestroyed) {
        _timerCallbackDestroyed = 1;
        return;
    }

    savestates_handle_load();
    sound_handle_state_change();

    if (screenshots_is_scheduled()) {
        screenshots_snap_and_save(_surface, VISIBLE_SPECTRUM_SCREEN_HEIGHT * _zoom);
    }

    rewind_notify_start_of_frame();

    _forceBorderRenderingForThisManyMoreFrames = 2;
    if (_timingWasFasterLastFrame && !timing_is_faster() ||
        !_timingWasFasterLastFrame && timing_is_faster() ) {
        // timing has become faster or normal, so ensure border is drawn for several frames
        // this fixes left-over loading "lines" on the border after something finishes loading
        // with turbo on - example: Lemmings

        _forceBorderRenderingForThisManyMoreFrames = 10;    // why 10? 50 seems too many, and 2 is not enough...
    }

    if (_frames == 0) {
        // first frame is guaranteed to be at normal speed, so we 
        // can initialize this timer here
        ftime(&_videoStartSinceNormalSpeed);
    }

    ftime(&_currentFrameStart);

    sound_notify_start_of_frame();
    game_controller_notify_start_of_frame();

    // reliance on whether it's lagging at all lowers resyncs by a lot
    // interestingly, even though dynamic takes into account actual elapsed time,
    // using only dynamic will resync more than a combination of static/dynamic on 
    // slower computers (approx 20% frameskip)
    // 
    // EXPERIMENTALLY DETERMINED
    double dynamicSoundFrameskipPercentageThreshold = 13.0f;

    switch (_videoSoundSwitch)
    {
    case VideoSoundSwitch_OnLag:
        if (_video_is_lagging()) {
            sound_set_sampling_mode(SoundDynamicSampling);
        }
        else {
            sound_set_sampling_mode(SoundStaticSampling);
        }
        break;
    case VideoSoundSwitch_OnFrameskip:
        if (_video_get_frameskip_percentage() > dynamicSoundFrameskipPercentageThreshold) {
            sound_set_sampling_mode(SoundDynamicSampling);
        }
        else {
            sound_set_sampling_mode(SoundStaticSampling);
        }
        break;
    default:
        break;
    }

    _video_handle_flash();
    if (_forceBorderRenderingForThisManyMoreFrames > 0) {
        // we're beginning a new frame
        _forceBorderRenderingForThisManyMoreFrames--;
    }
    cpu_request_interrupt();
}

void _video__postframe() {
    _timingWasFasterLastFrame = timing_is_faster();

    if (!timing_is_faster()) {
        _framesAtNormalSpeed++;
        _framesAtNormalSpeedSinceNormalSpeed++;
        
        if (!_lastFrameWasNormalSpeed) {
            // we have switched to normal speed during THIS frame
            _framesAtNormalSpeedSinceNormalSpeed = 0;
            ftime(&_videoStartSinceNormalSpeed);
        }
        _lastFrameWasNormalSpeed = 1;
    }
    else {
        _lastFrameWasNormalSpeed = 0;
    }

    double msPerFrame = (double)timing_get_target_milliseconds_per_video_frame();
    Uint64 framesPerSecond = (Uint64)(1000.0f / msPerFrame);
    Uint64 soundFrameDelay = framesPerSecond / 2;
    if (_frames == 0) {
        // delay start of sound to accumulate some samples
        sound_play();
    }

    _frames++;

    double averageMsPerFrame = _video_get_actual_average_milliseconds_per_frame();
    timing_set_actual_milliseconds_per_video_frame(averageMsPerFrame);
    
    sound_notify_end_of_frame();
    game_controller_notify_end_of_frame();

    // compute this frame's duration
    struct timeb currentFrameEnd;
    ftime(&currentFrameEnd);
    Uint64 frameTimeMs = (Uint64)(1000.0 * (currentFrameEnd.time - _currentFrameStart.time)
        + (currentFrameEnd.millitm - _currentFrameStart.millitm));

    // record this frame's excess time as idle time if this frame took an extremely long time
    // this is to account for:
    //     - poor scheduling of this thread by OS, or
    //     - debugging and breakpointing, causing video frames to take an extreme amount of time

    // EXPERIMENTALLY DETERMINED
    Uint64 idleFrameTimeLimit = 10 * (Uint64)timing_get_target_milliseconds_per_video_frame();
    if (frameTimeMs > idleFrameTimeLimit) {
        // this frame includes idle time
        Uint64 idleTime = frameTimeMs - timing_get_target_milliseconds_per_video_frame();
        timing_record_idle_time(idleTime);
    }

    savestates_handle_save();
    sna_handle_save();
    z80_handle_save();
}

// Gives every UI module the chance to render itself.
//
// forceRenderUis is passed on to the tape and poke UIs.
//
// Returns 1 when at least one module reported having rendered, otherwise
// the value of forceRenderUis.
Uint8 _video__uis(Uint8 forceRenderUis) {
    // every module is always invoked, regardless of what the others report
    Uint8 tapeUiRendered = tapui_render(_renderer, forceRenderUis) ? 1 : 0;
    Uint8 pokeUiRendered = pokeui_render(_renderer, forceRenderUis) ? 1 : 0;
    Uint8 notificationRendered = notification_render(_renderer) ? 1 : 0;

    if (tapeUiRendered | pokeUiRendered | notificationRendered) {
        return 1;
    }

    return forceRenderUis;
}

// Debugging aid: fills the zoomed visible ZX Spectrum screen area with red, and
// then fills the same area shrunk by one pixel on each side with blue, which
// leaves a red outline one pixel thick.
void _video__DEBUG_draw_ZX_Spectrum_screen_outline() {
    SDL_Rect outer;
    outer.x = 0;
    outer.y = 0;
    outer.w = VISIBLE_SPECTRUM_SCREEN_WIDTH * _zoom;
    outer.h = VISIBLE_SPECTRUM_SCREEN_HEIGHT * _zoom;

    SDL_Rect inner;
    inner.x = outer.x + 1;
    inner.y = outer.y + 1;
    inner.w = outer.w - 2;
    inner.h = outer.h - 2;

    // red outline
    SDL_SetRenderDrawColor(_renderer, 255, 0, 0, 255);
    SDL_RenderFillRect(_renderer, &outer);

    // blue interior
    SDL_SetRenderDrawColor(_renderer, 0, 0, 255, 255);
    SDL_RenderFillRect(_renderer, &inner);
}

// Sets the amount of the scanline effect.
//
// amount: values above 1 are clamped to 1
//
// A non-zero amount also turns on the "an effect is active" flag; an amount of
// zero leaves that flag as it was.
void video_enable_scanline_effect(Uint8 amount) {
    _scanline_effect_amount = (amount != 0) ? 1 : 0;

    if (amount != 0) {
        _effect = 1;
    }
}

// Blacks out one zoomed line of every visible ZX Spectrum scan line.
//
// The line which is blacked out is the second zoomed line (index 1) of each
// scan line. Choosing index 1 as opposed to 0 guarantees that nothing is
// blacked out at zoom=1, in which case the screen would be completely black.
void _video_scanline_effect() {
    if (_zoom <= 1) {
        // there is no second zoomed line to black out
        return;
    }

    // y iterates over ZX Spectrum scan lines
    for (int y = HIDE_TOP_BORDER_COUNT; y < VISIBLE_SPECTRUM_SCREEN_HEIGHT + HIDE_TOP_BORDER_COUNT; y++) {
        Uint32* firstZoomedLine = _visibleScreenScanlineStartPointers[y - HIDE_TOP_BORDER_COUNT];

        // one line down on screen from the first zoomed line
        SDL_memset4(firstZoomedLine + _videoSurfacePixelWidth, 0, _videoSurfacePixelWidth);
    }
    // TODO: this needs to look the same for zoom 3 and 4
}

// Applies the active post-processing effects to the pixels drawn this frame.
// The scanline effect is the only effect handled here.
void ____effects() {
    if (_effect && _scanline_effect_amount > 0) {
        _video_scanline_effect();
    }
}

// Runs the CPU for an entire frame, one scanline at a time, while drawing the
// pixels which are part of the spectrum screen (as in, no UIs, etc.)
//
// isHardwareRenderer: when non-zero, the surface is additionally converted to a
//                     texture and copied to the renderer once drawing is complete
//
void _video__spectrum_pixels(Uint8 isHardwareRenderer) {
    SDL_LockSurface(_surface);

    // scanline about to be processed; hidden top border lines are not drawn
    int scanline = 0;
    for (; scanline < HIDE_TOP_BORDER_COUNT; scanline++) {
        _video_run_cpu_for_scanline(0);
    }

    // ==============================================
    // BORDER BEFORE (ABOVE) PAPER
    // ==============================================
    for (int i = 0; i < VISIBLE_SPECTRUM_TOP_BORDER_COUNT; i++, scanline++) {
        // checked on every line, because running the CPU for a scanline might
        // cause a border change, and thus force a render
        if (_forceBorderRenderingForThisManyMoreFrames) {
            _video_write_border_scan_line(scanline);
        }
        _video_run_cpu_for_scanline(0);
    }

    // ==============================================
    // PAPER: TOP, MIDDLE, AND BOTTOM THIRDS
    // ==============================================
    for (int third = 0; third < 3; third++) {
        for (int k = 0; k < SPECTRUM_PAPER_HEIGHT / 3; k++, scanline++) {
            if (_videoScanlineDirtyFlags[scanline - SPECTRUM_BORDER_LINES_BEFORE_PAPER]) {
                _video_write_paper_scan_line(scanline, _rowstartPointers[scanline]);
            }
            else if (_forceBorderRenderingForThisManyMoreFrames) {
                // this scanline is not dirty, so only its left and right borders
                // are drawn, and only because border drawing is being forced
                _video_write_left_border_line(scanline);
                _video_write_right_border_line(scanline);
            }
            _video_run_cpu_for_scanline(1);
        }
    }

    // ==============================================
    // BORDER AFTER (BELOW) PAPER
    // ==============================================
    for (int i = 0; i < SPECTRUM_BORDER_LINES_AFTER_PAPER - HIDE_BOTTOM_BORDER_COUNT; i++, scanline++) {
        // checked on every line, because running the CPU for a scanline might
        // cause a border change, and thus force a render
        if (_forceBorderRenderingForThisManyMoreFrames) {
            _video_write_border_scan_line(scanline);
        }
        _video_run_cpu_for_scanline(0);
    }

    // hidden bottom border lines are not drawn either
    for (int i = 0; i < HIDE_BOTTOM_BORDER_COUNT; i++) {
        _video_run_cpu_for_scanline(0);
    }

    ____effects();

    SDL_UnlockSurface(_surface);

    // this is unnecessary when using a software renderer
    if (isHardwareRenderer) {
        // the surface holding the pixels we've drawn becomes a texture ...
        SDL_Texture* frameTexture = SDL_CreateTextureFromSurface(_renderer, _surface);

        // ... which is then copied onto the play area
        SDL_RenderCopy(_renderer, frameTexture, NULL, &_entireWindowRectangle);
        SDL_DestroyTexture(frameTexture);
    }

    //_video__DEBUG_draw_ZX_Spectrum_screen_outline();
}

// Runs the CPU for an entire frame, one scanline at a time, without drawing
// anything. This is used for frames whose pixels are being skipped.
void _video__frame_without_pixels() {
    // border before (above) paper
    for (int i = 0; i < SPECTRUM_BORDER_LINES_BEFORE_PAPER; i++) {
        _video_run_cpu_for_scanline(0);
    }

    // top, middle, and bottom thirds of paper
    for (int third = 0; third < 3; third++) {
        for (int k = 0; k < SPECTRUM_PAPER_HEIGHT / 3; k++) {
            _video_run_cpu_for_scanline(1);
        }
    }

    ula_bus_go_inactive();

    // border after (below) paper
    for (int i = 0; i < SPECTRUM_BORDER_LINES_AFTER_PAPER; i++) {
        _video_run_cpu_for_scanline(0);
    }
}

// Draws the spectrum screen pixels while holding the pixel cache mutex, which
// isolates drawing from the freeing of the pixel cache upon shutdown.
//
// isHardwareRenderer: passed through to _video__spectrum_pixels
//
// Nothing is drawn when the mutex could not be acquired normally.
void _video__spectrum_pixels__wrapper(Uint8 isHardwareRenderer) {
    DWORD dwWaitResult = WaitForSingleObject(_pixelCacheMutex, INFINITE);

    switch (dwWaitResult) {
    case WAIT_OBJECT_0:
        // this thread got ownership of the mutex
        __try {
            _video__spectrum_pixels(isHardwareRenderer);
        }

        __finally {
            if (!ReleaseMutex(_pixelCacheMutex))
            {
                // an error happened
            }
        }
        break;

    case WAIT_ABANDONED:
        // this thread got ownership of an abandoned mutex; we don't draw, but
        // ownership was still granted to us, so the mutex must be released or
        // else every other thread waiting on it would block forever
        ReleaseMutex(_pixelCacheMutex);
        break;

    default:
        // the wait failed, so this thread does not own the mutex
        break;
    }
}

void _video__frame_pixels(enum frameRenderMode mode) {
    Uint8 isHardwareRenderer = _rendererFlags != SDL_RENDERER_SOFTWARE;
    Uint8 drawPixels = mode != ForceSkipPixels;

    // render ZX Spectrum screen pixels
    if (drawPixels) {
        _video__spectrum_pixels__wrapper(isHardwareRenderer);
    }
    else {
        _video__frame_without_pixels();
    }

    // render any other pixels (such as for various UIs)
    Uint8 forceUiRendering = isHardwareRenderer;
    Uint8 uisRendered = _video__uis(forceUiRendering);

    if (uisRendered || drawPixels) {
        SDL_RenderPresent(_renderer);
    }
}

// Top level timer-independent (that is, called by all different runners: SDL, QueueTimer, etc.)
// per-frame-invoked function
void _video_frame_runner() {
    _video__preframe();

    Uint8 mustRenderPixels = _video_get_must_render_pixels();

    if (mustRenderPixels) {
        _video__frame_pixels(DrawPixels);
    }
    else {
        _consecutiveSkippedFrames++;
        if (!timing_is_faster()) {
            _framesSkipped++;
        }
        _video__frame_pixels(ForceSkipPixels);
    }
    _video__postframe();
}

// Timer callback used when video is driven by a SDL timer. Runs one frame, unless
// the initial start delay has not yet elapsed.
//
// param: unused
//
// Returns the target duration of a video frame, in milliseconds.
//
// NOTE(review): this function is cast to SDL_TimerCallback when it is registered;
// SDL documents that callback type as taking (Uint32 interval, void* param), which
// does not match this signature - confirm and align
Uint32 _video_callback__via_SDL_Timer(void* param) {
    struct timeb now;
    ftime(&now);

    // SDL2's SDL_AddTimer does not provide a way to delay the start of the callback
    // invocations, which is why the delayed start is implemented here
    double wholeSecondsInMs = 1000.0 * (now.time - _videoStart.time);
    int millisecondsPart = now.millitm - _videoStart.millitm;
    Uint64 elapsedSinceStartMs = (Uint64)(wholeSecondsInMs + millisecondsPart);

    // nothing happens for the first bit of time
    if (elapsedSinceStartMs > (Uint64)VIDEO_DELAY_TIMER_START_MS) {
        _video_frame_runner();
    }

    return timing_get_target_milliseconds_per_video_frame();
}

// Timer callback used when video is driven by a timer queue timer. Runs one frame
// while holding the video frame mutex.
//
// lpParameter, TimerOrWaitFired: unused
//
// No frame is run when the mutex could not be acquired normally.
void __stdcall _video_callback__via_QueueTimer(PVOID lpParameter, BOOLEAN TimerOrWaitFired) {
    DWORD dwWaitResult = WaitForSingleObject(_videoFrameMutex, INFINITE);
    
    switch (dwWaitResult) {
        case WAIT_OBJECT_0:
            // this thread got ownership of the mutex
            __try {
                _video_frame_runner();
            }

            __finally {
                if (!ReleaseMutex(_videoFrameMutex))
                {
                    // an error happened
                }
            }
            break;

        case WAIT_ABANDONED:
            // this thread got ownership of an abandoned mutex; we don't run a frame,
            // but ownership was still granted to us, so the mutex must be released
            // or else it would remain held by this thread indefinitely
            ReleaseMutex(_videoFrameMutex);
            break;

        default:
            // the wait failed, so this thread does not own the mutex
            break;
    }
}

// Turns frameskip off; it is on unless this is called.
void video_disable_frameskip() {
    _frameskipIsEnabled = 0;
}

// Stores the zoom factor, along with the values derived from it which are
// precomputed here so that they're not recomputed while drawing.
void _video_set_zoom(Uint8 zoom) {
    _zoom = zoom;

    // plain multiples of the zoom factor
    _zoomTimesEight = _zoom * 8;
    _zoomTimesThirtyTwo = _zoom * 32;

    // zoomed widths
    _zoomedVisibleSpectrumScreenWidth = VISIBLE_SPECTRUM_SCREEN_WIDTH * _zoom;
    _zoomedVisibleSpectrumLeftBorderWidth = VISIBLE_SPECTRUM_LEFT_BORDER_WIDTH * _zoom;
    _zoomedVisibleSpectrumRightBorderWidth = VISIBLE_SPECTRUM_RIGHT_BORDER_WIDTH * _zoom;

    // zoomed widths, further multiplied by the size of a Uint32
    _zoomedVisibleSpectrumScreenWidthTimesFour = _zoomedVisibleSpectrumScreenWidth * sizeof(Uint32);
    _visibleBorderWidthSidesTimesZoomTimesFour = _zoom * sizeof(Uint32) * VISIBLE_BORDER_WIDTH_SIDES;
}

// Starts video driven by a SDL timer: resets the frame counters, creates the
// renderer, and registers the per-frame timer callback.
//
// window: the window for which the renderer is created
// windowWidth, windowHeight: unused
//
// Returns TRUE on success, and FALSE when either the renderer or the timer
// could not be created.
BOOL video_start__via_SDL_Timer(SDL_Window* window, int windowWidth, int windowHeight) {
    log_write("Using a SDL Timer-based video callback");
    
    _frames = 0;
    _framesAtNormalSpeed = 0;
    _framesAtNormalSpeedSinceNormalSpeed = 0;
    _lastFrameWasNormalSpeed = !timing_is_faster();
    _tstates = 0;

    _isDestroying = 0;
    _timerCallbackDestroyed = 0;
    ftime(&_videoStart);

    _entireWindowRectangle.x = 0;
    _entireWindowRectangle.y = 0;
    _entireWindowRectangle.w = _surface->w;
    _entireWindowRectangle.h = _surface->h;

    // hardware renderers crash when the screen locks, resulting in
    // frozen video
    _renderer = SDL_CreateRenderer(window, -1, _rendererFlags);
    if (_renderer == NULL) {
        log_write("Error: could not get a video renderer");
        return FALSE;
    }
    _video_log_renderer_choice();

    _timer = SDL_AddTimer(timing_get_target_milliseconds_per_video_frame(), (SDL_TimerCallback)_video_callback__via_SDL_Timer, NULL);
    if (_timer == 0) {
        // SDL_AddTimer reports failure by returning 0; without a timer no frame
        // would ever run, so this must not be reported as a successful start
        log_write("Error: unable to start video frame timer");
        return FALSE;
    }

    _videoIsInitialized = TRUE;
    log_write("Started timer, yielding CPU control to it");
    return TRUE;
}

// Starts video driven by a timer queue timer: resets the frame counters, creates
// the renderer and the video frame mutex, and starts the per-frame timer.
//
// window: the window for which the renderer is created
// windowWidth, windowHeight: unused
//
// Returns TRUE on success, and FALSE when the renderer, the mutex, or the timer
// could not be created.
BOOL video_start__via_QueueTimer(SDL_Window* window, int windowWidth, int windowHeight) {
    log_write("Using a QueueTimer-based video callback");

    // counters start over
    _tstates = 0;
    _frames = 0;
    _framesAtNormalSpeed = 0;
    _framesAtNormalSpeedSinceNormalSpeed = 0;
    _lastFrameWasNormalSpeed = !timing_is_faster();

    // we're not shutting down
    _timerCallbackDestroyed = 0;
    _isDestroying = 0;
    ftime(&_videoStart);

    // the rectangle covers the whole surface
    _entireWindowRectangle.w = _surface->w;
    _entireWindowRectangle.h = _surface->h;
    _entireWindowRectangle.x = 0;
    _entireWindowRectangle.y = 0;

    // hardware renderers crash when the screen locks, resulting in
    // frozen video
    _renderer = SDL_CreateRenderer(window, -1, _rendererFlags);
    if (!_renderer) {
        log_write("Error: could not get a video renderer");
        return FALSE;
    }
    _video_log_renderer_choice();

    _videoFrameMutex = CreateMutex(NULL, FALSE, NULL);
    if (_videoFrameMutex == NULL) {
        log_write("Error: unable to create video frame timer mutex");
        return FALSE;
    }

    // the first callback fires after the start delay, and then once every frame period
    Uint16 periodMs = timing_get_target_milliseconds_per_video_frame();
    if (!CreateTimerQueueTimer(
            &_callbackTimerHandle,
            NULL,
            _video_callback__via_QueueTimer,
            NULL,
            VIDEO_DELAY_TIMER_START_MS,
            periodMs,
            WT_EXECUTEINTIMERTHREAD)) {
        log_write("Error: unable to start video frame timer");
        _callbackTimerHandle = NULL;
        return FALSE;
    }

    _videoIsInitialized = TRUE;
    log_write("Started video timer, yielding CPU control to it");

    return TRUE;
}

// Returns the renderer created when video was started (NULL if creating it failed).
SDL_Renderer* video_get_renderer() {
    return _renderer;
}

void _video_report_statistics() {
    char* message = (char*)malloc(500 * sizeof(char));
    if (message == NULL) {
        return;
    }

    Uint64 timeSpentWhileCPUFasterMs = timing_get_total_real_time_spent_while_faster();
    Uint64 totalTimeMs = _video_get_runtime_ms();
    Uint64 runTimeMs = totalTimeMs - timeSpentWhileCPUFasterMs;

    sprintf_s(message, 490, "Zoom: %u", _zoom);
    log_write(message);

    if (_frameskipIsEnabled) {
        log_write("Frameskip is enabled");
        sprintf_s(message, 490, "Lag allowance before frameskip: %u%%", (Uint16)round(_lagAllowanceBeforeFrameskip * 100.0f));
        log_write(message);
    }
    else {
        log_write("Frameskip is disabled");
    }

    switch (_videoSoundSwitch)
    {
    case VideoSoundSwitch_OnLag:
        log_write("Video sound switching: 0 (on lag)");
        break;
    case VideoSoundSwitch_OnFrameskip:
        log_write("Video sound switching: 1 (on frameskip)");
        break;
    default:
        break;
    }

    long long totalPrerenderedDataEntries = 256 * SPECTRUM_COLOUR_COUNT * SPECTRUM_COLOUR_COUNT * 2 * 2;
    double cachedPixelBytesPercentage = (double)_prerenderedDataByteCount / (double)totalPrerenderedDataEntries * 100.0f;

    long long cachedPixelSizeBytes = _prerenderedDataByteCount * _zoomTimesEight * sizeof(Uint32);
    long long cachedPixelSizeKilobytes = cachedPixelSizeBytes / 1024;

    if (cachedPixelSizeBytes % 1024 != 0) {
        cachedPixelSizeKilobytes++;
    }

    if (cachedPixelSizeKilobytes == 0) {
        cachedPixelSizeKilobytes = 1;
    }

    sprintf_s(message, 490, "Pixel cache usage: %I64d entries (%.1f%% of available), %I64dkb", 
        _prerenderedDataByteCount, 
        cachedPixelBytesPercentage, 
        cachedPixelSizeKilobytes);
    log_write(message);

    if (_effect) {
        log_write_string_int("Effect: CRT-like scanlines, magnitude %d", _scanline_effect_amount);
    }

    double frames = (double)_framesAtNormalSpeed;
    sprintf_s(message, 490, "Total video frames (rendered and skipped): %lld", (Uint64)frames);
    log_write(message);

    double percentage = _video_get_frameskip_percentage();
    sprintf_s(message, 490, "Frames skipped: %I64d (%.1f%% of total)", _framesSkipped, percentage);
    log_write(message);

    double fps = frames / ((double)runTimeMs / 1000.f);
    sprintf_s(message, 490, "Average frames per second: %.2f", fps);
    log_write(message);

    double millisPerFrame = (double)runTimeMs / frames;
    Uint32 targetMillisPerFrame = timing_get_target_milliseconds_per_video_frame();
    sprintf_s(message, 490, "Average milliseconds per frame: %.3f (target is %u)", millisPerFrame, targetMillisPerFrame);
    log_write(message);

    long double tstates = (long double)_tstates;
    long double tstatesPerVideoFrame = tstates / frames;
    Uint64 targetTstatesPerVideoFrame = timing_get_states_per_video_frame();
    percentage = (tstatesPerVideoFrame / (long double)targetTstatesPerVideoFrame) * 100.0f;
    sprintf_s(message, 490, "Average tstates per video frame: %.2f (target is %lld), %.1f%%", 
        tstatesPerVideoFrame, 
        targetTstatesPerVideoFrame,
        percentage);
    log_write(message);

    sprintf_s(message, 490, "Total rendered 8 pixel-wide segments: %I64u",
        _renderedSegmentsCount);
    log_write(message);
    long double segmentsPerVideoFrame = ((long double)_renderedSegmentsCount) / frames;
    sprintf_s(message, 490, "Average rendered segments per video frame: %.1f",
        segmentsPerVideoFrame);
    log_write(message);

    log_write("\nDURATION");
    sprintf_s(message, 490, "Total real duration (CPU at normal speed): %I64u ms", runTimeMs);
    log_write(message);
    sprintf_s(message, 490, "Total real duration (CPU at faster speed): %I64u ms", timeSpentWhileCPUFasterMs);
    log_write(message);
    sprintf_s(message, 490, "Total real duration (CPU normal and fast): %I64u ms", totalTimeMs);
    log_write(message);

    free(message);
}

// Shuts down video driven by a SDL timer: waits for the timer callback to
// acknowledge the shutdown, removes the timer, and records the time at which
// video ended.
void video_destroy__via_SDL_Timer() {
    log_write("Destroying SDL_Timer-based video");
    _isDestroying = 1;

    if (_timer) {
        // we busy-wait for callback to become NOOP only if the timer was actually
        // started; without a timer nothing would ever raise the flag, and this
        // wait would never end
        while (!_timerCallbackDestroyed) {}

        SDL_RemoveTimer(_timer);
        _timer = 0;
    }

    ftime(&_videoEnd);
}

void video_destroy__via_QueueTimer() {
    log_write("Destroying QueueTimer-based video");
    if (_callbackTimerHandle) {
        _isDestroying = 1;
        // we busy-wait for callback to become NOOP only if we actually initialized
        // the callback timer successfully
        while (!_timerCallbackDestroyed) {}

        if (!DeleteTimerQueueTimer(NULL, _callbackTimerHandle, NULL)) {
            log_write("Warning: deletion of QueueTimer failed");
        }
    }

    ftime(&_videoEnd);

    if (_videoFrameMutex) {
        CloseHandle(_videoFrameMutex);
    }

    if (_surface) {
        SDL_FreeSurface(_surface);
    }
}

// Applies the configured display mode to the window, records the resulting
// display dimensions, and logs the choice.
//
// In windowed mode the recorded dimensions are those of the window; in either
// fullscreen mode they're those of the current mode of display 0.
//
// Returns FALSE when the display mode is not supported, and TRUE otherwise.
BOOL _video_initialize_display_mode() {
    if (_displayMode == Video_DisplayMode_Windowed) {
        _sdlDisplayMode.w = _windowWidth;
        _sdlDisplayMode.h = _windowHeight;
        log_write("Display mode: windowed");
        return TRUE;
    }

    if (_displayMode != Video_DisplayMode_DesktopFullscreen &&
        _displayMode != Video_DisplayMode_Fullscreen) {
        log_write("Error: unsupported display mode");
        return FALSE;
    }

    // both fullscreen modes follow the same steps, differing only in the
    // fullscreen flag and in the logged message
    const BOOL isDesktopFullscreen = (_displayMode == Video_DisplayMode_DesktopFullscreen);

    SDL_SetWindowFullscreen(_window, isDesktopFullscreen ? SDL_WINDOW_FULLSCREEN_DESKTOP : SDL_WINDOW_FULLSCREEN);
    SDL_GetCurrentDisplayMode(0, &_sdlDisplayMode);

    if (isDesktopFullscreen) {
        sprintf_s(_debugBuffer, _DEBUG_BUFFER_SIZE - 10, "Display mode: desktop fullscreen (width:%dpx, height:%dpx)",
            _sdlDisplayMode.w,
            _sdlDisplayMode.h);
    }
    else {
        sprintf_s(_debugBuffer, _DEBUG_BUFFER_SIZE - 10, "Display mode: fullscreen (width:%dpx, height:%dpx)",
            _sdlDisplayMode.w,
            _sdlDisplayMode.h);
    }
    log_write(_debugBuffer);

    return TRUE;
}

// Computes the coordinates at which the window area is centered within the display.
//
// left: receives half of the difference between display width and window width
// top:  receives half of the difference between display height and window height
void video_get_display_coordinates(int* left, int* top) {
    int horizontalSlack = _sdlDisplayMode.w - _windowWidth;
    int verticalSlack = _sdlDisplayMode.h - _windowHeight;

    *left = horizontalSlack / 2;
    *top = verticalSlack / 2;
}

// Initializes video and starts running frames via the requested timer mechanism.
//
// videoMode:        which timer mechanism drives the frames
// window:           the window to draw to
// zoom:             zoom factor
// windowWidth,
// windowHeight:     dimensions of the window
// videoDisplayMode: windowed, or one of the fullscreen modes
//
// Returns non-zero on success, and FALSE when the display mode is unsupported,
// the window surface or the pixel cache mutex could not be obtained, the video
// mode is not recognized, or the timer-specific start failed.
Uint8 video_start(enum videoMode videoMode, SDL_Window* window, char zoom, int windowWidth, int windowHeight, enum videoDisplayMode videoDisplayMode) {
    _videoMode = videoMode;
    _video_set_zoom(zoom);
    _window = window;
    _displayMode = videoDisplayMode;
    _windowWidth = windowWidth;
    _windowHeight = windowHeight;

    _video_initialize_scanline_dirty_flags();
    _video_initialize_video_data_area_to_scanline_Y_coord_map();
    _video_initialize_attribute_area_to_scanline_Y_coord_map();
    _video_initialize_rowstart_pointers();

    if (!_video_initialize_display_mode()) {
        return FALSE;
    }

    // the surface is obtained only after the display mode has been applied
    _surface = SDL_GetWindowSurface(_window);
    if (!_surface) {
        log_write("Error: unable to create SDL window surface");
        return FALSE;
    }

    // pitch is in bytes, while pixels are 32 bits each
    _videoSurfacePixelWidth = _surface->pitch / sizeof(Uint32);
    _videoSurfaceWidthZoomedBytes = _surface->pitch;

    _video_compute_rgba_palette(_surface);
    _video_compute_scanline_pointers(_surface);
    _video_compute_visible_screen_scanline_pointers();
    _video_compute_attribute_addresses();

    _zoomedPaperSizeRGBA = SPECTRUM_PAPER_WIDTH * _zoom * sizeof(Uint32);
    _zoomedVisibleSpectrumScanLineSizeRGBA = VISIBLE_SPECTRUM_SCREEN_WIDTH * _zoom * sizeof(Uint32);

    _pixelCacheMutex = CreateMutex(NULL, FALSE, NULL);
    if (!_pixelCacheMutex) {
        log_write("Error: unable to create pixel cache mutex");
        return FALSE;
    }

    switch (_videoMode)
    {
    case Video_viaQueueTimer:
        return video_start__via_QueueTimer(window, windowWidth, windowHeight);
    case Video_viaSDLTimer:
        return video_start__via_SDL_Timer(window, windowWidth, windowHeight);
    default:
        // unrecognized video mode
        return FALSE;
    }
}

void video_destroy() {
    log_write("\nVIDEO");

    switch (_videoMode)
    {
    case Video_viaQueueTimer:
        video_destroy__via_QueueTimer();
        break;
    case Video_viaSDLTimer:
        video_destroy__via_SDL_Timer();
        break;
    default:
        break;
    }

    _video_destroy_prerendered_data_bytes__wrapper();
    _video_report_statistics();

    if (_pixelCacheMutex) {
        CloseHandle(_pixelCacheMutex);
    }
}

// Selects the type of renderer created when video starts.
//
// renderer: 1 selects an accelerated renderer; any other value selects
//           a software renderer
void video_set_renderer_type(Uint8 renderer) {
    _rendererFlags = (renderer == 1)
        ? SDL_RENDERER_ACCELERATED
        : SDL_RENDERER_SOFTWARE;
}