#include <sys\timeb.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <windows.h>
#include <timeapi.h>

#include "Timing.h"
#include "Log.h"

// scratch buffer used to format messages before they are handed to log_write
#define _DEBUG_BUFFER_SIZE 256
static char _debugBuffer[_DEBUG_BUFFER_SIZE];

// set to 1 at the end of timing_start, and back to 0 by timing_destroy
Uint8 _timing_is_ready = 0;

// desired Windows timer period; it is clamped to the range reported by
// timeGetDevCaps before being requested
#define TARGET_RESOLUTION_MS 1
// the period passed to timeBeginPeriod (and later to timeEndPeriod)
UINT _windows_timer_min_resolution;
// set to 1 once timeBeginPeriod has succeeded
Uint8 _timer_resolution_set = 0;

// tstate counts at a tstate factor of 1 (assigned in timing_start)
Uint32 _timing_tstates_per_scanline__default;
Uint64 _timing_tstates_per_video_frame__default;

// the defaults above multiplied by the current tstate factor
Uint32 _timing_tstates_per_scanline;
Uint64 _timing_tstates_per_video_frame;

// target frame duration that was passed to timing_start
Uint16 _timing_milliseconds_per_video_frame;
// current tstate multiplier; any value greater than 1 counts as "faster" mode
Uint8 _timing_tstate_factor;

// last value stored through timing_set_actual_milliseconds_per_video_frame
double _timing_actualmilliseconds_per_video_frame;

// idle time reported through timing_record_idle_time: the first counter receives
// durations reported while the CPU is in faster mode, the second receives
// durations reported while it is not
Uint64 _idleTimeWhileCPUFaster = 0;
Uint64 _idleTimeAll = 0;

// set to 1 by timing_prepare_destroy
Uint8 _timing_is_ready_to_destroy = 0;

// (actual - target) tstates of the most recently reported scanline; negative
// when the scanline used fewer tstates than the target
Sint64 _last_scanline_tstate_overflow_delta;

// number of upcoming scanline reports whose delta is discarded after a
// tstate factor change
Uint32 _per_scanline_adjustment_countdown_after_factor_change = 0;

// used to keep track of real time spent while CPU is in faster mode
struct timeb _fasterTimingStart;
Uint64 _fasterTotalRunTimeMs = 0;

// The macro below is no longer used; it is kept (commented out) to illustrate
// the structure which led to a buffer size of just one.
//
// duration in video frames before average compensation is re-computed
//#define DYN_COMPENSATION_BUFFER_SIZE__FRAMES 0.004f

// dynamic compensation state: a ring buffer of per-scanline overflow deltas,
// its write position, the running total of all samples folded in so far, and
// the compensation (in tstates) currently subtracted from each scanline's
// allocation
Uint64 _dynCompensationBufferSize;
Sint64* _dynCompensationBuffer;
Uint64 _dynCompensationBufferIndex;
Sint32 _dynCompensationAverage;
double _dynCompensationTotal;

// Resets the dynamic compensation state and (re)allocates the sample buffer.
//
// A buffer left over from an earlier call is released before the new one is
// allocated, so calling this more than once does not leak. The new buffer is
// zero-filled. If the allocation fails, _dynCompensationBuffer is left NULL.
void _timing_initialize_dynamic_compensation() {
	_dynCompensationAverage = 0;
	_dynCompensationTotal = 0.0;
	_dynCompensationBufferIndex = 0;

	// The size of this buffer was previously initialized to be one or more video frames' worth
	// of scanlines.
	// Experiments with both longer, and then shorter, buffers showed the best results with a
	// buffer of size 1.
	// The structure of the code was kept, though it ultimately gets optimized out of existence.
	// However, various places where experimental changes took place still retain the comments
	// which explain the experimental findings.
	//
	// A buffer size of 1 also no longer makes timing-dependent border effects "dance up and down",
	// such as in the Mask III menu, or in a simple BORDER->BORDER->BORDER->PAUSE 1->GOTO START
	// program.
	//
	//_dynCompensationBufferSize = (Uint64)(DYN_COMPENSATION_BUFFER_SIZE__FRAMES * (double)SPECTRUM_SCREEN_HEIGHT);
	_dynCompensationBufferSize = 1;

	// free(NULL) is a no-op, so this is also safe on the very first call
	free(_dynCompensationBuffer);
	_dynCompensationBuffer = calloc((size_t)_dynCompensationBufferSize, sizeof *_dynCompensationBuffer);
}

// Writes one sample into the compensation ring buffer at the current write
// position, then advances that position, wrapping to slot 0 once the end of
// the buffer has been reached.
void _timing_dynamic_compensation__add_sample(Sint64 sample) {
	const Uint64 slot = _dynCompensationBufferIndex;
	const Uint64 next = slot + 1;

	_dynCompensationBuffer[slot] = sample;
	_dynCompensationBufferIndex = (next < _dynCompensationBufferSize) ? next : 0;
}

Uint32 timing_get_states_per_scanline_allocation() {
	if (_timing_is_ready_to_destroy || !_timing_is_ready) {
		// skip all buffer operations if shutting down
		return _timing_tstates_per_scanline;
	}

	_timing_dynamic_compensation__add_sample(_last_scanline_tstate_overflow_delta);

	if (_dynCompensationBufferIndex == 0) {
		// we've just filled the buffer
		double total = 0.0f;
		for (Uint64 i = 0; i < _dynCompensationBufferSize; i++) {
			total += (double)_dynCompensationBuffer[i];
		}

		/*  TESTS FOR ALL VERSIONS AND EXPERIMENTS
		    (MEASUREMENTS ARE ALL FROM FINAL CONFIGURATION ADOPTED FOR VERSION 31)
										69888 target
                 resyncs(double-based)   avg tstates     test 
            v31             0           69930 100.1%     manic miner
			v31             0           69931 100.1%     ping pong
			v31             0           69975 100.1%     nipper2
			v31             0           69985 100.1%     dizzy
			v31             0           69998 100.2%     BASIC idle
			v31             0           70056 100.2%     BASIC key down
			v31             0           69903 100.0%     mr beep
		*/
		/*why does BORDER->BORDER->BORDER->PAUSE 1->GOTO START match up with all other emulators
			but Mask III border is higher? it's unlikely that the >100% actual tstates are a problem
			because it doesn't exceed 100.2%. The 0.2% of 311 scanlines/frame is less than one scanline/frame, which does NOT
			explain why on Mask III, the border is about 10 scanlines too high*/
		// PRIOR (version 30) VERSION TEST RESULTS:
		//                           // inconsistent tstate variance (99.6%-101.1%) across tests
		//                           // inconsistent resyncs (4.1/s - 12.5/s) acros tests
		//                           // Mask III would freeze on game start

		// EXPERIMENTS ON THIS BRANCH:
		// (no adjustment to total)  // yielded good results, and is the adopted solution; 100.0%-100.2% tstate variance
		//                           // 6.4/s - 6.8/s (very consistent) resyncs per second across all test cases
		//                           // NOTE: the number of resyncs was dramatically reduced with double-based sample
		//                           //       spacing in the Sound.c module
		//                           // NOTE: Mask III no longer freezes
		// 
		//     total *= 1.5f;        // similar numbers as (no adjustment) above

		// combine with previous
		total += _dynCompensationTotal;
		if (total < _dynCompensationTotal) {
			// in this case, our current sample set is negative on average, which means that 
			// we are decreasing our compensation

			// EXPERIMENTS ON THIS BRANCH:
			//     total *= 1.5f;        // yielded unacceptable crackling in BASIC key down, and 99.2%-100.0% tstate variance
			//     total *= 1.04f;       // yielded good results, with 100.0%-100.2% tstate variance, but slightly more resyncs
			//                           // than without any adjustment on this branch
		}
		else {
			// in this case, our current sample set total is positive on average, which 
			// means that we are increasing our compensation - causing a further decrease
			// in tstate allocations
			
			// EXPERIMENTS ON THIS BRANCH:
			//     total *= 1.5f;        // yielded unacceptable resyncs, and 99.6%-99.9% tstate variance
			//     total *= 1.04f;       // yielded good results, with 100.0%-100.1% tstate variance, but slightly more resyncs
			//                           // than without any adjustment on this branch
		}
		_dynCompensationAverage = (Sint32)ceil((total / (double)_dynCompensationBufferSize));
		_dynCompensationTotal = total;
	}

	Uint32 allocation = _timing_tstates_per_scanline - _dynCompensationAverage;
	return allocation;
}

// Stores the signed difference between the tstates a scanline actually used and
// the per-scanline target. The stored value is what
// timing_get_states_per_scanline_allocation feeds into the compensation buffer.
void _timing_record_overflow_delta(Sint64 overflowDelta) {
	_last_scanline_tstate_overflow_delta = overflowDelta;
}

// Clears the stored overflow delta, so that the next sample fed into the
// compensation buffer is 0.
void _timing_reset_overflow_delta() {
	_last_scanline_tstate_overflow_delta = 0;
}

// Processes the tstate count reported for the scanline that has just completed.
//
// While the countdown that follows a tstate factor change is still running, the
// report is discarded: the countdown is decremented and the stored delta is
// cleared. Otherwise the signed difference between the reported count and the
// per-scanline target is recorded.
//
// (An earlier version recorded only positive overflows and reset on anything
// else; the delta is now signed, so undershoots are recorded as well.)
void _timing_handle_adjustment_after_tstate_factor_change(Uint32 actualTstates) {
	if (_per_scanline_adjustment_countdown_after_factor_change == 0) {
		// normal operation: widen both operands so the difference may be negative
		_timing_record_overflow_delta((Sint64)actualTstates - (Sint64)_timing_tstates_per_scanline);
		return;
	}

	// still settling after a tstate factor change
	_per_scanline_adjustment_countdown_after_factor_change -= 1;
	_timing_reset_overflow_delta();
}

// Reports how many tstates the most recent scanline actually consumed.
// Forwards to _timing_handle_adjustment_after_tstate_factor_change, which either
// records the difference from the per-scanline target or, during the countdown
// that follows a tstate factor change, discards the report.
void timing_report_actual_states_last_scanline(Uint32 actualTstates) {
	_timing_handle_adjustment_after_tstate_factor_change(actualTstates);
}

// Returns the number of tstates in one video frame at the current tstate factor
// (the default per-frame count multiplied by the factor).
Uint64 timing_get_states_per_video_frame() {
	return _timing_tstates_per_video_frame;
}

// Returns the target duration of one video frame, in milliseconds, as it was
// passed to timing_start.
Uint16 timing_get_target_milliseconds_per_video_frame() {
	return _timing_milliseconds_per_video_frame;
}

void _timing_apply_tstate_factor__worker(factor) {
	_timing_reset_overflow_delta();
	_per_scanline_adjustment_countdown_after_factor_change = 5;

	_timing_tstate_factor = factor;
	_timing_tstates_per_scanline = _timing_tstates_per_scanline__default * _timing_tstate_factor;
	_timing_tstates_per_video_frame = _timing_tstates_per_video_frame__default * _timing_tstate_factor;
}

// Returns the accumulated real time, in milliseconds, of completed faster-mode
// intervals. An interval is added only when timing_apply_tstate_factor brings
// the factor back to 1, so an interval that is still in progress is not
// included.
Uint64 timing_get_total_real_time_spent_while_faster() {
	return _fasterTotalRunTimeMs;
}

// Changes the tstate factor and keeps track of the real time spent in faster
// mode (factor > 1).
//
// Does nothing once timing_prepare_destroy has been called. On a transition
// from normal speed (1) to a faster factor, the current time is remembered; on
// a transition from a faster factor back to 1, the elapsed time since then is
// added to the running total. Changes between two faster factors leave the
// interval running.
void timing_apply_tstate_factor(Uint8 factor) {
	if (_timing_is_ready_to_destroy) {
		return;
	}

	Uint8 previousFactor = _timing_tstate_factor;
	_timing_apply_tstate_factor__worker(factor);

	if (factor == 1 && previousFactor > 1) {
		// CPU is resumed to normal speed mode, so add the elapsed time to total time
		struct timeb now;
		ftime(&now);

		// Computed in signed integers: the millisecond parts alone may differ
		// negatively, and the whole difference is negative if the system clock
		// was set back while in faster mode.
		long long elapsedFasterTimeMs = 1000LL * (long long)(now.time - _fasterTimingStart.time)
			+ ((long long)now.millitm - (long long)_fasterTimingStart.millitm);

		// accumulate, ignoring an interval the clock claims ran backwards rather
		// than letting it corrupt the unsigned total
		if (elapsedFasterTimeMs > 0) {
			_fasterTotalRunTimeMs += (Uint64)elapsedFasterTimeMs;
		}
	}
	else if (factor > 1 && previousFactor == 1) {
		// CPU has just become faster, so mark the start time
		ftime(&_fasterTimingStart);
	}
}

// Returns 1 when the current tstate factor is greater than 1 (the CPU is being
// given more tstates than at normal speed), and 0 otherwise.
Uint8 timing_is_faster() {
	return _timing_tstate_factor > 1;
}

// Marks the module as shutting down and returns the tstate factor to 1.
// Once the flag is set, timing_apply_tstate_factor ignores further requests and
// timing_get_states_per_scanline_allocation stops applying dynamic compensation.
//
// NOTE(review): the worker is called directly, so if the CPU is in faster mode
// at this point, the time elapsed since _fasterTimingStart is not added to
// _fasterTotalRunTimeMs - confirm whether that is intended.
void timing_prepare_destroy() {
	_timing_is_ready_to_destroy = 1;
	_timing_apply_tstate_factor__worker(1);
}

// Returns the value most recently stored through
// timing_set_actual_milliseconds_per_video_frame. timing_start seeds it with
// the target frame duration.
double timing_get_actual_milliseconds_per_video_frame() {
	return _timing_actualmilliseconds_per_video_frame;
}

// Stores the given per-frame duration (in milliseconds, going by the parameter
// name) so that it can be read back through
// timing_get_actual_milliseconds_per_video_frame. No validation is performed.
void timing_set_actual_milliseconds_per_video_frame(double actualMsPerVideoFrame) {
	_timing_actualmilliseconds_per_video_frame = actualMsPerVideoFrame;
}

// Adds an idle duration to one of two running totals, chosen by the CPU speed
// mode at the time of the call: the faster-mode total when the tstate factor is
// above 1, otherwise the other total.
//
// NOTE(review): despite its name, _idleTimeAll does not receive idle time that
// is recorded while in faster mode - confirm callers expect that.
void timing_record_idle_time(Uint64 durationMs) {
	Uint64* bucket = timing_is_faster() ? &_idleTimeWhileCPUFaster : &_idleTimeAll;
	*bucket += durationMs;
}

// Returns the idle time accumulated by timing_record_idle_time while the CPU
// was not in faster mode.
// NOTE(review): idle time recorded while in faster mode is kept in a separate
// counter and is not part of this value, despite the "total" in the name.
Uint64 timing_get_total_real_idle_time() {
	return _idleTimeAll;
}

// Returns the idle time accumulated by timing_record_idle_time while the CPU
// was in faster mode (tstate factor greater than 1).
Uint64 timing_get_total_real_idle_time_spent_while_faster() {
	return _idleTimeWhileCPUFaster;
}

void _timing_set_windows_timer_resolution() {
	TIMECAPS tc;
	if (timeGetDevCaps(&tc, sizeof(TIMECAPS)) != TIMERR_NOERROR)
	{
		log_write("Warning: unable to query Windows timer device capabilities");
		return;
	}

	sprintf_s(_debugBuffer, _DEBUG_BUFFER_SIZE - 10, "Windows timer device capabilities: %ums min, %ums max",
		tc.wPeriodMin,
		tc.wPeriodMax);
	log_write(_debugBuffer);

	_windows_timer_min_resolution = min(max(tc.wPeriodMin, TARGET_RESOLUTION_MS), tc.wPeriodMax);
	if (TIMERR_NOERROR != timeBeginPeriod(_windows_timer_min_resolution)) {
		log_write("Warning: unable to begin Windows timer period");
		return;
	}
	
	sprintf_s(_debugBuffer, _DEBUG_BUFFER_SIZE - 10, "Windows timer device resolution set to %ums",
		_windows_timer_min_resolution);
	log_write(_debugBuffer);

	_timer_resolution_set = 1;
}

// Initializes the timing module.
//
// millisecondsPerVideoFrame: target duration of one video frame; also used as
//                            the initial "actual" frame duration.
//
// The order of the steps matters: the default tstate counts must be in place
// before the factor is applied, and _timing_is_ready is raised only after the
// dynamic compensation buffer has been set up.
//
// NOTE(review): _timing_is_ready_to_destroy is not cleared here. If this is
// called again after timing_prepare_destroy, timing_apply_tstate_factor keeps
// returning early and no dynamic compensation is applied - confirm whether
// restarting the module is meant to be supported.
void timing_start(Uint16 millisecondsPerVideoFrame) {
	// 24 tstates for left border
	// 128 tstates for paper
	// 24 tstates for right border
	// 48 tstates for horizontal retrace
	_timing_tstates_per_scanline__default = 224;

	_timing_reset_overflow_delta();

	_timing_milliseconds_per_video_frame = millisecondsPerVideoFrame;
	timing_set_actual_milliseconds_per_video_frame((double)millisecondsPerVideoFrame);

	// one frame's worth of tstates: scanlines per frame times tstates per scanline
	_timing_tstates_per_video_frame__default = (Uint64)(SPECTRUM_SCREEN_HEIGHT) * (_timing_tstates_per_scanline__default);

	// start at normal speed; this derives the effective per-scanline and
	// per-frame counts from the defaults set above
	timing_apply_tstate_factor(1);
	_timing_set_windows_timer_resolution();
	_timing_initialize_dynamic_compensation();
	_timing_is_ready = 1;
}

// Shuts the timing module down: marks it as not ready, releases the Windows
// timer period if one was requested, and frees the dynamic compensation buffer.
// Safe to call more than once.
void timing_destroy() {
	_timing_is_ready = 0;

	if (_timer_resolution_set) {
		if (TIMERR_NOERROR != timeEndPeriod(_windows_timer_min_resolution)) {
			log_write("Warning: unable to end Windows timer period");
		}

		// Each successful timeBeginPeriod must be matched by exactly one
		// timeEndPeriod, so forget the request now; otherwise a second destroy
		// (or a destroy after a restart whose request failed) would end a
		// period that was never begun.
		_timer_resolution_set = 0;
	}

	// free(NULL) is a no-op, so no guard is needed
	free(_dynCompensationBuffer);
	_dynCompensationBuffer = NULL;
}
