##########################################################################
#
# StudioFactory
#
# The desktop Audio/Video studio.
# Copyright (C) 2002-2006  Peter Wendrich (pwsoft@syntiac.com)
# Homepage: http://www.syntiac.com/studiofactory.html
#
##########################################################################
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
##########################################################################
#
# DSP optimizer for x86 arch
#
##########################################################################

# Module declaration for the x86 DSP optimizer.
# NOTE(review): ccModule, rv, cci, cc and rc are generator helpers defined
# outside this file; presumably they register C fragments that end up in the
# generated program -- confirm against the generator scripts.
ccModule('DspOpt x86');

# C-level state used by the code emitted further below:
#  - maxDspOptBufferSize / theDspOptBuffer / theDspOptBufferIndex:
#    capacity, base pointer and write position of the machine code buffer.
#  - theDspOptCallCount: number of call instructions emitted by recalcDspOpt.
#  - theDspOptEnabledFlag / theDspOptActiveFlag: requested state versus the
#    state that recalcDspOpt has actually switched to.
rv('const int maxDspOptBufferSize = 1024*1024;');
rv('unsigned char *theDspOptBuffer = NULL;');
rv('int theDspOptBufferIndex = 0;');
rv('int theDspOptCallCount = 0;');
rv('bool theDspOptEnabledFlag = false;');
rv('bool theDspOptActiveFlag = false;');

# Fragment for 'mainTerm': release the code buffer if it was ever allocated.
cci('mainTerm', 'if (theDspOptBuffer) VirtualFree(theDspOptBuffer, 0, MEM_RELEASE);');
# Fragment for 'recalcDsp': regenerate the optimized code.
cc('recalcDsp', 'recalcDspOpt();');


rc(<<EOF);

// x86 general purpose registers.
// Every value equals the register number the processor uses inside opcodes
// (EAX ECX EDX EBX ESP EBP ESI EDI), so the assemble* helpers can add it
// directly to a base opcode. Do not reorder.
typedef enum {
	REG_EAX = 0,
	REG_ECX = 1,
	REG_EDX = 2,
	REG_EBX = 3,
	REG_ESP = 4,
	REG_EBP = 5,
	REG_ESI = 6,
	REG_EDI = 7
} Register;

// Allocates the read/write/execute code buffer the first time it is needed.
// Does nothing when a buffer already exists. The result of VirtualAlloc is
// stored unchecked, so callers must test theDspOptBuffer before using it.
static void reserveDspOptBuffer(void) {
	if (theDspOptBuffer != NULL) {
		return;
	}
	theDspOptBuffer = (unsigned char *)VirtualAlloc(NULL, maxDspOptBufferSize, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
}

// Appends the low 8 bits of aByte to the code buffer and advances the write
// position. The capacity check is an assert only, so it is not active in
// builds where assert is compiled out.
static void assembleByte(int aByte) {
	unsigned char *myTarget;

	assert(theDspOptBufferIndex < maxDspOptBufferSize);
	myTarget = theDspOptBuffer + theDspOptBufferIndex;
	*myTarget = (unsigned char)aByte;
	theDspOptBufferIndex++;
}

// Appends aValue to the code buffer as four bytes, least significant byte
// first (the byte order x86 expects for immediates and displacements).
static void assemble32Bits(int aValue) {
	int myShift;

	for(myShift=0;myShift<32;myShift+=8) {
		assembleByte(aValue>>myShift);
	}
}

// INSTR: ret
static void assembleRet(void) {
	const int myOpcodeRet = 0xC3;	// near return

	assembleByte(myOpcodeRet);
}

// INSTR: xor eax, eax
// Sets EAX to zero.
static void compileClearEax(void) {
	const int myOpcodeXor = 0x33;		// XOR r32, r/m32
	const int myModRmEaxEax = 0xC0;	// register form: eax, eax

	assembleByte(myOpcodeXor);
	assembleByte(myModRmEaxEax);
}

// INSTR: xor edx, edx
// Sets EDX to zero.
static void compileClearEdx(void) {
	const int myOpcodeXor = 0x33;		// XOR r32, r/m32
	const int myModRmEdxEdx = 0xD2;	// register form: edx, edx

	assembleByte(myOpcodeXor);
	assembleByte(myModRmEdxEdx);
}

// INSTR: mov eax, [aLocation]
// The absolute address is baked into the instruction, so aLocation must stay
// valid for as long as the generated code is used.
static void compileLoadEax(void *aLocation) {
	const int myAddress = (int)(aLocation);

	assembleByte(0xA1);	// MOV EAX, moffs32
	assemble32Bits(myAddress);
}

// INSTR: mov eax, aValue
static void compile_mov_eax_imm(int aValue) {
	const int myOpcodeMovEaxImm32 = 0xB8;	// MOV r32, imm32 with r32 = EAX

	assembleByte(myOpcodeMovEaxImm32);
	assemble32Bits(aValue);
}

// INSTR: mov eax, [ecx + aByteOffset]
// aByteOffset is encoded as a single sign-extended displacement byte, so it
// must lie in the range -128..127.
static void compile_mov_eax_PTR_ecx_byte_offset(int aByteOffset) {
	// A larger offset would be truncated to its low byte and the generated
	// instruction would silently read from the wrong address.
	assert((-128 <= aByteOffset) && (aByteOffset < 128));
	assembleByte(0x8B);	// MOV r32, r/m32
	assembleByte(0x41);	// ModRM: reg = eax, rm = [ecx + disp8]
	assembleByte(aByteOffset);
}

// INSTR: mov [aLocation], eax
// The absolute address of aLocation is embedded in the generated code.
static void compileStoreEax(void *aLocation) {
	const int myAddress = (int)(aLocation);

	assembleByte(0xA3);	// MOV moffs32, EAX
	assemble32Bits(myAddress);
}

// INSTR: mov [aLocation], ecx
// The absolute address of aLocation is embedded in the generated code.
static void compileStoreEcx(void *aLocation) {
	const int myOpcodeMovStore = 0x89;	// MOV r/m32, r32
	const int myModRmEcxAbs = 0x0D;	// reg = ecx, rm = [disp32]

	assembleByte(myOpcodeMovStore);
	assembleByte(myModRmEcxAbs);
	assemble32Bits((int)(aLocation));
}

// INSTR: mov [aLocation], edx
// The absolute address of aLocation is embedded in the generated code.
static void compileStoreEdx(void *aLocation) {
	const int myOpcodeMovStore = 0x89;	// MOV r/m32, r32
	const int myModRmEdxAbs = 0x15;	// reg = edx, rm = [disp32]

	assembleByte(myOpcodeMovStore);
	assembleByte(myModRmEdxAbs);
	assemble32Bits((int)(aLocation));
}

// INSTR: and ebx, aImmediateValue
// Uses the short form with a sign-extended 8-bit immediate when the value
// fits in -128..127, otherwise the form with a full 32-bit immediate.
static void compile_and_ebx_imm(int aImmediateValue) {
	const int myFitsInByte = (aImmediateValue >= -128) && (aImmediateValue <= 127);

	assembleByte(myFitsInByte ? 0x83 : 0x81);	// AND r/m32, imm8 / imm32
	assembleByte(0xE3);				// ModRM: AND, rm = ebx
	if (myFitsInByte) {
		assembleByte(aImmediateValue);
	} else {
		assemble32Bits(aImmediateValue);
	}
}

// INSTR: and edx, aImmediateValue
// Uses the short form with a sign-extended 8-bit immediate when the value
// fits in -128..127, otherwise the form with a full 32-bit immediate.
static void compile_and_edx_imm(int aImmediateValue) {
	const int myFitsInByte = (aImmediateValue >= -128) && (aImmediateValue <= 127);

	assembleByte(myFitsInByte ? 0x83 : 0x81);	// AND r/m32, imm8 / imm32
	assembleByte(0xE2);				// ModRM: AND, rm = edx
	if (myFitsInByte) {
		assembleByte(aImmediateValue);
	} else {
		assemble32Bits(aImmediateValue);
	}
}

// INSTR: and ebx, edx
static void compile_and_ebx_edx(void) {
	const int myOpcodeAnd = 0x23;		// AND r32, r/m32
	const int myModRmEbxEdx = 0xDA;	// register form: ebx, edx

	assembleByte(myOpcodeAnd);
	assembleByte(myModRmEbxEdx);
}

// INSTR: mov ebx, edx
static void compile_mov_ebx_edx(void) {
	const int myOpcodeMov = 0x8B;		// MOV r32, r/m32
	const int myModRmEbxEdx = 0xDA;	// register form: ebx, edx

	assembleByte(myOpcodeMov);
	assembleByte(myModRmEbxEdx);
}

// INSTR: or ebx, edx
static void compile_or_ebx_edx(void) {
	const int myOpcodeOr = 0x0B;		// OR r32, r/m32
	const int myModRmEbxEdx = 0xDA;	// register form: ebx, edx

	assembleByte(myOpcodeOr);
	assembleByte(myModRmEbxEdx);
}

// INSTR: mov ecx, aValue
static void compile_mov_ecx_imm(int aValue) {
	const int myOpcodeMovEcxImm32 = 0xB9;	// MOV r32, imm32 with r32 = ECX

	assembleByte(myOpcodeMovEcxImm32);
	assemble32Bits(aValue);
}

// INSTR: mov [ecx + aByteOffset], ebx
// aByteOffset is encoded as a single sign-extended displacement byte, so it
// must lie in the range -128..127.
static void compile_mov_PTR_ecx_byte_offset_ebx(int aByteOffset) {
	// A larger offset would be truncated to its low byte and the generated
	// instruction would silently write to the wrong address.
	assert((-128 <= aByteOffset) && (aByteOffset < 128));
	assembleByte(0x89);	// MOV r/m32, r32
	assembleByte(0x59);	// ModRM: reg = ebx, rm = [ecx + disp8]
	assembleByte(aByteOffset);
}

// INSTR: cmp [eax], edx
// Compares the 32-bit value EAX points at with EDX.
static void compile_cmp_PTR_eax_edx(void) {
	const int myOpcodeCmp = 0x39;		// CMP r/m32, r32
	const int myModRmPtrEaxEdx = 0x10;	// reg = edx, rm = [eax]

	assembleByte(myOpcodeCmp);
	assembleByte(myModRmPtrEaxEdx);
}

// INSTR: setg dl
// DL becomes 1 when the preceding signed compare was "greater", else 0.
static void compile_setg_dl(void) {
	static const unsigned char myCode[3] = { 0x0F, 0x9F, 0xC2 };
	int myIndex;

	for(myIndex=0;myIndex<3;myIndex++) {
		assembleByte(myCode[myIndex]);
	}
}

// INSTR: setle dl
// DL becomes 1 when the preceding signed compare was "less or equal", else 0.
static void compile_setle_dl(void) {
	static const unsigned char myCode[3] = { 0x0F, 0x9E, 0xC2 };
	int myIndex;

	for(myIndex=0;myIndex<3;myIndex++) {
		assembleByte(myCode[myIndex]);
	}
}

// INSTR: sub edx, aByte
// Only the low 8 bits of aByte are emitted; the processor sign-extends them.
static void compile_sub_edx_byte(int aByte) {
	const int myOpcodeGroupImm8 = 0x83;	// arithmetic r/m32, imm8
	const int myModRmSubEdx = 0xEA;	// operation = SUB, rm = edx

	assembleByte(myOpcodeGroupImm8);
	assembleByte(myModRmSubEdx);
	assembleByte(aByte);
}

// INSTR: inc aRegister
// One-byte form: base opcode 0x40 plus the register number.
static void assembleInc(Register aRegister) {
	const int myOpcodeBase = 0x40;

	assert((aRegister >= 0) && (aRegister < 8));
	assembleByte(myOpcodeBase + (int)aRegister);
}

// INSTR: dec aRegister
// One-byte form: base opcode 0x48 plus the register number.
static void assembleDec(Register aRegister) {
	const int myOpcodeBase = 0x48;

	assert((aRegister >= 0) && (aRegister < 8));
	assembleByte(myOpcodeBase + (int)aRegister);
}

// INSTR: push aRegister
// One-byte form: base opcode 0x50 plus the register number.
static void assemblePush(Register aRegister) {
	const int myOpcodeBase = 0x50;

	assert((aRegister >= 0) && (aRegister < 8));
	assembleByte(myOpcodeBase + (int)aRegister);
}

// INSTR: pop aRegister
// One-byte form: base opcode 0x58 plus the register number.
static void assemblePop(Register aRegister) {
	const int myOpcodeBase = 0x58;

	assert((aRegister >= 0) && (aRegister < 8));
	assembleByte(myOpcodeBase + (int)aRegister);
}

static void compileSaveAllRegisters(void) {
	assemblePush(REG_ECX);
	assemblePush(REG_EDX);
	assemblePush(REG_EBX);
	assemblePush(REG_ESI);
	assemblePush(REG_EDI);
}

static void compileRestoreAllRegisters(void) {
	assemblePop(REG_EDI);
	assemblePop(REG_ESI);
	assemblePop(REG_EBX);
	assemblePop(REG_EDX);
	assemblePop(REG_ECX);
}

// !!!TODO make more generic functions of those

// INSTR: add eax, [aLocation]
// The absolute address of aLocation is embedded in the generated code.
static void assembleAdd_EAX_mem(void *aLocation) {
	const int myOpcodeAdd = 0x03;		// ADD r32, r/m32
	const int myModRmEaxAbs = 0x05;	// reg = eax, rm = [disp32]

	assembleByte(myOpcodeAdd);
	assembleByte(myModRmEaxAbs);
	assemble32Bits((int)(aLocation));
}

// INSTR: imul eax, [aLocation]
// Signed multiply; EAX receives the low 32 bits of the product.
static void assembleImul_EAX_mem(void *aLocation) {
	const int myModRmEaxAbs = 0x05;	// reg = eax, rm = [disp32]

	// Two-byte opcode 0F AF: IMUL r32, r/m32
	assembleByte(0x0F);
	assembleByte(0xAF);
	assembleByte(myModRmEaxAbs);
	assemble32Bits((int)(aLocation));
}

// INSTR: sar eax, aBitCount
// Arithmetic (sign preserving) right shift; only the low 8 bits of aBitCount
// are emitted.
static void assembleSar_EAX(int aBitCount) {
	const int myOpcodeShiftImm8 = 0xC1;	// shift group r/m32, imm8
	const int myModRmSarEax = 0xF8;	// operation = SAR, rm = eax

	assembleByte(myOpcodeShiftImm8);
	assembleByte(myModRmSarEax);
	assembleByte(aBitCount);
}

// Generates code for an adder object: output 0 = sum of all connected inputs.
//
// zeroInput  pointer that marks an input as "not connected"
// aObject    object to compile
//
// Unconnected inputs generate no code at all. When no input is connected the
// output is set to zero right here and nothing is emitted.
static void compileObjectAdd(int *zeroInput, DspObjectPtr aObject) {
	int myConnectedCount = 0;
	int myIndex;

	for(myIndex=0;myIndex<maxObjectIo;myIndex++) {
		if (zeroInput == aObject->io[myIndex].input) {
			continue;
		}
		if (0 == myConnectedCount) {
			// INSTR: mov eax, addr		first term
			assembleByte(0xA1);
		} else {
			// INSTR: add eax, addr		every further term
			assembleByte(0x03);
			assembleByte(0x05);
		}
		assemble32Bits((int)(aObject->io[myIndex].input));
		myConnectedCount++;
	}

	if (0 == myConnectedCount) {
		// Constant result, no code needed
		aObject->io[0].output = 0;
		return;
	}

	// Write the sum to the module output
	compileStoreEax(&aObject->io[0].output);
}

// Generates code for a logic AND object. An input counts as "true" when its
// signal is greater than zero; the output is 32767 for true and 0 for false.
//
// zeroInput  pointer that marks an input as "not connected"
// aObject    object to compile
//
// Unconnected inputs after the last connected one are ignored. When no input
// is connected, or an unconnected input precedes a connected one, the result
// is always false: the output is set to zero here and no code is emitted.
static void compileObjectAnd(int *zeroInput, DspObjectPtr aObject) {
	int myInputCount = 0;		// index of last connected input + 1
	bool myGapSeen = false;		// an unconnected input was encountered
	bool myAlwaysZero = true;
	int myIndex;

	for(myIndex=0;myIndex<maxObjectIo;myIndex++) {
		if (zeroInput == aObject->io[myIndex].input) {
			myGapSeen = true;
			continue;
		}
		myInputCount = myIndex + 1;
		myAlwaysZero = myGapSeen;
	}

	if (myAlwaysZero) {
		aObject->io[0].output = 0;
		return;
	}

	// ECX addresses the first input slot; everything else is reached through
	// a byte offset relative to it.
	//
	// Code generated per input:
	//   mov eax, [ecx + offset]	pointer to the input signal
	//   xor edx, edx
	//   cmp [eax], edx		signal compared with zero
	//   setle dl			1 when signal <= 0
	//   sub edx, 1			1/0 becomes 0/-1 (all bits set when positive)
	//   mov/and ebx, edx		first input initialises, others combine
	compile_mov_ecx_imm((int)(void*)&(aObject->io[0].input));
	for(myIndex=0;myIndex<myInputCount;myIndex++) {
		compile_mov_eax_PTR_ecx_byte_offset((char*)(&aObject->io[myIndex].input)-(char*)(&aObject->io[0].input));
		compileClearEdx();
		compile_cmp_PTR_eax_edx();
		compile_setle_dl();
		compile_sub_edx_byte(1);
		if (0 != myIndex) {
			compile_and_ebx_edx();
		} else {
			compile_mov_ebx_edx();
		}
	}

	// Reduce the all-ones mask to the signal level 32767 and store it
	compile_and_ebx_imm(32767);
	compile_mov_PTR_ecx_byte_offset_ebx((char*)(&aObject->io[0].output)-(char*)(&aObject->io[0].input));
}

// Generates code for a CTR object. With input 0 connected the result is
//   (((knob 0 + input 1) * input 0) >> 14) + knob 2 + input 2
// where unconnected inputs 1 and 2 are simply left out. Without input 0 the
// product is skipped entirely and the result is knob 2 + input 2.
//
// zeroInput  pointer that marks an input as "not connected"
// aObject    object to compile
static void compileObjectCtr(int *zeroInput, DspObjectPtr aObject) {
	if (zeroInput == aObject->io[0].input) {
		// Nothing to scale: start directly with the third knob
		compileLoadEax(&aObject->io[2].knob);
	} else {
		// Gain = first knob, optionally modulated by input 1
		compileLoadEax(&aObject->io[0].knob);
		if (zeroInput != aObject->io[1].input) {
			assembleAdd_EAX_mem(aObject->io[1].input);
		}
		// Scale input 0 by the gain and add the third knob
		assembleImul_EAX_mem(aObject->io[0].input);
		assembleSar_EAX(14);
		assembleAdd_EAX_mem(&aObject->io[2].knob);
	}

	// Input 2 is an additional offset
	if (zeroInput != aObject->io[2].input) {
		assembleAdd_EAX_mem(aObject->io[2].input);
	}

	// Write the result to the module output
	compileStoreEax(&aObject->io[0].output);
}

// Generates code for a logic NOT object: the output is 0 when the input
// signal is greater than zero and 32767 otherwise.
//
// zeroInput  pointer that marks an input as "not connected"
// aObject    object to compile
//
// Without a connected input the output is set to 32767 here and no code is
// emitted.
static void compileObjectNot(int *zeroInput, DspObjectPtr aObject) {
	if (zeroInput == aObject->io[0].input) {
		aObject->io[0].output = 32767;
		return;
	}

	// mov eax, [input slot]	pointer to the input signal
	compileLoadEax(&aObject->io[0].input);
	// xor edx, edx / cmp [eax], edx / setg dl	dl = 1 when signal > 0
	compileClearEdx();
	compile_cmp_PTR_eax_edx();
	compile_setg_dl();
	// sub edx, 1			1/0 becomes 0/-1
	compile_sub_edx_byte(1);
	// Reduce the all-ones mask to the signal level 32767 and store it
	compile_and_edx_imm(32767);
	compileStoreEdx(&aObject->io[0].output);
}

// Generates code for a logic OR object. An input counts as "true" when its
// signal is greater than zero; the output is 32767 for true and 0 for false.
//
// zeroInput  pointer that marks an input as "not connected"
// aObject    object to compile
//
// All inputs up to and including the last connected one are evaluated. When
// no input is connected the output is set to zero here and no code is emitted.
static void compileObjectOr(int *zeroInput, DspObjectPtr aObject) {
	int myInputCount = 0;		// index of last connected input + 1
	int myIndex;

	for(myIndex=0;myIndex<maxObjectIo;myIndex++) {
		if (zeroInput != aObject->io[myIndex].input) {
			myInputCount = myIndex + 1;
		}
	}

	if (0 == myInputCount) {
		aObject->io[0].output = 0;
		return;
	}

	// ECX addresses the first input slot; everything else is reached through
	// a byte offset relative to it.
	//
	// Code generated per input:
	//   mov eax, [ecx + offset]	pointer to the input signal
	//   xor edx, edx
	//   cmp [eax], edx		signal compared with zero
	//   setle dl			1 when signal <= 0
	//   sub edx, 1			1/0 becomes 0/-1 (all bits set when positive)
	//   mov/or ebx, edx		first input initialises, others combine
	compile_mov_ecx_imm((int)(void*)&(aObject->io[0].input));
	for(myIndex=0;myIndex<myInputCount;myIndex++) {
		compile_mov_eax_PTR_ecx_byte_offset((char*)(&aObject->io[myIndex].input)-(char*)(&aObject->io[0].input));
		compileClearEdx();
		compile_cmp_PTR_eax_edx();
		compile_setle_dl();
		compile_sub_edx_byte(1);
		if (0 != myIndex) {
			compile_or_ebx_edx();
		} else {
			compile_mov_ebx_edx();
		}
	}

	// Reduce the all-ones mask to the signal level 32767 and store it
	compile_and_ebx_imm(32767);
	compile_mov_PTR_ecx_byte_offset_ebx((char*)(&aObject->io[0].output)-(char*)(&aObject->io[0].input));
}

// Generates code for a subtract object: output 0 = input 1 - input 0, where
// an unconnected input contributes zero.
//
// zeroInput  pointer that marks an input as "not connected"
// aObject    object to compile
//
// With neither input connected the output is set to zero here and no code is
// emitted.
static void compileObjectSub(int *zeroInput, DspObjectPtr aObject) {
	const int myHasMinuend = (zeroInput != aObject->io[1].input);
	const int myHasSubtrahend = (zeroInput != aObject->io[0].input);

	if (!myHasMinuend && !myHasSubtrahend) {
		aObject->io[0].output = 0;
		return;
	}

	// Start value: input 1, or zero when input 1 is open
	if (myHasMinuend) {
		compileLoadEax(aObject->io[1].input);
	} else {
		compileClearEax();
	}

	if (myHasSubtrahend) {
		// INSTR: sub eax, addr
		assembleByte(0x2b);
		assembleByte(0x05);
		assemble32Bits((int)(aObject->io[0].input));
	}

	// Write the difference to the module output
	compileStoreEax(&aObject->io[0].output);
}

static void recalcDspOpt(void) {
	theDspOptBufferIndex = 0;
	theDspOptCallCount = 0;

	// Only aquire locks when necessary:
	// - Recalc request when active
	// - Change between states active and inactive
	if (theDspOptActiveFlag || (theDspOptActiveFlag != theDspOptEnabledFlag)) {
		reserveDspOptBuffer();
		if (theDspOptBuffer
		&& (0 <= thePlayingProject)) {
			lockMutex(theAudioMutex);
			theDspOptActiveFlag = theDspOptEnabledFlag;

			if (theDspOptActiveFlag) {
				// Only generate code when in active state
				int *myZeroInput = &(projects[thePlayingProject].dsp[0].io[0].output);
				DspObject *myObject;

				compileSaveAllRegisters();
				for(myObject=projects[thePlayingProject].dsp + 1;myObject->dspRoutine;myObject++) {
					if ((dspRoutine_ADD1 == myObject->dspRoutine)
					|| (dspRoutine_ADD2 == myObject->dspRoutine)
					|| (dspRoutine_ADD3 == myObject->dspRoutine)
					|| (dspRoutine_ADD4 == myObject->dspRoutine)
					|| (dspRoutine_ADD5 == myObject->dspRoutine)
					|| (dspRoutine_ADD6 == myObject->dspRoutine)
					|| (dspRoutine_ADD7 == myObject->dspRoutine)
					|| (dspRoutine_ADD8 == myObject->dspRoutine)) {
						compileObjectAdd(myZeroInput, myObject);
					} else if ((dspRoutine_AND1 == myObject->dspRoutine)
					|| (dspRoutine_AND2 == myObject->dspRoutine)
					|| (dspRoutine_AND3 == myObject->dspRoutine)
					|| (dspRoutine_AND4 == myObject->dspRoutine)
					|| (dspRoutine_AND5 == myObject->dspRoutine)
					|| (dspRoutine_AND6 == myObject->dspRoutine)
					|| (dspRoutine_AND7 == myObject->dspRoutine)
					|| (dspRoutine_AND8 == myObject->dspRoutine)) {
						compileObjectAnd(myZeroInput, myObject);
					} else if (dspRoutine_CTR == myObject->dspRoutine) {
						compileObjectCtr(myZeroInput, myObject);
					} else if (dspRoutine_NOT == myObject->dspRoutine) {
						compileObjectNot(myZeroInput, myObject);
					} else if (dspRoutine_SUB == myObject->dspRoutine) {
						compileObjectSub(myZeroInput, myObject);
					} else if ((dspRoutine_OR1 == myObject->dspRoutine)
					|| (dspRoutine_OR2 == myObject->dspRoutine)
					|| (dspRoutine_OR3 == myObject->dspRoutine)
					|| (dspRoutine_OR4 == myObject->dspRoutine)
					|| (dspRoutine_OR5 == myObject->dspRoutine)
					|| (dspRoutine_OR6 == myObject->dspRoutine)
					|| (dspRoutine_OR7 == myObject->dspRoutine)
					|| (dspRoutine_OR8 == myObject->dspRoutine)) {
						compileObjectOr(myZeroInput, myObject);
						continue;
					} else if (dspRoutine_Dummy != myObject->dspRoutine) {
						compile_mov_ecx_imm((int)myObject);
						// INSTR: call relative
						assembleByte(0xe8);
						assemble32Bits((int)(myObject->dspRoutine) - (int)(theDspOptBuffer + theDspOptBufferIndex + 4));
						theDspOptCallCount++;
					}
				}
				compileRestoreAllRegisters();
				assembleRet();
			}
			unlockMutex(theAudioMutex);
		}
	}

	logprintf("recalcDspOpt generated %d bytes of code\\n", theDspOptBufferIndex);
	// !!! for debugging
	transportWindowRefresh = true;
}
EOF


#
#
#
#	// xor eax, eax
#	theDspOptBuffer[myIndex++] = 0x33;
#	theDspOptBuffer[myIndex++] = 0xC0;

#		// INSTR: mov esi, addr
#	theDspOptBuffer[myIndex++] = 0x8B;
#	theDspOptBuffer[myIndex++] = 0x35;

#		// INSTR: sub eax, esi
#		theDspOptBuffer[myIndex++] = 0x2b;
#		theDspOptBuffer[myIndex++] = 0xc6;
