;TODO: The routines in this file will be organized in one of the following ways:
;        1. They will be included in sys_data_sel and sys_code_sel and placed
;           in a fixed order, so that the offset of each routine within the
;           sys selectors can be looked up in shared_data_sel and the routine
;           can then be called from within the sys selectors.
;        2. Each routine will have its own selector, so that a file of equates
;           pointing to each selector can be used to call these routines
;           easily throughout the system code.
;        3. Both 1 and 2 above.

;********************* Prepare DMA Channel for Data Transfer ***********************
dma_xfer:		;Program one DMA channel (mode, address, count, page) and unmask it.
			;in: al = channel 0-7 (0-3 are handled by dma_master, 4-7 by dma_xfer_slave)
			;    ah = mode: bits7-6: 00=demand,01=signal,10=block,11=cascade; bit5=address increment; bit4=auto initializing
			;		    bits3-2: operation: 00=verify, 01=write, 10=read; bits1-0:channel select
			;    edi = physical offset in memory to write to/read from(must be in 1st MB memory)
			;    ecx = number of bytes being transferred (must not be 0)
			;out: CF = 0 -> channel was programmed and unmasked
			;     CF = 1 -> bad argument (al > 7 or ecx = 0); no port was written
			;     All registers are preserved. Flags other than CF are restored to their entry values.
			;    NOTE: The transfer cannot cross a 64k boundary in memory. Ex: If edi = 0, largest number ecx can
			;		be is 64k=65536. If edi = 27fffh, the largest number ecx can be is 8000h (27fffh + 8000h = 2ffffh)
			;    NOTE: The controller's count register holds (length - 1), so ecx - 1 is what gets written to it.
	push eax
	push ebx
	push edx
	pushfd			;Push flags (including int enable (bit9)) so this cli doesn't screw up anything else
	cli

	movzx ebx, al		;ebx = channel number, kept for the whole routine (al gets reused for port data)
	cmp bl, 7
	ja dma_xfer_bad_arg	;only channels 0-7 exist
	test ecx, ecx
	jz dma_xfer_bad_arg	;a length of 0 would turn into a count of 0ffffh (64k) after the -1 below

	cmp bl, 4
	jae dma_xfer_slave

dma_master:
	;mask channel while it is being reprogrammed
	mov al, bl
	or al, 4		;bit2=mask, bits1-0 select the channel to mask
	out 0ah, al

	mov al, 0
	out 0ch, al		;any write clears LSB/MSB flip-flop of address and counter registers

	;Set dma mode register
	mov al, ah		;ah still holds the caller's mode byte
	out 0bh, al

	;base address register: port = channel * 2 (ch0 = 0, ch1 = 2, etc.)
	lea edx, [ebx*2]
	mov eax, edi
	out dx, al		;send low byte offset
	mov al, ah
	out dx, al		;send high byte offset

	;count register: port = address port + 1, value = length - 1
	inc edx
	lea eax, [ecx-1]	;(1 sector = 512 bytes -> 01ffh is written)
	out dx, al		;send low byte of count
	mov al, ah
	out dx, al		;send high byte of count

	;page register: the table holds one BYTE per channel, so zero-extend the byte into the port number.
	;(Reading is done through cs because this code segment is readable and the table lives inside it.)
	movzx edx, byte [cs:ebx+dma_xfer_pages]
	mov eax, edi
	shr eax, 16		;get bits 16-19 into al (page is the high nibble of a 20bit address)
	out dx, al		;send page

	;Unmask channel (bit2 clear, bits1-0 = channel)
	mov al, bl
	out 0ah, al

dma_xfer_done:
	popfd		;pop flags (including interrupt enable flag back to how it was before the cli)
	pop edx
	pop ebx
	pop eax
	clc
	retf

dma_xfer_bad_arg:	;invalid channel or zero length: undo the stack and report failure in CF
	popfd
	pop edx
	pop ebx
	pop eax
	stc
	retf

	;********* Prepare DMA for Data Transfer on Slave ******
dma_xfer_slave:		;Reached from dma_xfer with bl = channel 4-7, ah = mode, edi = offset, ecx = length (non-zero).
			;    ALSO NOTE: Channels 4-7 are 16-bit channels NOT 8 bit like channels 0-3!!!
			;    NOTE(review): edi and ecx are written to the controller unscaled. 16-bit channels normally
			;		expect a word address and a word count; confirm whether callers pre-scale them.

	;slave mask channel
	mov al, bl
	and al, 3		;get channel bits as 00,01,10,or11
	or al, 4		;bit2=mask, bits1-0 select the channel to mask
	out 0d4h, al

	;slave channel clear register
	mov al, 0
	out 0d8h, al		;any write clears LSB/MSB flip-flop of address and counter registers

	;Set dma slave mode register
	mov al, ah		;ah still holds the caller's mode byte
	out 0d6h, al

	;base address register: port = 0c0h + (channel - 4) * 4 (ch4 = 0c0h, ch5 = 0c4h, etc.)
	lea edx, [ebx*4 + 0c0h - 16]
	mov eax, edi
	out dx, al		;send low byte offset
	mov al, ah
	out dx, al		;send high byte offset

	;count register: port = address port + 2, value = length - 1
	add edx, 2
	lea eax, [ecx-1]
	out dx, al		;send low byte of count
	mov al, ah
	out dx, al		;send high byte of count

	;page register (byte-sized table entry, indexed by the full channel number 4-7)
	movzx edx, byte [cs:ebx+dma_xfer_pages]
	mov eax, edi
	shr eax, 16		;get bits 16-19 into al (page is the high nibble of a 20bit address)
	out dx, al		;send page

	;Unmask channel
	mov al, bl
	and al, 3		;only need channel bits (ch4=00, ch5=01, etc.), bit2 clear = unmask
	out 0d4h, al

	jmp dma_xfer_done
	;***************************** DMA Data *****************************
	;Page register port for each channel 0-7, one byte per channel.
	;dma_xfer_pages is the table's offset from dma_xfer, so it can be used as an offset through cs.
dma_xfer_pages	equ	$ - dma_xfer
	db 087h, 083h, 081h, 082h, 08fh, 08bh, 089h, 08ah

end_dma_xfer:

;******************** Delay Processor for ecx microseconds ******************
delay:		;Busy-wait for roughly ecx microseconds using a counted `loop` instruction.
		;in:  ecx = # of microseconds to delay.
		;     dword at shared_data_sel:cpu_speed should equal the speed (used here as cycles per second)
		;     32-bit real at shared_data_sel:loop_cycles = # cycles one loop opcode takes
		;out: CF = 0: ecx = # micros it actually was able to delay (the request is clamped to a maximum)
		;     CF = 1: al = 1, temp_data3 stayed in use until the timeout; nothing was delayed, ecx unchanged
		;     fs is preserved; eax is preserved except for al on the error return.
		;     Uses the fpu; the fpu register stack is left balanced.
	push eax
	push fs
	push ecx			;this push is for later in this procedure...

	mov ax, shared_data_sel
	mov fs, ax

	;A new technique for minimal memory use has been implemented here: 8 16-byte memory sections in a row have been
	;created and are available for any proc for use as long as it is not already in use (check temp_mask: bit0 set if
	;temp_data1 is in use, etc.). More than one could also be used because they are in consecutive order in memory.
	;Example: check to make sure none are used, mask temp_mask completely, and use temp_data1 as a 128 byte location
	;When you are done using one, however, make sure you clear the bit to open that memory spot back up.
	;NOTE to self: If this system becomes outdated or useless, consider removing it

	mov ecx, 0ffffh			;timeout = about 1/2 millisecond on a 100 mhz cpu (almost nothing on average cpu)
delay_wait_until_not_busy:
	test byte [fs:temp_mask], 4	;bit2 = temp_data3 in use
	jz delay_not_busy
	loop delay_wait_until_not_busy
	jmp delay_err1			;If loop finished, return with error indicating temp_data place was in use

delay_not_busy:
	or byte [fs:temp_mask], 4	;00000100xb = mask temp_data3 in shared_data_sel so that other proc's won't use it

	;--- Work out the maximum # of microseconds this routine can pause: (7fffffffh * 2 / cpu_speed) * 1000000 ---
	mov dword [fs:temp_data3], 07fffffffh	;make sure its a positive integer! So I had to use 7fffffffh * 2 instead
	fild dword [fs:temp_data3]		;of 0ffffffffh + 1
	mov dword [fs:temp_data3], 2
	fimul dword [fs:temp_data3]	;7fffffffh * 2
	fidiv dword [fs:cpu_speed]	;divide by cpu speed to get # seconds max this delay can pause
	mov dword [fs:temp_data3], 1000000
	fimul dword [fs:temp_data3]	;multiply by 1000000 to get max. number of microseconds it can pause
	fistp dword [fs:temp_data3]	;save this	NOTE:Using 1/1,000,000 s instead of ms b/c more flexible...

	pop ecx				;get requested # micros back from stack
	cmp ecx, dword [fs:temp_data3]
	jna delay_not_toomany

	mov ecx, dword [fs:temp_data3]	;If # micros specified was too many, pause for maximum amount
delay_not_toomany:
	push ecx			;Save the # micros this will actually pause for (use to return to calling program)

	;--- Convert microseconds to a loop count: cpu_speed / 1000000 * micros / loop_cycles ---
	mov dword [fs:temp_data3], 1000000
	fild dword [fs:cpu_speed]	;load cpu speed into an fpu data register
	fidiv dword [fs:temp_data3]	;now divide that by 1,000,000 to get cycles per microsecond
	mov dword [fs:temp_data3], ecx	;save # microseconds so we can multiply
	fimul dword [fs:temp_data3]	;multiply #cycles/micros by #micros to get # cycles needed
	fdiv dword [fs:loop_cycles]	;Divide # cycles by the # cycles per loop opcode to get # loop opcodes to use
					;# cycles per loop opcode is a 32-bit real, not an integer. This way a more accurate
					;result can be achieved.
	fistp dword [fs:temp_data3]	;save # loops & pop fpu stack

	mov ecx, dword [fs:temp_data3]
	and byte [fs:temp_mask], 0fbh	;clear bit2 of temp_mask to indicate that temp_data3 is no longer in use.

	jecxz delay_thedelay_done	;0 loops needed: skip the loop, because `loop` with ecx = 0 would wrap around
					;and spin 2^32 times instead of not at all
delay_thedelay:	loop delay_thedelay	;ecx contains # loops needed (must stay a `loop` opcode: loop_cycles describes it)
delay_thedelay_done:

	pop ecx		;Pop # micros this actually paused for. The calling program can use this to decide if it needs to pause
			;again.
	pop fs
	pop eax
	clc
	retf
delay_err1:
	pop ecx	;ecx hadn't been popped yet
	pop fs
	pop eax
	stc
	mov al, 1	;error code 1 = temp_data3 was in use (mov does not disturb CF)
	retf
end_delay:

;************************ Restart Computer Code *************************
restart:		;Restart the computer by sending command 0feh to port 064h. Never returns.
			;Once system needs to do special things, more intricate stuff will be added, but for now it's a
			;simple restart command.
			;in: nothing	clobbers: al, ecx, interrupt flag (irrelevant, control does not come back)
	cli				;nothing may interrupt from here on, so the hlt below can't be woken by an irq

	;Wait (bounded) until bit1 of the status port is clear, i.e. the controller is ready to accept a command.
	;If it never clears, send the command anyway rather than hanging without even trying.
	mov ecx, 0ffffh
restart_wait_ready:
	in al, 064h
	test al, 2
	jz restart_send
	loop restart_wait_ready

restart_send:
	mov al, 0feh
	out 064h, al

restart_halt:
	hlt
	jmp restart_halt		;if something still wakes the cpu, halt again instead of running into the code that follows
end_restart:

;*************************** Various keyboard functions **********************************
keyboard:

;keyboard_wait2write: wait until the keyboard controller can be written to.
;  Step 1: while bit0 of status port 064h is set, read and discard the byte at port 060h, pause, and check again.
;  Step 2: while bit1 of status port 064h is set, pause and check again.
;  Step 3: pause once more and return.
;  in: nothing		out: nothing		clobbers: al, ecx, flags
;  NOTE: neither wait has a timeout; this spins for as long as the status bits stay set.
keyboard_wait2write	equ	$ - keyboard	;offset of this routine from `keyboard` (a constant, NOT a jump target)
kybrd_wait2write:
	out 0edh, al			;This port is a delay port
	in al, 064h
	test al, 1
	jz kybrd_not_waiting4read
	in al, 060h			;discard the pending byte
	mov ecx, 0fffh
kybrd_pausing1:
	out 0edh, al
	loop kybrd_pausing1
	jmp kybrd_wait2write		;start over (jump to the label; the equ above is only an offset value)

kybrd_not_waiting4read:
	out 0edh, al
	in al, 064h
	test al, 2
	jz kybrd_not_busy
	mov ecx, 0fffh
kybrd_pausing2:
	out 0edh, al
	loop kybrd_pausing2
	jmp kybrd_not_waiting4read

kybrd_not_busy:
	mov ecx, 0ffffh
kybrd_pausing3:
	loop kybrd_pausing3		;final short pause before returning
	retf
end_keyboard: