;                 EXP2.ASM as of April 29, 1981
; 
;  This  routine is extracted and CP/Mified from an  article 
; 'An  Inroduction  to Data Compression' by  Harold  Corbin, 
; that appeared in the April 1981 issue of Byte magazine.
; 
; CP/Mified by: Kelly Smith, CP/M-Net "SYSOP" April 29, 1981
; 
;  EXP2  (text  expansion routine) accepts data prepared  by 
; the COMP2 (text compression routine) program.  The program 
; runs  until  all bits have been processed  from  the  data 
; buffer (dbuf),  then returns control to CP/M. The decoding 
; process  consists of adding a data bit to the table  entry 
; point,  getting an increment index that points to the next 
; 0-1 pair,  and continues until a "tag" (MSB = 1) in bit 7, 
; indicates  that  the  next  table  entry  is  the  desired 
; character.  The  data are processed 8 bits per  byte,  MSB 
; first.
; 
;  The Huffman Code was derived for the following table,  by 
; the  relative frequency of occurence in a random  sampling 
; of English text.  Frequently used characters are  assigned 
; shorter  bit 'code' patterns,  and seldom used  characters 
; are  assigned longer bit 'code' patterns.  Note:  with any 
; set of codes generated, it is important that no code has a 
; shorter code as part of its beginning. For example, if the 
; letter  'E'  is 100,  then 10010 cannot be  the  code  for 
; another  letter...because in scanning the bit stream  from 
; left  to right (using EXP2,  the text expansion  routine), 
; the decoding algorithm would think that 10010 is 'E' (100) 
; followed  by  10  and NOT the different  letter  that  was 
; intended.
; 
;           Letter    Frequency (%)  Huffman Code
; 
;           E         13.0           100
;           T         10.5           001
;           A          8.1           1111
;           O          7.9           1110
;           N          7.1           1100
;           R          6.8           1011
;           I          6.3           1010
;           S          6.1           0110
;           H          5.2           0101
;           D          3.8           11011
;           L          3.4           01111
;           F          2.9           01001
;           C          2.7           01000
;           M          2.5           00011
;           U          2.4           00010
;           G          2.0           00001
;           Y          1.9           00000
;           P          1.9           110101
;           W          1.5           011101
;           B          1.4           011100
;           V           .9           1101001
;           K           .4           110100011
;           X           .15          110100001
;           J           .13          110100000
;           Q           .11          1101000101
;           Z           .07          1101000100
; 
;  The  EXP2 program expects to find the data to be decoded, 
; in the packed form of 8 bit bytes as encoded by COMP2,  in 
; the data buffer (dbuf).  Basically, the program searches a 
; binary  tree to decode the bit stream generated by  COMP2, 
; selecting  the  appropriate branch in the  tree  structure 
; depending  upon the value of each bit in the data  stream. 
; Note: the data in the table is uniquely dependent upon the 
; code generated by COMP2.
; 
;  There are three parts to the table structure:  the  index 
; values   that  allow  the  program  to  step  through  the 
; appropriate number of table entries (i.e.,  tree branches) 
; as the data stream bit values are serially  examined;  the 
; decoded  character that results from the search;  and  the 
; flag  to indicate to the program that the next table entry 
; found  is a character and not an index  value.  The  index 
; values  are always in pairs,  with a separate index  value 
; for a 1 or a 0 bit stream value. Therefore, as the program 
; scans through the table at each pair of index values,  one 
; or  the other is selected,  depending upon whether the bit 
; in khe data stream was a 1 or a 0.
; 
;  The  table scanning process consists of adding  the  data 
; bit to the current table address. This gives a new address 
; whose contents (an index value) is added to the address of 
; the  index  value itself...this new table address  is  the 
; address  of  the  next node in the  tree  structure.  This 
; process  continues until a flag  is  detected,  indicating 
; that the next entry is the desired character.  If the flag 
; is  on (MSB = 1),  the remaining 7 bits are interpreted as 
; an  ASCII character.  If the flag is off (MSB  =  0),  the 
; remaining  bits  are interpreted as an index  value.  This 
; limits  the index value to 127,  the maximum value in  the 
; table that can be skipped when processing one data bit.
; 
; 
;
true	equ	-1	; define true
false	equ	not true; define false
;
stdcpm	equ	true	; true if regular CP/M (base address 0000h)
altcpm	equ	false	; true if alternate CP/M (base address 4200h)
;
	if	stdcpm	; if standard CP/M...
base	equ	0000h	; bsae for standard CP/M system
	endif		; end if...
;
	if	altcpm	; if alternate CP/M...
base	equ	4200h	; base for H8 or TRS-80 CP/M system
	endif		; end if...
;
bdos	equ	base+5	; CP/M BDOS entry address for function call
;
conout	equ	2	; write console character
;
dbuf	equ	04000h	; data buffer for compressed data
;

	org	base+100h

start:	lxi	h,0	; save "old" CP/M stack pointer
	dad	sp
	shld	oldstk
	lxi	sp,stack; make "new" stack pointer
	lxi	h,dbuf+2; store pointer to next bit location in data buffer
	shld	dadd
	lxi	h,dbuf	; point to data buffer bit count
	mov	c,m	; get it...
	inx	h
	mov	b,m
	mvi	a,1	; set initial position
	sta	pos
expand:	push	b	; save bit count on the stack
	lxi	h,expansion$table	; point to expansion decode table
next:	push	h	; save pointer on the stack
	lhld	dadd	; get data address bit position
	lda	pos	; get bit position
	mov	b,a
	mov	a,m	; get the data
bit:	ral		; get desired bit into carry
	dcr	b	; de-bump bit position count
	jnz	bit	; loop 'til done
	mvi	a,0	; clear data, but preserve carry
	ral		; restore single isolated bit
	mov	c,a	; ...the data value
	mvi	b,0
	shld	dadd	; save data address bit position
	pop	h	; recover pointer
	dad	b	; make bias to table with data bit position
	mov	a,m	; get new pointer
	ral		; check if "tag" bit...
	jc	put$character
	rar		; restore to original position
	mov	e,a	; make bias
	mvi	d,0
	dad	d	; calculate 'tabel'+'data bit'+'pointer'
	pop	b	; recover bit count
	call	decb	; reduce bit count
	push	b
	jmp	next	; process next character
;
put$character:
;
	rar		; restore byte to original position
	ani	07fh	; mask-off "tag" bit
	mov	e,a	; compute bias to expansion table
	mvi	d,0
	dad	d	; add bias
	mov	a,m	; get expanded character from table
	push	b	; save all registers
	push	d
	push	h
	mov	e,a	; CP/M needs character in E register
	mvi	c,conout; output character function
	call	bdos
	pop	h	; recover registers
	pop	d
	pop	b
	pop	b	; adust stack, get bit count
	call	decb	; reduce bit count
	jmp	expand	; expand next character
;
decb:	dcx	b	; reduce bit count
	mov	a,c	; check for all characters expanded
	ora	b
	jz	exit	; if all characters expanded, exit to CP/M
	lda	pos	; get bit position, and update
	inr	a
	sta	pos
	cpi	9	; all 8 bits processed?
	rnz		; return, if not...
	mvi	a,1	; all bits processed, reset bit position
	sta	pos
	push	h	; save table pointer on stack
	lhld	dadd	; get current data byte address, and update
	inx	h
	shld	dadd
	pop	h	; recover table pointer
	ret		; process next bit...
;
exit:	lhld	oldstk	; get "old" CP/M stack pointer
	sphl		; and restore...
	ret		; return to CP/M
;
expansion$table:
;
	db	02ah	; 0	0
	db	001h	; 1	1
	db	002h	; 2	0
	db	008h	; 3	1
	db	082h	; 4	0
	db	002h	; 5	1
	db	'E'	; 6		'E'
	db	082h	; 7	0
	db	082h	; 8	1
	db	'I'	; 9		'I'
	db	'R'	; 10		'R'
	db	006h	; 11	0
	db	001h	; 12	1
	db	082h	; 13	0
	db	082h	; 14	1
	db	'O'	; 15		'O'
	db	'A'	; 16		'A'
	db	082h	; 17	0
	db	002h	; 18	1
	db	'N'	; 19		'N'
	db	003h	; 20	0
	db	081h	; 21	1
	db	'D'	; 22		'D'
	db	003h	; 23	0
	db	081h	; 24	1
	db	'P'	; 25		'P'
	db	003h	; 26	0
	db	081h	; 27	1
	db	'V'	; 28		'V'
	db	002h	; 29	0
	db	005h	; 30	1
	db	082h	; 31	0
	db	082h	; 32	1
	db	'J'	; 33		'J'
	db	'X'	; 34		'X'
	db	003h	; 35	0
	db	081h	; 36	1
	db	'K'	; 37		'K'
	db	082h	; 38	0
	db	082h	; 39	1
	db	'Z'	; 40		'Z'
	db	'Q'	; 41		'Q'
	db	002h	; 42	0
	db	00eh	; 43	1
	db	003h	; 44	0
	db	081h	; 45	1
	db	'T'	; 46		'T'
	db	002h	; 47	0
	db	005h	; 48	1
	db	082h	; 49	0
	db	082h	; 50	1
	db	'Y'	; 51		'Y'
	db	'G'	; 52		'G'
	db	082h	; 53	0
	db	082h	; 54	1
	db	'U'	; 55		'U'
	db	'M'	; 56		'M'
	db	002h	; 57	0
	db	008h	; 58	1
	db	003h	; 59	0
	db	081h	; 60	1
	db	'H'	; 61		'H'
	db	082h	; 62	0
	db	082h	; 63	1
	db	'C'	; 64		'C'
	db	'F'	; 65		'F'
	db	082h	; 66	0
	db	002h	; 67	1
	db	'S'	; 68		'S'
	db	003h	; 69	0
	db	081h	; 70	1
	db	'L'	; 71		'L'
	db	082h	; 72	0
	db	082h	; 73	1
	db	'B'	; 74		'B'
	db	'W'	; 75		'W'
;
dadd:	dw	dbuf+2	; next data address
;
pos:	ds	1	; current bit location
;
oldstk:	ds	2	; storage for "old" CP/M stack pointer
	ds	32	; storage for 16 level stack
stack	equ	$	; top of local stack
;
	end	start
