* @(#)  BASIC 5.01  dg_col  3.2  11/25/87  14:20:14 *

*
* Dump Graphics COLOR 
* CSUB utility
* assembly support
* Mary Sue Rowan, BASIC 5.1, 11/24/87
*
       nosyms
       rorg 0
       def  gdump
       def  gdump_red
       def  gdump_red_c
       def  gdump_c
       def  gdump_edc
       def  gdump_ed
       def  gdump_red_ed
       def  gdump_red_edc
       def  gon    
       def  gcbinit
       def  init_err
*
*
       refa global_rr    
       refa gator_rr     
       REFA GBOX_WP
*
* make sure global_rr is initialized correctly & managed correctly to handle
* gator as well as topcat:  for gator, replacement rule is in format $xxxN;
*                           for topcat, replacement rule is:         $xNxx;
* so maintain global_rr as $xNxN
* global_rr initialized in CRT and modified in status/control sections (Steve
* T will help make changes)
       
       refa sysglobals
       
* alpha_mask initialized by CRT
* NOTE:  declared in DECL/GLOBVARS & V68C/AGLOBAL as BYTE ==> word
*        access least significant byte as ALPHA_MASK+1
       refa     ALPHA_MASK              * planes enabled for alpha (byte)
       refa     bm_devaddr              * address of alpha display
*
*
*
* Byte offsets of the fields of the graphics control block (gcb).  The routines
* below keep the gcb pointer in a6 and address each field as  field(a6).
* The word after each expression is the field's type; the offset arithmetic
* shows integer = 4 bytes, shortint = 2 bytes, byte = 1 byte.
addr1           equ    0                    integer 
addr2           equ   addr1+4               integer  
addr3           equ   addr2+4               integer
addr4           equ   addr3+4               integer

devicetype      equ   addr4+4               shortint
deviceaddress   equ   devicetype+2          integer 
monitortype     equ   deviceaddress+4       shortint

* gcbinit block-copies 17 words from init_table onto plane1_addr..lower_left,
* so these fields must stay contiguous and in exactly this order.
plane1_addr     equ   monitortype+2         integer
plane2_offset   equ   plane1_addr+4         integer
plane3_offset   equ   plane2_offset+4       integer

n_glines        equ   plane3_offset+4       shortint
gspacing        equ   n_glines+2            shortint
bytesperline    equ   gspacing+2            shortint

xclip_min       equ   bytesperline+2        shortint
xclip_max       equ   xclip_min+2           shortint
yclip_min       equ   xclip_max+2           shortint 
yclip_max       equ   yclip_min+2           shortint
hard_xmax       equ   yclip_max+2           shortint
hard_ymax       equ   hard_xmax+2           shortint
lower_left      equ   hard_ymax+2           integer

* background directly follows lower_left: gcbinit clears it through the
* pointer left behind by the init_table copy.
background      equ   lower_left+4          shortint
red_intensity   equ   background+2          shortint
grn_intensity   equ   red_intensity+2       shortint
blu_intensity   equ   grn_intensity+2       shortint
dither_patternS equ   blu_intensity+2       2 * 16 bytes

* there is code in several procedures which assumes that the following
*   four parameters are in the order given.  do not move or re-order
*   these parameters !

x1              equ   dither_patternS+32    integer
x2              equ   x1+4                  integer
y1              equ   x2+4                  integer
y2              equ   y1+4                  integer

* the next 2 fields are new
* non_square = 0 = square pixels, 1 = nonsquare
* graph_mask = bit mask of planes write or display enabled with graphics

FILLER1         EQU   Y2+4                  BYTE
NON_SQUARE      EQU   FILLER1+1             BYTE   
FILLER2         EQU   NON_SQUARE+1          BYTE
GRAPH_MASK      EQU   FILLER2+1             BYTE    

cursor_x        equ   GRAPH_MASK+1          shortint
cursor_y        equ   cursor_x+2            shortint

BM_area_rr      equ   CURSOR_Y+2            byte             
area_draw_mode  equ   BM_area_rr+1          byte             
gator_vec_rr    equ   area_draw_mode+1      byte            
vec_draw_mode   equ   gator_vec_rr+1        byte            
lpen_num        equ   vec_draw_mode+1       SHORTINT 
pen_num         equ   lpen_num+2            shortint
cpen            equ   pen_num+2             integer
max_pen         equ   cpen+4                integer
linepat         equ   max_pen+4             shortint
oldpat          equ   linepat+2             shortint

* caller's a5/a6: stored by every entry point with movem.l and reloaded
* at rts1 just before returning.
old_a5          equ   oldpat+2              integer
old_a6          equ   old_a5+4              integer

tm              equ   old_a6+4              byte
vis             equ   tm+1                  byte
  
* scratch fields; the dump routines read rgltemp1 as the length in bytes of
* the escape sequence stored just in front of the output string.
rgltemp1        equ   vis+1                 integer 
rgltemp2        equ   rgltemp1+4            integer  
rgltemp3        equ   rgltemp2+4            integer 
rgltemp4        equ   rgltemp3+4            integer
rgltemp5        equ   rgltemp4+4            integer

repeatrate      equ   rgltemp5+4            shortint
repeatcount     equ   repeatrate+2          shortint
index           equ   repeatcount+2         integer 
softvec         equ   index+4               240 bytes of instructions

* short aliases for the rgltemp scratch fields
t1              equ   rgltemp1
t2              equ   rgltemp2
t3              equ   rgltemp3
t4              equ   rgltemp4
t5              equ   rgltemp5

* gator_vec_rr and vec_draw_mode (bytes) overlay vec_draw_mode in gcb
*       draw mode       bm_vec_rr       vec_draw_mode           action
*       ---------       ---------       -------------           ------
*       dominant            3                0                  pixel in
*       erase               4                1                  not(in) and old
*       nondominant         7                2                  or
*       complement          6                3                  exor
*
*
             page
*
* device dependent equates
*
* absolute addresses of the built-in displays
a9836conoff  equ     $51FFFD                   address of gon/goff bit
a9826a       equ     $538001                   start of 26a frame buffer
a9836a       equ     $538000                   start of 36a frame buffer
a9836c       equ     $520000                   start of 36c frame buffer

* the register equates from here on are offsets: the code applies them to a0
* after loading a0 from deviceaddress(a6)
gat_stat     equ     $4001                     gator status register
gat_rr       equ     $4008                     gator replacement rule register

* the following block is 3.1 stuff
gbox_stat    equ     $0002                     gbox status register
* next 3 items pointed at by crtidrom 37,39    note: odd bytes for all 3
gbox_rr      equ     $5006                     gbox replacement rule register
* next 4 items pointed at by crtidrom 33,35
gbox_NWRITE  equ     $6009                     gbox write protect register

*
* Topcat control registers
*  (all of size WORD except topc_wmove, _nblank and _fben which are BYTEs)
*
TOPC_PRR     equ     $40EA                     pixel replacement rule
TOPC_FBEN    equ     $4090                     enable writes to frame buffer
*
* ID Rom registers
*  (to be accessed using MOVEP (alternate bytes of memory))
*
ID_FBW       equ     $000D                     pixel width of displayed f.b.
ID_FBH       equ     $0011                     pixel height of displayed f.b.
ID_FBHIGH    equ     $0009                     pixel height of entire fb
ID_FBA       equ     $005D                     pointer to starting address of frame buffer
ID_BOOL      equ     $0017                     bit 0 set if pixels are non-square
ID_NP        equ     $005B                     if 0, number of planes mask determined by
ID_CMAP      equ     $0033                     relative address of color map
* ID_NP (its remark above is interrupted by the ID_CMAP line): if 0, the planes
*   mask is found by writing 1's & reading back (nonexistent planes return 0);
*   if nonzero, gcbinit takes the value as the number of planes and builds
*   the mask from it

*
* device types
*
* These values are used as table indices (gcbinit: init_table and maxpen,
* gon: gontable, dump routines: their jump tables), so they must not be
* renumbered without changing those tables.
*
crt26a       equ     0                         9826a type device
crt36a       equ     1                         9836a type device
crt27a       equ     2                         98627 type device
crt36c       equ     3                         9836c type device
crtgat       equ     4                         gator type device
crtgbx       equ     5                         gbox type device    3.1
CRTTPC       equ     6                         topcat type device
             
             page

*
*
*
*  utility subroutines
*
            rorg    0
rgl         equ     *

* rts -- common exit shared by the entry points in this file (each one saves
*   the caller's a5/a6 in old_a5/old_a6 of the gcb and ends with "bra rts").
*   In:  a6 = gcb pointer.   Uses a0.
*   Device types below gator: just restore a5/a6 and return.
*   gator / gbox / topcat: first reload the device's replacement rule
*   register from global_rr, then restore a5/a6 and return.
rts         equ     *
            cmpi    #crtgat,devicetype(a6)     if gator
            blt.s   rts1                       3.1
            movea.l deviceaddress(a6),a0       then      (NM changed order)
* movea does not touch the condition codes, so the bgt below still tests the
* cmpi against crtgat:  equal = gator (fall through),  greater = gbox/topcat
            bgt.s   rtsgt                      3.1
* gator: spin until bit 7 of the status register reads 1, then write the rule
rtslp1      btst    #7,gat_stat(a0)
            beq.s   rtslp1
            move    global_rr,gator_rr         set current replacement rule
            move    global_rr,gat_rr(a0)       set replacement rule to global
            bra.s   rts1                                   3.1
rtsgt       cmpi    #crttpc,devicetype(a6)     then (3.1)  if topcat
            bne.s   rtsgbx
* topcat: word write of global_rr, no status polling
            move    global_rr,topc_prr(a0)
            bra     rts1

* gbox (any type above gator that is not topcat): spin until bit 4 of the
* status register reads 0, then write the rule
rtsgbx      btst    #4,gbox_stat(a0)                       3.1
            bne.s   rtsgbx                                 3.1
*           move    global_rr,gator_rr         set current replacement rule  "        6/14
            move    global_rr,gbox_rr(a0)      set replacement rule to global "
* restore the a5/a6 that the entry point stored in the gcb and return
rts1        movem.l old_a5(a6),a5-a6
            rts



            page
*
*  procedure gcbinit(gcb:anyptr);
*
* gcbinit fills in the geometry fields of the gcb for the display named by
* devicetype.  Types up to and including gator are initialised from
* init_table / maxpen below; higher types branch to gcbnew, which reads the
* geometry from the device's ID ROM.  All paths leave through the common
* rts exit.
* Stack on entry: 0(sp) = return address, 4(sp) = gcb pointer.
* NOTE(review): devicetype is not checked for negative values before it is
*   used to index init_table and maxpen.
gcbinit     movea.l 4(sp),a0                   a0 = address of pointer to gcb
* (a0 is used directly as the gcb base from here on)
            move.l  (sp)+,(sp)                 stack return address
* the line above pops the return address on top of the argument, so the
* argument is already removed from the stack when the routine returns
            movem.l a5-a6,old_a5(a0)
            movea.l a0,a6
            move    devicetype(a6),d0
            move    d0,d2
            cmpi    #crtgat,d0
            bgt     gcbnew
* 34 = size in bytes of one init_table entry (3 longs + 9 words + 1 long)
            mulu    #34,d0                     calc offset into init_table
            movea.l d0,a0
            lea     init_table(a0),a0
            lea     plane1_addr(a6),a1
* d1 = 16: the dbra in initloop then copies 17 words = one 34-byte entry
            moveq   #16,d1
            bra     initloop
            
* One entry per device type 0..4, in devicetype order.  The fields are laid
* out exactly like plane1_addr..lower_left in the gcb.
init_table  dc.l  a9826a   plane1_addr         default values for 9826a
            dc.l       0   plane2_offset       ( not used )
            dc.l       0   plane3_offset       ( not used )
            dc       300   n_glines
            dc         2   gspacing
            dc       100   bytesperline
            dc         0   xclip_min
            dc       399   xclip_max
            dc         0   yclip_min
            dc       299   yclip_max
            dc       399   hard_xmax
            dc       299   hard_ymax
            dc.l $53F4CD   lower_left
            
            dc.l  a9836a   plane1_addr         default values for 9836a
            dc.l       0   plane2_offset       ( not used )
            dc.l       0   plane3_offset       ( not used )
            dc       390   n_glines
            dc         1   gspacing
            dc        64   bytesperline
            dc         0   xclip_min
            dc       511   xclip_max
            dc         0   yclip_min
            dc       389   yclip_max
            dc       511   hard_xmax
            dc       389   hard_ymax
            dc.l $53E140   lower_left
            
            dc.l       0   plane1_addr         default values for 98627a
            dc.l   $8000   plane2_offset
            dc.l  $10000   plane3_offset
            dc       390   n_glines
            dc         1   gspacing
            dc        64   bytesperline
            dc         0   xclip_min
            dc       511   xclip_max
            dc         0   yclip_min
            dc       389   yclip_max
            dc       511   hard_xmax
            dc       389   hard_ymax
            dc.l   $6140   lower_left
            
            dc.l  a9836c   plane1_addr         default values for 9836c
            dc.l       0   not used
            dc.l       0   not used
            dc       390   n_glines
            dc         1   gspacing
            dc       512   bytesperline
            dc         0   xclip_min
            dc       511   xclip_max
            dc         0   yclip_min
            dc       389   yclip_max
            dc       511   hard_xmax
            dc       389   hard_ymax
            dc.l $550A00   lower_left

            dc.l       0   plane1_addr         default values for gator
            dc.l       0   not used
            dc.l       0   not used
            dc       768   n_glines
            dc         1   gspacing
            dc      1024   bytesperline
            dc         0   xclip_min
            dc      1023   xclip_max
            dc         0   yclip_min
            dc       767   yclip_max
            dc      1023   hard_xmax
            dc       767   hard_ymax
            dc.l  $BFC00   lower_left
            
* maximum pen per device type, indexed by devicetype (kept in d2)
* NOTE(review): this table is 5 bytes long, so initloop starts on an odd
*   offset unless the assembler pads to a word boundary -- confirm.
maxpen      dc.b       1   26a
            dc.b       1   36a
            dc.b       7   27a
            dc.b      15   36c
            dc.b       1   gat

initloop    move    (a0)+,(a1)+                copy some stuff from init_table
            dbra    d1,initloop
* a1 now points just past lower_left, i.e. at the background field
            clr     (a1)+                      clear background color
            moveq   #0,d0
            move.b  maxpen(d2),d0              get maximum pen
            move.l  d0,max_pen(a6)             store max_pen
            clr.b   non_square(a6)             pixels are square      
            cmpi    #crt27a,d2
            beq     gcb10                      if moonunit clean up size
            cmpi    #crtgat,d2
            blt     rts                        if not gator then we're done  "
* gator only: the frame buffer base is derived from the status byte:
* (status << 4) moved into the upper word, i.e. status * $100000.  The table
* values for plane1_addr (0) and lower_left ($BFC00) are relative to it.
            movea.l deviceaddress(a6),a0       NM changed order 
            moveq   #0,d1                      get gator address
            move.b  gat_stat(a0),d1
            lsl     #4,d1
            swap    d1                         calculate FB addr from status
            move.l  d1,plane1_addr(a6)         store gator address
            add.l   d1,lower_left(a6)          and calculate lower left
            bra     rts
             
* for > gator
*
* gcbnew -- part of gcbinit, entered for device types above gator.
*   In: a6 = gcb pointer.  Reads the displayed frame buffer height and width
*   and the frame buffer address through the device's ID ROM, derives the
*   clip limits and lower_left from them, and finally stores the plane mask
*   in max_pen.  Leaves through the common rts exit.
gcbnew      clr.l   plane2_offset(a6)          init constant values
            clr.l   plane3_offset(a6)
            clr     xclip_min(a6)
            clr     yclip_min(a6)
            clr     background(a6)
            clr.b   non_square(a6)             init to false, check later
            moveq   #1,d0
            move    d0,gspacing(a6)
            movea.l deviceaddress(a6),a0
            movep   id_fbh(a0),d0              get height of displayed fb
            move    d0,n_glines(a6)            n_glines = fbh
            subq    #1,d0           
            move    d0,hard_ymax(a6)           hard_ymax = fbh - 1
            move    d0,yclip_max(a6)           yclip_max = fbh - 1
            
            move.l  d0,d2                      d2 = fbh - 1
            
            movep   id_fbw(a0),d0              get width of displayed fb
            move    d0,d3                      d3 = fbw
            move    d0,bytesperline(a6)        bytesperline = fbw
            
* check for non square pixels
            btst    #0,id_bool(a0)             if square pixels
            beq.s   gcb051                        then goto gcb051
            bset    #0,non_square(a6)          else set non_square
            lsr     #1,d0                      use fbw div 2
gcb051      subq    #1,d0                      fbw [div 2] - 1
            move    d0,xclip_max(a6)           xclip_max = fbw [div 2] - 1
            move    d0,hard_xmax(a6)           hard_xmax = fbw [div 2] - 1

* d2 = bytesperline * (fbh - 1) = byte offset of the last displayed line
            mulu    d3,d2                      d2 = bypesperline * (fbh - 1)
find_fba    movep   id_fba(a0),d0              get frame buf location pointer 
            movea.l a0,a1
            adda    d0,a1                      a1 = addr of bits 16-23 of fb start addr
            moveq   #0,d0
            move.b  (a1),d0                    d0 = bits 16-23 of fb addr
            swap    d0                         d0 = addr frame buffer
            move.l  d0,plane1_addr(a6)         store fb address
            add.l   d0,d2                      calc lower_left 
            move.l  d2,lower_left(a6)
* this code handles only byte/pixel data
            
* To determine which planes exist, look at location $005B.  If nonzero, it 
* represents the number of existing planes.
* If zero, the number of planes must be determined by writing 1's
* to the planes and reading them back -- nonexistent planes return 0
*
* ASSUMPTION:  planes are loaded CONTIGUOUSLY from lsb to msb
*
            moveq   #0,d0
            moveq   #0,d1
            move.b  id_np(a0),d0               examine number of planes register
            bne.s   gcb052                     if NOT 0, d0 = number of existing planes
* probe path: a2 = plane1_addr + (fbhigh - 6) * bytesperline
* NOTE(review): the remarks say "5th" line from the end but the code
*   subtracts 6 -- confirm which is meant.
            movep   id_fbhigh(a0),d2           set A2 to 5th to last line of fb
            subq    #6,d2
            mulu    bytesperline(a6),d2        if 0, must determine mask:      
            movea.l plane1_addr(a6),a2
            adda.l  d2,a2                       A2 points to 5th line from end
            cmpi    #crtgbx,devicetype(a6)      (ensure all planes writeable)
            bgt.s   gcbfben
* NOTE: the "for gator" remark on the next line is stale: this path is only
* reached for types above gator and not above gbox, and it writes the gbox
* registers.
            move    #3,gbox_rr(a0)              for gator
            move.b  #0,gbox_wp
            move.b  #0,gbox_nwrite(a0)
            bra.s   gcbnp
* topcat: byte write to the (word sized) replacement rule register
gcbfben     move.b  #3,topc_prr(a0)             for topcat
            move.b  #-1,topc_fben(a0)     
gcbnp       move.b  #-1,(a2)                   write 1's to (off-screen) fb 
            move.b  (a2),d0                    and read it back: nonexistent planes return 0
            bra.s   gcb053

* ID_NP was nonzero: set bits (n-1)..0 of d1, giving a mask of n one bits
gcb_l       bset    d0,d1
gcb052      dbra    d0,gcb_l
            move.l  d1,d0
gcb053      move.l  d0,max_pen(a6)
            bra     rts
            
*  if the device is a 98627 or 98627 gload image, n_glines must be
*  calculated.  if monitor type = (1,2,6) then n_glines and the
*  other parameters associated with variable height (hard_ymax,
*  yclip_max, etc) are already correct.

* gcb10 -- part of gcbinit, entered for the 98627 (crt27a) after the
*   init_table defaults (390 lines, 64 bytes/line) have been copied.
*   In: a6 = gcb pointer.  Uses d1.
*   plane1_addr = deviceaddress + $8000, and lower_left (a relative value
*   in init_table) is made absolute by adding the same base.
*   monitortype < 3 or >= 6 : keep the 390 line defaults
*   monitortype = 3         : 474 lines
*   monitortype = 4 or 5    : 512 lines
gcb10       move.l  deviceaddress(a6),d1
            add.l   #$8000,d1
            move.l  d1,plane1_addr(a6)
            add.l   d1,lower_left(a6)
            cmpi    #3,monitortype(a6)
            blt     rts                        n_glines is already set up
            beq.s   init474
            cmpi    #6,monitortype(a6)
            bge     rts                        n_glines is already set up
init512     move   #512,n_glines(a6)
            move   #511,yclip_max(a6)
            move   #511,hard_ymax(a6)
* $1E80 = (512 - 390) lines * 64 bytes/line
            addi.l #$1E80,lower_left(a6)
            bra    rts
init474     move   #474,n_glines(a6)
            move   #473,yclip_max(a6)
            move   #473,hard_ymax(a6)
* $1500 = (474 - 390) lines * 64 bytes/line
            addi.l #$1500,lower_left(a6)
            bra    rts
            
            
              
*
*  procedure gon(gcb:anyptr);
*
*  Turns graphics on for the display described by the gcb.
*  Stack on entry: 0(sp) = return address, 4(sp) = gcb pointer.  The
*  argument is removed from the stack here; every path leaves through the
*  common rts exit, which restores the caller's a5/a6.
*  Uses d0, d1, a0.
*
*  don't really change hardware for > gator, 
*   pass in current display_mask and pass back modified hardware value
*   through index field
*
gon         movea.l 4(sp),a0                   a0 = gcb pointer (the argument)
            move.l  (sp)+,(sp)                 drop the argument, keep return address
            movem.l a5-a6,old_a5(a0)           saved for the rts exit
            movea.l a0,a6
            move    devicetype(a6),d0
            cmpi    #crttpc,d0                 3.1   (check for tpc not gbx)
* The comparison is unsigned (bhi) so that a negative devicetype is rejected
* together with values above topcat; a signed test would let a negative
* value through and index in front of gontable.
            bhi     rts                        if unknown type return 
            add     d0,d0                      2 bytes per table entry
            jmp     gontable(d0)
* one bra.s per device type, in devicetype order
gontable    bra.s   gon_9826a
            bra.s   gon_9836a
            bra.s   gon_98627
            bra.s   gon_9836c
            bra.s   gon_gator
            bra.s   gon_gbox                   3.1
            bra.s   gon_topc                   NM
* 9826a/9836a: clear bit 15 of the plane1_addr and lower_left addresses
* (bit 7 of the byte at offset 2), then read the frame buffer
gon_9826a   equ     *
gon_9836a   bclr    #7,plane1_addr+2(a6)
            bclr    #7,lower_left+2(a6)
gtst        movea.l plane1_addr(a6),a0
            tst.b   (a0)                       read graphics ram to turn it on
            bra     rts
gon_98627   movea.l deviceaddress(a6),a0
            move.b  #$80,1(a0)
            bra     rts
gon_9836c   move.b  #1,a9836conoff
            bra     rts
gon_gator   movea.l deviceaddress(a6),a0       get gator address
            move.b  #12,$6001(a0)              set register 12 on 6845
            move.b  #32,$6003(a0)              turn on gator
            bra     rts
* gbox/topcat: no hardware access.  index := index OR (graph_mask AND NOT A)
* where A = low byte of alpha_mask if this device is the current alpha
* display (deviceaddress = bm_devaddr), else 0.
gon_gbox    equ     *
gon_topc    movea.l deviceaddress(a6),a0       bug!!! for turn to B2
            move.l  index(a6),d1               save current disp_mask in d1
            moveq   #0,d0
            cmpa.l  bm_devaddr,a0             if not current alpha device
            bne.s   gon_1                        don't use alpha_mask, use 0
            move.b  alpha_mask+1,d0            d0 = alpha planes
gon_1       not.b   d0
            and.b   graph_mask(a6),d0          d0 = G and NOT(A)
            or.b    d0,d1
            move.l  d1,index(a6)               return new disp_mask in index
            bra     rts                     
            
            
            

            page

*
*  dump graphics ROTATED
*                     Monochrome,  PUT OUT 4 BITS/PIXEL
*                     REMOVE MOONUNIT
*
*  Entry uses the same calling sequence as gcbinit/gon:
*  0(sp) = return address, 4(sp) = gcb pointer; leaves through rts.
*
*  Builds one printer row from one pixel COLUMN of the display.  Starting at
*  lower_left it steps up the frame buffer one scan line at a time
*  (a1 = a1 - bytesperline) and appends two identical bits to the string for
*  every line.  The column is then walked a second time, after copying the
*  rgltemp1-byte escape sequence that sits just in front of the string, so
*  each pixel ends up as 2 x 2 printer dots.
*
*  gcb fields read: addr1 (output string), rgltemp1 (escape seq length),
*                   index (pixel column), n_glines, bytesperline,
*                   lower_left, max_pen, graph_mask, devicetype
*
*  registers:  a0 output ptr      a1 source ptr     a2 column start (pass 2)
*              a3 escape seq      d0 source bit #   d2 output bit # (7..0)
*              d3 line counter    d4 bytes/line     d5 plane mask (36c path)
*              d6 pass counter    d7 0/1 output pointer correction
*
gdump       equ *
            movea.l 4(sp),a0                   a0 = gcb pointer (the argument)
            move.l  (sp)+,(sp)                 drop the argument, keep return address
            movem.l a5-a6,old_a5(a0)           saved for the rts exit
            movea.l a0,a6                      a6 = gcb pointer
            move    n_glines(a6),d3            d3 = number of graphics lines
            subq    #1,d3                      d3 = d3-1 (dbra count)
            move    bytesperline(a6),d4        d4 = bytes/line
            movea.l lower_left(a6),a1          a1 = address of lower left
            movea.l addr1(a6),a0               a0 = address of string
            movea.l a0,a3                      
            suba.l  rgltemp1(a6),a3            a3 = location of escape sequences
            move.l  index(a6),d0               d0 = number of line to be dumped
            move.l  max_pen(a6),d5             d5 = plane mask
            AND.B   GRAPH_MASK(A6),D5
            bsr     set_x                      a1/a2 = column start, d0 = bit #
            moveq   #7,d2                      d2 = counter for bits in string           
            moveq   #1,d6                      d6 = counter for which repetition of row
            move    devicetype(a6),d1
            cmpi    #crt36c,d1
            bge     ge36c                      treat 36c, gator, gbox, topc same
* Only the 9826a (type 0) and the 9836a (type 1) have a bit-per-pixel dump
* loop.  The types are tested explicitly instead of indexing a jump table:
* any other value that gets here (the 98627, whose dump code was removed,
* or a negative type) leaves through rts without touching the string.
            cmpi    #crt36a,d1
            bhi     rts                        unsigned: 98627 or negative type
            bne.s   ge26a_pre                  type 0 = 9826a
*
* 9836a: 390 lines * 2 bits = 97.5 bytes, so a row ends in a half filled
* byte; d7 = 1 steps a0 past it before the second pass.  (9826a: 300 lines
* * 2 bits = 75 bytes exactly, so d7 = 0.)
ge36a       move.l  #1,d7                      this is a correction for n_glines=390
            bra.s   ge26a
*
ge26a_pre   clr.l   d7
* per scan line: test bit d0 of the source byte, write it twice
ge26a       move.b  (a1),d1
            btst    d0,d1
            beq.s   ge26a10
            bset    d2,(a0)
            subq    #1,d2
            bset    d2,(a0)
            subq    #1,d2
            bra.s   ge26a20
ge26a10     bclr    d2,(a0)
            subq    #1,d2
            bclr    d2,(a0)
            subq.b  #1,d2
* flags here come from the last subq: negative means the output byte is full
ge26a20     bge.s   ge26a30
            adda    #1,a0
            moveq   #7,d2
ge26a30     suba    d4,a1                      up one scan line
            dbra    d3,ge26a
            subi.l  #1,d6                      test if need to go thru loop again
            tst     d6
            blt     ge26a35
            movea.l a2,a1                      reset start of line and do it again
            adda    d7,a0                      correction for nglines=390
* d5 is reused as the copy counter; the plane mask is not needed on this path
            move.l  rgltemp1(a6),d5            d5 = counter for adding escape seq to second row
            subi.l  #1,d5
ge26a32     move.b  (a3)+,(a0)+                add the new escape sequences for second row
            dbra    d5,ge26a32
            moveq   #7,d2                      set up the counters
            move    n_glines(a6),d3            d3 = number of graphics lines
            subq    #1,d3                      d3 = d3-1
            bra     ge26a
ge26a35     bra     rts
            
* ge36c -- rotated dump loop for byte-per-pixel displays (reached from gdump
*   for device types >= crt36c).
*   In: a0 = output ptr, a1 = a2 = first pixel of the column, a3 = escape
*       sequence, d2 = 7, d3 = n_glines-1, d4 = bytes/line, d5 = plane
*       mask, d6 = 1.   Uses d1, d7.
*   A pixel counts as set when (pixel AND d5) is nonzero; each pixel is
*   written as two identical bits.  The column is walked twice.
* NOTE(review): unlike the ge26a loop there is no half-byte correction before
*   the second pass (it is commented out below); with n_glines = 390 the
*   first pass ends in the middle of a byte -- confirm this is intended.
ge36c       move.b  (a1),d1
            and.b   d5,d1
            beq.s   ge36c10
            bset    d2,(a0)
            subq.b  #1,d2
            bset    d2,(a0)
            subq    #1,d2
            bra.s   ge36c20
ge36c10     bclr    d2,(a0)
            subq.b  #1,d2
            bclr    d2,(a0)
            subq.b  #1,d2
* flags here come from the last subq: negative means the output byte is full
ge36c20     bge.s   ge36c30
            adda    #1,a0
            moveq   #7,d2
ge36c30     suba    d4,a1
            dbra    d3,ge36c
            subi.l  #1,d6                      test if need to go thru loop again
            tst     d6
            blt     ge36c35
            movea.l a2,a1                      reset start of line and do it again
*           adda    #1,a0                      correction for nglines=390
            move.l  rgltemp1(a6),d7            d7 = counter for adding escape seq to second row
            subi.l  #1,d7
ge36c32     move.b  (a3)+,(a0)+                add the new escape sequences for second row
            dbra    d7,ge36c32
            moveq   #7,d2                      set up the counters
            move    n_glines(a6),d3            d3 = number of graphics lines
            subq    #1,d3                      d3 = d3-1
            bra     ge36c
ge36c35     bra     rts
*
*
* set_x -- position the source pointer on pixel column x for a rotated dump.
*   In:   d0 = x (pixel column),  a1 = address of lower left,  a6 = gcb
*   Out:  a1 = a2 = address of the byte holding column x on the bottom line
*         d0 = 7 - (x mod 8)   (bit number within that byte)
*         d1 = byte offset that was added to a1
*   Byte offset:  x          for types >= crt36c (one byte per pixel)
*                 x div 8    for the other types (one bit per pixel),
*                 doubled for type 0 (9826a).
set_x       equ     *
            move    d0,d1
            cmpi    #crt36c,devicetype(a6)
            bge.s   sx10                       byte per pixel: offset = x
            lsr     #3,d1                      bit per pixel: offset = x div 8
            tst     devicetype(a6)
            bne.s   sx10
            add     d1,d1                      type 0: twice that offset
sx10        adda    d1,a1
            movea.l a1,a2                      a2 = column start for the second pass
            not     d0
            andi.l  #7,d0                      (not x) and 7  =  7 - (x mod 8)
            rts
            
            
*
*
*  dump graphics reduced (NORMAL DUMP GRAPHICS)
*      MONOCHROMATIC; NOT ROTATED
*      Changed from the system routine to put out 4 dots per pixel.
*      It accomplishes this by doing two rows per time it is called.
*      When it gets done with one row, it must reset to the start
*      of the row, put out the escape sequence for another row,
*      and repeat the row.  In a single row, for each pixel it examines, 
*      it puts out two bits of information in the buffer to be dumped.
*
*      removed moonunit stuff
* Entry uses the same calling sequence as gcbinit/gon:
* 0(sp) = return address, 4(sp) = gcb pointer; leaves through rts.
* Set up here for the per-device loops that follow:
*   a0 = output string (addr1)
*   a3 = addr1 - rgltemp1 = escape sequence stored in front of the string
*   a1 = a2 = lower_left - index * bytesperline = start of the row to dump
*   d5 = 1  (pass counter: every row is produced twice)
gdump_red   equ *
            movea.l 4(sp),a0                   a0 = address of pointer to gcb
            move.l  (sp)+,(sp)                 stack return address
            movem.l a5-a6,old_a5(a0)
            movea.l a0,a6                      a6 = address of pointer to gcb
            movea.l addr1(a6),a0               a0 = address of string
            movea.l a0,a3                      a3 = address of the escape seqences for row
            suba.l  rgltemp1(a6),a3            adjust a3 to not write over escape seq.
            movea.l lower_left(a6),a1          a1 = address of lower left
            move.l  index(a6),d0               d0 = row to be dumped
* mulu multiplies the low 16 bits of d0 only
            mulu    bytesperline(a6),d0        d0 = row * bytes/line
            suba.l  d0,a1                      a1 = address of line dumped
            movea.l a1,a2                      a2 = copy of a1
            move.l  #1,d5                      d5 = counter for which time row is dumped
            move    devicetype(a6),d1
            cmpi    #crt36c,d1
            bge     gr36c                      treat 36c, gator, gbox, topc same
* jump table indexed by 2 * devicetype; it has entries for types 0 and 1 only.
* NOTE(review): with two 2-byte entries a devicetype of 2 (98627) lands on
*   the code that immediately follows the table, and a negative type is not
*   rejected -- confirm callers never pass those.
            add     d1,d1
            jmp     grtable(d1)
grtable     bra.s   gr26a
            bra.s   gr36a
            
            
* gr26a -- gdump_red row loop for the 9826a.
*   In: a0 = output ptr, a1 = a2 = start of row, a3 = escape sequence,
*       d5 = pass counter (1 on entry).   Uses d0-d4, d6, d7.
*   Reads 50 source bytes, skipping every other byte of the row.  Each
*   source byte (bit 7 first) becomes one 16 bit output word with every
*   pixel written as two adjacent bits; the word is stored high byte first.
*   After the first pass the escape sequence (rgltemp1 bytes from a3) is
*   appended and the same row is produced again.
gr26a       moveq   #49,d0                     50 bytes/line
gr26a10     moveq   #7,d3
            moveq   #15,d1
            move.w  #0,d2
            move.b  (a1)+,d4
gr26a20     btst    d3,d4
            beq.s   gr26a28
            bset    d1,d2
            subq    #1,d1
            bset    d1,d2                      repeat the dot
            bra     gr26a30
gr26a28     subq    #1,d1
gr26a30     subq    #1,d3
* d1 drops by 2 per pixel (subq above + dbra), so the loop runs 8 times
            dbra    d1,gr26a20
            move.l  d2,d7                      save d2
            lsr     #8,d2                      do only a byte at the time because could
            move.b  d2,(a0)+                   be on an odd boundary; do the higher byte first
            move.b  d7,(a0)+                   do the lower byte of d2
            adda    #1,a1                      for 9826, dump one, skip one
            dbra    d0,gr26a10
            subi.l  #1,d5                      test if need to go thru loop again
            tst     d5
            blt     gr26a35
            movea.l a2,a1                      reset start of line and do it again
            move.l  rgltemp1(a6),d6            d6 = counter for adding escape seq to second row
            subi.l  #1,d6
gr26a32     move.b  (a3)+,(a0)+                add the new escape sequences for second row
            dbra    d6,gr26a32
            bra     gr26a
gr26a35     bra     rts


* gr36a -- gdump_red row loop for the 9836a.
*   In: a0 = output ptr, a1 = a2 = start of row, a3 = escape sequence,
*       d5 = pass counter (1 on entry).   Uses d0-d4, d6, d7.
*   Reads 64 consecutive source bytes.  Each source byte (bit 7 first)
*   becomes one 16 bit output word with every pixel written as two adjacent
*   bits; the word is stored high byte first.  After the first pass the
*   escape sequence (rgltemp1 bytes from a3) is appended and the same row is
*   produced again.
gr36a       moveq   #63,d0                     64 bytes/line
gr36a10     moveq   #7,d3
            moveq   #15,d1
            move.w  #0,d2
            move.b  (a1)+,d4
gr36a20     btst    d3,d4
            beq.s   gr36a28
            bset    d1,d2
            subq    #1,d1
            bset    d1,d2                      repeat the dot
            bra     gr36a30
gr36a28     subq    #1,d1
gr36a30     subq    #1,d3
* d1 drops by 2 per pixel (subq above + dbra), so the loop runs 8 times
            dbra    d1,gr36a20
            move.l  d2,d7                      save d2
            lsr     #8,d2                      do only a byte at the time because could
            move.b  d2,(a0)+                   be on an odd boundary; do the higher byte first
            move.b  d7,(a0)+                   do the lower byte of d2
            dbra    d0,gr36a10
            subi.l  #1,d5                      test if need to go thru loop again
            tst     d5
            blt     gr36a35
            movea.l a2,a1                      reset start of line and do it again
            move.l  rgltemp1(a6),d6            d6 = counter for adding escape seq to second row
            subi.l  #1,d6
gr36a32     move.b  (a3)+,(a0)+                add the new escape sequences for second row
            dbra    d6,gr36a32
            bra     gr36a
gr36a35     bra     rts
            
* gr36c -- gdump_red row loop for byte-per-pixel displays (types >= crt36c).
*   In: a0 = output ptr, a1 = a2 = start of row, a3 = escape sequence,
*       d5 = pass counter (1 on entry).   Uses d0-d4, d6, d7.
*   d0 = (hard_xmax+1)/8 - 1 : one 16 bit output word per 8 pixels.
*   d3 = max_pen AND graph_mask; a pixel counts as set when
*        (pixel AND d3) is nonzero, and is written as two adjacent bits.
*   Displays flagged non_square are handed to grnsp instead.
*   The second pass re-enters at gr36c, so d0 and d3 are computed again.
gr36c       move    hard_xmax(a6),d0
            addq    #1,d0
            lsr     #3,d0                      (xmax+1) div 8
            subq    #1,d0
            move.l  max_pen(a6),d3
            and.b   graph_mask(a6),d3          dump only planes write enabled for graphics
            tst.b   non_square(a6)             if nonsquare pixels
            bne.s   grnsp                         then goto grnsp 
gr36c10     moveq   #15,d1
            move.w  #0,d2
gr36c20     move.b  (a1)+,d4
            and.b   d3,d4
            beq.s   gr36c28
            bset    d1,d2
            subq    #1,d1
            bset    d1,d2                      repeat the dot
            bra     gr36c30
gr36c28     subq    #1,d1
* d1 drops by 2 per pixel (subq above + dbra), so the loop runs 8 times
gr36c30     dbra    d1,gr36c20
            move.l  d2,d7                      save d2
            lsr     #8,d2                      do only a byte at the time because could
            move.b  d2,(a0)+                   be on an odd boundary; do the higher byte first
            move.b  d7,(a0)+                   do the lower byte of d2
            dbra    d0,gr36c10
            subi.l  #1,d5                      test if need to go thru loop again
            tst     d5
            blt     gr36c35
            movea.l a2,a1                      reset start of line and do it again
            move.l  rgltemp1(a6),d6            d6 = counter for adding escape seq to second row
            subi.l  #1,d6
gr36c32     move.b  (a3)+,(a0)+                add the new escape sequences for second row
            dbra    d6,gr36c32
            bra     gr36c
gr36c35     bra     rts

* for non square pixels, output one dot for each pixel PAIR
*
* grnsp -- gdump_red row loop for byte-per-pixel displays with non-square
*   pixels; entered from gr36c with d0 = output word count - 1 and
*   d3 = plane mask already set.   Uses d1, d2, d4, d6, d7.
*   Each step fetches one pixel PAIR as a word.  The and.b that follows
*   looks at the low byte of d4 only, i.e. at the second pixel of the pair
*   (the byte at the higher address); the earlier "or" of both pixels is
*   commented out.  A set pair is written as two adjacent output bits, so
*   one 16 bit output word covers 8 pairs.
*   The second pass goes back through gr36c, which recomputes d0 and d3 and
*   branches here again.
grnsp       moveq   #15,d1                      FOR NON SQUARE PIXELS
            moveq   #0,d2
grnsp20     move.w  (a1)+,d4                   dts bug FSDat00048 (enhancement)
*           or.b    (a1)+,d4                   if either pixel pair is set, output     dts bug FSDat00048 (enhancement)
            and.b   d3,d4
            beq.s   grnsp28
            bset    d1,d2
            subq    #1,d1
            bset    d1,d2                      repeat the dot
            bra     grnsp30
grnsp28     subq    #1,d1
* d1 drops by 2 per pair (subq above + dbra), so the loop runs 8 times
grnsp30     dbra    d1,grnsp20
            move.l  d2,d7                      save d2
            lsr     #8,d2                      do only a byte at the time because could
            move.b  d2,(a0)+                   be on an odd boundary; do the higher byte first
            move.b  d7,(a0)+                   do the lower byte of d2
            dbra    d0,grnsp
            subi.l  #1,d5                      test if need to go thru loop again
            tst     d5
            blt     grnsp35
            movea.l a2,a1                      reset start of line and do it again
            move.l  rgltemp1(a6),d6            d6 = counter for adding escape seq to second row
            subi.l  #1,d6
grnsp32     move.b  (a3)+,(a0)+                add the new escape sequences for second row
            dbra    d6,grnsp32
            bra     gr36c
grnsp35     bra     rts
            
*  
*  additions for gdump_red_c and gdump_c
*
count_gdrc   equ   rgltemp4                    count in pixrowbuff (temporary)
rowoffset    equ   rgltemp2                    row offset in pixrow
planeoffset    equ   rgltemp3                    plane offset in pixrow
*
*  clear_buff  clears the pixrow buffers; called by the color dump routines.
*
*      In:     a5 = start of pixrow buffer, a6 = gcb
*              planeoffset(a6) = byte distance between successive buffers
*              rgltemp1(a6)    = number of leading "count" bytes to preserve
*      Uses:   a4,d1,d3,d4,d6,d7 (none are saved)
*
*      Six consecutive buffers are cleared.  For each one the routine starts
*      rgltemp1 bytes in, clears (8 - rgltemp1) single bytes, then clears
*      ((planeoffset-11) asr 2) + 1 longwords, and finally skips rgltemp1 + 2
*      bytes to reach the same position in the next buffer.
*      NOTE(review): the distance advanced per buffer equals planeoffset only
*      when planeoffset-10 is a multiple of 4, and the byte loop count is only
*      sensible for rgltemp1 in 0..7 -- presumably guaranteed by the caller.
*      NOTE(review): asr below is word sized, so the upper word of d1 (and of
*      d7, which is copied from it) is left as the upper word of planeoffset-11.
*
clear_buff  move.l  planeoffset(a6),d1           set up count to clear buffers
            sub.l   #11,d1                     subtract 8 for count+allignment
*                                               2 for len, and 1 for dbra inst
            asr     #2,d1                      divide by 4, cuz clearing longs
            movea.l a5,a4                      clear pixrow buffers each time
            adda.l  rgltemp1(a6),a4            don't clear the count bytes
            move.l  #5,d6                      6 buffers
            move.l  #8,d4                      find fudge factor  (8 - count)
            sub.l   rgltemp1(a6),d4             this is for the last few bytes
            subq    #1,d4                       in each plane
            move.l  d4,d3
* byte loop: clears the (8 - count) bytes that follow the count bytes
plane       clr.b   (a4)+
            dbra    d4,plane
            move.l  d1,d7
* longword loop: clears the remainder of this buffer
here        clr.l   (a4)+
            dbra    d7,here
* step over the next buffer's count bytes and the 2 length bytes
            adda.l  rgltemp1(a6),a4
            adda.l  #2,a4                      make up for the length bytes
            move.l  d3,d4                      re-store fudge factor
            dbra    d6,plane
            rts
*
*
*  dump graphics reduced colored (NORMAL DUMP GRAPHICS COLORED)
*          COLOR; DITHER;  NOT ROTATED
*
* gdump_red_c -- build one dithered colour pixrow for the non-rotated dump.
*   Stack on entry: return address, then the gcb pointer (popped here).
*   The caller's a5/a6 are saved in the gcb at old_a5; exit is through the
*   shared label "rts" (defined outside this section; presumably it restores
*   a5/a6 and returns -- confirm).
*   Device types below crt36c are handled by gr27a_r, all others fall into
*   gr36c_grdc.
gdump_red_c equ *
            movea.l 4(sp),a0                   a0 = address of pointer to gcb
            move.l  (sp)+,(sp)                 stack return address
            movem.l a5-a6,old_a5(a0)
            movea.l a0,a6                      a6 = address of pointer to gcb
            movea.l addr2(a6),a5               a5 = address of string (pixrowbuff)
            movea.l lower_left(a6),a1          a1 = address of lower left
            move.l  index(a6),d0               d0 = row to be dumped
            mulu    bytesperline(a6),d0        d0 = row * bytes/line
            suba.l  d0,a1                      a1 = address of line dumped
            bsr     clear_buff                 clear the pixrow buff to start
*
            move    devicetype(a6),d1
            cmpi    #crt36c,d1
            blt     gr27a_r                    treat 36c, gator, gbox, topc same
*
*
* gr36c_grdc -- byte-per-pixel frame buffers.
*   d0 = dbra count of output bytes, ((hard_xmax+1) / 4) - 1
*   d5 = 2-bit field within the current output byte (3 = leftmost .. 0)
*   d1 = dot row (0,1)        d3 = plane (0,1,2)
*   d2 = colour map index of the current pixel (from getcmapind_r)
*   For every pixel, dot row and plane: the byte at addr4 + d2 (xmap) is
*   decremented and multiplied by 4, the plane number is added, and the result
*   indexes the table at addr3 (sqtcmap).  Bits 3-2 of that entry are used for
*   dot row 0 and bits 1-0 for dot row 1; the two bits are shifted to field d5
*   and ORed into the byte at
*       a5 + dotrow*rowoffset + plane*planeoffset + count_gdrc.
gr36c_grdc  move.l  rgltemp1(a6),count_gdrc(a6) temporary count holder
            move    hard_xmax(a6),d0
            addq    #1,d0
            lsr     #2,d0                      (xmax+1) div 4
            subq    #1,d0                      d0 = bytes left to go in line
            bsr     getcmapind_r               get color map index of pixel
*
gdrc10      moveq   #3,d5                      d5 = which 2 bits in byte of buffer
*                                                  3 => leftmost two
gdrc20      clr.l   d1                         d1 = dotrow
gdrc21      cmpi.b  #2,d1
            beq     drow_r  
gdrc22      clr.l   d3                         d3 = plane
gdrc23      cmpi.b  #3,d3
            beq     pln_r      
            movea.l addr4(a6),a2               get addr of xmap
            adda.l  d2,a2                      get addr of xmap[cmapindex]
            clr.l   d7
            move.b  (a2),d7                    d7 = index into sqtcmap
* NOTE(review): the subq/lsl are word sized; an xmap entry of 0 would wrap to
* a large positive offset -- presumably xmap entries start at 1.
            subq    #1,d7               
            lsl     #2,d7                      multiply by 4 (offset in sqtcmap)
            add.l   d3,d7                      second offset in sqtcmap
            movea.l addr3(a6),a3
            adda.l  d7,a3                      a3 = addr sqtcmap entry
            movea.l a5,a4
            move.l  rowoffset(a6),d4              
            mulu    d1,d4
            adda.l  d4,a4
            move.l  planeoffset(a6),d4
            mulu    d3,d4
            adda.l  d4,a4
            adda.l  count_gdrc(a6),a4          pixrow byte location
            clr.l   d4
            move.b  (a3),d4                    get sqtcmap entry
            tst.b   d1                         if dotrow = 0
            beq     gdrc30                     go to gdrc30
            andi.w  #3,d4                      get rightmost 2 bits of sqtcmap entry
            bra     gdrc40                     
gdrc30      andi.b  #12,d4                     get next-to-rightmost bits
            lsr     #2,d4                      shift them to right
* shift the 2-bit pattern left by 2*d5 (d5 = 0 needs no shift)
gdrc40      cmpi.b  #3,d5                      get mask for which bits of pixrowbuff
            bne     gdrc50                     if d5 is 3, shift left 6                      
            lsl     #6,d4
            bra.s   gdrc60
gdrc50      cmpi.b  #2,d5                      if d5 is 2, shift left 4
            bne     gdrc70
            lsl     #4,d4
            bra.s   gdrc60
gdrc70      cmpi.b  #1,d5                      if d5 is 1, shift left 2
            bne     gdrc60
            lsl     #2,d4
*
gdrc60      or.b    d4,(a4)                    or it with the pixrow byte
            addq    #1,d3                      increment plane
            bra.s   gdrc23
pln_r       addq    #1,d1                      increment dotrow
            bra     gdrc21
* both dot rows done for this pixel: fetch the next pixel (a1 advances)
drow_r      bsr     getcmapind_r               get color map index of next pixel
gdrc90      dbra    d5,gdrc20                  if done 4 pixels, start on next one
            addi.l  #1,count_gdrc(a6)          increment count in pixrow buffers
            dbra    d0,gdrc10                  keep going until done with a row
            bra     rts
*
* getcmapind_r -- fetch the next pixel of the row and reduce it to a colour
*                 map index.
*      In:   a1 = current pixel, a6 = gcb
*      Out:  d2 = pixel AND max_pen AND graph_mask (long)
*            a1 advanced by 1 byte, or by 2 when non_square(a6) is non-zero
*            d6 = max_pen AND graph_mask, d7 = graph_mask (zero extended)
getcmapind_r moveq   #0,d6
            move.b  graph_mask(a6),d6          d6 = graphics plane mask, zero extended
            move.l  d6,d7                      d7 keeps the bare mask
            and.l   max_pen(a6),d6             d6 = planes that are both valid and enabled
            moveq   #0,d2
            move.b  (a1)+,d2                   d2 = raw pixel byte, step to next byte
            tst.b   non_square(a6)
            beq.s   square_2                   square pixels: one byte per pixel
            addq    #1,a1                      non-square: skip second byte of the pair
square_2    and.l   d6,d2                      d2 = colour map index
            rts
*
*
*
* gr27a_r -- non-rotated dithered colour row for device types below crt36c
*            (three bit planes; "moonunit" in the comments below).
*   In:   a1 = plane 1 address of the line, a5 = pixrow buffer, a6 = gcb
*   a2/a3 = plane 2/3 addresses (set by getoffsets)
*   d6    = bit number (7..0) of the current pixel within the plane bytes
*   d2    = colour index 0..7 of the current pixel (set by moon_cmap)
*   d0    = dbra count of output bytes (128 passes of 4 pixels)
*   d5, d1, d3 = 2-bit field, dot row and plane, as in gr36c_grdc
*   The dither lookup and store are the same as gr36c_grdc, except that a4 and
*   a0 are the scratch address registers (a2/a3 hold plane pointers here).
gr27a_r     bsr     getoffsets
            move.l  rgltemp1(a6),count_gdrc(a6) temporary count holder
            move    #127,d0                     64 bytes/line
            moveq   #7,d6                      d6 = counter for bits in moonunit frame buff
            bsr     moon_cmap                  get color map index for moonunit; put in d2
*
gr27a_r10   moveq   #3,d5                      d5 = which 2 bits in byte of buffer
*                                                  3 => leftmost two
gr27a_r20   clr.l   d1                         d1 = dotrow
gr27a_r21   cmpi.b  #2,d1
            beq     gr27a_rdrow
gr27a_r22   clr.l   d3                         d3 = plane
gr27a_r23   cmpi.b  #3,d3
            beq     gr27a_rpln  
            movea.l addr4(a6),a4               get addr of xmap
            adda.l  d2,a4                      get addr of xmap[cmapindex]
            clr.l   d7
            move.b  (a4),d7                    d7 = index into sqtcmap
            subq    #1,d7               
            lsl     #2,d7                      multiply by 4 (offset in sqtcmap)
            add.l   d3,d7                      second offset in sqtcmap
            movea.l addr3(a6),a0
            adda.l  d7,a0                      a0 = addr sqtcmap entry
            movea.l a5,a4
            move.l  rowoffset(a6),d4              
            mulu    d1,d4
            adda.l  d4,a4
            move.l  planeoffset(a6),d4
            mulu    d3,d4
            adda.l  d4,a4
            adda.l  count_gdrc(a6),a4          pixrow byte location
            clr.l   d4
            move.b  (a0),d4                    get sqtcmap entry
            tst.b   d1                         if dotrow = 0
            beq     gr27a_r30                     go to gr27a_r30
            andi.w  #3,d4                      get rightmost 2 bits of sqtcmap entry
            bra     gr27a_r40                     
gr27a_r30   andi.b  #12,d4                     get next-to-rightmost bits
            lsr     #2,d4                      shift them to right
* shift the 2-bit pattern left by 2*d5 (d5 = 0 needs no shift)
gr27a_r40   cmpi.b  #3,d5                      get mask for which bits of pixrowbuff
            bne     gr27a_r50                     if d5 is 3, shift left 6                      
            lsl     #6,d4
            bra.s   gr27a_r60
gr27a_r50   cmpi.b  #2,d5                      if d5 is 2, shift left 4
            bne     gr27a_r70
            lsl     #4,d4
            bra.s   gr27a_r60
gr27a_r70   cmpi.b  #1,d5                      if d5 is 1, shift left 2
            bne     gr27a_r60
            lsl     #2,d4
*
gr27a_r60   or.b    d4,(a4)                    or it with the pixrow byte
            addq    #1,d3                      increment plane
            bra.s   gr27a_r23
gr27a_rpln  addq    #1,d1                      increment dotrow
            bra     gr27a_r21
* pixel done: move to the next bit.  When the bit counter goes below 0 it is
* reset to 7 and the three loads below are executed only for their
* post-increment: they advance a1/a2/a3 to the next byte of each plane.  The
* value left in d2 is overwritten by moon_cmap.
gr27a_rdrow subq.b  #1,d6                                
            bge.b   gr27a_r80                   
            moveq   #7,d6
            move.b  (a1)+,d2                    get plane 1 byte
            move.b  (a2)+,d2                    get plane 2 byte
            move.b  (a3)+,d2                    get plane 3 byte
gr27a_r80   bsr     moon_cmap                  get color map index of next pixel
gr27a_r90   dbra    d5,gr27a_r20                  if done 4 pixels, start on next one
            addi.l  #1,count_gdrc(a6)          increment count in pixrow buffers
            dbra    d0,gr27a_r10                  keep going until done with a row
            bra     rts
*
*
* getoffsets -- derive the plane 2 and plane 3 pointers from the plane 1
*               pointer.
*      In:   a1 = plane 1 address, a6 = gcb
*      Out:  a2 = a1 + plane2_offset(a6),  a3 = a1 + plane3_offset(a6)
getoffsets  movea.l plane2_offset(a6),a2
            adda.l  a1,a2                      a2 = same byte in plane 2
            movea.l plane3_offset(a6),a3
            adda.l  a1,a3                      a3 = same byte in plane 3
            rts
*
*           get color map index for moonunit; d2 = an index into xmap
*
*
* moon_cmap -- combine one bit from each of the three planes into an index.
*      In:   d6 = bit number, a1/a2/a3 = current byte in planes 1/2/3
*      Out:  d2 = 4*(plane 1 bit) + 2*(plane 2 bit) + (plane 3 bit)
*                 0 black   1 red      2 green  3 yellow
*                 4 blue    5 magenta  6 cyan   7 white
*            condition codes reflect d2 (as after a moveq of the result)
moon_cmap   moveq   #0,d2                       start from black
            btst    d6,(a1)
            beq.s   moon_p2
            bset    #2,d2                       plane 1 bit contributes 4
moon_p2     btst    d6,(a2)
            beq.s   moon_p3
            bset    #1,d2                       plane 2 bit contributes 2
moon_p3     btst    d6,(a3)
            beq.s   moon_end
            bset    #0,d2                       plane 3 bit contributes 1
moon_end    tst.l   d2                          leave N/Z describing the index
            rts
*
*
*
*  dump graphics colored (ROTATED DUMP GRAPHICS COLORED)
*         DITHER; COLOR; ROTATED
*
* gdump_c -- build one dithered colour pixrow for the ROTATED dump.
*   Stack on entry: return address, then the gcb pointer (popped here).
*   The caller's a5/a6 are saved in the gcb at old_a5; exit is through the
*   shared label "rts" (defined outside this section).
*   Device types below crt36c are handled by gr27a, all others by gr36c_gdc.
*
*   gr36c_gdc register use:
*     a1 = current pixel; stepped back one frame buffer line per pixel
*     d0 = dbra count of whole output bytes, (n_glines / 4) - 1
*     d7 = n_glines mod 4 = pixels belonging to a final partial byte (word)
*     d5 = 2-bit field within the output byte (3 = leftmost .. 0)
*     d1 = dot row (0,1)      d3 = plane (0,1,2)
*     d2 = colour map index of the current pixel (from getcmapind)
*     rgltemp5(a6) = 0 during the whole bytes, 1 during the partial byte
*
*   d7 is handled with word operations only.  The decrement at drow used to
*   be a long subtract whose long result was branched on, so it relied on the
*   upper word of d7 happening to be zero after clear_buff.
*
gdump_c     equ *
            movea.l 4(sp),a0                   a0 = address of pointer to gcb
            move.l  (sp)+,(sp)                 pop the argument; return address takes its place
            movem.l a5-a6,old_a5(a0)           save caller's a5/a6 in the gcb
            movea.l a0,a6                      a6 = address of pointer to gcb
            movea.l addr2(a6),a5               a5 = address of string (pixrowbuff)
            movea.l lower_left(a6),a1          a1 = address of lower left
            adda.l  index(a6),a1               a1 = address of line dumped
            bsr     clear_buff                 clear pixrow buffers each time
*
            move    devicetype(a6),d1
            cmpi    #crt36c,d1
            blt     gr27a                      treat 36c, gator, gbox, topc same
*
*
gr36c_gdc   move.l  rgltemp1(a6),count_gdrc(a6) temporary count holder
            move    n_glines(a6),d0
            lsr     #2,d0                      n_glines div 4
            subq    #1,d0                      d0 = bytes left to go in line
            move    n_glines(a6),d7            get nglines mod 4
            and     #3,d7                      d7 > 0 shows we need to go back and finish
*                                                         each line
            clr.l   rgltemp5(a6)               if rgltemp5 is 0, then don't count down d7
            bsr     getcmapind                 d2 = color map index
*
gdc10       moveq   #3,d5                      d5 = which 2 bits in byte of buffer
*                                                  3 => leftmost two
gdc20       clr.l   d1                         d1 = dotrow
gdc21       cmpi.b  #2,d1
            beq     drow
gdc22       clr.l   d3                         d3 = plane
gdc23       cmpi.b  #3,d3
            beq     pln_c
* sqtcmap entry address = addr3 + (xmap[d2] - 1) * 4 + plane
            movea.l addr4(a6),a2               get addr of xmap
            adda.l  d2,a2                      get addr of xmap[cmapindex]
            clr.l   d4
            move.b  (a2),d4                    d4 = index into sqtcmap
            subq    #1,d4
            lsl     #2,d4                      multiply by 4 (offset in sqtcmap)
            add.l   d3,d4                      second offset in sqtcmap
            movea.l addr3(a6),a3
            adda.l  d4,a3                      a3 = addr sqtcmap entry
* destination = a5 + dotrow*rowoffset + plane*planeoffset + count_gdrc
            movea.l a5,a4
            move.l  rowoffset(a6),d4           d4 = working reg
            mulu    d1,d4
            adda.l  d4,a4
            move.l  planeoffset(a6),d4
            mulu    d3,d4
            adda.l  d4,a4
            adda.l  count_gdrc(a6),a4          pixrow byte location
            clr.l   d4
            move.b  (a3),d4                    get sqtcmap entry
            tst.b   d1                         if dotrow = 0
            beq     gdc30                      go to gdc30
            andi.w  #3,d4                      get rightmost 2 bits of sqtcmap entry
            bra     gdc40
gdc30       andi.b  #12,d4                     get next-to-rightmost bits
            lsr     #2,d4                      shift them to right
* shift the 2-bit pattern left by 2*d5 (d5 = 0 needs no shift)
gdc40       cmpi.b  #3,d5                      get mask for which bits of pixrowbuff
            bne     gdc50                      if d5 is 3, shift left 6
            lsl     #6,d4
            bra.s   gdc60
gdc50       cmpi.b  #2,d5                      if d5 is 2, shift left 4
            bne     gdc70
            lsl     #4,d4
            bra.s   gdc60
gdc70       cmpi.b  #1,d5                      if d5 is 1, shift left 2
            bne     gdc60
            lsl     #2,d4
*
gdc60       or.b    d4,(a4)                    or it with the pixrow byte
            addq    #1,d3                      increment plane
            bra.s   gdc23
pln_c       addq    #1,d1                      increment dotrow
            bra     gdc21
* pixel finished: decide whether a1 may step to the next line
drow        tst.l   rgltemp5(a6)               are we counting down d7?
            beq     nocountdown                if not, go to normal situation
            subq    #1,d7                      word decrement, matching every other use of d7
            bge     proceed
            bra     skipit                     if d7 is 0 then last pixel
*
nocountdown move.l  d0,d6                      last pixel?
            or.l    d5,d6
            tst     d6                         if d0 and d5 are 0, test d7
            bne     proceed
            tst     d7                         if d7 is 0, then skip address decrement
            beq     skipit
proceed     suba    bytesperline(a6),a1        decrement to next pixel (in next line)
skipit      bsr     getcmapind                 get color map index of next pixel
            dbra    d5,gdc20                   if done 4 pixels, start on next one
            addi.l  #1,count_gdrc(a6)          increment count in pixrow buffers
            dbra    d0,gdc10                   keep going until done with a line
* whole bytes done; one more pass builds the partial byte when d7 > 0
            tst     d7                         if d7 is nonzero, go thru loop once more
            ble     gdc90
            move.l  #1,rgltemp5(a6)            set the switch
            subq    #1,d7                      the first leftover pixel is already fetched
            clr.l   d0                         exactly one more output byte
            bra     gdc10
gdc90       bra     rts
* 
* 
* getcmapind -- reduce the pixel at (a1) to a colour map index; a1 is left
*               unchanged.
*      In:   a1 = current pixel, a6 = gcb
*      Out:  d2 = pixel AND max_pen AND graph_mask (long)
*            d6 = max_pen AND graph_mask, d4 = graph_mask (zero extended)
getcmapind  moveq   #0,d6
            move.b  graph_mask(a6),d6          d6 = graphics plane mask, zero extended
            move.l  d6,d4                      d4 keeps the bare mask
            and.l   max_pen(a6),d6             d6 = planes that are both valid and enabled
            moveq   #0,d2
            move.b  (a1),d2                    d2 = raw pixel byte
            and.l   d6,d2                      d2 = colour map index
            rts
*
* gr27a -- rotated dithered colour row for device types below crt36c (three
*          bit planes).  Entered from gdump_c with a5 = pixrow buffer and
*          a6 = gcb; a1 is recomputed here.
*   a1/a2/a3 = current byte in planes 1/2/3; stepped back one line per pixel
*   d6 = bit number within the plane bytes, 7 - (index mod 8); constant for
*        the whole row
*   d0 = dbra count of whole output bytes, (n_glines / 4) - 1
*   d7 = n_glines mod 4; non-zero requests one extra output byte
*   d5, d1, d3 = 2-bit field, dot row and plane
*   d2 = colour index 0..7 from moon_cmap
*   a0 = scratch pointer for the xmap and sqtcmap lookups
gr27a       movea.l lower_left(a6),a1           address of plane 1 is lowerleft + (index/8)
            move.l  index(a6),d6
            lsr     #3,d6
            adda.l  d6,a1
            bsr     getoffsets
            move.l  rgltemp1(a6),count_gdrc(a6) temporary count holder
            move.l  index(a6),d2
            and     #7,d2                                 
            moveq   #7,d6
            sub     d2,d6                      d6 = which bit of frame buff byte
            bsr     moon_cmap                  get color map index for moonunit; put in d2
            move    n_glines(a6),d0
            lsr     #2,d0                      n_glines div 4
            subq    #1,d0                      d0 = pixels left to go in line
            move    n_glines(a6),d7            get nglines mod 4
            and     #3,d7                      d7 > 0 shows we need to go back and finish 
*                                                         each line
gr27a_10    moveq   #3,d5                      d5 = which 2 bits in byte of buffer
*                                                  3 => leftmost two
gr27a_20    clr.l   d1                         d1 = dotrow
gr27a_21    cmpi.b  #2,d1
            beq     gr27a_drow
gr27a_22    clr.l   d3                         d3 = plane
gr27a_23    cmpi.b  #3,d3
            beq     gr27a_pln    
* sqtcmap entry address = addr3 + (xmap[d2] - 1) * 4 + plane
            movea.l addr4(a6),a0               a0 = get addr of xmap
            adda.l  d2,a0                      get addr of xmap[cmapindex]
            clr.l   d4
            move.b  (a0),d4                    d4 = index into sqtcmap
            subq    #1,d4               
            lsl     #2,d4                      multiply by 4 (offset in sqtcmap)
            add.l   d3,d4                      second offset in sqtcmap
            movea.l addr3(a6),a0
            adda.l  d4,a0                      a0 = addr sqtcmap entry
* destination = a5 + dotrow*rowoffset + plane*planeoffset + count_gdrc
            movea.l a5,a4
            move.l  rowoffset(a6),d4           d4 = computation of offset into pixrowbuff              
            mulu    d1,d4
            adda.l  d4,a4
            move.l  planeoffset(a6),d4
            mulu    d3,d4
            adda.l  d4,a4
            adda.l  count_gdrc(a6),a4          pixrow byte location
            clr.l   d4
            move.b  (a0),d4                    d4 = dither pattern (sqtcmap entry)
            tst.b   d1                         if dotrow = 0
            beq     gr27a_30                      go to gr27a_30
            andi.w  #3,d4                      get rightmost 2 bits of sqtcmap entry
            bra     gr27a_40                     
gr27a_30    andi.b  #12,d4                     get next-to-rightmost bits 
            lsr     #2,d4                      shift them to right 
* shift the 2-bit pattern left by 2*d5 (d5 = 0 needs no shift)
gr27a_40    cmpi.b  #3,d5                      get mask for which bits of pixrowbuff 
            bne     gr27a_50                      if d5 is 3, shift left 6 
            lsl     #6,d4 
            bra.s   gr27a_60 
gr27a_50    cmpi.b  #2,d5                      if d5 is 2, shift left 4 
            bne     gr27a_70 
            lsl     #4,d4 
            bra.s   gr27a_60 
gr27a_70    cmpi.b  #1,d5                      if d5 is 1, shift left 2 
            bne     gr27a_60 
            lsl     #2,d4 
* 
gr27a_60    or.b    d4,(a4)                    or it with the pixrow byte 
            addq    #1,d3                      increment plane 
            bra.s   gr27a_23 
gr27a_pln   addq    #1,d1                      increment dotrow 
            bra     gr27a_21 
* pixel done: the three suba instructions move the plane 1/2/3 pointers back
* by bytesperline (no data is loaded here); moon_cmap then reads the same bit
* d6 of the new line.
gr27a_drow  clr.l   d2                          decrement to next line
            move.w  bytesperline(a6),d2
            suba    d2,a1                       get plane 1 byte
            suba    d2,a2                       get plane 2 byte
            suba    d2,a3                       get plane 3 byte
            bsr     moon_cmap                  get color map index of next pixel 
            dbra    d5,gr27a_20                   if done 4 pixels, start on next one 
            addi.l  #1,count_gdrc(a6)          increment count in pixrow buffers 
            dbra    d0,gr27a_10                   keep going until done with a line
* NOTE(review): the extra pass below always processes four pixels and keeps
* stepping a1/a2/a3 for each of them, whatever the value of n_glines mod 4;
* the byte-per-pixel path (gr36c_gdc) stops stepping after the last real
* pixel instead.
            tst     d7                          if d7 is nonzero, go thru loop once more
            beq     gr27a_90
            clr.l   d7
            clr.l   d0
            bra     gr27a_10
gr27a_90    bra     rts 
* 
*
*  init_err -- seed the two accumulated-error arrays used by error diffusion.
*
*        Stack on entry: return address, gcb pointer, biggest_xval (both
*        arguments are longs and are popped here).
*        The header of the original routine named only the gcb argument.
*
*        Starting at the addresses held in addr3 and addr4 of the gcb, the
*        routine stores 3 * biggest_xval consecutive 16-bit values into each
*        array (3 rows of biggest_xval entries).  Every value is the next
*        result of myrand, starting from the fixed seed 731.  Small random
*        starting errors help break up the regular patterns that error
*        diffusion can otherwise produce.
*        NOTE(review): the earlier comment says entry 0 and the entry after
*        biggest_xval of each row are left as overflow slots; the stores here
*        are simply consecutive from addr3/addr4 -- confirm the array layout
*        with the caller.
*
*        Registers d3-d6, a3 and a4 are used because myrand does not touch
*        them (it uses d0-d2 and a2), so nothing needs saving around the call.
*        a5/a6 are saved in the gcb on entry and restored before returning.
*
init_err    equ *
            movea.l 4(sp),a0                   a0 = gcb pointer (first argument)
            move.l  (sp)+,(sp)                 drop it: return address slides up
            move.l  4(sp),d6                   d6 = biggest_xval (second argument)
            move.l  (sp)+,(sp)                 drop it as well
            movem.l a5-a6,old_a5(a0)           save caller's a5/a6 in the gcb
            movea.l a0,a6                      a6 = gcb
            movea.l addr3(a6),a3               a3 = accum_err_one array
            movea.l addr4(a6),a4               a4 = accum_err_two array
            subq.l  #1,d6                      d6 = dbra count for one row
            moveq   #2,d3                      d3 = dbra count for the 3 rows
            move.l  #731,-(sp)                 seed for myrand lives on the stack
outloop     move.l  d6,d5                      d5 = entries left in this row
midloop     bsr     myrand                     replaces seed, pushes 16-bit result
            move.w  (sp)+,d4
            move.w  d4,(a3)+                   next word of accum_err_one
            bsr     myrand
            move.w  (sp)+,d4
            move.w  d4,(a4)+                   next word of accum_err_two
            dbra    d5,midloop                 rest of the row
            dbra    d3,outloop                 next row
            move.l  (sp)+,d0                   discard the final seed
            movem.l old_a5(a6),a5-a6           restore caller's a5/a6
            rts
*
*      myrand   this was melded together from the PAWS RANDOM and RAND
*               programs.  
*               Generate a pseudo-random number with the formula
*               Xn <- (16807 * Xn-1) MOD (2^31 - 1), where Xn-1 is the
*               previous random number. A shortcut computation is:
*                   C <- 16807 * Xn-1.
*                   Xn <- C MOD 2^31 + C DIV 2^31.
*                   If Xn > 2^31 - 1, then Xn <- Xn - (2^31 - 1)
*
*               Returns a 16 bit integer which is scaled to the range 
*                  -8 <= n <= +8
*
*               I give it my own random seed on the stack in the calling 
*               program
*
* Calling sequence: the caller keeps the current seed (a long) on top of the
* stack and does bsr myrand.  On return the stack holds the new seed with a
* 16-bit result word pushed on top of it; the caller pops the result and
* leaves the seed in place for the next call.
* Uses a2 (return address), d0, d1 and d2; nothing is saved.
* NOTE(review): the scaling below multiplies a 16-bit value by 16 and keeps
* the high word, which gives 0..15, so the value returned is -8..+7; +8 is
* never produced even though the comments say -8 <= n <= +8.
myrand      equ *
            movea.l (sp)+,a2        save ret addr
            moveq   #16,d2          generate numbers in range 0-16
            move.l  (sp)+,d0        use last random seed 
            move.l  d0,d1           leave bottom 16 bits in d0
            swap    d1              get top 16 bits into d1
            mulu    #16807,d0       get one partial product in d0
            mulu    #16807,d1       high order partial product in d1
            swap    d1              align middle 16 bits of product in high d1
            add.w   d1,d1           most of (product div 2^31) is in low d1
            add.l   d1,d0           compute (product mod 2^31) + (product div 2^31)
            bcc.s   rnd1            any carries out of 32nd bit are part of the div
            addq.l  #2,d0           (so propagate into appropriate position)
rnd1        bpl.s   rnd2            bit 31 is also part of the div
            sub.l   #$7FFFFFFF,d0        so remove it and add it back to bit 0
rnd2        move.l  d0,-(sp)        save this as next seed
* scale: take bits 30..15 of the seed as a 16-bit fraction, multiply by d2
            asl.l   #1,d0           normalize
            swap    d0                   to 16 bits
            mulu    d2,d0           scale to range
            swap    d0
            sub.w   #8,d0           now make it -8 <= n <= 8
            move.w  d0,-(sp)        return result
* return through a2 because the return address was popped on entry
            jmp     (a2)
*
*
*  dump graphics   ROTATED, MONO, ONE DOT PER PIXEL         
*
* gdump_ed -- rotated monochrome dump, one printer dot per pixel.
*   Stack on entry: return address, then the gcb pointer (popped here).
*   Walks one pixel column of the frame buffer (selected by index), stepping
*   back bytesperline per pixel for n_glines pixels, and packs the on/off
*   state of each pixel into successive bits (bit 7 first) of the string at
*   addr1.  Exit is through the shared label "rts".
*     a0 = output byte       a1 = current frame buffer byte
*     d2 = output bit number d3 = dbra count of lines   d4 = bytes per line
*     d0 = source bit number (bit-plane devices only, from set_x_ed)
*     d5 = plane mask (byte-per-pixel devices only)
gdump_ed    equ *
            movea.l 4(sp),a0                   a0 = gcb pointer argument
            move.l  (sp)+,(sp)                 drop it: return address slides up
            movem.l a5-a6,old_a5(a0)           save caller's a5/a6 in the gcb
            movea.l a0,a6                      a6 = gcb
            movea.l addr1(a6),a0               a0 = output string
            movea.l lower_left(a6),a1          a1 = lower left of frame buffer
            move    bytesperline(a6),d4        d4 = distance between lines
            move    n_glines(a6),d3
            subq    #1,d3                      d3 = line count for dbra
            move.l  max_pen(a6),d5
            AND.B   GRAPH_MASK(A6),D5          d5 = planes enabled for graphics
            move.l  index(a6),d0               d0 = number of the line being dumped
            bsr     set_x_ed                   position a1, d0 = bit within byte
            moveq   #7,d2                      start at the top bit of the output byte
            move    devicetype(a6),d1
            cmpi    #crt36c,d1
            blt     ge26a_ed                   bit-per-pixel devices
*
* byte-per-pixel devices: a pixel is on when any enabled plane bit is set
ge36c_ed    move.b  (a1),d6
            and.b   d5,d6
            beq.s   ge36c10_ed
            bset    d2,(a0)                    pixel on
            bra.s   ge36c20_ed
ge36c10_ed  bclr    d2,(a0)                    pixel off
ge36c20_ed  subq.b  #1,d2                      next output bit
            bge.s   ge36c30_ed
            addq    #1,a0                      byte full: move to the next one
            moveq   #7,d2
ge36c30_ed  suba    d4,a1                      previous frame buffer line
            dbra    d3,ge36c_ed
            bra     rts
*
* bit-per-pixel devices: test bit d0 of the frame buffer byte
ge26a_ed    move.b  (a1),d5
            btst    d0,d5
            beq.s   ge26a10_ed
            bset    d2,(a0)                    pixel on
            bra.s   ge26a20_ed
ge26a10_ed  bclr    d2,(a0)                    pixel off
ge26a20_ed  subq.b  #1,d2                      next output bit
            bge.s   ge26a30_ed
            addq    #1,a0                      byte full: move to the next one
            moveq   #7,d2
ge26a30_ed  suba    d4,a1                      previous frame buffer line
            dbra    d3,ge26a_ed
            bra     rts
*
*
* set_x_ed -- position a1 on the frame buffer byte that holds column d0.
*      In:   d0 = number of the line being dumped, a1 = start address,
*            a6 = gcb
*      Out:  a1 advanced by d0 bytes when devicetype >= crt36c, by d0/8 bytes
*            for other non-zero device types, and by 2*(d0/8) bytes for
*            device type 0
*            d0 = 7 - (d0 mod 8), the bit number within that byte
*            d1 = the offset that was added to a1
set_x_ed    equ *
            move    d0,d1                        work on a copy of the line number
            cmpi    #crt36c,devicetype(a6)
            bge.s   sx10_ed                      one byte per pixel: offset = d0
            lsr     #3,d1                        eight pixels per byte
            tst     devicetype(a6)
            bne.s   sx10_ed
            add     d1,d1                        device type 0: double the byte offset
sx10_ed     adda    d1,a1
            andi.l  #7,d0                        pixel position within the byte
            neg     d0
            addq    #7,d0                        bit number counts down from 7
            rts
*
*     MONO, NON-ROTATED, ONE DOT PER PIXEL
*
*  dump graphics reduced
*
* gdump_red_ed -- non-rotated monochrome dump, one printer dot per pixel.
*   Stack on entry: return address, then the gcb pointer (popped here).
*   Converts one frame buffer row (row number in index) into the string at
*   addr1.  The caller's a5/a6 are saved in the gcb; exit is through the
*   shared label "rts" (defined outside this section).
*   Device types at or above crt36c use gr36c_ed; lower device types are
*   dispatched through grtable_ed.
gdump_red_ed  equ *
            movea.l 4(sp),a0                   a0 = address of pointer to gcb
            move.l  (sp)+,(sp)                 stack return address
            movem.l a5-a6,old_a5(a0)
            movea.l a0,a6                      a6 = address of pointer to gcb
            movea.l addr1(a6),a0               a0 = address of string
            movea.l lower_left(a6),a1          a1 = address of lower left
            move.l  index(a6),d0               d0 = row to be dumped
            mulu    bytesperline(a6),d0        d0 = row * bytes/line
            suba.l  d0,a1                      a1 = address of line dumped
            move    devicetype(a6),d1
            cmpi    #crt36c,d1
            bge     gr36c_ed                   treat 36c, gator, gbox, topc same
* Jump table: every entry is a 2-byte short branch, so the device type is
* doubled to index it.  Only device types 0 and 1 have entries; the entries
* must stay bra.s for the indexing to remain valid.
            add     d1,d1
            jmp     grtable_ed(d1)
grtable_ed  bra.s   gr26a_ed
            bra.s   gr36a_ed
            
* device type 0: copy 50 bytes, taking every second byte of the source
* (the loop is entered at its dbra, so #50 gives exactly 50 copies)
gr26a_ed    moveq   #50,d0             50 bytes/line; 1 dumped at a time
            bra.s   gr26a20_ed 
gr26a10_ed  move.b  (a1),(a0)+
            addq    #2,a1
gr26a20_ed  dbra    d0,gr26a10_ed
            bra     rts
            
* device type 1: copy 16 longwords (64 bytes) straight across
gr36a_ed    moveq   #16,d0             64 bytes/line; 4 dumped at a time
            bra.s   gr36a20_ed 
gr36a10_ed  move.l  (a1)+,(a0)+
gr36a20_ed  dbra    d0,gr36a10_ed
            bra     rts
            
* byte-per-pixel devices: pack 8 pixels into each output byte, bit 7 first.
*   d0 = dbra count of output bytes, ((hard_xmax+1) / 8) - 1
*   d3 = max_pen AND graph_mask; a pixel is on when (pixel AND d3) <> 0
gr36c_ed    move    hard_xmax(a6),d0
            addq    #1,d0
            lsr     #3,d0                      (xmax+1) div 8
            subq    #1,d0
            move.l  max_pen(a6),d3
            and.b   graph_mask(a6),d3          dump only planes write enabled for graphics
            tst.b   non_square(a6)             if nonsquare pixels
            bne.s   grnsp_ed                         then goto grnsp_ed
gr36c10_ed  moveq   #7,d1
            moveq   #0,d2
gr36c20_ed  move.b  (a1)+,d4
            and.b   d3,d4
            beq.s   gr36c30_ed
            bset    d1,d2
gr36c30_ed  dbra    d1,gr36c20_ed
            move.b  d2,(a0)+
            dbra    d0,gr36c10_ed
            bra     rts

* for non square pixels, output one dot for each pixel PAIR
*
* Each pair is read as one 16-bit word; only the low byte of the word is
* tested against d3.
grnsp_ed    moveq   #7,d1                      for non square pixels
            moveq   #0,d2
grnsp20_ed  move.w  (a1)+,d4                   dts bug FSDat00048 (enhancement)
*           or.b    (a1)+,d4                   if either pixel pair is set, output     dts bug FSDat00048 (enhancement)
            and.b   d3,d4
            beq.s   grnsp30_ed
            bset    d1,d2
grnsp30_ed  dbra    d1,grnsp20_ed
            move.b  d2,(a0)+
            dbra    d0,grnsp_ed
            bra     rts
            
*
*
*  dump graphics error diffusion colored 
*          COLOR; ERROR DIFFUSION;  NOT ROTATED
*
*  On entry the stack holds (top first): return address, gcb address,
*  address of the system color map.  Both parameters are popped here by
*  sliding the return address up over them.
*
*  The frame buffer row selected by index(a6) is converted into three
*  pixrow bit planes, planeoffset(a6) bytes apart, starting rgltemp1(a6)
*  bytes into the buffer at addr2(a6).  Device types below crt36c are
*  handed to gr27a_edc.
*
*  Register use in the gr36c_edc path:
*    d0 = pixels left in the line, less one (counted down per pixel)
*    d1 = plane counter, 2 down to 0
*    d2 = color map index returned by getcmapind_r
*    d3 = bit number in the current pixrow byte, 7 down to 0
*    d4 = byte offset of the current pixel in the error arrays (2/pixel)
*    d5 = byte offset of the current pixrow byte from addr2(a6)
*    d6,d7 = error arithmetic scratch
*    a0 = scratch / pointer to the color map byte for the current plane
*    a2 = system color map (4 bytes per entry, one byte per plane)
*    a3 = accum_err_one pointer,  a4 = accum_err_two pointer
*    a5 = pixrow byte pointer,    a6 = gcb
*
*  NOTE(review): d0 is only tested after each group of 8 pixels, so a
*  hard_xmax that is not a multiple of 8 is rounded up to the next byte.
*  NOTE(review): -2(a4) for the first pixel and 2(a3) for the last pixel
*  address one word outside the nominal array bounds; presumably the
*  caller allocates guard words - verify.
*
gdump_red_edc   equ *
            movea.l 4(sp),a0                   a0 = gcb address (first parameter)
            move.l  (sp)+,(sp)                 pop it: slide return address up
            movea.l 4(sp),a2                   a2 = address system color map 
            move.l  (sp)+,(sp)                 pop it: slide return address up
            movem.l a5-a6,old_a5(a0)           save caller's a5/a6 in the gcb
            movea.l a0,a6                      a6 = gcb address
            movea.l addr2(a6),a5               a5 = address of string (pixrowbuff)
            movea.l lower_left(a6),a1          a1 = address of lower left
            move.l  index(a6),d0               d0 = row to be dumped
            mulu    bytesperline(a6),d0        d0 = row * bytes/line (low word of row)
            suba.l  d0,a1                      a1 = address of line dumped
            bsr     clear_buff                 clear pixrow buffers each time
*
            move    devicetype(a6),d1
            cmpi    #crt36c,d1
            blt     gr27a_edc                  treat 36c, gator, gbox, topc same
*
*
gr36c_edc   clr.l   d0
            move.w  hard_xmax(a6),d0         
            subq    #1,d0                      d0 = pixels left to go in line, less one
*
            movea.l addr3(a6),a3               a3 = accum_err_one array
            movea.l addr4(a6),a4               a4 = accum_err_two array
            move.l rgltemp1(a6),d5             d5 = running count of pixrowbuff offset
            adda.l  d5,a5                      don't overwrite the initial esc seq
            clr.l   d4                         d4 = temporary count holder in err arrays
loop2       moveq   #7,d3                      d3 = which bit in pixrow array
loop3       bsr     getcmapind_r               d2 = color map index of present pixel
            tst.l   rgltemp5(a6)               is background=on
            bne     edc10                      if yes, proceed as usual
            tst.l   d2                         if no, test color map index.  if <>0
            bne     edc10                          then proceed as usual
            movea.l a5,a0
            bset    d3,(a0)                    otherwise   set all three planes on,
            adda.l  planeoffset(a6),a0
            bset    d3,(a0)                    to print white
            adda.l  planeoffset(a6),a0
            bset    d3,(a0)                    to print white
            bra     edc32                         jump to end of loop and don't propagate err
edc10       moveq   #2,d1                      d1 = plane
* get syscmap entry
            movea.l a2,a0                      a0 = working copy of system color map loc
            mulu    #4,d2                      d2 = offset into system color map
            adda.l  d2,a0                      a0 = location of syscmap entry
loop1       clr.l   d7
            move.b  (a0),d7                    d7 = DAC value for this plane (0..255)
            cmp.w   #255,d7                     to eliminate random noise, don't
            bne     edc12                       diffuse error if color table DAC value
            bset    d3,(a5)                     is 0 or 255.  255: always set the bit
            bra     edc22
edc12       tst.w   d7                         0: leave the bit clear and
            beq     edc22                       diffuse nothing
edc14       add.w   (a3),d7                    d7 = DAC value + accumulated error
            cmpi.w  #127,d7                      compare to half dac
            ble     edc20                      if err > 127 then set the bit
            bset    d3,(a5)                        in the pixrow buffer 
*  propagate error
            subi    #255,d7                    delta = d7 - 255
edc20       asr     #2,d7                      1/4  delta
            add.w   d7,-2(a4)                  accum_err_two-1 = accum_err_two-1 + delta/4
            move.w  d7,d6
            asr     #1,d6                      d6 = 1/8 delta
            add.w   d6,d7                      d7 = 3/8 delta
            add.w   d7,2(a3)                   3/8 to the next pixel on this line
            add.w   d7,(a4)                    3/8 to the same pixel in accum_err_two
edc22       tst     d1                         skip over these if d1 =0
            ble     edc30
            adda.l  rgltemp4(a6),a3            set arrays up for next plane
            adda.l  rgltemp4(a6),a4
            adda.l  planeoffset(a6),a5           set pixrow up for next plane
            adda.l  #1,a0                      next plane in color map entry
edc30       dbra    d1,loop1                   next plane
edc32       sub.w   #1,d0                      another pixel finished
            movea.l addr3(a6),a3               reset array locations   
            movea.l addr4(a6),a4               
            movea.l addr2(a6),a5               back to plane 1 of the pixrow buffer
            adda.l  d5,a5                      don't write over initial esc seq            
            addi.l  #2,d4                      increment array locs
            adda.l  d4,a3            
            adda.l  d4,a4            
edc40       dbra    d3,loop3                   finish bits in byte
*  d5 is a long offset that is re-added to addr2(a6) for every pixel, so
*  it is advanced as a long; a byte-sized add would wrap after 255 and
*  pull a5 back by 256 bytes on lines wider than that.
            addq.l  #1,d5                      increment pixelrow array
            adda.l  #1,a5                      update count in pixrow buff
            tst     d0  
            bge     loop2
            bra     rts
                           
*
*  gr27a_edc: non rotated color dump for device types below crt36c.
*  Reached from gdump_red_edc with a1 = address of the line dumped,
*  a5 = pixrow buffer (addr2) and a6 = gcb.  Each pixel's index (d2, from
*  moon_cmap) is copied bit by bit into three pixrow planes that are
*  planeoffset(a6) bytes apart: bit 0 of d2 goes to the first plane,
*  bit 1 to the second, bit 2 to the third.
*
*  Exactly 64 pixrow bytes (512 pixels) are produced per call.
*
*  NOTE(review): getoffsets presumably loads a2/a3 with the plane 2 and
*  plane 3 addresses matching a1, and moon_cmap presumably reads bit d6
*  of (a1),(a2),(a3) - both are defined elsewhere; confirm there.
*
gr27a_edc   bsr     getoffsets                 NO error diffusion here, no color map!
            move.l  #63,d0                     64 bytes/line
            move.l  rgltemp1(a6),d5            d5 = running count of pixrowbuff offset
            adda.l  d5,a5                      don't overwrite the initial esc seq
lp2         moveq   #7,d6                      d6 = which bit in pixrow array
lp3         bsr     moon_cmap                  d2 = color map index of present pixel
            tst.l   d6                         fix addresses for next time around
            bne     dontfix
*  Last bit of the byte: the three loads below exist only for their
*  post-increment, stepping a1/a2/a3 to the next byte.  The value left in
*  d3 is not read again in this routine.
            move.b  (a1)+,d3                    get plane 1 byte
            move.b  (a2)+,d3                    get plane 2 byte
            move.b  (a3)+,d3                    get plane 3 byte
dontfix     tst.l   rgltemp5(a6)               is background=on
            bne     gr27_edc10                      if yes, proceed as usual
            tst.l   d2                         if no, test color map index.  if <>0
            bne     gr27_edc10                          then proceed as usual
            movea.l a5,a0
            bset    d6,(a0)                    otherwise   set all three planes on,
            adda.l  planeoffset(a6),a0
            bset    d6,(a0)                    to print white
            adda.l  planeoffset(a6),a0
            bset    d6,(a0)                    to print white
            bra     gr27_edc32                    jump to end of loop and don't propagate err
gr27_edc10  moveq   #2,d1                      d1 = plane
            movea.l a5,a4                      a4 = working reg for pixrowbuff
*  d4 = 2 - d1 runs 0,1,2 while d1 counts 2,1,0, so the planes are
*  visited in buffer order with index bits 0,1,2.
lp1         move.l  #2,d4
            sub.l   d1,d4                      d4 = bit to test in "color map" index
            btst    d4,d2
            beq     gr27_edc20
            bset    d6,(a4)
gr27_edc20  adda.l  planeoffset(a6),a4                    set pixrow up for next plane
gr27_edc30  dbra    d1,lp1                     next plane
gr27_edc32  movea.l a5,a4                      restore a4                      
gr27_edc40  dbra    d6,lp3                     finish bits in byte
            adda.l  #1,a5                      update count in pixrow buff
            dbra    d0,lp2  
            bra     rts
*
*
*  dump graphics error diffusion colored rotated 
*          COLOR; ERROR DIFFUSION;  ROTATED
*
*  On entry the stack holds (top first): return address, gcb address,
*  address of the system color map.  Both parameters are popped here by
*  sliding the return address up over them.
*
*  Starting at lower_left(a6) + index(a6), the frame buffer is walked one
*  scan line at a time (a1 is moved back by bytesperline(a6) after every
*  pixel) for n_glines(a6) pixels.  The pixels are converted into three
*  pixrow bit planes, planeoffset(a6) bytes apart, starting rgltemp1(a6)
*  bytes into the buffer at addr2(a6).  Device types below crt36c are
*  handed to gr27a_ec.
*
*  Register use in the gr36c_ec path:
*    d0 = pixels left, less one (counted down per pixel)
*    d1 = plane counter, 2 down to 0
*    d2 = color map index returned by getcmapind
*    d3 = bit number in the current pixrow byte, 7 down to 0
*    d5 = byte offset of the current pixrow byte from addr2(a6)
*    d6,d7 = error arithmetic scratch
*    (sp) = byte offset of the current pixel in the error arrays (2/pixel)
*    a0 = scratch / pointer to the color map byte for the current plane
*    a2 = system color map (4 bytes per entry, one byte per plane)
*    a3 = accum_err_one pointer,  a4 = accum_err_two pointer
*    a5 = pixrow byte pointer,    a6 = gcb
*
*  NOTE(review): d0 is only tested after each group of 8 pixels, so an
*  n_glines that is not a multiple of 8 is rounded up to the next byte.
*  NOTE(review): -2(a4) for the first pixel and 2(a3) for the last pixel
*  address one word outside the nominal array bounds; presumably the
*  caller allocates guard words - verify.
*
gdump_edc   equ *
            movea.l 4(sp),a0                   a0 = gcb address (first parameter)
            move.l  (sp)+,(sp)                 pop it: slide return address up
            movea.l 4(sp),a2                   a2 = address system color map 
            move.l  (sp)+,(sp)                 pop it: slide return address up
            movem.l a5-a6,old_a5(a0)           save caller's a5/a6 in the gcb
            movea.l a0,a6                      a6 = gcb address
            movea.l addr2(a6),a5               a5 = address of string (pixrowbuff)
            movea.l lower_left(a6),a1          a1 = address of lower left
            adda.l  index(a6),a1               a1 = address of line dumped
            bsr     clear_buff                 clear pixrow buffers each time
*
            move    devicetype(a6),d1
            cmpi    #crt36c,d1
            blt     gr27a_ec                   treat 36c, gator, gbox, topc same
*
gr36c_ec    clr.l   d0
            move.w  n_glines(a6),d0          
            subq    #1,d0                      d0 = pixels left to go in line, less one
*
            movea.l addr3(a6),a3               a3 = accum_err_one array
            movea.l addr4(a6),a4               a4 = accum_err_two array
            move.l  rgltemp1(a6),d5             d5 = running count of pixrowbuff offset
            adda.l  d5,a5                      don't overwrite the initial esc seq
            move.l  #0,-(sp)                   tos= temporary count holder for err arrays
lop2        moveq   #7,d3                      d3 = which bit in pixrow array
lop3        bsr     getcmapind                 d2 = color map index of present pixel
            suba    bytesperline(a6),a1        prepare for next pixel
            tst.l   rgltemp5(a6)               is background=on
            bne     ec10                       if yes, proceed as usual
            tst.l   d2                         if no, test color map index.  if <>0
            bne     ec10                           then proceed as usual
            movea.l a5,a0
            bset    d3,(a0)                    otherwise   set all three planes on,
            adda.l  planeoffset(a6),a0
            bset    d3,(a0)                    to print white
            adda.l  planeoffset(a6),a0
            bset    d3,(a0)                    to print white
            bra     ec32                         jump to end of loop and don't propagate err
ec10        moveq   #2,d1                      d1 = plane
* get syscmap entry
            movea.l a2,a0                      a0 = working copy of system color map loc
            mulu    #4,d2                      d2 = offset into system color map
            adda.l  d2,a0                      a0 = location of syscmap entry
lop1        clr.l   d7
            move.b  (a0),d7                    d7 = DAC value for this plane (0..255)
            cmp.w   #255,d7                     to eliminate random noise, don't
            bne     ec12                       diffuse error if color table DAC value
            bset    d3,(a5)                     is 0 or 255.  255: always set the bit
            bra     ec22
ec12        tst.w   d7                         0: leave the bit clear and
            beq     ec22                        diffuse nothing
ec14        add.w   (a3),d7                    d7 = DAC value + accumulated error
            cmpi.w  #127,d7                      compare to half dac
            ble     ec20                      if err > 127 then set the bit
            bset    d3,(a5)                        in the pixrow buffer 
*  propagate error
            subi    #255,d7                    delta = d7 - 255
ec20        asr     #2,d7                      1/4  delta
            add.w   d7,-2(a4)                  accum_err_two-1 = accum_err_two-1 + delta/4
            move.w  d7,d6
            asr     #1,d6                      d6 = 1/8 delta
            add.w   d6,d7                      d7 = 3/8 delta
            add.w   d7,2(a3)                   3/8 to the next pixel in accum_err_one
            add.w   d7,(a4)                    3/8 to the same pixel in accum_err_two
ec22        tst     d1                         skip over these if d1 =0
            ble     ec30
            adda.l  rgltemp4(a6),a3            set arrays up for next plane
            adda.l  rgltemp4(a6),a4
            adda.l  planeoffset(a6),a5           set pixrow up for next plane
            adda.l  #1,a0                      next plane in color map entry
ec30        dbra    d1,lop1                    next plane
ec32        sub.w   #1,d0                      another pixel finished
            movea.l addr3(a6),a3               reset array locations   
            movea.l addr4(a6),a4               
            movea.l addr2(a6),a5               back to plane 1 of the pixrow buffer
            adda.l  d5,a5                      don't write over initial esc seq            
            addi.l  #2,(sp)                    increment array locs
            adda.l  (sp),a3            
            adda.l  (sp),a4            
ec40        dbra    d3,lop3                    finish bits in byte
*  d5 is a long offset that is re-added to addr2(a6) for every pixel, so
*  it is advanced as a long; a byte-sized add would wrap after 255 and
*  pull a5 back by 256 bytes on lines longer than that.
            addq.l  #1,d5                      increment pixelrow array
            adda.l  #1,a5                      update count in pixrow buff
            tst     d0  
            bge     lop2
            move.l  (sp)+,d0                   clean off stack
            bra     rts
*
*
*  gr27a_ec: rotated color dump for device types below crt36c.
*  Reached from gdump_edc with a5 = pixrow buffer (addr2) and a6 = gcb.
*  Each pixel's index (d2, from moon_cmap) is copied bit by bit into
*  three pixrow planes that are planeoffset(a6) bytes apart: bit 0 of d2
*  goes to the first plane, bit 1 to the second, bit 2 to the third.
*
*    d0 = whole pixrow bytes still to produce, less one
*    d3 = bytesperline, subtracted from a1/a2/a3 after every pixel
*    d5 = bit number in the current pixrow byte, 7 down to 0
*    d6 = 7 - (index mod 8), "which bit in frame buff byte"
*    d7 = n_glines mod 8; non zero means one extra, partly filled byte
*
*  NOTE(review): getoffsets presumably loads a2/a3 with the plane 2 and
*  plane 3 addresses matching a1, and moon_cmap presumably reads bit d6
*  of (a1),(a2),(a3) - both are defined elsewhere; confirm there.
*  NOTE(review): an n_glines below 8 makes d0 = -1 on entry to the dbra
*  loop; callers are assumed never to pass that.
*
gr27a_ec    movea.l lower_left(a6),a1           address of plane 1 is lowerleft + (index/8)
            move.l  index(a6),d3
            lsr     #3,d3
            adda.l  d3,a1
            bsr     getoffsets
            clr.l   d3                          
            move.w  bytesperline(a6),d3        d3 = amount to subtract from frame buff addrs
            move    n_glines(a6),d0
            asr     #3,d0                      
            subq    #1,d0                      d0 = whole pixrow bytes to go, less one
            adda.l  rgltemp1(a6),a5            don't step on initial escape seq
            move    n_glines(a6),d7            get nglines mod 8
*  The mask must be 7 for a true mod 8.  A mask of 3 gave zero for a
*  remainder of exactly 4, so the last four lines were never dumped.
            and     #7,d7                      d7 > 0 shows we need to go back and finish 
            move.l  index(a6),d2
            and     #7,d2
            move.l  #7,d6
            sub     d2,d6                      d6 = which bit in frame buff byte
*
l2          moveq   #7,d5                      d5 = which bit in pixrow array
l3          bsr     moon_cmap                  d2 = color map index of present pixel
            suba    d3,a1                      set up for next pixel:  get plane 1 byte
            suba    d3,a2                       get plane 2 byte
            suba    d3,a3                       get plane 3 byte
            tst.l   rgltemp5(a6)               is background=on
            bne     gr27_ec10                      if yes, proceed as usual
            tst.l   d2                         if no, test color map index.  if <>0
            bne     gr27_ec10                          then proceed as usual
            movea.l a5,a0
            bset    d5,(a0)                    otherwise   set all three planes on,
            adda.l  planeoffset(a6),a0
            bset    d5,(a0)                    to print white
            adda.l  planeoffset(a6),a0
            bset    d5,(a0)                    to print white
            bra     gr27_ec32                    skip the per plane copy
gr27_ec10   moveq   #2,d1                      d1 = plane
            movea.l a5,a4                      a4 = working reg for pixrowbuff
l1          move.l  #2,d4
            sub.l   d1,d4                      d4 = bit to test in "color map" index
            btst    d4,d2
            beq     gr27_ec20
            bset    d5,(a4)
gr27_ec20   adda.l  planeoffset(a6),a4                    set pixrow up for next plane
gr27_ec30   dbra    d1,l1                      next plane
gr27_ec32   movea.l a5,a4                      restore a4                      
gr27_ec40   dbra    d5,l3                      finish bits in byte
            adda.l  #1,a5                      update count in pixrow buff
            dbra    d0,l2                      do next 8 lines   
            tst     d7                           was n_glines mod 8 0?
            beq     gr27_ec90                    if so, we are done
            clr.l   d7                           otherwise go thru loop once more
            clr.l   d0                           (d0 = 0: exactly one more byte)
            bra.s   l2
gr27_ec90   bra     rts

