/*         ______   ___    ___
 *        /\  _  \ /\_ \  /\_ \
 *        \ \ \L\ \\//\ \ \//\ \      __     __   _ __   ___
 *         \ \  __ \ \ \ \  \ \ \   /'__`\ /'_ `\/\`'__\/ __`\
 *          \ \ \/\ \ \_\ \_ \_\ \_/\  __//\ \L\ \ \ \//\ \L\ \
 *           \ \_\ \_\/\____\/\____\ \____\ \____ \ \_\\ \____/
 *            \/_/\/_/\/____/\/____/\/____/\/___L\ \/_/ \/___/
 *                                           /\____/
 *                                           \_/__/
 *
 *      Math routines, compiled sprite wrapper, etc.
 *
 *      By Shawn Hargreaves.
 *
 *      fixsqrt() and fixhypot() routines by David Kuhling.
 *
 *      See readme.txt for copyright information.
 */


#include "asmdefs.inc"

.text



/* empty bank switch routine for the standard VGA mode and memory bitmaps
 *
 *  In:  edx = base pointer (the bitmap, going by the BMP_LINE usage
 *             elsewhere in this file), eax = line index
 *  Out: eax = the 32 bit entry stored at BMP_LINE + 4*index
 */
FUNC(_stub_bank_switch)
   movl BMP_LINE(%edx, %eax, 4), %eax
   ret

/* does nothing */
FUNC(_stub_unbank_switch)
   ret

/* does nothing -- NOTE(review): presumably just marks the end of the
 * stub routines; confirm against the code that references it.
 */
FUNC(_stub_bank_switch_end)
   ret




/* void apply_matrix_f(MATRIX_f *m, float x, float y, float z,
 *                                  float *xout, float *yout, float *zout);
 *  Floating point vector by matrix multiplication routine.
 *
 *  Computes, in single precision loads/stores on the x87 stack:
 *     *xout = V00*x + V01*y + V02*z + T0
 *     *yout = V10*x + V11*y + V12*z + T1
 *     *zout = V20*x + V21*y + V22*z + T2
 *  where Vij and Ti are the matrix fields at offsets M_Vij and M_Ti.
 *
 *  The three rows are interleaved with fxch so that a multiply for the
 *  next row is started while the sum of the previous row is being
 *  completed. The x87 stack is empty again on return.
 *
 *  Registers: edx = m, eax = xout, ebx = yout (saved/restored), ecx = zout.
 */
FUNC(apply_matrix_f)

   #define MTX    ARG1
   #define X      ARG2
   #define Y      ARG3
   #define Z      ARG4
   #define XOUT   ARG5
   #define YOUT   ARG6
   #define ZOUT   ARG7

   pushl %ebp
   movl %esp, %ebp
   pushl %ebx

   movl MTX, %edx
   movl XOUT, %eax
   movl YOUT, %ebx
   movl ZOUT, %ecx

   flds  M_V00(%edx)             /* V00*x */
   fmuls X
   flds  M_V01(%edx)             /* V01*y */
   fmuls Y
   flds  M_V02(%edx)             /* V02*z */
   fmuls Z
   fxch  %st(2)

   faddp %st(0), %st(1)          /* V00*x + V01*y */
   flds  M_V10(%edx)             /* start on the second row */
   fxch  %st(2)

   faddp %st(0), %st(1)          /* ... + V02*z */
   fxch  %st(1)

   fmuls X                       /* V10*x */
   fxch  %st(1)

   fadds M_T0(%edx)              /* x result is now complete */
   flds  M_V11(%edx)

   fmuls Y                       /* V11*y */
   flds  M_V12(%edx)

   fmuls Z                       /* V12*z */
   fxch  %st(1)

   faddp %st(0), %st(3)          /* V10*x + V11*y */
   flds  M_V20(%edx)             /* start on the third row */
   fxch  %st(3)

   faddp %st(0), %st(1)          /* ... + V12*z */
   fxch  %st(2)

   fmuls X                       /* V20*x */
   fxch  %st(2)

   fadds M_T1(%edx)              /* y result is now complete */
   flds  M_V21(%edx)

   fmuls Y                       /* V21*y */
   flds  M_V22(%edx)

   fmuls Z                       /* V22*z */
   fxch  %st(4)

   faddp %st(0), %st(1)          /* V20*x + V21*y */
   fxch  %st(1)
   fstps (%ebx)                  /* store *yout */
   faddp %st(0), %st(2)          /* ... + V22*z */
   fstps (%eax)                  /* store *xout */
   fadds M_T2(%edx)
   fstps (%ecx)                  /* store *zout */

   popl %ebx
   movl %ebp, %esp
   popl %ebp

   ret                           /* end of apply_matrix_f() */

#undef X
#undef Y




/* void draw_compiled_sprite(BITMAP *bmp, COMPILED_SPRITE *sprite, int x, y)
 *  Draws a compiled sprite onto the specified bitmap at the specified
 *  position, _ignoring_ clipping. The bitmap must be in the same format
 *  that the sprite was compiled for.
 *
 *  Planar sprites (CMP_PLANAR != 0): for each of the four planes the
 *  plane mask is written to port 0x3C4 and the drawer found at
 *  CMP_DRAW(sprite + plane*8) is called with:
 *     eax = destination address, ecx = line width (second BMP_LINE entry
 *     minus the first), edx = 0x3C4.
 *
 *  Linear sprites: the single drawer at CMP_DRAW(sprite) is called with:
 *     ecx = x scaled to a byte offset, edi = y, edx = bmp,
 *     esi = BMP_WBANK(bmp).
 */
FUNC(draw_compiled_sprite)

   #define BMP       ARG1
   #define SPRITE    ARG2
   #define X         ARG3
   #define Y         ARG4

   pushl %ebp
   movl %esp, %ebp
   subl $4, %esp                 /* 1 local variable: */

   #define PLANE     -4(%ebp)

   pushl %ebx
   pushl %esi
   pushl %edi

   movl BMP, %edx                /* bitmap pointer in edx */

 #ifdef USE_FS
   movw BMP_SEG(%edx), %fs       /* load segment selector into fs */
 #endif

   movl SPRITE, %ebx
   cmpw $0, CMP_PLANAR(%ebx)     /* is the sprite planar or linear? */
   je linear_compiled_sprite

   movl X, %ecx                  /* get write plane mask in bx */
   andb $3, %cl
   movl $0x1102, %ebx            /* bl = 2 (index byte), bh = 0x11 */
   shlb %cl, %bh                 /* bh = 0x11 << (x & 3) */

   movl BMP_LINE+4(%edx), %ecx   /* get line width in ecx */
   subl BMP_LINE(%edx), %ecx

   movl X, %esi                  /* get destination address in edi */
   shrl $2, %esi
   movl Y, %edi
   movl BMP_LINE(%edx, %edi, 4), %edi
   addl %esi, %edi

   movl $0x3C4, %edx             /* port address in dx */

   movl $0, PLANE                /* zero the plane counter */

   _align_
planar_compiled_sprite_loop:
   movl %ebx, %eax               /* set the write plane */
   outw %ax, %dx

   movl %edi, %eax               /* get address in eax */

   movl PLANE, %esi              /* get the drawer function in esi */
   shll $3, %esi                 /* 8 bytes per plane entry */
   addl SPRITE, %esi
   movl CMP_DRAW(%esi), %esi

   call *%esi                    /* and draw the plane! */

   incl PLANE                    /* next plane */
   cmpl $4, PLANE
   jge draw_compiled_sprite_done

   rolb $1, %bh                  /* advance the plane position */
   adcl $0, %edi                 /* mask wrapped round: next address */
   jmp planar_compiled_sprite_loop

   _align_
linear_compiled_sprite:
   movl X, %eax
   movzwl CMP_COLOR_DEPTH(%ebx), %ecx
   cmpl $24, %ecx
   jne normal_linear_compiled_sprite
   leal (%eax, %eax, 2), %eax    /* 24 bit: x *= 3 */
   jmp end24bpp_linear_compiled_sprite

   _align_
normal_linear_compiled_sprite:
   addl $7, %ecx                 /* shift = (depth+7)/16: */
   shrl $4, %ecx                 /*   8 -> 0, 15/16 -> 1, 32 -> 2 */
   shll %cl, %eax

end24bpp_linear_compiled_sprite:
   movl %eax, %ecx               /* x coordinate in ecx */
   movl Y, %edi                  /* y coordinate in edi */
   movl BMP_WBANK(%edx), %esi    /* bank switch function in esi */
   movl CMP_DRAW(%ebx), %ebx     /* drawer function in ebx */

   call *%ebx                    /* and draw it! */

draw_compiled_sprite_done:
   movl BMP, %edx
   UNWRITE_BANK()

   popl %edi
   popl %esi
   popl %ebx
   movl %ebp, %esp
   popl %ebp
   ret                           /* end of draw_compiled_sprite() */




/* void _do_stretch(BITMAP *source, BITMAP *dest, void *drawer,
 *                  int sx, fixed sy, fixed syd, int dx, int dy, int dh,
 *                  int color_depth);
 *
 *  Helper function for stretch_blit(), calls the compiled line drawer.
 *
 *  For each of the dh destination lines the drawer is called with
 *  esi = source line address + sx and edi = dest line address + dx
 *  (sx and dx having first been scaled to byte offsets for the colour
 *  depth), with es holding the destination segment. After every line
 *  sy is advanced by syd (the source line is sy >> 16) and dy by one.
 *
 *  The SX, SY, DX, DY and DH argument slots on the stack are modified
 *  in place. Every loop tests its counter after drawing, so one line is
 *  drawn even when dh <= 0 on entry. A planar destination bypasses the
 *  colour depth check; otherwise an unrecognised colour depth draws
 *  nothing.
 */
FUNC(_do_stretch)

   #define SOURCE       ARG1
   #define DEST         ARG2
   #define DRAWER       ARG3
   #define SX           ARG4
   #define SY           ARG5
   #define SYD          ARG6
   #define DX           ARG7
   #define DY           ARG8
   #define DH           ARG9
   #define COL_DEPTH    ARG10

   pushl %ebp
   movl %esp, %ebp
   pushl %edi
   pushl %esi
   pushl %ebx
   pushw %es

   movl DEST, %edx
   movw BMP_SEG(%edx), %es       /* load destination segment */
   movl DRAWER, %ebx             /* the actual line drawer */

   movl BMP_ID(%edx), %eax
   testl $BMP_ID_PLANAR, %eax
   jnz stretch_modex_loop

   movl COL_DEPTH, %eax
   cmpl $8, %eax
   je stretch_normal_loop
   cmpl $15, %eax
   je stretch_bpp_16
   cmpl $16, %eax
   je stretch_bpp_16
   cmpl $24, %eax
   je stretch_bpp_24
   cmpl $32, %eax
   je stretch_bpp_32
   jmp stretch_done              /* unknown depth: draw nothing */

   /* special loop for 24 bit */
   _align_
stretch_bpp_24:
   movl SX, %eax                 /* sx *= 3 */
   leal (%eax, %eax, 2), %eax
   movl %eax, SX
   movl DX, %eax                 /* dx *= 3 */
   leal (%eax, %eax, 2), %eax
   movl %eax, DX

   _align_
stretch_loop24:
   movl SOURCE, %edx             /* get source line (in esi) and bank */
   movl SY, %eax
   shrl $16, %eax
   READ_BANK()
   movl %eax, %esi
   addl SX, %esi

   movl DEST, %edx               /* get dest line (in edi) and bank */
   movl DY, %eax
   WRITE_BANK()
   movl %eax, %edi
   addl DX, %edi
   pushl %edx
   pushl %ebx

   call *%ebx                    /* draw (clobbers eax, ebx, ecx, edx) */

   popl %ebx
   popl %edx
   movl SYD, %eax                /* next line in source bitmap */
   addl %eax, SY
   incl DY                       /* next line in dest bitmap */
   decl DH
   jg stretch_loop24
   jmp stretch_done

   /* special loop for mode-X */
   _align_
stretch_modex_loop:
   movl SOURCE, %edx             /* get source line (in esi) and bank */
   movl SY, %eax
   shrl $16, %eax
   movl BMP_LINE(%edx, %eax, 4), %esi
   addl SX, %esi

   movl DEST, %edx               /* get dest line (in edi) and bank */
   movl DY, %eax
   movl BMP_LINE(%edx, %eax, 4), %edi
   addl DX, %edi

   call *%ebx                    /* draw the line (clobbers eax and ecx) */

   movl SYD, %eax                /* next line in source bitmap */
   addl %eax, SY
   incl DY                       /* next line in dest bitmap */
   decl DH
   jg stretch_modex_loop
   jmp stretch_done

   _align_
stretch_bpp_16:
   shll $1, SX                   /* two bytes per pixel */
   shll $1, DX
   jmp stretch_normal_loop

   _align_
stretch_bpp_32:
   shll $2, SX                   /* four bytes per pixel */
   shll $2, DX

   /* normal stretching loop */
   _align_
stretch_normal_loop:
   movl SOURCE, %edx             /* get source line (in esi) and bank */
   movl SY, %eax
   shrl $16, %eax
   READ_BANK()
   movl %eax, %esi
   addl SX, %esi

   movl DEST, %edx               /* get dest line (in edi) and bank */
   movl DY, %eax
   WRITE_BANK()
   movl %eax, %edi
   addl DX, %edi

   call *%ebx                    /* draw the line (clobbers eax and ecx) */

   movl SYD, %eax                /* next line in source bitmap */
   addl %eax, SY
   incl DY                       /* next line in dest bitmap */
   decl DH
   jg stretch_normal_loop

stretch_done:
   popw %es

   movl SOURCE, %edx
   UNWRITE_BANK()

   movl DEST, %edx
   UNWRITE_BANK()

   popl %ebx
   popl %esi
   popl %edi
   movl %ebp, %esp
   popl %ebp
   ret                           /* end of _do_stretch() */




/* unsigned long _blender_trans24(unsigned long x, y, n);
 *  24 bit trans blender function. See colblend.c for the others.
 *
 *  If n is non-zero it is first incremented. The result is then built
 *  from two independent interpolations, y + (x - y) * n / 256:
 *     - bits 0-7 and 16-23 are processed together (mask 0xFF00FF)
 *     - bits 8-15 are processed on their own (mask 0xFF00)
 *  and the two masked results are OR'ed together into eax.
 *  esi, ecx and ebx are saved and restored; edx is clobbered.
 */
FUNC(_blender_trans24)
   pushl %ebp
   movl %esp, %ebp
   pushl %esi
   pushl %ecx
   pushl %ebx

   movl ARG1, %esi               /* esi = x */
   movl ARG2, %ebx               /* ebx = y */
   movl ARG3, %ecx               /* ecx = n */
   movl %esi, %eax
   movl %ebx, %edx
   andl $0xFF00FF, %eax          /* eax = outer two bytes of x */
   andl $0xFF00FF, %edx          /* edx = outer two bytes of y */

   testl %ecx, %ecx              /* n = n ? n+1 : 0 */
   jz noinc

   incl %ecx

noinc:
   subl %edx, %eax               /* outer bytes: (x - y) * n / 256 + y */
   imull %ecx, %eax
   shrl $8, %eax
   addl %ebx, %eax

   andl $0xFF00, %ebx            /* keep only the middle byte of y and x */
   andl $0xFF00, %esi

   subl %ebx, %esi               /* middle byte: (x - y) * n / 256 + y */
   imull %ecx, %esi
   shrl $8, %esi
   addl %ebx, %esi

   andl $0xFF00FF, %eax          /* discard bits that spilled across */
   andl $0xFF00, %esi

   orl %esi, %eax                /* combine the two partial results */

   popl %ebx
   popl %ecx
   popl %esi
   movl %ebp, %esp
   popl %ebp
   ret                           /* end of _blender_trans24() */




/* fixed fixsqrt(fixed x);
 *  Fixed point square root routine. This code is based on the fixfloat
 *  library by Arne Steinarson.
 *
 *  Returns zero for x == 0. For x < 0 it stores ERANGE through the
 *  allegro_errno pointer and returns zero.
 */
FUNC(fixsqrt)
   pushl %ebp
   movl %esp, %ebp

   /* This routine is based upon the following idea:
    *    sqrt (x) = sqrt (x/d) * sqrt (d)
    *    d = 2^(2n)
    *    sqrt (x) = sqrt (x / 2^(2n)) * 2^n
    * `x/2^(2n)' has to fall into the range 0..255 so that we can use the
    * square root lookup table. So `2n' is the number of bits `x' has to be
    * shifted to the right to become smaller than 256. The best way to find
    * `2n' is to do a reverse bit scan on `x'. This is achieved by the i386
    * ASM instruction `bsr'.
    */

   movl ARG1, %eax               /* eax = `x' */
   testl %eax, %eax              /* check whether `x' is negative... */
   jle sqrt_error_check          /* jump to error-checking if x <= 0 */

   movl %eax, %edx               /* bit-scan is done on edx */
   shrl $6, %edx

   /* `bsr' leaves its destination undefined when the source is zero, so
    * force bit 0 on: this cannot move the highest set bit of a non-zero
    * value, and turns the "no bit set" case into a well defined 2n = 0.
    */
   orl $1, %edx
   bsrl %edx, %ecx
   andb $0xFE, %cl               /* make result even -->  %cl = 2n */
   shrl %cl, %eax                /* shift x to fall into range 0..255 */

                                 /* table lookup... */
   movzwl GLOBL(_sqrt_table)(,%eax,2), %eax

   shrb $1, %cl                  /* %cl = n */
   shll %cl, %eax                /* multiply `sqrt(x/2^(2n))' by `2^n' */
   shrl $4, %eax                 /* adjust the result */
   jmp sqrt_done

   _align_
sqrt_error_check:                /* here we go if x<=0 */
   jz sqrt_done                  /* if zero, return eax=0 */

   movl GLOBL(allegro_errno), %edx
   movl $ERANGE, (%edx)          /* negative input: set errno */
   xorl %eax, %eax               /* return zero */

   _align_
sqrt_done:
   movl %ebp, %esp
   popl %ebp
   ret                           /* end of fixsqrt() */




/* fixed fixhypot(fixed x, fixed y);
 *  Return fixed point sqrt (x*x+y*y), which is the length of the
 *  hypotenuse of a right triangle with sides of length x and y, or the
 *  distance of point (x|y) from the origin. This routine is faster and more
 *  accurate than using the direct formula
 *  fixsqrt (fixmul (x,x) + fixmul (y,y)). It will also return correct
 *  results for x>=256 or y>=256 where fixmul (x,x) or fixmul (y,y) would
 *  overflow.
 *
 *  If the upper 32 bits of x*x + y*y exceed 0x3FFFFFFF, ERANGE is stored
 *  through the allegro_errno pointer and 0x7FFFFFFF is returned.
 */
FUNC(fixhypot)
   pushl %ebp
   movl %esp, %ebp

   /* The idea of this routine is:
    *    sqrt (x^2+y^2) = sqrt ((x/d)^2+(y/d)^2) * d
    *    d = 2^n
    * Since `x' and `y' are fixed point numbers, they are multiplied in the
    * following way:
    *    x^2 = (x*x)/2^16
    * so we come to the formula:
    *    sqrt(x^2+y^2) = sqrt((x*x + y*y)/2^(16+2n)) * 2^n
    * and this is almost the same problem as calculating the square root in
    * `fixsqrt': find `2n' so that `(x*x+y*y)/2^(16+2n)' is in the range 0..255
    * so that we can use the square root lookup table.
    */

   movl ARG1, %eax               /* edx:eax = x*x */
   imull %eax
   movl %eax, %ecx               /* save edx:eax */
   pushl %edx
   movl ARG2, %eax               /* edx:eax = y*y */
   imull %eax
   addl %ecx, %eax               /* edx:eax = x*x + y*y */
   popl %ecx
   adcl %ecx, %edx
   cmpl $0x3FFFFFFF, %edx        /* check for overflow */
   ja hypot_overflow

   /* And now we're doing a bit-scan on `x*x+y*y' to find out by how
    * many bits it needs to be shifted to fall into the range 0..255.
    * Since the intermediate result is 64 bit we may need two bitscans
    * in case that no bit is set in the upper 32 bit.
    */
   bsrl %edx, %ecx
   jz hypot_part2                /* ZF set: upper 32 bits are all zero */

   /* we got the bit with the first step */
   incb %cl                      /* make cl even */
   incb %cl
   andb $0xFE, %cl
   shrdl %cl, %edx, %eax         /* make eax fall into range 0..255 */
   shrl $24, %eax

                                 /* eax = table lookup square root */
   movzwl GLOBL(_sqrt_table)(,%eax,2), %eax
   shrb $1, %cl                  /* adjust result... */
   shll %cl, %eax
   jmp hypot_done

   /* we didn't get the bit with the first step -- so we make another
    * scan on the remaining bits in `eax' to get `2n'.
    */
   _align_
hypot_part2:
   shrl $16, %eax                /* eax = (x*x+y*y)/2^16 */
   movl %eax, %edx               /* edx is used for scanning */
   shrl $6, %edx

   /* `bsr' leaves its destination undefined when the source is zero, so
    * force bit 0 on: this cannot move the highest set bit of a non-zero
    * value, and turns the "no bit set" case into a well defined 2n = 0.
    */
   orl $1, %edx
   bsrl %edx, %ecx
   andb $0xFE, %cl               /* make cl=2n even */
   shrl %cl, %eax                /* make eax fall into range 0..255 */

                                 /* eax = table lookup square root */
   movzwl GLOBL(_sqrt_table)(,%eax,2), %eax
   shrb $1, %cl                  /* cl = n */
   shll %cl, %eax                /* adjust result... */
   shrl $4, %eax
   jmp hypot_done

   _align_
hypot_overflow:                  /* overflow */
   movl GLOBL(allegro_errno), %eax
   movl $ERANGE, (%eax)          /* set errno */
   movl $0x7FFFFFFF, %eax        /* and return MAXINT */

   _align_
hypot_done:
   movl %ebp, %esp
   popl %ebp
   ret                           /* end of fixhypot() */