#include <malloc.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <sys/farptr.h>
#include <crt0.h>

#include "../../jds/vga.h"
#include "../../jds/kb.h"

/* Table of vertex choices, one entry per plotted point.  Each entry points
   at an (x, y) pair inside Sierp_Lookuppoints.  Allocated and filled by
   SierpInit(). */
int **Sierp_Lookuptable;
/* The three triangle vertices, stored as x0,y0,x1,y1,x2,y2.  The renderers
   overwrite these with the halved vertex coordinates on every call. */
int Sierp_Lookuppoints[6];
/* Number of entries in Sierp_Lookuptable; SierpInit() rounds the requested
   count down to a multiple of 4. */
int SierpIters;
/* Size of the rendered window in pixels.  main() clamps these to the size of
   the selected video mode, so the large defaults end up as the screen size
   unless -window is given. */
int window_width = 1000000, window_height = 1000000;

/* Number of palettes main() generates with SetupPalette(). */
#define NUMPAL (7)

/* Empty replacements for two startup hooks.
   NOTE(review): presumably these override the DJGPP hooks declared in
   <crt0.h>, so that no environment file is loaded and command-line
   arguments are not wildcard-expanded -- confirm against the DJGPP docs. */
void   __crt0_load_environment_file(char *_app_name) { }
char **__crt0_glob_function(char *_arg) { return 0; }

#ifndef M_PI
#define M_PI 3.14159265359
#endif

int ScaledSin(int angle, int scaling) {
  return ((int) (sin(((double) angle) * M_PI / 180.0) * ((double)scaling)));
}

int ScaledCos(int angle, int scaling) {
  return ((int) (cos(((double) angle) * M_PI / 180.0) * ((double)scaling)));
}

/* Prints the command-line synopsis (the palette range uses NUMPAL) and
   terminates the program with exit status 0.  Does not return. */
void Usage() {
  static const char usage_fmt[] =
    "\n\nUsage: sierp.exe [-about] [-novwait] [-fade] [-nocrossfades] [-vbe 0x101]\n"
    "                 [-pal 1-%i] [-window 320x200] [-farptr]\n";

  printf(usage_fmt, NUMPAL);
  exit(0);
}

/* Prints the program description and author contact details, then
   terminates the program with exit status 0.  Does not return. */
void About() {
  /* The text contains no conversion specifiers, so it is written verbatim. */
  static const char about_text[] =
    "\nA nice effect. Fun at parties. \n"
    "Amuse your friends. We like it.\n"
    "Blitter feedback is your friend!\n"
    "\n"
    "About donationware: \n"
    "     If you like this program and would like to see more software like\n"
    "it, please send money in any quantity to help motivate the continued\n"
    "development of unique software. (The suggested quantity is $5.00)\n"
    "\nContact the author by:\n"
    "  Snail Mail:\n"
    "     Justin Frankel\n"
    "     60 Palisades Drive North\n"
    "     Sedona, AZ 86336\n"
    "  Email:\n"
    "     j.frankel@m.cc.utah.edu\n"
    "  WWW:\n"
    "     http://nullsoft.home.ml.org/\n\n"
    "Yes, there is a Win32 version too!\n";

  fputs(about_text, stdout);
  exit(0);
}

/*
 * Builds the random vertex-choice table used by the triangle renderers.
 *
 *   iter - requested number of plotted points; rounded down to a multiple
 *          of 4 because the renderers process four entries per loop pass.
 *
 * On return SierpIters holds the rounded count.  Every table entry points at
 * one of the three (x, y) pairs inside Sierp_Lookuppoints, picked with
 * rand().
 *
 * Returns 0 on success, or -1 if the rounded count is below 4 or the
 * allocation fails (Sierp_Lookuptable is NULL in both cases).  A count below
 * 4 is rejected because the renderers count down from SierpIters/4 with a
 * test-after-decrement loop, so a zero count would wrap around instead of
 * doing nothing.
 */
int SierpInit(int iter) {
  int x;
  int **table;

  SierpIters = (iter & ~3);
  Sierp_Lookuptable = NULL;
  if (SierpIters < 4) return -1;

  table = Sierp_Lookuptable = malloc(SierpIters * sizeof *table);
  if (!table) return -1;

  /* (rand() % 3) << 1 selects offset 0, 2 or 4: the start of a vertex pair. */
  for (x = 0; x < SierpIters; x ++)
    *table++ = Sierp_Lookuppoints + ((rand() % 3)<<1);
  return 0;
}


/*
 * Plots the Sierpinski point cloud for a triangle that may extend outside
 * the window, testing every point against the window bounds before writing.
 *
 *   FrameBuffer - window_width * window_height bytes, one byte per pixel
 *   x1..y3      - triangle vertices in window coordinates (may be off-window)
 *   color       - byte written for every plotted point
 *
 * The vertices are stored halved in Sierp_Lookuppoints.  Each step adds the
 * halved vertex chosen by the next Sierp_Lookuptable entry to the running
 * (halved) position, which yields the midpoint between the previous point
 * and that vertex; the midpoint is plotted and then halved again for the
 * next step.
 *
 * Row 0 and column 0 are valid pixels and are plotted; a zero or negative
 * SierpIters draws nothing.
 */
void SierpRenderTriangleClip(char *FrameBuffer, int x1, int y1,
                         int x2, int y2, int x3, int y3, char color) {
  int **entry = Sierp_Lookuptable;
  int *points = Sierp_Lookuppoints;
  int x, y, remaining;

  points[0] = x1>>1; points[1] = y1>>1;
  points[2] = x2>>1; points[3] = y2>>1;
  points[4] = x3>>1; points[5] = y3>>1;
  x = points[0];
  y = points[1];

  for (remaining = SierpIters; remaining > 0; remaining--, entry++) {
    const int *vertex = *entry;

    x += vertex[0];
    y += vertex[1];
    if (x >= 0 && x < window_width && y >= 0 && y < window_height)
      FrameBuffer[x + y*window_width] = color;
    x >>= 1;
    y >>= 1;
  }
}

/*
 * Plots the Sierpinski point cloud for one triangle into FrameBuffer.
 *
 *   FrameBuffer - destination buffer, indexed as x + y*window_width
 *   x1..y3      - triangle vertices in window coordinates
 *   color       - byte written for every plotted point
 *
 * If any vertex lies outside the window the call is forwarded to
 * SierpRenderTriangleClip().  Otherwise the unrolled assembly loop below is
 * used, which performs no bounds checks of its own.
 *
 * Register use in the assembly:
 *   eax = SierpIters on entry; ebp = SierpIters >> 2 is the loop counter
 *   esi = current position in Sierp_Lookuptable
 *   edi = FrameBuffer
 *   ecx / ebx = running x / y (halved after every point)
 *   %3 = color, %4 = window_width
 * Each loop pass handles four table entries (offsets 0, 4, 8, 12 from esi).
 * For each entry it adds the selected vertex pair to ecx/ebx, computes
 * ebx*window_width + ecx, stores the color byte there, and halves ecx/ebx.
 *
 * NOTE(review): the counter is decremented before it is tested, so a
 * SierpIters below 4 (ebp == 0) would wrap instead of skipping the loop.
 * NOTE(review): the multi-line string literal and a clobber list that names
 * input registers and ebp look like old-GCC idioms -- confirm the toolchain
 * accepts them before rebuilding.
 */
void SierpRenderTriangle(char *FrameBuffer, int x1, int y1,
                         int x2, int y2, int x3, int y3, char color) {
  int x, y;
  int **table = Sierp_Lookuptable;
  int *points = Sierp_Lookuppoints;
  /* Any vertex off-window: take the bounds-checked path instead. */
  if (x1 < 0 || x1 >= window_width ||
      x2 < 0 || x2 >= window_width ||
      x3 < 0 || x3 >= window_width ||
      y1 < 0 || y1 >= window_height ||
      y2 < 0 || y2 >= window_height ||
      y3 < 0 || y3 >= window_height) {
    SierpRenderTriangleClip(FrameBuffer,x1,y1,x2,y2,x3,y3,color);
    return;
  }
  /* Store the halved vertices; the walk starts at the first one. */
  x = points[0] = x1>>1; points[2] = x2>>1; points[4] = x3>>1;
  y = points[1] = y1>>1; points[3] = y2>>1; points[5] = y3>>1;
  asm("
    movl %%eax, %%ebp
    shrl $2, %%ebp
    .align 4, 0x90
    0:
    movl (%%esi), %%eax
    movl %4, %%edx
    addl (%%eax), %%ecx
    addl 4(%%eax), %%ebx
    movl %%ebx, %%eax
    shrl $1, %%ebx
    mull %%edx
    movb %3, %%dl
    addl %%ecx, %%eax
    shrl $1, %%ecx
    movb %%dl, (%%edi, %%eax)

    movl 4(%%esi), %%eax
    movl %4, %%edx
    addl (%%eax), %%ecx
    addl 4(%%eax), %%ebx
    movl %%ebx, %%eax
    shrl $1, %%ebx
    mull %%edx
    movb %3, %%dl
    addl %%ecx, %%eax
    shrl $1, %%ecx
    movb %%dl, (%%edi, %%eax)

    movl 8(%%esi), %%eax
    movl %4, %%edx
    addl (%%eax), %%ecx
    addl 4(%%eax), %%ebx
    movl %%ebx, %%eax
    shrl $1, %%ebx
    mull %%edx
    movb %3, %%dl
    addl %%ecx, %%eax
    shrl $1, %%ecx
    movb %%dl, (%%edi, %%eax)

    movl 12(%%esi), %%eax
    movl %4, %%edx
    addl (%%eax), %%ecx
    addl 4(%%eax), %%ebx
    movl %%ebx, %%eax
    shrl $1, %%ebx
    mull %%edx
    addl %%ecx, %%eax
    shrl $1, %%ecx
    movb %3, %%dl

    addl $16, %%esi
    decl %%ebp
    movb %%dl, (%%edi, %%eax)
    jnz 0b
  "::
    "a" (SierpIters), "S" (table), "D" (FrameBuffer),
    "g" (color), "g" (window_width), "c" (x), "b" (y)
   :"%eax","%ebx","%ecx","%edx","%esi","%edi","%ebp");
}

/* Byte-to-byte remapping table applied to every pixel by ScaleFade(). */
char ScaleTable[256];

/* Fills ScaleTable so that every value v in 1..255 maps to v - 1 and
   0 maps to 255. */
void InitScaleTable() {
  int value = 255;

  while (value > 0) {
    ScaleTable[value] = value - 1;
    value--;
  }
  ScaleTable[0] = 255;
}

/*
 * Stretches the source rectangle (inx,iny)-(inendx,inendy) of indata over
 * the whole window_width x window_height output buffer, passing every pixel
 * through ScaleTable on the way.
 *
 *   outdata - destination buffer, window_width bytes per row
 *   indata  - source buffer, window_width bytes per row
 *   inx, iny, inendx, inendy - source rectangle in pixels
 *
 * dX and dY are the per-output-pixel source steps in 16.16 fixed point.
 * For every output row the source row is Y >> 16:
 *   - if it is the same source row as for the previous output row, the
 *     previous output row is copied (window_width / 4 dwords);
 *   - otherwise the row is resampled two pixels per loop pass
 *     (window_width / 2 passes): the source byte at (position >> 16) is
 *     looked up in ScaleTable and stored.
 *
 * NOTE(review): the shifts imply window_width must be a multiple of 4 for
 * the row copy and even for the resampling loop; main() rounds it down to a
 * multiple of 4 only when -window is given.
 * NOTE(review): `rep movsl` presumably relies on ES == DS and a cleared
 * direction flag -- confirm for the target environment.
 */
void ScaleFade(char *outdata, char *indata, 
           int inx, int iny, int inendx, int inendy) {
  int dX; 
  int dY, Y, y, thisy, lasty;
  dX = ((inendx - inx)<<16) / window_width;
  dY = ((inendy - iny)<<16) / window_height;
  inx<<=16;
  Y = iny<<16;
  y = window_height;
  /* Sentinel intended to make the first row always take the resampling path. */
  lasty = -10000;
  do {
    thisy = Y>>16;
    /* Same source row as before: duplicate the output row just written
       (esi = edi - window_width). */
    if (thisy == lasty) asm("
        movl %%edi, %%esi ; subl %%ecx, %%esi
        shrl $2, %%ecx ; rep ; movsl
      "::"c" (window_width), "D" (outdata): "%edi","%esi","%ecx");
    /* New source row: ebp = fixed-point source x, esi = dX, edx = source row,
       ebx = ScaleTable, ecx = pixel-pair counter, edi = output position. */
    else asm("
        movl %%eax, %%ebp
        shrl $1, %%ecx
        decl %%edi

        .align 4, 0x90
        0:
        movl %%ebp, %%eax
        shrl $16, %%eax
        movb (%%edx, %%eax), %%al
        andl $255, %%eax
        movb (%%ebx, %%eax), %%al
        movb %%al, 1(%%edi)

        leal (%%ebp,%%esi), %%eax
        shrl $16, %%eax
        movb (%%edx, %%eax), %%al
        andl $255, %%eax
 
        addl $2, %%edi
        leal (%%ebp,%%esi,2), %%ebp
        decl %%ecx      
        movb (%%ebx, %%eax), %%al
        movb %%al, (%%edi)
        jnz 0b
      "
      :: "S" (dX), "a" (inx), "d" (indata + window_width*thisy), 
         "b" (ScaleTable), "c" (window_width), "D" (outdata):
        "%eax","%ebx","%ecx","%edx","%esi","%edi","%ebp");
    outdata += window_width;
    lasty = thisy;
    Y += dY;
  } while (--y);
}

/*
 * Stretches the source rectangle (inx,iny)-(inendx,inendy) of indata over
 * the whole window_width x window_height output buffer, copying pixel
 * values unchanged.
 *
 *   outdata - destination buffer, window_width bytes per row
 *   indata  - source buffer, window_width bytes per row
 *   inx, iny, inendx, inendy - source rectangle in pixels
 *
 * dX and dY are the per-output-pixel source steps in 16.16 fixed point.
 * For every output row the source row is Y >> 16:
 *   - if it is the same source row as for the previous output row, the
 *     previous output row is copied (window_width / 4 dwords);
 *   - otherwise the row is resampled two pixels per loop pass
 *     (window_width / 2 passes) from the source byte at (position >> 16).
 *
 * ScaleTable is loaded into ebx as an operand but the assembly never reads
 * it; ScaleFade() is the variant that applies the table.
 *
 * NOTE(review): the shifts imply window_width must be a multiple of 4 for
 * the row copy and even for the resampling loop; main() rounds it down to a
 * multiple of 4 only when -window is given.
 */
void Scale(char *outdata, char *indata, 
           int inx, int iny, int inendx, int inendy) {
  register int dX; 
  register int dY, Y, y, thisy, lasty;
  dX = ((inendx - inx)<<16) / window_width;
  dY = ((inendy - iny)<<16) / window_height;
  inx<<=16;
  Y = iny<<16;
  y = window_height;
  /* Sentinel intended to make the first row always take the resampling path. */
  lasty = -10000;
  do {
    thisy = Y>>16;
    /* Same source row as before: duplicate the output row just written
       (esi = edi - window_width). */
    if (thisy == lasty) asm("
        movl %%edi, %%esi ; subl %%ecx, %%esi
        shrl $2, %%ecx ; rep ; movsl
      "::"c" (window_width), "D" (outdata):"%esi", "%ecx", "%edi");
    /* New source row: ebp = fixed-point source x, esi = dX, edx = source row,
       ecx = pixel-pair counter, edi = output position. */
    else asm("
        movl %%eax, %%ebp
        shrl $1, %%ecx
        decl %%edi

        .align 4, 0x90
        0:
        movl %%ebp, %%eax
        shrl $16, %%eax
        movb (%%edx, %%eax), %%al
        movb %%al, 1(%%edi)

        leal (%%ebp,%%esi), %%eax
        shrl $16, %%eax
        movb (%%edx, %%eax), %%al
 
        addl $2, %%edi
        leal (%%ebp,%%esi,2), %%ebp
        decl %%ecx      
        movb %%al, (%%edi)
        jnz 0b
      "
      :: "S" (dX), "a" (inx), "d" (indata + window_width*thisy), 
         "b" (ScaleTable), "c" (window_width), "D" (outdata):
        "%eax","%ebx","%ecx","%edx","%esi","%edi","%ebp");
    outdata += window_width;
    lasty = thisy;
    Y += dY;
  } while (--y);
}

/*
 * Computes the point at the given angle on an ellipse centred in the window.
 *
 *   outx, outy - receive the resulting window coordinates
 *   angle      - angle in degrees
 *
 * The centre is (window_width/2, window_height/2); the radii are half the
 * window size minus a quarter of it, per axis.
 */
void Rotate(int *outx, int *outy, int angle) {
  const int center_x = window_width >> 1;
  const int center_y = window_height >> 1;
  const int radius_x = center_x - (window_width >> 2);
  const int radius_y = center_y - (window_height >> 2);

  *outx = center_x + ScaledCos(angle, radius_x);
  *outy = center_y + ScaledSin(angle, radius_y);
}

/*
 * Generates one of the built-in palettes.
 *
 *   whichpal - palette number, 0..6; any other value leaves the palette
 *              all zero
 *   pal      - receives 768 bytes: 256 entries of red, green, blue
 *
 * The whole palette is cleared first; only entries 1..127 are then filled,
 * so entry 0 and entries 128..255 stay zero.  Most palettes derive their
 * components from sines and cosines of an angle that sweeps 0..360 degrees
 * across the 128 entries.
 */
void SetupPalette(int whichpal, char *pal) {
  int entry;

  memset(pal,0,768);
  for (entry = 1; entry < 128; entry++) {
    char *rgb = pal + entry * 3;
    int deg = (entry * 360) / 128;
    int red, green, blue;

    switch (whichpal) {
      case 0:
        red   = 31 + ScaledSin(deg,30);
        green = 31 + ScaledCos(deg*3,20);
        blue  = 21 - ScaledCos(deg,15);
        break;
      case 1:
        red   = 21 + ScaledSin(deg,10);
        green = 32 + ScaledCos(deg,15) - ScaledSin(deg+45,15);
        blue  = 32 + ScaledSin(deg*3,15) - ScaledCos(deg,15);
        break;
      case 2:
        red   = 32 + ScaledSin(deg,30);
        green = 32 + ScaledCos(deg,30);
        blue  = 32 - ScaledSin(deg,30);
        break;
      case 3:
        red   = 32 + ScaledSin(deg,15) - ScaledCos(-3*deg,15);
        green = 10 + ScaledCos(deg,5) + ScaledSin(-4*deg,5);
        blue  = 32 + ScaledSin(deg*2,20) - ScaledCos(deg,10);
        break;
      case 4:
        red   = 32 + ScaledSin(deg,30);
        green = 32 + ScaledSin(deg*2,30);
        blue  = 32 + ScaledSin(deg*4,30);
        break;
      case 5:
        /* Angle ramps up over the first 64 entries and back down after. */
        deg = (entry < 64) ? entry*2 : (127-entry)*2;
        red   = 30 + ScaledSin(deg,20) + ScaledCos(deg*3,5);
        green = 30 + ScaledCos(deg,10) + ScaledSin(-deg,10);
        blue  = 31 + ScaledSin(deg*2,20) - ScaledCos(deg,10);
        break;
      case 6:
        /* Grey ramp: up over the first 64 entries and back down after. */
        red = green = blue = (entry < 64) ? entry : (127-entry);
        break;
      default:
        /* Unknown palette number: leave this entry cleared. */
        continue;
    }
    rgb[0] = red;
    rgb[1] = green;
    rgb[2] = blue;
  }
}

/*
 * Busy-waits until the timer tick counter read from _dos_ds:0x46C has
 * advanced by at least `ticks` (main() treats this counter as running at
 * 18.2 ticks per second).
 *
 *   ticks - number of ticks to wait; zero or negative returns immediately
 *
 * Elapsed time is measured as an unsigned difference from the starting
 * value rather than by comparing against an absolute deadline.  If the
 * counter restarts from a lower value during the wait (the BIOS resets it
 * at midnight), the difference becomes large and the wait ends early
 * instead of spinning on a deadline the counter can no longer reach.
 */
void MyDelay(int ticks) {
  unsigned long start;

  if (ticks <= 0) return;
  start = _farpeekl(_dos_ds,0x46C);
  while ((_farpeekl(_dos_ds,0x46C) - start) < (unsigned long) ticks)
    ;
}

/*
 * Program entry point.  Parses the command line, sets the video mode,
 * allocates two off-screen buffers and the Sierpinski lookup table, then
 * runs the feedback loop until the monitored key (escKey) reports pressed.
 * On exit it restores text mode and prints the measured frame rate.
 *
 * Each frame: the current buffer is copied to the screen, then scaled
 * (optionally faded) into the other buffer, and a new triangle is drawn on
 * top of that; the buffers swap roles on the next frame.
 *
 * NOTE(review): `void main` is non-standard; `int main` is the portable form.
 */
void main(int argc, char *argv[]) {
  double fps;
  char pals[NUMPAL][768];
  char workpal[768];
  char *curpal;
  char *ocurpal;
  int i;
  int nearptr = 1;
  int vid_mode = 0x13;
  int iter = 0;
  int frames;
  int ticks;
  int CurrentFB = 0;
  int whichpal = -1;
  int x1, x2, y1, y2, x3, y3;
  int ScaleFactor;
  int crossfade = 0;
  int nocrossfade = 0;
  int tillnextfade = 500;
  int wait_vsync = 1;
  int ScaleX = 0, ScaleY = 0;
  char *FrameBuffer[2];
  char TriColor = 5;
  int TriAngle = 0;
  int TriXtrans = 0;
  int TriYtrans = 0;
  int fade = 0;
  int AngleTimer;
  kbKeyType escKey = { 1, 0, 0 };
  /* Seed the random generator from the timer tick counter. */
  srand(_farpeekl(_dos_ds,0x46C));
  printf(
"             Sierpinsky Feedback v1.2/DOS32 built on " __DATE__ "\n" 
"                 Donationware -- Copyright (c) 1996, Nullsoft.\n");
  /* Option parsing.  A leading '/' is accepted in place of '-'.  Anything
     unrecognised (including the "-h" suggested by the exit message) ends in
     Usage(), which does not return.
     NOTE(review): the sscanf() results are not checked, so a malformed
     value silently keeps the previous setting. */
  for (i = 1; i < argc; i ++) {
    if (*(argv[i]) == '/') *(argv[i]) = '-';
    if (*(argv[i]) == '-') {
      if (!stricmp(argv[i],"-about")) About();
      else if (!stricmp(argv[i],"-window")) {
        if (++i >= argc) Usage();
        sscanf(argv[i],"%dx%d",&window_width, &window_height);
        if (window_width < 8) window_width = 8;
        if (window_height < 8) window_height = 8;
        /* Round the width down to a multiple of 4 (rows are copied as dwords). */
        window_width &= ~3;
      } else if (!stricmp(argv[i],"-vbe")) {
        if (++i >= argc) Usage();
        sscanf(argv[i],"%X",&vid_mode);
      } else if (!stricmp(argv[i],"-pal")) {
        if (++i >= argc) Usage();
        sscanf(argv[i],"%d",&whichpal);
        /* The command line is 1-based, the palette array 0-based. */
        whichpal--;
        if (whichpal < 0 || whichpal > NUMPAL-1) Usage();
     } else if (!stricmp(argv[i],"-novwait")) wait_vsync = 0;
     else if (!stricmp(argv[i],"-fade")) fade = 1;
     else if (!stricmp(argv[i],"-nocrossfades")) nocrossfade = 1;
     else if (!stricmp(argv[i],"-farptr")) nearptr = 0;
     else Usage(); 
    } else Usage();
  }
  /* Pause for 19 timer ticks (about one second at 18.2 ticks/s) so the
     banner stays visible before the mode switch. */
  MyDelay(19);
  vgaTextScreen(1);
  if (vgaSetMode(vid_mode,nearptr) < 0) {
    printf("Error: Unable to set video mode\n");
    exit(0);
  }
  if (vgaScreen.MemLayout != VGA_MEMLAYOUT_LINEAR) {
    vgaSetMode(VGA_MODETEXT,0);
    vgaTextScreen(0);
    printf("Sorry, I need a linear framebuffer.\n"); 
    exit(0);
  }
  /* Clamp the window to the screen; with the default 1000000x1000000 this
     selects the full screen.
     NOTE(review): the clamped width is not re-rounded to a multiple of 4. */
  if (window_width > vgaScreen.Width) window_width = vgaScreen.Width;
  if (window_height > vgaScreen.Height) window_height = vgaScreen.Height;
  if (vgaScreen.BPP != 8) {
    /* Save the depth before vgaSetMode() is called again. */
    int tmp = vgaScreen.BPP;
    vgaSetMode(VGA_MODETEXT,0);
    vgaTextScreen(0);
    printf("The mode you selected is %d bpp, I need 8.\n",tmp);
    exit(0);
  }
  /* Two off-screen buffers: one is displayed while the next frame is built
     in the other. */
  FrameBuffer[0] = (char *) malloc(window_width*window_height);
  FrameBuffer[1] = (char *) malloc(window_width*window_height);
  if (!FrameBuffer[0] || !FrameBuffer[1]) {
    vgaSetMode(VGA_MODETEXT,0);
    vgaTextScreen(0);
    printf("Error: not enough memory for framebuffer in %dx%d\n"
           "  Tried to malloc(%d), and call failed\n"
           "  Try freeing up some memory, or try a lower resolution\n"
           "  (or a smaller window size)\n",
           window_width,window_height, window_width*window_height);
    exit(0);
  }
  vgaMemset(FrameBuffer[0],0,window_width*window_height);
  vgaMemset(FrameBuffer[1],0,window_width*window_height);
  /* One plotted triangle point per 13 window pixels. */
  iter=window_width*window_height/13;
  if (SierpInit(iter) < 0) {
    vgaSetMode(VGA_MODETEXT,0);
    vgaTextScreen(0);
    /* NOTE(review): "%ld" is passed a size_t expression here. */
    printf("Error: not enough memory for Sierpinsky-buffer in %dx%d\n"
           "  Tried to malloc(%ld), and call failed\n"
           "  Try freeing up some memory, or try a lower resolution\n",
           window_width,window_height,SierpIters*sizeof(int *));
    exit(0);
  }
  kbSet(1,2);
  kbMonitorKey(&escKey);
  /* Zoom margin: 1/32 of the window width. */
  ScaleFactor = window_width>>5;
  /* Pre-compute every palette (frames is reused as a plain loop index). */
  for (frames = 0; frames < NUMPAL; frames++)
    SetupPalette(frames, pals[frames]);
  /* NOTE(review): with -nocrossfades the palette is re-rolled here, which
     discards a palette chosen with -pal -- confirm this is intended. */
  if (nocrossfade) whichpal = rand() % NUMPAL;
  vgaSetPalette(curpal = pals[whichpal >= 0 ? whichpal : rand() % NUMPAL]);
  if (fade) {
    /* Fade table: every pixel value drops by one per pass; value 1 is
       redirected to 127 instead of 0. */
    InitScaleTable();
    ScaleTable[1] = 127;
  }
  AngleTimer = rand() % 360;
  frames = 0;
  /* Starting tick count for the frame-rate measurement. */
  ticks = _farpeekl(_dos_ds,0x46C);
  do {
    frames++;
    AngleTimer ++;
    /* Swap the roles of the two buffers. */
    CurrentFB ^= 1;
    if (AngleTimer > 360) AngleTimer -= 360;
    /* Rotation speed itself oscillates between -10 and +10 degrees/frame. */
    TriAngle += ScaledSin(AngleTimer,10);
    /* Start a new palette cross-fade after a random 100..299 frames. */
    if (!nocrossfade && tillnextfade-- == 0) {
      tillnextfade = 200 + (rand() % 200) - 100;
      crossfade = 128;
      ocurpal = curpal;
      vgaSetFade(curpal = pals[rand()%NUMPAL],ocurpal,workpal);
    }
    /* Cycle the triangle colour through 1..127 (every frame, or every 8th
       frame in fade mode); colour 0 is skipped. */
    if (!fade || !(frames & 7)) {
      TriColor++;
      TriColor &= 127;
      if (!TriColor) TriColor++;
    }
    if (wait_vsync) vgaWaitVSync();
    /* Copy the current buffer to the centre of the screen, one row at a
       time (window_width/4 dwords per row, then skip to the next screen
       line).  The far-pointer variant temporarily loads ES with
       vgaScreen.GraphSel and restores it afterwards; the near-pointer
       variant writes through vgaScreen.GraphMem directly. */
    if (!nearptr) asm("
        movl %%eax, %%ebp
        movw %%es, %%ax
        pushl %%eax
        movw %%bx, %%es
        movl %%ecx, %%ebx
        shrl $2, %%ebx
        0:
        movl %%ebx, %%ecx
        rep ; movsl
        addl %%ebp, %%edi
        decl %%edx
        jnz 0b
        popl %%eax
        movw %%ax, %%es
      "::"c" (window_width), "D" (vgaScreen.LineWidth*((vgaScreen.Height>>1)-
        (window_height>>1))+((vgaScreen.Width>>1)-(window_width>>1))),
         "S" (FrameBuffer[CurrentFB]),
         "b" ((int) vgaScreen.GraphSel), "d" (window_height),
         "a" (vgaScreen.LineWidth-window_width)
       :"%eax","%ebx","%ecx","%edx","%ebp","%esi","%edi");
    else asm("
        shrl $2, %%eax
        0:
          movl %%eax, %%ecx
          rep ; movsl
          addl %%ebx, %%edi
          decl %%edx
          jnz 0b
        "::"d" (window_height), "a" (window_width), 
           "D" (vgaScreen.GraphMem+ 
                vgaScreen.LineWidth*((vgaScreen.Height>>1) -
                (window_height>>1))+((vgaScreen.Width>>1)-(window_width>>1))),
           "S" (FrameBuffer[CurrentFB]),
           "b" (vgaScreen.LineWidth-window_width)
       :"%eax","%ebx","%ecx","%edx","%esi","%edi");
    /* While a cross-fade is running, advance it on every other frame. */
    if (crossfade-- > 0 && crossfade & 1) {
      vgaFadeStep();
      vgaSetPalette(workpal);
    }
    /* Triangle drift and zoom-rectangle wobble, all driven by AngleTimer. */
    TriXtrans = ScaledSin(AngleTimer<<3,window_width>>2);
    TriYtrans = ScaledSin((AngleTimer<<2) + 33,window_height>>2);
    ScaleX = ScaledSin(AngleTimer<<2, (ScaleFactor>>1)+(ScaleFactor>>2));
    ScaleY = ScaledSin((AngleTimer<<1) + 33, (ScaleFactor>>1)+(ScaleFactor>>2));
    /* Three vertices 120 degrees apart, then shifted by the drift. */
    Rotate(&x1,&y1,TriAngle); 
    x1 += TriXtrans; y1 += TriYtrans; 
    Rotate(&x2,&y2,(TriAngle+120));
    x2 += TriXtrans; y2 += TriYtrans; 
    Rotate(&x3,&y3,(TriAngle+240));
    x3 += TriXtrans; y3 += TriYtrans;
    /* Feedback step: stretch an inset rectangle of the frame just shown
       over the whole of the other buffer. */
    if (fade) ScaleFade(FrameBuffer[CurrentFB^1], FrameBuffer[CurrentFB], 
         ScaleFactor+ScaleX,ScaleFactor+ScaleY,
         window_width-ScaleFactor-ScaleX, window_height-ScaleFactor-ScaleY);
    else Scale(FrameBuffer[CurrentFB^1], FrameBuffer[CurrentFB], 
         ScaleFactor+ScaleX,ScaleFactor+ScaleY,
         window_width-ScaleFactor-ScaleX, window_height-ScaleFactor-ScaleY);
    /* Draw the new triangle on top of the scaled image. */
    SierpRenderTriangle(FrameBuffer[CurrentFB^1], x1, y1, x2, y2, x3, y3, 
                        TriColor);
  } while (!escKey.pressed);
  /* Elapsed timer ticks; converted to seconds with 18.2 ticks per second. */
  ticks = _farpeekl(_dos_ds,0x46C) - ticks;
  vgaSetMode(VGA_MODETEXT,0);
  vgaTextScreen(0);
  if (ticks) fps = (frames / (ticks/18.2));
  else fps = 0.0;
  printf("\n\n(Try \"sierp -h\" for options)\n"
         "\n\n%.02f Frames/Second @ %dx%d (%.04fM pixels/sec)\n",
         fps, window_width, window_height,
         fps*window_width*window_height*1e-6);
  kbSet(0,0);
  free(Sierp_Lookuptable);
  free(FrameBuffer[0]);
  free(FrameBuffer[1]);
}
