#include <vectrex.h>
#include "controller.h"

static int changes = 0;
static unsigned int debug = 0;

// set -O3 in the gcc options.


// based on https://codereview.stackexchange.com/questions/136406/tetris-in-c-in-200-lines

// Compile with: cc -o tetris -DCLI tetris.c -lncurses

// data structures and algorithm modified for Vectrex's low RAM requirement.

// Removed gratuitous use of malloc/free for temporary space.
// Pre-calculated rotations (by hand!) so they could be stored in ROM.
// Removed need for any temporaries
// Removed copying of entire structs as array parameters
// Eventually removed the structs entirely :-)

#ifdef CLI
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include <sys/time.h>
#include <ncurses.h>
#else
#endif

#define ROWS 17
#define COLS 7 // 1..6  0 unused
#define SHAPES 7
#define TRUE 1
#define FALSE 0

unsigned int BitTable[ROWS]; // map of where blocks are.
// blocks are only written here once they stop moving.
// While they're still falling, they are not in this table,
// they're only in the vector bitmap, via WriteCurrentTetrominoToVectorBitmap

// Will need tweaking to use Vectrex clock ticks.
int decrease = 10;
int is_later (void) {
  return TRUE;
}

unsigned int rand(void) {
  static int i=-1;
  i++;
  return (unsigned int)i % SHAPES;
}


long int score = 0;
int GameOn = TRUE;

unsigned int width;
unsigned int rot, shape;
unsigned int row, col;

unsigned int widths[SHAPES] = {
  3,3,3,3,3,2,4
};


static unsigned int Array[4/*rot*/][SHAPES][4/*row*/][4/*col*/] = {
  //rot      row  col          row              row              row
  {//0 shape  |   0 1 2  3     |   0 1 2  3     |   0 1 2  3     |   0 1 2  3
    { /*0*/ /*0*/{0,1,1, 0}, /*1*/{1,1,0, 0}, /*2*/{0,0,0, 0}, /*3*/{ 0,0,0,0} }, // S_shape 
    { /*1*/      {1,1,0, 0},      {0,1,1, 0},      {0,0,0, 0},      { 0,0,0,0} }, // Z_shape 
    { /*2*/      {0,1,0, 0},      {1,1,1, 0},      {0,0,0, 0},      { 0,0,0,0} }, // T_shape 
    { /*3*/      {0,0,1, 0},      {1,1,1, 0},      {0,0,0, 0},      { 0,0,0,0} }, // L_shape 
    { /*4*/      {1,0,0, 0},      {1,1,1, 0},      {0,0,0, 0},      { 0,0,0,0} }, // ML_shape 
    { /*5*/      {1,1, 0,0},      {1,1, 0,0},      { 0,0,0,0},      { 0,0,0,0} }, // SQ_shape
    { /*6*/      {0,0,0,0 },      {1,1,1,1 },      {0,0,0,0 },      { 0,0,0,0} }, // R_shape
  },
  {//1
    { /*0*/ /*0*/{1,0,0, 0}, /*1*/{1,1,0, 0}, /*2*/{0,1,0, 0}, /*3*/{ 0,0,0,0} }, // S_shape 
    { /*1*/      {0,0,1, 0},      {0,1,1, 0},      {0,1,0, 0},      { 0,0,0,0} }, // Z_shape 
    { /*2*/      {0,1,0, 0},      {0,1,1, 0},      {0,1,0, 0},      { 0,0,0,0} }, // T_shape 
    { /*3*/      {0,1,0, 0},      {0,1,0, 0},      {0,1,1, 0},      { 0,0,0,0} }, // L_shape 
    { /*4*/      {0,1,1, 0},      {0,1,0, 0},      {0,1,0, 0},      { 0,0,0,0} }, // ML_shape 
    { /*5*/      {1,1, 0,0},      {1,1, 0,0},      { 0,0,0,0},      { 0,0,0,0} }, // SQ_shape
    { /*6*/      {0,0,1,0 },      {0,0,1,0 },      {0,0,1,0 },      { 0,0,1,0} }, // R_shape
  },
  {//2
    { /*0*/ /*0*/{0,0,0, 0}, /*1*/{0,1,1, 0}, /*2*/{1,1,0, 0}, /*3*/{ 0,0,0,0} }, // S_shape 
    { /*1*/      {1,1,0, 0},      {0,1,1, 0},      {0,0,0, 0},      { 0,0,0,0} }, // Z_shape 
    { /*2*/      {0,0,0, 0},      {1,1,1, 0},      {0,1,0, 0},      { 0,0,0,0} }, // T_shape 
    { /*3*/      {0,0,0, 0},      {1,1,1, 0},      {1,0,0, 0},      { 0,0,0,0} }, // L_shape 
    { /*4*/      {0,0,0, 0},      {1,1,1, 0},      {0,0,1, 0},      { 0,0,0,0} }, // ML_shape 
    { /*5*/      {1,1, 0,0},      {1,1, 0,0},      { 0,0,0,0},      { 0,0,0,0} }, // SQ_shape
    { /*6*/      {0,0,0,0 },      {1,1,1,1 },      {0,0,0,0 },      { 0,0,0,0} }, // R_shape
  },
  {//3
    { /*0*/ /*0*/{0,1,0, 0}, /*1*/{0,1,1, 0}, /*2*/{0,0,1, 0}, /*3*/{ 0,0,0,0} }, // S_shape 
    { /*1*/      {0,1,0, 0},      {1,1,0, 0},      {1,0,0, 0},      { 0,0,0,0} }, // Z_shape 
    { /*2*/      {0,1,0, 0},      {1,1,0, 0},      {0,1,0, 0},      { 0,0,0,0} }, // T_shape 
    { /*3*/      {1,1,0, 0},      {0,1,0, 0},      {0,1,0, 0},      { 0,0,0,0} }, // L_shape 
    { /*4*/      {0,1,0, 0},      {0,1,0, 0},      {1,1,0, 0},      { 0,0,0,0} }, // ML_shape 
    { /*5*/      {1,1, 0,0},      {1,1, 0,0},      { 0,0,0,0},      { 0,0,0,0} }, // SQ_shape
    { /*6*/      {0,1,0,0 },      {0,1,0,0 },      {0,1,0,0 },      { 0,1,0,0} }, // R_shape
  },
};

static inline void displaylist(const int *list) { Draw_VLp((int *)list); }

#define IN_ROM
static inline void set_scale(unsigned int scale) {*(volatile unsigned char *)0xD004 = scale;}
#define DEFINE(name) static const int name[] IN_ROM = {
#define move_rel_xy(x,y) 0 /*MOVE*/, ((int)y), ((int)x)
#define line_rel_xy(x,y) (int)-1 /*LINE*/, ((int)y), ((int)x)
#define ENDDEF(name) 1 /*STOP*/ }; static void SHOW_##name(void) { displaylist(name); }

DEFINE(MINUS)
  move_rel_xy(4,8),  // to 8,4
  line_rel_xy(14,0), // to 8,20
  move_rel_xy(4,-8), // to 0,24
ENDDEF(MINUS)

DEFINE(N0)
  move_rel_xy(0,24), // to 0,24
  line_rel_xy(16,0), // to 16,24
  line_rel_xy(0,-24), // to 16,0
  line_rel_xy(-16,0), // to 0,0
  line_rel_xy(0,24), // to 0,24
  move_rel_xy(24,-24), // to 24,0
ENDDEF(N0)

DEFINE(N1)
  move_rel_xy(8,0), // to 8,0
  line_rel_xy(0,24), // to 8,24
  move_rel_xy(16,-24), // to 24,0
ENDDEF(N1)

DEFINE(N2)
  move_rel_xy(0,24), // to 0,24
  line_rel_xy(16,0), // to 16,24
  line_rel_xy(0,-12), // to 16,12
  line_rel_xy(-16,0), // to 0,12
  line_rel_xy(0,-12), // to 0,0
  line_rel_xy(16,0), // to 16,0
  move_rel_xy(8,0), // to 24,0
ENDDEF(N2)

DEFINE(N3)
  line_rel_xy(16,0), // to 16,0
  move_rel_xy(-8,12), // to 8,12
  line_rel_xy(8,0), // to 16,12
  move_rel_xy(0,-12), // to 16,0
  line_rel_xy(0,24), // to 16,24
  line_rel_xy(-16,0), // to 0,24
  move_rel_xy(24,-24), // to 24,0
ENDDEF(N3)

DEFINE(N4)
  move_rel_xy(0,24), // to 0,24
  line_rel_xy(0,-12), // to 0,12
  line_rel_xy(16,0), // to 16,12
  move_rel_xy(-8,0), // to 8,12
  line_rel_xy(0,-12), // to 8,0
  move_rel_xy(16,0), // to 24,0
ENDDEF(N4)

DEFINE(N5)
  line_rel_xy(16,0), // to 16,0
  line_rel_xy(0,12), // to 16,12
  line_rel_xy(-16,0), // to 0,12
  line_rel_xy(0,12), // to 0,24
  line_rel_xy(16,0), // to 16,24
  move_rel_xy(8,-24), // to 24,0
ENDDEF(N5)

DEFINE(N6)
  move_rel_xy(0,12), // to 0,12
  line_rel_xy(16,0), // to 16,12
  line_rel_xy(0,-12), // to 16,0
  line_rel_xy(-16,0), // to 0,0
  line_rel_xy(0,24), // to 0,24
  move_rel_xy(24,-24), // to 24,0
ENDDEF(N6)

DEFINE(N7)
  move_rel_xy(8,0), // to 8,0
  line_rel_xy(8,24), // to 16,24
  line_rel_xy(-16,0), // to 0,24
  move_rel_xy(24,-24), // to 24,0
ENDDEF(N7)

DEFINE(N8)
  line_rel_xy(16,0), // to 16,0
  move_rel_xy(-16,24), // to 0,24
  line_rel_xy(16,0), // to 16,24
  move_rel_xy(-16,-12), // to 0,12
  line_rel_xy(16,0), // to 16,12
  move_rel_xy(0,12), // to 16,24
  line_rel_xy(0,-24), // to 16,0
  move_rel_xy(-16,24), // to 0,24
  line_rel_xy(0,-24), // to 0,0
  move_rel_xy(24,0), // to 24,0
ENDDEF(N8)

DEFINE(N9)
  move_rel_xy(16,24), // to 16,24
  line_rel_xy(-16,0), // to 0,24
  line_rel_xy(0,-12), // to 0,12
  line_rel_xy(16,0), // to 16,12
  move_rel_xy(0,12), // to 16,24
  line_rel_xy(0,-24), // to 16,0
  move_rel_xy(4,0), // to 24,0
ENDDEF(N9)

static const int *digitlist[10] = {
  N0, N1, N2, N3, N4, N5, N6, N7, N8, N9
};

static inline void show_digit(int digit)
{
  displaylist(digitlist[(int)digit]);
}
static void position_and_scale(int absx, int absy, unsigned int scale)
{
  Reset0Ref();
  set_scale(0x7f);
  Moveto_d(absy, absx);
  set_scale(scale);
}
static void SHOW_SIGNED_NUM(long int num, int absx, int absy, unsigned int scale) { // left-aligned
  int digit, zeroes;

  // If scale is 0, this draws a number starting at the current position, which is at the origin
  // of the previous number offset by the number width.

  if (scale) position_and_scale(absx, absy, 0x20);

  // This replaces code that used divide by 10 and modulo 10.  Much faster.

  // handles full 16 bit range of -32768:32767  -  Uses negative numbers to avoid the issue of negating -32768

  if (num >= 0) num = -num; else SHOW_MINUS();   // TO DO: add a font character for '-'.
  digit = 0;
  zeroes = 1; // CLRing is shorter
  // max 11 add/subtracts...
  if (num <= -20000) { num += 20000; digit += 2; zeroes = 0; }
  if (num <= -10000) { num += 10000; digit += 1; zeroes = 0; }
  if (!zeroes) show_digit(digit);
  digit = 0;
  if (num <= -8000) { num += 8000; digit += 8; zeroes = 0; } else if (num <= -4000) { num += 4000; digit += 4; zeroes = 0; }
  if (num <= -2000) { num += 2000; digit += 2; zeroes = 0; }
  if (num <= -1000) { num += 1000; digit += 1; zeroes = 0; }
  if (!zeroes) show_digit(digit);
  digit = 0;
  if (num <= -800) { num += 800; digit += 8; zeroes = 0; } else if (num <= -400) { num += 400; digit += 4; zeroes = 0; }
  if (num <= -200) { num += 200; digit += 2; zeroes = 0; }
  if (num <= -100) { num += 100; digit += 1; zeroes = 0; }
  if (!zeroes) show_digit(digit);
  digit = 0;
  if (num <= -80) { num += 80; digit += 8; zeroes = 0; } else if (num <= -40) { num += 40; digit += 4; zeroes = 0; }
  if (num <= -20) { num += 20; digit += 2; zeroes = 0; }
  if (num <= -10) { num += 10; digit += 1; zeroes = 0; }
  if (!zeroes) show_digit(digit);
  show_digit((int)-num);
}

static void SHOW_UNSIGNED_NUM(unsigned long int num, int absx, int absy, unsigned int scale) { // left-aligned
  int digit, zeroes;

  // If scale is 0, this draws a number starting at the current position, which is at the origin
  // of the previous number offset by the number width.

  if (scale) position_and_scale(absx, absy, 0x20);

  // This replaces code that used divide by 10 and modulo 10.  Much faster.

  // handles full 16 bit range of -32768:32767  -  Uses negative numbers to avoid the issue of negating -32768

  digit = 0;
  zeroes = 1; // CLRing is shorter
  // max 11 add/subtracts...
  if (num >= 40000) { num -= 40000UL; digit += 4; zeroes = 0; }
  if (num >= 20000) { num -= 20000UL; digit += 2; zeroes = 0; }
  if (num >= 10000) { num -= 10000UL; digit += 1; zeroes = 0; }
  if (!zeroes) show_digit(digit);
  digit = 0;
  if (num >= 8000) { num -= 8000; digit += 8; zeroes = 0; }
  else if (num >= 4000) { num -= 4000; digit += 4; zeroes = 0; }
  if (num >= 2000) { num -= 2000; digit += 2; zeroes = 0; }
  if (num >= 1000) { num -= 1000; digit += 1; zeroes = 0; }
  if (!zeroes) show_digit(digit);
  digit = 0;
  if (num >= 800) { num -= 800; digit += 8; zeroes = 0; }
  else if (num >= 400) { num -= 400; digit += 4; zeroes = 0; }
  if (num >= 200) { num -= 200; digit += 2; zeroes = 0; }
  if (num >= 100) { num -= 100; digit += 1; zeroes = 0; }
  if (!zeroes) show_digit(digit);
  digit = 0;
  if (num >= 80) { num -= 80; digit += 8; zeroes = 0; }
  else if (num >= 40) { num -= 40; digit += 4; zeroes = 0; }
  if (num >= 20) { num -= 20; digit += 2; zeroes = 0; }
  if (num >= 10) { num -= 10; digit += 1; zeroes = 0; }
  if (!zeroes) show_digit(digit);
  show_digit((int)num);
}

static unsigned int horiz[16]; // each byte is 8 x's
static unsigned long vert[8];  // each word is 16 y's

static const unsigned long bitpos[16] = { 
   1UL, 2UL, 4UL, 8UL, 16UL, 32UL, 64UL, 128UL,
  256UL, 512UL, 1024UL, 2048UL, 4096UL, 8192UL, 16384UL, 32768UL 
};
static const unsigned long notbitpos[16] = {
  65535UL-1UL, 65535UL-2UL, 65535UL-4UL, 65535UL-8UL, 65535UL-16UL, 65535UL-32UL, 65535UL-64UL, 65535UL-128UL,
  65535UL-256UL, 65535UL-512UL, 65535UL-1024UL, 65535UL-2048UL, 65535UL-4096UL, 65535UL-8192UL, 65535UL-16384UL, 65535UL-32768UL 
};
static inline unsigned long int vertical(unsigned int x, unsigned int y) {
  // each byte is 8 y's
  return (vert[x]&bitpos[y]); // 0 or non-zero
}

static inline void setvertical(unsigned int x, unsigned int y) {    
  // each byte is 8 y's
  vert[x] |= bitpos[y];
}
static inline void clrvertical(unsigned int x, unsigned int y) {
  // each byte is 8 y's
  vert[x] &= notbitpos[y];
}
static inline void assignvertical(unsigned int x, unsigned int y, unsigned int bitvalue) {
  // each byte is 8 y's
  vert[x] &= notbitpos[y];
  if (bitvalue) vert[x] |= bitpos[y];
}


static inline unsigned long int horizontal(unsigned int x, unsigned int y) {
  // each byte is 8 x's
  return horiz[y]&(int)bitpos[x]; // 0 or non-zero
}

static inline void sethorizontal(unsigned int x, unsigned int y) {    
  // each byte is 8 x's
  horiz[y] |= (int)bitpos[x];
}
static inline void clrhorizontal(unsigned int x, unsigned int y) {
  // each byte is 8 x's
  horiz[y] &= (int)notbitpos[x];
}
static inline void assignhorizontal(unsigned int x, unsigned int y, unsigned int bitvalue) {
  // each byte is 8 x's
  horiz[y] &= (int)notbitpos[x];
  if (bitvalue) horiz[y] |= (int)bitpos[x];
}

#define XMAG 8 // See https://www.facebook.com/groups/vectrex/posts/1523916847818687/
#define YMAG 7
#define XBASE 8  //16
#define YBASE -4 // 8
static inline void DrawHorizontal(int xl, int xr, int y) { // coordinate range is 0:15,0:15
    Reset0Ref();
    Moveto_d((y-8)*YMAG+YBASE, (xl-4)*XMAG+XBASE);//  0..15 -> -8..7
    Draw_Line_d(0, (xr-xl)*XMAG); // draw longest line possible acrld
}

static inline void DrawVertical(int x, int yb, int yt) { // coordinate range is 0:15,0:15
    Reset0Ref();
    Moveto_d((yb-8)*YMAG+YBASE, (x-4)*XMAG+XBASE);//  0..7 -> -4..3
    Draw_Line_d((yt-yb)*YMAG, 0);
}

unsigned int _x;
unsigned int _a;
unsigned int _b;
unsigned int _c;
void initRandom (unsigned int s1, unsigned int s2, unsigned int s3, unsigned int x0) {
  _x = x0;
  _a = s1;
  _b = s2;
  _c = s3;
  _x++;
  _a = (_a ^ _c ^ _x);
  _b = (_b + _a);
  _c = ((_c + (_b >> 1)) ^ _a);
}

unsigned int random8 (void) {
  _x++;
  _a = (_a ^ _c ^ _x);
  _b = (_b + _a);
  _c = ((_c + (_b >> 1)) ^ _a);
  return _c;
}

static int DoesCurrentTetrominoIntersectBitTable(void);
int CanPlaceHere (void) {       // Check the position of the tile
  unsigned int *A;
  unsigned int i, j;
  
  // first a crude check to see if we are within the bounds of the play area.
  // Really this is just to avoid 'array bounds exceeded' errors such as
  // indexing the array by -1

  /* origin is top-left point of shape's bounding rectangle
     The 1-unit gap on the left is not sufficient to handle the
     case of:
          [][]##[]
          [][]##[]
          [][]##[]
          [][]##[]
        (shape 6, rot 2)
     which cannot be pushed over to the leftmost column
     I'm currently hacking that by making the 180 degree rotated form match
     the unrotated form, i.e.:
          []##[][]
          []##[][]
          []##[][]
          []##[][]
   */
  A = &Array[rot][shape][0][0];
  for (i = 0; i < width; i++) { // height really
    for (j = 0; j < width; j++) {  // <= makes 1-column shape work
      if (A[j]) {
        if (row + i == 0 || row + i >= ROWS) return FALSE;
        if (col + j == 0 || col + j >= COLS) return FALSE;
      }
    }
    A += 4;
  }
  // fits in frame but does it overlap with existing dropped tetrominoes?
  return DoesCurrentTetrominoIntersectBitTable();
}

int GetNextTile (void) {           // returns FALSE if table full
  rot = 0;
  shape = (unsigned int)rand()%SHAPES;
  width = widths[shape];
  // All tetrominoes spawn in 2 usually hidden rows at the top of the playfield.
  // They are placed in the center of these rows, rounding to the left.
  col = 3;
  if (width == 2) col++;
  if (width == 4) col--;
  row = 1;
  return (CanPlaceHere ());
}

static inline void SetTableColXRowY(unsigned int *BitTable, unsigned int x, unsigned int y) {
  // each byte is 8 x's
    BitTable[y] |= (int)bitpos[x];
}

static inline unsigned int IsSetTableColXRowY(unsigned int *BitTable, unsigned int x, unsigned int y) {
  // each byte is 8 x's
  return BitTable[y] & (int)bitpos[x];
}


void WriteCurrentTetrominoToVectorBitmap (void) {
  unsigned int *A;
  unsigned int i, j;
  unsigned int LEFT=1;

  A = &Array[rot][shape][0][0]; // The current tetromino
  for (i = 0; i < width; i++) { // really  height
    for (j = 0; j < width; j++) {  // <= makes 1-column shape work
      if (A[j]) {
        setvertical(col+j-LEFT,(ROWS-1)-(row+i));setvertical(col+j+1-LEFT,(ROWS-1)-(row+i));
        sethorizontal(col+j-LEFT,(ROWS-1)-(row+i));sethorizontal(col+j-LEFT,(ROWS-1)-(row+i-1));
      }
    }
    A += 4;
  }
}

void WriteCurrentTetrominoToBitTable (void) {
  unsigned int *A;
  unsigned int i, j;

  // TO DO: With a bitmask for the tetrominoes, writing to the BitTable should just be a shift and an OR.
  A = &Array[rot][shape][0][0]; // The current tetromino
  for (i = 0; i < width; i++) { // really  height
    for (j = 0; j < width; j++) {  // <= makes 1-column shape work
      if (A[j]) {
        // row,col is the location of the current tetromino
        SetTableColXRowY(BitTable, col + j - 1, ROWS-(row + i + 1)); // Now dropped.
      }
    }
    A += 4;
  }
}

static int DoesCurrentTetrominoIntersectBitTable (void) {
  unsigned int *A;
  unsigned int i, j;

  // TO DO: With a bitmask for the tetrominoes, writing to the BitTable should just be a shift and an OR.
  A = &Array[rot][shape][0][0]; // The current tetromino
  for (i = 0; i < width; i++) { // really  height
    for (j = 0; j < width; j++) {  // <= makes 1-column shape work
      if (A[j]) {
        // row,col is the location of the current tetromino
        if (IsSetTableColXRowY(BitTable, col + j - 1, ROWS-(row + i + 1))) return FALSE; // overlaps
      }
    }
    A += 4;
  }
  return TRUE;
}

void Check_and_collapse (void) {       // count lines
  unsigned int i,k,count;
/*
  Once a tetromino lands, it does not lock until the lock delay expires.
  The lock delay behavior, called Infinity by the Tetris Company, resets
  the lock delay whenever the tetromino is moved or rotated.
  Hard drop is generally mapped to up, which has no lock delay.
 */
  count=0;
  for (i = 2; i < ROWS; i++) { // scan top to bottom
    if (BitTable[i] == 63) { // solid row
      count = 0; // need to animate row elimination!
      for (k = i; k >= 3; k--) { // when found, move down everything above...
        BitTable[k] = BitTable[k - 1]; // move down
        count++; // score for every row eliminated
        changes=1;
      }
      BitTable[2] = 0; // emptied top slot
#ifdef CLI
      timer -= decrease--; // Speed it up???
#endif
    }
  }
  score += 100L * (long)count;
}

void PrintTable (void) {
  unsigned int Buffer[ROWS][COLS]; // this can be improved!
  unsigned int *A;
  unsigned int i, j;
return;
  for (i = 0; i < ROWS; i++) for (j = 0; j < COLS; j++) Buffer[i][j] = 0;
  A = &Array[rot][shape][0][0];
  for (i = 0; i < width; i++) { // really height
    for (j = 0; j < width; j++) {
      if (A[j] != 0) Buffer[row + i][col + j] = A[j]; // could just OR
    }
    A += 4;
  }
}

void HardDrop(void) { // This is a 'fast drop' to whereever the tile stops
/*
  Once a tetromino lands, it does not lock until the lock delay expires.
  The lock delay behavior, called Infinity by the Tetris Company, resets
  the lock delay whenever the tetromino is moved or rotated.
  Hard drop is generally mapped to up, which has no lock delay.
 */
  while (CanPlaceHere ()) row++;
  row--;
  WriteCurrentTetrominoToBitTable(); // before collapsing
  Check_and_collapse ();          // check full lines, after putting it down
}

void Move_Right_One_Step(void) {
    col++;                 // move right
    if (!CanPlaceHere ()) col--; 
}

void Move_Left_One_Step(void) {
    if (col > 0) {
      col--;                 // move left
      if (!CanPlaceHere ()) col++;
    }
}

void Rotate_Clockwise_One_Quarter_Turn(void) {
/*
  All tetrominoes exist inside a bounding square and rotate about
  the center of this square unless obstructed. Tetrominoes of width 3
  (J, L, S, T, Z) are placed in the top two rows of the bounding square
  and (for J, L, and T) with the flat side down. I is placed in the top middle row.
 */
  rot = (rot+1)&3; // clockwise
  width = widths[shape];
  if (!CanPlaceHere ()) {
      rot = (rot+3)&3; // undo
      width = widths[shape];
  }
}

void Rotate_AntiClockwise_One_Quarter_Turn(void) {
/*
  All tetrominoes exist inside a bounding square and rotate about
  the center of this square unless obstructed. Tetrominoes of width 3
  (J, L, S, T, Z) are placed in the top two rows of the bounding square
  and (for J, L, and T) with the flat side down. I is placed in the top middle row.
 */
  rot = (rot+3)&3; // widdershins
  width = widths[shape];
  if (!CanPlaceHere ()) {
      rot = (rot+3)&3; // undo
      width = widths[shape];
  }
}

void DrawGrid(void) {
  unsigned int x,y;

  if (changes) {
    // empty out the bitmap.
    for (y = 0; y < 16; y++) horiz[y] = 0;
    for (x = 0; x < 8; x++) vert[x] = 0;
    horiz[0] = 0xFF; //horiz[15] = 0xFF;
    vert[0] = 0x3FFF; vert[6] = 0x3FFF;

    WriteCurrentTetrominoToVectorBitmap();

    // convert block tables to vectors
    for (y = 0; y < ROWS; y++) {
      for (x = 0; x < COLS; x++) {
        if (IsSetTableColXRowY(BitTable, x,y)) {
          setvertical(x,y);setvertical(x+1,y);
          sethorizontal(x,y);sethorizontal(x,y+1);
          // if already set, unset?  two adjacent edges???
        }		  
      }
    }
    changes = 0; // saves about 8000 cycles when doing nothing
  }
  set_scale(255);
  Intensity_a(80);
  // Draw horizontal lines first:
  for (y = 0; y <= 15; y++ ) {
    if (horiz[y] == 0) continue;
    x = 0;
    for (;;) {
      if (horizontal(x,y)) {
        unsigned int x0=x;
        do x++; while ((x < 6) && (horizontal(x,y)));
        // A line starts here at x,y.  It may consist of more than one segment.
        DrawHorizontal((int)x0, (int)x, (int)y);
      } else x++;
      if (x >= 6) break;
    }
  }
  // Then vertical lines
  for (x = 0; x < 7; x++ ) {
    if (vert[x] == 0) continue;
    y = 0;
    for (;;) {
      if (vertical(x,y)) {
        unsigned int y0 = y;
        // A line starts here at x,y.  It may consist of more than one segment.
        do y++; while ((y < 15) && (vertical(x,y)));
        DrawVertical((int)x, (int)y0, (int)y); // line from y=0 to y=1 up to y=14 to y=15
      } else y++;
      if (y >= 15) break;
    }
  }
}

static int debounce_timer = 0;
#define DEBOUNCE_DELAY 12

int main(void) {
  //unsigned int y = 0, x = 0;
#define LOCK_DELAY 20
  int i = 0, dirx=0, diry=0;//, frames_until_next_step = LOCK_DELAY;
  
  for (i = 0; i < ROWS; i++) BitTable[i] = 0;
  score = 0;
  debounce_timer = 0;
  enable_controller_1_x();
  enable_controller_1_y();

  (void)GetNextTile (); // first one is always safe.
  changes=1;
  Wait_Recal(); // placed at the end of the loop so that the maximum possible
                // computation is done during idle time.
  check_joysticks(); check_buttons();
  Intensity_a(60);
  for (;;) {
#ifdef NEVER
    if (--frames_until_next_step == 0) {
      frames_until_next_step = LOCK_DELAY;
      Move_Down_One_Step();
    }
#endif


    //if (button_1_1_held()) { // leftmost button for internal debugging
    SHOW_SIGNED_NUM((long)debug, -50,120,255);
#ifdef NEVER
    SHOW_SIGNED_NUM((long)shape, -25,120,255);
    SHOW_SIGNED_NUM((long)rot,   0,120,255);
#endif
    SHOW_SIGNED_NUM((long)row,  25,120,255);
#ifdef NEVER
    SHOW_SIGNED_NUM((long)col,  50,120,255);
    SHOW_SIGNED_NUM((long)width,  75,120,255);
#endif
    //}

    if (button_1_1_pressed()) {
      shape = (shape+1U)%SHAPES;         // next shape (debugging)
      changes = 1;
    }

    if (button_1_2_pressed()) {
      shape = (shape+(SHAPES-1))%SHAPES; // previous shape
      changes = 1;
    }

    if (button_1_3_pressed()) {
      Rotate_AntiClockwise_One_Quarter_Turn();
      changes = 1;
    } else if (button_1_4_pressed()) {
      Rotate_Clockwise_One_Quarter_Turn();
      changes = 1;
    }

    DrawGrid();

    // Don't want cursor movement to trigger action every frame!
    if (debounce_timer) {
      debounce_timer -= 1;
    } else {
      i = joystick_1_x();
      if (i < -40) {
        dirx = -1;
      } else if (i > 40) {
        dirx = 1;
      }
      i = joystick_1_y();
      if (i < -40) {
        diry = -1;
      } else if (i > 40) {
        diry = 1;
      }
    }
    if (((dirx|diry) != 0) && (debounce_timer == 0)) {
      if (dirx>0) {
        Move_Right_One_Step();
        changes = 1;
      }
      if (dirx<0) {
        Move_Left_One_Step();
        changes = 1;
      }
      width = widths[shape];
      if (diry<0) {
        //if (--frames_until_next_step == 0) {
          //frames_until_next_step = LOCK_DELAY;
        //}
        // debugging
        row++;                 // move down
        if (!CanPlaceHere ()) { // only allow collapse if tile has stopped moving
                          // even though it is theoretically possible for it
                          // to form a line while dropping through a gap to
                          // a position below which does *not* form a line... 
          row--; // undo
          WriteCurrentTetrominoToBitTable(); // before collapsing
          // Might be better to do 'check and collapse' at head of loop, when *next* tile
          // is ready to drop.
          Check_and_collapse ();          // check full lines, after putting it down
          if (!GetNextTile ()) {
            // END OF GAME!  Reset and restart
            i = 0; dirx=0; diry=0; //, frames_until_next_step = LOCK_DELAY;
            for (i = 0; i < ROWS; i++) BitTable[i] = 0;
            debounce_timer = 0;
          }
        }
        changes=1;
      } else if (diry>0) {
        HardDrop();
        if (!GetNextTile ()) {
          // END OF GAME!  Reset and restart
          i = 0; dirx=0; diry=0; //, frames_until_next_step = LOCK_DELAY;
          for (i = 0; i < ROWS; i++) BitTable[i] = 0;
          debounce_timer = 0;
        }
        changes=1;
      }
      dirx = diry = 0; debounce_timer = DEBOUNCE_DELAY;
    }
    check_joysticks(); check_buttons();
    Wait_Recal();
    Intensity_a(60);
  }
  return 0;
  (void) &SHOW_N0;  (void) &SHOW_N1;  (void) &SHOW_N2;  (void) &SHOW_N3;
  (void) &SHOW_N4;  (void) &SHOW_N5;  (void) &SHOW_N6;  (void) &SHOW_N7;
  (void) &SHOW_N8;  (void) &SHOW_N9;  (void) &SHOW_MINUS;
  (void) &SHOW_SIGNED_NUM;  (void) &SHOW_UNSIGNED_NUM;
}