/*
 * Copyright (c) 2024 Amelia Zabardast Ziabari
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  1. Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *  2. Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *  3. Neither the name of the copyright holder nor the names of its
 *     contributors may be used to endorse or promote products derived from
 *     this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <strings.h>
#include <fcntl.h>
#include <time.h>
#include <sys/time.h>

extern char *optarg;
extern int optind;

/* BOARD SELECTION --------------------------------------------------------- */


/* Pin direction selectors handed to MODEP(). */
#define MOD_OUT 0
#define MOD_IN  1
#if defined(BOARD_RASPI)
#include <wiringPi.h>
/*
 * These pin definitions are BCM-logical (the "BCM" column in wiringPi's
 * "gpio readall" command).  They have been chosen to maintain compatibility
 * with previous attempts to solve this problem, in order to reduce infuriating
 * experiences with re-wiring breadboards for the user.
 *
 * They also have the nice property of being compatible with boards that use a
 * reduced GPIO connector, as well as avoiding the SPI-related GPIO pins (which
 * you may want to keep untouched if you also use your Raspberry Pi as an SPI
 * programmer).
 */
#define PIN_RSET 17
#define PIN_LAD0 22
#define PIN_LAD1 23
#define PIN_LAD2 24
#define PIN_LAD3 25
#define PIN_LFRM 27
#define PIN_LCLK 18
#define PIN_WREN 4
/*
 * Board abstraction layer used by the rest of the file:
 *   PSETUP      - GPIO library initializer, called once from prepare_pins()
 *   OUTP(p, s)  - drive pin p HIGH when s is non-zero, LOW otherwise
 *   INP(p)      - sample pin p
 *   MODEP(p, m) - set pin direction (MOD_IN -> INPUT, MOD_OUT -> OUTPUT)
 */
#define PSETUP wiringPiSetupGpio
#define OUTP(p, s) digitalWrite(p, (s) ? HIGH : LOW)
#define INP(p) digitalRead(p)
#define MODEP(p, m) pinMode(p, (m) ? INPUT : OUTPUT)
#elif defined(BOARD_DUMMY)
/* Only useful with the dummy programmer. */
/*
 * Every pin operation expands to nothing and INP() always yields 1.  The
 * PIN_* names are NOT defined for this board, so they may only ever appear
 * as arguments to OUTP()/INP()/MODEP(), which discard them.
 */
static void dummySetup() {}
#define PSETUP dummySetup
#define OUTP(p, s)
#define INP(p) 1
#define MODEP(p, m)
#else
#error "no SBC board specification selected!"
#endif

/* CHIP AND FUNCTIONAL DECLARATION ----------------------------------------- */


typedef unsigned long long ndelay_t;

#define TMMIX(a, t) (ndelay_t)((a) * (double)(t))
#define USEC(t) TMMIX(1000.0, t)
#define MSEC(t) TMMIX(1000000.0, t)
#define  SEC(t) TMMIX(1000000000.0, t)
#define STABILIZE USEC(1)
#define DEFAULT_LAD .lad = STABILIZE

/*
 * Description of one supported flash chip: bus type, identification bytes,
 * geometry, access callbacks and timing parameters.  Instances live in
 * devicetab[].
 */
typedef struct dself {
	char *name;	/* matched case-insensitively by get_chip_by_name() */
	enum {
		BUS_FWH,	/* Firmware Hub Transfers */
		BUS_LPC,	/* LPC Transfers */
		BUS_LPC_ISA	/* LPC Transfers with ISA Compatibility */
	} type;
	/* Bit flags; may be OR-ed together. */
	enum {
		NEEDS_MISC_BLOCK_UNLOCK	= 0x0001,
		NEEDS_WE_ALWAYS_ON	= 0x0002
	} quirks;
	enum {
		TEST_WIP,	/* Not even finished */
		TEST_NONE,	/* Untested */
		TEST_I,		/* Identifies */
		TEST_IR,	/* Identifies & Reads */
		TEST_IRW	/* Identifies, Reads & Writes */
	} status;
	/* Expected identification bytes, compared by the probe functions. */
	unsigned char ven_id;
	unsigned char dev_id;
	/* Bases added to chip-relative addresses by read_reg()/write_reg() and
	 * read_mem()/write_mem()/op_mmem() respectively.
	 */
	unsigned long register_base;
	unsigned long memory_base;
	unsigned long bsz; /* block  */
	unsigned long ssz; /* sector */
	unsigned long csz; /* chip   */

	struct {
		unsigned long size;
		enum {
			XFER_N, /* terminator value (must be zero) */
			XFER_RW,
			XFER_R,
			XFER_W
		} type;
	} xfers[8]; /* valid "IMSIZE" field sizes. */

	/* Erase callbacks; a NULL entry means the operation is unavailable
	 * and the generic code picks a fallback or aborts.
	 */
	void (*s_erase)(struct dself *, unsigned long);
	void (*b_erase)(struct dself *, unsigned long);
	void (*c_erase)(struct dself *);
	/* Completion poll used by gen_wait_status(); non-zero means done. */
	int (*erased)(struct dself *);

	/* (device, chip-relative address, byte count, buffer) */
	void (*write)(struct dself *, unsigned long, size_t, char *);
	void (*read)(struct dself *, unsigned long, size_t, char *);

	/* Returns non-zero when the attached chip matches this entry. */
	int (*probe)(struct dself *);

	/* All values are ndelay_t nanosecond counts. */
	struct {
		ndelay_t lad;			/* settle time per LAD nibble */
		ndelay_t program;		/* fallback wait after program */
		ndelay_t s_erase;		/* fallback wait after sector erase */
		ndelay_t b_erase;		/* fallback wait after block erase */
		ndelay_t c_erase;		/* fallback wait after chip erase */
		ndelay_t toggle_bit_delay;	/* pause between completion polls */
		ndelay_t cycle;			/* pause after each bus cycle */
		ndelay_t regset;		/* pause after a register write */
	} delay;
} mdev_t;

/* "Standardized" access functions for various types of chips. */
/* These have external linkage and are referenced from devicetab[] below. */
void jedec_block_erase(mdev_t *, unsigned long);
void jedec_chip_erase(mdev_t *);
void jedec_sector_erase(mdev_t *, unsigned long);
void jedec_program(mdev_t *, unsigned long, size_t, char *);
void jedec_read(mdev_t *, unsigned long, size_t, char *);
int jedec_chip_identify(mdev_t *);
int toggle_d6_erased(mdev_t *d);

/* Dummy functions */
/* File-backed stand-ins used by the "dummy" device entry. */
void dummy_sector_erase(mdev_t *, unsigned long);
void dummy_block_erase(mdev_t *, unsigned long);
void dummy_chip_erase(mdev_t *);
void dummy_program(mdev_t *, unsigned long, size_t, char *);
void dummy_read(mdev_t *, unsigned long, size_t, char *);

/* SST "Mini-Command" functions */
int sstmc_chip_identify(mdev_t *d);

/*
 * Table of known devices.  When no chip is forced on the command line,
 * main() walks this table in order and selects the first entry whose probe
 * callback succeeds; entries with a NULL probe are never auto-detected.
 * Members left out of an initializer are zero/NULL.
 */
mdev_t devicetab[] = {
	{
		.name = "dummy",
		.status = TEST_IRW,
		.bsz = 0x10000,
		.ssz = 0x1000,
		.csz = 0x100000,
		.xfers = { { 128, XFER_RW } },
		.s_erase = dummy_sector_erase,
		.b_erase = dummy_block_erase,
		.c_erase = dummy_chip_erase,
		.erased = NULL,
		.write = dummy_program,
		.read = dummy_read,
		.probe = NULL,
		.delay = {0}
	},
	{
		/* See "Design Considerations" in the datasheet. */
		.name = "SST49LF008A",
		.type = BUS_FWH,
		.quirks = NEEDS_MISC_BLOCK_UNLOCK,
		.status = TEST_IRW,
		.ven_id = 0xbf,
		.dev_id = 0x5a,
		.register_base = 0xff800000,
		.memory_base = 0xffc00000,
		.bsz = 0x10000,
		.ssz = 0x1000,
		.csz = 0x100000,
		.s_erase = jedec_sector_erase,
		.b_erase = jedec_block_erase,
		.c_erase = NULL,
		.erased = toggle_d6_erased,
		.write = jedec_program,
		.read = jedec_read,
		.probe = jedec_chip_identify,
		.delay = {
			DEFAULT_LAD
		}
	},
	{
		/* Similar to SST49LF008A, but not FWH and smaller. */
		.name = "SST49LF020",
		.type = BUS_LPC_ISA,
		.status = TEST_NONE,
		.ven_id = 0xbf,
		.dev_id = 0x61,
		.register_base = 0xff800000,
		.memory_base = 0xffc00000,
		.bsz = 0x4000,
		.ssz = 0x1000,
		.csz = 0x40000,
		.s_erase = jedec_sector_erase,
		.b_erase = jedec_block_erase,
		.c_erase = NULL,
		.erased = toggle_d6_erased,
		.write = jedec_program,
		.read = jedec_read,
		.probe = jedec_chip_identify,
		.delay = {
			DEFAULT_LAD
		}
	},
	{
		/* Special considerations for this chip to save you the pain I
		 * went through:
		 *  - Vpp must be 3.3V. (!)
		 *  - All Vdd and Vss pins must be connected.
		 *  - Use a high frequency/AC 0.1uF ceramic capacitor between
		 *    Vdd and Vss no further than 1cm away from Vdd.
		 */
		.name = "W39V080FA",
		.type = BUS_FWH,
		.quirks = NEEDS_MISC_BLOCK_UNLOCK | NEEDS_WE_ALWAYS_ON,
		.status = TEST_IRW,
		.ven_id = 0xda,
		.dev_id = 0xd3,
		.register_base = 0xff800000,
		.memory_base = 0xffc00000,
		.bsz = 0x10000,
		.ssz = 0x10000,
		.csz = 0x100000,
		.s_erase = jedec_sector_erase,
		.b_erase = NULL,
		.c_erase = NULL,
		.erased = toggle_d6_erased,
		.write = jedec_program,
		.read = jedec_read,
		.probe = jedec_chip_identify,
		.delay = {
			DEFAULT_LAD,
			.toggle_bit_delay = MSEC(8)
		}
	},
	{
		/* TODO: Finish this chip (does not identify, needs special
		 * commands).
		 */
		.name = "SST49LF016C",
		.type = BUS_FWH,
		.quirks = 0,
		.status = TEST_WIP,
		.ven_id = 0xbf,
		.dev_id = 0x5c,
		.register_base = 0xff800000,
		.memory_base = 0xffc00000,
		.bsz = 0x10000,
		.ssz = 0x1000,
		.csz = 0x200000,
		.xfers = {
			{ 2  , XFER_RW }, { 2  , XFER_RW },
			{ 4  , XFER_RW }, { 4  , XFER_RW },
			{ 16 , XFER_R  },
			{ 128, XFER_R  }
		},
		.s_erase = NULL,
		.b_erase = NULL,
		.c_erase = NULL,
		.erased = NULL,
		.write = NULL,
		.read = NULL,
		.probe = sstmc_chip_identify,
		.delay = {
			DEFAULT_LAD
		}
	}
};
/* Number of entries in devicetab[]. */
int devices = sizeof(devicetab) / sizeof(devicetab[0]);

/* DISPLAY AND DEBUG HELPERS ----------------------------------------------- */


/* Verbosity threshold for VDEBUG(); overridable at build time. */
#ifndef DEBUG_LEVEL
#define DEBUG_LEVEL 0
#endif
static int dbglvl = DEBUG_LEVEL;

/* ABORT: print a message to stderr, then leave through safe_exit(), which
 * parks the pins and terminates the process with status 1.
 */
#define ABORT(...) do { fprintf(stderr, __VA_ARGS__); safe_exit(); } while (0)
/* DEBUG: unconditional message to stderr. */
#define DEBUG(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
/* ASSERT: always compiled in; the condition text becomes part of the message,
 * so the expression is evaluated exactly once and may have side effects.
 */
#define ASSERT(cond) do { if (!(cond)) \
		ABORT("%d: assertion failed: " #cond "\n", __LINE__); \
} while (0)
/* VDEBUG: message printed only when level l is at or below dbglvl. */
#define VDEBUG(l, ...) do { if ((l) <= dbglvl) DEBUG(__VA_ARGS__); } while (0)

/* Convert a struct timeval (passed by name, not by pointer) to milliseconds.
 * NOTE(review): the expansion is not wrapped in outer parentheses and both
 * members are cast to unsigned, so a negative tv_usec yields a huge value.
 */
#define TO_MS(tv) ((unsigned long long)tv.tv_sec * 1000LL) + \
((unsigned long long)tv.tv_usec / 1000LL)

/*
 * Print a one-line, carriage-return terminated progress indicator to stderr,
 * at most once every 250ms.
 *
 *   op - short operation label
 *   c  - current position
 *   t  - end position (the percentage shown is 100 * c / t)
 *
 * The time of the last printed update is kept in static storage, so the
 * throttle is shared by every caller.
 */
static void
progress(char *op, unsigned long c, unsigned long t)
{
	static struct timeval s1 = {0}, s2 = {0};
	struct timeval sub;

	gettimeofday(&s1, NULL);
	sub.tv_sec  = s1.tv_sec  - s2.tv_sec;
	sub.tv_usec = s1.tv_usec - s2.tv_usec;
	/* Borrow from the seconds field when the microsecond difference is
	 * negative; otherwise the unsigned conversion inside TO_MS() turns it
	 * into an enormous value and the throttle fires far too early
	 * whenever a second boundary is crossed.
	 */
	if (sub.tv_usec < 0) {
		sub.tv_sec  -= 1;
		sub.tv_usec += 1000000;
	}
	if (TO_MS(sub) > 250LL) {
		s2 = s1;
		/* guard the percentage against a zero-length operation */
		DEBUG("   %s (%2d%%) %08lx / %08lx\r",
		    op, t ? (int)((100 * c) / t) : 100, c, t);
	}
}

/* MAIN DECLARATIONS ------------------------------------------------------- */


/* Input/output image file opened by -r / -w; closed by safe_exit() or main(). */
static FILE *fp = NULL;

static void safe_exit(void);

/* Control pins. */
static void we(mdev_t *, int);
static void reset(void);

/* Raw nibble-level access to the LAD[3:0] bus. */
static void lad_output(void);
static void lad_input(void);
static void lad_write(mdev_t *, unsigned char, int);
static unsigned char lad_read(mdev_t *);
static void prepare_pins(void);

static unsigned int log2i(unsigned int);

/* Bus cycle construction. */
static void lad_address(mdev_t *, unsigned long address);
static void lad_rsync(mdev_t *);
static void lad_start(mdev_t *, int);
static void read_maddress(mdev_t *, unsigned long, size_t, char *);
static void write_maddress(mdev_t *, unsigned long, size_t, char *);
static unsigned char read_address(mdev_t *, unsigned long);
static void write_address(mdev_t *, unsigned long, unsigned char);

/* Register / memory window helpers (add the per-chip base address). */
static size_t max_xfer(mdev_t *, unsigned long, size_t, int);
static unsigned char read_reg(mdev_t *, unsigned long);
static void write_reg(mdev_t *, unsigned long, unsigned char);
static void op_mmem(mdev_t *, unsigned long, size_t, char *, int);
static unsigned char read_mem(mdev_t *, unsigned long);
static void write_mem(mdev_t *, unsigned long, unsigned char);

static void get_jedec_id(mdev_t *, unsigned char *, unsigned char *);
static void get_jedec_id2(mdev_t *, unsigned char *, unsigned char *);

static void get_sstmc_id(mdev_t *, unsigned char *, unsigned char *);

static void reset_echk(void);
static void misc_unlock_block(mdev_t *d, unsigned long block);

/* Chip-independent operations built on the mdev_t callbacks. */
static void gen_chip_setup(mdev_t *);
static void gen_read(mdev_t *, char *, unsigned long, unsigned long, int);
static void gen_wait_status(mdev_t *, ndelay_t, int);
static void gen_erase_block(mdev_t *, unsigned long);
static void gen_erase_chip(mdev_t *, int);
static void gen_write(mdev_t *, char *, unsigned long, unsigned long, int);

static char *alloc_chip(mdev_t *);

/* Erase/write/verify progress display. */
static void fwc_template(int);
static void fwc_setfield(int, int, char);
static void full_write_cycle(mdev_t *, char *);

/* Command-line front end. */
static mdev_t *get_chip_by_name(char *);
static void usage();
static const char *fmt_chip_status(int);

/* HIGH-ACCURACY DELAY AND TIME ROUTINES ----------------------------------- */


/* Not all operating systems can sleep for a reasonable amount of time.  On my
 * system, usleep(1); makes the average full-chip read take up to a whole
 * day(!), so in order to prioritize chip operation speed over host CPU usage,
 * let's just create an unoptimized no-op loop and calibrate it.
 */
static int clob;
#define NOOP asm volatile ("nop" : "+r" (clob))
#define NOOPLOOP(t) do {						\
	unsigned long long i; for (i = 0; i < t; i++) NOOP;		\
} while (0)
#define BIGCLK(tv) (((unsigned long long)(tv)->tv_sec * 1000000000ULL)	\
+ (unsigned long long)(tv)->tv_nsec)
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))

/*
 * Busy-wait for approximately `ns` nanoseconds by spinning on a calibrated
 * no-op loop.
 *
 * The first call measures how many loop iterations fit into one millisecond
 * (stored in the static `itpms`), self-checks the result with a 150us test
 * delay and aborts if that delay came out short.  Every call, including the
 * calibrating one, then waits for the requested duration.
 */
static void
ndelay(ndelay_t ns)
{
	static unsigned long long itpms = 0, ax;
	if (itpms == 0) {
		struct timespec r1, r2;
		unsigned long long tres;
		int monot, realt, i;
		clockid_t clck;
		DEBUG("calibrating delay scale... ");
		/* select the most accurate clock, assuming it won't change */
		monot = clock_getres(CLOCK_MONOTONIC, &r1);
		realt = clock_getres(CLOCK_REALTIME, &r2);
		if (monot != 0 || realt != 0) {
			/* at least one query failed: r1/r2 may be unset, so
			 * decide on the return codes alone
			 */
			clck = realt == 0 ? CLOCK_REALTIME : CLOCK_MONOTONIC;
		} else {
			/* prioritizes monotonic if equal */
			clck = BIGCLK(&r2) < BIGCLK(&r1) ?
			    CLOCK_REALTIME : CLOCK_MONOTONIC;
		}
		/* warm up the processor to max freq */
		NOOPLOOP(1000000);
		/* now calibrate for 1msec over and over for shortest cycles */
		for (i = 0; i < 128; i++) {
			ax = 2;
			/* double the iteration count until one run takes at
			 * least 1ms, then scale it back to exactly 1ms
			 */
			for (;;) {
				clock_gettime(clck, &r1);
				NOOPLOOP(ax);
				clock_gettime(clck, &r2);
				tres = BIGCLK(&r2) - BIGCLK(&r1);
				if (tres >= 1000000) break;
				ax *= 2;
			}
			ax = (1000000.0 / (double)tres) * (double)ax;
			/* need to find the quickest run (the largest number of
			 * required instructions) as this varies by factors
			 * like CPU frequency and context switches.
			 */
			if (itpms == 0) itpms = ax;
			else itpms = MAX(itpms, ax);
		}
		DEBUG("needs %llu iterations for 1msec\n", itpms);
		/* itpms is now non-zero, so this nested call only delays */
		clock_gettime(clck, &r1);
		ndelay(150000);
		clock_gettime(clck, &r2);
		tres = BIGCLK(&r2) - BIGCLK(&r1);
		DEBUG("slept for 150us, got %fus\n", (double)tres / 1000.0);
		/* bail out if calibration is untrustworthy! */
		ASSERT(tres >= 150000);
	}
	/* iterations per nanosecond times the requested nanoseconds */
	ax = ((double)itpms / 1000000.0) * (double)ns;
	VDEBUG(15, " (W %lluns %lluistr) ", ns, ax);
	NOOPLOOP(ax);
}

/* Wait for the per-device delay named `a` (a member of d->delay). */
#define DELAY_FOR(d, a) ndelay(d->delay.a)

/* ACCESS PRIMITIVES ------------------------------------------------------- */


/*
 * Error exit: park the programmer pins, close the image file if one is open
 * and terminate the process with status 1.  Never returns.
 */
static void
safe_exit(void)
{
	/* keep the reset pin driven high while the other pins are released */
	OUTP(PIN_RSET, 1);
	MODEP(PIN_RSET, MOD_OUT);
	/* drop write enable before the pin is turned into an input */
	OUTP(PIN_WREN, 0);
	lad_input();
	MODEP(PIN_WREN, MOD_IN);
	MODEP(PIN_LFRM, MOD_IN);
	MODEP(PIN_LCLK, MOD_IN);
	if (fp != NULL) fclose(fp);
	exit(1);
}

/* Switch all four LAD pins to output so the host can drive the bus. */
static void
lad_output(void)
{
	MODEP(PIN_LAD0, MOD_OUT);
	MODEP(PIN_LAD1, MOD_OUT);
	MODEP(PIN_LAD2, MOD_OUT);
	MODEP(PIN_LAD3, MOD_OUT);
}

/* Switch all four LAD pins to input so the bus can be sampled. */
static void
lad_input(void)
{
	MODEP(PIN_LAD0, MOD_IN);
	MODEP(PIN_LAD1, MOD_IN);
	MODEP(PIN_LAD2, MOD_IN);
	MODEP(PIN_LAD3, MOD_IN);
}

/*
 * Clock one nibble out on LAD[3:0].
 *
 *   d           - device, supplies the settle delay (delay.lad)
 *   data        - value to drive; only the low four bits may be set
 *   start_frame - non-zero to hold the frame pin low for this nibble
 *
 * The LAD pins must already be outputs (see lad_output()).
 */
static void
lad_write(mdev_t *d, unsigned char data, int start_frame)
{
	ASSERT((data & 0xf0) == 0);
	if (start_frame)
		OUTP(PIN_LFRM, 0);
	/* clock low, present the data, wait, then raise the clock */
	OUTP(PIN_LCLK, 0);
	OUTP(PIN_LAD0, data & 0x1 ? 1 : 0);
	OUTP(PIN_LAD1, data & 0x2 ? 1 : 0);
	OUTP(PIN_LAD2, data & 0x4 ? 1 : 0);
	OUTP(PIN_LAD3, data & 0x8 ? 1 : 0);
	DELAY_FOR(d, lad);
	OUTP(PIN_LCLK, 1);
	if (start_frame)
		OUTP(PIN_LFRM, 1);
	VDEBUG(10, "LAD W: %02x (%d)\n", data, start_frame);
}

/*
 * Clock one nibble in from LAD[3:0] and return it in the low four bits.
 * The LAD pins must already be inputs (see lad_input()).
 */
static unsigned char
lad_read(mdev_t *d)
{
	unsigned char v = 0;
	/* clock high, wait for the bus to settle, sample, then clock low */
	OUTP(PIN_LCLK, 1);
	DELAY_FOR(d, lad);
	if (INP(PIN_LAD0)) v |= 0x1;
	if (INP(PIN_LAD1)) v |= 0x2;
	if (INP(PIN_LAD2)) v |= 0x4;
	if (INP(PIN_LAD3)) v |= 0x8;
	OUTP(PIN_LCLK, 0);
	VDEBUG(10, "LAD R: %02x\n", v);
	return v;
}

/*
 * Drive the write-enable pin.  Chips flagged NEEDS_WE_ALWAYS_ON get the pin
 * held high no matter what the caller asked for; all others follow `v`.
 */
static void
we(mdev_t *d, int v)
{
	if (d->quirks & NEEDS_WE_ALWAYS_ON) {
		OUTP(PIN_WREN, 1);
	} else {
		OUTP(PIN_WREN, v);
	}
}

/*
 * Pulse the reset pin: low for 1ms, then high, followed by a 500ms pause
 * before returning.
 */
static void
reset(void)
{
	OUTP(PIN_RSET, 0);
	usleep(1000);
	OUTP(PIN_RSET, 1);
	usleep(500000);
}

/*
 * Initialize the GPIO layer and bring every programmer pin to a known
 * state, then reset the chip.
 */
static void
prepare_pins(void)
{
	/* NOTE(review): the result of PSETUP() is not checked. */
	PSETUP();
	/* Levels are written before the direction is set; presumably so the
	 * pins do not glitch when they become outputs -- confirm against the
	 * GPIO library's behaviour.
	 */
	OUTP(PIN_RSET, 0);
	OUTP(PIN_LCLK, 1);
	OUTP(PIN_LFRM, 1);
	OUTP(PIN_WREN, 0);

	OUTP(PIN_LAD0, 0);
	OUTP(PIN_LAD1, 0);
	OUTP(PIN_LAD2, 0);
	OUTP(PIN_LAD3, 0);
	/* the data pins start out as inputs; control pins become outputs */
	lad_input();
	MODEP(PIN_RSET, MOD_OUT);
	MODEP(PIN_WREN, MOD_OUT);
	MODEP(PIN_LFRM, MOD_OUT);
	MODEP(PIN_LCLK, MOD_OUT);
	reset();
}

/* LPC/FWH PROTOCOL PRIMITIVES --------------------------------------------- */


/*
 * Clock the address field out, most significant nibble first.  FWH cycles
 * carry seven nibbles (28 bits); every other bus type carries eight (32 bits).
 */
static void
lad_address(mdev_t *d, unsigned long address)
{
	const int nibbles = (d->type == BUS_FWH) ? 7 : 8;
	int shift;

	for (shift = 4 * (nibbles - 1); shift >= 0; shift -= 4)
		lad_write(d, (address >> shift) & 0xf, 0);
}

/* xfer_errs_fatal: when set, protocol violations abort the program.
 * global_t0: OR of every TAR0 nibble observed after a transfer.
 */
static int xfer_errs_fatal = 0, global_t0 = 0;

/*
 * Consume the sync field of a cycle.  A value of 0 means the chip is ready.
 * Values 5 and 6 ask the host to wait (1us and 100us respectively here)
 * and sample again -- the W39V080FA, for instance, often answers 5 several
 * times.  Any other value aborts when errors are fatal and is otherwise
 * ignored.
 */
static void
lad_rsync(mdev_t *d)
{
	for (;;) {
		unsigned char sync = lad_read(d);

		if (sync == 0)
			return;
		if (sync == 5) {
			ndelay(STABILIZE);
			continue;
		}
		if (sync == 6) {
			ndelay(USEC(100));
			continue;
		}
		if (xfer_errs_fatal)
			ABORT("(read) field RSYNC not zero: %01x\n", sync);
		return;
	}
}

/*
 * Emit the start of a cycle for the device's bus type.
 *
 * The nibble always has bit 2 set, plus bit 1 for writes.  FWH adds bit 3
 * and, for reads, bit 0; plain LPC adds bit 0 for writes.  Both send the
 * nibble with the frame pin asserted.  LPC-with-ISA instead sends an
 * all-zero framed nibble followed by the unframed cycle-type nibble.
 */
static void
lad_start(mdev_t *d, int write)
{
	unsigned char nib = 0x4;

	if (write)
		nib |= 0x2;

	if (d->type == BUS_LPC_ISA) {
		lad_write(d, 0x00, 1);
		lad_write(d, nib, 0);
		return;
	}

	if (d->type == BUS_FWH) {
		nib |= 0x8;
		if (!write)
			nib |= 0x1;
	} else if (d->type == BUS_LPC) {
		if (write)
			nib |= 0x1;
	}
	lad_write(d, nib, 1);
}

/*
 * Base-2 logarithm of a power of two, using the mask technique from
 * https://graphics.stanford.edu/~seander/bithacks.html#IntegerLog
 *
 * Each mask selects the bit positions whose index has a given bit set; the
 * result is the OR of the index bits whose mask overlaps v.  The answer is
 * only a true logarithm when exactly one bit of v is set.  Returns 0 for 0.
 */
static unsigned int
log2i(unsigned int v)
{
	unsigned int result = 0;

	if (v & 0xFFFF0000u) result |= 16u;
	if (v & 0xFF00FF00u) result |= 8u;
	if (v & 0xF0F0F0F0u) result |= 4u;
	if (v & 0xCCCCCCCCu) result |= 2u;
	if (v & 0xAAAAAAAAu) result |= 1u;
	return result;
}

/*
 * Perform one read cycle of `a` bytes starting at the absolute bus address
 * `address`, storing the result in `out`.
 *
 * `a` is encoded into the size field through log2i(), so it is expected to
 * be a power of two.  The TAR0 nibble seen after the data is OR-ed into
 * global_t0; a value other than 0xf aborts when xfer_errs_fatal is set.
 */
static void
read_maddress(mdev_t *d, unsigned long address, size_t a, char *out)
{
	size_t i;
	unsigned char v;
	lad_output();
	/* FWH vs. LPC use different START bits for differentiation. */
	lad_start(d, 0); 				/* START (READ) */
	lad_write(d, 0x00, 0); 				/* IDSEL 0000 */
	lad_address(d, address); 			/* IMADDR */
	lad_write(d, log2i(a), 0); 			/* IMSIZE */
	lad_write(d, 0x0f, 0); 				/* TAR0 */
	/* hand the bus over to the chip */
	lad_input();
	lad_read(d); 					/* TAR1 */
	lad_rsync(d);					/* RSYNC */
	/* each byte arrives as two nibbles, low nibble first */
	for (i = 0; i < a; i++) {
		out[i]  = lad_read(d); 			/* DATA (LOW) */
		out[i] |= lad_read(d) << 4; 		/* DATA (HIGH) */
	}
	if((v = lad_read(d)) != 0xf && xfer_errs_fatal)	/* TAR0 */
		ABORT("(read) field TAR0 not all ones: %01x\n", v);
	global_t0 |= v;
	lad_read(d); 					/* TAR1 */
	/* take the bus back and honour the inter-cycle delay */
	lad_output();
	DELAY_FOR(d, cycle);
}

/*
 * Perform one write cycle of `a` bytes from `in` to the absolute bus address
 * `address`.
 *
 * `a` is encoded into the size field through log2i(), so it is expected to
 * be a power of two.  The TAR0 nibble returned by the chip is OR-ed into
 * global_t0; a value other than 0xf aborts when xfer_errs_fatal is set.
 */
static void
write_maddress(mdev_t *d, unsigned long address, size_t a, char *in)
{
	size_t i;
	unsigned char v;
	lad_output();
	lad_start(d, 1); 				/* START (WRITE) */
	lad_write(d, 0x00, 0); 				/* IDSEL 0000 */
	lad_address(d, address); 			/* IMADDR */
	lad_write(d, log2i(a), 0); 			/* IMSIZE */
	/* each byte is sent as two nibbles, low nibble first; the mask
	 * discards any sign extension from the shift of a plain char
	 */
	for (i = 0; i < a; i++) {
		lad_write(d,  in[i]       & 0xf , 0); 	/* DATA (LOW) */
		lad_write(d, (in[i] >> 4) & 0xf , 0); 	/* DATA (HIGH) */
	}
	lad_write(d, 0x0f, 0); 				/* TAR0 */
	/* hand the bus over to the chip for its response */
	lad_input();
	lad_read(d); 					/* TAR1 */
	lad_rsync(d);					/* RSYNC */
	if((v = lad_read(d)) != 0xf && xfer_errs_fatal)	/* TAR0 */
		ABORT("(write) field TAR0 not all ones: %01x!\n", v);
	global_t0 |= v;
	lad_read(d); 					/* TAR1 */
	/* take the bus back and honour the inter-cycle delay */
	lad_output();
	DELAY_FOR(d, cycle);
}

/*
 * Read a single byte from the absolute bus address `address`.
 *
 * The byte is converted to unsigned char before it is logged, so the trace
 * shows two hex digits even where plain char is signed.
 */
static unsigned char
read_address(mdev_t *d, unsigned long address)
{
	char buf[1];
	unsigned char v;

	read_maddress(d, address, 1, buf);
	v = (unsigned char)buf[0];
	VDEBUG(5, "1R: %08lx, %02x\n", address, v);
	return v;
}

/*
 * Write the single byte `data` to the absolute bus address `address`.
 *
 * The trace logs the unsigned parameter instead of the plain-char buffer,
 * so values of 0x80 and above are not sign-extended in the output.
 */
static void
write_address(mdev_t *d, unsigned long address, unsigned char data)
{
	char buf[1];

	buf[0] = (char)data;
	VDEBUG(5, "1W: %08lx, %02x\n", address, data);
	write_maddress(d, address, 1, buf);
}

/* CHIP REGION ACCESS PRIMITIVES ------------------------------------------- */


/*
 * Work out the largest transfer size the chip supports for an operation.
 *
 *   d  - device whose xfers[] table is consulted
 *   s  - starting address of the operation
 *   tc - total size of the operation
 *   t  - XFER_R or XFER_W
 *
 * A table entry qualifies when its type is `t` or XFER_RW and both `s` and
 * `tc` are multiples of its size.  In most cases that alignment already
 * holds, so there is no attempt to optimize for the case where it doesn't.
 * Returns 1 when no entry qualifies.
 *
 * The scan stops at the XFER_N terminator or at the end of the array,
 * whichever comes first, so a completely filled table is not overrun.
 */
static size_t
max_xfer(mdev_t *d, unsigned long s, size_t tc, int t)
{
	const size_t slots = sizeof(d->xfers) / sizeof(d->xfers[0]);
	size_t i, r = 1;

	for (i = 0; i < slots; i++) {
		size_t pta = d->xfers[i].size;
		int ptt = d->xfers[i].type;

		if (ptt == XFER_N) break;

		/* pta > r implies pta >= 2, so the modulo is never by zero */
		if ( (pta > r)
		    && (ptt == t || ptt == XFER_RW)
		    && !(tc % pta)
		    && !(s  % pta)) r = pta;
		VDEBUG(10, "ESTABLISH XFER SZ: %08lx\n", (unsigned long)r);
	}
	return r;
}

/* Loop header: v runs from s up to (but excluding) s + a in steps of x, and
 * the expression e is evaluated before every step.
 */
#define XFERL(v, s, a, x, e)			\
for (v = s; v < (s + a); (e),v += x)

/* Non-zero silences the progress display of the current operation. */
static int opp_q = 0;
#define PROG_PRINT(t, c, e) do {					\
	if (!opp_q) progress(t == XFER_R ? "READ " : "WRITE", c, e);	\
} while (0)
#define PROG_FIN do {				\
	if (!opp_q) DEBUG("\n");		\
} while (0)

/* Produces variables i and xf, containing the current address and transfer
 * size respectively, and a loop that iterates over an address space in
 * transfer-sized chunks.
 *
 * The enclosing scope must provide a pointer named "buf"; it is advanced by
 * xf after every chunk.  Aborts if s + a exceeds the chip size.
 * NOTE(review): the expansion already ends in a semicolon, so the macro is
 * not safe as the sole body of an if/else.
 */
#define XFEROP(d, s, a, t, ...)	do {	    \
	unsigned long i;		    \
	size_t xf = max_xfer(d, s, a, t);   \
	ASSERT(s + a <= d->csz);	    \
	XFERL(i, s, a, xf, buf += xf) {	    \
		PROG_PRINT(t, i, s + a);    \
		__VA_ARGS__;		    \
		    }			    \
	PROG_FIN;			    \
} while (0);

/* Read one byte from the register window, at `address` past register_base. */
static unsigned char
read_reg(mdev_t *d, unsigned long address)
{
	const unsigned long target = address + d->register_base;

	return read_address(d, target);
}

/*
 * Write one byte into the register window, at `address` past register_base,
 * then wait for the device's register-set delay.
 */
static void
write_reg(mdev_t *d, unsigned long address, unsigned char data)
{
	const unsigned long target = address + d->register_base;

	write_address(d, target, data);
	DELAY_FOR(d, regset);
}

/*
 * Multi-byte access to the memory window: transfers `a` bytes between `buf`
 * and `address` past memory_base.  A non-zero `w` selects a write, zero a
 * read.
 */
static void
op_mmem(mdev_t *d, unsigned long address, size_t a, char *buf, int w)
{
	const unsigned long target = address + d->memory_base;

	if (w) {
		write_maddress(d, target, a, buf);
	} else {
		read_maddress(d, target, a, buf);
	}
}

/* Read one byte from the memory window, at `address` past memory_base. */
static unsigned char
read_mem(mdev_t *d, unsigned long address)
{
	const unsigned long target = address + d->memory_base;

	return read_address(d, target);
}

/* Write one byte into the memory window, at `address` past memory_base. */
static void
write_mem(mdev_t *d, unsigned long address, unsigned char data)
{
	const unsigned long target = address + d->memory_base;

	write_address(d, target, data);
}

/* MISC FUNCTIONS ---------------------------------------------------------- */


/*
 * Clear the lock register of block number `block`.
 *
 * The register sits at offset 0x300002 combined with the block's byte
 * offset inside the register window.  It must read 0x01 beforehand and
 * 0x00 afterwards; anything else aborts the program.
 */
static void
misc_unlock_block(mdev_t *d, unsigned long block)
{
	unsigned long ulock = 0x300002 | (d->bsz * block);
	ASSERT(read_reg(d, ulock) == 0x01);
	write_reg(d, ulock, 0x00);
	/* read back to confirm the chip accepted the unlock */
	ASSERT(read_reg(d, ulock) == 0x00);
}

/* JEDEC FUNCTIONS --------------------------------------------------------- */


/* Bit 6 of the previous status read (see reset_echk() for the start value). */
static unsigned char d6_lt;

/*
 * Completion poll based on data bit 6 of address 0: returns 1 once two
 * consecutive reads show the same bit-6 value, 0 while it still changes.
 * The latest sample is remembered for the next call.
 */
int
toggle_d6_erased(mdev_t *d)
{
	const unsigned char sample = read_mem(d, 0x0000) & 0x40;

	if (sample != d6_lt) {
		d6_lt = sample;
		return 0;
	}
	return 1;
}

/* Reset the state kept between completion polls. */
static void
reset_echk(void)
{
	/*
	 * This function should be used for resetting erase-check state for all
	 * available chips.
	 */
	/* 0xAB can never equal a value masked with 0x40, so the first poll
	 * after a reset always reports "not finished".
	 */
	d6_lt = 0xAB;
}

/*
 * Identification method 1: read the vendor and device bytes from register
 * offsets 0x3c0000 and 0x3c0001 and store them in *ven and *dev.
 */
static void
get_jedec_id(mdev_t *d, unsigned char *ven, unsigned char *dev)
{
	*ven = read_reg(d, 0x3c0000);
	*dev = read_reg(d, 0x3c0001);
}

/*
 * Identification method 2: issue the three-write command sequence ending in
 * 0x90, read the vendor and device bytes from memory offsets 0 and 1 into
 * *ven and *dev, then write 0xF0 to offset 0 (presumably leaving ID mode --
 * confirm against the datasheet).
 */
static void
get_jedec_id2(mdev_t *d, unsigned char *ven, unsigned char *dev)
{
	write_mem(d, 0x5555, 0xAA);
	write_mem(d, 0x2AAA, 0x55);
	write_mem(d, 0x5555, 0x90);
	*ven = read_mem(d, 0x0000);
	*dev = read_mem(d, 0x0001);
	write_mem(d, 0x0000, 0xF0);
}

/* Report the outcome of one identification attempt: m is the method family,
 * c its number (both string literals), v/d the bytes read, p the verdict.
 */
#define IDDBG(m, c, v, d, p) DEBUG(					\
    m " identification method " c ": "					\
	"ven %02x dev %02x (%s)\n", v, d, p ? "PASSED" : "FAILED");

/*
 * Probe callback: the chip matches when both identification methods return
 * the table's vendor/device pair.  A mismatch on the first method returns 0
 * silently, without trying the second.
 */
int
jedec_chip_identify(mdev_t *d)
{
	unsigned char ven, dev;
	int second_ok;

	get_jedec_id(d, &ven, &dev);
	if (ven != d->ven_id || dev != d->dev_id)
		return 0;
	IDDBG("jedec", "1", ven, dev, 1);

	get_jedec_id2(d, &ven, &dev);
	second_ok = (ven == d->ven_id && dev == d->dev_id);
	IDDBG("jedec", "2", ven, dev, second_ok);
	return second_ok;
}

void
jedec_sector_erase(mdev_t *d, unsigned long sector)
{
	write_mem(d, 0x5555, 0xAA);
	write_mem(d, 0x2AAA, 0x55);
	write_mem(d, 0x5555, 0x80);
	write_mem(d, 0x5555, 0xAA);
	write_mem(d, 0x2AAA, 0x55);
	write_mem(d, d->ssz * sector, 0x30);
	gen_wait_status(d, d->delay.s_erase, 1);
}

void
jedec_block_erase(mdev_t *d, unsigned long block)
{
	write_mem(d, 0x5555, 0xAA);
	write_mem(d, 0x2AAA, 0x55);
	write_mem(d, 0x5555, 0x80);
	write_mem(d, 0x5555, 0xAA);
	write_mem(d, 0x2AAA, 0x55);
	write_mem(d, d->bsz * block, 0x50);
	gen_wait_status(d, d->delay.b_erase, 1);
}

void
jedec_chip_erase(mdev_t *d)
{
	write_mem(d, 0x5555, 0xAA);
	write_mem(d, 0x2AAA, 0x55);
	write_mem(d, 0x5555, 0x80);
	write_mem(d, 0x5555, 0xAA);
	write_mem(d, 0x2AAA, 0x55);
	write_mem(d, 0x5555, 0x10);
	gen_wait_status(d, d->delay.c_erase, 1);
}

/*
 * Program `a` bytes from `buf` at memory offset `address`: three command
 * writes ending in 0xA0, the data transfer itself, then a wait for
 * completion (polling without the inter-poll delay).
 */
void
jedec_program(mdev_t *d, unsigned long address, size_t a, char *buf)
{
	write_mem(d, 0x5555, 0xAA);
	write_mem(d, 0x2AAA, 0x55);
	write_mem(d, 0x5555, 0xA0);
	op_mmem(d, address, a, buf, 1);
	gen_wait_status(d, d->delay.program, 0);
}

/* Read `a` bytes at memory offset `address` into `buf`; no command needed. */
void
jedec_read(mdev_t *d, unsigned long address, size_t a, char *buf)
{
	op_mmem(d, address, a, buf, 0);
}

/* SST MINI-COMMAND IMPLEMENTATION ----------------------------------------- */


/* Modes tracked for the mini-command chip. */
enum sstmc_modes {
	SSTMC_READARRAY,
	SSTMC_SOFTWAREID,
	SSTMC_STATUSREG
};

/* Mode the chip is believed to be in; updated by get_sstmc_id(). */
static int sstmc_mode = SSTMC_READARRAY;

/*
 * Read the vendor and device bytes using the mini-command method.  The 0x90
 * command is only written when the chip is not already recorded as being in
 * software-ID mode; that mode is recorded afterwards.
 */
static void
get_sstmc_id(mdev_t *d, unsigned char *ven, unsigned char *dev)
{
	const int already_in_id_mode = (sstmc_mode == SSTMC_SOFTWAREID);

	if (!already_in_id_mode) {
		write_mem(d, 0x0000, 0x90);
	}
	*ven = read_mem(d, 0x3c0000);
	*dev = read_mem(d, 0x3c0001);
	sstmc_mode = SSTMC_SOFTWAREID;
}

/*
 * Probe callback for mini-command chips.  The register-based ID must read
 * either all zeroes or the expected pair before the mini-command ID is
 * tried; the result of the latter decides the match.
 */
int
sstmc_chip_identify(mdev_t *d)
{
	unsigned char ven, dev;
	int matched;

	get_jedec_id(d, &ven, &dev);
	if (!(ven == 0 && dev == 0)
	    && !(ven == d->ven_id && dev == d->dev_id))
		return 0;

	get_sstmc_id(d, &ven, &dev);
	matched = (ven == d->ven_id && dev == d->dev_id);
	IDDBG("sstmc", "1", ven, dev, matched);
	return matched;
}

/* DUMMY IMPLEMENTATION ---------------------------------------------------- */


/* Backing file of the dummy chip (opened by -d); NULL when not in use. */
static FILE *dummy_chip = NULL;

/* Write the byte c to stream f, a times.  The counter is unsigned long so it
 * matches the chip geometry fields it is compared against.
 */
#define FFILL(f, c, a) do {						\
	unsigned long ffill_n_;						\
	for (ffill_n_ = 0; ffill_n_ < (unsigned long)(a); ffill_n_++)	\
		fputc((c), (f));					\
} while (0)
/* Seek stream f to absolute offset t, aborting on failure.  Wrapped in
 * do/while so it behaves as a single statement inside if/else.
 */
#define FSEEK(f, t) do {						\
	if (fseeko((f), (t), SEEK_SET) < 0)				\
		ABORT("failed to seek!\n");				\
} while (0)

/* Fill sector number `sector` of the backing file with 0xFF bytes.
 * Does nothing when no backing file is open.
 */
void
dummy_sector_erase(mdev_t *d, unsigned long sector)
{
	if (dummy_chip != NULL) {
		FSEEK(dummy_chip, sector * d->ssz);
		FFILL(dummy_chip, 0xFF, d->ssz);
	}
}

/* Fill block number `block` of the backing file with 0xFF bytes.
 * Does nothing when no backing file is open.
 */
void
dummy_block_erase(mdev_t *d, unsigned long block)
{
	if (dummy_chip != NULL) {
		FSEEK(dummy_chip, block * d->bsz);
		FFILL(dummy_chip, 0xFF, d->bsz);
	}
}

/* Fill the whole backing file (chip size bytes from offset 0) with 0xFF.
 * Does nothing when no backing file is open.
 */
void
dummy_chip_erase(mdev_t *d)
{
	if (dummy_chip != NULL) {
		FSEEK(dummy_chip, 0);
		FFILL(dummy_chip, 0xFF, d->csz);
	}
}

/* Store `a` bytes from `buf` at offset `address` of the backing file,
 * aborting on a short write.  Does nothing when no backing file is open.
 */
void
dummy_program(mdev_t *d, unsigned long address, size_t a, char *buf)
{
	if (dummy_chip != NULL) {
		FSEEK(dummy_chip, address);
		ASSERT(fwrite(buf, a, 1, dummy_chip) == 1);
	}
}

/* Load `a` bytes at offset `address` of the backing file into `buf`,
 * aborting on a short read.  When no backing file is open, `buf` is left
 * untouched.
 */
void
dummy_read(mdev_t *d, unsigned long address, size_t a, char *buf)
{
	if (dummy_chip != NULL) {
		FSEEK(dummy_chip, address);
		ASSERT(fread(buf, a, 1, dummy_chip) == 1);
	}
}

/* GENERIC IMPLEMENTATION -------------------------------------------------- */


/*
 * Prepare an identified chip for read/erase/write operations.  From here on
 * protocol errors are fatal.  The per-chip quirks are then applied.
 */
static void
gen_chip_setup(mdev_t *d)
{
	unsigned long i, be;
	unsigned char bl;
	xfer_errs_fatal = 1;
	if (d->quirks & NEEDS_WE_ALWAYS_ON) {
		DEBUG("WARNING: chip requires WP# always active!\n");
		/* raise the pin, then reset the chip with it held high */
		we(d, 1);
		reset();
		/* same command sequence as get_jedec_id2(), used here to
		 * inspect bits 2-3 at offset 0xFFFF2; any of them set is
		 * treated as "still locked"
		 */
		write_mem(d, 0x5555, 0xAA);
		write_mem(d, 0x2AAA, 0x55);
		write_mem(d, 0x5555, 0x90);
		if ((bl = read_mem(d, 0x000FFFF2) & 0xc) > 0)
			ABORT("CHIP STILL LOCKED! %02x\n", bl);
		write_mem(d, 0x0000, 0xF0);
	}
	if (d->quirks & NEEDS_MISC_BLOCK_UNLOCK) {
		/* unlock every block through its lock register */
		be = d->csz / d->bsz;
		for (i = 0; i < be; i++) {
			DEBUG("unlocking block %lu / %lu\r", i + 1, be);
			misc_unlock_block(d, i);
		}
		DEBUG("\n");
	}
}

/*
 * Read `a` bytes starting at chip offset `s` into `buf`, in the largest
 * chunks the chip supports.  A non-zero `o` enables the progress display.
 * Aborts when the chip has no read callback or the range exceeds the chip.
 */
static void
gen_read(mdev_t *d, char *buf, unsigned long s, unsigned long a, int o)
{
	if (!d->read)
		ABORT("this chip has no read implementation!\n");
	opp_q = !o;
	/* XFEROP supplies i (current offset) and xf (chunk size) and advances
	 * buf by xf after each chunk
	 */
	XFEROP(d, s, a, XFER_R, do {
		    VDEBUG(3, "DREAD: %08lx, %08lx\n", i, (unsigned long)xf);
		    d->read(d, i, xf, buf);
	    } while (0));
}

/*
 * Wait for a program or erase operation to finish.
 *
 *   d        - device; its `erased` callback is polled until it is non-zero
 *   fallback - fixed delay used instead when the device has no callback
 *   slow     - non-zero to pause for toggle_bit_delay before every poll
 *
 * The poll state is reset before anything else happens.
 */
static void
gen_wait_status(mdev_t *d, ndelay_t fallback, int slow)
{
	int polls = 0;

	reset_echk();
	if (d->erased == NULL) {
		ndelay(fallback);
		return;
	}

	for (;;) {
		if (slow)
			DELAY_FOR(d, toggle_bit_delay);
		polls++;
		if (d->erased(d) != 0)
			break;
	}
	VDEBUG(5, "WAITED: %d tries\n", polls);
}

/*
 * Erase block number `block`, using the chip's block-erase callback when it
 * has one and otherwise erasing every sector inside the block.  Aborts when
 * neither is possible.
 *
 * Afterwards the first byte of the block is read back and must be 0xFF.
 */
static void
gen_erase_block(mdev_t *d, unsigned long block)
{
	unsigned long i, ss, se;
	char sanity[1];
	we(d, 1);
	if (d->b_erase) {
		d->b_erase(d, block);
	} else if (d->bsz && d->ssz && d->csz && d->s_erase) {
		/* ssz is checked above because it is the divisor here */
		ss = (block    ) * d->bsz / d->ssz;
		se = (block + 1) * d->bsz / d->ssz;
		for (i = ss; i < se; i++)
			d->s_erase(d, i);
	} else ABORT("no generic block erasure method could be employed.\n");
	/* sanity check */
	we(d, 0);
	gen_read(d, sanity, d->bsz * block, 1, 0);
	/* compare as unsigned: where plain char is signed an erased byte
	 * reads back as -1 and would never equal 0xFF
	 */
	ASSERT((unsigned char)sanity[0] == 0xFF);
}

/*
 * Erase the whole chip.  The chip-erase callback is used when the device has
 * one; otherwise every block is erased in turn.  A non-zero `o` enables the
 * per-block progress line.  Write enable is raised for the duration and
 * lowered at the end.
 */
static void
gen_erase_chip(mdev_t *d, int o)
{
	we(d, 1);
	if (!d->c_erase) {
		const unsigned long total = d->csz / d->bsz;
		unsigned long blk;

		for (blk = 0; blk < total; blk++) {
			if (o)
				DEBUG("erase block %lu / %lu\r", blk + 1, total);
			gen_erase_block(d, blk);
		}
		if (o)
			DEBUG("\n");
	} else {
		DEBUG("issuing a complete chip erase command...\n");
		d->c_erase(d);
	}
	we(d, 0);
}

/*
 * Program `a` bytes from `buf` starting at chip offset `s`, in the largest
 * chunks the chip supports.  A non-zero `o` enables the progress display.
 * Aborts when the range exceeds the chip or there is no write callback.
 * Write enable is raised for the duration and lowered at the end.
 */
static void
gen_write(mdev_t *d, char *buf, unsigned long s, unsigned long a, int o)
{
	unsigned long e = s + a;
	ASSERT(e <= d->csz);
	we(d, 1);
	if (!d->write)
		ABORT("this chip has no write implementation!\n");
	opp_q = !o;
	/* XFEROP supplies i (current offset) and xf (chunk size) and advances
	 * buf by xf after each chunk
	 */
	XFEROP(d, s, a, XFER_W, do {
		    VDEBUG(3, "DWRITE: %08lx, %08lx\n", i, (unsigned long)xf);
		    d->write(d, i, xf, buf);
	    } while (0));
	we(d, 0);
}

/* ERASE-WRITE-VERIFY CODE ------------------------------------------------- */


/*
 * Allocate a buffer large enough to hold the entire chip (d->csz bytes).
 * The caller owns the buffer and releases it with free().
 *
 * Aborts the program when the allocation fails, so callers never receive
 * NULL for a chip with a non-zero size.
 */
static char *
alloc_chip(mdev_t *d)
{
	char *image = malloc(d->csz);

	/* malloc(0) may legitimately return NULL; only a failed non-empty
	 * request is an error
	 */
	if (image == NULL && d->csz != 0)
		ABORT("failed to allocate %lu bytes for the chip image\n",
		    d->csz);
	return image;
}

/* Rows of the erase/write/verify display, in the order they are printed. */
enum fwf {
	FWF_ERASE,
	FWF_WRITE,
	FWF_VERIFY
};

/* Characters shown in a display cell. */
enum fwc {
	FWF_WAIT	= '.',
	FWF_PROGRESS	= '%',
	FWF_DONE	= 'S'
};

/*
 * Print the empty erase/write/verify table: a header row with one hex digit
 * per block, followed by three rows of FWF_WAIT cells.
 *
 * Only the low nibble of the block number is printed, so the header stays
 * one column per block (and lines up with the cells below) for chips with
 * more than 16 blocks.
 */
static void
fwc_template(int blocks)
{
	static const char *const rows[] = {
		"   ERASE  : ",
		"   WRITE  : ",
		"   VERIFY : "
	};
	size_t r;
	int i;

	DEBUG("   BLOCK  : ");
	for (i = 0; i < blocks; i++)
		DEBUG("%01X", (unsigned int)i & 0xfu);
	DEBUG("\n");

	for (r = 0; r < sizeof(rows) / sizeof(rows[0]); r++) {
		DEBUG("%s", rows[r]);
		for (i = 0; i < blocks; i++)
			DEBUG("%c", FWF_WAIT);
		DEBUG("\n");
	}
}

/*
 * Overwrite one cell of the table printed by fwc_template() with `v`.
 * The cursor is moved up to the row for `field`, right to the column for
 * `block` (the cells begin after a 12-character label), and back down.
 */
static void
fwc_setfield(int field, int block, char v)
{
	const int rows_up = 3 - field;
	const int column = 12 + block;

	DEBUG("\r\033[%dA\033[%dC%c\r\033[%dB",
	    rows_up, column, v, rows_up);
	fflush(stderr);
}

/*
 * Erase, write and verify the entire chip block by block, updating the
 * on-screen table as each step completes, then read the whole chip back and
 * compare it against `buf` once more.
 *
 *   d   - target device
 *   buf - image to program; must hold d->csz bytes
 *
 * Any verification mismatch, or a failure to allocate the verify buffer,
 * aborts the program.
 */
static void
full_write_cycle(mdev_t *d, char *buf)
{
	char *vbuf;
	unsigned long i, be, bs;
	be = d->csz / d->bsz;
	DEBUG("initiating a full EWV (Erase Write Verify) cycle...\n\n");
	fwc_template(be);
	vbuf = alloc_chip(d);
	if (vbuf == NULL)
		ABORT("\nfailed to allocate the verify buffer\n");
	for (i = 0; i < be; i++) {
		bs = i * d->bsz;	/* byte offset of block i */
		fwc_setfield(FWF_ERASE, i, FWF_PROGRESS);
		gen_erase_block(d, i);
		fwc_setfield(FWF_ERASE, i, FWF_DONE);
		fwc_setfield(FWF_WRITE, i, FWF_PROGRESS);
		gen_write(d, buf + bs, bs, d->bsz, 0);
		fwc_setfield(FWF_WRITE, i, FWF_DONE);
		fwc_setfield(FWF_VERIFY, i, FWF_PROGRESS);
		/* only the first bsz bytes of vbuf are used per block */
		gen_read(d, vbuf, bs, d->bsz, 0);
		if (memcmp(vbuf, buf + bs, d->bsz) != 0)
			ABORT("\nverify 1 failed for block %lu\n", i);
		fwc_setfield(FWF_VERIFY, i, FWF_DONE);
	}
	DEBUG("\n   SUCCESS -- checking full chip...\n");
	gen_read(d, vbuf, 0, d->csz, 1);
	if (memcmp(vbuf, buf, d->csz) != 0)
		ABORT("verify 2 failed for chip\n");
	DEBUG("   SUCCESS -- all ok!\n");
	free(vbuf);
}

/* USER INTERFACE FUNCTIONALITY -------------------------------------------- */


/*
 * Look up a device table entry by name, ignoring case.
 * Returns a pointer into devicetab[], or NULL when `name` is NULL or no
 * entry matches.
 */
static mdev_t *
get_chip_by_name(char *name)
{
	int i;

	if (name == NULL)
		return NULL;
	for (i = 0; i < devices; i++) {
		/* strcasecmp() is declared in <strings.h> */
		if (strcasecmp(devicetab[i].name, name) == 0)
			return &devicetab[i];
	}
	return NULL;
}

/* Print the option summary to stderr and leave through safe_exit(). */
static void
usage()
{
	static const char help_text[] =
	    "SBC FWH/LPC flash tool\n\n"
	    " -i (identify only)\n"
	    " -r <file> (read to file)\n"
	    " -w <file> (write from file)\n"
	    " -E (erase)\n"
	    " -W (write without erasing)\n"
	    " -c <chip> (override chip detection)\n"
	    " -d <file> (open file as backing for dummy chip)\n"
	    " -f (force untested chip)\n";

	/* the text contains no conversion specifiers, so fputs emits exactly
	 * what a printf-style call would
	 */
	fputs(help_text, stderr);
	safe_exit();
}

/*
 * Human-readable description of a device's test status.  Unknown values
 * are reported the same way as TEST_WIP.
 */
static const char *
fmt_chip_status(int status)
{
	if (status == TEST_NONE)
		return "NONE";
	if (status == TEST_I)
		return "IDENTIFY";
	if (status == TEST_IR)
		return "IDENTIFY READ";
	if (status == TEST_IRW)
		return "IDENTIFY READ WRITE";
	/* TEST_WIP and anything unrecognized */
	return "!WORK IN PROGRESS!";
}

/*
 * Entry point: set up the pins, parse the command line, select a chip
 * (forced with -c, auto-detected by probing, or the dummy chip on the dummy
 * board) and run the requested operation.
 *
 * Returns 0 on success.  Every error path leaves through ABORT()/ASSERT(),
 * which park the pins and exit with status 1.
 */
int
main(int argc, char **argv)
{
	int ch, dwrite = 0, force = 0;
	int probed = 0;		/* set once any probe touched the bus */
	enum {
		CM_NOTHING,
		CM_IDENTIFY,
		CM_READ,
		CM_WRITE,
		CM_ERASE
	} mode = CM_NOTHING;
	mdev_t *d = NULL;
	char *buf, *chip_name = NULL;
#ifndef BOARD_DUMMY
	int i;
	mdev_t *dt;
#endif
	prepare_pins();

	/* Open optarg into f; on failure report the file name and leave via
	 * safe_exit() so the pins configured above are parked again.
	 */
#define TRY_OPEN_FILE(f, m) do {					\
	if ((f = fopen(optarg, m)) == NULL) {				\
		perror(optarg);						\
		safe_exit();						\
	}								\
} while (0)
	/* -W is a plain flag: it must not be followed by ':' here */
	while ((ch = getopt(argc, argv, "hir:w:EWc:d:f")) != -1) {
		switch (ch) {
		case 'i':
			mode = CM_IDENTIFY;
			break;
		case 'r':
			mode = CM_READ;
			TRY_OPEN_FILE(fp, "wb");
			break;
		case 'w':
			mode = CM_WRITE;
			TRY_OPEN_FILE(fp, "rb");
			break;
		case 'E':
			mode = CM_ERASE;
			break;
		case 'W':
			dwrite = 1;
			break;
		case 'c':
			/* argv outlives every use of the name; no copy needed */
			chip_name = optarg;
			break;
		case 'd':
			TRY_OPEN_FILE(dummy_chip, "r+b");
			break;
		case 'f':
			force = 1;
			break;
		case '?':
		case 'h':
		default:
			usage();
			break;
		}
	}
	argc -= optind;
	argv += optind;

	if (mode == CM_NOTHING)
		ABORT("no mode selected, aborting.\n");
	if (dwrite && mode != CM_WRITE)
		ABORT("need to specify -w with -W.\n");

#ifdef BOARD_DUMMY
	chip_name = "dummy";
	d = get_chip_by_name(chip_name);
#else
	if (chip_name) {
		d = get_chip_by_name(chip_name);
		if (!d)
			ABORT("unknown chip name: %s\n", chip_name);
		if (d->probe) {
			probed = 1;
			if (!d->probe(d))
				DEBUG("WARNING: forced chip but probe failed!\n");
		}
	} else {
		for (i = 0; i < devices; i++)
		{
			dt = &devicetab[i];
			if (!dt->probe)
				continue;
			probed = 1;
			if (dt->probe(dt)) {
				d = dt;
				break;
			}
		}
	}
#endif

	/* global_t0 only changes during bus transfers, so the wiring check
	 * is meaningful only when a probe actually ran; chips without a
	 * probe (the dummy) would otherwise always be rejected here.
	 */
	if (probed && global_t0 != 0xf)
		ABORT("wiring fault, tar0 max at %d\n", global_t0);
	if (!d)
		ABORT("no supported flash chip found!\n");

	/* size the backing file only once the chip is known */
	if (dummy_chip)
		ASSERT(ftruncate(fileno(dummy_chip), d->csz) == 0);

	DEBUG("found %s at %s!\n", d->name,
	    d->type == BUS_FWH ? "FWH" : "LPC");

	DEBUG("detected chip tested for operations: %s\n",
	    fmt_chip_status(d->status));
	if (!force && d->status != TEST_IRW)
		ABORT(
		    "detected chip hasn't been fully tested!\n"
		    "you may leave the chip unrecoverable. (see -f)\n");

	if (mode == CM_IDENTIFY) return 0;

	/*
	 * Chip size and block size must always be defined (even if block size
	 * is not a real number, although it must always be a multiple of the
	 * sector size).  Checked before the setup code divides by them.
	 */
	ASSERT(d->bsz && d->csz);

	gen_chip_setup(d);

	buf = alloc_chip(d);
	if (!buf)
		ABORT("failed to allocate the chip image buffer\n");

	switch (mode) {
	default:
		usage();
		break;
	case CM_READ:
		gen_read(d, buf, 0, d->csz, 1);
		ASSERT(fwrite(buf, d->csz, 1, fp) == 1);
		break;
	case CM_WRITE:
		ASSERT(fread(buf, d->csz, 1, fp) == 1);
		if (!dwrite) full_write_cycle(d, buf);
		else gen_write(d, buf, 0, d->csz, 1);
		break;
	case CM_ERASE:
		gen_erase_chip(d, 1);
		break;
	}

	if (fp) fclose(fp);
	if (dummy_chip) fclose(dummy_chip);
	free(buf);
	return 0;
}
