/* shx.c -- Simple HeX (SHX)
** A simplified, dependency-less, and hopefully portable utility for converting
** data into a hexadecimal representation. (C99)
** ----------------------------------------------------------------------------
** Copyright (c) 2022, 2023 Amelia Zabardast Ziabari
**
** Redistribution and use in source and binary forms, with or without
** modification, are permitted provided that the following conditions are met:
**
** 1. Redistributions of source code must retain the above copyright notice,
**    this list of conditions and the following disclaimer.
** 2. Redistributions in binary form must reproduce the above copyright
**    notice, this list of conditions and the following disclaimer in the
**    documentation and/or other materials provided with the distribution.
**
** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
** AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
** IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
** ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
** LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
** CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
** SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
** INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
** CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
** ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
** POSSIBILITY OF SUCH DAMAGE.
** ----------------------------------------------------------------------------
** What's New?
**
** (2.0.1) Re-wrote the argument parser to be more lenient, and corrected a bug
**         that made the last line of the pretty print output sometimes have
**         the wrong address.
**
** (2.0.0) Refactored large parts of the program.
**
** (1.1.3) Moved the ASCII and hex character strings into the same variable in
**         order to waste less space.
**
** (1.1.2) Made the input size counter display clearer in the pretty printed
**         output by prefixing it, aligning it and giving it a unit.
**
** (1.1.0) Fixed a few C standard compatibility issues that were making the
**         program fail to compile using tools such as lcc-win32. Mainly
**         stopped using the __VA_ARGS__ version of the RUN_ONCE macro, as it
**         causes issues with some compilers.
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>


/* --- CONFIGURATION ------------------------------------------------------- */


#define VERSION "2.0.1"

/* Index mask for one row of the pretty-printed output. A row holds
** ROW_MASK + 1 octets, so the default of 15 gives the 16 hexadecimal values
** per row that is common in other utilities with similar functionality. This
** must be a power of 2 minus one, e.g. (1 << 4) - 1 == 15, because it is
** applied to the byte counter as a bit mask.
**
** (Not called LINE_MAX: POSIX <limits.h> may define a macro of that name.)
*/
#define ROW_MASK 15

/* This value determines the byte-counter type. It needs to be something
** relatively large to handle absolutely yuuge inputs. A 64-bit max is probably
** fine, given that we only print 48-bit addresses.
*/
#define CT_TYPE size_t
#define CT_PRINT "zu"

/* Column count used by -l when no explicit value is attached to it. */
#define DEFAULT_COLS 80

/* Widest field, in octets, that hex_print() is able to format. */
#define HEX_WIDTH_MAX 16


/* --- HELPER MACROS ------------------------------------------------------- */


/* There are many ways to check for Windows, so do them all here. */
#if defined(_WIN32) || defined(WIN32) || defined(__WIN32__)
#  define IS_WINDOWS
#endif

/* We need a macro to change the mode of the standard input to binary, or else
** platforms like Windows will do annoying things to the input without asking.
*/
#ifdef IS_WINDOWS
#  include <io.h>
#  include <fcntl.h>
#  define SET_BINARY_MODE(handle) _setmode(_fileno(handle), _O_BINARY)
#else
#  define SET_BINARY_MODE(handle) ((void) 0)
#endif

/* This defines a macro for function inlining hints, because the compilers that
** support it usually also implement GNU C extensions generally.
*/
#ifdef __GNUC__
#  if __GNUC__ < 5
#    define POLY_INLINE inline
#  else
#    define POLY_INLINE inline __attribute__((hot))
#  endif
#else
#  define POLY_INLINE
#endif

/* The setmode() trick doesn't work for some Windows compiler environments,
** namely Digital Mars, so getchar() is implemented manually using the typical
** Windows API functions. Returns the next octet of the standard input, or EOF
** when nothing could be read.
*/
#ifdef IS_WINDOWS
#  include <windows.h>
static int poly_getchar(void)
{
	static HANDLE in_handle;
	static int have_handle = 0;
	unsigned char octet = 0;
	DWORD got = 0;

	/* The handle is looked up on the first call only. */
	if (!have_handle)
	{
		in_handle = GetStdHandle(STD_INPUT_HANDLE);
		have_handle = 1;
	}
	if (!ReadFile(in_handle, &octet, 1, &got, NULL) || got == 0)
		return EOF;
	return octet;
}
#else
#  define poly_getchar() getchar()
#endif


/* --- MAIN PROGRAM -------------------------------------------------------- */


/* Octets collected for the pretty-printed row currently being filled. */
static int pretty_buf[ROW_MASK + 1];

/* Number of octets processed so far (skipped octets are not counted). */
static CT_TYPE ct = 0;

/* Option state, filled in by parse_args(). A value of 0 for rlimit or skip
** means "not requested"; cols is only meaningful when limit is set.
*/
static int pretty = 0, count = 0, help = 0, ascii = 0, limit = 0;
static long rlimit = 0, cols = 0, skip = 0;

static const char readme[] =
	"------ Simple HeX (" VERSION ") -------" "\n"
	"Copyright (c) 2022, 2023 Amelia Zabardast Ziabari" "\n\n"
	"%s [OPTION...] [FILENAME]" "\n\n"
	"Where OPTION is one or more of the following:" "\n"
	" -p use formal pretty-printing instead of raw output" "\n"
	" -c print the total length in octets" "\n"
	" -h print this help information" "\n"
	" -sXXXXX limit the number of octets to read (after skipping)" "\n"
	" -jXXXXX skip a certain number of octets" "\n"
	" (faster when using FILENAME instead of stdin)" "\n"
	" -a when in raw output, print only filtered ASCII" "\n"
	" -l[XXX] when in raw output, limit the output columns" "\n"
	" (the default value is 80)" "\n\n"
	"The FILENAME is optional, if it is not provided, stdin is used." "\n";

/* This table of ASCII characters is used both when printing the ASCII column
** of the pretty-printed output AND the hexadecimal values. For the former, a
** strict order of the characters is not required, so for the latter we can
** simply make sure the first 16 characters are the ones required for printing
** hex values in the correct order in order to save on binary size.
**
** Technically it might be easier to just have a lookup table for every 8-bit
** hexadecimal value. However, it's not like it would massively improve the
** performance or something like that, and it would result in a larger binary,
** (even if not by much in today's standards), so it's not really worth it even
** if doing it this way instead requires some smarter code.
*/
static const char ascii_chars[] =
	"0123456789"
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ";


/* Writes c to stdout if it appears in ascii_chars, and a '.' placeholder
** otherwise. Zero is rejected explicitly because strchr() would match the
** terminator of the table.
*/
static POLY_INLINE void ascii_print(int c)
{
	if (c != 0 && strchr(ascii_chars, c) != NULL)
		putchar(c);
	else
		putchar('.');
}


/* This is a crude way to check how many bytes are required to store an
** unsigned integer, to check for overflow. Returns 0 for a value of 0.
*/
static POLY_INLINE int bytes_req(CT_TYPE val)
{
	int bytes = 0;

	while (val != 0)
	{
		val >>= 8;
		bytes++;
	}
	return bytes;
}


/* There appear to be some weird bugs in the standard C library on some
** platforms that break the handling of long long types in the printf family
** of functions, so hexadecimal numbers are formatted by hand here.
**
** val is written to stdout as exactly pad octets (two upper-case digits
** each, zero-padded on the left), followed by the string add. If val needs
** more than pad octets, the field is blanked out with asterisks instead of
** messing up the alignment of the output. pad is clamped to the range
** 0..HEX_WIDTH_MAX.
*/
static void hex_print(CT_TYPE val, int pad, const char *add)
{
	char digits[HEX_WIDTH_MAX * 2 + 1];
	int width, i;

	if (pad < 0) pad = 0;
	if (pad > HEX_WIDTH_MAX) pad = HEX_WIDTH_MAX;
	width = pad * 2;

	if (bytes_req(val) > pad)
	{
		memset(digits, '*', (size_t) width);
	}
	else
	{
		/* Digits are peeled off arithmetically, least significant
		** first, so the result does not depend on the byte order of
		** the machine and never reads past the end of val when the
		** field is wider than CT_TYPE.
		*/
		for (i = width - 1; i >= 0; i--)
		{
			digits[i] = ascii_chars[val & 15];
			val >>= 4;
		}
	}
	digits[width] = '\0';

	fputs(digits, stdout);
	fputs(add, stdout);
}


/* Prints one row of the pretty output: the address addr as a 48-bit hex
** integer (assuming a max size of 256 TiB, just in case the counter type
** changes at some point), the first `used` slots of pretty_buf as hex pairs,
** and the same octets again as filtered ASCII.
*/
static void dump_pretty_row(CT_TYPE addr, int used)
{
	int e;

	hex_print(addr, 6, ": ");

	/* Slots are grouped in pairs: an even slot is followed directly by its
	** odd neighbour, and the odd one by a space. Unused slots are filled
	** with blanks of the same width so that the ASCII column lines up on
	** a short final row.
	*/
	for (e = 0; e <= ROW_MASK; e++)
	{
		int odd = e & 1;

		if (e < used)
			hex_print((CT_TYPE) pretty_buf[e], 1, odd ? " " : "");
		else
			fputs(odd ? "   " : "  ", stdout);
	}
	fputs("| ", stdout);

	/* Only used buffer slots are iterated to print ASCII values, as we
	** have no need to add padding for any further columns.
	*/
	for (e = 0; e < used; e++)
		ascii_print(pretty_buf[e]);
	putchar('\n');
}


/* Outcome of take_value(). */
enum { VAL_NONE, VAL_OK, VAL_BAD };

/* Tries to read a number (strtol, base auto-detected) at *cursor.
**
** VAL_NONE: no number is present; *out is 0 and *cursor is left untouched.
** VAL_OK:   *out holds a value >= 1 and *cursor points past the number.
** VAL_BAD:  a number was read but it is out of range or < 1; *out holds what
**           strtol() returned and *cursor points past the number.
*/
static int take_value(const char **cursor, long *out)
{
	char *end = NULL;
	long parsed;

	/* strtol() only ever sets errno, so it has to be cleared first. */
	errno = 0;
	parsed = strtol(*cursor, &end, 0);
	if (end == *cursor)
	{
		*out = 0;
		return VAL_NONE;
	}
	*cursor = end;
	*out = parsed;
	return (errno == ERANGE || parsed < 1) ? VAL_BAD : VAL_OK;
}


/* Reads the mandatory value of an option. `what` names the option in the
** error message. Returns 0 on success, or 1 after printing an error.
*/
static int required_value(const char **cursor, const char *what, long *out)
{
	if (**cursor == '\0')
	{
		printf("no %s specified\n", what);
		return 1;
	}
	if (take_value(cursor, out) != VAL_OK)
	{
		printf("invalid %s specified (%ld)\n", what, *out);
		return 1;
	}
	return 0;
}


/* Fills in the option globals from the command line.
**
** Every argument starting with '-' is scanned left to right, one character
** at a time, so flags may be combined ("-pc") and a numeric value belongs to
** the flag directly in front of it ("-j16p"). Characters that are not a
** known flag are ignored. Because a value is consumed as a whole, its digits
** are never mistaken for flags (the 'a' in "-s0x1a" does not enable -a).
**
** An argument not starting with '-' is taken as the file name if it is the
** last argument, and ignored otherwise.
**
** Returns 0 on success, or 1 after printing an error message.
*/
static int parse_args(int argc, char **argv, const char **filen)
{
	int argn;

	for (argn = 1; argn < argc; argn++)
	{
		const char *p = argv[argn];

		if (*p != '-')
		{
			if (argn == argc - 1) *filen = p;
			continue;
		}

		p++;
		while (*p != '\0')
		{
			long val = 0;
			int state;
			char flag = *p++;

			switch (flag)
			{
			case 'p': pretty = 1; break;
			case 'c': count = 1; break;
			case 'h': help = 1; break;
			case 'a': ascii = 1; break;
			case 'l':
				/* The value is optional for this one. */
				limit = 1;
				state = take_value(&p, &val);
				if (state == VAL_BAD)
				{
					printf("invalid column limit specified (%ld)\n", val);
					return 1;
				}
				cols = (state == VAL_OK) ? val : DEFAULT_COLS;
				break;
			case 's':
				if (required_value(&p, "read limit", &val)) return 1;
				rlimit = val;
				break;
			case 'j':
				if (required_value(&p, "data skip", &val)) return 1;
				skip = val;
				break;
			default:
				break;
			}
		}
	}
	return 0;
}


/* Returns the next octet of the input: from fd if a file was opened, from
** the standard input otherwise. Returns EOF when the input is exhausted.
*/
static int read_octet(FILE *fd)
{
	return fd ? fgetc(fd) : poly_getchar();
}


/* Entry point. Returns 0 after a normal dump, and 1 when the arguments are
** invalid, the file cannot be opened, or the help text was requested.
*/
int main(int argc, char **argv)
{
	const char *filen = NULL;
	FILE *fd = NULL;
	CT_TYPE per_row;
	int c;

	if (parse_args(argc, argv, &filen)) return 1;

	/* A hex pair takes two columns, so only half as many octets fit on a
	** line. The result is kept at 1 or more because it is used as a
	** divisor below ("-l1" would otherwise divide by zero).
	*/
	if (!ascii) cols >>= 1;
	if (cols < 1) cols = 1;
	per_row = (CT_TYPE) cols;

	if (filen)
	{
		fd = fopen(filen, "rb");
		if (!fd)
		{
			printf("could not open file (%s)\n", filen);
			return 1;
		}
		/* Seeking is the fast way to skip. If the file cannot seek,
		** skip is left set and the octets are read and discarded
		** below, exactly as for the standard input.
		*/
		if (skip && fseek(fd, skip, SEEK_SET) == 0) skip = 0;
	}
	else
	{
		SET_BINARY_MODE(stdin);
	}

	if (help)
	{
		printf(readme, (argc > 0 && argv[0]) ? argv[0] : "shx");
		if (fd) fclose(fd);
		return 1;
	}

	/* Discard whatever still has to be skipped. These octets never reach
	** the counter, so addresses and totals start after the skipped part.
	*/
	for (; skip > 0; skip--)
	{
		if (read_octet(fd) == EOF) break;
	}

	/* The read limit is tested before reading, so that no octet beyond
	** the limit is consumed (or waited for on an interactive input).
	*/
	while (!(rlimit && ct >= (CT_TYPE) rlimit)
		&& (c = read_octet(fd)) != EOF)
	{
		if (pretty)
		{
			int slot = (int) (ct & ROW_MASK);

			pretty_buf[slot] = c;
			if (slot == ROW_MASK)
				dump_pretty_row(ct - ROW_MASK, ROW_MASK + 1);
		}
		else if (!count)
		{
			/* Count mode hides the raw output (but not the pretty
			** output). Both of these are raw output with no
			** spaces, etc.
			*/
			if (!ascii) hex_print((CT_TYPE) c, 1, "");
			else ascii_print(c);

			/* Print a newline if we've handled the last character
			** in a column.
			*/
			if (limit && ct % per_row == per_row - 1)
				putchar('\n');
		}
		ct++;
	}
	if (fd) fclose(fd);

	if (pretty)
	{
		/* Flush the octets that did not fill a whole row, if any. */
		int rest = (int) (ct & ROW_MASK);

		if (rest != 0)
			dump_pretty_row(ct - (CT_TYPE) rest, rest);
		if (count)
			printf(" TOTAL: %" CT_PRINT " OCTETS\n", ct);
	}
	else if (count)
		printf("%" CT_PRINT "\n", ct);
	else
		printf("\n");

	return 0;
}