/* hx.c (plain) */
/* HX
** A simplified, dependency-less, and hopefully portable utility for converting
** data into a hexadecimal representation. (C99)
** ----------------------------------------------------------------------------
** Copyright (c) 2022, 2023 Amelia Zabardast Ziabari
**
** Redistribution and use in source and binary forms, with or without
** modification, are permitted provided that the following conditions are met:
**
** 1. Redistributions of source code must retain the above copyright notice,
**    this list of conditions and the following disclaimer.
** 2. Redistributions in binary form must reproduce the above copyright
**    notice, this list of conditions and the following disclaimer in the
**    documentation and/or other materials provided with the distribution.
**
** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
** AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
** IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
** ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
** LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
** CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
** SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
** INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
** CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
** ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
** POSSIBILITY OF SUCH DAMAGE.
** ----------------------------------------------------------------------------
** What's New?
**
** (2.0.2) SHX -> HX
**
** (2.0.1) Re-wrote the argument parser to be more lenient, and corrected a bug
**         that made the last line of the pretty print output sometimes have
**         the wrong address.
**
** (2.0.0) Refactored large parts of the program.
**
** (1.1.3) Moved the ASCII and hex character strings into the same variable in
**         order to waste less space.
**
** (1.1.2) Made the input size counter display clearer in the pretty printed
**         output by prefixing it, aligning it and giving it a unit.
**
** (1.1.0) Fixed a few C standard compatibility issues that were making the
**         program fail to compile using tools such as lcc-win32. Mainly
**         stopped using the __VA_ARGS__ version of the RUN_ONCE macro, as it
**         causes issues with some compilers.
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <limits.h>


/* --- CONFIGURATION ------------------------------------------------------- */


#define VERSION "2.0.2"

/* Index of the last column in a pretty-printed row. It doubles as the bit mask
** that wraps the byte counter into a column index, so it must be a power of 2
** minus one. The default, (1 << 4) - 1 == 15, gives rows of exactly 16 8-bit
** hexadecimal values, which is common in many other utilities with similar
** functionality. It is deliberately not called LINE_MAX, because POSIX
** <limits.h> may define a macro of that name.
*/
#define HX_ROW_MASK 15

/* This value determines the byte-counter type. It needs to be an unsigned
** type that is relatively large to handle absolutely yuuge inputs. A 64-bit
** max is probably fine, given that we only print 48-bit addresses.
*/
#define CT_TYPE size_t
#define CT_PRINT "zu"


/* --- HELPER MACROS ------------------------------------------------------- */


/* There are many ways to check for Windows, so do them all here. */
#if defined(_WIN32) || defined(WIN32) || defined(__WIN32__)
# define IS_WINDOWS
#endif

/* We need a macro to change the mode of the standard input to binary, or else
** platforms like Windows will do annoying things to the input without asking.
** Elsewhere it expands to a harmless no-op expression.
*/
#ifdef IS_WINDOWS
# include <io.h>
# include <fcntl.h>
# define SET_BINARY_MODE(handle) _setmode(_fileno(handle), _O_BINARY)
#else
# define SET_BINARY_MODE(handle) ((void) 0)
#endif

/* This defines a macro for function inlining hints, because the compilers that
** support it usually also implement GNU C extensions generally.
*/
#ifdef __GNUC__
# if __GNUC__ < 5
#  define HX_INLINE inline
# else
#  define HX_INLINE inline __attribute__((hot))
# endif
#else
# define HX_INLINE
#endif

/* Prints a diagnostic and makes the enclosing function return 1. */
#define ARG_ERR(...) do { printf(__VA_ARGS__); return 1; } while (0)

/* The setmode() trick doesn't work for some Windows compiler environments,
** namely Digital Mars, so getchar() is implemented manually using the
** typical Windows API functions. Returns the next octet of standard input,
** or EOF once nothing more can be read (including when ReadFile() fails).
*/
#ifdef IS_WINDOWS
# include <windows.h>
static int hx_getchar(void)
{
	static HANDLE in_handle;
	static int have_handle = 0;
	unsigned char octet = 0;
	DWORD got = 0;

	/* The handle only needs to be looked up on the first call. */
	if (!have_handle)
	{
		in_handle = GetStdHandle(STD_INPUT_HANDLE);
		have_handle = 1;
	}
	if (!ReadFile(in_handle, &octet, 1, &got, NULL) || got == 0)
		return EOF;
	return octet;
}
#else
# define hx_getchar() getchar()
#endif


/* --- MAIN PROGRAM -------------------------------------------------------- */


/* Octets of the pretty-printed row currently being assembled. */
static int pretty_buf[HX_ROW_MASK + 1];
/* Column index of the most recently stored octet within pretty_buf. */
static int ctr = 0;
/* Running count of octets (also the address of the current octet). */
static CT_TYPE ct = 0;

/* Option state filled in by the argument parser in main(). */
static int pretty = 0, count = 0, help = 0, rlimit = 0,
           ascii = 0, limit = 0, cols = 0, skip = 0;

static const char *readme =
	"hx (" VERSION ")" "\n"
	"Copyright (c) 2022, 2023 Amelia Zabardast Ziabari" "\n\n"
	"%s [OPTION...] [FILENAME]" "\n\n"
	"Where OPTION is one or more of the following:" "\n"
	" -p use formal pretty-printing instead of raw output" "\n"
	" -c print the total length in octets" "\n"
	" -h print this help information" "\n"
	" -sXXXXX limit the number of octets to read (after skipping)" "\n"
	" -jXXXXX skip a certain number of octets" "\n"
	" (faster when using FILENAME instead of stdin)" "\n"
	" -a when in raw output, print only filtered ASCII" "\n"
	" -l[XXX] when in raw output, limit the output columns" "\n"
	" (the default value is 80)" "\n\n"
	"The FILENAME is optional, if it is not provided, stdin is used." "\n";

/* This table of ASCII characters is used both when printing the ASCII column
** of the pretty-printed output AND the hexadecimal values. For the former, a
** strict order of the characters is not required, so for the latter we can
** simply make sure the first 16 characters are the ones required for printing
** hex values in the correct order in order to save on binary size.
**
** Technically it might be easier to just have a lookup table for every 8-bit
** hexadecimal value. However, it's not like it would massively improve the
** performance or something like that, and it would result in a larger binary,
** (even if not by much in today's standards), so it's not really worth it even
** if doing it this way instead requires some smarter code.
*/
static const char *ascii_chars =
	"0123456789"
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ";


/* Writes c to stdout if it appears in ascii_chars, otherwise writes a '.'
** placeholder.
*/
static HX_INLINE void ascii_print(int c)
{
	/* strchr() also matches the terminating NUL of the table, so zero has
	** to be rejected explicitly.
	*/
	if (c != 0 && strchr(ascii_chars, c) != NULL)
		putchar(c);
	else
		putchar('.');
}


/* This is a crude way to check how many bytes are required to store an
** unsigned integer, to check for overflow. Returns 0 for a value of zero.
*/
static HX_INLINE int bytes_req(CT_TYPE val)
{
	int bytes = 0;

	while (val != 0)
	{
		val >>= 8;
		bytes++;
	}
	return bytes;
}


/* Largest field width, in bytes, that hex_print() can format. */
#define HEX_PAD_MAX 16

/* There appear to be some weird bugs in the standard C library on some
** platforms that break the handling of long long types in the printf family
** of functions, so hexadecimal numbers are formatted by hand here.
**
**   val  the value to print
**   pad  the field width in bytes (two hex digits each), clamped to the
**        range 0..HEX_PAD_MAX
**   add  a suffix written right after the digits (may be NULL for none)
**
** If val needs more than pad bytes, the field is blanked out with asterisks
** instead of messing up the alignment of the output.
*/
static void hex_print(CT_TYPE val, int pad, const char *add)
{
	char digits[HEX_PAD_MAX * 2 + 1];
	int width, pos;

	if (pad < 0) pad = 0;
	if (pad > HEX_PAD_MAX) pad = HEX_PAD_MAX;
	width = pad * 2;

	if (bytes_req(val) > pad)
	{
		memset(digits, '*', (size_t) width);
	}
	else
	{
		/* Fill the field from its last digit pair backwards, peeling
		** off the least significant byte each time. Working on the
		** value itself (rather than on its bytes in memory) keeps the
		** output independent of the host byte order, and positions
		** beyond the width of CT_TYPE simply come out as zeros.
		*/
		for (pos = width - 1; pos > 0; pos -= 2)
		{
			unsigned int octet = (unsigned int) (val & 0xFF);

			digits[pos - 1] = ascii_chars[octet >> 4];
			digits[pos]     = ascii_chars[octet & 15];
			val >>= 8;
		}
	}
	digits[width] = '\0';

	fputs(digits, stdout);
	if (add != NULL) fputs(add, stdout);
}


/* Prints one row of the pretty output: address, hex columns and ASCII view.
**
** With final == 0 (called after every stored octet) a row is only printed
** when the buffer has just been filled completely. With final != 0 (called
** once after the read loop) a row is only printed when the buffer was left
** partially filled, because a full one has already been printed.
**
** Expects ct to be the address of the last octet stored in pretty_buf and
** ctr to be that octet's column.
*/
static void dump_pretty_buf(int final)
{
	int slot;
	int row_full = (ctr == HX_ROW_MASK);

	if (final ? row_full : !row_full) return;

	/* Print the address as a 48-bit hex integer, assuming a max size of
	** 256 TiB, just in case the counter type changes at some point.
	*/
	hex_print(ct - ctr, 6, ": ");

	/* Prints the actual prettified hex view, grouped in pairs of octets:
	** the second octet of each pair is followed by a separating space.
	*/
	for (slot = 0; slot <= HX_ROW_MASK; slot++)
	{
		int pair_start = !(slot & 1);

		if (slot <= ctr)
			hex_print((CT_TYPE) pretty_buf[slot], 1,
			          pair_start ? "" : " ");
		else
			/* Empty slots are padded to the exact width a printed
			** octet would have taken (2 digits, plus separator).
			*/
			fputs(pair_start ? "  " : "   ", stdout);
	}
	fputs("| ", stdout);

	/* Only used buffer slots are iterated to print ASCII values, as we
	** have no need to add padding for any further columns.
	*/
	for (slot = 0; slot <= ctr; slot++)
		ascii_print(pretty_buf[slot]);
	putchar('\n');
}


/* Outcomes of int_option(). */
enum { OPT_ABSENT, OPT_BARE, OPT_VALUE, OPT_INVALID };

/* Looks for an option letter inside arg and parses the integer that follows.
**
** Returns OPT_ABSENT if the letter does not occur in arg, OPT_BARE if it is
** the last character of arg, OPT_VALUE if a number >= 1 that fits an int
** follows it (stored in *out), and OPT_INVALID otherwise (*out then holds the
** rejected value, clamped to the int range, for use in a diagnostic).
** The number is parsed by strtol() with base 0, so 0x and 0 prefixes work.
*/
static int int_option(const char *arg, int letter, int *out)
{
	const char *at = strchr(arg, letter);
	long parsed;

	if (at == NULL) return OPT_ABSENT;
	if (at[1] == '\0') return OPT_BARE;

	/* strtol() only ever sets errno, so clear stale values first. */
	errno = 0;
	parsed = strtol(at + 1, NULL, 0);

	if (parsed > INT_MAX) { *out = INT_MAX; return OPT_INVALID; }
	if (parsed < INT_MIN) { *out = INT_MIN; return OPT_INVALID; }
	*out = (int) parsed;
	return (errno == ERANGE || parsed < 1) ? OPT_INVALID : OPT_VALUE;
}


/* Entry point: parses the options, then converts the input to the selected
** representation on stdout. Returns 0 on success and 1 after printing the
** help text or an argument/file error.
*/
int main(int argc, char** argv)
{
	int c, argn, status;
	char *filen = NULL;
	FILE *fd = NULL;

	/* Lenient argument parser. Every argument that begins with '-' is
	** scanned for each option letter, so flags may be combined ("-pc").
	** Note that a letter is matched anywhere in the argument, which
	** includes the digits of a value (the 'a' in "-s0xa" enables -a).
	** A non-option argument is only used, as the file name, when it is
	** the very last argument.
	*/
	for (argn = 1; argn < argc; argn++)
	{
		const char *arg = argv[argn];

		if (arg[0] == '-')
		{
			if (strchr(arg, 'p') != NULL) pretty = 1;
			if (strchr(arg, 'c') != NULL) count = 1;
			if (strchr(arg, 'h') != NULL) help = 1;
			if (strchr(arg, 'a') != NULL) ascii = 1;
			if (strchr(arg, 'l') != NULL) limit = 1;

			/* -l takes an optional value, defaulting to 80. */
			status = int_option(arg, 'l', &cols);
			if (status == OPT_INVALID)
				ARG_ERR("invalid column limit specified (%d)\n", cols);
			if (status == OPT_BARE)
				cols = 80;

			/* -s and -j both require a value. */
			status = int_option(arg, 's', &rlimit);
			if (status == OPT_INVALID)
				ARG_ERR("invalid read limit specified (%d)\n", rlimit);
			if (status == OPT_BARE)
				ARG_ERR("no read limit specified\n");

			status = int_option(arg, 'j', &skip);
			if (status == OPT_INVALID)
				ARG_ERR("invalid data skip specified (%d)\n", skip);
			if (status == OPT_BARE)
				ARG_ERR("no data skip specified\n");
		}
		else if (argn == argc - 1)
		{
			filen = argv[argn];
		}
	}

	/* A hex octet occupies two output columns, so only half as many fit
	** on a line. Never let the result reach zero, as it is used as a
	** divisor when wrapping lines.
	*/
	if (!ascii) cols >>= 1;
	if (limit && cols < 1) cols = 1;

	/* Help needs no input, so handle it before opening anything. */
	if (help)
	{
		printf(readme, (argc > 0 && argv[0] != NULL) ? argv[0] : "hx");
		return 1;
	}

	if (filen != NULL)
	{
		fd = fopen(filen, "rb");
		if (fd == NULL)
			ARG_ERR("could not open file (%s)\n", filen);

		/* Seeking is the fast way to skip. If the stream cannot seek,
		** skip stays set and the octets are read and discarded below.
		*/
		if (skip && fseek(fd, (long) skip, SEEK_SET) == 0)
			skip = 0;
	}
	else
	{
		SET_BINARY_MODE(stdin);
	}

	/* ct is advanced after every octet that is skipped or processed, but
	** not when the read limit ends the loop.
	*/
	for (; (c = (fd != NULL) ? fgetc(fd) : hx_getchar()) != EOF; ct++)
	{
		/* While skipping, octets are merely counted. Once the target
		** is reached the counter restarts, so that addresses and the
		** total are relative to the first octet actually shown.
		*/
		if (skip)
		{
			if (ct < (CT_TYPE) skip) continue;
			ct = 0;
			skip = 0;
		}

		if (rlimit && ct >= (CT_TYPE) rlimit) break;

		if (!pretty)
		{
			/* Count mode hides the raw output (but not the pretty
			** output).
			*/
			if (count) continue;

			/* Both of these are raw output with no spaces, etc */
			if (!ascii) hex_print((CT_TYPE) c, 1, "");
			else ascii_print(c);

			/* Print a newline if we've handled the last character
			** in a column. Similar logic to ctr in pretty mode.
			*/
			if (limit && ct % (CT_TYPE) cols == (CT_TYPE) cols - 1)
				putchar('\n');
		}
		else
		{
			ctr = (int) (ct & HX_ROW_MASK);
			pretty_buf[ctr] = c;

			/* This can safely be called for each byte, because the
			** check to see if we're at the end of a line happens
			** within the function itself.
			*/
			dump_pretty_buf(0);
		}
	}
	if (fd != NULL) fclose(fd);

	/* The input ended before the skip target was reached, so everything
	** counted so far was skipped and nothing was actually processed.
	*/
	if (skip) ct = 0;

	if (pretty)
	{
		/* Flush a partially filled last row, if any octet was read.
		** dump_pretty_buf() wants the address of the last octet, which
		** is one less than the total.
		*/
		if (ct > 0)
		{
			ct--;
			dump_pretty_buf(1);
			ct++;
		}
		if (count)
			printf(" TOTAL: %" CT_PRINT " OCTETS\n", ct);
	}
	else if (count)
		printf("%" CT_PRINT "\n", ct);
	else
		printf("\n");

	return 0;
}