From bcbe8080be6610c265bcbc28879502cd9ab72bfc Mon Sep 17 00:00:00 2001 From: lvgx Date: Fri, 21 Aug 2020 04:45:45 +0200 Subject: Add support for Alexey Tourbin's QSORT code (#708) * Add support for Alexey Tourbin's QSORT code See https://github.com/svpv/qsort * Add benchmark scripts and compilation mode Compile with `make O_BENCHMARK=1`, and run benchmarks with e.g.: ./misc/test/benchmark.sh ./nnn '/' '/usr/bin' '/usr/lib' > benchdata You can then plot basic violin graphs with: ./misc/test/plot-bench.py benchdata * Update style, doc, haiku support, fix lint --- Makefile | 10 +++ misc/haiku/Makefile | 10 +++ misc/test/benchmark.sh | 37 ++++++++++ misc/test/plot-bench.py | 20 ++++++ src/nnn.c | 21 +++++- src/qsort.h | 186 ++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 281 insertions(+), 3 deletions(-) create mode 100755 misc/test/benchmark.sh create mode 100755 misc/test/plot-bench.py create mode 100644 src/qsort.h diff --git a/Makefile b/Makefile index 1138488..35d37aa 100644 --- a/Makefile +++ b/Makefile @@ -20,6 +20,8 @@ O_NOBATCH := 0 # no built-in batch renamer O_NOFIFO := 0 # no FIFO previewer support O_CTX8 := 0 # enable 8 contexts O_ICONS := 0 # support icons +O_QSORT := 0 # use Alexey Tourbin's QSORT implementation +O_BENCH := 0 # benchmark mode (stops at first user input) # convert targets to flags for backwards compatibility ifneq ($(filter debug,$(MAKECMDGOALS)),) @@ -75,6 +77,14 @@ ifeq ($(O_ICONS),1) CPPFLAGS += -DICONS endif +ifeq ($(O_QSORT),1) + CPPFLAGS += -DTOURBIN_QSORT +endif + +ifeq ($(O_BENCH),1) + CPPFLAGS += -DBENCH +endif + ifeq ($(shell $(PKG_CONFIG) ncursesw && echo 1),1) CFLAGS_CURSES ?= $(shell $(PKG_CONFIG) --cflags ncursesw) LDLIBS_CURSES ?= $(shell $(PKG_CONFIG) --libs ncursesw) diff --git a/misc/haiku/Makefile b/misc/haiku/Makefile index 34057df..6ab4544 100644 --- a/misc/haiku/Makefile +++ b/misc/haiku/Makefile @@ -18,6 +18,8 @@ O_NOBATCH := 0 # no built-in batch renamer O_NOFIFO := 0 # no FIFO previewer support O_CTX8 := 0 # enable 8 contexts O_ICONS := 0 # support icons +O_QSORT := 0 # use Alexey Tourbin's QSORT implementation +O_BENCH := 0 # benchmark mode (stops at first user input) # convert targets to flags for backwards compatibility ifneq ($(filter debug,$(MAKECMDGOALS)),) @@ -74,6 +76,14 @@ ifeq ($(O_ICONS),1) CPPFLAGS += -DICONS endif +ifeq ($(O_QSORT),1) + CPPFLAGS += -DTOURBIN_QSORT +endif + +ifeq ($(O_BENCH),1) + CPPFLAGS += -DBENCH +endif + ifeq ($(shell $(PKG_CONFIG) ncursesw && echo 1),1) CFLAGS_CURSES ?= $(shell $(PKG_CONFIG) --cflags ncursesw) LDLIBS_CURSES ?= $(shell $(PKG_CONFIG) --libs ncursesw) diff --git a/misc/test/benchmark.sh b/misc/test/benchmark.sh new file mode 100755 index 0000000..fb56d01 --- /dev/null +++ b/misc/test/benchmark.sh @@ -0,0 +1,37 @@ +#!/bin/sh +# +# Usage: ./misc/test/benchmark.sh ./nnn /tmp/testdir1 ./testdir2 ... +# +# Don't forget to build nnn in benchmark mode: make O_BENCH=1 + +# Use a test dir filled with genfiles.sh to get interesting output +# (or maybe /usr/lib/) + +LANG=C + +TIME_VAL=${TIME_VAL:-"real"} + +SAMPLES=${SAMPLES:-100} + +EXE=$1 + +bench_val () { + (time "$1" "$2") 2>&1 |\ + awk '$1=="'"$TIME_VAL"'"{match($2, /[0-9]*\.[0-9]*/) ; print substr($2, RSTART, RLENGTH)}' +} + +bench_dir () { + i=$SAMPLES + printf "$2" + while [ $((i--)) -gt 0 ] ; do + printf "\t%s" "$(bench_val "$1" "$2")" + done + printf "\n" +} + +shift + +for dir in "$@" ; do + bench_dir "$EXE" "$dir" +done + diff --git a/misc/test/plot-bench.py b/misc/test/plot-bench.py new file mode 100755 index 0000000..b708fec --- /dev/null +++ b/misc/test/plot-bench.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python3 +# +# Usage: ./plot-bench.py datafile +# (where datafile is the output of benchmark.sh) + +import matplotlib.pyplot as plt +import sys + +def bench_file_to_lists(infile): + return [[float(entry) for entry in line.split('\t')[1:]] for line in infile.readlines()] + +def plot_data(data): + fig = plt.figure() + ax = fig.add_axes([0,0,1,1]) + ax.violinplot(data) + plt.savefig("plot.svg") + +filename = sys.argv[1] + +plot_data(bench_file_to_lists(open(filename))) diff --git a/src/nnn.c b/src/nnn.c index f9b2050..be25850 100644 --- a/src/nnn.c +++ b/src/nnn.c @@ -116,6 +116,10 @@ #include "icons.h" #endif +#ifdef TOURBIN_QSORT +#include "qsort.h" +#endif + /* Macro definitions */ #define VERSION "3.4" #define GENERAL_INFO "BSD 2-Clause\nhttps://github.com/jarun/nnn" @@ -732,6 +736,14 @@ static haiku_nm_h haiku_hnd; #define xisdigit(c) ((unsigned int) (c) - '0' <= 9) #define xerror() perror(xitoa(__LINE__)) +#ifdef TOURBIN_QSORT +#define ENTLESS(i,j) (entrycmpfn(pdents + (i), pdents + (j)) < 0) +#define ENTSWAP(i,j) (swap_ent((i),(j))) +#define ENTSORT(pdents, ndents, entrycmpfn) QSORT((ndents), ENTLESS, ENTSWAP) +#else +#define ENTSORT(pdents, ndents, entrycmpfn) qsort((pdents), (ndents), sizeof(*(pdents)), (entrycmpfn)) +#endif + #ifdef __GNUC__ #define UNUSED(x) UNUSED_##x __attribute__((__unused__)) #else @@ -2513,6 +2525,9 @@ static int handle_alt_key(wint_t *wch) */ static int nextsel(int presel) { +#ifdef BENCH + return SEL_QUIT; +#endif int c = presel; uint i; @@ -2710,7 +2725,7 @@ static int matches(const char *fltr) regfree(&re); #endif - qsort(pdents, ndents, sizeof(*pdents), entrycmpfn); + ENTSORT(pdents, ndents, entrycmpfn); return ndents; } @@ -5122,7 +5137,7 @@ static void populate(char *path, char *lastname) if (!ndents) return; - qsort(pdents, ndents, sizeof(*pdents), entrycmpfn); + ENTSORT(pdents, ndents, entrycmpfn); #ifdef DBGMODE clock_gettime(CLOCK_REALTIME, &ts2); @@ -6317,7 +6332,7 @@ nochange: if (r == 'd' || r == 'a') goto begin; - qsort(pdents, ndents, sizeof(*pdents), entrycmpfn); + ENTSORT(pdents, ndents, entrycmpfn); move_cursor(ndents ? dentfind(lastname, ndents) : 0, 0); } continue; diff --git a/src/qsort.h b/src/qsort.h new file mode 100644 index 0000000..f177e95 --- /dev/null +++ b/src/qsort.h @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2013, 2017 Alexey Tourbin + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * This is a traditional Quicksort implementation which mostly follows + * [Sedgewick 1978]. Sorting is performed entirely on array indices, + * while actual access to the array elements is abstracted out with the + * user-defined `LESS` and `SWAP` primitives. + * + * Synopsis: + * QSORT(N, LESS, SWAP); + * where + * N - the number of elements in A[]; + * LESS(i, j) - compares A[i] to A[j]; + * SWAP(i, j) - exchanges A[i] with A[j]. + */ + +#ifndef QSORT_H +#define QSORT_H + +/* Sort 3 elements. */ +#define Q_SORT3(q_a1, q_a2, q_a3, Q_LESS, Q_SWAP) \ +do { \ + if (Q_LESS(q_a2, q_a1)) { \ + if (Q_LESS(q_a3, q_a2)) \ + Q_SWAP(q_a1, q_a3); \ + else { \ + Q_SWAP(q_a1, q_a2); \ + if (Q_LESS(q_a3, q_a2)) \ + Q_SWAP(q_a2, q_a3); \ + } \ + } \ + else if (Q_LESS(q_a3, q_a2)) { \ + Q_SWAP(q_a2, q_a3); \ + if (Q_LESS(q_a2, q_a1)) \ + Q_SWAP(q_a1, q_a2); \ + } \ +} while (0) + +/* Partition [q_l,q_r] around a pivot. After partitioning, + * [q_l,q_j] are the elements that are less than or equal to the pivot, + * while [q_i,q_r] are the elements greater than or equal to the pivot. */ +#define Q_PARTITION(q_l, q_r, q_i, q_j, Q_UINT, Q_LESS, Q_SWAP) \ +do { \ + /* The middle element, not to be confused with the median. */ \ + Q_UINT q_m = (q_l) + (((q_r) - (q_l)) >> 1); \ + /* Reorder the second, the middle, and the last items. \ + * As [Edelkamp Weiss 2016] explain, using the second element \ + * instead of the first one helps avoid bad behaviour for \ + * decreasingly sorted arrays. This method is used in recent \ + * versions of gcc's std::sort, see gcc bug 58437#c13, although \ + * the details are somewhat different (cf. #c14). */ \ + Q_SORT3((q_l) + 1, q_m, q_r, Q_LESS, Q_SWAP); \ + /* Place the median at the beginning. */ \ + Q_SWAP(q_l, q_m); \ + /* Partition [q_l+2, q_r-1] around the median which is in q_l. \ + * q_i and q_j are initially off by one, they get decremented \ + * in the do-while loops. */ \ + (q_i) = (q_l) + 1; (q_j) = q_r; \ + while (1) { \ + do (q_i)++; while (Q_LESS(q_i, q_l)); \ + do (q_j)--; while (Q_LESS(q_l, q_j)); \ + if ((q_i) >= (q_j)) break; /* Sedgewick says "until j < i" */ \ + Q_SWAP(q_i, q_j); \ + } \ + /* Compensate for the i==j case. */ \ + (q_i) = (q_j) + 1; \ + /* Put the median to its final place. */ \ + Q_SWAP(q_l, q_j); \ + /* The median is not part of the left subfile. */ \ + (q_j)--; \ +} while (0) + +/* Insertion sort is applied to small subfiles - this is contrary to + * Sedgewick's suggestion to run a separate insertion sort pass after + * the partitioning is done. The reason I don't like a separate pass + * is that it triggers extra comparisons, because it can't see that the + * medians are already in their final positions and need not be rechecked. + * Since I do not assume that comparisons are cheap, I also do not try + * to eliminate the (q_j > q_l) boundary check. */ +#define Q_INSERTION_SORT(q_l, q_r, Q_UINT, Q_LESS, Q_SWAP) \ +do { \ + Q_UINT q_i, q_j; \ + /* For each item starting with the second... */ \ + for (q_i = (q_l) + 1; q_i <= (q_r); q_i++) \ + /* move it down the array so that the first part is sorted. */ \ + for (q_j = q_i; q_j > (q_l) && (Q_LESS(q_j, q_j - 1)); q_j--) \ + Q_SWAP(q_j, q_j - 1); \ +} while (0) + +/* When the size of [q_l,q_r], i.e. q_r-q_l+1, is greater than or equal to + * Q_THRESH, the algorithm performs recursive partitioning. When the size + * drops below Q_THRESH, the algorithm switches to insertion sort. + * The minimum valid value is probably 5 (with 5 items, the second and + * the middle items, the middle itself being rounded down, are distinct). */ +#define Q_THRESH 16 + +/* The main loop. */ +#define Q_LOOP(Q_UINT, Q_N, Q_LESS, Q_SWAP) \ +do { \ + Q_UINT q_l = 0; \ + Q_UINT q_r = (Q_N) - 1; \ + Q_UINT q_sp = 0; /* the number of frames pushed to the stack */ \ + struct { Q_UINT q_l, q_r; } \ + /* On 32-bit platforms, to sort a "char[3GB+]" array, \ + * it may take full 32 stack frames. On 64-bit CPUs, \ + * though, the address space is limited to 48 bits. \ + * The usage is further reduced if Q_N has a 32-bit type. */ \ + q_st[sizeof(Q_UINT) > 4 && sizeof(Q_N) > 4 ? 48 : 32]; \ + while (1) { \ + if (q_r - q_l + 1 >= Q_THRESH) { \ + Q_UINT q_i, q_j; \ + Q_PARTITION(q_l, q_r, q_i, q_j, Q_UINT, Q_LESS, Q_SWAP); \ + /* Now have two subfiles: [q_l,q_j] and [q_i,q_r]. \ + * Dealing with them depends on which one is bigger. */ \ + if (q_j - q_l >= q_r - q_i) \ + Q_SUBFILES(q_l, q_j, q_i, q_r); \ + else \ + Q_SUBFILES(q_i, q_r, q_l, q_j); \ + } \ + else { \ + Q_INSERTION_SORT(q_l, q_r, Q_UINT, Q_LESS, Q_SWAP); \ + /* Pop subfiles from the stack, until it gets empty. */ \ + if (q_sp == 0) break; \ + q_sp--; \ + q_l = q_st[q_sp].q_l; \ + q_r = q_st[q_sp].q_r; \ + } \ + } \ +} while (0) + +/* The missing part: dealing with subfiles. + * Assumes that the first subfile is not smaller than the second. */ +#define Q_SUBFILES(q_l1, q_r1, q_l2, q_r2) \ +do { \ + /* If the second subfile is only a single element, it needs \ + * no further processing. The first subfile will be processed \ + * on the next iteration (both subfiles cannot be only a single \ + * element, due to Q_THRESH). */ \ + if ((q_l2) == (q_r2)) { \ + q_l = q_l1; \ + q_r = q_r1; \ + } \ + else { \ + /* Otherwise, both subfiles need processing. \ + * Push the larger subfile onto the stack. */ \ + q_st[q_sp].q_l = q_l1; \ + q_st[q_sp].q_r = q_r1; \ + q_sp++; \ + /* Process the smaller subfile on the next iteration. */ \ + q_l = q_l2; \ + q_r = q_r2; \ + } \ +} while (0) + +/* And now, ladies and gentlemen, may I proudly present to you... */ +#define QSORT(Q_N, Q_LESS, Q_SWAP) \ +do { \ + if ((Q_N) > 1) \ + /* We could check sizeof(Q_N) and use "unsigned", but at least \ + * on x86_64, this has the performance penalty of up to 5%. */ \ + Q_LOOP(unsigned long, Q_N, Q_LESS, Q_SWAP); \ +} while (0) + +#endif + +/* ex:set ts=8 sts=4 sw=4 noet: */ -- cgit v1.2.3-70-g09d2