aboutsummaryrefslogtreecommitdiffstats
path: root/plugins/dups
blob: e409a5e4ea82462e71af44ba9e465b646b71dc76 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/env sh

# Description: List non-empty duplicate files in the current directory (based on size followed by MD5)
#
# Source: https://www.commandlinefu.com/commands/view/3555/find-duplicate-files-based-on-size-first-then-md5-hash
#
# Dependencies: find md5sum sort uniq xargs gsed
#
# Note: bash compatible required for mktemp
#
# Shell: bash
# Authors: syssyphus, KlzXS

# If the size of a file has more than $size_digits digits the file will be misplaced
# 12 digits fit files up to 931GiB

# Editor used to review the duplicate list; defaults to vi when unset/empty.
EDITOR="${EDITOR:-vi}"
# Directory in which the scratch file is created; defaults to /tmp.
TMPDIR="${TMPDIR:-/tmp}"

# Width of the right-aligned size column produced by find and compared by uniq.
size_digits=12

# Scratch file that holds the duplicate overview shown in the editor.
# Stop right away if it cannot be created: continuing with an empty
# $tmpfile would make every later redirection and the final rm misbehave.
tmpfile=$(mktemp "$TMPDIR/.nnnXXXXXX") || {
	printf 'dups: cannot create a temporary file in %s\n' "$TMPDIR" >&2
	exit 1
}

# Remove the scratch file on every exit path (including the abort branch).
# -f keeps this silent when the file has already been removed.
trap 'rm -f -- "$tmpfile"' EXIT

# Start the scratch file with the instructions shown at the top of the editor
# buffer. The text is passed as arguments to a fixed '%s\n' format so that it
# is never interpreted as a printf format string.
printf '%s\n' \
	"## This is an overview of all duplicate files found." \
	"## After editing this file you will be prompted to remove some of them." \
	"## You can choose between removing all the commented out files, all the uncommented ones or none at all." \
	"## All the lines beginning with '##','#md5sum' or 'md5sum' will be ignored either way." \
	"## If you choose to remove, you will be given a choice between removing with force or interactively for each file." \
	> "$tmpfile"

# Build the duplicate overview and APPEND it to the scratch file (a truncating
# redirect here would wipe whatever was written to the file before).
#
# Stages:
#   1. find      - "<size right-aligned to $size_digits> <path>" for every
#                  non-empty regular file below the current directory
#   2. sort/uniq - keep only files whose size column is shared with another
#   3. sed/tr    - turn each line into two NUL-terminated fields:
#                  "d<size>" and "<path>"
#   4. xargs     - hash each candidate; the path is handed to sh as a
#                  positional argument ($1) instead of being spliced into
#                  shell source, so quotes, '$' or backticks in a file name
#                  are not interpreted. Emits "<md5>  <path> d<size>".
#                  -r skips the stage entirely when there are no candidates.
#   5. sort/uniq - keep lines whose first 32 characters (the md5) repeat,
#                  groups separated by a blank line; the extra echo terminates
#                  the last group with a blank line as well
#   6. sed -n    - print a "#md5sum: ... size: ... bytes" line before each
#                  group, then the group itself up to its blank line
#   7. sed       - reduce every entry to the bare path. md5sum puts TWO
#                  characters between hash and name, so both are stripped.
# shellcheck disable=SC2016
find . -size +0 -type f -printf "%${size_digits}s %p\n" \
	| sort -rn \
	| uniq -w"${size_digits}" -D \
	| sed -nE 's/^ *([0-9]+) (.*)$/d\1\n\2/p' \
	| tr '\n' '\0' \
	| xargs -0 -r -n2 sh -c 'printf "%s %s\n" "$(md5sum "$1")" "$0"' \
	| sort \
	| { uniq -w32 --all-repeated=separate; echo; } \
	| sed -nE '
h
s/^(.{32}).* d([0-9]*)$/#md5sum: \1 size: \2 bytes/p
g

:loop
N
/.*\n$/!b loop
p' \
	| sed -E 's/^.{32}  (.*) d[0-9]*$/\1/' >> "$tmpfile"

# Remove the scratch file, wait for the user to acknowledge, then leave.
# Used by both the abort path and the normal end of the script so the
# scratch file is never left behind.
cleanup_and_exit() {
	rm -f -- "$tmpfile"
	printf "Press any key to exit"
	read -r _
	exit
}

# Let the user review the list and comment out entries.
"$EDITOR" "$tmpfile"

printf "Remove commented files? (yes/no/abort) [default=a]: "
read -r commented

# sed script (basic regular expressions, as used by plain `sed -e`) that
# reduces the edited list to the paths to delete. Lines that are always
# skipped: blank group separators, '##' instruction lines and the
# '#md5sum' / 'md5sum' group headers.
ignored='/^[[:space:]]*$/d; /^##/d; /^#\{0,1\}md5sum/d'

if [ "$commented" = "y" ]; then
	# Keep only the commented entries and strip the comment marker (plus any
	# blanks after it) so rm receives the real path.
	sedcmd="$ignored; /^#/!d; s/^#[[:space:]]*//"
elif [ "$commented" = "n" ]; then
	# Keep only the uncommented entries. Listed paths start with "./", so
	# dropping leading blanks cannot damage a file name.
	sedcmd="$ignored; /^#/d; s/^[[:space:]]*//"
else
	cleanup_and_exit
fi

printf "Remove with force or interactive? (f/i) [default=i]: "
read -r force

# The command runs via `sh -c CMD file1 file2 ...`, so the first file arrives
# as $0 and the rest as "$@". Anything other than an explicit "f" selects the
# documented interactive default; the answer is never pasted into the rm
# option list. rm -i must read its answers from the terminal because stdin of
# the xargs child is not the terminal.
# shellcheck disable=SC2016
if [ "$force" = "f" ]; then
	rmcmd='rm -f -- "$0" "$@"'
else
	rmcmd='rm -i -- "$0" "$@" < /dev/tty'
fi

# NUL-delimit the selected paths; -r prevents running rm when nothing is selected.
sed -e "$sedcmd" "$tmpfile" | tr '\n' '\0' | xargs -0 -r sh -c "$rmcmd"

cleanup_and_exit