#!/bin/sh
# Steve Parker, http://steve-parker.org/
# Updated Nov 2005 to work properly with filenames containing spaces
#
# Checks for duplicate files in the directory tree rooted at the current
# directory and reports them (or does whatever else you want to do with
# them -- see on_duplicate below).
# Uses diff to check the difference, not filename; the checksum from sum
# is only used to decide which files are worth comparing with diff.
#
# Output (stdout): one "." per file examined, plus one
# "<current> and <previous> match" line per duplicate found.
#
# Depends on: find, sum, cut, grep, diff, mktemp
#
# Limitation: the file list and the log are line-oriented, so filenames
# containing newline characters are not supported.

# on_duplicate CURRENT PREVIOUS
# This is the bit you want to change.
# $1 is the current file, $2 is a previous file which has already been
# found, and is identical to $1.
# examples:
#   printf '%s and %s match\n' "$1" "$2"
#   ls -l "$1" "$2"
#   rm -f "$1"
on_duplicate() {
  printf '%s and %s match\n' "$1" "$2"
}

# previous_match FILE CHECKSUM
# Prints the path of an already-logged file that has the given checksum
# and is byte-identical to FILE; prints nothing if there is none.
# Reads the log named by $LOG, whose lines have the form "checksum:path".
previous_match() {
  grep "^$2:" "${LOG}" | while IFS= read -r entry; do
    # Strip only the leading "checksum:" so that colons and spaces in
    # the path itself survive intact.
    candidate=${entry#*:}
    if diff "$1" "${candidate}" > /dev/null 2>&1; then
      printf '%s\n' "${candidate}"
      # Logged files sharing a checksum all differ from each other, so
      # at most one of them can be identical to FILE.
      break
    fi
  done
}

# main
# Scans "." and calls on_duplicate for every file that is identical to
# one seen earlier in the scan.
main() {
  # mktemp gives an unpredictable, exclusively-created file instead of
  # the guessable /tmp/sum.$$.
  LOG=$(mktemp "${TMPDIR:-/tmp}/sum.XXXXXX") || exit 1
  trap 'echo "Interrupted... " ; rm -f "${LOG}" ; exit 1' 1 2 15

  # IFS= and -r keep leading/trailing blanks and backslashes in names.
  find . -type f -print | while IFS= read -r i; do
    printf '.'
    #echo "Testing $i ..."
    s=$(sum "$i" | cut -d' ' -f1)
    # sum printed nothing (e.g. unreadable file): nothing to compare on.
    [ -n "$s" ] || continue

    j=$(previous_match "$i" "$s")
    #echo "s=$s j=$j"
    if [ -n "$j" ]; then
      on_duplicate "$i" "$j"
    else
      # Either a new checksum, or a checksum collision with different
      # content: log it so later copies of this file are still found.
      printf '%s:%s\n' "$s" "$i" >> "${LOG}"
    fi
  done

  rm -f "${LOG}"
}

main "$@"