commit duperemove for openSUSE:Factory

31 Oct 2014

Hello community,

here is the log from the commit of package duperemove for openSUSE:Factory checked in at 2014-10-31 18:27:56
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/duperemove (Old)
 and      /work/SRC/openSUSE:Factory/.duperemove.new (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Package is "duperemove"

Changes:
--------

--- /work/SRC/openSUSE:Factory/duperemove/duperemove.changes	2014-09-30 19:42:03.000000000 +0200
+++ /work/SRC/openSUSE:Factory/.duperemove.new/duperemove.changes	2014-10-31 20:03:36.000000000 +0100
@@ -1,0 +2,11 @@
+Fri Oct 31 02:44:53 UTC 2014 - mfasheh@suse.com
+
+- Update to duperemove v0.09.beta2
+  - fix memory leak
+  - fix hardlink detection on btrfs
+  - print file number status during csum phase
+  - print a status bar during extent seearch
+  - several bugfixes and performance improvements to extent search
+- Removed patch: 001-fix-build.patch
+
+-------------------------------------------------------------------

Old:
----
  001-fix-build.patch
  duperemove-v0.09.beta1.tar.gz

New:
----
  duperemove-v0.09.beta2.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ duperemove.spec ++++++
--- /var/tmp/diff_new_pack.49iyG2/_old	2014-10-31 20:03:37.000000000 +0100
+++ /var/tmp/diff_new_pack.49iyG2/_new	2014-10-31 20:03:37.000000000 +0100
@@ -17,13 +17,13 @@
 
 
 %define modname duperemove
-%define tar_version v0.09.beta1
+%define tar_version v0.09.beta2
 
 Name:           duperemove
 BuildRequires:  gcc-c++
 BuildRequires:  glib2-devel
 BuildRequires:  libgcrypt-devel
-Version:        0.09~beta1
+Version:        0.09~beta2
 Release:        0
 Summary:        Software to find duplicate extents in files and remove them
 License:        GPL-2.0
@@ -31,7 +31,6 @@
 Url:            https://github.com/markfasheh/duperemove
 Source:         %{modname}-%{tar_version}.tar.gz
 BuildRoot:      %{_tmppath}/%{name}-%{version}-build
-Patch1:         001-fix-build.patch
 
 %description
 Duperemove finds duplicate extents in files and prints them to the
@@ -48,7 +47,6 @@
 
 %prep
 %setup -q -n %{modname}-%{tar_version}
-%patch1 -p1
 
 %build
 make

++++++ duperemove-v0.09.beta1.tar.gz -> duperemove-v0.09.beta2.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-v0.09.beta1/Makefile new/duperemove-v0.09.beta2/Makefile
--- old/duperemove-v0.09.beta1/Makefile	2014-09-27 01:56:37.000000000 +0200
+++ new/duperemove-v0.09.beta2/Makefile	2014-10-31 00:27:39.000000000 +0100
@@ -1,11 +1,11 @@
-RELEASE=v0.09.beta1
+RELEASE=v0.09.beta2
 
 CC = gcc
 CFLAGS = -Wall -ggdb
 
 MANPAGES=duperemove.8 btrfs-extent-same.8
 
-DIST_SOURCES=csum-gcrypt.c csum-mhash.c csum.h duperemove.c hash-tree.c hash-tree.h results-tree.c results-tree.h kernel.h LICENSE list.h Makefile rbtree.c rbtree.h rbtree.txt README TODO dedupe.c dedupe.h btrfs-ioctl.h filerec.c filerec.h $(MANPAGES) btrfs-extent-same.c debug.h util.c util.h serialize.c serialize.h hashstats.c
+DIST_SOURCES=csum-gcrypt.c csum-mhash.c csum.h duperemove.c hash-tree.c hash-tree.h results-tree.c results-tree.h kernel.h LICENSE list.h Makefile rbtree.c rbtree.h rbtree.txt README TODO dedupe.c dedupe.h btrfs-ioctl.h filerec.c filerec.h btrfs-util.c btrfs-util.h $(MANPAGES) btrfs-extent-same.c debug.h util.c util.h serialize.c serialize.h hashstats.c
 DIST=duperemove-$(RELEASE)
 DIST_TARBALL=$(DIST).tar.gz
 TEMP_INSTALL_DIR:=$(shell mktemp -du -p .)
@@ -26,7 +26,7 @@
 	$(crypt_CFLAGS) $(glib_CFLAGS)
 LIBRARY_FLAGS += $(crypt_LIBS) $(glib_LIBS)
 
-objects = duperemove.o rbtree.o hash-tree.o results-tree.o dedupe.o filerec.o util.o serialize.o $(hash_obj)
+objects = duperemove.o rbtree.o hash-tree.o results-tree.o dedupe.o filerec.o util.o serialize.o btrfs-util.o $(hash_obj)
 progs = duperemove
 
 DESTDIR = /
@@ -38,7 +38,7 @@
 all: $(progs) kernel.h list.h btrfs-ioctl.h debug.h
 
 duperemove: $(objects) kernel.h duperemove.c
-	$(CC) $(LIBRARY_FLAGS) $(CFLAGS) $(objects) -o duperemove
+	$(CC) $(CFLAGS) $(objects) -o duperemove $(LIBRARY_FLAGS)
 
 tarball: clean
 	mkdir -p $(TEMP_INSTALL_DIR)/$(DIST)
@@ -60,14 +60,14 @@
 	done
 
 csum-test: $(hash_obj) csum-test.c
-	$(CC) $(LIBRARY_FLAGS) $(CFLAGS) $(hash_obj) -o csum-test csum-test.c
+	$(CC) $(CFLAGS) $(hash_obj) -o csum-test csum-test.c  $(LIBRARY_FLAGS)
 
 filerec-test: filerec.c filerec.h rbtree.o
-	$(CC) $(LIBRARY_FLAGS) $(CFLAGS) -DFILEREC_TEST filerec.c rbtree.o -o filerec-test
+	$(CC) $(CFLAGS) -DFILEREC_TEST filerec.c rbtree.o -o filerec-test $(LIBRARY_FLAGS)
 
 hashstats_obj = $(hash_obj) rbtree.o hash-tree.o filerec.o util.o serialize.o results-tree.o
 hashstats: $(hashstats_obj) hashstats.c
-	$(CC) $(LIBRARY_FLAGS) $(CFLAGS) $(hashstats_obj) hashstats.c -o hashstats
+	$(CC) $(CFLAGS) $(hashstats_obj) hashstats.c -o hashstats $(LIBRARY_FLAGS)
 
 clean:
 	rm -fr $(objects) $(progs) $(DIST_TARBALL) btrfs-extent-same filerec-test hashstats csum-*.o *~
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-v0.09.beta1/TODO new/duperemove-v0.09.beta2/TODO
--- old/duperemove-v0.09.beta1/TODO	2014-09-27 01:56:37.000000000 +0200
+++ new/duperemove-v0.09.beta2/TODO	2014-10-31 00:27:39.000000000 +0100
@@ -1,6 +1,3 @@
-- Inodes between subvolumes on a btrfs file system can have the same i_ino (hooray!).
-  We need to handle this, probably by recording subvolid on the filerec.
-
 - Store results of our search to speed up subsequent runs.
   - When writing hashes, store latest btrfs transaction id in the hash file (import find-new from btrfsprogs for this)
   - When reading hashes and we have a transaction id, do a scan of btrfs objects to see which inodes have changed.
@@ -14,6 +11,18 @@
 - Add an optional mode to do duplicate checking with resolution of extent
   owners.
 
+- Multi-threaded extent search.
+  - This needs locking and atomic updates to some fields, so it's not quite
+    as straight forward as threading the hash stage. A start would be to have
+    find_all_dups queue up appropriate dup_blocks_lists to worker threads.
+    The threads have to properly lock the filerec compared trees and results
+    trees. b_seen also needs to be read/written atomically.
+
+- csum_whole_file() still does a read/checksum of holes and unwritten
+  extents (even though we detect and mark them now).  If we calculate and
+  store (in memory) the checksum of a zero'd block, we can skip the read and
+  copy our known value directly into the block digest member.
+
 - Allow checking for similar files by some criteria (approximate size,
   file magic type, file extension, etc)
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-v0.09.beta1/btrfs-util.c new/duperemove-v0.09.beta2/btrfs-util.c
--- old/duperemove-v0.09.beta1/btrfs-util.c	1970-01-01 01:00:00.000000000 +0100
+++ new/duperemove-v0.09.beta2/btrfs-util.c	2014-10-31 00:27:39.000000000 +0100
@@ -0,0 +1,74 @@
+/*
+ * btrfs-util.c
+ *
+ * Copyright (C) 2014 SUSE.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * Some parts of this taken from btrfs-progs, which is also GPLv2
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <sys/statfs.h>
+#include <sys/ioctl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <string.h>
+#include <linux/magic.h>
+#include <linux/btrfs.h>
+
+#include "btrfs-util.h"
+#include "debug.h"
+
+/* For some reason linux/btrfs.h doesn't define this. */
+#define	BTRFS_FIRST_FREE_OBJECTID	256ULL
+
+/*
+ * For a given:
+ * - file or directory return the containing tree root id
+ * - subvolume return its own tree id
+ * - BTRFS_EMPTY_SUBVOL_DIR_OBJECTID (directory with ino == 2) the result is
+ *   undefined and function returns -1
+ */
+int lookup_btrfs_subvolid(int fd, uint64_t *subvolid)
+{
+	int ret;
+	struct btrfs_ioctl_ino_lookup_args args;
+
+	memset(&args, 0, sizeof(args));
+	args.objectid = BTRFS_FIRST_FREE_OBJECTID;
+
+	ret = ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args);
+	if (ret)
+		return errno;
+
+	*subvolid = args.treeid;
+
+	return 0;
+}
+
+int check_file_btrfs(int fd, int *btrfs)
+{
+	int ret;
+	struct statfs fs;
+
+	*btrfs = 0;
+
+	ret = fstatfs(fd, &fs);
+	if (ret)
+		return errno;
+
+	if (fs.f_type == BTRFS_SUPER_MAGIC)
+		*btrfs = 1;
+
+	return ret;
+}
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-v0.09.beta1/btrfs-util.h new/duperemove-v0.09.beta2/btrfs-util.h
--- old/duperemove-v0.09.beta1/btrfs-util.h	1970-01-01 01:00:00.000000000 +0100
+++ new/duperemove-v0.09.beta2/btrfs-util.h	2014-10-31 00:27:39.000000000 +0100
@@ -0,0 +1,22 @@
+/*
+ * btrfs-util.h
+ *
+ * Copyright (C) 2014 SUSE.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef	__BTRFS_UTIL__
+#define	__BTRFS_UTIL__
+
+int check_file_btrfs(int fd, int *btrfs);
+int lookup_btrfs_subvolid(int fd, uint64_t *rootid);
+
+#endif	/* __BTRFS_UTIL__ */
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-v0.09.beta1/debug.h new/duperemove-v0.09.beta2/debug.h
--- old/duperemove-v0.09.beta1/debug.h	2014-09-27 01:56:37.000000000 +0200
+++ new/duperemove-v0.09.beta2/debug.h	2014-10-31 00:27:39.000000000 +0100
@@ -2,6 +2,7 @@
 #define	__DEBUG_H__
 
 #include <stdio.h>
+#include <stdbool.h>
 
 extern int verbose;
 extern int debug;
@@ -56,6 +57,7 @@
 declare_alloc_tracking_header(filerec);
 declare_alloc_tracking_header(files_compared);
 declare_alloc_tracking_header(filerec_token);
+declare_alloc_tracking_header(file_hash_head);
 /* Can be called anywhere we want to dump the above statistics */
 void print_mem_stats(void);
 
@@ -73,6 +75,45 @@
 		}							\
 	} while(0)
 
+
+/*
+ * compiletime_assert and associated code taken from
+ * linux-3.14.git/include/linux/compiler.h
+ */
+
+# define __compiletime_warning(message)
+# define __compiletime_error(message)
+
+/*
+ * Sparse complains of variable sized arrays due to the temporary variable in
+ * __compiletime_assert. Unfortunately we can't just expand it out to make
+ * sparse see a constant array size without breaking compiletime_assert on old
+ * versions of GCC (e.g. 4.2.4), so hide the array from sparse altogether.
+ */
+#define __compiletime_error_fallback(condition) \
+	do { ((void)sizeof(char[1 - 2 * condition])); } while (0)
+
+#define __compiletime_assert(condition, msg, prefix, suffix)		\
+	do {								\
+		bool __cond = !(condition);				\
+		extern void prefix ## suffix(void) __compiletime_error(msg); \
+		if (__cond)						\
+			prefix ## suffix();				\
+		__compiletime_error_fallback(__cond);			\
+	} while (0)
+
+/**
+ * compiletime_assert - break build and emit msg if condition is false
+ * @condition: a compile-time constant condition to check
+ * @msg:       a message to emit if condition is false
+ *
+ * In tradition of POSIX assert, this macro will break the build if the
+ * supplied condition is *false*, emitting the supplied error message if the
+ * compiler has support to do so.
+ */
+#define compiletime_assert(condition, msg) \
+	__compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
+
 /*
  * BUILD_BUG_ON() and associated code taken from
  * linux-2.6.git/include/linux/bug.h
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-v0.09.beta1/duperemove.c new/duperemove-v0.09.beta2/duperemove.c
--- old/duperemove-v0.09.beta1/duperemove.c	2014-09-27 01:56:37.000000000 +0200
+++ new/duperemove-v0.09.beta2/duperemove.c	2014-10-31 00:27:39.000000000 +0100
@@ -17,7 +17,6 @@
 
 #include <sys/types.h>
 #include <sys/stat.h>
-#include <sys/statfs.h>
 #include <limits.h>
 #include <fcntl.h>
 #include <unistd.h>
@@ -32,7 +31,6 @@
 #include <ctype.h>
 #include <getopt.h>
 #include <inttypes.h>
-#include <linux/magic.h>
 #include <assert.h>
 #include <linux/fiemap.h>
 
@@ -47,6 +45,7 @@
 #include "dedupe.h"
 #include "util.h"
 #include "serialize.h"
+#include "btrfs-util.h"
 #include "debug.h"
 
 /* exported via debug.h */
@@ -73,13 +72,17 @@
 static char *serialize_fname = NULL;
 static unsigned int hash_threads = 0;
 
+static int fancy_status = 0;
+
 static void debug_print_block(struct file_block *e)
 {
 	struct filerec *f = e->b_file;
 
-	printf("%s\tloff: %llu lblock: %llu seen: %u\n", f->filename,
+	printf("%s\tloff: %llu lblock: %llu seen: %u flags: 0x%x\n",
+	       f->filename,
 	       (unsigned long long)e->b_loff,
-	       (unsigned long long)e->b_loff / blocksize, e->b_seen);
+	       (unsigned long long)e->b_loff / blocksize, e->b_seen,
+	       e->b_flags);
 }
 
 static void debug_print_tree(struct hash_tree *tree)
@@ -372,10 +375,12 @@
 	assert(buf != NULL);
 	unsigned char *digest = malloc(DIGEST_LEN_MAX);
 	assert(digest != NULL);
+	static long long unsigned cur_num_filerecs = 0;
 
 	GMutex *tree_mutex = g_dataset_get_data(tree, "mutex");
 
-	printf("csum: %s\n", file->filename);
+	printf("csum: %s \t[%llu/%llu]\n", file->filename,
+	       __sync_add_and_fetch(&cur_num_filerecs, 1), num_filerecs);
 
 	fc = alloc_fiemap_ctxt();
 	if (fc == NULL) /* This should be non-fatal */
@@ -448,6 +453,8 @@
 	filerec_close(file);
 	free(digest);
 	free(buf);
+	if (fc)
+		free(fc);
 
 	return;
 
@@ -456,6 +463,8 @@
 err_noclose:
 	free(digest);
 	free(buf);
+	if (fc)
+		free(fc);
 
 	fprintf(
 		stderr,
@@ -543,7 +552,7 @@
 	printf("\t-A\t\tOpens files readonly when deduping. Primarily for use by privileged users on readonly snapshots\n");
 	printf("\t-b bsize\tUse bsize blocks. Default is %dk.\n",
 	       DEFAULT_BLOCKSIZE / 1024);
-	printf("\t-h\t\tPrint numbers in human-readble format.\n");
+	printf("\t-h\t\tPrint numbers in human-readable format.\n");
 	printf("\t-v\t\tBe verbose.\n");
 	printf("\t--hash-threads=N\n\t\t\tUse N threads for hashing phase. "
 	       "Default is automatically detected.\n");
@@ -593,44 +602,18 @@
 	return 0;
 }
 
-static int check_file_fs(struct filerec *file, int *bad_fs)
-{
-	int ret;
-	struct statfs fs;
-
-	*bad_fs = 0;
-
-	if (!run_dedupe)
-		return 0;
-
-	ret = filerec_open(file, 0);
-	if (ret)
-		return ret;
-
-	ret = fstatfs(file->fd, &fs);
-	if (ret) {
-		ret = -errno;
-		goto out;
-	}
-
-	if (fs.f_type != BTRFS_SUPER_MAGIC)
-		*bad_fs = 1;
-
-out:
-	filerec_close(file);
-	return ret;
-}
-
 /*
  * Returns nonzero on fatal errors only
  */
 static int add_file(const char *name, int dirfd)
 {
 	int ret, len = strlen(name);
-	int bad_fs;
+	int fd;
+	int on_btrfs = 0;
 	struct stat st;
 	char *pathtmp;
 	struct filerec *file;
+	uint64_t subvolid;
 
 	if (len > (path_max - pathp)) {
 		fprintf(stderr, "Path max exceeded: %s %s\n", path, name);
@@ -671,26 +654,59 @@
 		goto out;
 	}
 
-	file = filerec_new(path, st.st_ino);
-	if (file == NULL) {
-		fprintf(stderr, "Out of memory while allocating file record "
-			"for: %s\n", path);
-		return ENOMEM;
+	fd = open(path, O_RDONLY);
+	if (fd == -1) {
+		ret = errno;
+		fprintf(stderr, "Error %d: %s while opening file \"%s\". "
+			"Skipping.\n", ret, strerror(ret), path);
+		goto out;
 	}
 
-	ret = check_file_fs(file, &bad_fs);
+	ret = check_file_btrfs(fd, &on_btrfs);
 	if (ret) {
-		fprintf(stderr, "Skip file \"%s\" due to errors\n",
-			file->filename);
-		filerec_free(file);
-		return ENOMEM;
+		close(fd);
+		fprintf(stderr, "Skip file \"%s\" due to errors\n", path);
+		goto out;
 	}
-	if (bad_fs) {
-		fprintf(stderr, "Can only dedupe files on btrfs\n");
-		filerec_free(file);
+
+	if (run_dedupe && !on_btrfs) {
+		close(fd);
+		fprintf(stderr, "\"%s\": Can only dedupe files on btrfs\n",
+			path);
 		return ENOSYS;
 	}
 
+	if (on_btrfs) {
+		/*
+		 * Inodes between subvolumes on a btrfs file system
+		 * can have the same i_ino. Get the subvolume id of
+		 * our file so hard link detection works.
+		 */
+		ret = lookup_btrfs_subvolid(fd, &subvolid);
+		if (ret) {
+			close(fd);
+			fprintf(stderr,
+				"Error %d: %s while finding subvolid for file "
+				"\"%s\". Skipping.\n", ret, strerror(ret),
+				path);
+			goto out;
+		}
+	} else {
+		subvolid = st.st_dev;
+	}
+
+//	printf("\"%s\", ino: %llu, subvolid: %"PRIu64"\n", path,
+//	       (unsigned long long)st.st_ino, subvolid);
+
+	close(fd);
+
+	file = filerec_new(path, st.st_ino, subvolid);
+	if (file == NULL) {
+		fprintf(stderr, "Out of memory while allocating file record "
+			"for: %s\n", path);
+		return ENOMEM;
+	}
+
 out:
 	pathp = pathtmp;
 	return 0;
@@ -784,12 +800,27 @@
 
 	/* Filter out option combinations that don't make sense. */
 	if (write_hashes &&
-	    (read_hashes || run_dedupe))
+	    (read_hashes || run_dedupe)) {
+		if (run_dedupe)
+			fprintf(stderr,
+				"Error: Can not dedupe with --write-hashes "
+				"option. Try writing hashes and then deduping "
+				"with --read-hashes instead.\n");
+		if (read_hashes)
+			fprintf(stderr,
+				"Error: Specify only one of --write-hashes or "
+				"--read-hashes.\n");
+
 		return 1;
+	}
 
 	if (read_hashes) {
-		if (numfiles)
+		if (numfiles) {
+			fprintf(stderr,
+				"Error: --read-hashes option does not take a "
+				"file list argument\n");
 			return 1;
+		}
 		goto out_nofiles;
 	}
 
@@ -852,19 +883,14 @@
 		(unsigned long long)eoff[1] / blocksize);
 }
 
-struct dupe_walk_ctxt {
-	struct file_block	*orig;
-
-	struct filerec		*orig_file;
-	struct filerec		*walk_file;
-
-	struct results_tree	*res;
-};
-
-static int walk_dupe_block(struct file_block *block, void *priv)
+static int walk_dupe_block(struct filerec *orig_file,
+			   struct file_block *orig_file_block,
+			   struct filerec *walk_file,
+			   struct file_block *walk_file_block,
+			   struct results_tree *res)
 {
-	struct dupe_walk_ctxt *ctxt = priv;
-	struct file_block *orig = ctxt->orig;
+	struct file_block *orig = orig_file_block;
+	struct file_block *block = walk_file_block;
 	struct file_block *start[2] = { orig, block };
 	struct file_block *end[2];
 	struct running_checksum *csum;
@@ -891,8 +917,8 @@
 		 * This is kind of ugly, however it does correctly
 		 * signify the end of our list.
 		 */
-		if (orig->b_file_next.next == &ctxt->orig_file->block_list ||
-		    block->b_file_next.next == &ctxt->walk_file->block_list)
+		if (orig->b_file_next.next == &orig_file->block_list ||
+		    block->b_file_next.next == &walk_file->block_list)
 			break;
 
 		orig =	list_entry(orig->b_file_next.next, struct file_block,
@@ -903,32 +929,59 @@
 
 	finish_running_checksum(csum, match_id);
 
-	record_match(ctxt->res, match_id, ctxt->orig_file, ctxt->walk_file,
+	record_match(res, match_id, orig_file, walk_file,
 		     start, end);
 out:
 	return 0;
 }
 
+/*
+ * Start an extent search (with orig_block) at each block in our dups
+ * list which is owned by walk_file.
+ */
+static void lookup_walk_file_hash_head(struct file_block *orig_block,
+				       struct filerec *walk_file,
+				       struct results_tree *res)
+{
+	struct dupe_blocks_list *parent = orig_block->b_parent;
+	struct file_block *cur;
+	struct file_hash_head *head = find_file_hash_head(parent, walk_file);
+
+	/* find_file_dups should have checked this for us already */
+	abort_on(head == NULL);
+
+	list_for_each_entry(cur, &head->h_blocks, b_head_list) {
+		/* Ignore self. Technically this shouldn't happen (see above)
+		 * until we allow walking a file against itself. */
+		if (cur == orig_block)
+			continue;
+
+		abort_on(cur->b_file != walk_file);
+
+		if (walk_dupe_block(orig_block->b_file, orig_block,
+				    walk_file, cur, res))
+			break;
+	}
+}
+
 static void find_file_dupes(struct filerec *file, struct filerec *walk_file,
 			    struct results_tree *res)
 {
 	struct file_block *cur;
-	struct dupe_walk_ctxt ctxt = { 0, };
 
 	list_for_each_entry(cur, &file->block_list, b_file_next) {
 		if (block_seen(cur))
 			continue;
+
+		if (!file_in_dups_list(cur->b_parent, walk_file))
+			continue;
+
 		/*
 		 * For each file block with the same hash:
 		 *  - Traverse, along with original file until we have no match
 		 *     - record
 		 */
-		memset(&ctxt, 0, sizeof(struct dupe_walk_ctxt));
-		ctxt.orig_file = file;
-		ctxt.walk_file = walk_file;
-		ctxt.orig = cur;
-		ctxt.res = res;
-		for_each_dupe(cur, walk_file, walk_dupe_block, &ctxt);
+		lookup_walk_file_hash_head(cur, walk_file, res);
 	}
 	clear_all_seen_blocks();
 }
@@ -941,50 +994,6 @@
 	return mark_filerecs_compared(file1, file2);
 }
 
-/*
- * This dupe list is too large for the extent search algorithm to
- * handle efficiently. Instead of walking the block list, we walk the
- * list of files referenced and compare them to each other directly.
- *
- * A future improvement might be to always do this, at the cost of
- * extra memory usage.
- */
-static int walk_large_dups(struct hash_tree *tree,
-			   struct results_tree *res,
-			   struct dupe_blocks_list *dups)
-{
-	int ret;
-	struct rb_node *node = rb_first(&dups->dl_files_root);
-	struct rb_node *next;
-	struct filerec_token *t1, *t2;
-	struct filerec *file1, *file2;
-
-	while (node) {
-		t1 = rb_entry(node, struct filerec_token, t_node);
-		file1 = t1->t_file;
-
-		next = rb_next(node);
-		while (next) {
-			t2 = rb_entry(next, struct filerec_token, t_node);
-			file2 = t2->t_file;
-
-			/* filerec token tree does not allow duplicates */
-			abort_on(file1 == file2);
-			if (!filerecs_compared(file1, file2)) {
-				ret = compare_files(res, file1, file2);
-				if (ret)
-					return ret;
-				break;
-			}
-
-			next = rb_next(next);
-		}
-		node = rb_next(node);
-	}
-
-	return 0;
-}
-
 static int walk_dupe_hashes(struct dupe_blocks_list *dups,
 			    struct results_tree *res)
 {
@@ -1043,13 +1052,59 @@
 	return 0;
 }
 
+static void update_extent_search_status(struct hash_tree *tree,
+					unsigned long long processed)
+{
+	static int last_pos = -1;
+	int i, pos;
+	int width = 40;
+	float progress;
+
+	if (!fancy_status)
+		return;
+
+	progress = (float) processed / tree->num_hashes;
+	pos = width * progress;
+
+	/* Only update our status every width% */
+	if (pos <= last_pos)
+		return;
+	last_pos = pos;
+
+	printf("\r[");
+	for(i = 0; i < width; i++) {
+		if (i < pos)
+			printf("#");
+		else if (i == pos)
+			printf("%%");
+		else
+			printf(" ");
+	}
+	printf("]");
+	fflush(stdout);
+}
+
+static void clear_extent_search_status(unsigned long long processed,
+				       int err)
+{
+	if (!fancy_status)
+		return;
+
+	if (err)
+		printf("\rSearch exited (%llu processed) with error %d: "
+		       "\"%s\"\n", processed, err, strerror(err));
+	else
+		printf("\rSearch completed with no errors.             \n");
+	fflush(stdout);
+}
+
 static int find_all_dups(struct hash_tree *tree, struct results_tree *res)
 {
-	int ret;
+	int ret = 0;
 	struct rb_root *root = &tree->root;
 	struct rb_node *node = rb_first(root);
 	struct dupe_blocks_list *dups;
-	LIST_HEAD(large_dupes);
+	unsigned long long processed = 0;
 
 	while (1) {
 		if (node == NULL)
@@ -1057,29 +1112,22 @@
 
 		dups = rb_entry(node, struct dupe_blocks_list, dl_node);
 
-		if (dups->dl_num_files) {
-			list_add_tail(&dups->dl_large_list, &large_dupes);
+		update_extent_search_status(tree, processed);
 
-			printf("Hash (\"");
-			debug_print_digest(stdout, dups->dl_hash);
-			printf("\") has %u items (across %u files), processing later.\n",
-			       dups->dl_num_elem, dups->dl_num_files);
-		} else if (dups->dl_num_elem > 1) {
+		if (dups->dl_num_elem > 1) {
 			ret = walk_dupe_hashes(dups, res);
 			if (ret)
-				return ret;
+				goto out;
 		}
 
-		node = rb_next(node);
-	}
+		processed++;
 
-	list_for_each_entry(dups, &large_dupes, dl_large_list) {
-		ret = walk_large_dups(tree, res, dups);
-		if (ret)
-			return ret;
+		node = rb_next(node);
 	}
+out:
 
-	return 0;
+	clear_extent_search_status(processed, ret);
+	return ret;
 }
 
 int main(int argc, char **argv)
@@ -1101,6 +1149,9 @@
 		return EINVAL;
 	}
 
+	if (isatty(STDOUT_FILENO))
+		fancy_status = 1;
+
 	if (read_hashes) {
 		ret = read_hash_tree(serialize_fname, &tree, &blocksize, NULL);
 		if (ret == FILE_VERSION_ERROR) {
@@ -1143,8 +1194,9 @@
 			fprintf(stderr, "Error %d while writing to hash file\n", ret);
 		goto out;
 	} else {
-		printf("Hashed %"PRIu64" blocks. Calculating duplicate "
-		       "extents - this may take some time.\n", tree.num_blocks);
+		printf("Hashed %"PRIu64" blocks, resulting in %"PRIu64" unique "
+		       "hashes. Calculating duplicate extents - this may take "
+		       "some time.\n", tree.num_blocks, tree.num_hashes);
 	}
 
 	ret = find_all_dups(&tree, &res);
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-v0.09.beta1/filerec.c new/duperemove-v0.09.beta2/filerec.c
--- old/duperemove-v0.09.beta1/filerec.c	2014-09-27 01:56:37.000000000 +0200
+++ new/duperemove-v0.09.beta2/filerec.c	2014-10-31 00:27:39.000000000 +0100
@@ -83,13 +83,19 @@
 		free_filerec_token(token);
 }
 
+static void filerec_token_init(struct filerec_token *token,
+			       struct filerec *file)
+{
+	rb_init_node(&token->t_node);
+	token->t_file = file;
+}
+
 struct filerec_token *filerec_token_new(struct filerec *file)
 {
 	struct filerec_token *token = malloc_filerec_token();
 
 	if (token) {
-		rb_init_node(&token->t_node);
-		token->t_file = file;
+		filerec_token_init(token, file);
 	}
 	return token;
 }
@@ -151,8 +157,23 @@
 	}
 }
 
+static int cmp_filerecs(struct filerec *file1, uint64_t file2_inum,
+			uint64_t file2_subvolid)
+{
+	if (file1->inum < file2_inum)
+		return -1;
+	else if (file1->inum > file2_inum)
+		return 1;
+	if (file1->subvolid < file2_subvolid)
+		return -1;
+	if (file1->subvolid > file2_subvolid)
+		return -1;
+	return 0;
+}
+
 static void insert_filerec(struct filerec *file)
 {
+	int c;
 	struct rb_node **p = &filerec_by_inum.rb_node;
 	struct rb_node *parent = NULL;
 	struct filerec *tmp;
@@ -162,9 +183,10 @@
 
 		tmp = rb_entry(parent, struct filerec, inum_node);
 
-		if (file->inum < tmp->inum)
+		c = cmp_filerecs(tmp, file->inum, file->subvolid);
+		if (c < 0)
 			p = &(*p)->rb_left;
-		else if (file->inum > tmp->inum)
+		else if (c > 0)
 			p = &(*p)->rb_right;
 		else
 			abort_lineno(); /* We should never find a duplicate */
@@ -175,17 +197,19 @@
 	return;
 }
 
-static struct filerec *find_filerec(uint64_t inum)
+static struct filerec *find_filerec(uint64_t inum, uint64_t subvolid)
 {
+	int c;
 	struct rb_node *n = filerec_by_inum.rb_node;
 	struct filerec *file;
 
 	while (n) {
 		file = rb_entry(n, struct filerec, inum_node);
 
-		if (inum < file->inum)
+		c = cmp_filerecs(file, inum, subvolid);
+		if (c < 0)
 			n = n->rb_left;
-		else if (inum > file->inum)
+		else if (c > 0)
 			n = n->rb_right;
 		else
 			return file;
@@ -193,7 +217,8 @@
 	return NULL;
 }
 
-static struct filerec *filerec_alloc_insert(const char *filename, uint64_t inum)
+static struct filerec *filerec_alloc_insert(const char *filename,
+					    uint64_t inum, uint64_t subvolid)
 {
 	struct filerec *file = calloc_filerec(1);
 
@@ -210,6 +235,7 @@
 		INIT_LIST_HEAD(&file->tmp_list);
 		rb_init_node(&file->inum_node);
 		file->inum = inum;
+		file->subvolid = subvolid;
 		file->comparisons = RB_ROOT;
 
 		insert_filerec(file);
@@ -219,11 +245,12 @@
 	return file;
 }
 
-struct filerec *filerec_new(const char *filename, uint64_t inum)
+struct filerec *filerec_new(const char *filename, uint64_t inum,
+			    uint64_t subvolid)
 {
-	struct filerec *file = find_filerec(inum);
+	struct filerec *file = find_filerec(inum, subvolid);
 	if (!file)
-		file = filerec_alloc_insert(filename, inum);
+		file = filerec_alloc_insert(filename, inum, subvolid);
 	return file;
 }
 
@@ -458,9 +485,12 @@
 				last = 1;
 
 			dprintf("(fiemap) [%d] fe_logical: %llu, "
-				"fe_length: %llu\n",
+				"fe_length: %llu, fe_physical: %llu, "
+				"fe_flags: 0x%x\n",
 				i, (unsigned long long)fm_ext[i].fe_logical,
-				(unsigned long long)fm_ext[i].fe_length);
+				(unsigned long long)fm_ext[i].fe_length,
+				(unsigned long long)fm_ext[i].fe_physical,
+				fm_ext[i].fe_flags);
 
 			loff = fm_ext[i].fe_logical;
 			ext_len = fm_ext[i].fe_length;
@@ -494,8 +524,9 @@
 				last = 1;
 			}
 
-			dprintf("(fiemap) loff: %"PRIu64" ext_len: %"PRIu64"\n",
-				loff, ext_len);
+//			dprintf("(fiemap) loff: %"PRIu64" ext_len: %"PRIu64
+//				" flags: 0x%x\n",
+//				loff, ext_len, fm_ext[i].fe_flags);
 
 			if (fm_ext[i].fe_flags & FIEMAP_EXTENT_SHARED)
 				*shared_bytes += ext_len;
@@ -527,7 +558,7 @@
 		return 1;
 	}
 
-	file = filerec_new(argv[1], 500); /* Use made up ino */
+	file = filerec_new(argv[1], 500, 1); /* Use made up ino */
 	if (!file) {
 		fprintf(stderr, "filerec_new(): malloc error\n");
 		return 1;
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-v0.09.beta1/filerec.h new/duperemove-v0.09.beta2/filerec.h
--- old/duperemove-v0.09.beta1/filerec.h	2014-09-27 01:56:37.000000000 +0200
+++ new/duperemove-v0.09.beta2/filerec.h	2014-10-31 00:27:39.000000000 +0100
@@ -11,6 +11,7 @@
 struct filerec {
 	int		fd;			/* file descriptor */
 	char	*filename;		/* path to file */
+	uint64_t subvolid;
 
 	uint64_t		inum;
 	struct rb_node		inum_node;
@@ -29,7 +30,8 @@
 
 void init_filerec(void);
 
-struct filerec *filerec_new(const char *filename, uint64_t inum);
+struct filerec *filerec_new(const char *filename, uint64_t inum,
+			    uint64_t subvolid);
 void filerec_free(struct filerec *file);
 int filerec_open(struct filerec *file, int write);
 void filerec_close(struct filerec *file);
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-v0.09.beta1/hash-tree.c new/duperemove-v0.09.beta2/hash-tree.c
--- old/duperemove-v0.09.beta1/hash-tree.c	2014-09-27 01:56:37.000000000 +0200
+++ new/duperemove-v0.09.beta2/hash-tree.c	2014-10-31 00:27:39.000000000 +0100
@@ -34,48 +34,83 @@
 declare_alloc_tracking(file_block);
 declare_alloc_tracking(dupe_blocks_list);
 
-static int add_one_filerec_token(struct dupe_blocks_list *dups,
-				 struct filerec *file)
+struct file_hash_head *find_file_hash_head(struct dupe_blocks_list *dups,
+					   struct filerec *file)
 {
-	struct filerec_token *t = NULL;
+	struct rb_node *n = dups->dl_files_root.rb_node;
+	struct file_hash_head *head;
 
-	if (find_filerec_token_rb(&dups->dl_files_root, file))
-		return 0;
-
-	t = filerec_token_new(file);
-	if (!t)
-		return ENOMEM;
+	while (n) {
+		head = rb_entry(n, struct file_hash_head, h_node);
 
-	insert_filerec_token_rb(&dups->dl_files_root, t);
-	dups->dl_num_files++;
-	return 0;
+		if (head->h_file < file)
+			n = n->rb_left;
+		else if (head->h_file > file)
+			n = n->rb_right;
+		else return head;
+	}
+	return NULL;
 }
 
-static int add_filerec_tokens(struct dupe_blocks_list *dups)
+static void insert_file_hash_head(struct dupe_blocks_list *dups,
+				  struct file_hash_head *head)
 {
-	struct file_block *block;
+	struct rb_node **p = &dups->dl_files_root.rb_node;
+	struct rb_node *parent = NULL;
+	struct file_hash_head *tmp;
 
-	list_for_each_entry(block, &dups->dl_list, b_list) {
-		if (add_one_filerec_token(dups, block->b_file))
-			return ENOMEM;
+	while (*p) {
+		parent = *p;
+
+		tmp = rb_entry(parent, struct file_hash_head, h_node);
+
+		if (tmp->h_file < head->h_file)
+			p = &(*p)->rb_left;
+		else if (tmp->h_file > head->h_file)
+			p = &(*p)->rb_right;
+		else abort_lineno(); /* We should never find a duplicate */
 	}
-	return 0;
+
+	rb_link_node(&head->h_node, parent, p);
+	rb_insert_color(&head->h_node, &dups->dl_files_root);
 }
 
-static void free_filerec_tokens(struct dupe_blocks_list *dups)
+static int add_file_hash_head(struct dupe_blocks_list *dups,
+			      struct file_block *block)
 {
-	struct rb_node *node = rb_first(&dups->dl_files_root);
-	struct filerec_token *t;
+	struct filerec *file = block->b_file;
+	struct file_hash_head *head = find_file_hash_head(dups, file);
 
-	while (node) {
-		t = rb_entry(node, struct filerec_token, t_node);
+	if (head)
+		goto add;
 
-		node = rb_next(node);
+	head = malloc(sizeof(*head));
+	if (!head)
+		return ENOMEM;
 
-		dups->dl_num_files--;
-		rb_erase(&t->t_node, &dups->dl_files_root);
-		filerec_token_free(t);
-	}
+	head->h_file = file;
+	rb_init_node(&head->h_node);
+	INIT_LIST_HEAD(&head->h_blocks);
+	insert_file_hash_head(dups, head);
+	dups->dl_num_files++;
+add:
+	/* We depend on this being added in increasing block order */
+	list_add_tail(&block->b_head_list, &head->h_blocks);
+	return 0;
+}
+
+static void free_one_hash_head(struct dupe_blocks_list *dups,
+			       struct file_hash_head *head)
+{
+	rb_erase(&head->h_node, &dups->dl_files_root);
+	free(head);
+}
+
+int file_in_dups_list(struct dupe_blocks_list *dups, struct filerec *file)
+{
+	if (find_file_hash_head(dups, file))
+		return 1;
+	return 0;
 }
 
 static void insert_block_list(struct hash_tree *tree,
@@ -147,30 +182,27 @@
 		rb_init_node(&d->dl_node);
 		rb_init_node(&d->dl_by_size);
 		INIT_LIST_HEAD(&d->dl_list);
-		INIT_LIST_HEAD(&d->dl_large_list);
 		d->dl_files_root = RB_ROOT;
 
 		insert_block_list(tree, d);
 	}
 
-	if (d->dl_num_elem >= DUPLIST_CONVERT_LIMIT && d->dl_num_files == 0) {
-		if (add_filerec_tokens(d))
-			return ENOMEM;
-	}
-
 	e->b_file = file;
 	e->b_seen = 0;
 	e->b_loff = loff;
 	e->b_flags = flags;
-	list_add_tail(&e->b_file_next, &file->block_list);
-	file->num_blocks++;
 	e->b_parent = d;
 
-	if (d->dl_num_files) {
-		if (add_one_filerec_token(d, file))
-			return ENOMEM;
+	INIT_LIST_HEAD(&e->b_head_list);
+
+	if (add_file_hash_head(d, e)) {
+		free_file_block(e);
+		return ENOMEM;
 	}
 
+	list_add_tail(&e->b_file_next, &file->block_list);
+	file->num_blocks++;
+
 	d->dl_num_elem++;
 	list_add_tail(&e->b_list, &d->dl_list);
 
@@ -182,6 +214,7 @@
 				struct file_block *block, struct filerec *file)
 {
 	struct dupe_blocks_list *blocklist = block->b_parent;
+	struct file_hash_head *head;
 
 	abort_on(blocklist->dl_num_elem == 0);
 
@@ -193,12 +226,16 @@
 	list_del(&block->b_file_next);
 	list_del(&block->b_list);
 
+	list_del(&block->b_head_list);
+	head = find_file_hash_head(blocklist, file);
+	if (head && list_empty(&head->h_blocks))
+		free_one_hash_head(blocklist, head);
+
 	blocklist->dl_num_elem--;
 	if (blocklist->dl_num_elem == 0) {
 		rb_erase(&blocklist->dl_node, &tree->root);
 		tree->num_hashes--;
 
-		free_filerec_tokens(blocklist);
 		free_dupe_blocks_list(blocklist);
 	}
 
@@ -214,25 +251,6 @@
 		remove_hashed_block(tree, block, file);
 }
 
-void for_each_dupe(struct file_block *block, struct filerec *file,
-		   for_each_dupe_t func, void *priv)
-{
-	struct dupe_blocks_list *parent = block->b_parent;
-	struct file_block *cur;
-
-	list_for_each_entry(cur, &parent->dl_list, b_list) {
-		/* Ignore self and any blocks from another file */
-		if (cur == block)
-			continue;
-
-		if (cur->b_file != file)
-			continue;
-
-		if (func(cur, priv))
-			break;
-	}
-}
-
 static unsigned int seen_counter = 1;
 
 int block_seen(struct file_block *block)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-v0.09.beta1/hash-tree.h new/duperemove-v0.09.beta2/hash-tree.h
--- old/duperemove-v0.09.beta1/hash-tree.h	2014-09-27 01:56:37.000000000 +0200
+++ new/duperemove-v0.09.beta2/hash-tree.h	2014-10-31 00:27:39.000000000 +0100
@@ -14,24 +14,14 @@
 	unsigned int	dl_num_elem;
 	struct list_head	dl_list;
 
-	/*
-	 * num_files and files_root are used when the total number of
-	 * blocks in the list exceeds DUPLIST_CONVERT_LIMIT (defined
-	 * below)
-	 */
 	unsigned int		dl_num_files;
-	struct rb_root		dl_files_root;
-	struct list_head	dl_large_list; /* Temporary list for
-						* use by extent finding code */
+	struct rb_root		dl_files_root; /* stores file_hash_head nodes */
 
 	unsigned char		dl_hash[DIGEST_LEN_MAX];
 };
 
-/* Max number of blocks before we'll add filerec tokens */
-#define	DUPLIST_CONVERT_LIMIT		30000
-
 /* Fiemap flags that would cause us to skip comparison of the block */
-#define FIEMAP_SKIP_FLAGS	(FIEMAP_EXTENT_UNKNOWN|FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_UNWRITTEN)
+#define FIEMAP_SKIP_FLAGS	(FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_UNWRITTEN)
 /* Fiemap flags that indicate the extent may have already been deduped */
 #define FIEMAP_DEDUPED_FLAGS	(FIEMAP_EXTENT_SHARED)
 
@@ -50,15 +40,33 @@
 					  * with this md5. */
 
 	struct list_head	b_file_next; /* filerec->block_list */
+
+	struct list_head	b_head_list; /* file_hash_head->h_blocks */
 };
 
 int insert_hashed_block(struct hash_tree *tree, unsigned char *digest,
 			struct filerec *file, uint64_t loff, unsigned int flags);
 void remove_hashed_blocks(struct hash_tree *tree, struct filerec *file);
 
-typedef int (for_each_dupe_t)(struct file_block *, void *);
-void for_each_dupe(struct file_block *block, struct filerec *file,
-		   for_each_dupe_t func, void *priv);
+/*
+ * Stores a list of blocks with the same hash / filerec
+ * combination. Each dupe_blocks_list keeps a tree of these (sorted by
+ * file).
+ *
+ * This speeds up the extent search by allowing us to skip blocks that
+ * don't belong to the file we are 'walking'. Blocks are inserted into
+ * h_blocks in the same order they are given to insert_hashed_block()
+ * (that is, in order of increasing offset).
+ */
+struct file_hash_head {
+	struct filerec *h_file;
+	struct rb_node h_node;
+	struct list_head h_blocks;
+};
+
+int file_in_dups_list(struct dupe_blocks_list *dups, struct filerec *file);
+struct file_hash_head *find_file_hash_head(struct dupe_blocks_list *dups,
+					   struct filerec *file);
 
 int block_seen(struct file_block *block);
 int block_ever_seen(struct file_block *block);
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-v0.09.beta1/hashstats.c new/duperemove-v0.09.beta2/hashstats.c
--- old/duperemove-v0.09.beta1/hashstats.c	2014-09-27 01:56:37.000000000 +0200
+++ new/duperemove-v0.09.beta2/hashstats.c	2014-10-31 00:27:39.000000000 +0100
@@ -93,6 +93,21 @@
 	}
 }
 
+static void printf_file_block_flags(struct file_block *block)
+{
+	if (!block->b_flags)
+		return;
+
+	printf("( ");
+	if (block->b_flags & FILE_BLOCK_SKIP_COMPARE)
+		printf("skip_compare ");
+	if (block->b_flags & FILE_BLOCK_DEDUPED)
+		printf("deduped ");
+	if (block->b_flags & FILE_BLOCK_HOLE)
+		printf("hole ");
+	printf(")");
+}
+
 static void print_by_size(void)
 {
 	struct rb_node *node = rb_first(&by_size);
@@ -104,6 +119,8 @@
 	else
 		printf("Print top %d hashes\n", num_to_print);
 
+	printf("Hash, # Blocks, # Files\n");
+
 	while (1) {
 		if (node == NULL)
 			break;
@@ -111,14 +128,18 @@
 		dups = rb_entry(node, struct dupe_blocks_list, dl_by_size);
 
 		debug_print_digest(stdout, dups->dl_hash);
-		printf(", %u\n", dups->dl_num_elem);
+		printf(", %u, %u\n", dups->dl_num_elem, dups->dl_num_files);
 		if (print_blocks) {
 			list_for_each_entry(block, &dups->dl_list,
 					    b_list) {
 				struct filerec *f = block->b_file;
-				printf("  %s\tloff: %llu lblock: %llu\n", f->filename,
+				printf("  %s\tloff: %llu lblock: %llu "
+				       "flags: 0x%x ", f->filename,
 				       (unsigned long long)block->b_loff,
-				       (unsigned long long)block->b_loff / blocksize);
+				       (unsigned long long)block->b_loff / blocksize,
+				       block->b_flags);
+				printf_file_block_flags(block);
+				printf("\n");
 			}
 		}
 
@@ -133,12 +154,12 @@
 {
 	struct filerec *file;
 
-	printf("Showing %llu files.\nInode\tBlocks Stored\tFilename\n",
+	printf("Showing %llu files.\nInode\tBlocks Stored\tSubvold ID\tFilename\n",
 		num_filerecs);
 
 	list_for_each_entry(file, &filerec_list, rec_list) {
-		printf("%"PRIu64"\t%"PRIu64"\t%s\n", file->inum,
-		       file->num_blocks, file->filename);
+		printf("%"PRIu64"\t%"PRIu64"\t%"PRIu64"\t%s\n", file->inum,
+		       file->num_blocks, file->subvolid, file->filename);
 	}
 }
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-v0.09.beta1/serialize.c new/duperemove-v0.09.beta2/serialize.c
--- old/duperemove-v0.09.beta1/serialize.c	2014-09-27 01:56:37.000000000 +0200
+++ new/duperemove-v0.09.beta2/serialize.c	2014-10-31 00:27:39.000000000 +0100
@@ -34,8 +34,8 @@
 #include "debug.h"
 
 #include "csum.h"
-#include "hash-tree.h"
 #include "filerec.h"
+#include "hash-tree.h"
 
 #include "serialize.h"
 
@@ -131,6 +131,7 @@
 	finfo.ino = swap64(file->inum);
 	finfo.file_size = 0ULL; /* We don't store this yet */
 	finfo.num_blocks = swap64(file->num_blocks);
+	finfo.subvolid = swap64(file->subvolid);
 
 	name_len = strlen(file->filename);
 	finfo.name_len = swap16(name_len);
@@ -256,6 +257,7 @@
 	f->ino = swap64(disk.ino);
 	f->file_size = swap64(disk.file_size);
 	f->num_blocks = swap64(disk.num_blocks);
+	f->subvolid = swap64(disk.subvolid);
 	f->rec_len = swap16(disk.rec_len);
 	f->name_len = swap16(disk.name_len);
 
@@ -295,7 +297,7 @@
 	dprintf("Load %"PRIu64" hashes for \"%s\"\n", finfo.num_blocks,
 		fname);
 
-	file = filerec_new(fname, finfo.ino);
+	file = filerec_new(fname, finfo.ino, finfo.subvolid);
 	if (file == NULL)
 		return ENOMEM;
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/duperemove-v0.09.beta1/serialize.h new/duperemove-v0.09.beta2/serialize.h
--- old/duperemove-v0.09.beta1/serialize.h	2014-09-27 01:56:37.000000000 +0200
+++ new/duperemove-v0.09.beta2/serialize.h	2014-10-31 00:27:39.000000000 +0100
@@ -47,7 +47,8 @@
 	uint16_t	rec_len;
 	uint16_t	name_len;
 	uint32_t	pad0;
-/*20*/	uint64_t	pad1[3];
+	uint64_t	subvolid;
+/*20*/	uint64_t	pad1[2];
 	char		name[0];
 };
 

-- 
To unsubscribe, e-mail: opensuse-commit+unsubscribe@opensuse.org
For additional commands, e-mail: opensuse-commit+help@opensuse.org

    

commit duperemove for openSUSE:Factory

root＠hilbert.suse.de