Recently I had to compare the contents of two directories on Windows.

I was not allowed to install new software on the machine, and the workaround of combining Windows shell commands with Excel and regular expressions was a bit annoying, so I decided to implement a class in Java.

Source Code for comparing directories

Below is a class that uses a given regular expression to compare file names in two given directories.

The concept is as follows.

  1. Create a new object of this class providing the following parameters.
    • dirPath - a path for a directory, which should be used as the source for comparisons.
    • pattern - a regular expression (passed as a String and compiled into a Pattern object) for identifying files of interest. A file's name has to match the pattern.
    • fileExtension - a file extension, which should be used as filter for file types of interest.
  2. Call the method compareDir, providing the target directory you want to compare against the source directory given via the constructor's parameters.
  3. The returned collection contains all file names, which ...
    • ... match the given pattern object ...
    • ... and are in the source directory ...
    • ... but are not in given target directory.
  4. The returned Set contains the key strings, which can be used for the HashMaps returned by the methods for getting the target and source file names and file paths.

If you are interested in the files that are in sync (common to both directories), use the method getCommmonFileNames (note that the name is spelled with three m's in the source code below).

In the end I only compare two collections, which is very easy using HashSets. Not only can I use key Strings, but also the class already provides a method called removeAll. What would have required coding is already free to use :0). I like Java a lot for this reason.

// Start with a copy of every key found in the source directory ...
ret = new HashSet<String>(srcFiles.keySet());
// ... and remove each key that is also present in the compared directory,
// leaving only the keys that exist in the source alone.
ret.removeAll(compFiles.keySet());

And here the full source code.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
/*
 * A class for comparing directories and high level file information on files in a directory.
 * */
 
package de.consulting.bolte.io;
 
import java.io.File;
import java.io.FilenameFilter;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
 
/**
 * Compares the file names found in a source directory with the file names
 * found in a target directory.
 *
 * Files are first filtered by a file extension and then by a regular
 * expression. The part of a file name matched by the regular expression is
 * used as the comparison key, so two files are regarded as "the same" when
 * their keys are equal. If several files in one directory produce the same
 * key, the last one listed wins.
 *
 * Instances are not thread-safe: {@link #compareDir(String)} replaces the
 * state returned by {@link #getTrgFileNames()} and
 * {@link #getCommonFileNames()}.
 */
public class DirDiff {

	/**
	 * Creates a comparer that uses the provided directory path as the source
	 * directory for later comparisons.
	 *
	 * The String pattern containing a regular expression is compiled once
	 * into a Pattern object, which is used to match / filter files of
	 * interest. An invalid (or {@code null}) regular expression does not
	 * throw; instead {@link #isReady()} returns {@code false}.
	 *
	 * Only files whose names end with the provided file extension, like
	 * ".txt", are regarded in later comparisons.
	 *
	 * @param dirPath
	 *            a path for a directory, which should be used as the source for
	 *            comparisons.
	 * @param pattern
	 *            a regular expression identifying files of interest. A file's
	 *            name has to contain a match for the pattern.
	 * @param fileExtension
	 *            a file extension, which should be used as filter for file
	 *            types of interest.
	 */
	public DirDiff(String dirPath, String pattern, final String fileExtension) {
		this.fileExtension = fileExtension;
		this.keyPattern = compileOrNull(pattern);
		this.srcDir = new File(dirPath);

		/*
		 * Only read the source directory when there is a usable pattern;
		 * otherwise the object stays in the "not ready" state instead of
		 * failing with a NullPointerException.
		 */
		this.srcFiles = (keyPattern != null && srcDir.exists())
				? readFileNames(srcDir, extensionFilter())
				: null;
	}

	/**
	 * Validates that the source directory exists and that the regular
	 * expression passed to the constructor has been compiled properly.
	 *
	 * @return true, if an object of this class is ready for use, else false.
	 */
	public boolean isReady() {
		return srcDir.exists() && keyPattern != null;
	}

	/**
	 * Compiles the given regular expression.
	 *
	 * @param regex
	 *            the regular expression to compile, may be {@code null}.
	 * @return the compiled Pattern, or {@code null} if the expression is
	 *         {@code null} or syntactically invalid.
	 */
	private static Pattern compileOrNull(String regex) {
		if (regex == null) {
			return null;
		}
		try {
			return Pattern.compile(regex);
		} catch (PatternSyntaxException ex) {
			/* Deliberately not rethrown: reported through isReady(). */
			return null;
		}
	}

	/**
	 * Creates the filter accepting only file names that end with the file
	 * extension passed to the constructor.
	 *
	 * @return a FilenameFilter for the configured file extension.
	 */
	private FilenameFilter extensionFilter() {
		return new FilenameFilter() {
			@Override
			public boolean accept(File directory, String fileName) {
				return fileName.endsWith(fileExtension);
			}
		};
	}

	/**
	 * Returns a collection holding the file names matching the compiled
	 * Pattern and the provided FilenameFilter.
	 *
	 * @param dir
	 *            the directory to inspect for file names matching the Pattern.
	 * @param fileFilter
	 *            a FilenameFilter determining the file type of interest.
	 * @return a HashMap<String, String> holding the part of each file name
	 *         matched by the Pattern as key and the path of the file (directory
	 *         path + separator + file name) as value. The map is empty if
	 *         there is no usable Pattern or the directory cannot be listed.
	 */
	private HashMap<String, String> readFileNames(File dir,
			FilenameFilter fileFilter) {
		HashMap<String, String> ret = new HashMap<String, String>();
		if (keyPattern == null) {
			return ret;
		}

		/*
		 * The array only holds file names, not full paths. File.list returns
		 * null when the path is not a directory or an I/O error occurs.
		 */
		String[] fileNames = dir.list(fileFilter);
		if (fileNames == null) {
			return ret;
		}

		for (String fileName : fileNames) {
			Matcher keyMatcher = keyPattern.matcher(fileName);
			if (keyMatcher.find()) {
				ret.put(keyMatcher.group(), dir.getPath() + File.separator
						+ fileName);
			}
		}

		return ret;
	}

	/**
	 * Gets a collection containing all keys, which are in sync between the
	 * two directories of the most recent call to {@link #compareDir(String)}.
	 *
	 * @return a collection of type HashSet<String> containing keys usable
	 *         with the HashMaps returned by {@link #getSrcFileNames()} and
	 *         {@link #getTrgFileNames()}, or {@code null} if no comparison
	 *         has been carried out successfully.
	 */
	public HashSet<String> getCommonFileNames() {
		return this.commonFiles;
	}

	/**
	 * Misspelled legacy name of {@link #getCommonFileNames()}, kept so that
	 * existing callers continue to compile.
	 *
	 * @return the same collection as {@link #getCommonFileNames()}.
	 */
	public HashSet<String> getCommmonFileNames() {
		return getCommonFileNames();
	}

	/**
	 * Gets the files from the source directory that matched the file
	 * extension and the regular expression.
	 *
	 * The matched part of each file name is used as key String. The
	 * corresponding file path is stored as value.
	 *
	 * @return a collection of type HashMap<String, String> mapping keys to
	 *         file paths in the source directory, or {@code null} if the
	 *         source directory did not exist or the regular expression was
	 *         invalid when this object was created.
	 */
	public HashMap<String, String> getSrcFileNames() {
		return this.srcFiles;
	}

	/**
	 * Gets the files from the target directory of the most recent call to
	 * {@link #compareDir(String)} that matched the file extension and the
	 * regular expression.
	 *
	 * The matched part of each file name is used as key String. The
	 * corresponding file path is stored as value.
	 *
	 * @return a collection of type HashMap<String, String> mapping keys to
	 *         file paths in the target directory, or {@code null} if no
	 *         target directory has been read.
	 */
	public HashMap<String, String> getTrgFileNames() {
		return this.compFiles;
	}

	/**
	 * Compares the files of the source directory with the files of the
	 * provided target directory, using the file extension and the regular
	 * expression passed to the constructor.
	 *
	 * Both directories have to exist and the regular expression has to be
	 * valid. Else no comparison is applied.
	 *
	 * @param trgDir
	 *            a path to a directory, which files should be compared to files
	 *            in the source directory.
	 * @return A Set containing all keys of the source files, which could not
	 *         be found in the directory referenced by 'trgDir', or
	 *         {@code null} if no comparison was applied.
	 */
	public Set<String> compareDir(String trgDir) {
		/* Reset results of a previous comparison before starting a new one. */
		commonFiles = null;
		compFiles = null;

		File compDir = new File(trgDir);
		if (keyPattern == null || !compDir.exists()) {
			return null;
		}
		compFiles = readFileNames(compDir, extensionFilter());

		/*
		 * srcFiles is null when the source directory was missing at
		 * construction time, even if it has been created since.
		 */
		if (srcFiles == null || !srcDir.exists()) {
			return null;
		}

		Set<String> ret = new HashSet<String>(srcFiles.keySet());
		ret.removeAll(compFiles.keySet());

		commonFiles = new HashSet<String>(srcFiles.keySet());
		commonFiles.retainAll(compFiles.keySet());

		return ret;
	}

	private final String fileExtension;
	private final Pattern keyPattern;
	private final HashMap<String, String> srcFiles;
	private final File srcDir;
	private HashMap<String, String> compFiles;
	private HashSet<String> commonFiles;
}

Unit Testing

Below you can find a small class I used for unit testing the above class; it also serves as a usage example.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
/**
 
 */
package de.consulting.bolte.io.test;
 
import static org.junit.Assert.*;
 
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.util.Iterator;
import java.util.Set;
 
import org.junit.Before;
import org.junit.Test;
 
import consulting.bolte.io.DirDiff;
 
/**
 * @author Alexander Bolte (2015)
 
 */
public class UnitTestDirDiff {
 
	private boolean createEmptyTextFile(String trgFilePath) {
		boolean isCreated = false;
		PrintWriter writer = null;
 
		try {
			/* Create a new file and, if existing, overwrite it. */
			writer = new PrintWriter(trgFilePath, "UTF-8");
			writer.println();
			isCreated = true;
		} catch (FileNotFoundException | UnsupportedEncodingException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		} finally {
			if (writer != null) {
				writer.close();
			}
		}
 
		return isCreated;
	}
 
	@Before
	public void testDirDiffPrep() {
		File srcDir = new File(parentPath + srcDirName);
		File trgDir = new File(parentPath + trgDirName);
 
		/*
		 * Create a source directory and some empty text files in this
		 * directory.
		 */
		if (srcDir.exists()) {
			srcDir.delete();
		}
		srcDir.mkdirs();
		for (int i = 0; i <= 20; i++) {
			createEmptyTextFile(srcDir.getAbsolutePath() + "/Bla" + i + ".txt");
		}
 
		/*
		 * Create a target directory and some empty text files in this
		 * directory.
		 */
		if (trgDir.exists()) {
			trgDir.delete();
		}
		trgDir.mkdirs();
		for (int i = 0; i <= 20; i += 2) {
			createEmptyTextFile(trgDir.getAbsolutePath() + "/Bla" + i + ".txt");
		}
	}
 
	/**
	 * Test method for
	 * {@link consulting.bolte.io.DirDiff#DirDiff(java.lang.String, java.lang.String, java.lang.String)}
	 * .
	 */
	@Test
	public void testDirDiff() {
		DirDiff aDiff = new DirDiff(parentPath + srcDirName, ".+", ".txt");
		final boolean expectedResult = true; 
		boolean actualResult = false;
		
		actualResult = aDiff != null;
		
		assertEquals("Object instanziated from tested class must be initialized no matter what happens on file level.", expectedResult, actualResult);		
		
		aDiff = null;
	}
 
	/**
	 * Test method for
	 * {@link consulting.bolte.io.DirDiff#compareDir(java.lang.String)}.
	 */
	@Test
	public void testCompareDir() {
		DirDiff aDiff = new DirDiff(parentPath + srcDirName, ".+", ".txt");
		Set<String> ret = aDiff.compareDir(parentPath + trgDirName);
		Iterator<String> it = null;
		final int expectedCount = 10;
		int actualCount = 0;
 
		if (ret != null) {
			it = ret.iterator();
			actualCount = ret.size();
			while (it.hasNext()) {
				System.out.println(it.next());
			}
		} else {
			System.out
					.println("ERROR - Source or target directory not existing.");
		}
 
		it = null;
		ret = null;
		aDiff = null;
		
		assertEquals("10 files must be identyfied as difference.", expectedCount, actualCount);
	}
 
	private final String parentPath = "C:/Temp/";
	private final String srcDirName = "Src";
	private final String trgDirName = "Trg";
}