/* * Created on Sep 21, 2003 * */ import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.StringTokenizer; /** * @author chase */ public class JoinSpreadSheets { static List readLine(BufferedReader r) throws IOException { String s = r.readLine(); if (s == null) return null; String[] toks = s.split("\t"); ArrayList a = new ArrayList(toks.length); for (String t : toks) a.add(t.trim()); return a; } static List> readFile(String file_name) throws Throwable { ArrayList a = new ArrayList(); BufferedReader r = new BufferedReader(new FileReader(file_name)); List l = readLine(r); while (l != null) { a.add(l); l = readLine(r); } return a; } static void writeList(BufferedWriter w, List l, String sep, int pad, boolean[] nonempty) throws Throwable { for (int i = 0; i < l.size(); i++) { if (nonempty[i]) { w.write(sep); sep = "\t"; w.write(String.valueOf(l.get(i))); } } for (int i = l.size(); i < pad; i++) if (nonempty[i]) { w.write(sep); sep = "\t"; } } public static void main(String[] args) throws Throwable { // java JoinSpreadSheeets main_file main_column second_file // second_column result_file diff_file if (args.length != 6) { System.err .println("usage: java JoinSpreadSheeets main_file main_column second_file second_column result_file diff_file\n" + "joins rows from main and second file with matching columns into result file.\n" + "Unselected rows from main are written to diff_file"); } int main_column = Integer.parseInt(args[1]) - 1; int second_column = Integer.parseInt(args[3]) - 1; List> a = readFile(args[0]); List> b = readFile(args[2]); BufferedWriter result = new BufferedWriter(new FileWriter(args[4])); BufferedWriter diff = new BufferedWriter(new FileWriter(args[5])); int acols = 0; int bcols = 0; // Fill in M with the join column from B HashMap m = new HashMap(); for (int i = 0; i < b.size(); i++) { List l = (List) (b.get(i)); int ll = l.size(); if (ll > bcols) bcols = ll; String k = (String) l.get(second_column); if (k.length() > 0) { m.put(k.toLowerCase(), new Integer(i)); } } for (List l : a) { int ll = l.size(); if (ll > acols) acols = ll; } boolean[] ane = new boolean[acols]; boolean[] bne = new boolean[bcols]; HashSet ahm = new HashSet(); String[] barr = new String[bcols]; findEmptiesAndEquals(a, ane, ahm, null); findEmptiesAndEquals(b, bne, null, barr); bne[second_column] = false; for (int i = 0; i < bne.length; i++) { if (ahm.contains(barr[i])) { // Duplicated column bne[i] = false; } } // Iterate over rows of A, either extracting the joined row from B // or writing the row into the difference file. for (int i = 0; i < a.size(); i++) { List l = (List) a.get(i); String k = (String) l.get(main_column); k = k.toLowerCase(); if (i == 0) { // header row writeList(result, l, "", acols, ane); writeList(result, (List) b.get(0), "\t", bcols, bne); result.write("\n"); writeList(diff, l, "", acols, ane); diff.write("\n"); } else if (k.length() > 0 && m.get(k) != null) { // matching row writeList(result, l, "", acols, ane); writeList(result, (List) b.get(((Integer) m.get(k)).intValue()), "\t", bcols, bne); result.write("\n"); } else { // difference row writeList(diff, l, "", acols, ane); diff.write("\n"); } } result.close(); diff.close(); } /** * @param a * @param ane * @param asb */ private static void findEmptiesAndEquals(List> a, boolean[] ane, HashSet ahm, String[] barr) { StringBuffer[] asb = new StringBuffer[ane.length]; for (int i = 0; i < asb.length; i++) asb[i] = new StringBuffer(); boolean first = true; for (List l : a) { int j = 0; for (String s : l) { if (first) { } else if (s.length() > 0) { ane[j] = true; } asb[j].append(s); asb[j].append("\n"); j++; } first = false; } int j = 0; for (StringBuffer sb : asb) { String s = sb.toString(); if (ahm != null) ahm.add(s); if (barr != null) barr[j] = s; j++; } } }