1 /**
2 * Copyright 2010, CSIRO Australia.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 package au.csiro.netcdf.wron;
18
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.RandomAccessFile;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.apache.commons.cli.BasicParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.MissingOptionException;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.log4j.Logger;

import ucar.ma2.Array;
import ucar.ma2.InvalidRangeException;
import ucar.nc2.Attribute;
import ucar.nc2.NetcdfFile;
import ucar.nc2.Variable;
import ucar.nc2.units.DateFormatter;
import au.csiro.netcdf.cli.CommandLineOptionsComparator;
import au.csiro.netcdf.util.CSVTokenizer;
import au.csiro.netcdf.util.Util;
49
50 /**
51 * The class is a converter control class to convert Murray Darling Basin Sustainable Yields netCDF files into CSV
52 * files.
53 *
54 * Copyright 2010, CSIRO Australia
55 *
56 * @author Robert Bridle on 07/07/2010
57 * @version $Revision: 84 $ $Date: 2010-08-25 15:56:46 +1000 (Wed, 25 Aug 2010) $
58 */
59 public class MdbsyNetCDF2CSVConverter
60 {
61 /**
62 * Constant that defines the logger to be used.
63 */
64 private static final Logger LOG = Logger.getLogger(MdbsyNetCDF2CSVConverter.class.getName());
65
66 /**
67 * The latitude variable name used in the netCDF files.
68 */
69 private static final String LATITUDE_VARIABLE_NAME = "lat";
70
71 /**
72 * The longitude variable name used in the netCDF files.
73 */
74 private static final String LONGITUDE_VARIABLE_NAME = "long";
75
76 /**
77 * The time variable name used in the netCDF files.
78 */
79 private static final String TIME_VARIABLE_NAME = "time";
80
81 /**
82 * The number of milliseconds in a 24 hours.
83 */
84 private static final Long MILLISECONDS_IN_A_DAY = new Long("86400000"); // milliseconds in a day.
85
86 /**
87 * The date column text that is found in all csv files.
88 */
89 // private static final String DATE_COLUMN_TEXT = writeOutDateColumn();
90
91 /**
92 * The name of the command line option used for specifying the input directory containing the netCDF files.
93 */
94 public static final String INPUT_DIR = "inputDir";
95
96 /**
97 * The name of the command line option used for specifying the directory to create the csv files in.
98 */
99 public static final String OUTPUT_DIR = "outputDir";
100
101 /**
102 * The pattern for accepted file names where multiple files are accepted.
103 */
104 public static final String FILENAME_PATTERN = "pattern";
105
106 /**
107 * The column that the variable name is stored in the csv file.
108 */
109 private static final int VARIABLE_COLUMN = 0;
110
111 /**
112 * The only variables that we are interested in writing out to csv files are those that are in three dimensions
113 * (i.e. rank=3)
114 */
115 private static final int RANK_THREE = 3;
116
117 /**
118 * NetCDf file extension.
119 */
120 private static final Object CSV_FILE_EXTENSION = ".csv";
121
122 /**
123 * CSV file extension.
124 */
125 private static final Object NETCDF_FILE_EXTENSION = ".nc";
126
127 /**
128 * A string containing the date column text to be written out as the first column in all the csv files.
129 */
130 private static String DATE_COLUMN_TEXT = null;
131
132 /**
133 * The attribute name used to store a variable's type.
134 */
135 private static final String UNITS = "units";
136
137 /**
138 * Command line entry point for the MdbsyNetCDF2CSVConverter class. Gets the input and output directory names
139 * from the command line and runs a conversion.
140 *
141 * @param args The command line arguments
142 * @throws ParseException If an error occurs when parsing the arguments.
143 * @throws IOException If the files cannot be read or written to.
144 * @throws InvalidRangeException if an invalid range is attempted to be read from a netCDf variable
145 */
146 @SuppressWarnings("static-access")
147 public static void main(String[] args) throws ParseException, IOException, InvalidRangeException
148 {
149 Options options = new Options();
150 try
151 {
152 Option inputDirectoryName = OptionBuilder.withArgName("dir").hasArg().withDescription(
153 "1. the directory path containing the netCDF files (split by latitude) to be converted into csv.")
154 .isRequired(true).withLongOpt(INPUT_DIR).create("i");
155
156 Option outputDirectoryName = OptionBuilder.withArgName("dir").hasArg().withDescription(
157 "2: the directory path to place the new csv files.").isRequired(true).withLongOpt(OUTPUT_DIR)
158 .create("o");
159
160 options.addOption(inputDirectoryName);
161 options.addOption(outputDirectoryName);
162
163 // parse the command line arguments
164 CommandLine parsedCommandLine = new BasicParser().parse(options, args);
165
166 String inputDir = parsedCommandLine.getOptionValue(INPUT_DIR);
167 String outputDir = parsedCommandLine.getOptionValue(OUTPUT_DIR);
168
169 MdbsyNetCDF2CSVConverter converter = new MdbsyNetCDF2CSVConverter();
170
171 long start = System.currentTimeMillis();
172 converter.execute(inputDir, outputDir);
173 long end = System.currentTimeMillis() - start;
174 LOG.warn("Successfully converted all netcdf files to csv files in: " + end + " ms.");
175 }
176 catch (MissingOptionException moe)
177 {
178 LOG.error(moe.getMessage());
179
180 // generate the help/usage statement
181 String header = "Recreate ";
182 String footer = "\nExample: --inputDir \"C:\\input\" --outputDir \"C:\\output\" \n"
183 + "Will convert the netCDF files (which are split by latitude) in the input directory to csv files in the output directory.";
184 StringWriter sw = new StringWriter();
185 HelpFormatter formatter = new HelpFormatter();
186 formatter.setOptionComparator(new CommandLineOptionsComparator());
187 final int lineWidth = 80;
188 formatter.printHelp(new PrintWriter(sw), lineWidth, "-", header, options, 0, 1, footer);
189 System.out.println(sw.toString());
190 }
191 }
192
193 /**
194 * Performs the conversion.
195 *
196 * @param inputDir
197 * the directory containing the Scenario A or C netCDF files.
198 * @param outputDir
199 * the directory in which the csv file will be placed.
200 * @throws InvalidRangeException
201 * thrown if an invalid range is attempted to be read from a netCDf variable.
202 * @throws IOException
203 * If the files cannot be read or written to.
204 */
205 public void execute(String inputDir, String outputDir) throws IOException, InvalidRangeException
206 {
207 // perform an initial conversion from netCDF to csv, the csv files will look like this:
208 // Date, 1985-01-01, 1895-01-02, 1985-01-03, ...
209 // APET, 0.1, 0.3, 0.0, ....
210 // rainfall,0.0, 0.4, 0.1, ...
211 System.out.println("Starting intial convert of NetCDF 2 CSV.");
212 LOG.info("Starting intial convert of NetCDF 2 CSV.");
213 writeCSVFiles(inputDir, outputDir);
214 System.out.println("Finished intial convert of NetCDF 2 CSV.");
215 LOG.info("Finished intial convert of NetCDF 2 CSV.");
216
217 // transpose the csv files so they will look like this:
218 // Date, APET, rainfall
219 // 1895-01-01, 0.1, 0.0
220 // 1895-01-02, 0.3, 0.4
221 // 1895-01-03, 0.0, 0.1
222 System.out.println("Started transposition of CSV.");
223 LOG.info("Started transposition of CSV.");
224 transposeCSVFiles(outputDir);
225 System.out.println("Finished transposition of CSV.");
226 LOG.info("Finished transposition of CSV.");
227 }
228
229 /**
230 * For each netCDF file in the input directory:
231 * <p>
232 * Create a csv file for every latitude and longitude position.
233 * <p>
234 * Write out the value of a variable at every time interval, at a specific latitude and longitude position.
235 *
236 * @param inputDir
237 * the directory containing the Scenario A or C netCDF files.
238 * @param outputDir
239 * the directory in which the csv file will be placed.
240 * @throws InvalidRangeException
241 * thrown if an invalid range is attempted to be read from a netCDf variable.
242 * @throws IOException
243 */
244 private void writeCSVFiles(String inputDir, String outputDir) throws IOException, InvalidRangeException
245 {
246 File dir = new File(inputDir);
247 File[] files = dir.listFiles();
248 if (files == null)
249 {
250 System.out.println("The input directory does not exist: " + inputDir);
251 LOG.error("The input directory does not exist: " + inputDir);
252 return;
253 }
254
255 for (int fileIndex = 0; fileIndex < files.length; fileIndex++)
256 {
257 if (files[fileIndex].isFile()
258 && NETCDF_FILE_EXTENSION.equals(Util.getFileExtension(files[fileIndex].getName())))
259 {
260 NetcdfFile nc = null;
261 try
262 {
263 nc = NetcdfFile.open(files[fileIndex].getAbsolutePath());
264
265 Array latitudes = getLatitudeValues(nc);
266 Array longitudes = getLongitudeValues(nc);
267
268 LOG.info("latitude coordinate variable size: " + latitudes.getSize());
269 LOG.info("longitude coordinate variable size: " + longitudes.getSize());
270
271 for (int latIndex = 0; latIndex < latitudes.getSize(); latIndex++)
272 {
273 for (int longIndex = 0; longIndex < longitudes.getSize(); longIndex++)
274 {
275 String fileName = createFileNameBasedOnLatLong(nc, latIndex, longIndex);
276 String filePath = outputDir + System.getProperty("file.separator") + fileName + ".csv";
277
278 LOG.info("\tWorking on file: " + filePath);
279
280 // if the csv file already exists, then append the next variable.
281 if (Util.fileExists(filePath))
282 {
283 File file = new File(filePath);
284 PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(file, true /*
285 * append
286 * mode
287 */)));
288
289 List<String> variablesAlreadyWrittenOut = getVariablesAlreadyWrittenOut(file);
290 List<String> variablesThatCanBeWrittenOut = getVariablesThatCanBeWrittenOut(nc);
291
292 variablesThatCanBeWrittenOut.removeAll(variablesAlreadyWrittenOut);
293
294 try
295 {
296 writeOutVariable(nc, out, variablesThatCanBeWrittenOut, latIndex, longIndex);
297 }
298 finally
299 {
300 if (out != null)
301 out.close();
302 }
303 }
304 else
305 // if the csv file does not exist, create a new csv file and write all available variables.
306 {
307 PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(new File(filePath),
308 false /* append mode */)));
309
310 List<String> variablesThatCanBeWrittenOut = getVariablesThatCanBeWrittenOut(nc);
311
312 try
313 {
314 out.println(writeOutDateColumn(nc)); // note: we only write out the date column when
315 // a new csv file is created.
316
317 writeOutVariable(nc, out, variablesThatCanBeWrittenOut, latIndex, longIndex);
318 }
319 finally
320 {
321 if (out != null)
322 out.close();
323 }
324 }
325 }
326 }
327 }
328 catch (java.io.FileNotFoundException fnfe)
329 {
330 System.out.println("file not found= " + files[fileIndex]);
331 LOG.error(fnfe);
332 }
333 finally
334 {
335 if (nc != null)
336 nc.close();
337 }
338 }
339 }
340 }
341
342 /**
343 * Writes out the value of a variable at every time interval, at a specific latitude and longitude position.
344 *
345 * @throws InvalidRangeException
346 * @throws IOException
347 *
348 */
349 private void writeOutVariable(NetcdfFile nc, PrintWriter out, List<String> variablesThatCanBeWrittenOut,
350 int latIndex, int longIndex) throws IOException, InvalidRangeException
351 {
352 for (String variableName : variablesThatCanBeWrittenOut)
353 {
354 Array subsection = getVariableAcrossTime(nc, variableName, latIndex, longIndex);
355 out.println(variableName + "," + subsection.toString().replaceAll(" ", ","));
356 }
357 }
358
359 /**
360 * Transpose the csv files so that columns represent a variable's value over time.
361 *
362 * @param outputDir
363 * the directory in which the csv file will be placed.
364 * @throws IOException
365 */
366 private void transposeCSVFiles(String outputDir) throws IOException
367 {
368 File dir = new File(outputDir);
369 File[] files = dir.listFiles();
370 if (files == null)
371 {
372 System.out.println("The output directory does not exist: " + outputDir);
373 LOG.error("The output directory does not exist: " + outputDir);
374 return;
375 }
376
377 for (int fileIndex = 0; fileIndex < files.length; fileIndex++)
378 {
379 if (files[fileIndex].isFile()
380 && CSV_FILE_EXTENSION.equals(Util.getFileExtension(files[fileIndex].getName())))
381 {
382 String[][] matrix = readLookupFile(files[fileIndex]);
383
384 // write over-the-top of the csv file.
385 PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(files[fileIndex], false /*
386 * append
387 * mode
388 */)));
389
390 // transpose String[][] matrix and write back out to csv
391 String[][] transposed = transpose(matrix);
392
393 try
394 {
395 for (int row = 0; row < transposed.length; row++)
396 {
397 for (int col = 0; col < transposed[0].length; col++)
398 {
399 out.print(transposed[row][col]);
400 if (col < (transposed[0].length - 1))
401 {
402 out.print(", ");
403 }
404 }
405 out.println();
406 }
407 }
408 finally
409 {
410 if (out != null)
411 out.close();
412 }
413 }
414 }
415 }
416
417 /**
418 * Returns the names of the variables in the netCDF file that we need to write out for a given latitude x longitude
419 * position.
420 *
421 * @param nc
422 * a netCDF file {@link NetcdfFile}
423 * @return the names of the variables in the netCDF file that we need to write out for a given latitude x longitude
424 * position.
425 */
426 private List<String> getVariablesThatCanBeWrittenOut(NetcdfFile nc)
427 {
428 List<String> variablesThatCanBeWrittenOut = new ArrayList<String>();
429
430 for (Variable variable : nc.getVariables())
431 {
432 if (variable.getRank() == RANK_THREE)
433 {
434 variablesThatCanBeWrittenOut.add(variable.getName().trim());
435 }
436 }
437
438 return variablesThatCanBeWrittenOut;
439 }
440
441 /**
442 * Returns the name of the variables that have already be written out to a csv file.
443 *
444 * @param file
445 * the csv file being written out too.
446 * @return the name of the variables that have already be written out to a csv file.
447 * @throws IOException
448 */
449 private List<String> getVariablesAlreadyWrittenOut(File file) throws IOException
450 {
451 List<String> variablesAlreadyWrittenOut = new ArrayList<String>();
452
453 String[][] matrix = readLookupFile(file);
454 for (int rows = 0; rows < matrix.length; rows++)
455 {
456 variablesAlreadyWrittenOut.add(matrix[rows][VARIABLE_COLUMN].trim());
457 }
458
459 return variablesAlreadyWrittenOut;
460 }
461
462 /**
463 * Creates a file name based on a given latitude and longitude.
464 *
465 * @param nc
466 * a netCDF file {@link NetcdfFile}
467 * @param latIndex
468 * the latitude index.
469 * @param longIndex
470 * the longitude index.
471 * @return a file name based on latitude and longitude position.
472 * @throws InvalidRangeException
473 * @throws IOException
474 */
475 private String createFileNameBasedOnLatLong(NetcdfFile nc, int latIndex, int longIndex) throws IOException,
476 InvalidRangeException
477 {
478 return "region_" + nc.readSection("lat(" + latIndex + ":" + latIndex + ")").toString().trim() + "_"
479 + nc.readSection("long(" + longIndex + ":" + longIndex + ")").toString().trim();
480 }
481
482 /**
483 * Parses a csv file into an row-by-column string matrix.
484 *
485 * @param file
486 * a csv file.
487 * @return row-by-column string matrix of the csv file.
488 * @throws IOException
489 */
490 public static String[][] readLookupFile(File file) throws IOException
491 {
492 RandomAccessFile raf = new RandomAccessFile(file, "r");
493 List<String[]> lineList = new ArrayList<String[]>();
494
495 try
496 {
497 String line = raf.readLine();
498 while (line != null)
499 {
500 String[] cols = new CSVTokenizer(line).getAllColumns();
501 lineList.add(cols);
502 line = raf.readLine();
503 }
504 }
505 finally
506 {
507 raf.close();
508 }
509
510 String[][] lineCols = lineList.toArray(new String[][] {});
511 return lineCols;
512 }
513
514 /**
515 * Transpose a matrix, e.g. [a][b] -> [b][a]
516 *
517 * @param values
518 * a String[][] matrix.
519 */
520 private String[][] transpose(String[][] values)
521 {
522 if (values.length == 0)
523 {
524 return new String[0][0];
525 }
526
527 String[][] transposed = new String[values[0].length][values.length];
528 for (int row = 0; row < values.length; row++)
529 {
530 for (int col = 0; col < values[0].length; col++)
531 {
532 transposed[col][row] = values[row][col];
533 }
534 }
535 return transposed;
536 }
537
538 /**
539 * Returns the value of a variable at every time interval, at a specific latitude and longitude position.
540 *
541 * @param nc
542 * a netCDF file {@link NetcdfFile}
543 * @param variableName
544 * the variable whose values are to be read.
545 * @param latIndex
546 * the latitude index.
547 * @param longIndex
548 * the longitude index.
549 * @return the value of a variable at every time interval, at a specific latitude and longitude position.
550 * @throws IOException
551 * @throws InvalidRangeException
552 */
553 private Array getVariableAcrossTime(NetcdfFile nc, String variableName, int latIndex, int longIndex)
554 throws IOException, InvalidRangeException
555 {
556 try
557 {
558 return nc.readSection(variableName + "(" + latIndex + ":" + latIndex + "," + longIndex + ":" + longIndex
559 + ",:)");
560 }
561 catch (InvalidRangeException ire)
562 {
563 ire.printStackTrace();
564 LOG.error("Could not read section: " + variableName + "(" + latIndex + ":" + latIndex + "," + longIndex
565 + ":" + longIndex + ",:)", ire);
566 }
567 return null;
568 }
569
570 /**
571 * Returns the longitude coordinate variable values.
572 *
573 * @param nc
574 * a netCDF file {@link NetcdfFile}
575 * @return the longitude coordinate variable values.
576 * @throws IOException
577 * @throws IllegalArgumentException
578 */
579 private Array getLongitudeValues(NetcdfFile nc) throws IOException, IllegalArgumentException
580 {
581 List<Variable> variables = nc.getVariables();
582 if (variables != null)
583 {
584 for (Variable v : variables)
585 {
586 if (v.isCoordinateVariable() && LONGITUDE_VARIABLE_NAME.equals(v.getName()))
587 {
588 return v.read();
589 }
590 }
591 }
592 LOG.error("Could not find coordinate variable: " + LONGITUDE_VARIABLE_NAME);
593 throw new IllegalStateException("Could not find coordinate variable: " + LONGITUDE_VARIABLE_NAME);
594 }
595
596 /**
597 * Returns the latitude coordinate variable values.
598 *
599 * @param nc
600 * a netCDF file {@link NetcdfFile}
601 * @return the latitude coordinate variable values.
602 * @throws IOException
603 * @throws IllegalArgumentException
604 */
605 private Array getLatitudeValues(NetcdfFile nc) throws IOException, IllegalArgumentException
606 {
607 List<Variable> variables = nc.getVariables();
608 if (variables != null)
609 {
610 for (Variable v : variables)
611 {
612 if (v.isCoordinateVariable() && LATITUDE_VARIABLE_NAME.equals(v.getName()))
613 {
614 return v.read();
615 }
616 }
617 }
618 LOG.error("Could not find coordinate variable: " + LATITUDE_VARIABLE_NAME);
619 throw new IllegalStateException("Could not find coordinate variable: " + LATITUDE_VARIABLE_NAME);
620 }
621
622 /**
623 * Returns the time variable.
624 *
625 * @param nc
626 * a netCDF file {@link NetcdfFile}
627 * @return the time variable.
628 * @throws IllegalArgumentException
629 */
630 private static Variable getTimeVariable(NetcdfFile nc) throws IllegalArgumentException
631 {
632 List<Variable> variables = nc.getVariables();
633 if (variables != null)
634 {
635 for (Variable v : variables)
636 {
637 if (v.isCoordinateVariable() && TIME_VARIABLE_NAME.equals(v.getName()))
638 {
639 return v;
640 }
641 }
642 }
643 LOG.error("Could not find coordinate variable: " + TIME_VARIABLE_NAME);
644 throw new IllegalStateException("Could not find coordinate variable: " + TIME_VARIABLE_NAME);
645 }
646
647 /**
648 * Converts a "days since" time coordinate variable into a list of formatted dates.
649 *
650 * @param nc
651 * a netCDF file {@link NetcdfFile}
652 * @return a list of formatted dates.
653 */
654 private static String writeOutDateColumn(NetcdfFile nc)
655 {
656 // we only need to create the date column text once, it is the same for all csv files.
657 if (DATE_COLUMN_TEXT == null)
658 {
659 StringBuffer strBuf = new StringBuffer();
660
661 DateFormatter dateFormatter = new DateFormatter();
662 Date epocDate;
663 try
664 {
665 Variable time = getTimeVariable(nc);
666
667 String timeUnits = getVariablesUnits(time);
668 String epocString = timeUnits.substring(timeUnits.indexOf("days since") + 11); // we expect the time
669 // value to be in units
670 // of:
671 // "days since yyyy-mm-dd h:m:s"
672
673 epocDate = dateFormatter.dateOnlyFormat(epocString);
674 strBuf.append("Date,");
675
676 long timeVariableSize = time.getSize();
677 for (int i = 0; i < timeVariableSize; i++)
678 {
679 strBuf.append(dateFormatter.toDateOnlyString(
680 new Date(epocDate.getTime() + (i * MILLISECONDS_IN_A_DAY))).trim());
681
682 if (i < (timeVariableSize - 1))
683 {
684 strBuf.append(",");
685 }
686 }
687
688 DATE_COLUMN_TEXT = strBuf.toString();
689 }
690 catch (java.text.ParseException pe)
691 {
692 pe.printStackTrace();
693 LOG.error("Error parsing date", pe);
694 }
695 }
696 return DATE_COLUMN_TEXT;
697 }
698
699 /**
700 * Finds the string value of the attribute that is used to store a variable's type.
701 *
702 * @param variable
703 * a variable {@link Variable}
704 * @return the string value of the variable's type.
705 */
706 private static String getVariablesUnits(Variable variable)
707 {
708 List<Attribute> attributes = variable.getAttributes();
709 if (attributes != null)
710 {
711 for (Attribute attribute : attributes)
712 {
713 if (UNITS.equals(attribute.getName()))
714 {
715 return attribute.getStringValue();
716 }
717 }
718 }
719 LOG
720 .error("Could not find the value for the variable: " + variable.getName() + " -\"" + UNITS
721 + "\" attribute.");
722 throw new IllegalStateException("Could not find the value for the variable: " + variable.getName() + " -\""
723 + UNITS + "\" attribute.");
724 }
725 }