View Javadoc

1   /**
2    * Copyright 2010, CSIRO Australia.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *         http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  package au.csiro.netcdf.wron;
18  
19  import java.io.BufferedReader;
20  import java.io.ByteArrayInputStream;
21  import java.io.File;
22  import java.io.FileNotFoundException;
23  import java.io.FileReader;
24  import java.io.IOException;
25  import java.util.ArrayList;
26  import java.util.Calendar;
27  import java.util.Date;
28  import java.util.GregorianCalendar;
29  import java.util.HashMap;
30  import java.util.List;
31  import java.util.Map;
32  import java.util.NoSuchElementException;
33  import java.util.Scanner;
34  import java.util.Set;
35  import java.util.SortedSet;
36  import java.util.TimeZone;
37  import java.util.TreeSet;
38  import java.util.regex.Pattern;
39  
40  import org.apache.log4j.Logger;
41  
42  import au.csiro.netcdf.NcWriteVariable;
43  
44  /**
45   * A set of common processing used for conversion of MDB SY datasets from CSV to netCDF format.
46   * 
47   * Copyright 2010, CSIRO Australia All rights reserved.
48   * 
49   * @author James Dempsey on 21/04/2010
50   * @version $Revision: 78 $ $Date: 2010-07-24 16:23:13 +1000 (Sat, 24 Jul 2010) $
51   */
52  public class ConversionUtils
53  {
54  
55      /**
56       * Encoding used to fill variables.
57       */
58      public static final String ENCODING = "UTF-8";
59  
60      /**
61       * Constant that defines the logger to be used.
62       */
63      private static final Logger LOG = Logger.getLogger(ConversionUtils.class.getName());
64  
65      /**
66       * Retrieve the latitudes within the bounds provided.
67       * 
68       * @param allLatitudes
69       *            The list of latitudes in numerical order
70       * @param startingLatitude The first (numerically -38 < -24) latitude to be included
71       * @param endingLatitude The last (numerically -38 < -24) latitude to be included
72       * @return The set of latitudes in the range
73       */
74      public Set<String> getLimitedLatitudes(Set<String> allLatitudes, String startingLatitude, String endingLatitude)
75      {
76          Set<String> result = new TreeSet<String>();
77          for (String lat : allLatitudes)
78          {
79              if ("".equals(startingLatitude) || lat.compareTo(startingLatitude)>=0)
80              {
81                  if ("".equals(endingLatitude) || lat.compareTo(endingLatitude)<=0)
82                  {
83                      result.add(lat);
84                  }
85              }
86          }
87          return result;
88      }
89  
90      /**
91       * Retrieve the block of latitudes to be processed.
92       * 
93       * @param sortedLatitudes
94       *            The list of latitudes in numerical order
95       * @param blockNum
96       *            The block of latitudes to be retrieved
97       * @param blockSize
98       *            The number of latitudes in a block
99       * @return The latitudes in the block
100      */
101     public List<String> getLatitudeBlock(Set<String> sortedLatitudes, int blockNum, int blockSize)
102     {
103         List<String> result = new ArrayList<String>();
104         List<String> workingLats = new ArrayList<String>(sortedLatitudes);
105         int start = blockNum * blockSize;
106         int end = Math.min(start + blockSize, workingLats.size());
107         for (int i = start; i < end; i++)
108         {
109             result.add(workingLats.get(i));
110         }
111         return result;
112     }
113 
114     public List<CellData> buildCsvFilenamesForLatitudes(List<String> lats, List<CellData> triples, String folderName, String extension)
115     {
116         List<CellData> result = new ArrayList<CellData>();
117         for (String latitude : lats)
118         {
119             for (CellData cellData : triples)
120             {
121                 if (cellData.latitude.equals(latitude))
122                 {
123                     String filename = folderName + cellData.cellId + extension;
124                     cellData.filename = filename;
125                     result.add(cellData);
126                 }
127             }
128 
129         }
130         return result;
131     }
132 
133     /**
134      * Read in the data from the CSV files. The data is returned as a double map keyed on time index (days since epoch)
135      * and latitude of LongitudeRanges. Each LongitudeRange will contain the values for each variable at each each 
136      * longitude point within the latitude.
137      * 
138      * @param targetCells The cells to be read. Each cell should have a populated filename (including full path) 
139      * @param lats The latitudes being processed.
140      * @param numVariables The number of variables held in each csv file (all MUST be read).
141      * @param numTimes The number of times expected in each CSV file.
142      * @param fillValues The data any missing entries are to be filled with.
143      * @return The variables data from the files
144      */
145     public Map<Integer, Map<String, LongitudeRange>> readDataByLatitudes(List<CellData> targetCells, List<String> lats,
146             int numVariables, int numTimes, int numLongitudes, String[] fillValues, String startLongitudeRange)
147     {
148         long start = System.currentTimeMillis();
149         // Return type is a map of times, each with a map of latitudes and their data
150         Map<Integer, Map<String, LongitudeRange>> result = new HashMap<Integer, Map<String,LongitudeRange>>();
151         
152         // Prefill the map to account for any missing values
153         for (int i = 0; i < numTimes; i++)
154         {
155             Map<String, LongitudeRange> latData = new HashMap<String, LongitudeRange>();
156             result.put(i, latData);
157             for (String targetLat : lats)
158             {
159                LongitudeRange range = new LongitudeRange(numVariables, numLongitudes, fillValues); 
160                range.setStartLongitudeRange(startLongitudeRange);
161                latData.put(targetLat, range);
162             }
163             
164         }
165         long mid = System.currentTimeMillis();
166 
167         // Read the data files
168         int latIndex = 0;
169         int longitudeIndex = 0;
170         int counter = 0;
171         for (CellData cell : targetCells)
172         {
173             if (++counter % 100 == 0)
174             {
175                 LOG.warn("   Num files processed: " + counter);
176             }
177             String filename = cell.filename;
178             
179             String latitude = cell.latitude; //lats.get(latIndex);
180             try
181             {
182                 Scanner s = null;
183                 int timeIndex = 0;
184                 try
185                 {
186                     s = new Scanner(new BufferedReader(new FileReader(filename)))
187                             .useDelimiter(Pattern.compile("[\n,]"));
188 
189                     // remove header line
190                     for (int i = 0; i < numVariables+1; i++)
191                     {
192                         s.next();
193                     }
194 
195                     while (s.hasNext())
196                     {
197                         s.next(); // date column
198 
199                         String variableValues[] = new String[numVariables];
200                         for (int i = 0; i < numVariables; i++)
201                         {
202                             variableValues[i] = s.next();
203                         }
204                         // Trim off the return at the end of the last variable
205                         variableValues[numVariables-1] = variableValues[numVariables-1].trim(); 
206                         LongitudeRange timeLatRange =  result.get(timeIndex).get(latitude);
207                         timeLatRange.putValues(cell.longitude, variableValues);
208                         timeIndex++;
209                     }
210                 }
211                 catch (NoSuchElementException e)
212                 {
213                     LOG.error("Ran out of data in file " + filename + " on line " + timeIndex, e);
214                     throw e;
215                 }
216                 catch (NullPointerException e)
217                 {
218                     LOG.error("NPE when procesing file " + filename + " on line " + timeIndex, e);
219                     throw e;
220                 }
221                 finally
222                 {
223                     if (s != null)
224                     {
225                         s.close();
226                     }
227                 }
228 
229             }
230             catch (FileNotFoundException fnfe)
231             {
232                 // Ignore any missing files as the data has already been pre-populated to account for these.
233                 LOG.warn(fnfe.getMessage());
234                 //System.out.println(fnfe.getMessage());
235             }
236             longitudeIndex++;
237             if (longitudeIndex >= numLongitudes)
238             {
239                 longitudeIndex = 0;
240                 latIndex++;
241             }
242         }
243         long end = System.currentTimeMillis();
244         LOG.warn("Processed " + targetCells.size() + " files in " + (mid-start) + " ms prep and " + (end-mid) + " ms read");
245         return result;
246     }
247 
248     /**
249      * Write out the data for each variable. 
250      * 
251      * @param latData The data to be output
252      * @param allFilenames The filenames for each decade and variable
253      * @param variableNames The names of the variables to be written
254      * @param baseDate The startdate of the data (i.e. the date a date value of 0 is)
255      * @throws IOException If the data cannot be written.
256      */
257     public void writeLatDataByDecade(Map<Integer, Map<String, LongitudeRange>> latData, Map<Integer, String[]> allFilenames,
258             String[] variableNames, Date baseDate) throws IOException
259     {
260         // TODO: Speed up options
261         // - do lookups separately and cache results
262         // - Send blocks of latitudes to the writer
263         // - Send blocks of times to the writer
264         
265         long start = System.currentTimeMillis();
266         
267         NcWriteVariable command = new NcWriteVariable();
268 
269         SortedSet<Integer> timeKeys = new TreeSet<Integer>(latData.keySet());
270         
271         int numBlocks =  timeKeys.size()*(latData.get(new ArrayList<Integer>(timeKeys).get(0)).keySet().size())*variableNames.length;
272         LOG.warn("Started writing " + numBlocks + " data blocks.");
273         for (int varNum = 0; varNum < variableNames.length; varNum++)
274         {
275             for (Integer dateKey : timeKeys)
276             {
277                 // Get Decade of dateKey
278                 int decadeKey = calcDecadeKey(dateKey, baseDate);
279                 // Get Array of filenames for decade
280                 String variableFileNames[] = allFilenames.get(decadeKey);
281                 
282                 SortedSet<String> latKeys = new TreeSet<String>(latData.get(dateKey).keySet());
283                 for (String latitude : latKeys)
284                 {
285                     LongitudeRange longRange = latData.get(dateKey).get(latitude);
286                     String fillRange = "lookup(" + dateKey + "),lookup("
287                             + latitude + "),0-" + (longRange.getNumLongitudes() - 1);
288                     //LOG.warn("  Processing range " + fillRange);
289             
290                     try
291                     {
292                         command.execute(new File(variableFileNames[varNum]), variableNames[varNum], fillRange,
293                                 new ByteArrayInputStream(longRange.getValues(varNum).getBytes(ENCODING)), false);
294                     }
295                     catch (IllegalArgumentException iae)
296                     {
297                         LOG.error(iae);
298                         LOG.error("fillRange (date, lat, long): " + fillRange);
299                         throw iae;
300                     }
301                 }
302             }
303         }
304         long end = System.currentTimeMillis();
305         LOG.warn("Processed " + numBlocks + " blocks in " + (end-start) + " ms.");
306         
307     }
308 
309     public int calcDecadeKey(Integer dateKey, Date baseDate)
310     {
311         Calendar cal = getCalendar(baseDate);
312         cal.add(Calendar.DAY_OF_YEAR, dateKey);
313         int decade = cal.get(Calendar.YEAR) / 10;
314 
315         return decade;
316     }
317     
318     /**
319      * returns a TimeZone with correct Sydney daylight saving regime - ie correct server timezone.
320      */
321     public TimeZone getGoodZone()
322     { 
323         return TimeZone.getTimeZone("Australia/Sydney");
324     }
325 
326    /**
327     * Returns a suitable GregorianCalendar instance for use in date 
328     * calculations.
329     * 
330     * @param date The date to be set.
331     * @return GregorianCalendar A calendar instance for the ACT timezone.
332     */
333    public Calendar getCalendar(Date date)
334    {
335     GregorianCalendar cal = new GregorianCalendar(getGoodZone());
336     cal.setTime(date);
337     return cal;
338    }
339 
340 }