1 /* -------------------------------------------------------------------
2 GeoVISTA Center (Penn State, Dept. of Geography)
3
4 Java source file for the class DBaseFile
5
6 Copyright (c), 2002, GeoVISTA Center
7 All Rights Reserved.
8
9 Original Author: Michael T. Wheeler
10 Modified for more generic use, accessors added,
11 some comments added by: Frank Hardisty
12
13 $Author: jmacgill $
14
15 $Id: DBaseFile.java,v 1.1.1.1 2003/02/28 14:54:01 jmacgill Exp $
16
17 $Date: 2003/02/28 14:54:01 $
18
19 Reference: Document no:
20 ___ ___
21
22 To Do:
23 ___
24
25 ------------------------------------------------------------------- */
26 /* --------------------------- Package ---------------------------- */
27 package edu.psu.geovista.db.dbase;
28
29 /* ------------------ Import classes (packages) ------------------- */
30 import java.io.*;
31
32 import java.util.*;
33
34
35 /*====================================================================
36 Implementation of class DBaseFile
37 ====================================================================*/
38
39 /***
40 * DBaseFile reads a dBase (*.dbf) file into an Object[] array where
41 * the first Object is an array of Strings that holds the names of the arrays,
42 * and the sucessive Objects are arrays of primitives of doubles, ints, strings,
43 * or booleans.
44 *
45 * DBaseFile can also be used to write dBase files, but this functionality
46 * is only partly supported.
47 *
48 * @version $Revision: 1.1.1.1 $
49 * @author Michael T. Wheeler (mtwheels@psu.edu)
50 * @see DBaseFieldDescriptor
51 */
52 public class DBaseFile {
53 private static byte HEADER_FIELD_TERMINATOR = 0x0D;
54 private static byte RECORD_STATUS_DELETED = 0x2A; // => '*'
55 private static byte FIELD_STATUS_NULL = 0x2A; // => '*'
56 private transient int numRecords;
57 private transient short sizeRecordBytes;
58 private transient Vector recordDescriptor;
59 private Object[] dataSet;
60
61 public DBaseFile(InputStream inStream) {
62 this.readRecords(inStream);
63 }
64
65 public DBaseFile(String fileName) throws IOException {
66 //dataSet = dataIn;
67 FileInputStream fisDbase = new FileInputStream(new File(fileName));
68 this.readRecords(fisDbase);
69 }
70
71 // Really a copy with an addition. Hack for Craig
72 public DBaseFile(String fileName, String outputFileName)
73 throws IOException {
74 FileInputStream fisDbase = new FileInputStream(new File(fileName));
75 GvDataInputStream gdis = new GvDataInputStream(fisDbase);
76
77 FileOutputStream fosDbase = new FileOutputStream(
78 new File(outputFileName));
79 GvDataOutputStream gdos = new GvDataOutputStream(fosDbase);
80
81 try {
82 copyFileHeader(gdis, gdos);
83
84 byte[] recordBuffer = new byte[sizeRecordBytes];
85
86 // The recordNumber value in the Shape file starts at 1
87 boolean numeric = false;
88
89 for (int i = 1; i <= numRecords; i++) {
90 gdis.read(recordBuffer, 0, sizeRecordBytes);
91 gdos.write(recordBuffer, 0, sizeRecordBytes);
92
93 if (numeric) {
94 byte ch1 = (byte) ((i >> 24) & 0xFF); // big
95 byte ch2 = (byte) ((i >> 16) & 0xFF);
96 byte ch3 = (byte) ((i >> 8) & 0xFF);
97 byte ch4 = (byte) ((i >> 0) & 0xFF); // little
98 int littleEndian = (ch4 << 24) | (ch3 << 16) |
99 (ch2 << 8) | (ch1 << 0);
100 gdos.writeInt(littleEndian);
101 } else {
102 String string = Integer.toString(i);
103 byte[] arrayByte = string.getBytes();
104
105 for (int j = 0; j < 6; j++) {
106 if (j < arrayByte.length) {
107 gdos.writeByte(arrayByte[j]);
108 } else {
109 gdos.writeByte(' ');
110 }
111 }
112 }
113 }
114 } // end try
115 catch (EOFException e) {
116 // Ignore EOF
117 }
118 }
119
120 private void readRecords(InputStream inStream) {
121 GvDataInputStream gdis = new GvDataInputStream(inStream);
122
123 try {
124 int numRecs = readFileHeader(gdis);
125
126 byte[] recordBuffer = new byte[sizeRecordBytes];
127
128 // Side effect - sets numRecords
129 int dimension = recordDescriptor.size();
130
131 // The recordNumber value in the Shape file starts at 1
132 for (int i = 1; i <= numRecords; i++) {
133 gdis.read(recordBuffer, 0, sizeRecordBytes);
134 loadRecord(i, dataSet, recordBuffer);
135 }
136 } // end try
137 catch (EOFException e) {
138 //YYY bad bad...
139 // Ignore EOF
140 } catch (Exception e) {
141 e.printStackTrace();
142 }
143 }
144
145 private int readFileHeader(GvDataInputStream gdis)
146 throws IOException {
147 long byteCount = 0;
148 byte fileType = gdis.readByte();
149 byteCount++;
150
151 byte[] dateBytes = new byte[3];
152
153 for (int i = 0; i < 3; i++)
154 dateBytes[i] = gdis.readByte();
155
156 byteCount++;
157
158 // dBase file of course does not handle 2000 dates correctly. Also, Calendar
159 // seems to use a 0-based month.
160 GregorianCalendar lastModified = new GregorianCalendar(((dateBytes[0] < 50)
161 ? (2000 + dateBytes[0])
162 : (1900 + dateBytes[0])),
163 dateBytes[1] - 1,
164 dateBytes[2]);
165
166 numRecords = gdis.readIntLE(); //num records is the length of the individual arrays
167 byteCount = byteCount + 4;
168
169 short sizeHeaderBytes = gdis.readShortLE();
170 sizeRecordBytes = gdis.readShortLE();
171 byteCount = byteCount + 4;
172
173 gdis.skipBytes(20); // Unused and /or reserved
174 byteCount = byteCount + 20;
175
176 // We've already read 32 bytes + 1 for the final field terminator
177 int fieldDescriptorSize = DBaseFieldDescriptor.getDataSizeBytes();
178 sizeHeaderBytes -= 33;
179
180 if ((sizeHeaderBytes % fieldDescriptorSize) != 0) {
181 throw new IOException(
182 "Invalid number of header bytes. Available = " +
183 sizeHeaderBytes + ", size = " + fieldDescriptorSize);
184 }
185
186 //fieldDescriptorCount is the number of arrays
187 int fieldDescriptorCount = sizeHeaderBytes / fieldDescriptorSize;
188 this.dataSet = new Object[fieldDescriptorCount + 1]; //plus one for the variable names array
189
190 String[] variableNames = new String[fieldDescriptorCount];
191 dataSet[0] = variableNames;
192
193 byte[] fieldDescriptorByteArray = new byte[fieldDescriptorSize];
194 recordDescriptor = new Vector();
195
196 for (int i = 0; i < fieldDescriptorCount; i++) {
197 gdis.read(fieldDescriptorByteArray, 0, fieldDescriptorSize);
198 byteCount = byteCount + fieldDescriptorSize;
199
200 //System.out.println("fieldDescriptorSize = " + fieldDescriptorSize);
201 //System.out.println("byteCount = " + byteCount);
202 DBaseFieldDescriptor fieldDescriptor = new DBaseFieldDescriptor(
203 dataSet,
204 fieldDescriptorByteArray,
205 this.numRecords,
206 i + 1);
207
208 recordDescriptor.addElement(fieldDescriptor);
209 variableNames[i] = fieldDescriptor.getFieldName(); //add the names
210 }
211
212 // Read the last byte of the header, it should be a 0
213 byte fieldTerminator = gdis.readByte();
214
215 if (fieldTerminator != HEADER_FIELD_TERMINATOR) {
216 throw new IOException("Invalid field terminator = " +
217 fieldTerminator);
218 }
219
220 return numRecords;
221 }
222
223 // See above, hack for Craig
224 private void copyFileHeader(GvDataInputStream gdis, GvDataOutputStream gdos)
225 throws IOException {
226 // Input
227 byte fileType = gdis.readByte();
228 byte[] dateBytes = new byte[3];
229
230 for (int i = 0; i < 3; i++)
231 dateBytes[i] = gdis.readByte();
232
233
234 // Output
235 gdos.writeByte(fileType);
236
237 for (int i = 0; i < 3; i++)
238 gdos.writeByte(dateBytes[i]);
239
240
241 // Input
242 numRecords = gdis.readIntLE();
243
244 short sizeHeaderBytes = gdis.readShortLE();
245 sizeRecordBytes = gdis.readShortLE();
246
247
248 // Ouput
249 gdos.writeIntLE(numRecords);
250
251 int fieldDescriptorSize = DBaseFieldDescriptor.getDataSizeBytes();
252 gdos.writeShortLE(sizeHeaderBytes + fieldDescriptorSize);
253 gdos.writeShortLE(sizeRecordBytes + 6);
254
255
256 // Input
257 gdis.skipBytes(20); // Unused and /or reserved
258
259 // Output (skip 20)
260 for (int i = 0; i < 20; i++)
261 gdos.writeByte(0);
262
263
264 // We've already read 32 bytes + 1 for the final field terminator
265 sizeHeaderBytes -= 33;
266
267 if ((sizeHeaderBytes % fieldDescriptorSize) != 0) {
268 throw new IOException(
269 "Invalid number of header bytes. Available = " +
270 sizeHeaderBytes + ", size = " + fieldDescriptorSize);
271 }
272
273 int fieldDescriptorCount = sizeHeaderBytes / fieldDescriptorSize;
274 byte[] fieldDescriptorByteArray = new byte[fieldDescriptorSize];
275
276 for (int i = 0; i < fieldDescriptorCount; i++) {
277 // input
278 gdis.read(fieldDescriptorByteArray, 0, fieldDescriptorSize);
279
280
281 // output
282 gdos.write(fieldDescriptorByteArray, 0, fieldDescriptorSize);
283 }
284
285 byte[] descriptorNew = createNewDescriptor(fieldDescriptorSize);
286 gdos.write(descriptorNew, 0, fieldDescriptorSize);
287
288 // Read the last byte of the header, it should be a 0
289 byte fieldTerminator = gdis.readByte();
290
291 if (fieldTerminator != HEADER_FIELD_TERMINATOR) {
292 throw new IOException("Invalid field terminator = " +
293 fieldTerminator);
294 }
295
296
297 // Output
298 gdos.writeByte(fieldTerminator);
299 }
300
301 // This was a hack to create an outgoing data file for Craig. It needs a lot of
302 // work to be a more-general solution
303 private byte[] createNewDescriptor(int fieldDescriptorSize) {
304 byte[] arrayByte = new byte[fieldDescriptorSize];
305 arrayByte[0] = 'I';
306 arrayByte[1] = 'D';
307 arrayByte[2] = '_';
308 arrayByte[3] = 'C';
309 arrayByte[17] = 0; // decimal places
310
311
312 // arrayByte[11] = DBaseFieldDescriptor.FIELD_TYPE_NUMERIC;
313 arrayByte[11] = DBaseFieldDescriptor.FIELD_TYPE_STRING;
314 arrayByte[16] = 6; // field length
315 arrayByte[23] = 0;
316
317 return arrayByte;
318 }
319
320 private void loadRecord(int rowNumber, Object[] dataSet,
321 byte[] recordBuffer) {
322 try {
323 // Read the status of the record from the first byte
324 byte dataRecordStatus = recordBuffer[0];
325
326 if (dataRecordStatus == RECORD_STATUS_DELETED) {
327 System.out.println("Ignoring deleted record");
328
329 return;
330 }
331
332 // Iterate through all of our fields
333 // Note, the bufferOffset = 1 compensates for an unused first space (see Arc doc)
334 int bufferOffset = 1;
335
336 // Iterate through all of our fields
337 // Note, the bufferOffset = 1 compensates for an unused first space (see Arc doc)
338 int indexField = 0;
339
340 for (Enumeration enum = recordDescriptor.elements();
341 enum.hasMoreElements();
342 indexField++) {
343 DBaseFieldDescriptor fieldDescriptor =
344 (DBaseFieldDescriptor) enum.nextElement();
345
346 // Read expected length and type from the schema
347 String fieldName = fieldDescriptor.getFieldName();
348
349 // Since Java doesn't have an unsigned type, we need to use a short here so we don't screw up the sign bit
350 short fieldLength = fieldDescriptor.getFieldLength();
351 byte fieldType = fieldDescriptor.getFieldType();
352 byte fieldStatus = recordBuffer[bufferOffset];
353 Object dataArray = fieldDescriptor.getDataArray();
354
355 if (fieldStatus == FIELD_STATUS_NULL) {
356 // No data for this field
357 } else {
358 String dataString = new String(recordBuffer, bufferOffset,
359 fieldLength);
360
361
362 //System.out.println(dataString);
363 dataString = dataString.trim();
364
365 // Convert the data
366 switch (fieldType) {
367 case DBaseFieldDescriptor.FIELD_TYPE_STRING:
368
369 if (dataString.length() > 0) {
370 String gvString = new String(dataString);
371 String[] stArray = null;
372 stArray = (String[]) dataArray;
373 stArray[rowNumber - 1] = gvString;
374 }
375
376 break;
377
378 case DBaseFieldDescriptor.FIELD_TYPE_NUMERIC:
379
380 byte decimalPlaces = fieldDescriptor.getFieldDecimalPlaces();
381
382 if (decimalPlaces > 0) { //it's a double
383
384 double[] dArray = null;
385 dArray = (double[]) dataArray;
386
387 if (dataString.length() > 0) { //not null?
388
389 double doubleValue = Double.parseDouble(
390 dataString);
391 dArray[rowNumber - 1] = doubleValue;
392 } else { //it's null
393 dArray[rowNumber - 1] = Double.NaN;
394 } //not null
395 } else { //an int
396
397 int[] iArray = null;
398 iArray = (int[]) dataArray;
399
400 if (dataString.length() > 0) { //not null?
401
402 int intValue = Integer.parseInt(dataString);
403 iArray[rowNumber - 1] = intValue;
404 } else { //null
405 iArray[rowNumber - 1] = Integer.MIN_VALUE;
406 } //end if null
407 } //end if decimal
408
409 break;
410
411 default:
412 break;
413 }
414 }
415
416
417 // Step over the field we just read
418 bufferOffset += fieldLength;
419 }
420 } catch (Exception ex) {
421 ex.printStackTrace();
422 }
423 }
424
425 public Object[] getDataSet() {
426 return this.dataSet;
427 }
428
429 public void setDataSet(Object[] dataSet) {
430 this.dataSet = dataSet;
431 }
432
433 public static void main(String[] args) {
434 try {
435 String stringFileName = "/D:/geovista_data/Temp/test_fields.dbf";
436 String outputFileName = "/D:/geovista_data/Temp/test_fields2.dbf";
437 Object[] dataArray = null;
438 DBaseFile dbaseFile2 = new DBaseFile(stringFileName); //, dataArray);
439 dataArray = dbaseFile2.getDataSet();
440
441 DBaseFile dbaseFile = new DBaseFile(stringFileName, outputFileName);
442 } catch (Exception ex) {
443 ex.printStackTrace();
444 }
445 }
446 }
// This page was automatically generated by Maven