Package thirdparty ::
Module DSV
|
|
1 """
2 DSV.py - Cliff Wells, 2002
3 Import/export DSV (delimiter separated values, a generalization of CSV).
4
5 $Id: DSV.py 3878 2007-01-09 22:28:37Z djpham $
6 Modified by Joe Pham <djpham@bitpim.org> to accommodate wxPython 2.8+
7
8 Basic use:
9
10 from DSV import DSV
11
12 data = file.read()
13 qualifier = DSV.guessTextQualifier(data) # optional
14 data = DSV.organizeIntoLines(data, textQualifier = qualifier)
15 delimiter = DSV.guessDelimiter(data) # optional
16 data = DSV.importDSV(data, delimiter = delimiter, textQualifier = qualifier)
17 hasHeader = DSV.guessHeaders(data) # optional
18
19 If you know the delimiters, qualifiers, etc, you may skip the optional
20 'guessing' steps as they rely on heuristics anyway (although they seem
21 to work well, there is no guarantee they are correct). What they are
22 best used for is to make a good guess regarding the data structure and then
23 let the user confirm it.
24
25 As such there is a 'wizard' to aid in this process (use this in lieu of
26 the above code - requires wxPython):
27
28 from DSV import DSV
29
30 dlg = DSV.ImportWizardDialog(parent, -1, 'DSV Import Wizard', filename)
31 dlg.ShowModal()
32 headers, data = dlg.ImportData() # may also return None
33 dlg.Destroy()
34
35 The dlg.ImportData() method may also take a function as an optional argument
36 specifying what it should do about malformed rows. See the example at the bottom
37 of this file. A few common functions are provided in this file (padRow, skipRow,
38 useRow).
39
40 Requires Python 2.0 or later
41 Wizards tested with wxPython 2.2.5/NT 4.0, 2.3.2/Win2000 and Linux/GTK (RedHat 7.x)
42 """
43
44 __version__ = '1.4'
45
46 """
47 Bugs/Caveats:
48 - Although I've tested this stuff on varied data, I'm sure there are cases
49 that I haven't seen that will choke any one of these routines (or at least
50 return invalid data). This is beta code!
51 - guessTextQualifier() algorithm is limited to quotes (double or single).
52 - Surprising feature: Hitting <enter> on wxSpinCtrl causes seg
53 fault under Linux/GTK (not Win32). Strangely, pressing <tab> seems ok.
54 Therefore, I had to use wxSpinButton. Also, spurious spin events get
55 generated for both of these controls (e.g. when calling wxBeginBusyCursor)
56 - Keyboard navigation needs to be implemented on wizards
57 - There may be issues with cr/lf translation, although I haven't yet seen any.
58
59 Why another CSV tool?:
60 - Because I needed a more flexible CSV importer, one that could accept different
61 delimiters (not just commas or tabs), one that could make an intelligent guess
62 regarding file structure (for user convenience), be compatible with the files
63 output by MS Excel, and finally, be easily integrated with a wizard. All of the
64 modules I have seen prior to this fell short on one count or another.
65 - It seemed interesting.
66
67 To do:
68 - Better guessTextQualifier() algorithm. In the perfect world I envision, I can
69 use any character as a text qualifier, not just quotes.
70 - Finish wizards and move them into separate module.
71 - Better guessHeaders() algorithm, although this is difficult.
72 - Optimize maps() - try to eliminate lambda when possible
73 - Optimize memory usage. Presently the entire file is loaded and then saved as
74 a list. A better approach might be to analyze a smaller part of the file and
75 then return an iterator to step through it.
76 """
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117 import sys
118
119 import re
120 import copy
121 import exceptions
122 import string
123
124
125
126 import os
127 os.putenv('LANG', 'C')
128
129
130
131
132
133
134 try:
135 import wx
136 import wx.grid as gridlib
137 except ImportError:
138 wx = None
139
141 -class InvalidTextQualifier(exceptions.StandardError): pass
144
145
147 """
148 PROTOTYPE:
149 guessTextQualifier(input)
150 DESCRIPTION:
151 tries to guess if the text qualifier (a character delimiting ambiguous data)
152 is a single or double-quote (or None)
153 ARGUMENTS:
154 - input is raw data as a string
155 RETURNS:
156 single character or None
157 """
158
159
160
161
162
163
164
165 data = input[:16 * 1024]
166
167 regexp = re.compile('(?:(?:^|\n)(?P<b_quote>["\']).*?(?P=b_quote))|'
168 '(?:(?P<delim>.)(?P<quote>["\']).*?(?P=quote)(?=(?P=delim)|\n))|'
169 '(?:(?P<e_quote>["\']).*?(?P=e_quote)$)', re.M | re.S)
170 matches = filter(lambda i: reduce(lambda a, b: a + b, i), regexp.findall(data))
171 if not matches: return None
172
173 quotes = {}
174 for q in ('b_quote', 'quote', 'e_quote'):
175 n = regexp.groupindex[q] - 1
176 for m in matches:
177 key = m[n]
178 if key:
179 quotes[key] = quotes.get(key, 0) + 1
180
181 return reduce(lambda a, b, quotes = quotes:
182 (quotes[a] > quotes[b]) and a or b, quotes.keys())
183
184
186 """
187 PROTOTYPE:
188 guessDelimiter(input, textQualifier = '\"')
189 DESCRIPTION:
190 Tries to guess the delimiter.
191 ARGUMENTS:
192 - input is raw data as string
193 - textQualifier is a character used to delimit ambiguous data
194 RETURNS:
195 single character or None
196 """
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212 if type(input) != type([]): return None
213 if len(input) < 2: return None
214
215 if textQualifier:
216
217 regexp = re.compile('%s(.*?)%s' % (textQualifier, textQualifier), re.S)
218 subCode = compile("regexp.sub('', line)", '', 'eval')
219 else:
220 subCode = compile("line", '', 'eval')
221
222 ascii = [chr(c) for c in range(127)]
223
224
225 chunkLength = min(10, len(input))
226 iteration = 0
227 charFrequency = {}
228 modes = {}
229 delims = {}
230 start, end = 0, min(chunkLength, len(input))
231 while start < len(input):
232 iteration += 1
233 for line in input[start:end]:
234 l = eval(subCode)
235 for char in ascii:
236 metafrequency = charFrequency.get(char, {})
237 freq = l.strip().count(char)
238 metafrequency[freq] = metafrequency.get(freq, 0) + 1
239 charFrequency[char] = metafrequency
240
241 for char in charFrequency.keys():
242 items = charFrequency[char].items()
243 if len(items) == 1 and items[0][0] == 0: continue
244
245 if len(items) > 1:
246 modes[char] = reduce(lambda a, b: a[1] > b[1] and a or b, items)
247
248 items.remove(modes[char])
249 modes[char] = (modes[char][0], modes[char][1]
250 - reduce(lambda a, b: (0, a[1] + b[1]), items)[1])
251 else:
252 modes[char] = items[0]
253
254
255 modeList = modes.items()
256 total = float(chunkLength * iteration)
257 consistency = 1.0
258 threshold = 0.9
259 while len(delims) == 0 and consistency >= threshold:
260 for k, v in modeList:
261 if v[0] > 0 and v[1] > 0:
262 if (v[1]/total) >= consistency:
263 delims[k] = v
264 consistency -= 0.01
265
266 if len(delims) == 1:
267 return delims.keys()[0]
268
269
270 start = end
271 end += chunkLength
272
273 if not delims: return None
274
275
276
277
278 if len(delims) > 1 and textQualifier is not None:
279 regexp = re.compile('%s(.*?)%s' % (textQualifier, textQualifier), re.S)
280 for line in input:
281 inQuotes = "".join(regexp.findall(line))
282 for d in delims.keys():
283 if not d in inQuotes:
284 del delims[d]
285 if len(delims) == 1:
286 return delims.keys()[0]
287
288
289 if len(delims) > 1:
290 for d in ['\t', ',', ';', ' ', ':']:
291 if d in delims.keys():
292 return d
293
294
295 return delims.keys()[0]
296
297
299 """
300 PROTOTYPE:
301 modeOfLengths(input)
302 DESCRIPTION:
303 Finds the mode (most frequently occurring value) of the lengths of the lines.
304 ARGUMENTS:
305 - input is list of lists of data
306 RETURNS:
307 mode as integer
308 """
309 freq = {}
310 for row in input:
311 l = len(row)
312 freq[l] = freq.get(l, 0) + 1
313
314 return reduce(lambda a, b, freq = freq: (freq[a] > freq[b]) and a or b, freq.keys())
315
316
318 """
319 PROTOTYPE:
320 guessHeaders(input, columns = 0)
321 DESCRIPTION:
322 Decides whether row 0 is a header row
323 ARGUMENTS:
324 - input is a list of lists of data (as returned by importDSV)
325 - columns is either the expected number of columns in each row or 0
326 RETURNS:
327 - true if data has header row
328 """
329
330
331
332
333
334
335
336
337
338 if type(input) != type([]): raise InvalidData, "list expected."
339 if len(input) < 2: return 0
340
341 if not columns:
342 columns = modeOfLengths(input)
343
344 columnTypes = {}
345 for i in range(columns): columnTypes[i] = None
346
347 for row in input[1:]:
348 if len(row) != columns:
349 continue
350 for col in columnTypes.keys():
351 try:
352 try:
353
354 thisType = type(eval(row[col]))
355 except OverflowError:
356
357 thisType = type(eval(row[col] + 'L'))
358 thisType = type(0)
359 except:
360
361 thisType = len(row[col])
362
363 if thisType != columnTypes[col]:
364 if columnTypes[col] is None:
365 columnTypes[col] = thisType
366 else:
367 del columnTypes[col]
368
369
370 hasHeader = 0
371 for col, colType in columnTypes.items():
372 if type(colType) == type(0):
373 if len(input[0][col]) != colType:
374 hasHeader += 1
375 else:
376 hasHeader -= 1
377 else:
378 try:
379 eval("%s(%s)" % (colType.__name__, input[0][col]))
380 except:
381 hasHeader += 1
382 else:
383 hasHeader -= 1
384
385 return hasHeader > 0
386
387
389 """
390 PROTOTYPE:
391 organizeIntoLines(input, textQualifier = '\"', limit = None)
392 DESCRIPTION:
393 Takes raw data (as from file.read()) and organizes it into lines.
394 Newlines that occur within text qualifiers are treated as normal
395 characters, not line delimiters.
396 ARGUMENTS:
397 - input is raw data as a string
398 - textQualifier is a character used to delimit ambiguous data
399 - limit is a integer specifying the maximum number of lines to organize
400 RETURNS:
401 list of strings
402 """
403
404
405
406
407
408 data = input.split('\n')
409 line = 0
410 while 1:
411 try:
412 while data[line].count(textQualifier) % 2:
413 data[line] = data[line] + '\n' + data[line + 1]
414 del data[line + 1]
415 line += 1
416 if limit and line > limit:
417 del data[limit:]
418 break
419 except:
420 break
421
422
423
424 data = filter(string.join, data)
425 return data
426
427
428
429
430
431 -def padRow(oldrow, newrow, columns, maxColumns):
432 "pads all rows to the same length with empty strings"
433 difference = maxColumns - len(newrow)
434 return newrow + ([''] * difference)
435
436 -def skipRow(oldrow, newrow, columns, maxColumns):
437 "skips any inconsistent rows"
438 return None
439
440 -def useRow(oldrow, newrow, columns, maxColumns):
441 "returns row unchanged"
442 return newrow
443
444
445 -def importDSV(input, delimiter = ',', textQualifier = '"', columns = 0,
446 updateFunction = None, errorHandler = None):
447 """
448 PROTOTYPE:
449 importDSV(input, delimiter = ',', textQualifier = '\"', columns = 0,
450 updateFunction = None, errorHandler = None)
451 DESCRIPTION:
452 parses lines of data in CSV format
453 ARGUMENTS:
454 - input is a list of strings (built by organizeIntoLines)
455 - delimiter is the character used to delimit columns
456 - textQualifier is the character used to delimit ambiguous data
457 - columns is the expected number of columns in each row or 0
458 - updateFunction is a callback function called once per record (could be
459 used for updating progress bars). Its prototype is
460 updateFunction(percentDone)
461 - percentDone is an integer between 0 and 100
462 - errorHandler is a callback invoked whenever a row has an unexpected number
463 of columns. Its prototype is
464 errorHandler(oldrow, newrow, columns, maxColumns)
465 where
466 - oldrow is the unparsed data
467 - newrow is the parsed data
468 - columns is the expected length of a row
469 - maxColumns is the longest row in the data
470 RETURNS:
471 list of lists of data
472 """
473 if type(input) != type([]):
474 raise InvalidData, "expected list of lists of strings"
475 if type(delimiter) != type('') or not delimiter:
476 raise InvalidDelimiter, `delimiter`
477
478
479
480
481
482
483
484
485
486
487
488 percent = 0.0
489 lineno = 0.0
490 newdata = []
491 maxColumns = 0
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510 for line in input:
511 if textQualifier:
512 record = []
513 inquotes = 0
514 for s in line.split(delimiter):
515 odd = s.count(textQualifier) % 2
516 if inquotes:
517 accu += delimiter + s.replace(textQualifier * 2, delimiter).\
518 replace(textQualifier, '').replace(delimiter, textQualifier)
519 if odd:
520 record.append(accu)
521 inquotes = 0
522 else:
523
524 if s.count(textQualifier):
525 s = s.strip()
526
527 if s == textQualifier * 2:
528 s = ""
529
530 accu = s.replace(textQualifier * 2, delimiter).\
531 replace(textQualifier, '').replace(delimiter, textQualifier)
532 if odd:
533 inquotes = 1
534 else:
535 record.append(accu)
536 else:
537
538 record = map(string.strip, line.split(delimiter))
539
540 newdata.append(record)
541
542
543 if updateFunction is not None:
544 lineno = lineno + 1.0
545 newpercent = int((lineno / len(input)) * 100)
546 if percent != newpercent:
547 percent = newpercent
548 if not updateFunction(percent):
549 return None
550
551 if not columns:
552 columns = modeOfLengths(newdata)
553 maxColumns = max([len(line) for line in newdata])
554
555
556 for record in xrange(len(newdata)):
557 length = len(newdata[record])
558 difference = length - columns
559 if difference:
560 if errorHandler is None:
561 raise InvalidNumberOfColumns, "Expected %d, got %d" % (columns, length)
562 else:
563 newdata[record] = errorHandler(input[record], newdata[record], columns, maxColumns)
564
565
566
567 newdata = filter(None, newdata)
568
569 return newdata
570
571
572
573 -def exportDSV(input, delimiter = ',', textQualifier = '"', quoteall = 0):
574 """
575 PROTOTYPE:
576 exportDSV(input, delimiter = ',', textQualifier = '\"', quoteall = 0)
577 DESCRIPTION:
578 Exports to DSV (delimiter-separated values) format.
579 ARGUMENTS:
580 - input is list of lists of data (as returned by importDSV)
581 - delimiter is character used to delimit columns
582 - textQualifier is character used to delimit ambiguous data
583 - quoteall is boolean specifying whether to quote all data or only data
584 that requires it
585 RETURNS:
586 data as string
587 """
588 if not delimiter or type(delimiter) != type(''): raise InvalidDelimiter
589 if not textQualifier or type(delimiter) != type(''): raise InvalidTextQualifier
590
591
592 data = map(lambda i, q = textQualifier:
593 map(lambda j, q = q: str(j).replace(q, q * 2), i),
594 input)
595
596 if quoteall:
597 data = map(lambda i, q = textQualifier:
598 map(lambda j, q = q: q + j + q, i),
599 data)
600 else:
601 data = map(lambda i, q = textQualifier, d = delimiter:
602 map(lambda j, q = q, d = d: ((j.find(q) != -1 or j.find(d) != -1
603 or j.find('\n') != -1)
604 and (q + j + q)) or j, i), data)
605
606 data = [delimiter.join(line) for line in data]
607
608
609 data = "\n".join(data)
610 return data
611
612 if wx is not None:
613
615 """
616 CLASS(SUPERCLASS):
617 ImportWizardPanel_Delimiters(wx.Panel)
618 DESCRIPTION:
619 A wx.Panel that provides a basic interface for validating and changing the
620 parameters for importing a delimited text file. Similar to MS Excel's
621 CSV import wizard. Can be used in a series of wizards or embedded in an
622 application.
623 PROTOTYPE:
624 ImportWizardPanel_Delimiters(parent, id, file, data, isValidCallback = None,
625 pos = wx.DefaultPosition, size = wx.DefaultSize,
626 style = wx.TAB_TRAVERSAL, name = 'ImportWizardPanel')
627 ARGUMENTS:
628 - parent is the parent window
629 - id is the id of this wizard panel
630 - file is the name of the file being imported
631 - data is the raw data to be parsed
632 - isValidCallback is a callback function that accepts a single boolean argument
633 If the argument is true, the wizard is in a valid state (all the settings are
634 acceptable), if the argument is false, trying to import will likely cause an
635 exception.
636 METHODS:
637 - GetDelimiters()
638 returns list of characters used as delimiters
639 - GetTextQualifiers()
640 returns character used as text qualifier or None
641 - GetHasHeaders()
642 returns true if first row is header
643 """
644
645 - def __init__(self, parent, id, file, data, isValidCallback = None,
646 pos = wx.DefaultPosition, size = wx.DefaultSize,
647 style = wx.TAB_TRAVERSAL, name = "ImportWizardPanel"):
648 wx.Panel.__init__(self, parent, id, pos, size, style, name)
649 self.SetAutoLayout(True)
650 mainSizer = wx.FlexGridSizer(3, 1)
651 self.SetSizer(mainSizer)
652 mainSizer.AddGrowableCol(0)
653
654 self.initialized = False
655 self.data = data
656 self.isValidCallback = isValidCallback
657 self.Validate = (isValidCallback and self.Validate) or self.BuildPreview
658
659 dlg = wx.ProgressDialog("Import Wizard",
660 "Analyzing %s... Please wait." % file,
661 3,
662 parent,
663 wx.PD_APP_MODAL | wx.PD_AUTO_HIDE)
664 textQualifier = guessTextQualifier(data)
665 dlg.Update(1)
666 newdata = organizeIntoLines(data, textQualifier = textQualifier, limit = 100)
667 dlg.Update(2)
668 delimiter = guessDelimiter(newdata, textQualifier = textQualifier)
669 dlg.Update(3)
670 dlg.Destroy()
671
672
673 msg = ("This screen lets you set the delimiters your data contains.\n"
674 "You can see how your data is affected in the preview below.")
675 message1 = wx.StaticText(self, -1, msg)
676
677
678 delimiterBox = wx.BoxSizer(wx.HORIZONTAL)
679 delimStaticBox = wx.StaticBox(self, -1, "Delimiters")
680 delimStaticSizer = wx.StaticBoxSizer(delimStaticBox, wx.VERTICAL)
681 delimGridSizer = wx.FlexGridSizer(2, 3)
682
683 delims = {
684 'Tab': '\t',
685 'Semicolon': ';',
686 'Comma': ',',
687 'Space': ' ',
688 }
689
690 self.delimChecks = {}
691
692 for label, value in delims.items():
693 self.delimChecks[value] = wx.CheckBox(self, -1, label)
694 delimGridSizer.Add(self.delimChecks[value], 0, wx.ALL, 3)
695 wx.EVT_CHECKBOX(self, self.delimChecks[value].GetId(), self.Validate)
696
697 otherSizer = wx.BoxSizer(wx.HORIZONTAL)
698
699 self.delimChecks['Other'] = wx.CheckBox(self, -1, 'Other:')
700 wx.EVT_CHECKBOX(self, self.delimChecks['Other'].GetId(), self.Validate)
701
702 self.otherDelim = wx.TextCtrl(self, -1, size = (20, -1))
703 wx.EVT_TEXT(self, self.otherDelim.GetId(), self.OnCustomDelim)
704
705 if self.delimChecks.has_key(delimiter):
706 self.delimChecks[delimiter].SetValue(True)
707 elif delimiter is not None:
708 self.delimChecks['Other'].SetValue(True)
709 self.otherDelim.SetValue(delimiter)
710
711 otherSizer.AddMany([
712 (self.delimChecks['Other'], 0, wx.ALL, 3),
713 (self.otherDelim, 0, wx.ALIGN_CENTER),
714 ])
715
716 delimGridSizer.Add(otherSizer)
717 delimStaticSizer.Add(delimGridSizer, 1, wx.EXPAND)
718 delimOtherSizer = wx.BoxSizer(wx.VERTICAL)
719 self.consecutiveDelimsAs1 = wx.CheckBox(self, -1, "Treat consecutive delimiters as one")
720 self.consecutiveDelimsAs1.Enable(False)
721 tqSizer = wx.BoxSizer(wx.HORIZONTAL)
722 self.textQualifierChoice = wx.Choice(self, -1, choices = ['"', "'", "{None}"])
723 wx.EVT_CHOICE(self, self.textQualifierChoice.GetId(), self.BuildPreview)
724 if textQualifier is not None:
725 self.textQualifierChoice.SetStringSelection(textQualifier)
726 else:
727 self.textQualifierChoice.SetStringSelection('{None}')
728
729 tqSizer.AddMany([
730 (wx.StaticText(self, -1, "Text qualifier:"), 0, wx.ALIGN_RIGHT | wx.ALIGN_CENTER_VERTICAL),
731 (self.textQualifierChoice, 0, wx.ALL | wx.ALIGN_LEFT | wx.ALIGN_CENTER_VERTICAL, 5),
732 ])
733
734 delimOtherSizer.AddMany([
735 (self.consecutiveDelimsAs1, 1, wx.EXPAND | wx.ALL, 5),
736 (tqSizer, 1, wx.ALL | wx.ALIGN_CENTER, 5),
737 ])
738
739 delimiterBox.AddMany([
740 (delimStaticSizer, 0, wx.ALIGN_CENTER),
741 (delimOtherSizer, 0, wx.ALIGN_CENTER),
742 ])
743
744 delimStaticBox.Fit()
745
746
747 self.displayRows = 6
748 previewSettingsBox = wx.BoxSizer(wx.HORIZONTAL)
749 self.hasHeaderRow = wx.CheckBox(self, -1, "First row is header")
750 wx.EVT_CHECKBOX(self, self.hasHeaderRow.GetId(), self.BuildPreview)
751
752 if wx.Platform in ('__WX.WXGTK__', '__WX.WXMSW__'):
753
754 self.previewRowsText = wx.TextCtrl(self, -1, str(self.displayRows),
755 size = (30, -1), style = wx.TE_PROCESS_ENTER)
756 h = self.previewRowsText.GetSize().height
757 self.previewRows = wx.SpinButton(self, -1, size = (-1, h), style = wx.SP_VERTICAL)
758 self.previewRows.SetRange(self.displayRows, 100)
759 self.previewRows.SetValue(self.displayRows)
760 wx.EVT_SPIN(self, self.previewRows.GetId(), self.OnSpinPreviewRows)
761 wx.EVT_TEXT_ENTER(self, self.previewRowsText.GetId(), self.OnTextPreviewRows)
762 else:
763 self.previewRows = wx.SpinCtrl(self, -1, str(self.displayRows),
764 min = self.displayRows, max = 100, size = (50, -1))
765 wx.EVT_SPINCTRL(self, self.previewRows.GetId(), self.BuildPreview)
766
767 previewSettingsBox.AddMany([
768 (self.hasHeaderRow, 1, wx.ALL | wx.EXPAND, 5),
769 (wx.StaticText(self, -1, "Preview"), 0, wx.WEST | wx.ALIGN_RIGHT | wx.ALIGN_CENTER_VERTICAL, 10),
770 ])
771 if wx.Platform in ('__WX.WXGTK__', '__WX.WXMSW__'):
772 previewSettingsBox.Add(self.previewRowsText, 0, wx.ALIGN_CENTER | wx.ALL, 3)
773 previewSettingsBox.AddMany([
774 (self.previewRows, 0, wx.ALIGN_CENTER | wx.ALL, 3),
775 (wx.StaticText(self, -1, "rows"), 0, wx.ALIGN_RIGHT | wx.ALIGN_CENTER_VERTICAL),
776 ])
777
778
779 if delimiter is not None:
780 previewData = importDSV(newdata[:self.displayRows],
781 textQualifier = textQualifier,
782 delimiter = delimiter,
783 errorHandler = padRow)
784 hasHeaders = guessHeaders(previewData)
785 self.hasHeaderRow.SetValue(hasHeaders)
786
787 cols = len(previewData[0])
788 else:
789 previewData = []
790 hasHeaders = 0
791 cols = 1
792
793 previewStaticBox = wx.StaticBox(self, -1, "Data Preview")
794 previewStaticSizer = wx.StaticBoxSizer(previewStaticBox, wx.VERTICAL)
795 self.preview = gridlib.Grid(self, -1)
796 self.preview.CreateGrid(self.displayRows, cols)
797 self.preview.SetDefaultRowSize(self.preview.GetCharHeight() + 4, True)
798 self.preview.EnableEditing(False)
799 self.preview.SetColLabelSize(0)
800 self.preview.SetRowLabelSize(0)
801 self.preview.SetMargins(1, 0)
802 self.initialized = True
803 self.BuildPreview()
804
805 rowheight = self.preview.GetRowSize(0) + 2
806 self.preview.SetSize((-1, rowheight * self.displayRows))
807 previewStaticSizer.Add(self.preview, 0, wx.ALL | wx.EXPAND, 5)
808
809
810 mainSizer.AddMany([
811 (message1, 0, wx.ALL, 5),
812 (delimiterBox, 0, wx.ALL, 5),
813 (previewSettingsBox, 0, wx.ALL, 5),
814 (previewStaticSizer, 0, wx.ALL | wx.EXPAND, 5),
815 ])
816
817 self.Layout()
818 self.Fit()
819
823
824 - def OnTextPreviewRows(self, event):
825 try: v = int(self.previewRowsText.GetValue())
826 except: v = self.displayRows
827 v = max(self.displayRows, v)
828 v = min(v, 100)
829 self.previewRowsText.SetValue(str(v))
830 self.previewRows.SetValue(v)
831 self.BuildPreview()
832
834 hasDelimiter = reduce(lambda a, b: a + b, [cb.GetValue() for cb in self.delimChecks.values()])
835 if hasDelimiter == 1 and self.delimChecks['Other'].GetValue():
836 hasDelimiter = self.otherDelim.GetValue() != ""
837 self.BuildPreview()
838 self.isValidCallback(hasDelimiter)
839
841 if not self.initialized:
842 return
843
844 if wx.Platform != '__WX.WXGTK__':
845 wx.BeginBusyCursor()
846 wx.Yield()
847 self.preview.BeginBatch()
848 self.preview.DeleteCols(0, self.preview.GetNumberCols())
849 self.preview.DeleteRows(0, self.preview.GetNumberRows())
850 self.preview.ClearGrid()
851
852 textQualifier = self.textQualifierChoice.GetStringSelection()
853 if textQualifier == '{None}': textQualifier = None
854 other = self.otherDelim.GetValue()
855 delimiter = map(lambda i, other = other: i[0] != 'Other' and i[0] or other,
856 filter(lambda i: i[1].GetValue(), self.delimChecks.items()))
857 delimiter = "".join(delimiter)
858
859 rows = self.previewRows.GetValue()
860
861 newdata = organizeIntoLines(self.data, textQualifier, limit = rows)
862 try:
863 previewData = importDSV(newdata[:rows],
864 textQualifier = textQualifier,
865 delimiter = delimiter,
866 errorHandler = padRow)
867 except InvalidDelimiter, e:
868 previewData = map(lambda i: [i], newdata[:rows])
869
870 rows = min(rows, len(previewData))
871 hasHeaders = self.hasHeaderRow.GetValue()
872 self.preview.AppendRows(rows - hasHeaders)
873 cols = max([len(row) for row in previewData])
874 self.preview.AppendCols(cols)
875
876 if hasHeaders:
877 self.preview.SetColLabelSize(self.preview.GetRowSize(0))
878 for col in range(cols):
879 try: self.preview.SetColLabelValue(col, str(previewData[0][col]))
880 except: self.preview.SetColLabelValue(col, "")
881
882 else:
883 self.preview.SetColLabelSize(0)
884
885 for row in range(hasHeaders, rows):
886 for col in range(cols):
887 try: self.preview.SetCellValue(row - hasHeaders, col, str(previewData[row][col]))
888 except: pass
889
890
891 self.preview.AutoSizeColumns(True)
892
893 rowheight = self.preview.GetRowSize(0)
894 self.preview.SetRowSize(0, rowheight)
895 self.preview.EndBatch()
896 if wx.Platform != '__WX.WXGTK__':
897 wx.EndBusyCursor()
898
899 self.delimiters = delimiter
900 self.textQualifier = textQualifier
901 self.hasHeaders = hasHeaders
902
906
908 return self.delimiters
909
911 return self.textQualifier
912
914 return self.hasHeaders
915
916
918 """
919 CLASS(SUPERCLASS):
920 ImportWizardDialog(wx.Dialog)
921 DESCRIPTION:
922 A dialog allowing the user to preview and change the options for importing
923 a file.
924 PROTOTYPE:
925 ImportWizardDialog(parent, id, title, file,
926 pos = wx.DefaultPosition, size = wx.DefaultSize,
927 style = wx.DEFAULT_DIALOG_STYLE, name = 'ImportWizardDialog')
928 ARGUMENTS:
929 - parent: the parent window
930 - id: the id of this window
931 - title: the title of this dialog
932 - file: the file to import
933 METHODS:
934 - GetImportInfo()
935 returns a tuple (delimiters, text qualifiers, has headers)
936 - ImportData(errorHandler = skipRow)
937 returns (headers, data), headers may be None
938 errorHandler is a callback function that instructs the method on what
939 to do with irregular rows. The default skipRow function simply discards
940 the bad row (see importDSV() above).
941 """
942
943 - def __init__(self, parent, id, title, file,
944 pos = wx.DefaultPosition, size = wx.DefaultSize,
945 style = wx.DEFAULT_DIALOG_STYLE, name = "ImportWizardDialog"):
946 wx.Dialog.__init__(self, parent, id, title, pos, size, style, name)
947 self.SetAutoLayout(True)
948
949 self.file = file
950 f = open(file, 'r')
951 self.data = f.read()
952 f.close()
953
954 sizer = wx.BoxSizer(wx.VERTICAL)
955 self.delimPanel = ImportWizardPanel_Delimiters(self, -1, file, self.data, self.ValidState)
956 buttonBox = self.ButtonBox()
957 sizer.AddMany([
958 (self.delimPanel, 0, wx.ALL, 5),
959 (buttonBox, 0, wx.SOUTH | wx.ALIGN_CENTER_HORIZONTAL | wx.ALIGN_TOP, 0),
960 ])
961
962 self.SetSizer(sizer)
963 self.Layout()
964 sizer.Fit(self.delimPanel)
965 self.Fit()
966 self.Centre()
967
982
987
989 delimiters, qualifier, hasHeaders = self.GetImportInfo()
990 self.data = organizeIntoLines(self.data, textQualifier = qualifier)
991 dlg = wx.ProgressDialog("Import DSV File",
992 self.file,
993 100,
994 self,
995 wx.PD_CAN_ABORT | wx.PD_APP_MODAL | wx.PD_AUTO_HIDE)
996 self.data = importDSV(self.data,
997 delimiter = delimiters,
998 textQualifier = qualifier,
999 updateFunction = dlg.Update,
1000 errorHandler = errorHandler)
1001 if self.data is None: return None
1002 if hasHeaders:
1003 headers = copy.copy(self.data[0])
1004 del self.data[0]
1005 else:
1006 headers = None
1007 return (headers, self.data)
1008
1011
1012
1013
1014 if __name__ == '__main__':
1015 if wx is None:
1016 print "\nYou need wxPython to run this sample*."
1017 print "\n*Note that wxPython is _not_ necessary to use this module, but it is required"
1018 print "to use the wizard dialog (which the sample requires)."
1019 raise SystemExit
1020
1021
1023 class SampleApp(wx.App):
1024 def OnInit(self):
1025 dlg = wx.FileDialog(None, "Choose a file", ".", "",
1026 "CSV files (*.csv)|*.csv|Text files (*.txt)|*.txt|All files (*.*)|*.*",
1027 wx.OPEN)
1028 if dlg.ShowModal() == wx.ID_OK:
1029 path = dlg.GetPath()
1030 dlg.Destroy()
1031
1032 errorLog = open('import_error.log', 'a+')
1033 def logErrors(oldrow, newrow, expectedColumns, maxColumns, file = errorLog):
1034
1035 file.write(oldrow + '\n')
1036
1037 dlg = ImportWizardDialog(None, -1, 'CSV Import Wizard (v.%s)' % __version__, path)
1038 if dlg.ShowModal() == wx.ID_OK:
1039 results = dlg.ImportData(errorHandler = logErrors)
1040 dlg.Destroy()
1041 errorLog.close()
1042
1043 if results is not None:
1044 headers, data = results
1045 if True:
1046 if headers:
1047 print headers
1048 print 80*'='
1049 for row in data:
1050 print row
1051
1052 if True:
1053 if headers:
1054 print exportDSV([headers] + data)
1055 else:
1056 print exportDSV(data)
1057 else:
1058 dlg.Destroy()
1059
1060 else:
1061 dlg.Destroy()
1062
1063 return True
1064
1065 app = SampleApp()
1066 app.MainLoop()
1067
1068
1069
1070 demo()
1071