Skip to content
Please note that GitHub no longer supports your web browser.

We recommend upgrading to the latest Google Chrome or Firefox.

Learn more
Permalink
Browse files

Change default CSV encoding to UTF-8 BOM

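Since Python's 'utf-8-sig' codec decodes UTF-8 input whether or not it starts
with a byte order mark (BOM), and strips the BOM when one is present, we can
support UTF-8 files with a BOM simply by making it the default input encoding.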
  • Loading branch information...
felixbuenemann committed Jul 24, 2019
1 parent a84e5f5 commit fc54d69168cc4e7f33850ee3d417290c47136f04
@@ -163,7 +163,7 @@ def _init_common_parser(self):
self.argparser.add_argument('-z', '--maxfieldsize', dest='field_size_limit', type=int,
help='Maximum length of a single field in the input CSV file.')
if 'e' not in self.override_flags:
-self.argparser.add_argument('-e', '--encoding', dest='encoding', default='utf-8',
+self.argparser.add_argument('-e', '--encoding', dest='encoding', default='utf-8-sig',
help='Specify the encoding of the input CSV file.')
if 'L' not in self.override_flags:
self.argparser.add_argument('-L', '--locale', dest='locale', default='en_US',
@@ -0,0 +1,3 @@
foo,bar,baz
1,2,3
4,5,ʤ
@@ -100,6 +100,13 @@ def test_changes_character_encoding(self):
'4,5,©\n',
], [], ['-e', 'latin1'])

def test_removes_bom(self):
    """Check the cleaned output for the ``test_utf8_bom`` fixture.

    No extra command-line arguments are passed, so the default encoding is
    used. The expected lines begin directly with ``foo``, i.e. without a
    leading byte order mark.
    """
    # NOTE(review): the third argument is presumably the expected error
    # rows (none here) and the fourth the extra CLI arguments -- confirm
    # against the assertCleaned helper.
    expected_lines = [
        'foo,bar,baz\n',
        '1,2,3\n',
        '4,5,ʤ\n',
    ]
    self.assertCleaned('test_utf8_bom', expected_lines, [], [])

def test_dry_run(self):
output = self.get_output_as_io(['-n', 'examples/bad.csv'])
self.assertFalse(os.path.exists('examples/bad_err.csv'))
@@ -45,6 +45,12 @@ def test_match_utf8(self):
['4', '5', u'ʤ'],
])

def test_match_utf8_bom(self):
    """Match a non-ASCII value in column 3 of the BOM-prefixed fixture.

    Only the header row and the row whose third column is ``ʤ`` are
    expected in the result.
    """
    cli_args = ['-c', '3', '-m', 'ʤ', 'examples/test_utf8_bom.csv']
    expected_rows = [
        ['foo', 'bar', 'baz'],
        ['4', '5', u'ʤ'],
    ]
    self.assertRows(cli_args, expected_rows)

def test_no_match(self):
self.assertRows(['-c', '1', '-m', 'NO MATCH', 'examples/dummy.csv'], [
['a', 'b', 'c'],
@@ -65,6 +65,14 @@ def test_unicode(self):
u'| 4 | 5 | ʤ |',
])

def test_unicode_bom(self):
    """Render the BOM-prefixed UTF-8 fixture as a table.

    The first header cell is expected to be plain ``foo``, with no byte
    order mark in front of it.
    """
    expected_table = [
        '| foo | bar | baz |',
        '| --- | --- | --- |',
        '| 1 | 2 | 3 |',
        u'| 4 | 5 | ʤ |',
    ]
    self.assertLines(['examples/test_utf8_bom.csv'], expected_table)

def test_linenumbers(self):
self.assertLines(['--linenumbers', 'examples/dummy3.csv'], [
'| line_numbers | a | b | c |',

0 comments on commit fc54d69

Please sign in to comment.
You can’t perform that action at this time.