Skip to content

Commit 3db6d1c

Browse files
committed
fix(shell): decode Windows pipe output with system codepage instead of UTF-8
On Windows, subprocess pipes deliver output in the system's legacy codepage (e.g. the ANSI codepage CP1252 or the OEM codepage CP437), not UTF-8. The previous `chcp 65001` workaround had no effect on pipe output, because pipes are not console devices (see PEP 528). Decode with locale.getpreferredencoding(False) and errors='replace' instead, so that non-ASCII characters such as umlauts in usernames no longer raise UnicodeDecodeError. Fixes Linuxfabrik/monitoring-plugins#681
1 parent 1fc1332 commit 3db6d1c

2 files changed

Lines changed: 21 additions & 5 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
4545

4646
### Fixed
4747

48+
* shell.py: fix `UnicodeDecodeError` on Windows when command output contains non-ASCII characters (e.g. usernames with umlauts) by decoding with the system's ANSI codepage instead of assuming UTF-8
4849
* base.py: `cu()` now also escapes HTML characters in the error message, not just in the traceback
4950
* base.py: `cu()` now detects active exceptions via `sys.exc_info()` instead of string-matching the traceback
5051
* base.py: `get_state()` no longer calls `sys.exit()` on malformed range specs, returns UNKNOWN instead

shell.py

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,10 @@
1212
"""
1313

1414
__author__ = 'Linuxfabrik GmbH, Zurich/Switzerland'
15-
__version__ = '2026032101'
15+
__version__ = '2026040801'
1616

1717

18+
import locale
1819
import os
1920
import re
2021
import shlex
@@ -143,9 +144,19 @@ def shell_exec(cmd, env=None, shell=False, stdin='', cwd=None, timeout=None, lc_
143144
env = {**os.environ.copy(), **(env or {})}
144145
env['LC_ALL'] = lc_all
145146

147+
# On Windows, subprocess pipes deliver output in the system's legacy codepage
148+
# (e.g. ANSI CP1252 or OEM CP437), NOT in UTF-8, even if "chcp 65001" was run beforehand.
149+
# This is because pipes are not console devices (see PEP 528 and Python docs
150+
# on sys.stdout). We therefore decode with the system's preferred encoding
151+
# and pass errors='replace' so that undecodable bytes are replaced instead of raising UnicodeDecodeError.
152+
# See: https://github.com/Linuxfabrik/monitoring-plugins/issues/681
146153
if os.name == 'nt':
147-
cmd = f'chcp 65001 && {cmd}'
154+
_encoding = locale.getpreferredencoding(False)
155+
_errors = 'replace'
148156
shell = True
157+
else:
158+
_encoding = 'utf-8'
159+
_errors = 'surrogateescape'
149160

150161
if shell or stdin:
151162
try:
@@ -171,8 +182,8 @@ def shell_exec(cmd, env=None, shell=False, stdin='', cwd=None, timeout=None, lc_
171182
p.communicate()
172183
return False, f'Timeout after {timeout} seconds.'
173184
retc = p.returncode
174-
stdout = txt.to_text(stdout).replace('Active code page: 65001\r\n', '')
175-
stderr = txt.to_text(stderr)
185+
stdout = txt.to_text(stdout, encoding=_encoding, errors=_errors)
186+
stderr = txt.to_text(stderr, encoding=_encoding, errors=_errors)
176187
return True, (stdout, stderr, retc)
177188

178189
cmds = cmd.split('|')
@@ -199,4 +210,8 @@ def shell_exec(cmd, env=None, shell=False, stdin='', cwd=None, timeout=None, lc_
199210
p.communicate()
200211
return False, f'Timeout after {timeout} seconds.'
201212

202-
return True, (txt.to_text(stdout), txt.to_text(stderr), p.returncode)
213+
return True, (
214+
txt.to_text(stdout, encoding=_encoding, errors=_errors),
215+
txt.to_text(stderr, encoding=_encoding, errors=_errors),
216+
p.returncode,
217+
)

0 commit comments

Comments
 (0)