|
2 | 2 | import re |
3 | 3 | import sys |
4 | 4 |
|
| 5 | + |
def is_pip_upgrade_msg(line):
    """Return True when *line* is a string that looks like a pip upgrade notice.

    Two shapes are recognised:

    * a line that starts with ``WARNING`` and later mentions ``pip version``;
    * a line that contains ``upgrade pip`` anywhere (for example the
      ``... install --upgrade pip`` hint, which does not start the line).

    Non-string values are never treated as pip messages, so the function
    is safe to call on arbitrary JSON values.
    """
    if not isinstance(line, str):
        return False

    # re.search (not re.match) so "upgrade pip" is found mid-line; the
    # WARNING alternative stays anchored to the start of the string.
    return re.search(r"^WARNING.+pip version|upgrade pip", line) is not None
| 8 | + |
| 9 | + |
def is_vid(cell):
    """Tell whether *cell*'s first output is the plain-text repr of an IPython Video.

    The check looks at the first entry of the ``text/plain`` payload of the
    cell's first output. Any cell that does not have that structure
    (missing keys, no outputs, wrong container types) is reported as not
    being a video.
    """
    video_repr = "<IPython.core.display.Video object>"

    try:
        first_output = cell["outputs"][0]
        plain_text = first_output["data"]["text/plain"]
        first_line = plain_text[0]
    except (IndexError, KeyError, TypeError):
        # Structure does not match a displayed object: not a video.
        return False

    return first_line == video_repr
| 17 | + |
| 18 | + |
# Read the whole notebook (JSON) from standard input.
input_str = sys.stdin.read()
notebook = json.loads(input_str)

# nbconvert wants to embed videos, so skip them
notebook["cells"] = [cell for cell in notebook["cells"] if not is_vid(cell)]

for cell in notebook["cells"]:
    if "execution_count" in cell:
        # ignore all the execution count numbers
        cell["execution_count"] = None

    if cell["cell_type"] != "code":
        continue

    # ignore any system command output
    # (guard against an empty cell: it has no first line/character to inspect)
    source = cell["source"]
    if source and source[0].startswith("!"):
        cell["outputs"] = []

    # filter out pip upgrade warnings
    # Entries of cell["outputs"] are output objects, not strings, so the
    # warning lines have to be looked for inside each output's "text" list.
    kept_outputs = []
    for output in cell["outputs"]:
        if is_pip_upgrade_msg(output):
            continue

        text = output.get("text") if isinstance(output, dict) else None
        if isinstance(text, list) and text:
            remaining = [line for line in text if not is_pip_upgrade_msg(line)]
            if not remaining:
                # the output consisted solely of pip upgrade noise; drop it
                continue
            output["text"] = remaining

        kept_outputs.append(output)
    cell["outputs"] = kept_outputs

    for output in cell["outputs"]:
        if "execution_count" in output:
            # ignore all the execution count numbers
            output["execution_count"] = 1

        # clear HTML output, since it often has generated IDs (from displacy, plotly, etc.) that change with each execution
        if "data" in output and "text/html" in output["data"]:
            cell["outputs"] = []
|
0 commit comments