The code from the question takes 25 seconds on my machine; the numpy version takes 0.37 seconds:
import numpy as np
# Map the input and output files as flat arrays (np.memmap's default dtype
# is uint8). The output holds two bytes for every input byte.
a_in = np.memmap('foreman_cif.yuv', mode='readonly')
a_out = np.memmap('py_10bpp.yuv', mode='write', shape=(2 * len(a_in),))

# Even output positions receive the input byte shifted left by 2 (wrapped to
# 8 bits); odd positions receive the two bits that were shifted out.
a_out[0::2] = np.left_shift(a_in, 2)
a_out[1::2] = np.right_shift(a_in, 6)
The cython version takes 0.20 seconds:
from functools import partial
import pyximport; pyximport.install() # pip install cython
from bpp8to10 import convert # bpp8to10.pyx
# Path of the input file; main() opens it for binary reading ('rb').
f_in = 'foreman_cif.yuv'
# Path of the output file; main() opens it for binary writing ('wb').
f_out = 'py_10bpp.yuv'
def main():
    """Copy f_in to f_out, passing each 8192-byte chunk through convert()."""
    with open(f_in, 'rb') as src, open(f_out, 'wb') as dst:
        while True:
            chunk = src.read(8192)
            if not chunk:  # read() returns b'' at end of file
                break
            dst.write(convert(chunk))


main()
Where bpp8to10.pyx is:
from cpython.bytes cimport PyBytes_FromStringAndSize
def convert(bytes chunk not None):
    """Return a bytes object twice as long as *chunk*.

    For every input byte ``c`` the result contains ``c << 2`` (truncated to
    one byte) followed by ``c >> 6``.
    """
    cdef:
        # Allocate the result uninitialized; every byte is written below.
        bytes result = PyBytes_FromStringAndSize(NULL, 2 * len(chunk))
        char* out = result  # points into result's buffer, no copy
        Py_ssize_t pos = 0
        unsigned char byte
    for byte in chunk:
        out[pos] = (byte << 2)
        out[pos + 1] = (byte >> 6)
        pos += 2
    return result
The main speedup in the pure CPython version comes from moving the code from the module level into a function (main()). With all the changes noted in the comments below it takes 6.7 seconds (2 CPUs):
from functools import partial
from multiprocessing import Pool
# Path of the input file; main() opens it for binary reading ('rb').
f_in = 'foreman_cif.yuv'
# Path of the output file; main() opens it for binary writing ('wb').
f_out = 'py_10bpp.yuv'
def convert(chunk):
    """Expand each byte of *chunk* into two bytes.

    For every input byte ``b`` the result contains the low byte of
    ``b << 2`` followed by its high byte (``b >> 6``).

    Returns a bytearray of length ``2 * len(chunk)``.
    """
    data = bytearray()  # [] -> bytearray(): 17 -> 15 seconds
    data_append = data.append  # 15 -> 12 seconds
    for b in bytearray(chunk):  # on Python 3: `for b in chunk:`
        data_append((b << 2) & 0xff)
        # High byte of (b << 2) is b >> 6; `b >> 8` is always 0 for a byte.
        data_append((b >> 6) & 0xff)
    return data
def main():  # put in main(): # 25 -> 17 seconds
    """Convert f_in to f_out, running convert() on 8192-byte chunks in a pool.

    Results are written in input order because Pool.imap yields them in the
    order the chunks were submitted.
    """
    pool = Pool(processes=2)  # 12 -> 6.7 seconds
    try:
        with open(f_in, 'rb') as fd_in, open(f_out, 'wb') as fd_out:
            chunks = iter(partial(fd_in.read, 8192), b'')
            for data in pool.imap(convert, chunks):
                fd_out.write(data)
    finally:
        # Always shut the workers down, including when reading or writing
        # fails; on success every result has already been consumed.
        pool.terminate()
        pool.join()


# The guard keeps worker processes that re-import this module (spawn start
# method) from starting the conversion again.
if __name__ == '__main__':
    main()
The pypy version takes 1.6 seconds:
# Path of the input file; opened below for binary reading ('rb').
f_in = 'foreman_cif.yuv'
# Path of the output file; opened below for binary writing ('wb').
f_out = 'py_10bpp.yuv'
def convert(chunk):
    """Expand each byte of *chunk* into two bytes and return a bytearray.

    For every input byte the result contains the low byte of ``byte << 2``
    followed by ``byte >> 6``.
    """
    # Author's timing note: 1.6 -> 1.5 seconds for preallocated data
    out = bytearray()
    for byte in bytearray(chunk):
        # byte is in 0..255, so byte >> 6 already fits in a single byte.
        out.extend(((byte << 2) & 0xff, byte >> 6))
    return out
# Stream f_in through convert() into f_out, 8192 bytes at a time; the
# two-argument iter() stops at the first empty read (end of file).
with open(f_in, 'rb') as fd_in, open(f_out, 'wb') as fd_out:
    for chunk in iter(lambda: fd_in.read(8192), b''):
        fd_out.write(convert(chunk))
与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…