Welcome to OGeek Q&A Community for programmer and developer-Open, Learning and Share
Welcome To Ask or Share your Answers For Others

Categories

0 votes
421 views
in Technique[技术] by (71.8m points)

python subprocess with timeout and large output (>64K)

I want to execute a process, limit the execution-time by some timeout in seconds and grab the output produced by the process. And I want to do this on windows, linux and freebsd.

I have tried implementing this in three different ways:

  1. cmd - Without timeout and subprocess.PIPE for output capture.

    BEHAVIOUR: Operates as expected but does not support timeout, i need timeout...

  2. cmd_to - With timeout and subprocess.PIPE for output capture.

    BEHAVIOUR: Blocks subprocess execution when output >= 2^16 bytes.

  3. cmd_totf - With timeout and tempfile.NamedTemporaryfile for output capture.

    BEHAVIOUR: Operates as expected but uses temporary files on disk.

These are available below for closer inspection.

As can be seen in the output below, then the timeout-code blocks the execution of the sub-process when using subprocessing.PIPE and output from the subprocess is >= 2^16 bytes.

The subprocess documentation states that this is expected when calling process.wait() and using subprocessing.PIPE, however no warnings are given when using process.poll(), so what is going wrong here?

I have a solution in cmd_totf which use the tempfile module but the tradeoff is that it writes the output to disk, something I would REALLY like to avoid.

So my questions are:

  • What am I doing wrong in cmd_to?
  • Is there a way to do what I want and without using tempfiles / keeping the output in memory.

Script to generate a bunch of output ('exp_gen.py'):

#!/usr/bin/env python
import sys
output  = "b"*int(sys.argv[1])
print output

Three different implementations (cmd, cmd_to, cmd_totf) of wrappers around subprocessing.Popen:

#!/usr/bin/env python
import subprocess, time, tempfile
bufsize = -1

def cmd(cmdline, timeout=60):
  """
  Execute cmdline.
  Uses subprocessing and subprocess.PIPE.
  """

  p = subprocess.Popen(
    cmdline,
    bufsize = bufsize,
    shell   = False,
    stdin   = subprocess.PIPE,
    stdout  = subprocess.PIPE,
    stderr  = subprocess.PIPE
  )

  out, err    = p.communicate()
  returncode  = p.returncode

  return (returncode, err, out)

def cmd_to(cmdline, timeout=60):
  """
  Execute cmdline, limit execution time to 'timeout' seconds.
  Uses subprocessing and subprocess.PIPE.
  """

  p = subprocess.Popen(
    cmdline,
    bufsize = bufsize,
    shell   = False,
    stdin   = subprocess.PIPE,
    stdout  = subprocess.PIPE,
    stderr  = subprocess.PIPE
  )

  t_begin         = time.time()             # Monitor execution time
  seconds_passed  = 0  

  while p.poll() is None and seconds_passed < timeout:
    seconds_passed = time.time() - t_begin
    time.sleep(0.1)

  #if seconds_passed > timeout:
  #
  #  try:
  #    p.stdout.close()  # If they are not closed the fds will hang around until
  #    p.stderr.close()  # os.fdlimit is exceeded and cause a nasty exception
  #    p.terminate()     # Important to close the fds prior to terminating the process!
  #                      # NOTE: Are there any other "non-freed" resources?
  #  except:
  #    pass
  #  
  #  raise TimeoutInterrupt

  out, err    = p.communicate()
  returncode  = p.returncode

  return (returncode, err, out)

def cmd_totf(cmdline, timeout=60):
  """
  Execute cmdline, limit execution time to 'timeout' seconds.
  Uses subprocessing and tempfile instead of subprocessing.PIPE.
  """

  output  = tempfile.NamedTemporaryFile(delete=False)
  error   = tempfile.NamedTemporaryFile(delete=False)

  p = subprocess.Popen(
    cmdline,
    bufsize = 0,
    shell   = False,
    stdin   = None,
    stdout  = output,
    stderr  = error
  )

  t_begin         = time.time()             # Monitor execution time
  seconds_passed  = 0  

  while p.poll() is None and seconds_passed < timeout:
    seconds_passed = time.time() - t_begin
    time.sleep(0.1)

  #if seconds_passed > timeout:
  #
  #  try:
  #    p.stdout.close()  # If they are not closed the fds will hang around until
  #    p.stderr.close()  # os.fdlimit is exceeded and cause a nasty exception
  #    p.terminate()     # Important to close the fds prior to terminating the process!
  #                      # NOTE: Are there any other "non-freed" resources?
  #  except:
  #    pass
  #  
  #  raise TimeoutInterrupt

  p.wait()

  returncode  = p.returncode

  fd          = open(output.name)
  out         = fd.read()
  fd.close()

  fd  = open(error.name)
  err = fd.read()
  fd.close()

  error.close()
  output.close()

  return (returncode, err, out)

if __name__ == "__main__":

  implementations = [cmd, cmd_to, cmd_totf]
  bytes     = ['65535', '65536', str(1024*1024)]
  timeouts  = [5]

  for timeout in timeouts:    
    for size in bytes:    
      for i in implementations:
        t_begin         = time.time()
        seconds_passed  = 0        
        rc, err, output = i(['exp_gen.py', size], timeout)
        seconds_passed = time.time() - t_begin
        filler = ' '*(8-len(i.func_name))
        print "[%s%s:  timeout=%d,  iosize=%s,  seconds=%f]" % (repr(i.func_name), filler, timeout, size, seconds_passed)

Output from execution:

['cmd'     :  timeout=5,  iosize=65535,  seconds=0.016447]
['cmd_to'  :  timeout=5,  iosize=65535,  seconds=0.103022]
['cmd_totf':  timeout=5,  iosize=65535,  seconds=0.107176]
['cmd'     :  timeout=5,  iosize=65536,  seconds=0.028105]
['cmd_to'  :  timeout=5,  iosize=65536,  seconds=5.116658]
['cmd_totf':  timeout=5,  iosize=65536,  seconds=0.104905]
['cmd'     :  timeout=5,  iosize=1048576,  seconds=0.025964]
['cmd_to'  :  timeout=5,  iosize=1048576,  seconds=5.128062]
['cmd_totf':  timeout=5,  iosize=1048576,  seconds=0.103183]
See Question&Answers more detail:os

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
Welcome To Ask or Share your Answers For Others

1 Reply

0 votes
by (71.8m points)

As opposed to all the warnings in the subprocess documentation then directly reading from process.stdout and process.stderr has provided a better solution.

By better I mean that I can read output from a process that exceeds 2^16 bytes without having to temporarily store the output on disk.

The code follows:

import fcntl
import os
import subprocess
import time

def nonBlockRead(output):
    fd = output.fileno()
    fl = fcntl.fcntl(fd, fcntl.F_GETFL)
    fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)
    try:
        return output.read()
    except:
        return ''

def cmd(cmdline, timeout=60):
    """
    Execute cmdline, limit execution time to 'timeout' seconds.
    Uses the subprocess module and subprocess.PIPE.

    Raises TimeoutInterrupt
    """

    p = subprocess.Popen(
        cmdline,
        bufsize = bufsize, # default value of 0 (unbuffered) is best
        shell   = False, # not really needed; it's disabled by default
        stdout  = subprocess.PIPE,
        stderr  = subprocess.PIPE
    )

    t_begin = time.time() # Monitor execution time
    seconds_passed = 0

    stdout = ''
    stderr = ''

    while p.poll() is None and seconds_passed < timeout: # Monitor process
        time.sleep(0.1) # Wait a little
        seconds_passed = time.time() - t_begin

        # p.std* blocks on read(), which messes up the timeout timer.
        # To fix this, we use a nonblocking read()
        # Note: Not sure if this is Windows compatible
        stdout += nonBlockRead(p.stdout)
        stderr += nonBlockRead(p.stderr)

    if seconds_passed >= timeout:
        try:
            p.stdout.close()  # If they are not closed the fds will hang around until
            p.stderr.close()  # os.fdlimit is exceeded and cause a nasty exception
            p.terminate()     # Important to close the fds prior to terminating the process!
                              # NOTE: Are there any other "non-freed" resources?
        except:
            pass

        raise TimeoutInterrupt

    returncode  = p.returncode

    return (returncode, stdout, stderr)

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
OGeek|极客中国-欢迎来到极客的世界,一个免费开放的程序员编程交流平台!开放,进步,分享!让技术改变生活,让极客改变未来! Welcome to OGeek Q&A Community for programmer and developer-Open, Learning and Share
Click Here to Ask a Question

...