Looks reasonable, though it really needs to fix leaking std
and aux
to the children and after the loop, and the parent's original stdin
is lost forever.
This would probably be better with color...
./a.out foo bar baz <stdin >stdout
std = dup(stdout) || |+==========================std
|| || ||
pipe(fd) || || pipe1[0] -- pipe1[1] ||
|| || || || ||
aux = fd[0] || || aux || ||
|| XX || || ||
|| /-------++----------+| ||
dup2(fd[1], 1) || // || || ||
|| || || || ||
close(fd[1]) || || || XX ||
|| || || ||
fork+exec(foo) || || || ||
XX || || ||
/-----++-------+| ||
dup2(aux, 0) // || || ||
|| || || ||
close(aux) || || XX ||
|| || ||
pipe(fd) || || pipe2[0] -- pipe2[1] ||
|| || || || ||
aux = fd[0] || || aux || ||
|| XX || || ||
|| /-------++----------+| ||
dup2(fd[1], 1) || // || || ||
|| || || || ||
close(fd[1]) || || || XX ||
|| || || ||
fork+exec(bar) || || || ||
XX || || ||
/-----++-------+| ||
dup2(aux, 0) // || || ||
|| || || ||
close(aux) || || XX ||
|| || ||
pipe(fd) || || pipe3[0] -- pipe3[1] ||
|| || || || ||
aux = fd[0] || || aux || ||
|| XX || || ||
|| /-------++----------+| ||
dup2(fd[1], 1) || // || || ||
|| || || || ||
close(fd[1]) || || || XX ||
|| XX || ||
|| /-------++-----------------+|
dup2(std, 1) || // || ||
|| || || ||
fork+exec(baz) || || || ||
foo
gets stdin=stdin
, stdout=pipe1[1]
bar
gets stdin=pipe1[0]
, stdout=pipe2[1]
baz
gets stdin=pipe2[0]
, stdout=stdout
My suggestion is different in that it avoids mangling the parent's stdin
and stdout
, only manipulating them within the child, and never leaks any FDs. It's a bit harder to diagram, though.
for cmd in cmds
if there is a next cmd
pipe(new_fds)
fork
if child
if there is a previous cmd
dup2(old_fds[0], 0)
close(old_fds[0])
close(old_fds[1])
if there is a next cmd
close(new_fds[0])
dup2(new_fds[1], 1)
close(new_fds[1])
exec cmd || die
else
if there is a previous cmd
close(old_fds[0])
close(old_fds[1])
if there is a next cmd
old_fds = new_fds
parent
cmds = [foo, bar, baz]
fds = {0: stdin, 1: stdout}
cmd = cmds[0] {
there is a next cmd {
pipe(new_fds)
new_fds = {3, 4}
fds = {0: stdin, 1: stdout, 3: pipe1[0], 4: pipe1[1]}
}
fork => child
there is a next cmd {
close(new_fds[0])
fds = {0: stdin, 1: stdout, 4: pipe1[1]}
dup2(new_fds[1], 1)
fds = {0: stdin, 1: pipe1[1], 4: pipe1[1]}
close(new_fds[1])
fds = {0: stdin, 1: pipe1[1]}
}
exec(cmd)
there is a next cmd {
old_fds = new_fds
old_fds = {3, 4}
}
}
cmd = cmds[1] {
there is a next cmd {
pipe(new_fds)
new_fds = {5, 6}
fds = {0: stdin, 1: stdout, 3: pipe1[0], 4: pipe1[1],
5: pipe2[0], 6: pipe2[1]}
}
fork => child
there is a previous cmd {
dup2(old_fds[0], 0)
fds = {0: pipe1[0], 1: stdout,
3: pipe1[0], 4: pipe1[1],
5: pipe2[0], 6: pipe2[1]}
close(old_fds[0])
fds = {0: pipe1[0], 1: stdout,
4: pipe1[1],
5: pipe2[0], 6: pipe2[1]}
close(old_fds[1])
fds = {0: pipe1[0], 1: stdout,
5: pipe2[0], 6: pipe2[1]}
}
there is a next cmd {
close(new_fds[0])
fds = {0: pipe1[0], 1: stdout, 6: pipe2[1]}
dup2(new_fds[1], 1)
fds = {0: pipe1[0], 1: pipe2[1], 6: pipe2[1]}
close(new_fds[1])
fds = {0: pipe1[0], 1: pipe2[1]}
}
exec(cmd)
there is a previous cmd {
close(old_fds[0])
fds = {0: stdin, 1: stdout, 4: pipe1[1],
5: pipe2[0], 6: pipe2[1]}
close(old_fds[1])
fds = {0: stdin, 1: stdout, 5: pipe2[0], 6: pipe2[1]}
}
there is a next cmd {
old_fds = new_fds
old_fds = {5, 6}
}
}
cmd = cmds[2] {
fork => child
there is a previous cmd {
dup2(old_fds[0], 0)
fds = {0: pipe2[0], 1: stdout,
5: pipe2[0], 6: pipe2[1]}
close(old_fds[0])
fds = {0: pipe2[0], 1: stdout,
6: pipe2[1]}
close(old_fds[1])
fds = {0: pipe2[0], 1: stdout}
}
exec(cmd)
there is a previous cmd {
close(old_fds[0])
fds = {0: stdin, 1: stdout, 6: pipe2[1]}
close(old_fds[1])
fds = {0: stdin, 1: stdout}
}
}
Edit
Your updated code does fix the previous FD leaks… but adds one: you're now leaking std0
to the children. As Jon says, this is probably not dangerous to most programs... but you still should write a better behaved shell than this.
Even if it's temporary, I would strongly recommend against mangling your own shell's standard in/out/err (0/1/2), only doing so within the child right before exec. Why? Suppose you add some printf
debugging in the middle, or you need to bail out due to an error condition. You'll be in trouble if you don't clean up your messed-up standard file descriptors first. Please, for the sake of having things operate as expected even in unexpected scenarios, don't muck with them until you need to.
Edit
As I mentioned in other comments, splitting it up into smaller parts makes it much easier to understand. This small helper should be easily understandable and bug-free:
/* Fork a child and exec cmd in it, optionally redirecting stdin/stdout.
 * The redirection happens only inside the child; the caller's own
 * descriptors are never modified.
 *
 * cmd, argv: passed to execvp
 * fd_in, fd_out: when not -1, replaces stdin and stdout in the child;
 *                the child closes the original after duplicating it
 * return: in the parent, pid of the fork+exec child, or -1 if fork failed;
 *         the child never returns (it execs, or exits with status 127)
 */
int fork_and_exec_with_fds(char *cmd, char **argv, int fd_in, int fd_out) {
    pid_t child = fork();

    /* parent (child > 0) or fork failure (child == -1): report to caller */
    if (child != 0) {
        return child;
    }

    /* child: wire up stdin, skipping the no-op case where fd_in is already 0 */
    if (fd_in != -1 && fd_in != 0) {
        if (dup2(fd_in, 0) == -1) {
            _exit(127);
        }
        close(fd_in);
    }

    /* child: wire up stdout, skipping the no-op case where fd_out is already 1 */
    if (fd_out != -1 && fd_out != 1) {
        if (dup2(fd_out, 1) == -1) {
            _exit(127);
        }
        close(fd_out);
    }

    execvp(cmd, argv);

    /* exec failed; use _exit so the child does not run atexit handlers or
     * flush stdio buffers it inherited as copies from the parent */
    _exit(127);
}
As should this:
/* Start cmds[0] | cmds[1] | ... | cmds[num-1], connecting each command's
 * stdout to the next command's stdin with a pipe. The first command keeps
 * the caller's stdin and the last keeps the caller's stdout.
 *
 * num: number of commands
 * cmds, argvs: per-command program name and argument vector, passed to
 *              fork_and_exec_with_fds
 * pids: out array of num entries; pids[i] receives whatever
 *       fork_and_exec_with_fds returned for command i, or -1 for commands
 *       that were not started because pipe() failed
 *
 * Does not wait for the children. Needs <fcntl.h> in addition to <unistd.h>.
 */
void run_pipeline(int num, char *cmds[], char **argvs[], int pids[]) {
    /* initially, don't change stdin */
    int fd_in = -1;
    int i;

    for (i = 0; i < num; i++) {
        /* -1 means "leave stdin/stdout alone"; also keeps the last
         * iteration from reading an uninitialized descriptor */
        int fd_pipe[2] = { -1, -1 };

        /* if there is a next command, set up a pipe for stdout */
        if (i + 1 < num) {
            if (pipe(fd_pipe) == -1) {
                int j;
                /* cannot connect the remaining commands; mark them unstarted */
                for (j = i; j < num; j++) {
                    pids[j] = -1;
                }
                break;
            }
            /* The read end belongs to the NEXT command only. Mark it
             * close-on-exec so this command does not inherit a reader for
             * its own output pipe (which would stop it from ever seeing
             * EPIPE/SIGPIPE). The next child's dup2 onto fd 0 yields a
             * descriptor without the flag, so its stdin survives exec. */
            fcntl(fd_pipe[0], F_SETFD, FD_CLOEXEC);
        }

        /* run child with given stdin/stdout */
        pids[i] = fork_and_exec_with_fds(cmds[i], argvs[i], fd_in, fd_pipe[1]);

        /* nobody else needs to use these fds anymore */
        if (fd_in != -1) {
            close(fd_in);
        }
        if (fd_pipe[1] != -1) {
            close(fd_pipe[1]);
        }

        /* set up stdin for next command (-1 after the last one) */
        fd_in = fd_pipe[0];
    }

    /* only still open if we bailed out early on a pipe() failure */
    if (fd_in != -1) {
        close(fd_in);
    }
}
You can see Bash's execute_cmd.c#execute_disk_command
being called from execute_cmd.c#execute_pipeline
, xsh's process.c#process_run
being called from jobs.c#job_run
, and even every single one of BusyBox's various small and minimal shells splits them up.