/*
From: mmccartn@cs.utexas.edu (Michael David McCartney)
Newsgroups: comp.os.linux.development
Subject: Re: looking for some information on the clone() system call.
Date: 14 Jun 1994 01:01:18 -0500
Organization: CS Dept, University of Texas at Austin

In article <CrCAK7.Gp@wg.saar.de>, Patrick Schaaf <bof@wg.saar.de> wrote:
>
>I also noticed that my code destroyed %ebx, which is callee-saved...
>
>Below is a version of do_clone() that should work for all cases and
>switches. I tested it with 1.1.8 kernel and gcc 2.5.8, and it seems
>to work.
>

well, i thought you (and other developers) might be interested in what
i came up with last night.  basically, i ran with your first version
of the code to produce a clone() call that behaves like fork.

it just so happens that my function saves ebx so "technically" it's
not a problem but i would rather have better reassurance.  since i've
decided i don't need the clone() functionality in my project, i'll
leave it up to someone else to make improvements.  (i'm getting sleepy :-))

Below is a function called clone3() which takes the first 2 arguments to
do_clone() and the third argument tells it how many levels up the stack it
is to traverse when figuring out where the top of the stack is.  the
primary difference between clone3() and do_clone() is that it constructs
a stack for the clone process and returns on that stack so the user can
use the function just like fork().

Future work:
  1) i didn't know an exact way to find the top of the
     stack so it's dangerous to return more than the number of levels specified
     by the "num_frames_to_copy" argument.  if there is a nice way of finding
     out the address of the top of the stack, we could do away with the
     argument altogether.
  2) There may be some register abuse in the assembly code (with respect to
     ebx, ecx, and edx).  I was more concerned with getting the stack frames
     set up properly.
  3) The code i added to copy and rethread the stack should really be done
     inside sys_fork() with the added knowledge of the actual size of the
     stack.  This works, but IMHO is a bit clumsy.

I'll include the entire clone3.c so you can see how it runs.  it's basically
the same as your previous example with do_clone().

Enjoy!
Mike McCartney
*/
/* clone3.c - a test program using the clone syscall */
/* #define __NR_viper              141 */

#include <errno.h>
#include <stdio.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/unistd.h>

/*
 * Two-level stringification: DEREF_STR(x) lets x be macro-expanded first and
 * then STR() turns the result into a string literal.  clone3() uses this to
 * paste the value of __NR_clone into its inline-assembly template.
 */
#define STR(x) #x
#define DEREF_STR(x) STR(x)

/* Set to 24 by clone_function() and printed by the parent at the end of main(). */
int stuff = 0;

/*
 * clone3 - copy part of the caller's stack into a new stack area and issue
 * the clone system call so that the clone returns from this function on the
 * copied stack.
 *
 * clone_esp:          points just above the memory to be used as the clone's
 *                     stack; the copied frames are placed immediately below
 *                     this address (the word at clone_esp itself is not
 *                     written).
 * clone_flags:        passed unchanged to the system call in %ecx.
 * num_frames_to_copy: number of saved-%ebp links to follow upward from this
 *                     function's own frame when locating the top of the
 *                     region to copy.
 *
 * Returns the value left in %eax by the syscall sequence below: per the
 * inline comments, the clone's pid in the parent, 0 in the clone, or -1 when
 * the error path is taken.
 *
 * The code relies on every frame being chained through %ebp, i.e. the word
 * at each frame pointer holds the caller's frame pointer.
 *
 * NOTE(review): the second asm statement overwrites %eax, %ebx, %ecx and
 * %edx (and %ebp in the clone) but declares no clobber list, and neither asm
 * statement is marked volatile -- confirm this is safe with the compiler and
 * optimisation level in use.
 */
pid_t clone3(long *clone_esp,
	     unsigned long clone_flags,
	     int num_frames_to_copy)
{
  long *current_esp;      /* this function's stack pointer */
  long *current_ebp;      /* this function's frame pointer */
  long *current_top_ebp;  /* cursor walking the original frame chain */
  long *clone_top_ebp;    /* cursor walking the copied frame chain */
  long *clone_ebp;        /* frame pointer for this function in the copy */
  size_t size;            /* number of longs to copy */
  pid_t pid;
  int i;

  /* get the current esp and ebp */
  __asm__ (
           "movl %%esp, %0\n\t"
           "movl %%ebp, %1\n\t"
	   : /* outputs */ /* %0 */ "=m" (current_esp),
                           /* %1 */ "=m" (current_ebp)
	   : /* inputs */
  );

  /* traverse up stack frames (num_frames_to_copy) */
  current_top_ebp = current_ebp;
  for (i=0; i<num_frames_to_copy; i++) {
    /* the word at a frame pointer is treated as the caller's frame pointer */
    current_top_ebp = (long*) *current_top_ebp;
  }

  /* calculate size of stack frame + argv, argc, main */
  /* The pointer difference counts the longs from esp up to, but not
   * including, the word at current_top_ebp; +1 includes that word and +3
   * adds three more longs above it. */
  size = (current_top_ebp - current_esp)+1 + 3;

  /* adjust clone's esp to point to bottom of stack frame */
  clone_esp -= size;

  /* calculate ebp for clone */
  /* same offset from the copied esp as ebp has from esp in the original */
  clone_ebp = clone_esp + (current_ebp - current_esp);
  
  /* copy stack frames */
  memcpy(clone_esp, current_esp, size * sizeof(long));

  /* rethread clone's stack frames with ebp's which are in terms of the
   * addresses in the clone's stack
   */
  /* Each pass steps to the next frame in the original chain, stores the
   * relocated address of that frame into the corresponding saved-ebp slot of
   * the copy, and then follows the link just written.  The saved-ebp slot of
   * the top-most copied frame is not rewritten, so it keeps the value copied
   * from the original stack. */
  current_top_ebp = current_ebp;
  clone_top_ebp = clone_ebp;
  for (i=0; i<num_frames_to_copy; i++) {
    current_top_ebp = (long *) *current_top_ebp;
    *clone_top_ebp = (long) (clone_ebp + (current_top_ebp - current_ebp));
    clone_top_ebp = (long *) *clone_top_ebp;
  }

  /* make the clone syscall and store the return value in pid */
  __asm__ (
	   /*
	    * make the syscall
	    *   eax = __NR_clone
	    *   ebx = clone_esp
	    *   ecx = clone_flags
	    * returns:
	    *   carry if error
	    *   eax = pid of child (for parent)
	    *       = 0            (for child)
	    *       = error code   (if error)
	    */
	   /* edx carries clone_ebp across the syscall; it is loaded first,
	    * while all memory operands still refer to the parent's frame */
	   "movl %3, %%edx\n\t"
           "movl %1, %%ebx\n\t"
           "movl %2, %%ecx\n\t"
	   "movl $" DEREF_STR(__NR_clone) ", %%eax\n\t"
	   "int $0x80\n\t"

	   /*
	    * error checking:
	    *   if (carry) {
	    *     errno = eax;
	    *     eax = -1;
	    *   }
	    */
	   /* NOTE(review): this assumes the kernel signals failure through
	    * the carry flag and that errno is reachable under the assembler
	    * symbol _errno -- confirm both against the target kernel ABI
	    * and toolchain. */
	   "jnc 1f\n\t"
	   "movl %%eax, _errno\n\t"
	   "movl $-1, %%eax\n\t"
	   "1:\n\t"

	   /*
	    * set ebp for clone:
	    *   if (!eax) {        (in child)
	    *     ebp = clone_ebp  (via edx)
	    *   }
	    */
	   "testl %%eax, %%eax\n\t"
	   "jne 3f\n\t"
	   "movl %%edx, %%ebp\n\t"
	   "3:\n\t"

	   /*
	    * save pid:            (note %0 is based on value of ebp)
	    *   pid = eax;
	    */
	   /* in the clone %ebp was just switched, so this store is intended
	    * to land in the copied frame's pid slot; that only holds if the
	    * compiler addresses pid relative to %ebp */
	   "movl %%eax, %0\n\t"

	   : /* outputs */ /* %0 */ "=m" (pid)
	   : /* inputs */  /* %1 */ "m" (clone_esp),
	                   /* %2 */ "m" (clone_flags),
                           /* %3 */ "m" (clone_ebp)
  );

  return(pid);
}

/* pid of the clone; stored by clone_function() and read by sigusr1(). */
int clone_pid = -1;
/* pid of the original process; stored at the start of main(). */
int parent_pid = -1;
/*
 * Set to 1 by the SIGUSR1 handler once the clone has been reaped and polled
 * by main().  It is written from a signal handler, so it is declared
 * volatile sig_atomic_t to keep the polling loop reading it from memory.
 */
volatile sig_atomic_t do_terminate = 0;

/*
 * sigusr1 - SIGUSR1 handler; we use it for child termination signalling.
 *
 * Waits for the clone identified by clone_pid with wait4(..., __WCLONE, ...)
 * and sets do_terminate once exactly that clone has been collected.
 *
 * sig: the delivered signal number (unused).
 *
 * The handler is re-installed on every exit path, including the wait4()
 * failure path, so that on systems where signal() resets the disposition to
 * the default when the handler is entered a later SIGUSR1 is still caught.
 * errno is saved on entry and restored on exit so the interrupted code does
 * not observe a value changed by the calls made here.
 *
 * NOTE(review): printf() and perror() are not async-signal-safe; they are
 * kept because this is a diagnostic test program.
 */
void sigusr1(int sig)
{
  int saved_errno = errno;
  int status;   /* wait4() stores the exit status through an int pointer */
  int pid;

  (void) sig;

  printf("parent: got SIGUSR1, waiting for children... clone_pid=%d\n",
         clone_pid);
  pid = wait4(clone_pid, &status, __WCLONE, (struct rusage *)0);
  if (pid < 0) {
    perror("wait4");
  } else {
    printf("parent: wait4 returned %d\n", pid);
    if (clone_pid == pid)
      do_terminate = 1;
  }

  /* re-arm unconditionally, also after a failed wait4() */
  signal(SIGUSR1, sigusr1);
  errno = saved_errno;
}

/*
 * Memory used as the clone's stack.  main() passes clone3() the address of
 * the last element, and clone3() places the copied frames below it.
 */
long clone_stack[4096];

/*
 * clone_function - body executed by the clone.
 *
 * Publishes its own pid in clone_pid, announces itself on stderr, sleeps for
 * 30 seconds, stores 24 in stuff, prints a farewell message and returns 0.
 */
int clone_function(void)
{
  int self = getpid();

  clone_pid = self;
  fprintf(stderr, "clone running, pid = %d\n", self);

  sleep(30);
  stuff = 24;

  fputs("clone terminating\n", stderr);
  return 0;
}

/*
 * main - test driver for clone3().
 *
 * Records the parent's pid, installs the SIGUSR1 handler, and calls clone3()
 * with the last element of clone_stack as the stack top, SIGUSR1 as the
 * clone flags, and one frame to copy.  The clone runs clone_function(); the
 * parent polls do_terminate once a second until the handler has reaped the
 * clone, then prints clone_pid and stuff.
 *
 * argc, argv: unused.
 *
 * Exits with EXIT_FAILURE if clone3() reports an error, EXIT_SUCCESS
 * otherwise.
 */
int main(int argc, char **argv)
{
  int pid;
  int exit_status = EXIT_SUCCESS;

  (void) argc;
  (void) argv;

  parent_pid = getpid();
  /* %lx expects an unsigned long, so the int pid is converted explicitly */
  printf("parent pid = %d = %lx\n", parent_pid, (unsigned long) parent_pid);
  signal(SIGUSR1, sigusr1);
  pid = clone3(clone_stack+(sizeof(clone_stack)/sizeof(long))-1, SIGUSR1, 1);

  if (pid < 0) {
    perror("clone");
    exit_status = EXIT_FAILURE;
  } else if (pid == 0) {
    /* sanity check: a return value of 0 should only be seen in the clone */
    if (parent_pid == getpid()) {
      fprintf(stderr, "funny, clone() returned pid=0 in parent\n");
    } else {
      clone_function();
    }
  } else {
    printf("parent: clone running, pid = %d = %lx. waiting for termination.\n",
           pid, (unsigned long) pid);
    while (!do_terminate) sleep(1);
    printf("parent: looks like our kid is gone. BTW, clone_pid = %d, stuff = %d\n",
	   clone_pid, stuff);
  }

  exit(exit_status);
}

