// The usual way to traverse a tree structure is to use a stack, often
// implemented with a recursive function so you can remember where you
// used to be and where to go next.  This is perfectly fine, and for
// most trees, the depth is going to be so small that the stack will
// not ever become too large.
//
// But sometimes you either have to be able to handle extremely
// lopsided trees, or are perhaps in an environment where you want to
// traverse a tree, but do not have access to a stack.  The latter may
// happen when doing e.g. GPU or vector programming.  For such cases
// there is a nifty trick for stack-free tree walking.  The only thing
// it requires is being able to uniquely identify tree nodes (e.g. by
// their address) and each node having a pointer to its parent.  I'll
// demonstrate the technique in C.

#include <stdio.h>
#include <stdlib.h>

// We'll be working with binary trees and for simplicity the
// node-level data is just an integer.

struct node {
  int data;
  struct node* parent; // NULL for the root.
  struct node* left;
  struct node* right;
};

// We'll need some trees to test with, so here's recursive functions
// for creating and freeing trees.  This creation can also be done in
// a stack-free manner, but I'll leave that for another program.

struct node* mk_tree(struct node *parent, int d) {
  if (d == 0) {
    return NULL;
  } else {
    struct node *x = malloc(sizeof(struct node));
    x->data = 0;
    x->parent = parent;
    x->left = mk_tree(x, d-1);
    x->right = mk_tree(x, d-1);
    return x;
  }
}

void free_tree(struct node* x) {
  if (x) {
    free_tree(x->left);
    free_tree(x->right);
    free(x);
  }
}

// Time to walk the tree.  For each node, let's increment its value.

void visit(struct node *x) {
  x->data++;
}

// Here's the standard recursive way of walking the tree.  This is a
// preorder traversal, because we modify the node value before walking
// down the left branch.  This isn't important and could have been
// done any way.

void walk_tree_withstack(struct node *x) {
  if (x) {
    visit(x);
    walk_tree_withstack(x->left);
    walk_tree_withstack(x->right);
  }
}

// Okay, so how do we do this without a stack?  walk_tree_withstack()
// uses the C call stack to "suspend" execution while recursively
// traversing the children.  Since the nodes have parent pointers, we
// don't need the call stack to know where to return to, but we do need
// to know whether we are visiting a node for the first time, have
// returned from the left child, or returned from the right child.  We
// can do this by keeping a pointer to the _previous_ node we visited,
// and upon entering a new node, we check whether we just came from
// the parent or one of the children.

void walk_tree_stackfree(struct node *x) {
  // Current node we are visiting.  Will be x->parent when we are done.
  struct node *cur = x;

  // Last node we visited.
  struct node *prev = x->parent;

  while (cur != x->parent) {
    // Initially, suppose the next node to visit is our parent.  This
    // is the case if we've already visited the children.
    struct node *next = cur->parent;
    if (prev == cur->parent) {
      // If the _previous_ node is the parent, then we must be visiting
      // this node for the first time.
      visit(cur);
      // We want to visit the left node (if it exists), otherwise the
      // right node (if it exists).
      if (cur->left) {
        next = cur->left;
      } else if (cur->right) {
        next = cur->right;
      }
    } else if (prev == cur->left) {
      // We are returning from the left child, so we want to visit the
      // right child (if it exists).
      if (cur->right) {
        next = cur->right;
      }
    }
    prev = cur;
    cur = next;
  }
}

// The control flow is much more convoluted.  If would be even more
// convoluted if our tree supported an arbitrary nubmer of children
// per node. But how much slower is stack-free traversal?  Let's see.
// First, define a way to measure performance.

#include <time.h>

double seconds() {
  struct timespec ts;
  clock_gettime(CLOCK_MONOTONIC, &ts);
  return ts.tv_sec + ts.tv_nsec/1000000000.0;
}

// And then a main() function that measures the time to construct,
// walk, and free a tree of given depth.

int main(int argc, char** argv) {
  if (argc != 2) {
    fprintf(stderr, "Usage: %s N\n", argv[0]);
    exit(1);
  }

  int d = atoi(argv[1]);
  double a,b;

  a = seconds();
  struct node *t = mk_tree(NULL, d);
  b = seconds();
  printf("Constructing:\t\t%fs\n", b-a);

  a = seconds();
  walk_tree_withstack(t);
  b = seconds();
  printf("Walking with stack:\t%fs\n", b-a);

  a = seconds();
  walk_tree_stackfree(t);
  b = seconds();
  printf("Walking without stack:\t%fs\n", b-a);

  a = seconds();
  free_tree(t);
  b = seconds();
  printf("Freeing:\t\t%fs\n", b-a);
}

// Here are some numbers on my computer:
//
// $ gcc treewalk.c -o treewalk -O2
//
// $ ./treewalk 20
// Constructing:		0.035791s
// Walking with stack:          0.005445s
// Walking without stack:	0.006163s
// Freeing:		        0.009331s
//
// $ ./treewalk 25
// Constructing:		1.140084s
// Walking with stack:	        0.181610s
// Walking without stack:	0.195902s
// Freeing:		        0.301698s
//
// Stack-free traversal is a bit slower, probably because of the extra
// control flow, but it's not awful.  Further, the stack-free
// implementation can handle extremely unbalanced trees without
// blowing the stack, and works in settings where a stack is not
// available or practical.  I have used it myself for traversing
// bounding volume hierarchies in a ray tracer running on GPU:
//
// https://github.com/athas/raytracingthenextweekinfuthark/