Articles

Building Your First Cluster

Sidebar Two: task.c
/*
 *========================================================================
 * task.c
 *========================================================================
 */

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
unsigned long int seed;
/* set to 1 to debug, but not for parallel production */
int verbose = 0;
int nwork;

/* random() returns values in [0, 2^31 - 1]; RAND_MAX only describes rand(). */
#define RANDOM_MAX 2147483647.0

/*
 * Simulated worker task.
 *
 * Usage: task seed nwork delay
 *
 *   seed   seed handed to srandom()
 *   nwork  number of work units (uniform deviates) to produce
 *   delay  seconds to sleep before producing each deviate
 *
 * Prints one deviate per line on stdout.  Returns 0 on success and
 * exits with status 1 if too few arguments are given.
 */
int main(int argc, char *argv[])
{

 int i,delay;

 /* Not much error checking, but argv[1]..argv[3] must all be present */
 if(argc < 4){
   fprintf(stderr,"Usage: task seed nwork delay\n");
   exit(1);
 }

 /* Parse command line */
 seed = strtoul(argv[1], (char **)NULL, 10);
 nwork = strtol(argv[2], (char **)NULL, 10);
 delay = strtol(argv[3], (char **)NULL, 10);
 /* sleep() takes an unsigned count; a negative delay would wrap around */
 if(delay < 0){
   delay = 0;
 }
 if(verbose) {
   printf("The seed for this task is %lu, and it will do %d work units.\n",seed,nwork);
 }

 /* Here we do some work.  The amount is controlled by nwork. */
 srandom(seed);
 for(i=0; i < nwork; i++){
   if(verbose){
     printf("Doing simulated work unit %d\n",i);
   }
   /* Simulated work is "generate and return a uniform deviate"... */
   sleep(delay);
   printf("%f\n",(double) random()/RANDOM_MAX);
   /* ...badly (should use gsl mt19937, not random():-) */

 }

 return 0;

}

Sidebar Three: Makefile for task.c
# $Id: first_cluster.txt,v 1.4 2003/09/07 14:45:43 rgb Exp $
#========================================================================

# Defines/Macros
PROGRAM = task
SOURCE = $(PROGRAM:=.c)
OBJECT = $(PROGRAM:=.o)
# Local headers that every object depends on (none yet).
INCLUDES =
CC = gcc
CFLAGS = -O3
LDFLAGS =
LIBS = -lm

# Targets
# "all" and "clean" name actions, not files, so they must always run.
.PHONY: all clean

all: $(PROGRAM)

# Libraries are listed after the objects: the linker resolves symbols
# left to right, so a library named first may be skipped.
# NOTE: every recipe line below must begin with a TAB, not spaces.
$(PROGRAM): $(OBJECT)
	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJECT) $(LIBS)

clean:
	rm -f core $(PROGRAM) $(OBJECT)

# Rules
%.o:%.c $(INCLUDES)
	$(CC) -c $(CFLAGS) $<

{mosgoogle right}

Sidebar Four: taskmaster (perl) listing
#!/usr/bin/perl
#========================================================================
# $Id: first_cluster.txt,v 1.4 2003/09/07 14:45:43 rgb Exp $
#========================================================================

 use strict;
 use warnings;
 use Config;
 use threads;

 # taskmaster main program: read a host list, split nrands work units
 # round-robin over the first nhosts hosts, run the task on each host in
 # its own thread, then print the collected results and the elapsed time.

 # Set the path to the task.  You may prefer to use a path in your
 # home directory on a shared filesystem.
 my $taskpath = "/tmp/task";

 $Config{useithreads} or die "Upgrade to perl >= 5.8.0, compiled with threads";

 # Get required arguments (4) from command line
 my $verbose = 1;
 if(@ARGV != 4){
   Usage("Incorrect number or type of arguments");
   exit 1;
 }
 my ($hostfile,$nhosts,$nrands,$delay) = @ARGV;

 # The three counts must be plain non-negative integers, and at least one
 # host is needed (nhosts is used as a modulus below).
 for my $count ($nhosts,$nrands,$delay){
   if($count !~ /\A\d+\z/){
     Usage("Incorrect number or type of arguments");
     exit 1;
   }
 }
 if($nhosts < 1){
   Usage("nhosts must be at least 1");
   exit 1;
 }

 # Get list of host names, one per line; blank lines are ignored.
 open(my $fd, '<', $hostfile) or die "$0: can't open $hostfile: $!\n";
 my @hosts;
 while(my $line = <$fd>) {
   chomp $line;
   $line =~ s/^\s+|\s+$//g;
   next if $line eq '';
   push @hosts, $line;
 }
 close($fd);

 if($nhosts > @hosts){
   die "$0: $hostfile lists only " . scalar(@hosts)
     . " host(s), but nhosts is $nhosts\n";
 }

 # Split up nrands precisely and lazily (outside timer).
 # This balances our "load".  Every host starts at zero so that a host
 # with no work still gets a well-defined count.
 my @nw = (0) x $nhosts;
 for my $nr (0 .. $nrands - 1){
   $nw[$nr % $nhosts]++;
 }

 # Start timer and spawn remote host task threads.
 my $tstart = time;
 print "\nSpawning host threads\n\n";
 my @hostthread;
 for my $i (0 .. $nhosts - 1){
   my $seed = $i + 1;
   # Scalar assignment on purpose: it makes join() return a single string.
   $hostthread[$i] = threads->new(\&runtask,$taskpath,$hosts[$i],$seed,$nw[$i],$delay)
     or die "$0: can't create thread for host $hosts[$i]\n";
   if($verbose){
     print "Host $hosts[$i] thread running.\n";
   }
 }
 print "\n";


 # Accumulate returns from each host task thread in @rands.
 # This will block until the last host completes.
 my @rands;
 foreach my $hostt (@hostthread){
   my $output = $hostt->join;
   push @rands, split(/\n/, $output) if defined $output;
 }
 my $tstop = time;

 # Print out results and timing.  Don't time the printout.
 for my $i (0 .. $#rands){
   print "rand[$i] = $rands[$i]\n";
 }
 if(@rands != $nrands){
   warn "$0: expected $nrands results but received " . scalar(@rands) . "\n";
 }
 print "\n";
 my $ttime = $tstop - $tstart;
 printf("Results:\n");
 printf("%8s %8s %8s %8s\n","nhosts","nrands","delay","time");
 printf(" %5d  %8d %8d %8d\n",$nhosts,$nrands,$delay,$ttime);

 exit;

 # Variable strictures are limited to the main program above.
 no strict 'vars';

# runtask($taskpath, $host, $seed, $nwork, $delay)
#
# Run "$taskpath $seed $nwork $delay" on $host through /usr/bin/ssh -x and
# return everything the remote command wrote to stdout as one string
# (the empty string if nothing could be read).  Failures to start ssh and
# non-zero exit statuses are reported on STDERR; whatever output was
# captured is still returned.
sub runtask {

 my ($taskpath, $host, $seed, $nwork, $delay) = @_;

 # List form of the pipe open: the arguments go straight to ssh without
 # being re-parsed by a local shell.
 my @task = ("/usr/bin/ssh", "-x", $host, $taskpath, $seed, $nwork, $delay);
 my $ssh;
 if(!open($ssh, '-|', @task)){
   warn "runtask: can't run @task: $!\n";
   return "";
 }

 # Slurp the whole output at once, as the backtick form would.
 my $rand = do { local $/; <$ssh> };
 $rand = "" unless defined $rand;

 if(!close($ssh)){
   # close() on a pipe fails with $! == 0 when the command exited non-zero.
   my $why = $! ? "failed: $!" : "exited with status " . ($? >> 8);
   warn "runtask: task on $host $why\n";
 }
 return $rand;

}

# Usage($message)
#
# Print the optional error $message and the usage text on STDERR, then
# terminate the program.  The exit status is 1 when an error message was
# given and 0 when the usage text alone was requested.
sub Usage {

 my $message = shift;
 if($message) {print STDERR "Error: $message\n";}
 print STDERR
"Usage:

taskmaster hostfile nhosts nrands delay

 hostfile is a file that contains hostnames, one per line

 nhosts is the number of these hosts you wish to use

 nrands is the number of random numbers you wish to generate in parallel.

 delay is the number of seconds the worker task will sleep 
 (simulating  work) between each random number it generates.

";
 # An error report must not look like success to the calling shell.
 exit($message ? 1 : 0);
}

This article was originally published in ClusterWorld Magazine. It has been updated and formatted for the web. If you want to read more about HPC clusters and Linux you may wish to visit Linux Magazine.

Robert Brown, Ph.D., has written extensively about Linux clusters. You can find his work and much more on his home page at http://www.phy.duke.edu/~rgb.

    Search

    Feedburner

    Login Form

    Share The Bananas


    Creative Commons License
    ©2005-2012 Copyright Seagrove LLC, Some rights reserved. Except where otherwise noted, this site is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 2.5 License. The Cluster Monkey Logo and Monkey Character are Trademarks of Seagrove LLC.