#!/usr/bin/env python3
"""Parallel example code for estimating the value of π.

We can estimate the value of π by a stochastic algorithm. Consider a
circle of radius 1, inside a square that bounds it, with vertices at
(1,1), (1,-1), (-1,-1), and (-1,1). The area of the circle is just π,
whereas the area of the square is 4. So, the fraction of the area of
the square which is covered by the circle is π/4. A point selected
uniformly at random from the square thus has a probability π/4 of
being within the circle. We can estimate π by examining a large
number of randomly-selected points from the square and seeing what
fraction of them lie within the circle. If this fraction is f, then
our estimate for π is π ≈ 4f.

Thanks to symmetry, we can sample points from a single quadrant,
rather than from the entire square, and arrive at identical results.

This task lends itself naturally to parallelization: selecting a
sample point and deciding whether or not it lies inside the circle is
independent of all the other samples, so the samples can be processed
simultaneously. We only need to aggregate the data at the end to
compute our fraction f and our estimate for π.
"""

import numpy as np
import sys
import datetime
from mpi4py import MPI

wtime1 = MPI.Wtime()


def inside_circle(total_count):
    """Single-processor task for a group of samples.

    Generates uniform random x and y arrays of size total_count, on
    the interval [0,1), and returns the number of the resulting (x,y)
    pairs which lie inside the unit circle.
    """
    host_name = MPI.Get_processor_name()
    print("Rank {} generating {:n} samples on host {}.".format(
        rank, total_count, host_name))
    x = np.float64(np.random.uniform(size=total_count))
    y = np.float64(np.random.uniform(size=total_count))

    radii = np.sqrt(x*x + y*y)

    count = len(radii[np.where(radii <= 1.0)])

    return count


if __name__ == '__main__':
    """Main executable.

    This block runs the 'inside_circle' function with a defined number
    of samples. The results are then used to estimate π.

    An estimate of the required memory, the elapsed calculation time,
    and the accuracy of the π estimate are also computed.
    """

    # Declare an MPI Communicator for the parallel processes to talk through
    comm = MPI.COMM_WORLD

    # Read the number of parallel processes tied into the comm channel
    cpus = comm.Get_size()

    # Find out the index ("rank") of *this* process
    rank = comm.Get_rank()

    if len(sys.argv) > 1:
        n_samples = int(sys.argv[1])
    else:
        n_samples = 8738128  # trust me, this number is not random :-)

    if rank == 0:
        # Time how long it takes to estimate π.
        start_time = datetime.datetime.now()
        print("Generating {:n} samples.".format(n_samples))
        # Rank zero builds two arrays with one entry for each rank:
        # one for the number of samples each rank should run, and
        # one to store the count each rank returns.
        partitions = [int(n_samples / cpus)] * cpus
        counts = [int(0)] * cpus
    else:
        partitions = None
        counts = None

    # All ranks participate in the "scatter" operation, which assigns
    # the local scalar values to their appropriate array components.
    # partition_item is the number of samples this rank should generate,
    # and count_item is the place to put the number of counts we see.
    partition_item = comm.scatter(partitions, root=0)
    count_item = comm.scatter(counts, root=0)

    # Each rank locally populates its count_item variable.
    count_item = inside_circle(partition_item)

    # All ranks participate in the "gather" operation, which collects
    # each rank's count_item into the list "counts" on rank zero.
    counts = comm.gather(count_item, root=0)

    if rank == 0:
        # Only rank zero holds the gathered counts, so it computes and
        # reports the result.
        my_pi = 4.0 * sum(counts) / n_samples
        elapsed_time = (datetime.datetime.now() - start_time).total_seconds()

        # Memory required is dominated by the size of x, y, and radii from
        # inside_circle(), calculated in MiB
        size_of_float = np.dtype(np.float64).itemsize
        memory_required = 3 * n_samples * size_of_float / (1024**2)

        # Accuracy is calculated as the percent difference from numpy's
        # value of π.
        pi_specific = np.pi
        accuracy = 100 * (1 - my_pi / pi_specific)

        # Print the values of π
        print('Numpy Pi:          ', pi_specific)
        print('My Estimate of Pi: ', my_pi)

        # Switch between the summary formats below for verbose or terse output
        summary = "{:d} core(s), {:d} samples, {:f} MiB memory, {:f} seconds, {:f}% error"
        # summary = "{:d},{:d},{:f},{:f},{:f}"
        print(summary.format(cpus, n_samples, memory_required, elapsed_time,
                             accuracy))

        print(f"Total run time={MPI.Wtime() - wtime1}s")
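
# ----------------------------------------------------------------------
# Example usage (a sketch, not part of the original script): this assumes
# mpi4py plus a working MPI implementation are installed, and that the
# file is saved as "pi_mpi.py" -- that filename is an assumption made
# here for illustration only.
#
#   mpirun -np 4 python3 pi_mpi.py 100000000
#
# With this invocation, rank 0 scatters 25,000,000 samples to each of the
# 4 ranks, each rank counts its in-circle hits independently, and rank 0
# gathers the counts and prints the π estimate along with memory, timing,
# and error figures.
# ----------------------------------------------------------------------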