% Entry 5036: paper on hydrographic data processing on a network-coupled
% parallel cluster (Calder and Venugopal, 2012).
% NOTE(review): this record carries a venue-style address and a day range but
% no journal field, so it looks like a conference contribution. The entry type
% is left as "article" because the proceedings/venue title is not recorded
% here; confirm the venue and switch to inproceedings with a booktitle.
@article{5036,
  author   = {Calder, Brian R. and Venugopal, Rohit},
  title    = {Hydrographic Data Processing on a Robust, Network-Coupled Parallel Cluster},
  year     = {2012},
  month    = feb # "~21--26",
  address  = {Wellington, New Zealand},
  keywords = {CHRT, Data Processing, Parallel Processing},
  abstract = {Increasing data volumes and adoption of computer-assisted hydrographic data processing algorithms necessitate higher data processing rates if gains in efficiency achieved in the last decade are to be maintained and enhanced. Recent advances in desktop computer architectures have made multi-core and multi-processor systems readily available, and some advances have been made in implementing multi-threaded versions of common hydrographic data processing algorithms. In many cases, however, although the algorithms might be ideal for parallel implementation (so called `embarrassingly parallel' tasks), limitations in memory, disc and network bandwidth within a single system can have significant limitations on the scalability of these solutions.

Offloading the computational requirements to a separate, clustered system of multiple computers is therefore appealing, since it has the potential for much higher net bandwidth, and robustness, without the collateral constraints of a desktop system. We consider, therefore, the advantages, potential efficiency gains, and difficulties, of processing hydrographic data in a robust, network-coupled, parallel cluster of computers. In particular, we address the problems of efficient and robust data distribution, compute load and network balancing, and of ensuring task- and system-level robustness in such a distributed system.

To illustrate the problem, we have considered two common processing tasks: pre-processing of raw Multibeam Echosounder (MBES) data to the stage of uncertainty-attributed resolved soundings in the local level, and computation of most-probable depths with a CUBE-like algorithm. These tasks illustrate a time- and spatially-indexed processing problem, respectively, which can engender differences in optimal data distribution and have different data- and network-use patterns. We demonstrate the gains and limitations of a clustered compute solution in these two cases, using the metrics of computational time as a function of processor resources committed, and robustness of processing in the face of intermittent random failures, as applied to (portions of) the Shallow Survey 2012 Common Data Set.},
}