The following C example computes a 2-dimensional out-of-place FFT using the cluster FFT interface:
/* C99 example */
#include <stdlib.h>
#include "mpi.h"
#include "mkl_cdft.h"
#include <math.h>
#ifndef M_PI
#define M_PI (3.14159265358979323846)
#endif
#define DBL_EPSILON 2.2204460492503131e-16
#define CHK_ERR_EQ(ERR_CODE, SUCCESS_CODE) do { \
if (ERR_CODE != SUCCESS_CODE) \
goto finish; \
} while(0)
#define CHK_MKL_ERR(MKL_ERR) CHK_ERR_EQ(MKL_ERR, DFTI_NO_ERROR)
#define CHK_MPI_ERR(MPI_ERR) CHK_ERR_EQ(MPI_ERR, MPI_SUCCESS)
int main(int argc, char* argv[])
{
DFTI_DESCRIPTOR_DM_HANDLE desc = NULL;
MKL_Complex16 *in = NULL, *out = NULL;
MKL_LONG v, i, j, n, s, mkl_err;
const MKL_LONG lengths[2] = {128, 64};
const MKL_LONG h = 7, k = 6;
MKL_Complex16 local_err;
double phase, max_dft_err = 0.0;
int mpi_err = MPI_Init(&argc, &argv); CHK_MPI_ERR(mpi_err);
int exit_code = EXIT_FAILURE;
/* Create descriptor for 2D FFT */
mkl_err = DftiCreateDescriptorDM(MPI_COMM_WORLD, &desc, DFTI_DOUBLE,
DFTI_COMPLEX, 2, lengths); CHK_MKL_ERR(mkl_err);
/* Ask necessary length of in and out arrays and allocate memory */
mkl_err = DftiGetValueDM(desc, CDFT_LOCAL_SIZE, &v); CHK_MKL_ERR(mkl_err);
in = (MKL_Complex16*) malloc(v*sizeof(MKL_Complex16));
out = (MKL_Complex16*) malloc(v*sizeof(MKL_Complex16));
if (in == NULL || out == NULL) goto finish;
/* Fill local array with initial data. Current process performs n rows,
0th row of in corresponds to s-th row of virtual global array */
mkl_err = DftiGetValueDM(desc, CDFT_LOCAL_NX, &n); CHK_MKL_ERR(mkl_err);
mkl_err = DftiGetValueDM(desc, CDFT_LOCAL_X_START, &s); CHK_MKL_ERR(mkl_err);
/* Virtual global array globalIN is such that (0<=i<n, 0<=j<lengths[1])
globalIN[(i+s)*lengths[1]+j] := in[i*lengths[1]+j] */
for (i = 0; i < n; ++i) {
for (j = 0; j < lengths[1]; ++j) {
phase = 2.0*M_PI*((double)((i+s)*h)/lengths[0] + (double)(j*k)/lengths[1]);
in[i*lengths[1]+j].real = cos(phase) / (lengths[0]*lengths[1]);
in[i*lengths[1]+j].imag = sin(phase) / (lengths[0]*lengths[1]);
}
}
/* Configure for an out-of-place transform (default is DFTI_INPLACE) */
mkl_err = DftiSetValueDM(desc, DFTI_PLACEMENT,
DFTI_NOT_INPLACE); CHK_MKL_ERR(mkl_err);
/* Commit descriptor and compute DFT */
mkl_err = DftiCommitDescriptorDM(desc); CHK_MKL_ERR(mkl_err);
mkl_err = DftiComputeForwardDM(desc, in, out); CHK_MKL_ERR(mkl_err);
/* Virtual global array globalOUT is such that (0<=i<n, 0<=j<lengths[1])
globalOUT[(i+s)*lengths[1]+j] := out[i*lengths[1]+j] */
for (i = 0; i < n; ++i) {
for (j = 0; j < lengths[1]; ++j) {
local_err = out[i*lengths[1] + j];
if (i+s == h && j == k)
local_err.real -= 1.0; // expected value
max_dft_err = fmax(max_dft_err, hypot(local_err.real, local_err.imag));
}
}
mpi_err = MPI_Allreduce(MPI_IN_PLACE, &max_dft_err, 1, MPI_DOUBLE, MPI_MAX,
MPI_COMM_WORLD); CHK_MPI_ERR(mpi_err);
if (max_dft_err < 5.0*log2((double) lengths[0]*lengths[1])*DBL_EPSILON)
exit_code = EXIT_SUCCESS;
finish:
if (mkl_err != DFTI_NO_ERROR || mpi_err != MPI_SUCCESS)
exit_code = EXIT_FAILURE;
if (desc) mkl_err = DftiFreeDescriptorDM(&desc);
if (in) free(in);
if (out) free(out);
mpi_err = MPI_Finalize();
if (mkl_err != DFTI_NO_ERROR || mpi_err != MPI_SUCCESS)
exit_code = EXIT_FAILURE;
return exit_code;
}The C example below illustrates one-dimensional in-place cluster FFT computations effected with a user-defined workspace:
/* C99 example */
#include <stdlib.h>
#include "mpi.h"
#include "mkl_cdft.h"
#include <math.h>
#ifndef M_PI
#define M_PI (3.14159265358979323846)
#endif
#define DBL_EPSILON 2.2204460492503131e-16
#define CHK_ERR_EQ(ERR_CODE, SUCCESS_CODE) do { \
if (ERR_CODE != SUCCESS_CODE) \
goto finish; \
} while(0)
#define CHK_MKL_ERR(MKL_ERR) CHK_ERR_EQ(MKL_ERR, DFTI_NO_ERROR)
#define CHK_MPI_ERR(MPI_ERR) CHK_ERR_EQ(MPI_ERR, MPI_SUCCESS)
int main(int argc, char* argv[])
{
DFTI_DESCRIPTOR_DM_HANDLE desc = NULL;
MKL_Complex16 *data = NULL, *work = NULL;
MKL_LONG v, i, n, n_out, s_out, s, mkl_err;
const MKL_LONG h = 7;
const MKL_LONG N = 512;
MKL_Complex16 local_err;
double max_dft_err = 0.0;
int mpi_err = MPI_Init(&argc, &argv); CHK_MPI_ERR(mpi_err);
int exit_code = EXIT_FAILURE;
/* Create descriptor for 1D FFT */
mkl_err = DftiCreateDescriptorDM(MPI_COMM_WORLD, &desc, DFTI_DOUBLE,
DFTI_COMPLEX, 1, N); CHK_MKL_ERR(mkl_err);
/* Ask necessary length of array and workspace and allocate memory */
mkl_err = DftiGetValueDM(desc, CDFT_LOCAL_SIZE, &v);
data = (MKL_Complex16*) malloc(v*sizeof(MKL_Complex16));
work = (MKL_Complex16*) malloc(v*sizeof(MKL_Complex16));
if (data == NULL || work == NULL) goto finish;
/* Fill local array with initial data. Local array has n elements,
0th element of data corresponds to s-th element of virtual global array */
mkl_err = DftiGetValueDM(desc, CDFT_LOCAL_NX, &n); CHK_MKL_ERR(mkl_err);
mkl_err = DftiGetValueDM(desc, CDFT_LOCAL_X_START, &s); CHK_MKL_ERR(mkl_err);
/* Set work array as a workspace */
mkl_err = DftiSetValueDM(desc, CDFT_WORKSPACE, work); CHK_MKL_ERR(mkl_err);
/* Virtual global array globalIN is such that globalIN[s+i] := data[i],
0 <= i < n */
for(i = 0; i < n; ++i) {
data[i].real = cos(2.0*M_PI*(i+s)*h/N) / N;
data[i].imag = sin(2.0*M_PI*(i+s)*h/N) / N;
}
/* Commit descriptor and compute DFT */
mkl_err = DftiCommitDescriptorDM(desc); CHK_MKL_ERR(mkl_err);
mkl_err = DftiComputeForwardDM(desc, data); CHK_MKL_ERR(mkl_err);
mkl_err = DftiGetValueDM(desc, CDFT_LOCAL_OUT_NX, &n_out); CHK_MKL_ERR(mkl_err);
mkl_err = DftiGetValueDM(desc, CDFT_LOCAL_OUT_X_START, &s_out); CHK_MKL_ERR(mkl_err);
/* Virtual global array globalOUT is such that
globalOUT[s_out + i] := data[i], 0 <= i < n_out */
for (i = 0; i < n_out; ++i) {
local_err = data[i];
if (i+s == h)
local_err.real -= 1.0; // expected value
max_dft_err = fmax(max_dft_err, hypot(local_err.real, local_err.imag));
}
mpi_err = MPI_Allreduce(MPI_IN_PLACE, &max_dft_err, 1, MPI_DOUBLE, MPI_MAX,
MPI_COMM_WORLD); CHK_MPI_ERR(mpi_err);
if (max_dft_err < 5.0*log2((double) N)*DBL_EPSILON)
exit_code = EXIT_SUCCESS;
finish:
if (mkl_err != DFTI_NO_ERROR || mpi_err != MPI_SUCCESS)
exit_code = EXIT_FAILURE;
if (desc) mkl_err = DftiFreeDescriptorDM(&desc);
if (data) free(data);
if (work) free(work);
mpi_err = MPI_Finalize();
if (mkl_err != DFTI_NO_ERROR || mpi_err != MPI_SUCCESS)
exit_code = EXIT_FAILURE;
return exit_code;
}