在MATLAB中对MEX文件的调用进行计时时:
% Build a 14000-by-14000 complex test matrix (real part drawn first, then imaginary part).
D = rand(14000) + 1i*rand(14000);

% Time a single call to the MEX file that requests all three outputs.
tic;
[A, B, C] = myMexFile(D);
toc

% Show the wall-clock time once the call has returned to MATLAB.
disp(datetime('now'));
输出为:
Elapsed time is 35.192704 seconds.
15-Sep-2018 16:51:35
使用以下最小工作示例,在C代码内部对MEX文件进行计时:
#include <limits.h>
#include <sys/time.h>
#include <time.h>
#include <cuComplex.h>
#include <mex.h>
/*
 * Return the current time of day, as reported by gettimeofday(), expressed
 * in seconds as a double (whole seconds plus the microsecond fraction).
 */
double getHighResolutionTime() {
    struct timeval now;
    gettimeofday(&now, NULL);

    /* Combine the integral seconds with the microseconds scaled to seconds. */
    const double whole_seconds = (double) now.tv_sec;
    const double fractional_seconds = (double) now.tv_usec / 1000000.0;
    return whole_seconds + fractional_seconds;
}
/*
 * Pack two separate double arrays into one array of cuDoubleComplex values.
 *
 * p           destination array; element k receives pr[k] in .x and pi[k] in .y
 * pr, pi      source arrays, each read for indices 0 .. numElements-1
 * numElements number of elements to convert; nothing is done when it is <= 0
 */
void double2cuDoubleComplex(cuDoubleComplex* p, double* pr, double* pi,int numElements){
    int idx = 0;
    while (idx < numElements) {
        cuDoubleComplex *dst = &p[idx];
        dst->x = pr[idx];
        dst->y = pi[idx];
        ++idx;
    }
}
/*
 * Unpack an array of cuDoubleComplex values into two separate double arrays.
 *
 * p           source array, read for indices 0 .. numElements-1
 * pr, pi      destination arrays; pr[k] receives p[k].x and pi[k] receives p[k].y
 * numElements number of elements to convert; nothing is done when it is <= 0
 */
void cuDoubleComplex2double(cuDoubleComplex* p, double* pr, double* pi,int numElements){
    cuDoubleComplex *src = p;
    double *x_out = pr;
    double *y_out = pi;

    for (int remaining = numElements; remaining > 0; --remaining) {
        *x_out++ = src->x;
        *y_out++ = src->y;
        ++src;
    }
}
/*
 * MEX entry point. Copies the complex input matrix into an interleaved
 * cuDoubleComplex buffer, then writes that buffer back out into each requested
 * output matrix (changing only the real part of element 0 to the output's
 * zero-based index), and prints how long the body took plus the wall-clock
 * time at the end.
 *
 * nlhs, plhs  number of requested outputs (at most 3) and the output slots
 * nrhs, prhs  number of inputs (exactly 1) and the inputs; prhs[0] must be a
 *             complex double array
 *
 * Raises a MATLAB error for a wrong argument count, a non-complex or
 * non-double input, or an input too large for the int-based helpers.
 *
 * NOTE(review): this uses the separate-complex accessors (mxGetPr/mxGetPi).
 * On MATLAB releases that store complex data interleaved, MATLAB presumably
 * converts the arguments outside this function, so the time printed here may
 * not include that conversion -- confirm against the MATLAB release in use.
 */
void mexFunction( int nlhs, mxArray *plhs[],int nrhs, const mxArray *prhs[]) {
    double tic=getHighResolutionTime();

    if (nrhs != 1) {
        mexErrMsgIdAndTxt("myMexFile:nrhs", "Exactly one input is required.");
    }
    if (nlhs > 3) {
        mexErrMsgIdAndTxt("myMexFile:nlhs", "At most three outputs are supported.");
    }
    /* mxGetPi() yields NULL for real input, which the conversion loop would dereference. */
    if (!mxIsDouble(prhs[0]) || !mxIsComplex(prhs[0])) {
        mexErrMsgIdAndTxt("myMexFile:inputType", "Input must be a complex double matrix.");
    }

    /* The helpers take an int element count, so reject sizes whose product would overflow int. */
    size_t rows = mxGetM(prhs[0]);
    size_t cols = mxGetN(prhs[0]);
    if (rows > (size_t)INT_MAX || cols > (size_t)INT_MAX ||
        (cols != 0 && rows > (size_t)INT_MAX / cols)) {
        mexErrMsgIdAndTxt("myMexFile:inputSize", "Input has too many elements.");
    }
    int m = (int)rows;
    int n = (int)cols;
    int SIZE = m*n;

    /* Get pointers to the input data from MATLAB and convert to interleaved ordering. */
    /* NOTE(review): mxMalloc in a MEX file is expected to abort back to MATLAB on failure -- confirm. */
    cuDoubleComplex *Gr = (cuDoubleComplex*) mxMalloc((size_t)SIZE * sizeof *Gr);
    double2cuDoubleComplex(Gr, mxGetPr(prhs[0]), mxGetPi(prhs[0]), SIZE);

    /*
     * Create only the outputs the caller asked for (always at least one, so
     * that `ans` can be set); plhs is not assumed to have more slots than that.
     */
    int numOutputs = (nlhs > 1) ? nlhs : 1;
    for (int k = 0; k < numOutputs; k++) {
        /* Modify the data so each output differs; skip when the input is empty. */
        if (SIZE > 0) {
            Gr[0].x = (double)k;
        }
        /* Outputs are m-by-n so they hold exactly the SIZE elements written below. */
        plhs[k] = mxCreateDoubleMatrix(m, n, mxCOMPLEX);
        /* Convert back to split (real/imaginary) ordering. */
        cuDoubleComplex2double(Gr, mxGetPr(plhs[k]), mxGetPi(plhs[k]), SIZE);
    }

    mxFree(Gr);

    double elapsed=getHighResolutionTime()-tic;
    mexPrintf("%f\n", elapsed);

    time_t current_time = time(NULL);
    char* c_time_string = ctime(&current_time);
    mexPrintf("time at end of MEX file %s\n", c_time_string);
}
输出为:
21.676793
time at end of MEX file Sat Sep 15 16:51:21 2018
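输出是非常大的矩阵,但它们在MEX文件的最后一行之前就已成功分配并初始化。然而,MEX文件内部测得的耗时约为21.7秒,而MATLAB一侧测得的耗时约为35.2秒;MEX文件结束的时刻(16:51:21)也比MATLAB返回的时刻(16:51:35)早了约14秒。除此之外我想不出别的原因了。是什么导致了这种行为?我该如何避免这种行为?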
更新:我已经用一个最小的工作示例替换了上面的伪代码。请注意,上面的代码实际上并不使用任何GPU功能。我加入cuComplex.h头只是为了使用cuDoubleComplex数据类型。