你可以这样做:
/*
 * Visit the 8x8 tile of m whose top-left element is m[baserow][basecol].
 *
 * m        array of row pointers (m[row][col] addressing)
 * baserow  first row of the tile
 * basecol  first column of the tile
 *
 * The per-element work is a placeholder: the loop body is empty.
 */
void dct(unsigned char **m, int baserow, int basecol)
{
    const int row_limit = baserow + 8;
    const int col_limit = basecol + 8;

    for (int r = baserow; r < row_limit; ++r) {
        for (int c = basecol; c < col_limit; ++c) {
            ; // operate on m[r][c]
        }
    }
}
/*
 * Run dct() once on every 8x8 tile of an image addressed as m[row][col].
 *
 * m         array of row pointers
 * num_rows  image height; must be positive and a multiple of 8
 * num_cols  image width; must be positive and a multiple of 8
 *
 * Returns 0 after all tiles were visited, or -1 (without calling dct)
 * when either dimension fails the checks above.
 */
int do_dcts(unsigned char **m, int num_rows, int num_cols)
{
    const int rows_ok = num_rows > 0 && num_rows % 8 == 0;
    const int cols_ok = num_cols > 0 && num_cols % 8 == 0;

    if (!rows_ok || !cols_ok) {
        return -1;
    }

    const int tile_rows = num_rows / 8;
    const int tile_cols = num_cols / 8;

    /* Tiles are visited row-major: left to right, then top to bottom. */
    for (int tr = 0; tr < tile_rows; ++tr) {
        for (int tc = 0; tc < tile_cols; ++tc) {
            dct(m, tr * 8, tc * 8);
        }
    }
    return 0;
}
用两级指针来实现二维数组既浪费空间,又会破坏内存局部性。更好的做法是只进行一次分配,然后通过偏移量来索引数组,如下所示:
// Example setup: the image is held in one contiguous buffer of
// rows*colums bytes rather than in an array of per-row pointers.
int main()
{
FILE *image_raw;
unsigned char *matriz_image; // flat buffer, allocated below
int i, j;
int rows=1080, colums=1920;
// One allocation covers every byte of the image.
// NOTE(review): the malloc result is not checked for NULL in the visible code.
matriz_image = malloc(rows*colums*sizeof(unsigned char));
...
如果行数和列数可以是编译时常量,或者您的编译器支持 VLA(变长数组),则可以执行以下操作:
// View the flat buffer as a pointer to rows of `colums` bytes, so elements
// can be addressed as m[row][col] while the storage stays one contiguous block.
// The array bound `colums` must be a constant expression, or the compiler
// must support variable length arrays.
unsigned char (*m)[colums] = (unsigned char (*)[colums]) matriz_image;
m[5][2] = 2; // double indexed access without extra pointers + allocs
类似地,您可以把这个指向矩阵的指针 m 传递给函数,由函数对矩阵进行操作。
如果不能将行数和列数设置为编译时常量,并且没有 VLA,那么可以编写辅助函数来为您执行指针运算:
/*
 * Return a pointer to the first element of a row in a flat, row-major matrix.
 *
 * m        start of the matrix storage
 * numcols  number of elements per row
 * row      zero-based row index
 *
 * No bounds checking is performed; the caller must pass a valid row.
 *
 * Declared static inline: a plain `inline` definition provides no external
 * definition in C99/C11, so a call that is not inlined would fail to link.
 */
static inline unsigned char *get_row(unsigned char *m, int numcols, int row)
{
    return &m[row * numcols];
}
/*
 * Return a pointer to element (row, col) of a flat, row-major matrix.
 *
 * m        start of the matrix storage
 * numcols  number of elements per row
 * row      zero-based row index
 * col      zero-based column index
 *
 * A pointer is returned so the caller can both read and write the element:
 *     *get_elem(m, numcols, r, c) = value;
 * No bounds checking is performed.
 *
 * Declared static inline: a plain `inline` definition provides no external
 * definition in C99/C11, so a call that is not inlined would fail to link.
 */
static inline unsigned char *get_elem(unsigned char *m, int numcols, int row, int col)
{
    return &m[row * numcols + col];
}
...
// Store 2 at row 5, column 2: the helper returns a pointer to the element,
// so the result is dereferenced to assign through it.
*get_elem(m, colums, 5, 2) = 2; // double indexing not as nice but good memory usage
如果您确实需要快速执行这些操作,那么在读取图像时,可以重新组织图像,以便在内存中连续放置8x8字节的块,以获得最佳的缓存性能:
// organize m like m[rows * colums / 64][8][8]; so first index is an 8x8 block #
//
// The stream is consumed in order, 8 bytes at a time. Each 8-byte chunk is
// row i of the block in block-row k, block-column j. Its destination offset is
//     k * 8 * colums + i * 8 + j * 64
// which equals (k * (colums / 8) + j) * 64 + i * 8 when colums is a multiple
// of 8: block number times 64 bytes per block, plus 8 bytes per row in the block.
// Because of the integer divisions, rows or columns beyond the last full
// multiple of 8 are not read.
// NOTE(review): the fread return value is not checked, so a short read is not detected.
for (int k = 0; k < rows / 8; ++k) // read all rows in chunks of 8
for (int i = 0; i < 8; ++i) // read 8 rows
for (int j = 0; j < colums / 8; ++j) // read 1 row in 8 byte chunks
fread(&m[k * 8 * colums + i * 8 + j * 64], 1, 8, image_raw);
...
/* Pointer to the first row of an 8x8 block; index it as block[i][j]. */
typedef unsigned char (*block_ptr)[8];

/*
 * Return the 8x8 block with the given sequential number from a buffer in
 * which blocks are stored back to back, 64 bytes each.
 *
 * m          start of the block-organized buffer
 * num_cols   unused; kept so the signature parallels get_block2()
 * block_num  zero-based block index
 *
 * No bounds checking is performed.
 *
 * Declared static inline: a plain `inline` definition provides no external
 * definition in C99/C11, so a call that is not inlined would fail to link.
 */
static inline block_ptr get_block(unsigned char *m, int num_cols, int block_num)
{
    (void) num_cols; /* the block number alone determines the offset */
    return (block_ptr) &m[block_num * 64];
}
/*
 * Return the 8x8 block whose top-left pixel is (row, col), from a buffer in
 * which blocks are stored back to back, 64 bytes each.
 *
 * m         start of the block-organized buffer
 * num_cols  image width in pixels
 * row, col  pixel coordinates of the block's top-left corner
 *
 * Returns NULL when row or col is not a multiple of 8.
 *
 * With row, col and num_cols all multiples of 8, the offset
 *     row * num_cols + col * 8
 * equals ((row / 8) * (num_cols / 8) + col / 8) * 64, i.e. the sequential
 * block number times the block size. No bounds checking is performed.
 *
 * Declared static inline: a plain `inline` definition provides no external
 * definition in C99/C11, so a call that is not inlined would fail to link.
 */
static inline block_ptr get_block2(unsigned char *m, int num_cols, int row, int col)
{
    if (row % 8 != 0 || col % 8 != 0)
        return NULL;
    return (block_ptr) &m[row * num_cols + col * 8];
}
...
// Visit every 8x8 block in storage order; there are rows * colums / 64 of them.
for (int k = 0; k < rows * colums / 64; ++k)
{
    // Pass the image width variable `colums` that the rest of the snippet uses.
    block_ptr block = get_block(m, colums, k);
    for (int i = 0; i < 8; ++i)
        for (int j = 0; j < 8; ++j)
            ; // operate on block[i][j];
}