这个秋天，OpenCV和MCU更配哟（优化篇2）-icspec

这个秋天，OpenCV和MCU更配哟（优化篇2）

来源:恩智浦MCU加油站发布时间:2022-12-08

分享至微信

原计划本系列的四篇都已经发表了，分别是“先跑篇”、“配置篇”、“实战篇”和“进阶篇”，现在已经入冬，我们这个秋天的系列将以两篇“优化篇”结束，带您牵手OpenCV，进入OpenCV的广阔世界。

作为《这个秋天系列》真正的最后一篇，写到这儿，小编对这个系列已经有些恋恋不舍了。奈何，一颗急于交稿的心无时无刻不在关心着小编：最后一篇了，能不能快点！！

话不多说了，代码展示环节！写代码之前，要首先确定一下代码的基础框架以及集成方式。考虑到SDK中提供的PXP相关代码，包含了很多NXP相关的头文件以及驱动文件。将它们全部集成进OpenCV是一件繁琐而枯燥的事情，因此，我们退而求其次，将这些基础驱动的添加和维护任务直接丢给用户来处理。

本着谁用，谁负责的原则。用户需要自己将这些PXP相关的驱动文件以及相关的头文件导入到示例工程中。而在OpenCV中，只需要添加相应的外部符号即可。这样，链接器会帮助OpenCV找到心中的她。

下面的事情就变得简单了，我们只需要在OpenCV中添加一些调用PXP的外围代码即可。

首先是resize函数，在resize.cpp中：

void cv::resize( InputArray _src, OutputArray _dst, Size dsize,
          double inv_scale_x, double inv_scale_y, int interpolation )

通过查看代码，发现其中有一个基于OCL的优化代码：

CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat() && _src.cols() > 10 && _src.rows() > 10,
            ocl_resize(_src, _dst, dsize, inv_scale_x, inv_scale_y, interpolation))

这样一来，问题就更简单了，让我们直接发挥学习精神，写个我们自己的：

CV_PXP_RUN(_src.dims() <= 2 && _dst.isMat() && (interpolation == INTER_LINEAR),
            resize_pxp(_src, _dst, dsize, inv_scale_x, inv_scale_y))

这里有一点要注意，PXP只能支持INTER_LINEAR类型的resize。因此需要我们对传入的resize方式进行查看。如果是其他类型，就需要调用OpenCV自带的resize代码了。

接下来将上述宏定义实现在private.hpp中：

#ifdef HAVE_PXP
/*
 * User-provided PXP resize entry point; OpenCV only declares it here and the
 * linker resolves it against the application's implementation.
 */
int resize_pxp(cv::InputArray _src, cv::OutputArray _dst, cv::Size dsize, float fx=0, float fy=0, int rotateCode=-1, int flipCode=-2);
/*
 * CV_PXP_RUN_(condition, func, ...)
 *
 * Evaluates `condition`; when it holds, evaluates `func`. If `func` yields a
 * non-zero/true value the enclosing function returns immediately (returning
 * __VA_ARGS__, which is empty for void functions). Otherwise execution falls
 * through to the code following the macro. A cv::Exception thrown while
 * evaluating either expression is swallowed so the fallback path still runs.
 *
 * Note: no whitespace may follow a line-continuation backslash.
 */
#define CV_PXP_RUN_(condition, func, ...) \
  try \
  { \
    if ((condition) && (func)) \
    { \
      return __VA_ARGS__; \
    } \
  } \
  catch (const cv::Exception& e) \
  { \
    CV_UNUSED(e);  /* TODO: Add some logging here */ \
  }
#else
/* PXP support disabled: the hook expands to nothing. */
#define CV_PXP_RUN_(condition, func, ...)
#endif
/* Convenience form for functions returning void. */
#define CV_PXP_RUN(condition, func)  CV_PXP_RUN_(condition, func)

以上代码的意图就显而易见了，如果使能了WITH_PXP功能，那么HAVE_PXP的宏就会被定义，这样一来就会调用外部resize_pxp函数进行图像的resize操作。

现在，OpenCV端的代码编写工作就完成了，接下来轮到重头戏了：编写用户端的PXP相关代码，包括PXP初始化，resize_pxp等，姑且叫它pxp_nxp.cpp :

1. 既然是C++，就要凸显C++的样子，定义PXP类：

#include <cstdint>
#include <cstring>

#include "opencv2/opencv.hpp"
using namespace cv;
/**
 * @brief Thin C++ wrapper around the PXP peripheral.
 *
 * Constructing an instance runs pxp_init() (see the out-of-class constructor
 * definition); resize() drives the PXP scaler.
 */
class pxp_handler
{
public:
     /** @brief Initialise the PXP hardware. */
     pxp_handler();

     /**
      * @brief Scale @p _src to @p dsize with the PXP.
      * @param _src         input image
      * @param _dst         output image
      * @param dsize        requested output size
      * @param fx           horizontal scale factor
      * @param fy           vertical scale factor
      * @param rotate_code  rotation selector, -1 by default
      * @param flip_code    flip selector, -2 by default
      * @return status code from the out-of-class implementation
      */
     int resize(cv::InputArray _src, cv::OutputArray _dst, cv::Size dsize, float fx=0, float fy=0, int rotate_code=-1, int flip_code=-2);
};

2. 函数实现：

/** @brief Construct the handler; the only work done is the call to pxp_init(). */
pxp_handler::pxp_handler()
{
   pxp_init();
}
/**
 * @brief Return the process-wide pxp_handler instance as an opaque pointer.
 *
 * The instance is a function-local static, so its constructor (and therefore
 * pxp_init()) runs the first time this function is called.
 *
 * @return address of the shared pxp_handler, type-erased to void*.
 */
static inline void* get_pxp_handler(){
    static pxp_handler  s_pxp_handler;
    // Hand out the object's address; casting the object itself to void* is ill-formed.
    return static_cast<void*>(&s_pxp_handler);
}
/**
 * @brief Free-function entry point that forwards to the shared pxp_handler.
 *
 * All arguments are passed through unchanged to pxp_handler::resize(), and
 * its return value is returned as-is.
 */
int resize_pxp(cv::InputArray _src, cv::OutputArray _dst, cv::Size dsize, float fx=0, float fy=0, int rotate_code=-1, int flip_code=-2){
   // get_pxp_handler() returns the shared instance as void*; recover its type.
   pxp_handler* const pxp = static_cast<pxp_handler*>(get_pxp_handler());
   const int status = pxp->resize(_src, _dst, dsize, fx, fy, rotate_code, flip_code);
   return status;
}

这里，我们在函数内部定义了一个静态对象，在它第一次被使用时，其构造函数就会被调用，从而完成PXP初始化函数pxp_init()的调用。

3.不过在编写resize函数之前，还有一件事儿要考虑。PXP本身有一个限制，只能支持ARGB32，RGB565或是YUV数据，就是没有RGB24。。。：

/*! @brief PXP process surface buffer pixel format. */
typedef enum _pxp_ps_pixel_format
{
    /*! 32-bit pixels without alpha (unpacked 24-bit format) */
    kPXP_PsPixelFormatRGB888 = 0x04,
    /*! 16-bit pixels without alpha. */
    kPXP_PsPixelFormatRGB555 = 0x0C,
    /*! 16-bit pixels without alpha. */
    kPXP_PsPixelFormatRGB444 = 0x0D,
    /*! 16-bit pixels without alpha. */
    kPXP_PsPixelFormatRGB565 = 0x0E,
    /*! 32-bit pixels (1-plane XYUV unpacked). */
    kPXP_PsPixelFormatYUV1P444 = 0x10,
    /*! 16-bit pixels (1-plane U0,Y0,V0,Y1 interleaved bytes) */
    kPXP_PsPixelFormatUYVY1P422 = 0x12,
    /*! 16-bit pixels (1-plane V0,Y0,U0,Y1 interleaved bytes) */
    kPXP_PsPixelFormatVYUY1P422 = 0x13,
    /*! 8-bit monochrome pixels (1-plane Y luma output) */
    kPXP_PsPixelFormatY8 = 0x14,
    /*! 4-bit monochrome pixels (1-plane Y luma, 4 bit truncation) */
    kPXP_PsPixelFormatY4 = 0x15,
    /*! 16-bit pixels (2-plane UV interleaved bytes) */
    kPXP_PsPixelFormatYUV2P422 = 0x18,
    /*! 16-bit pixels (2-plane UV) */
    kPXP_PsPixelFormatYUV2P420 = 0x19,
    /*! 16-bit pixels (2-plane VU interleaved bytes) */
    kPXP_PsPixelFormatYVU2P422 = 0x1A,
    /*! 16-bit pixels (2-plane VU) */
    kPXP_PsPixelFormatYVU2P420 = 0x1B,
    /*! 16-bit pixels (3-plane) */
    kPXP_PsPixelFormatYVU422 = 0x1E,
    /*! 16-bit pixels (3-plane) */
    kPXP_PsPixelFormatYVU420 = 0x1F,
} pxp_ps_pixel_format_t;

是不是很气人……因为OpenCV中大量使用RGB24的像素数据。如果想要使用PXP进行加速优化，就要实现一个高效的RGB24转RGB565的转换函数。否则，这个转换部分可能会成为性能瓶颈，让PXP辛辛苦苦做的优化工作荡然无存。

经过多次尝试，我们最终得到了如下代码：

/*
 * zip_v(v, bits, shift_l): keep the top `bits` bits of the 8-bit value `v`
 * and move them left by `shift_l`. Every parameter is parenthesized so that
 * expression arguments (e.g. `a | b`) are evaluated as a unit.
 */
#define zip_v(v, bits, shift_l) (((v) >> (8 - (bits))) << (shift_l))
/*
 * RGB2RGB565(r, g, b): pack three 8-bit channels into a 16-bit 5-6-5 value
 * (red in bits 15..11, green in bits 10..5, blue in bits 4..0).
 */
#define RGB2RGB565(r, g, b) \
(zip_v((r), 5, 11) | zip_v((g), 6, 5) | zip_v((b), 5, 0))
/*
 * Scratch storage giving several views of the same bytes. The converter in
 * this file fills it through `rgbx4` (three 32-bit words) and reads it back
 * through `rgb` (twelve bytes, i.e. four 3-byte pixels).
 * NOTE(review): rgb_clip_t is declared outside this view; its layout is not
 * visible here.
 */
struct color_t {
    union {
        rgb_clip_t rgb_clip[4];
        uint8_t    rgb[12];
        uint32_t   rgbx4[3];
    } rgb_rgb565;
};

/* Pack one 3-byte pixel stored as B,G,R into a 16-bit RGB565 value. */
static inline uint16_t pxp_pack_bgr888_to_rgb565(const uint8_t *bgr) {
   return static_cast<uint16_t>(((bgr[2] >> 3) << 11) |
                                ((bgr[1] >> 2) << 5) |
                                (bgr[0] >> 3));
}

/**
 * @brief Convert packed 24-bit pixels (byte order B,G,R) to 16-bit RGB565.
 *
 * Pixels are processed four at a time (12 input bytes -> 8 output bytes);
 * any remaining 1..3 pixels are converted individually so that pixel counts
 * that are not a multiple of four are fully handled.
 *
 * @param prgb888 source buffer holding at least pixCnt * 3 bytes
 * @param prgb565 destination buffer with room for at least pixCnt * 2 bytes
 * @param pixCnt  number of pixels to convert
 * @return 0 on success, -1 if a buffer pointer is null while pixCnt > 0
 */
int RGB888toRGB565_struct(uint32_t *prgb888, uint32_t *prgb565, uint32_t pixCnt) {
   if (pixCnt == 0) {
      return 0;
   }
   if (prgb888 == nullptr || prgb565 == nullptr) {
      return -1;
   }

   // Walk both buffers bytewise; memcpy keeps the accesses alignment-safe.
   const uint8_t *src = reinterpret_cast<const uint8_t *>(prgb888);
   uint8_t *dst = reinterpret_cast<uint8_t *>(prgb565);

   while (pixCnt >= 4) {
      uint8_t bgr[12];
      std::memcpy(bgr, src, sizeof(bgr));

      // Two pixels per 32-bit word: the earlier pixel in the low half-word.
      // Widen to uint32_t before shifting to avoid overflowing a signed int.
      uint32_t rgb565x2[2];
      rgb565x2[0] = static_cast<uint32_t>(pxp_pack_bgr888_to_rgb565(bgr + 0)) |
                    (static_cast<uint32_t>(pxp_pack_bgr888_to_rgb565(bgr + 3)) << 16);
      rgb565x2[1] = static_cast<uint32_t>(pxp_pack_bgr888_to_rgb565(bgr + 6)) |
                    (static_cast<uint32_t>(pxp_pack_bgr888_to_rgb565(bgr + 9)) << 16);
      std::memcpy(dst, rgb565x2, sizeof(rgb565x2));

      src += 12;   // advance the input by four 3-byte pixels
      dst += 8;    // advance the output by four 2-byte pixels
      pixCnt -= 4;
   }

   // Tail: fewer than four pixels left.
   while (pixCnt > 0) {
      const uint16_t px = pxp_pack_bgr888_to_rgb565(src);
      std::memcpy(dst, &px, sizeof(px));
      src += 3;
      dst += 2;
      --pixCnt;
   }
   return 0;
}

/**
 * @brief Convert an image to a 16-bit-per-pixel buffer via RGB888toRGB565_struct().
 *
 * @p _dst is (re)created with the size of @p _src and type CV_16U, then the
 * raw data pointers of both matrices are handed to the converter.
 *
 * @param _src      source image
 * @param _dst      destination, allocated here
 * @param image_len pixel count forwarded to the converter
 */
void bgr2rgb565(cv::InputArray _src, cv::OutputArray _dst, uint32_t image_len){
   Mat bgr = _src.getMat();

   _dst.create(bgr.size(), CV_16U);
   Mat packed = _dst.getMat();

   uint32_t *in_words  = reinterpret_cast<uint32_t*>(bgr.data);
   uint32_t *out_words = reinterpret_cast<uint32_t*>(packed.data);
   RGB888toRGB565_struct(in_words, out_words, image_len);
}

最终的pxp_handler::resize函数如下：

/**
 * @brief Scale an image with the PXP.
 *
 * Only 8-bit inputs with 2 channels (passed to the PXP as-is) or 3 channels
 * (first converted with bgr2rgb565()) are handled. For anything else, or for
 * an empty source / empty target size, 0 is returned before any work is done
 * so that a caller testing the result can take a different path.
 *
 * @param _src        input image
 * @param _dst        output image; (re)allocated when it has no data or
 *                    shares its buffer with @p _src
 * @param dsize       requested output size
 * @param fx          not used by this implementation
 * @param fy          not used by this implementation
 * @param rotate_code ROTATE_90_CLOCKWISE / ROTATE_90_COUNTERCLOCKWISE swap the
 *                    width and height of a newly allocated destination
 * @param flip_code   not used by this implementation
 * @return 1 once the PXP has finished, 0 if the input is not supported
 */
int pxp_handler::resize(cv::InputArray _src, cv::OutputArray _dst, cv::Size dsize, float fx, float fy, int rotate_code, int flip_code){
   (void)fx;
   (void)fy;
   (void)flip_code;

   Mat src = _src.getMat();
   if(src.empty() || dsize.width <= 0 || dsize.height <= 0){
      return 0;
   }
   // The converter below reads exactly 3 bytes per pixel, so reject every
   // layout other than 8-bit x 2 channels or 8-bit x 3 channels up front.
   if(src.depth() != CV_8U || (src.channels() != 2 && src.channels() != 3)){
      return 0;
   }

   Mat dst = _dst.getMat();
   if(src.data == dst.data || dst.data == nullptr){
      // only 90/270 need create new one
      if((rotate_code == ROTATE_90_CLOCKWISE) || (rotate_code == ROTATE_90_COUNTERCLOCKWISE)){
         _dst.create(Size(dsize.height, dsize.width), src.type());
      }
      else{
         _dst.create(Size(dsize.width, dsize.height), src.type());
      }
      // Re-fetch in both cases: create() may have allocated a new buffer.
      dst = _dst.getMat();
   }

   // NOTE(review): PXP_CFG is defined outside this view and may refer to these
   // locals by name, so their names are kept and none of them is removed.
   uint32_t src_w = src.cols, src_h = src.rows, src_c = src.channels(),
            src_ptr = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(src.data));
   uint32_t dst_w = dst.cols, dst_h = dst.rows, dst_c = dst.channels(),
            dst_ptr = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(dst.data));
   (void)dst_w;
   (void)dst_h;
   (void)dst_c;
   (void)dst_ptr;

   Mat tmp(src);
   if(src_c != 2){
      // 3-channel input: convert to 16 bits per pixel and feed that buffer instead.
      bgr2rgb565(src, tmp, src_w * src_h);
      src_ptr = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(tmp.data));
   }
   (void)src_ptr;

   PXP_CFG(dsize.width, dsize.height);
   PXP_SetProcessSurfaceScaler(PXP, src_w, src_h, dsize.width, dsize.height);
   WAIT_PXP_DONE();
   return 1;
}