Advertisement

图像分割—mean shift(OpenCV源码注解)

阅读量:

关于meanshitf的介绍:

均值平移算法在图像分割中的应用(一)**: 1. 基本概念, 2. 具体实现流程

mean shift 图像分割 (二): 3 算法原理部分, 4 其延展内容

Mean Shift算法 图像分割 (三): 5 非参数密度估计

不得不说,这个OpenCV实现实在不咋地,

这次我改风格了,中英文杂合注释

main.cpp

复制代码
 #include "opencv2/opencv.hpp"

    
  
    
  
    
 #include <iostream>
    
  
    
 using namespace cv;
    
 using namespace std;
    
  
    
  
    
  
    
 static void help(char** argv)
    
 {
    
 	cout << "\nDemonstrate mean-shift based color segmentation in spatial pyramid.\n"
    
 		<< "Call:\n   " << argv[0] << " image\n"
    
 		<< "This program allows you to set the spatial and color radius\n"
    
 		<< "of the mean shift window as well as the number of pyramid reduction levels explored\n"
    
 		<< endl;
    
 }
    
  
    
 //This colors the segmentations
    
 static void floodFillPostprocess(Mat& img, const Scalar& colorDiff = Scalar::all(1))
    
 {
    
 	CV_Assert(!img.empty());
    
 	RNG rng = theRNG();
    
 	Mat mask(img.rows + 2, img.cols + 2, CV_8UC1, Scalar::all(0));
    
 	for (int y = 0; y < img.rows; y++)
    
 	{
    
 		for (int x = 0; x < img.cols; x++)
    
 		{
    
 			if (mask.at<uchar>(y + 1, x + 1) == 0)
    
 			{
    
 				Scalar newVal(rng(256), rng(256), rng(256));
    
 				floodFill(img, mask, Point(x, y), newVal, 0, colorDiff, colorDiff);
    
 			}
    
 		}
    
 	}
    
  
    
 }
    
  
    
 string winName = "meanshift";
    
 int spatialRad, colorRad, maxPyrLevel;
    
 Mat img, res;
    
  
    
 static void meanShiftSegmentation(int, void*)
    
 {
    
 	cout << "spatialRad=" << spatialRad << "; "
    
 		<< "colorRad=" << colorRad << "; "
    
 		<< "maxPyrLevel=" << maxPyrLevel << endl;
    
 	pyrMeanShiftFiltering(img, res, spatialRad, colorRad, maxPyrLevel);
    
 	floodFillPostprocess(res, Scalar::all(2));
    
 	imshow(winName, res);
    
 }
    
  
    
  
    
 int main(int argc, char** argv)
    
 {
    
  
    
 	//if (argc != 2)
    
 	//{
    
 	//	help(argv);
    
 	//	return -1;
    
 	//}
    
 	string fimg = "G:/Pic/fruits.jpg";//"G:/Pic/2012060619243397.png";
    
 	img = imread(fimg);
    
 	if (img.empty())
    
 		return -1;
    
 	//640-by-480  it works well to 	set spatialRadiusequal = 2 and colorRadiusequal = 40
    
 	// max_level, which describes how many levels of scale pyramid you want 
    
 	//used for segmentation.A max_levelof 2 or 3 works well for a 640 - by - 480 color image
    
 	spatialRad = 40;
    
 	colorRad = 22;
    
 	maxPyrLevel = 2;
    
  
    
 	namedWindow(winName, WINDOW_AUTOSIZE);
    
  
    
 	createTrackbar("spatialRad", winName, &spatialRad, 80, meanShiftSegmentation);
    
 	createTrackbar("colorRad", winName, &colorRad, 60, meanShiftSegmentation);
    
 	createTrackbar("maxPyrLevel", winName, &maxPyrLevel, 5, meanShiftSegmentation);
    
  
    
 	meanShiftSegmentation(0, 0);
    
 	waitKey();
    
 	return 0;
    
 }

segmentation.cpp

复制代码
 /*M///

    
 //
    
 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
    
 //
    
 //  By downloading, copying, installing or using the software you agree to this license.
    
 //  If you do not agree to this license, do not download, install,
    
 //  copy or use the software.
    
 //
    
 //
    
 //                        Intel License Agreement
    
 //                For Open Source Computer Vision Library
    
 //
    
 // Copyright (C) 2000, Intel Corporation, all rights reserved.
    
 // Third party copyrights are property of their respective owners.
    
 //
    
 // Redistribution and use in source and binary forms, with or without modification,
    
 // are permitted provided that the following conditions are met:
    
 //
    
 //   * Redistribution's of source code must retain the above copyright notice,
    
 //     this list of conditions and the following disclaimer.
    
 //
    
 //   * Redistribution's in binary form must reproduce the above copyright notice,
    
 //     this list of conditions and the following disclaimer in the documentation
    
 //     and/or other materials provided with the distribution.
    
 //
    
 //   * The name of Intel Corporation may not be used to endorse or promote products
    
 //     derived from this software without specific prior written permission.
    
 //
    
 // This software is provided by the copyright holders and contributors "as is" and
    
 // any express or implied warranties, including, but not limited to, the implied
    
 // warranties of merchantability and fitness for a particular purpose are disclaimed.
    
 // In no event shall the Intel Corporation or contributors be liable for any direct,
    
 // indirect, incidental, special, exemplary, or consequential damages
    
 // (including, but not limited to, procurement of substitute goods or services;
    
 // loss of use, data, or profits; or business interruption) however caused
    
 // and on any theory of liability, whether in contract, strict liability,
    
 // or tort (including negligence or otherwise) arising in any way out of
    
 // the use of this software, even if advised of the possibility of such damage.
    
 //
    
 //M*/
    
  
    
 #include "precomp.hpp"
    
  
    
  
    
  
    
 /****************************************************************************************\
    
 *                                         Meanshift                                      *
    
 \****************************************************************************************/
    
  
    
 CV_IMPL void
    
 cvPyrMeanShiftFiltering( const CvArr* srcarr, CvArr* dstarr,
    
                      double sp0, double sr, int max_level,
    
                      CvTermCriteria termcrit )
    
 {
    
     const int cn = 3;
    
     const int MAX_LEVELS = 8;
    
  
    
     if( (unsigned)max_level > (unsigned)MAX_LEVELS )
    
     CV_Error( CV_StsOutOfRange, "The number of pyramid levels is too large or negative" );
    
  
    
     std::vector<cv::Mat> src_pyramid(max_level+1);
    
     std::vector<cv::Mat> dst_pyramid(max_level+1);
    
     cv::Mat mask0;
    
     int i, j, level;
    
     //uchar* submask = 0;
    
  
    
     #define cdiff(ofs0) (tab[c0-dptr[ofs0]+255] + \
    
     tab[c1-dptr[(ofs0)+1]+255] + tab[c2-dptr[(ofs0)+2]+255] >= isr22) // color diffference  Note: it‘s >= not <
    
  
    
     double sr2 = sr * sr;// sr: ||x|| color window radius 
    
     int isr2 = cvRound(sr2), isr22 = MAX(isr2,16);
    
     int tab[768]; // 256*,lookup table for fast square distance  computation 
    
     cv::Mat src0 = cv::cvarrToMat(srcarr);
    
     cv::Mat dst0 = cv::cvarrToMat(dstarr);
    
  
    
     if( src0.type() != CV_8UC3 )
    
     CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 3-channel images are supported" );
    
  
    
     if( src0.type() != dst0.type() )
    
     CV_Error( CV_StsUnmatchedFormats, "The input and output images must have the same type" );
    
  
    
     if( src0.size() != dst0.size() )
    
     CV_Error( CV_StsUnmatchedSizes, "The input and output images must have the same size" );
    
  
    
     if( !(termcrit.type & CV_TERMCRIT_ITER) )
    
     termcrit.max_iter = 5; // if we don't have set the number of iterations ,it will iterate 5  times at most .   
    
     termcrit.max_iter = MAX(termcrit.max_iter,1);
    
     termcrit.max_iter = MIN(termcrit.max_iter,100); // max iteration is 100
    
     if( !(termcrit.type & CV_TERMCRIT_EPS) )
    
     termcrit.epsilon = 1.f; // the default epsilon 
    
     termcrit.epsilon = MAX(termcrit.epsilon, 0.f);
    
  
    
     for( i = 0; i < 768; i++ )
    
     tab[i] = (i - 255)*(i - 255); //tab[0]=255^2,tab[255]=0,tab[512]=255^2
    
  
    
     // 1. construct pyramid
    
     src_pyramid[0] = src0;
    
     dst_pyramid[0] = dst0;
    
     for( level = 1; level <= max_level; level++ )
    
     {
    
     src_pyramid[level].create( (src_pyramid[level-1].rows+1)/2,
    
                     (src_pyramid[level-1].cols+1)/2, src_pyramid[level-1].type() );// plus 1 to ensure both the clos and row are even 
    
     dst_pyramid[level].create( src_pyramid[level].rows,
    
                     src_pyramid[level].cols, src_pyramid[level].type() ); //
    
     cv::pyrDown( src_pyramid[level-1], src_pyramid[level], src_pyramid[level].size() );// first using Gaussian blure then  removing every even-numbered row and column
    
     //CV_CALL( cvResize( src_pyramid[level-1], src_pyramid[level], CV_INTER_AREA ));
    
     }
    
  
    
     mask0.create(src0.rows, src0.cols, CV_8UC1);  // memory buf for mask of every scale 
    
     //CV_CALL( submask = (uchar*)cvAlloc( (sp+2)*(sp+2) ));
    
  
    
     // 2. apply meanshift, starting from the pyramid top (i.e. the smallest layer)
    
     for( level = max_level; level >= 0; level-- )
    
     {
    
     cv::Mat src = src_pyramid[level]; // the current processing layer 
    
     cv::Size size = src.size();
    
     uchar* sptr = src.data; //  
    
     int sstep = (int)src.step;// all bytee in a row(including the padded pixels )
    
     uchar* mask = 0;
    
     int mstep = 0;
    
     uchar* dptr;
    
     int dstep;
    
     float sp = (float)(sp0 / (1 << level)); // spatial window radius,keep the contents which the kernel can cover are identical     
    
     sp = MAX( sp, 1 );
    
  
    
     if( level < max_level ) //except for the top level,先跳过,其实也可以忽略
    
     {
    
         cv::Size size1 = dst_pyramid[level+1].size(); // notice that layer level+1 has been  processed 
    
         cv::Mat m( size.height, size.width, CV_8UC1, mask0.data );  // Note that the memory to which .data point don't have the same size as the m. Howerver,the former will alway large or equal to the later. We just use the mask0 as an big enough container that only allocate one time. 
    
         dstep = (int)dst_pyramid[level+1].step;//
    
         dptr = dst_pyramid[level+1].data + dstep + cn; //jump the first row and first cloumn(including 3 channels) 
    
         mstep = (int)m.step;
    
         mask = m.data + mstep;//jump the first  row
    
         //cvResize( dst_pyramid[level+1], dst_pyramid[level], CV_INTER_CUBIC );
    
         cv::pyrUp( dst_pyramid[level+1], dst_pyramid[level], dst_pyramid[level].size() ); // 这一行有意义吗?完全可以去掉啊?????
    
 			// Note:the image is first upsized with new even rows and cols filled with 0s ,thereafter the missing values is approximated with the Gaussian convolution.
    
         m.setTo(cv::Scalar::all(0));
    
  
    
         for( i = 1; i < size1.height-1; i++, dptr += dstep - (size1.width-2)*3, mask += mstep*2 )
    
 			// dptr + dstep + width*3*2 : jump to the second point of the next row 
    
 			// mstep*2  : 2 row in mask is correspondence to 1 rows in dst_pyramid[level+1]; 
    
             for( j = 1; j < size1.width-1; j++, dptr += cn )//jump the first and the last column,Notice that before jump to the next row,the dptr have pointed to the last column
    
             {
    
                 int c0 = dptr[0], c1 = dptr[1], c2 = dptr[2];
    
                 mask[j*2 - 1] = cdiff(-3) || cdiff(3) || cdiff(-dstep-3) || cdiff(-dstep) ||
    
                     cdiff(-dstep+3) || cdiff(dstep-3) || cdiff(dstep) || cdiff(dstep+3);//if any of it's 8 neigbours doesn't similar with it in color sapce,it should be proceed  which labeled with 1
    
             }
    
         }
    
  
    
         cv::dilate( m, m, cv::Mat() );
    
         mask = m.data;
    
     }
    
  
    
     dptr = dst_pyramid[level].data;
    
     dstep = (int)dst_pyramid[level].step;
    
  
    
     for( i = 0; i < size.height; i++, sptr += sstep - size.width*3,// jumpping the padding bytes in the ends of each row of src 
    
                                       dptr += dstep - size.width*3,// jumpping the padding bytes in the ends of each row of src
    
                                       mask += mstep ) // the offset of mask
    
     {
    
         for( j = 0; j < size.width; j++, sptr += 3, dptr += 3 )
    
         {
    
             int x0 = j, y0 = i, x1, y1, iter;// x1,y1: the position of mode 
    
             int c0, c1, c2;
    
  
    
             if( mask && !mask[j] ) // 可以忽略,mask !=0:except for the top level, mask[j]==0: similar to all of it's 8 neighbors
    
                 continue;
    
  
    
             c0 = sptr[0], c1 = sptr[1], c2 = sptr[2]; // b,g,r or L,u,v of the central position 
    
  
    
             // iterate meanshift procedure,核心部分
    
             for( iter = 0; iter < termcrit.max_iter; iter++ )
    
             {
    
                 uchar* ptr;
    
                 int x, y, count = 0; // count : count the number of pixels whithin the color support in a square window  
    
                 int minx, miny, maxx, maxy;
    
                 int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0;//
    
                 double icount;
    
                 int stop_flag;
    
  
    
                 //mean shift: process pixels in window (p-sigmaSp)x(p+sigmaSp)
    
 					//a square window of (2*sp+1)*(2*sp+1)
    
                 minx = cvRound(x0 - sp); minx = MAX(minx, 0);//ensure minx doesn't less than the first column 
    
                 miny = cvRound(y0 - sp); miny = MAX(miny, 0);//ensure minx doesn't less than the first row
    
                 maxx = cvRound(x0 + sp); maxx = MIN(maxx, size.width-1);
    
                 maxy = cvRound(y0 + sp); maxy = MIN(maxy, size.height-1);
    
                 ptr = sptr + (miny - i)*sstep + (minx - j)*3;// move to (minx,miny)
    
  
    
                 for( y = miny; y <= maxy; y++, ptr += sstep - (maxx-minx+1)*3 ) 
    
                 {
    
                     int row_count = 0; // count the number of pixels whthin the color support in a row
    
                     x = minx;
    
                     #if CV_ENABLE_UNROLLED //展开,先跳过
    
                     for( ; x + 3 <= maxx; x += 4, ptr += 12 )//process 4 colums every cycle to reduce the cycle times. it will be  faster than loop every column
    
                     {
    
                         int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];
    
                         if( tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2 )
    
                         {
    
                             s0 += t0; s1 += t1; s2 += t2;
    
                             sx += x; row_count++;
    
                         }
    
                         t0 = ptr[3], t1 = ptr[4], t2 = ptr[5];
    
                         if( tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2 )
    
                         {
    
                             s0 += t0; s1 += t1; s2 += t2;
    
                             sx += x+1; row_count++;
    
                         }
    
                         t0 = ptr[6], t1 = ptr[7], t2 = ptr[8];
    
                         if( tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2 )
    
                         {
    
                             s0 += t0; s1 += t1; s2 += t2;
    
                             sx += x+2; row_count++;
    
                         }
    
                         t0 = ptr[9], t1 = ptr[10], t2 = ptr[11];
    
                         if( tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2 )
    
                         {
    
                             s0 += t0; s1 += t1; s2 += t2;
    
                             sx += x+3; row_count++;
    
                         }
    
                     }
    
                     #endif
    
                     for( ; x <= maxx; x++, ptr += 3 ) // if we have defined CV_ENABLE_UNROLLED then processing the remain (maxx+1)%4 cloumns otherwise processing all of columns
    
                     {
    
                         int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2]; //b,g,r
    
                         if( tab[t0-c0+255] + tab[t1-c1+255] + tab[t2-c2+255] <= isr2 )//truncate with isr2 to have finite support(color similarity ) 
    
                         {  // the value [-255,255] first map to [0 510],then map to the squre distance to central position (i,j)
    
                             s0 += t0; s1 += t1; s2 += t2;
    
                             sx += x; 
    
 								row_count++;
    
                         }
    
                     }
    
                     count += row_count;
    
                     sy += y*row_count;
    
                 }
    
  
    
                 if( count == 0 )
    
                     break;
    
  
    
                 icount = 1./count;
    
                 x1 = cvRound(sx*icount); // x mean 
    
                 y1 = cvRound(sy*icount); // Y mean 
    
                 s0 = cvRound(s0*icount); // b mean 
    
                 s1 = cvRound(s1*icount); // g mean 
    
                 s2 = cvRound(s2*icount); // r mean
    
  
    
                 stop_flag = (x0 == x1 && y0 == y1) || abs(x1-x0) + abs(y1-y0) + // converge to (i,j)
    
                     tab[s0 - c0 + 255] + tab[s1 - c1 + 255] +
    
                     tab[s2 - c2 + 255] <= termcrit.epsilon; //movement can be ignored
    
  
    
                 x0 = x1; y0 = y1;
    
                 c0 = s0; c1 = s1; c2 = s2;// Notice;the center color was replaced by the filtered value  
    
  
    
                 if( stop_flag )
    
                     break;
    
             }
    
 				
    
             dptr[0] = (uchar)c0; //assign the filtered value of converging point to the starting point
    
             dptr[1] = (uchar)c1;
    
             dptr[2] = (uchar)c2;
    
         }
    
     }
    
     }
    
 }
    
  
    
 void cv::pyrMeanShiftFiltering( InputArray _src, OutputArray _dst,
    
                             double sp, double sr, int maxLevel,
    
                             TermCriteria termcrit )
    
 {
    
     Mat src = _src.getMat();
    
  
    
     if( src.empty() )
    
     return;
    
  
    
     _dst.create( src.size(), src.type() );
    
     CvMat c_src = src, c_dst = _dst.getMat();
    
     cvPyrMeanShiftFiltering( &c_src, &c_dst, sp, sr, maxLevel, termcrit );
    
 }

全部评论 (0)

还没有任何评论哟~