Advertisement

局部敏感哈希(原始LSH)C++实现

阅读量:
复制代码
    之前项目中用到LSH算法来做特征检索,对LSH算法很好奇,最近看了LSH的论文,依照自己的理解,初步写了LSH代码。测试效果不是特别理想,参数的选择也基本靠尝试,姑且先把代码放上来,之后再改进吧(2016.01.24)
    
    
      
    
    AI写代码

代码块

复制代码
    #include <vector>
    #include <opencv2/core/core.hpp>
    #include <opencv2/imgproc/imgproc.hpp>
    #include <opencv2/highgui/highgui.hpp>
    using namespace std;
    using namespace cv;
    /* 算法思路
 确定参数 k(bitCount)、以及哈希表个数l; (这里设置固定值吧 不知道怎么求最优质)
 生成一级哈希函数;
 数据一级哈希到bucket
 压缩bucket.用二级哈希函数把一级的bucket哈希到二级哈希表中(需要确定二级哈希表的容量M);
      */
    const int k = 700; //或者用展开成01串的特征的长度来计算k 
    const int l = 10;
    const int MaxValue = 255;//特征为uchar类型
    const int M = 100; //二级哈希表的容量
    
    
    struct Bucket
    {
    vector<Mat> features;//保存hash到这个桶的图像的特征
    vector<int> names;//保存图像的名称
    int bucketSize;
    };
    struct Table 
    {
    vector<Bucket> bucket;
    int TableSize;
    };
    class myLSH
    {
    public:
    myLSH(){};
    ~myLSH(){};
    public:
    int Init( int m, int l, float ratio,int featureDims );
    int train(Mat featurtes);
    int Search(Mat feature,int& name, int& dist);//输入查询特征 返回特征名 和距离
    private:
    //int creatHashFamily(); 
    int GetHashFun_level1();//生成一级哈希函数
    int Hash_level1( Mat feature,int hashFunID,vector<unsigned char>& value );//输入一级哈希函数、原始特征,得到原始特征的映射value;这里用的算法避免了把数字展开成01串
    int Hash_level2( vector<unsigned char> value, int& bucketID );//输入一级哈希映射后的value,计算二级哈希映射后的value(也就是二级哈希表中的桶的id)
    int calcDist( Mat feature0, Mat feature1,int& dist);
    
    
    private:
    int m_k;//随机选取的位数
    int m_d;//转换成01串后的特征维数;当然得注意,实际并没有显式的转成01字符串,m_d只是用到而已;
    int m_l;//一级哈希表的个数
    int m_M;//二级哈希表的容量
    int m_FeatureDims;//图像特征维数
    vector<vector<int>> m_hashFun_level1;//现在没用到
    vector<vector<vector<int>>> m_hashFun_level1Subset; 
    vector<int> m_hashFun_level2;
    Table m_table;
    //vector<vector<int>> m_hashFamily;
    
    
    };
    
    #include "myLSH.h"
    #include <iostream>
    
    
    int myLSH::Init( int m, int l, float ratio,int featureDims)
    {
    int i;
    m_M = m;
    m_l = l;
    m_k = (int)MaxValue*featureDims*ratio;
    m_d = (int)MaxValue*featureDims;
    m_FeatureDims = featureDims;
    m_table.bucket.resize(m_M);//初始化table
    m_table.TableSize = m_M;
    m_hashFun_level1Subset.clear();
    m_hashFun_level2.clear();
    m_hashFun_level1Subset.resize(l);
    for ( i = 0; i < l; i++ )
    {
    m_hashFun_level1Subset[i].resize(featureDims);
    }
    for ( i = 0; i <m_k; i++ )
    {
    m_hashFun_level2.push_back(rand()%m_M); //初始化二级哈希函数
    }
    return 1;
    }
    
    
    int myLSH::train(Mat featurtes)
    {
    int nums,i,j,ret,bucketID;
    vector<unsigned char> value;
    Mat feature;
    
    //生成一级hash函数
    ret = GetHashFun_level1();
    if ( 0 == ret )
    {
    return 0;
    }
    
    nums = featurtes.rows;
    for ( i = 0; i < nums; i++ )//every img
    {
    featurtes.row(i).copyTo(feature);
    for ( j = 0; j < m_l; j++ )//every table
    {
    ret = Hash_level1(feature,j,value);
    if ( 0 == ret )
    {
    return 0;
    }
    //value.clear();
    
    //二级hash 
    ret = Hash_level2(value,bucketID);
    if ( 0 == ret )
    {
    return 0;
    }
    
    
    //把特征以及特征对应名称存到桶中
    m_table.bucket[bucketID].features.push_back(feature.clone());
    m_table.bucket[bucketID].names.push_back(i);
    m_table.bucket[bucketID].bucketSize = int(m_table.bucket[bucketID].names.size());
    }
    }
    return 1;
    }
    //生成一级hash函数
    int myLSH::GetHashFun_level1()
    {
    
    //vector<int> temp;
    int id;
    int i, j, pos;
    
    
    //修改
    for ( i = 0; i < m_l; i++ )//every table
    {
    for ( j = 0; j < m_k; j++ )//every bit
    {
      id = rand()%(m_d);//随机选取的位置
      pos = (int)id/(MaxValue);
      {
      int c = m_hashFun_level1Subset[i].size();
      m_hashFun_level1Subset[i][pos].push_back(id);
      }
      //temp.push_back(id);
    }
    //m_hashFun_level1.push_back(temp);
    //temp.clear();
    }
    return 1;
    }
    int myLSH::Hash_level1( Mat feature,int hashFunID,vector<unsigned char>& value )
    {
    int i,j,val0,val1,one_nums,zero_nums;
    vector<vector<int>> tablePos; //第hashFunID个table对应的...
    vector<int> temp;
    
    
    value.clear();
    one_nums = 0;
    zero_nums = 0;
    tablePos = m_hashFun_level1Subset[hashFunID];//size 应该等于m_FeatureDims
    for ( i = 0; i < (int)tablePos.size(); i++ )
    {
    val0 = feature.at<uchar>(0,i);
    for ( j = 0; j < (int)tablePos[i].size(); j++ )
    {
    val1 = tablePos[i][j] - m_d*i;//对应原始第i维特征要提取的pos
    if ( val1<=val0 )
    {
    one_nums++;
    }
    else
    {
    zero_nums++;
    }
    
    
    while ( one_nums > 0 )
    {
    value.push_back(1);
    one_nums--;
    }
    while ( zero_nums > 0 )
    {
    value.push_back(0);
    zero_nums--;
    }
    }
    }
    return 1;
    }
    
    
    int myLSH::Hash_level2( vector<unsigned char> value, int& bucketID )
    {
    int i,val;
    val = 0;
    for ( i = 0; i <(int) value.size(); i++ )
    {
    val += value[i]*m_hashFun_level2[i];
    }
    bucketID = val % m_M;
    return 1;
    }
    
    
    int myLSH::Search(Mat feature,int& name, int& dist)
    {
    if (feature.empty())
    {
    cout<<"error: myLSH::Search"<<endl;
    return 0;
    }
    dist = -1;
    name = -1;
    
    
    int ret,i,j,bucketID;
    vector<unsigned char> value;
    vector<int> buckets;
    vector<Mat> tempFeature;
    vector<int> tempName;
    float minDist = 1000000;
    
    
    for ( i = 0; i < m_l; i++ )
    {
    //一级hash
    ret = Hash_level1(feature,i,value);
    if ( 0 == ret )
    {
    return 0;
    }
    
    
    //二级hash 
    ret = Hash_level2(value,bucketID);
    if ( 0 == ret )
    {
    return 0;
    }
    buckets.push_back(bucketID);
    }
    
    
    //在buckets内的所有特征中寻找距离最小的点  bruteForce
    cout<<"候选桶的数量为:"<<(int)buckets.size()<<endl;
    for ( i = 0; i <buckets.size(); i++ )
    {
    tempFeature = m_table.bucket[buckets[i]].features;//提取该桶包含的所有特征一级对应的name
    tempName = m_table.bucket[buckets[i]].names;
    cout<<"该候选桶内含点的数量为:"<<(int)tempFeature.size()<<endl;
    for ( j = 0; j < tempFeature.size();j++ )
    {
    ret = calcDist(feature,tempFeature[j],dist) ;//计算距离的函数
    if ( 0 == ret )
    {
    return 0;
    }
            if ( dist < minDist )
            {
    minDist = dist;
    name = tempName[j];
            }
    }
    tempFeature.clear();
    tempName.clear();
    }
    dist = minDist;
    return 1;
    }
    
    
    int myLSH::calcDist(Mat feature0, Mat feature1,int&dist)
    {
    if ( feature0.cols != feature1.cols )
    {
    cout<<"error: myLSH::calcDist"<<endl;
    return 0;
    }
    dist = 0;
    int i;
    int dims = feature0.cols;
    
    
    for ( i = 0; i <dims; i++ )
    {
    dist += abs(feature0.at<uchar>(0,i) - feature1.at<uchar>(0,i));
    }
    return 1;
    }
    
    #include "myLSH.h"
    #include <iostream>
    int main()
    {
    myLSH lsh;
    int ret;
    int name,dist;
    Mat data(1000,40,CV_8U);
    cv::randu(data,0,256);
    //cout<<data<<endl;
    cout<<"初始化"<<endl;
    lsh.Init(1000,10,0.05,20);
    cout<<"开始训练..."<<endl;
    ret = lsh.train(data);
    cout<<"训练结束...."<<endl;
    
    
    int count = 0;
    for ( int i = 0; i < (int)data.rows; i++ )
    {
    Mat tempFeature = data.row(i).clone();
    int j = 0;
    while(tempFeature.at<uchar>(0,j)<10)
    {
    j++;
    }
    tempFeature.at<uchar>(0,j) -= 10;
    ret = lsh.Search(tempFeature,name,dist);
    if ( name == i )
    {
    count++;
    }
    }
    cout<<"accuracy:"<<count/(float)(data.rows)<<endl;
    return 1;
    }
    
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
    AI写代码

全部评论 (0)

还没有任何评论哟~