Locality-Sensitive Hashing (Original LSH): A C++ Implementation
A previous project used LSH for feature retrieval, which made me curious about the algorithm. I recently read the LSH paper and wrote a first version of the code based on my own understanding. The test results are not particularly good, and the parameters were chosen mostly by trial and error, so I am posting the code as-is and will improve it later. (2016.01.24)
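Before the code, a minimal sketch of the core idea from the paper as I understand it (the names sampleBit and hashOnce below are illustrative only, not part of the implementation that follows): each uchar dimension is conceptually unrolled into a 255-bit unary string, and a level-1 hash function simply samples k random bit positions of that virtual string. The sampled bit at global position p lives in dimension p/255, and equals 1 iff its offset inside that dimension is at most the dimension's value, so the 0/1 string never has to be built.
// Illustrative sketch only: bit sampling on the virtual unary expansion.
#include <vector>
#include <cstdlib>

// bit at global position p of the (never materialized) unary code
inline int sampleBit( const std::vector<unsigned char>& feature, int p )
{
    int dim    = p / 255;     // which original dimension p falls into
    int offset = p - 255*dim; // position inside that dimension's unary code
    return offset <= feature[dim] ? 1 : 0;
}

// one level-1 hash value: k sampled bits concatenated
std::vector<unsigned char> hashOnce( const std::vector<unsigned char>& feature, int k )
{
    int d = 255 * (int)feature.size(); // length of the virtual 0/1 string
    std::vector<unsigned char> bits;
    for ( int i = 0; i < k; i++ )
    {
        bits.push_back( (unsigned char)sampleBit(feature, rand()%d) );
    }
    return bits;
}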
// myLSH.h
#pragma once
#include <vector>
#include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/highgui/highgui.hpp>
using namespace std;
using namespace cv;
/* Algorithm outline:
   1. Fix the parameters k (bitCount) and the number of hash tables l
      (fixed values here; I don't know how to derive the optimal ones).
   2. Generate the level-1 hash functions.
   3. Hash the data into buckets with the level-1 functions.
   4. Compress the buckets: a level-2 hash function maps the level-1 buckets
      into the level-2 table (whose capacity M must be chosen).
*/
const int k = 700;        // could instead be derived from the length of the unrolled 0/1 feature string (unused: Init() computes m_k)
const int l = 10;         // (unused: Init() takes l as a parameter)
const int MaxValue = 255; // features are of type uchar
const int M = 100;        // capacity of the level-2 hash table (unused: Init() takes m)
struct Bucket
{
    vector<Mat> features; // features of the images hashed into this bucket
    vector<int> names;    // names (ids) of those images
    int bucketSize;
};
struct Table
{
    vector<Bucket> bucket;
    int TableSize;
};
class myLSH
{
public:
    myLSH() {}
    ~myLSH() {}
public:
    int Init( int m, int l, float ratio, int featureDims );
    int train( Mat features );
    int Search( Mat feature, int& name, int& dist ); // takes a query feature, returns the matched name and distance
private:
    int GetHashFun_level1(); // generate the level-1 hash functions
    int Hash_level1( Mat feature, int hashFunID, vector<unsigned char>& value ); // map a raw feature through level-1 hash function hashFunID into value; the scheme avoids expanding numbers into 0/1 strings
    int Hash_level2( vector<unsigned char> value, int& bucketID ); // map a level-1 value to a bucket id in the level-2 table
    int calcDist( Mat feature0, Mat feature1, int& dist );
private:
    int m_k; // number of randomly sampled bit positions
    int m_d; // feature dimension after (virtual) expansion into a 0/1 string; note the string is never built explicitly, m_d is only used for bookkeeping
    int m_l; // number of level-1 hash tables
    int m_M; // capacity of the level-2 hash table
    int m_FeatureDims; // dimension of the image features
    vector<vector<vector<int>>> m_hashFun_level1Subset; // sampled positions, per table and per dimension
    vector<int> m_hashFun_level2; // random weights of the level-2 hash
    Table m_table;
};
#include "myLSH.h"
#include <iostream>
int myLSH::Init( int m, int l, float ratio, int featureDims )
{
    int i;
    m_M = m;
    m_l = l;
    m_k = (int)(MaxValue*featureDims*ratio); // number of sampled bits per table
    m_d = MaxValue*featureDims;              // length of the virtual 0/1 string
    m_FeatureDims = featureDims;
    m_table.bucket.resize(m_M); // initialize the level-2 table
    m_table.TableSize = m_M;
    m_hashFun_level1Subset.clear();
    m_hashFun_level2.clear();
    m_hashFun_level1Subset.resize(l);
    for ( i = 0; i < l; i++ )
    {
        m_hashFun_level1Subset[i].resize(featureDims);
    }
    for ( i = 0; i < m_k; i++ )
    {
        m_hashFun_level2.push_back(rand()%m_M); // initialize the level-2 hash weights
    }
    return 1;
}
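// Worked example (matching the call in main below): Init(1000, 10, 0.05, 40)
// gives m_M = 1000 buckets, m_l = 10 tables, a virtual 0/1 string of
// m_d = 255*40 = 10200 bits, and m_k = (int)(10200*0.05) = 510 sampled bits
// per table.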
int myLSH::train( Mat features )
{
    int nums, i, j, ret, bucketID;
    vector<unsigned char> value;
    Mat feature;
    // generate the level-1 hash functions
    ret = GetHashFun_level1();
    if ( 0 == ret )
    {
        return 0;
    }
    nums = features.rows;
    for ( i = 0; i < nums; i++ ) // every image
    {
        features.row(i).copyTo(feature);
        for ( j = 0; j < m_l; j++ ) // every table
        {
            ret = Hash_level1(feature,j,value);
            if ( 0 == ret )
            {
                return 0;
            }
            // level-2 hash
            ret = Hash_level2(value,bucketID);
            if ( 0 == ret )
            {
                return 0;
            }
            // store the feature and its name in the bucket
            m_table.bucket[bucketID].features.push_back(feature.clone());
            m_table.bucket[bucketID].names.push_back(i);
            m_table.bucket[bucketID].bucketSize = (int)m_table.bucket[bucketID].names.size();
        }
    }
    return 1;
}
// generate the level-1 hash functions
int myLSH::GetHashFun_level1()
{
    int id, i, j, pos;
    for ( i = 0; i < m_l; i++ )     // every table
    {
        for ( j = 0; j < m_k; j++ ) // every sampled bit
        {
            id  = rand()%m_d;    // randomly selected virtual bit position
            pos = id/MaxValue;   // dimension this bit belongs to
            m_hashFun_level1Subset[i][pos].push_back(id);
        }
    }
    return 1;
}
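// Example of the id -> (pos, offset) mapping above, with MaxValue = 255:
// id = 517 selects virtual bit 517, which belongs to dimension
// pos = 517/255 = 2 at offset 517 - 255*2 = 7 inside that dimension's
// unary code (the offset is recovered later in Hash_level1).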
int myLSH::Hash_level1( Mat feature, int hashFunID, vector<unsigned char>& value )
{
    int i, j, val0, val1;
    value.clear();
    // positions sampled for table hashFunID; size should equal m_FeatureDims
    const vector<vector<int>>& tablePos = m_hashFun_level1Subset[hashFunID];
    for ( i = 0; i < (int)tablePos.size(); i++ )
    {
        val0 = feature.at<uchar>(0,i);
        for ( j = 0; j < (int)tablePos[i].size(); j++ )
        {
            // offset of the sampled bit inside dimension i's unary code;
            // the per-dimension stride of the virtual string is MaxValue
            val1 = tablePos[i][j] - MaxValue*i;
            // bit of the virtual unary expansion: 1 iff offset <= value
            value.push_back( val1 <= val0 ? 1 : 0 );
        }
    }
    return 1;
}
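// Example of the unary-bit test above: if dimension i holds val0 = 100 and
// the sampled offset is val1 = 37, the virtual bit is 1 (37 <= 100); for
// val1 = 200 it is 0. This is bit sampling on the unary expansion without
// ever building the 255-bit string.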
int myLSH::Hash_level2( vector<unsigned char> value, int& bucketID )
{
    int i, val;
    val = 0;
    for ( i = 0; i < (int)value.size(); i++ )
    {
        val += value[i]*m_hashFun_level2[i];
    }
    bucketID = val % m_M;
    return 1;
}
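// The level-2 hash is a plain random weighted sum of the sampled bits
// modulo m_M (the weights were drawn in Init()); it only compresses the
// huge level-1 key space into m_M buckets and adds no locality of its own.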
int myLSH::Search( Mat feature, int& name, int& dist )
{
    if ( feature.empty() )
    {
        cout<<"error: myLSH::Search"<<endl;
        return 0;
    }
    dist = -1;
    name = -1;
    int ret, i, j, bucketID;
    vector<unsigned char> value;
    vector<int> buckets;
    int minDist = 1000000; // larger than any possible L1 distance here
    for ( i = 0; i < m_l; i++ )
    {
        // level-1 hash
        ret = Hash_level1(feature,i,value);
        if ( 0 == ret )
        {
            return 0;
        }
        // level-2 hash
        ret = Hash_level2(value,bucketID);
        if ( 0 == ret )
        {
            return 0;
        }
        buckets.push_back(bucketID);
    }
    // brute-force search for the nearest point among all candidate buckets
    cout<<"number of candidate buckets: "<<(int)buckets.size()<<endl;
    for ( i = 0; i < (int)buckets.size(); i++ )
    {
        // all features in this bucket and their corresponding names
        const vector<Mat>& tempFeature = m_table.bucket[buckets[i]].features;
        const vector<int>& tempName = m_table.bucket[buckets[i]].names;
        cout<<"points in this candidate bucket: "<<(int)tempFeature.size()<<endl;
        for ( j = 0; j < (int)tempFeature.size(); j++ )
        {
            ret = calcDist(feature,tempFeature[j],dist); // distance computation
            if ( 0 == ret )
            {
                return 0;
            }
            if ( dist < minDist )
            {
                minDist = dist;
                name = tempName[j];
            }
        }
    }
    dist = minDist;
    return 1;
}
// L1 (Manhattan) distance between two single-row CV_8U features
int myLSH::calcDist( Mat feature0, Mat feature1, int& dist )
{
    if ( feature0.cols != feature1.cols )
    {
        cout<<"error: myLSH::calcDist"<<endl;
        return 0;
    }
    dist = 0;
    int i;
    int dims = feature0.cols;
    for ( i = 0; i < dims; i++ )
    {
        dist += abs(feature0.at<uchar>(0,i) - feature1.at<uchar>(0,i));
    }
    return 1;
}
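// Assuming the rows really are CV_8U, OpenCV's built-in norm should give
// the same value as calcDist (an untested alternative, not used above):
//   dist = (int)norm(feature0, feature1, NORM_L1);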
#include "myLSH.h"
#include <iostream>
int main()
{
myLSH lsh;
int ret;
int name,dist;
Mat data(1000,40,CV_8U);
cv::randu(data,0,256);
//cout<<data<<endl;
cout<<"初始化"<<endl;
lsh.Init(1000,10,0.05,20);
cout<<"开始训练..."<<endl;
ret = lsh.train(data);
cout<<"训练结束...."<<endl;
int count = 0;
for ( int i = 0; i < (int)data.rows; i++ )
{
Mat tempFeature = data.row(i).clone();
int j = 0;
while(tempFeature.at<uchar>(0,j)<10)
{
j++;
}
tempFeature.at<uchar>(0,j) -= 10;
ret = lsh.Search(tempFeature,name,dist);
if ( name == i )
{
count++;
}
}
cout<<"accuracy:"<<count/(float)(data.rows)<<endl;
return 1;
}
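A small tweak for later: rand() is never seeded here, so every run draws exactly the same hash functions. Seeding it once at the start of main() would vary them between runs:
#include <ctime> // add to the includes of main.cpp

srand((unsigned)time(NULL)); // first line of main(), before lsh.Init(...)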