redis去重字符串的示例:
基于Redis实现Bloomfilter去重,代码如下:
# encoding=utf-8
from hashlib import md5

import redis
class SimpleHash(object):
    """Seeded multiplicative string hash that maps a value to a bit offset.

    Instances created with different seeds act as different hash functions
    over the same bit space.
    """

    def __init__(self, cap, seed):
        """
        :param cap: size of the bit space; results are masked with ``cap - 1``,
            so the mask covers the whole range only when ``cap`` is a power of two
        :param seed: multiplier that distinguishes this hash function from
            instances built with other seeds
        """
        self.cap = cap
        self.seed = seed

    def hash(self, value):
        """Return the bit offset for ``value``.

        :param value: string to hash; every character is passed through ``ord``
        :return: the accumulated sum masked with ``cap - 1`` (``0`` for an
            empty ``value``)
        """
        ret = 0
        for char in value:
            # Deliberately kept as ``ret += seed * ret + ord(char)`` (that is,
            # ret * (seed + 1) + ord(char)): altering the formula would change
            # the offsets computed for values that were already recorded.
            ret += self.seed * ret + ord(char)
        return (self.cap - 1) & ret
class BloomFilter(object):
    """Bloom filter for string de-duplication whose bits are stored in Redis.

    Each input is reduced to its MD5 hex digest. The first two hex digits of
    the digest pick one of ``blockNum`` Redis keys, and every hash function in
    ``self.hashfunc`` maps the digest to one bit offset inside that key.
    """

    def __init__(self, host='localhost', port=6379, db=0, blockNum=1, key='bloomfilter'):
        """
        :param host: the host of Redis
        :param port: the port of Redis
        :param db: which db in Redis
        :param blockNum: number of Redis keys ("blocks") the bits are spread
            over. The original author's estimate is about 90,000,000 strings
            per block; increase it if you have more strings to filter. The
            block index comes from two hex digits (0-255), so values above 256
            do not add further blocks.
        :param key: prefix of the key's name in Redis; the block index is
            appended to it
        """
        self.server = redis.Redis(host=host, port=port, db=db)
        # Redis strings are capped at 512 MB; 1 << 31 bits uses 256 MB per block.
        self.bit_size = 1 << 31
        self.seeds = [5, 7, 11, 13, 31, 37, 61]
        self.key = key
        self.blockNum = blockNum
        self.hashfunc = [SimpleHash(self.bit_size, seed) for seed in self.seeds]

    def _locate(self, str_input):
        """Return ``(redis_key, md5_hex_digest)`` for a non-empty input."""
        # hashlib only accepts bytes on Python 3, so encode text first; byte
        # strings (including Python 2 ``str``) are hashed unchanged.
        if not isinstance(str_input, bytes):
            str_input = str_input.encode('utf-8')
        digest = md5(str_input).hexdigest()
        name = self.key + str(int(digest[0:2], 16) % self.blockNum)
        return name, digest

    def isContains(self, str_input):
        """Report whether ``str_input`` has (probably) been inserted before.

        :param str_input: text or byte string to look up
        :return: ``False`` for empty input or as soon as one of the bits is
            unset; ``True`` when every bit is set (which, for a Bloom filter,
            may be a false positive)
        """
        if not str_input:
            return False
        name, digest = self._locate(str_input)
        # all() stops at the first unset bit, avoiding needless round trips.
        return all(self.server.getbit(name, f.hash(digest)) for f in self.hashfunc)

    def insert(self, str_input):
        """Record ``str_input`` by setting one bit per hash function.

        Empty input is ignored, mirroring ``isContains`` which never reports
        empty input as present.

        :param str_input: text or byte string to record
        """
        if not str_input:
            return
        name, digest = self._locate(str_input)
        for f in self.hashfunc:
            self.server.setbit(name, f.hash(digest), 1)
if __name__ == '__main__':
    # Demo against a Redis reachable with BloomFilter's default settings:
    # the first run prints "not exists!"; later runs print "exists!" because
    # the URL was inserted by the earlier run.
    bf = BloomFilter()
    if bf.isContains('http://www.baidu.com'):  # check whether the string was seen before
        # Parenthesised single-argument print works on Python 2 and Python 3.
        print('exists!')
    else:
        print('not exists!')
        bf.insert('http://www.baidu.com')