这是我能想到的最好的算法。
def get_primes(n):
numbers = set(range(n, 1, -1))
primes = []
while numbers:
p = numbers.pop()
primes.append(p)
numbers.difference_update(set(range(p*2, n+1, p)))
return primes
>>> timeit.Timer(stmt='get_primes.get_primes(1000000)', setup='import get_primes').timeit(1)
1.1499958793645562
还能做得更快吗?
这段代码有一个缺陷:由于numbers是一个无序集,不能保证numbers.pop()将从集合中移除最低的数字。尽管如此,它还是适用于(至少对我来说)一些输入数字:
>>> sum(get_primes(2000000))
142913828922L
#That's the correct sum of all numbers below 2 million
>>> 529 in get_primes(1000)
False
>>> 529 in get_primes(530)
True
对于最快的代码,numpy解决方案是最好的。不过,出于纯粹的学术原因,我发布了我的纯python版本,它比上面发布的食谱版本快不到50%。由于我将整个列表放在内存中,所以需要足够的空间来容纳所有内容,但它的可伸缩性似乎相当好。
def daniel_sieve_2(maxNumber):
"""
Given a number, returns all numbers less than or equal to
that number which are prime.
"""
allNumbers = range(3, maxNumber+1, 2)
for mIndex, number in enumerate(xrange(3, maxNumber+1, 2)):
if allNumbers[mIndex] == 0:
continue
# now set all multiples to 0
for index in xrange(mIndex+number, (maxNumber-3)/2+1, number):
allNumbers[index] = 0
return [2] + filter(lambda n: n!=0, allNumbers)
结果是:
>>>mine = timeit.Timer("daniel_sieve_2(1000000)",
... "from sieves import daniel_sieve_2")
>>>prev = timeit.Timer("get_primes_erat(1000000)",
... "from sieves import get_primes_erat")
>>>print "Mine: {0:0.4f} ms".format(min(mine.repeat(3, 1))*1000)
Mine: 428.9446 ms
>>>print "Previous Best {0:0.4f} ms".format(min(prev.repeat(3, 1))*1000)
Previous Best 621.3581 ms
这里有一个来自Python Cookbook的非常简洁的示例——该URL的最快版本是:
import itertools
def erat2( ):
D = { }
yield 2
for q in itertools.islice(itertools.count(3), 0, None, 2):
p = D.pop(q, None)
if p is None:
D[q*q] = q
yield q
else:
x = p + q
while x in D or not (x&1):
x += p
D[x] = p
这就给出了
def get_primes_erat(n):
return list(itertools.takewhile(lambda p: p<n, erat2()))
在shell提示符(正如我喜欢做的那样)中测量这段代码在pri.py中,我观察到:
$ python2.5 -mtimeit -s'import pri' 'pri.get_primes(1000000)'
10 loops, best of 3: 1.69 sec per loop
$ python2.5 -mtimeit -s'import pri' 'pri.get_primes_erat(1000000)'
10 loops, best of 3: 673 msec per loop
所以看起来食谱解决方案的速度是原来的两倍多。
在Pure Python中最快的质数筛分:
from itertools import compress
def half_sieve(n):
"""
Returns a list of prime numbers less than `n`.
"""
if n <= 2:
return []
sieve = bytearray([True]) * (n // 2)
for i in range(3, int(n ** 0.5) + 1, 2):
if sieve[i // 2]:
sieve[i * i // 2::i] = bytearray((n - i * i - 1) // (2 * i) + 1)
primes = list(compress(range(1, n, 2), sieve))
primes[0] = 2
return primes
我优化了埃拉托色尼筛子的速度和内存。
基准
from time import clock
import platform
def benchmark(iterations, limit):
start = clock()
for x in range(iterations):
half_sieve(limit)
end = clock() - start
print(f'{end/iterations:.4f} seconds for primes < {limit}')
if __name__ == '__main__':
print(platform.python_version())
print(platform.platform())
print(platform.processor())
it = 10
for pw in range(4, 9):
benchmark(it, 10**pw)
输出
>>> 3.6.7
>>> Windows-10-10.0.17763-SP0
>>> Intel64 Family 6 Model 78 Stepping 3, GenuineIntel
>>> 0.0003 seconds for primes < 10000
>>> 0.0021 seconds for primes < 100000
>>> 0.0204 seconds for primes < 1000000
>>> 0.2389 seconds for primes < 10000000
>>> 2.6702 seconds for primes < 100000000
这里是最快的函数之一的两个更新版本(纯Python 3.6),
from itertools import compress
def rwh_primes1v1(n):
""" Returns a list of primes < n for n > 2 """
sieve = bytearray([True]) * (n//2)
for i in range(3,int(n**0.5)+1,2):
if sieve[i//2]:
sieve[i*i//2::i] = bytearray((n-i*i-1)//(2*i)+1)
return [2,*compress(range(3,n,2), sieve[1:])]
def rwh_primes1v2(n):
""" Returns a list of primes < n for n > 2 """
sieve = bytearray([True]) * (n//2+1)
for i in range(1,int(n**0.5)//2+1):
if sieve[i]:
sieve[2*i*(i+1)::2*i+1] = bytearray((n//2-2*i*(i+1))//(2*i+1)+1)
return [2,*compress(range(3,n,2), sieve[1:])]
这是使用存储列表查找质数的一种优雅而简单的解决方案。从4个变量开始,你只需要测试除数的奇数质数,你只需要测试你要测试的质数的一半(测试9,11,13是否能整除17没有意义)。它将先前存储的质数作为除数进行测试。
# Program to calculate Primes
primes = [1,3,5,7]
for n in range(9,100000,2):
for x in range(1,(len(primes)/2)):
if n % primes[x] == 0:
break
else:
primes.append(n)
print primes