这是我能想到的最好的算法。
def get_primes(n):
numbers = set(range(n, 1, -1))
primes = []
while numbers:
p = numbers.pop()
primes.append(p)
numbers.difference_update(set(range(p*2, n+1, p)))
return primes
>>> timeit.Timer(stmt='get_primes.get_primes(1000000)', setup='import get_primes').timeit(1)
1.1499958793645562
还能做得更快吗?
这段代码有一个缺陷:由于numbers是一个无序集,不能保证numbers.pop()将从集合中移除最低的数字。尽管如此,它还是适用于(至少对我来说)一些输入数字:
>>> sum(get_primes(2000000))
142913828922L
#That's the correct sum of all numbers below 2 million
>>> 529 in get_primes(1000)
False
>>> 529 in get_primes(530)
True
使用Numpy实现的半筛子略有不同:
http://rebrained.com/?p=458
import math
import numpy
def prime6(upto):
primes=numpy.arange(3,upto+1,2)
isprime=numpy.ones((upto-1)/2,dtype=bool)
for factor in primes[:int(math.sqrt(upto))]:
if isprime[(factor-2)/2]: isprime[(factor*3-2)/2:(upto-1)/2:factor]=0
return numpy.insert(primes[isprime],0,2)
有人能把这个和其他时间比较一下吗?在我的机器上,它似乎与其他Numpy半筛相当。
下面是我在Python中通常用来生成质数的代码:
$ python -mtimeit -s'import sieve' 'sieve.sieve(1000000)'
10 loops, best of 3: 445 msec per loop
$ cat sieve.py
from math import sqrt
def sieve(size):
prime=[True]*size
rng=xrange
limit=int(sqrt(size))
for i in rng(3,limit+1,+2):
if prime[i]:
prime[i*i::+i]=[False]*len(prime[i*i::+i])
return [2]+[i for i in rng(3,size,+2) if prime[i]]
if __name__=='__main__':
print sieve(100)
它不能与这里发布的更快的解决方案竞争,但至少它是纯python。
谢谢你提出这个问题。我今天真的学到了很多东西。
对于Python 3
def rwh_primes2(n):
correction = (n%6>1)
n = {0:n,1:n-1,2:n+4,3:n+3,4:n+2,5:n+1}[n%6]
sieve = [True] * (n//3)
sieve[0] = False
for i in range(int(n**0.5)//3+1):
if sieve[i]:
k=3*i+1|1
sieve[ ((k*k)//3) ::2*k]=[False]*((n//6-(k*k)//6-1)//k+1)
sieve[(k*k+4*k-2*k*(i&1))//3::2*k]=[False]*((n//6-(k*k+4*k-2*k*(i&1))//6-1)//k+1)
return [2,3] + [3*i+1|1 for i in range(1,n//3-correction) if sieve[i]]
这里是最快的函数之一的两个更新版本(纯Python 3.6),
from itertools import compress
def rwh_primes1v1(n):
""" Returns a list of primes < n for n > 2 """
sieve = bytearray([True]) * (n//2)
for i in range(3,int(n**0.5)+1,2):
if sieve[i//2]:
sieve[i*i//2::i] = bytearray((n-i*i-1)//(2*i)+1)
return [2,*compress(range(3,n,2), sieve[1:])]
def rwh_primes1v2(n):
""" Returns a list of primes < n for n > 2 """
sieve = bytearray([True]) * (n//2+1)
for i in range(1,int(n**0.5)//2+1):
if sieve[i]:
sieve[2*i*(i+1)::2*i+1] = bytearray((n//2-2*i*(i+1))//(2*i+1)+1)
return [2,*compress(range(3,n,2), sieve[1:])]
我很惊讶居然没人提到numba。
该版本在2.47 ms±36.5µs内达到1M标记。
几年前,维基百科页面上出现了一个阿特金筛子的伪代码。这已经不存在了,参考阿特金筛似乎是一个不同的算法。一个2007/03/01版本的维基百科页面(Primer number as 2007-03-01)显示了我用作参考的伪代码。
import numpy as np
from numba import njit
@njit
def nb_primes(n):
# Generates prime numbers 2 <= p <= n
# Atkin's sieve -- see https://en.wikipedia.org/w/index.php?title=Prime_number&oldid=111775466
sqrt_n = int(np.sqrt(n)) + 1
# initialize the sieve
s = np.full(n + 1, -1, dtype=np.int8)
s[2] = 1
s[3] = 1
# put in candidate primes:
# integers which have an odd number of
# representations by certain quadratic forms
for x in range(1, sqrt_n):
x2 = x * x
for y in range(1, sqrt_n):
y2 = y * y
k = 4 * x2 + y2
if k <= n and (k % 12 == 1 or k % 12 == 5): s[k] *= -1
k = 3 * x2 + y2
if k <= n and (k % 12 == 7): s[k] *= -1
k = 3 * x2 - y2
if k <= n and x > y and k % 12 == 11: s[k] *= -1
# eliminate composites by sieving
for k in range(5, sqrt_n):
if s[k]:
k2 = k*k
# k is prime, omit multiples of its square; this is sufficient because
# composites which managed to get on the list cannot be square-free
for i in range(1, n // k2 + 1):
j = i * k2 # j ∈ {k², 2k², 3k², ..., n}
s[j] = -1
return np.nonzero(s>0)[0]
# initial run for "compilation"
nb_primes(10)
时机
In[10]:
%timeit nb_primes(1_000_000)
Out[10]:
2.47 ms ± 36.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
In[11]:
%timeit nb_primes(10_000_000)
Out[11]:
33.4 ms ± 373 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
In[12]:
%timeit nb_primes(100_000_000)
Out[12]:
828 ms ± 5.64 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
使用Numpy实现的半筛子略有不同:
http://rebrained.com/?p=458
import math
import numpy
def prime6(upto):
primes=numpy.arange(3,upto+1,2)
isprime=numpy.ones((upto-1)/2,dtype=bool)
for factor in primes[:int(math.sqrt(upto))]:
if isprime[(factor-2)/2]: isprime[(factor*3-2)/2:(upto-1)/2:factor]=0
return numpy.insert(primes[isprime],0,2)
有人能把这个和其他时间比较一下吗?在我的机器上,它似乎与其他Numpy半筛相当。