这是我能想到的最好的算法。

def get_primes(n):
    numbers = set(range(n, 1, -1))
    primes = []
    while numbers:
        p = numbers.pop()
        primes.append(p)
        numbers.difference_update(set(range(p*2, n+1, p)))
    return primes

>>> timeit.Timer(stmt='get_primes.get_primes(1000000)', setup='import   get_primes').timeit(1)
1.1499958793645562

还能做得更快吗?

这段代码有一个缺陷:由于numbers是一个无序集,不能保证numbers.pop()将从集合中移除最低的数字。尽管如此,它还是适用于(至少对我来说)一些输入数字:

>>> sum(get_primes(2000000))
142913828922L
#That's the correct sum of all numbers below 2 million
>>> 529 in get_primes(1000)
False
>>> 529 in get_primes(530)
True

当前回答

我在这里找到了一个纯Python 2素数生成器,在Willy Good的评论中,它比rwh2_primes快。

def primes235(limit):
yield 2; yield 3; yield 5
if limit < 7: return
modPrms = [7,11,13,17,19,23,29,31]
gaps = [4,2,4,2,4,6,2,6,4,2,4,2,4,6,2,6] # 2 loops for overflow
ndxs = [0,0,0,0,1,1,2,2,2,2,3,3,4,4,4,4,5,5,5,5,5,5,6,6,7,7,7,7,7,7]
lmtbf = (limit + 23) // 30 * 8 - 1 # integral number of wheels rounded up
lmtsqrt = (int(limit ** 0.5) - 7)
lmtsqrt = lmtsqrt // 30 * 8 + ndxs[lmtsqrt % 30] # round down on the wheel
buf = [True] * (lmtbf + 1)
for i in xrange(lmtsqrt + 1):
    if buf[i]:
        ci = i & 7; p = 30 * (i >> 3) + modPrms[ci]
        s = p * p - 7; p8 = p << 3
        for j in range(8):
            c = s // 30 * 8 + ndxs[s % 30]
            buf[c::p8] = [False] * ((lmtbf - c) // p8 + 1)
            s += p * gaps[ci]; ci += 1
for i in xrange(lmtbf - 6 + (ndxs[(limit - 7) % 30])): # adjust for extras
    if buf[i]: yield (30 * (i >> 3) + modPrms[i & 7])

我的结果:

$ time ./prime_rwh2.py 1e8
5761455 primes found < 1e8

real    0m3.201s
user    0m2.609s
sys     0m0.578s
$ time ./prime_wheel.py 1e8
5761455 primes found < 1e8

real    0m2.710s
user    0m2.469s
sys     0m0.219s

...在我最近的中档笔记本电脑(i5 8265U 1.6GHz)上运行Ubuntu Win 10。

这是一个mod 30轮筛,跳过倍数2,3和5。对我来说,它在2.5e9左右的时候工作得很好,那时我的笔记本电脑开始用完8G内存,需要大量交换。

我喜欢对30取余,因为它只有8个余数不是2 3 5的倍数。这允许使用移位和“&”进行乘法,除法和mod,并应该允许将一个mod 30轮的结果打包到一个字节中。我把威利的代码变成了一个分段的mod 30轮筛,以消除大N的抖动,并张贴在这里。

还有一个更快的Javascript版本,它是分段的,并使用了一个mod 210轮(没有2,3,5或7的倍数)@GordonBGood与一个深入的解释,这对我很有用。

其他回答

如果你可以控制N,列出所有质数的最快方法就是预先计算它们。认真对待。预计算是一种被忽视的优化方法。

你有一个更快的代码和最简单的代码生成质数。 但对于更大的数字,当n=10000, 10000000时,它不起作用,可能是。pop()方法失败了

考虑:N是质数吗?

case 1: You got some factors of N, for i in range(2, N): If N is prime loop is performed for ~(N-2) times. else less number of times case 2: for i in range(2, int(math.sqrt(N)): Loop is performed for almost ~(sqrt(N)-2) times if N is prime else will break somewhere case 3: Better We Divide N With Only number of primes<=sqrt(N) Where loop is performed for only π(sqrt(N)) times π(sqrt(N)) << sqrt(N) as N increases from math import sqrt from time import * prime_list = [2] n = int(input()) s = time() for n0 in range(2,n+1): for i0 in prime_list: if n0%i0==0: break elif i0>=int(sqrt(n0)): prime_list.append(n0) break e = time() print(e-s) #print(prime_list); print(f'pi({n})={len(prime_list)}') print(f'{n}: {len(prime_list)}, time: {e-s}') Output 100: 25, time: 0.00010275840759277344 1000: 168, time: 0.0008606910705566406 10000: 1229, time: 0.015588521957397461 100000: 9592, time: 0.023436546325683594 1000000: 78498, time: 4.1965954303741455 10000000: 664579, time: 109.24591708183289 100000000: 5761455, time: 2289.130858898163

小于1000似乎很慢,但小于10^6我认为更快。

然而,我无法理解时间的复杂性。

我测试了一些unutbu的功能,我用饥饿的百万数字计算它

获胜者是使用numpy库的函数,

注意:做一个内存利用率测试也很有趣:)

示例代码

完整的代码在我的github存储库

#!/usr/bin/env python

import lib
import timeit
import sys
import math
import datetime

import prettyplotlib as ppl
import numpy as np

import matplotlib.pyplot as plt
from prettyplotlib import brewer2mpl

primenumbers_gen = [
    'sieveOfEratosthenes',
    'ambi_sieve',
    'ambi_sieve_plain',
    'sundaram3',
    'sieve_wheel_30',
    'primesfrom3to',
    'primesfrom2to',
    'rwh_primes',
    'rwh_primes1',
    'rwh_primes2',
]

def human_format(num):
    # https://stackoverflow.com/questions/579310/formatting-long-numbers-as-strings-in-python?answertab=active#tab-top
    magnitude = 0
    while abs(num) >= 1000:
        magnitude += 1
        num /= 1000.0
    # add more suffixes if you need them
    return '%.2f%s' % (num, ['', 'K', 'M', 'G', 'T', 'P'][magnitude])


if __name__=='__main__':

    # Vars
    n = 10000000 # number itereration generator
    nbcol = 5 # For decompose prime number generator
    nb_benchloop = 3 # Eliminate false positive value during the test (bench average time)
    datetimeformat = '%Y-%m-%d %H:%M:%S.%f'
    config = 'from __main__ import n; import lib'
    primenumbers_gen = {
        'sieveOfEratosthenes': {'color': 'b'},
        'ambi_sieve': {'color': 'b'},
        'ambi_sieve_plain': {'color': 'b'},
         'sundaram3': {'color': 'b'},
        'sieve_wheel_30': {'color': 'b'},
# # #        'primesfrom2to': {'color': 'b'},
        'primesfrom3to': {'color': 'b'},
        # 'rwh_primes': {'color': 'b'},
        # 'rwh_primes1': {'color': 'b'},
        'rwh_primes2': {'color': 'b'},
    }


    # Get n in command line
    if len(sys.argv)>1:
        n = int(sys.argv[1])

    step = int(math.ceil(n / float(nbcol)))
    nbs = np.array([i * step for i in range(1, int(nbcol) + 1)])
    set2 = brewer2mpl.get_map('Paired', 'qualitative', 12).mpl_colors

    print datetime.datetime.now().strftime(datetimeformat)
    print("Compute prime number to %(n)s" % locals())
    print("")

    results = dict()
    for pgen in primenumbers_gen:
        results[pgen] = dict()
        benchtimes = list()
        for n in nbs:
            t = timeit.Timer("lib.%(pgen)s(n)" % locals(), setup=config)
            execute_times = t.repeat(repeat=nb_benchloop,number=1)
            benchtime = np.mean(execute_times)
            benchtimes.append(benchtime)
        results[pgen] = {'benchtimes':np.array(benchtimes)}

fig, ax = plt.subplots(1)
plt.ylabel('Computation time (in second)')
plt.xlabel('Numbers computed')
i = 0
for pgen in primenumbers_gen:

    bench = results[pgen]['benchtimes']
    avgs = np.divide(bench,nbs)
    avg = np.average(bench, weights=nbs)

    # Compute linear regression
    A = np.vstack([nbs, np.ones(len(nbs))]).T
    a, b = np.linalg.lstsq(A, nbs*avgs)[0]

    # Plot
    i += 1
    #label="%(pgen)s" % locals()
    #ppl.plot(nbs, nbs*avgs, label=label, lw=1, linestyle='--', color=set2[i % 12])
    label="%(pgen)s avg" % locals()
    ppl.plot(nbs, a * nbs + b, label=label, lw=2, color=set2[i % 12])
print datetime.datetime.now().strftime(datetimeformat)

ppl.legend(ax, loc='upper left', ncol=4)

# Change x axis label
ax.get_xaxis().get_major_formatter().set_scientific(False)
fig.canvas.draw()
labels = [human_format(int(item.get_text())) for item in ax.get_xticklabels()]

ax.set_xticklabels(labels)
ax = plt.gca()

plt.show()

这是问题解的一种变化应该比问题本身更快。它使用埃拉托色尼的静态筛,没有其他优化。

from typing import List

def list_primes(limit: int) -> List[int]:
    primes = set(range(2, limit + 1))
    for i in range(2, limit + 1):
        if i in primes:
            primes.difference_update(set(list(range(i, limit + 1, i))[1:]))
    return sorted(primes)

>>> list_primes(100)
[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97]

很抱歉打扰,但erat2()在算法中有一个严重的缺陷。

在搜索下一个合成时,我们只需要测试奇数。 Q p都是奇数;那么q+p是偶数,不需要检验,但q+2*p总是奇数。这消除了while循环条件中的“if even”测试,并节省了大约30%的运行时。

当我们在它:而不是优雅的'D.pop(q,None)'获取和删除方法,使用'if q in D: p=D[q],del D[q]',这是两倍的速度!至少在我的机器上(P3-1Ghz)。 所以我建议这个聪明算法的实现:

def erat3( ):
    from itertools import islice, count

    # q is the running integer that's checked for primeness.
    # yield 2 and no other even number thereafter
    yield 2
    D = {}
    # no need to mark D[4] as we will test odd numbers only
    for q in islice(count(3),0,None,2):
        if q in D:                  #  is composite
            p = D[q]
            del D[q]
            # q is composite. p=D[q] is the first prime that
            # divides it. Since we've reached q, we no longer
            # need it in the map, but we'll mark the next
            # multiple of its witnesses to prepare for larger
            # numbers.
            x = q + p+p        # next odd(!) multiple
            while x in D:      # skip composites
                x += p+p
            D[x] = p
        else:                  # is prime
            # q is a new prime.
            # Yield it and mark its first multiple that isn't
            # already marked in previous iterations.
            D[q*q] = q
            yield q