Merged with Lucas A. Brown's primefac.py

This commit is contained in:
Emile Clark-Boman 2025-07-01 15:33:09 +10:00
parent afa8fcec56
commit 588c2acf7c

987
imp/math/primefac.py Normal file
View file

@ -0,0 +1,987 @@
'''
Credit: Lucas A. Brown (@lucasaugustus) for his primefac module (v2.0.12):
https://github.com/lucasaugustus/primefac
primefac is under an MIT license. Moreover I've attached Lucas' credits statement:
"Significant parts of this code are derived or outright copied from other
people's work. In particular, the SIQS code was derived mostly verbatim
from https://github.com/skollmann/PyFactorise by Stephan Kollmann, while
the functions to manipulate points on elliptic curves were copied from a
reply to the blog post at http://programmingpraxis.com/2010/04/23/. The
rest, I believe, is my own work, but I may have forgotten something."
- Lucas A. Brown (primefac.py)
'''
from math import sqrt, isqrt, log2, ceil, gcd, inf, prod as iterprod
from itertools import takewhile, compress, count
from multiprocessing import Process, Queue as mpQueue
from random import randrange#, seed; seed(0)
from time import time
from datetime import datetime, timedelta
def primegen(limit=inf):
"""
Generates primes < limit almost lazily by a segmented sieve of Eratosthenes.
Memory usage depends on the sequence of prime gaps. Unconditionally, we can
say that memory usage is O(p^0.7625), where p is the most-recently-yielded
prime. On the Riemann Hypothesis and Cramer's conjecture, we can bring this
down to O(p^0.75 * log(p)) and O(sqrt(p) * log(p)^2), respectively.
Input: limit -- a number (default = inf)
Output: sequence of integers
Examples:
>>> list(islice(primegen(), 20))
[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71]
>>> list(primegen(73))
[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71]
"""
# We don't sieve 2, so we ought to be able to get sigificant savings by halving the length of the sieve.
# But the tiny extra computation involved in that seems to exceed the savings.
yield from takewhile(lambda x: x < limit, (2,3,5,7,11,13,17,19,23,29,31,37,41,43,47))
pl, pg = [3,5,7], primegen()
for p in pl: next(pg)
n = next(pg); nn = n*n
while True:
n = next(pg)
ll, nn = nn, n*n
sl = (nn - ll)
sieve = bytearray([True]) * sl
for p in pl:
k = (-ll) % p
sieve[k::p] = bytearray([False]) * ((sl-k)//p + 1)
if nn > limit: break # TODO bring this condition up to the while statement
yield from compress(range(ll,ll+sl,2), sieve[::2])
pl.append(n)
yield from takewhile(lambda x: x < limit, compress(range(ll,ll+sl,2), sieve[::2]))
def ilog(x, b): # TODO: investigate optimization starting from x.bin_length() * 2 // b
"""
Greatest integer l such that b**l <= x
Input: x, b -- integers
Output: An integer
Examples:
>>> ilog(263789, 10)
5
>>> ilog(1023, 2)
9
"""
l = 0
while x >= b:
x //= b
l += 1
return l
# TODO possible optimization route: x.bit_length() == ilog(x, 2) + 1; we can therefore use x.bit_length() * 2 // b as a
# 1st approximation to ilog(x, b), then compute pow(b, x.bit_length() * 2 // b), then compare that to x and adjust.
def modinv(a, m):
"""
Returns the inverse of a modulo m, normalized to lie between 0 and m-1. If
a is not coprime to m, return None.
Input:
a -- an integer coprime to m
n -- a positive integer
Output: None or an integer x between 0 and m-1 such that (a * x) % m == 1
Examples:
>>> [modinv(1,1), modinv(2,5), modinv(5,8), modinv(37,100), modinv(12,30)]
[0, 3, 5, 73, None]
"""
return pow(a, -1, m)
def introot(n, r=2): # TODO Newton iteration?
"""
For returns the rth root of n, rounded to the nearest integer in the
direction of zero. Returns None if r is even and n is negative.
Input:
n -- an integer
r -- a natural number or None
Output: An integer
Examples:
>>> [introot(-729, 3), introot(-728, 3), introot(1023, 2), introot(1024, 2)]
[-9, -8, 31, 32]
"""
if n < 0: return None if r%2 == 0 else -introot(-n, r)
if n < 2: return n
if r == 1: return n
if r == 2: return isqrt(n)
#if r % 2 == 0: return introot(isqrt(n), r//2) # TODO Check validity of this line.
lower = upper = 1 << (n.bit_length() // r)
while lower ** r > n: lower >>= 2
while upper ** r <= n: upper <<= 2
while lower != upper - 1:
mid = (lower + upper) // 2
m = mid**r
if m == n: return mid
elif m < n: lower = mid
elif m > n: upper = mid
return lower
def ispower(n, r=0):
"""
If r == 0:
If n is a perfect power, return a tuple containing largest integer (in
terms of magnitude) that, when squared/cubed/etc, yields n as the first
component and the relevant power as the second component.
If n is not a perfect power, return None.
If r > 0:
We check whether n is a perfect rth power; we return its rth root if it
is and None if it isn't.
Input:
n -- an integer
r -- an integer
Output: An integer, a 2-tuple of integers, or None
Examples:
>>> [ispower(n) for n in [64, 25, -729, 1729]]
[(8, 2), (5, 2), (-9, 3), None]
>>> [ispower(64, r) for r in range(7)]
[(8, 2), 64, 8, 4, None, None, 2]
"""
#if r == 0: return any(ispower(n, r) for r in primegen(n.bit_length()+1))
#return n == introot(n, r) ** r
if r == 0:
if n in (0, 1, -1): return (n, 1)
for r in primegen(n.bit_length()+1):
x = ispower(n, r)
if x is not None: return (x, r)
return None
# TODO tricks for special cases
if (r == 2) and (n & 2): return None
if (r == 3) and (n & 7) in (2,4,6): return None
x = introot(n, r)
return None if x is None else (x if x**r == n else None)
def jacobi(a, n):
"""
The Jacobi symbol (a|n).
Input:
a -- any integer
n -- odd integer
Output: -1, 0, or 1
Examples:
>>> [jacobi(a, 15) for a in [-10, -7, -4, -2, -1, 0, 1, 2, 4, 7, 10]]
[0, 1, -1, -1, -1, 0, 1, 1, 1, -1, 0]
>>> [jacobi(a, 13) for a in [-10, -9, -4, -2, -1, 0, 1, 2, 4, 9, 10]]
[1, 1, 1, -1, 1, 0, 1, -1, 1, 1, 1]
>>> [jacobi(a, 11) for a in [-10, -9, -4, -2, -1, 0, 1, 2, 4, 9, 10]]
[1, -1, -1, 1, -1, 0, 1, -1, 1, 1, -1]
"""
if (n%2 == 0) or (n < 0): return None # n must be a positive odd number TODO delete this check?
if (a == 0) or (a == 1): return a
a, t = a%n, 1
while a != 0:
while not a & 1:
a //= 2
if n & 7 in (3, 5): t *= -1
a, n = n, a
if (a & 3 == 3) and (n & 3) == 3: t *= -1
a %= n
return t if n == 1 else 0
def isprime(n, tb=(3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59)): # TODO optimize the basis, possibly by varying it with n
"""
BPSW primality test variant: we use the strong Lucas PRP test and preface
the computation with trial division for speed. No composites are known to
pass the test, though it is suspected that infinitely many will do so.
There are definitely no such errors below 2^64.
This function is mainly a streamlined version of bpsw().
Input:
n -- integer. Number to be examined.
tb -- iterable of primes. Basis for trial division.
Output: True if probably prime; False if definitely composite.
Examples:
>>> [n for n in range(91) if isprime(1000*n+1)]
[3, 4, 7, 9, 13, 16, 19, 21, 24, 28, 51, 54, 55, 61, 69, 70, 76, 81, 88, 90]
>>> [isprime(rpn("38 ! 1 +")), isprime(rpn("38 ! 1 -"))]
[False, True]
"""
# 1. Do some trial division with tb as the basis.
if n % 2 == 0 or n < 3: return n == 2
for p in tb:
if n % p == 0: return n == p
# 2. If sprp(n,2) fails, return False. If it succeeds, continue.
t, s = (n - 1) // 2, 1
while t % 2 == 0: t //= 2; s += 1
#assert 1 + 2**s * t == n
x = pow(2, t, n)
if x != 1 and x != n - 1:
for j in range(1, s):
x = pow(x, 2, n)
if x == 1: return False
elif x == n - 1: break
else: return False
# 3. Select parameters for slprp.
for D in count(5, 4):
j = jacobi(D, n)
if j == 0: return D == n
if j == -1: break
D = -2 - D
j = jacobi(D, n)
if j == 0: return -D == n
if j == -1: break
if D == -13 and ispower(n,2): return False # If n is square, then this loop amounts to very slow trial division.
# Now run slprp(n, 1, (1 - D) // 4) and return the result.
b = (1 - D) // 4
if 1 < gcd(n, b) < n: return False
s, t = 1, (n + 1) // 2
while t % 2 == 0: s += 1; t //= 2
v, w, q, Q = 2, 1, 1, 1
for k in bin(t)[2:]:
q = (q*Q) % n
if k == '1': Q, v, w = (q*b) % n, (w*v - q) % n, (w*w - 2*q*b) % n
else: Q, w, v = q , (w*v - q) % n, (v*v - 2*q ) % n
# assert ( (2*w-v) * modinv(D,n) ) % n, v == lucasmod(t, 1, b, n)
if v == 0 or ( (2*w-v) * modinv(D,n) ) % n == 0: return True
q = pow(b, t, n)
for _ in range(1, s):
v = (v*v - 2*q) % n
if v == 0: return True
q = (q*q) % n
return False
def pollardrho_brent(n, verbose=False):
"""
Factors integers using Brent's variation of Pollard's rho algorithm.
If n is prime, we immediately return n; if not, we keep chugging until a
nontrivial factor is found. This function calls the randomizer; two
successive calls may therefore return two different results.
Input: n -- number to factor
Output: A factor of n.
Examples:
>>> n = rpn("20 ! 1 +"); f = pollardrho_brent(n); n % f
0
"""
if isprime(n): return n
g = n
while g == n:
y, c, m, g, r, q = randrange(1, n), randrange(1, n), randrange(1, n), 1, 1, 1
while g==1:
x, k = y, 0
for i in range(r): y = (y**2 + c) % n
while k < r and g == 1:
ys = y
for i in range(min(m, r-k)):
y = (y**2 + c) % n
q = q * abs(x-y) % n
g, k = gcd(q, n), k+m
r *= 2
if g==n:
while True:
ys = (ys**2+c)%n
g = gcd(x-ys, n)
if g > 1: break
return g
def pollard_pm1(n, B1=100, B2=1000, verbose=False): # TODO: What are the best default bounds and way to increment them?
"""
Integer factoring function. Uses Pollard's p-1 algorithm. Note that this
is only efficient if the number to be factored has a prime factor p such
that p-1's largest prime factor is "small". In this implementation, that
tends to mean less than 10,000,000 or so.
Input:
n -- number to factor
B1 -- Natural number. Bound for phase 1. Default == 100.
B2 -- Natural number > B1. Bound for phase 2. Default == 1000.
Output: A factor of n.
Examples:
>>> pollard_pm1(rpn("28 ! 1 - 239 //"))
1224040923709997
"""
if isprime(n): return n
m = ispower(n)
if m: return m[0]
while True:
pg = primegen()
q = 2 # TODO: what about other initial values of q?
p = next(pg)
while p <= B1: q, p = pow(q, p**ilog(B1, p), n), next(pg)
g = gcd(q-1, n)
if 1 < g < n: return g
while p <= B2: q, p = pow(q, p, n), next(pg)
g = gcd(q-1, n)
if 1 < g < n: return g
# These bounds failed. Increase and try again.
B1 *= 10
B2 *= 10
def mlucas(v, a, n):
# Helper for williams_pp1(). Multiplies along a Lucas sequence mod n.
v1, v2 = v, (v**2 - 2) % n
for bit in bin(a)[3:]: v1, v2 = ((v1**2 - 2) % n, (v1*v2 - v) % n) if bit == "0" else ((v1*v2 - v) % n, (v2**2 - 2) % n)
return v1
def williams_pp1(n, verbose=False): # TODO: experiment with different values of v0, and implement the two-phase version
"""
Integer factoring function. Uses Williams' p+1 algorithm, single-stage
variant. Note that this is only efficient when the number to be factored
has a prime factor p such that p+1's largest prime factor is "small".
Input: n -- integer to factor
Output: Integer. A nontrivial factor of n.
Example:
>>> williams_pp1(315951348188966255352482641444979927)
12403590655726899403
"""
if isprime(n): return n
m = ispower(n)
if m: return m[0]
for v in count(3):
for p in primegen():
e = ilog(isqrt(n), p)
if e == 0: break
for _ in range(e): v = mlucas(v, p, n)
g = gcd(v - 2, n)
if 1 < g < n: return g
if g == n: break
def ecadd(p1, p2, p0, n):
# Helper for ecm(). Adds two points on a Montgomery curve mod n.
x1,z1 = p1; x2,z2 = p2; x0,z0 = p0
t1, t2 = (x1-z1)*(x2+z2), (x1+z1)*(x2-z2)
return (z0*pow(t1+t2,2,n) % n, x0*pow(t1-t2,2,n) % n)
def ecdub(p, A, n):
# Helper for ecm(). Doubles a point on a Montgomery curve mod n.
x, z = p; An, Ad = A
t1, t2 = pow(x+z,2,n), pow(x-z,2,n)
t = t1 - t2
return (t1*t2*4*Ad % n, (4*Ad*t2 + t*An)*t % n)
def ecmul(m, p, A, n):
# Helper for ecm(). Multiplies a point on a Montgomery curve mod n.
if m == 0: return (0, 0)
elif m == 1: return p
else:
q = ecdub(p, A, n)
if m == 2: return q
b = 1
while b < m: b *= 2
b //= 4
r = p
while b:
if m&b: q, r = ecdub(q, A, n), ecadd(q, r, p, n)
else: q, r = ecadd(r, q, p, n), ecdub(r, A, n)
b //= 2
return r
def secm(n, B1, B2, seed):
"""
Seeded ECM. Helper function for ecm(). Returns a possibly-trivial divisor
of n given two bounds and a seed. Uses the two-phase algorithm on
Montgomery curves. See https://wp.me/prTJ7-zI and https://wp.me/prTJ7-A7
for more details. Most of the code for this function's "helpers" were
shamelessly copied from the first of those links.
Input:
n -- Integer to factor
B1 -- Integer. Number of iterations for the first phase.
B2 -- Integer. Number of iterations for the second phase.
seed -- Integer. Selects the specific curve we'll be working on.
Output: Integer. A possibly-trivial factor of n.
Examples:
>>> secm(rpn('24 ! 1 -'), 100, 1000, 22)
991459181683
"""
u, v = (seed**2 - 5) % n, 4*seed % n
p = pow(u, 3, n)
Q, C = (pow(v-u,3,n)*(3*u+v) % n, 4*p*v % n), (p, pow(v,3,n))
pg = primegen()
p = next(pg)
while p <= B1: Q, p = ecmul(p**ilog(B1, p), Q, C, n), next(pg)
g = gcd(Q[1], n)
if 1 < g < n: return g
while p <= B2:
# There is a trick that can speed up the second stage. Instead of multiplying each prime by Q, we iterate over i from
# B1 + 1 to B2, adding 2Q at each step; when i is prime, the current Q can be accumulated into the running solution.
# Again, we defer the calculation of the GCD until the end of the iteration. TODO: Implement and compare performance.
Q = ecmul(p, Q, C, n)
g *= Q[1]
g %= n
p = next(pg)
return gcd(g, n)
def ecmparams(n): # TODO: Better parameters.
counter = 0
for i in count():
for _ in range(2*i+1):
yield (2**i, 10 * 2**i, randrange(6,n), counter)
counter += 1
for j in range(i+1):
yield (2**j, 10 * 2**j, randrange(6,n), counter)
counter += 1
def ecm(n, paramseq=ecmparams, nprocs=1, verbose=False):
"""
"Modern" integer factoring via elliptic curves. Uses Montgomery curves, the
two-phase algorithm, and (optionally) multiple processes. The hard work is
done by secm(); this function just does the managerial work of pulling a
sequence of parameters out of a generator and feeding them into secm().
Input:
n -- number to factor
paramseq -- sequence of parameters to feed into secm(). It must be an
infinite generator of 4-tuples (a,b,c,d), where a is the
number of iterations for the first phase, b is the number of
iterations for the second phase, c is a seed to select the
curve to work on, and d is an auxiliary used to count the
parameters generated so far. We need a < b and 6 <= c < n.
nprocs -- number of processes to use. Default == 1. Setting this > 1
is discouraged on "small" inputs because managing multiple
processes incurs significant overhead.
Output:
A factor of n.
Note that if the parameter sequence calls the randomizer (which is
currently the default behavior), then two successive calls may therefore
return two different results.
Examples:
>>> n = 625793187653 * 991459181683 # = 620448401733239439359999 = 24! - 1
>>> f = ecm(n)
>>> (n//f) * f
620448401733239439359999
"""
g = n % 6
if g % 2 == 0: return 2
if g % 3 == 0: return 3
if isprime(n): return n
m = ispower(n)
if m: return m[0]
if nprocs == 1:
for (B1,B2,seed,i) in paramseq(n):
f = secm(n, B1, B2, seed)
if 1 < f < n: return f
assert nprocs != 1
def factory(params, output): output.put(secm(*params))
ps, facs, procs = paramseq(n), mpQueue(), []
procs = [Process(target=factory, args=((n,)+next(ps)[:3], facs)) for _ in range(nprocs)]
for p in procs: p.start()
while True:
g = facs.get()
if 1 < g < n:
for p in procs: p.terminate()
return g
for p in range(nprocs): # TODO: Try doing this with a process pool.
if not procs[p].is_alive():
del procs[p]
break
procs.append(Process(target=factory, args=((n,)+next(ps)[:3], facs)))
procs[-1].start()
def sqrtmod_prime(a, p):
"""
Solves x**2 == a (mod p) for x. We assume that p is a prime and a is a
quadratic residue modulo p. If either of these assumptions is false, the
return value is meaningless.
The Cipolla-Lehmer section is my own. The rest appears to be derived from
https://codegolf.stackexchange.com/a/9088.
Input:
a -- natural number
p -- prime number
Output: whole number less than p
Examples:
>>> sqrtmod_prime(4, 5)
3
>>> sqrtmod_prime(13, 23)
6
>>> sqrtmod_prime(997, 7304723089)
761044645
"""
a %= p
if p%4 == 3: return pow(a, (p+1) >> 2, p)
elif p%8 == 5:
v = pow(a << 1, (p-5) >> 3, p)
return (a*v*(((a*v*v<<1)%p)-1))%p
elif p%8 == 1:
# CranPom ex 2.31, pg 112 / 126. Pretty sure this amounts to Cipolla-Lehmer.
if a == 0: return 0 # Necessary to avoid an infinite loop in the legendre section
h = 2
# while legendre(h*h - 4*a, p) != -1:
while pow(h*h - 4*a, (p-1) >> 1, p) != p - 1: h += 1 # TODO compare speed vs random selection
#return ( lucasmod((p+1)//2, h, a, p)[1] * modinv(2, p) ) % p
k, v, w, q, Q = (p+1)//2, 2, h % p, 1, 1
for kj in bin(k)[2:]:
q = (q*Q) % p
if kj == '1': Q, v, w = (q*a) % p, (w*v - h*q) % p, (w*w - 2*q*a) % p
else: Q, w, v = q , (w*v - h*q) % p, (v*v - 2*q ) % p
return (v*k) % p
else: return a # p == 2
def siqs(n, verbose=False):
"""
Uses the Self-Initializing Quadratic Sieve to extract a factor of n.
We make some calls to randrange, so the output may change from call to call.
This is derived from https://github.com/skollmann/PyFactorise.
Input:
n -- number to factor
verbose -- if True, print progress reports. Default == False.
Output:
If n is prime, we return n.
If n is composte, we return a nontrivial factor of n.
If n is too small, we raise a ValueError.
Examples:
>>> siqs(factorial(24) - 1) in (625793187653, 991459181683)
True
"""
if (not isinstance(n, int)) or n < 0: raise ValueError("Number must be a positive integer.")
if n < 2**64:
if verbose: print("Number is too small for SIQS. Using Pollard Rho instead.")
return pollardrho_brent(n)
if verbose: print("Factorizing %d (%d digits)..." % (n, len(str(n))))
if isprime(n): return n
if verbose: print("Number is composite.")
if verbose: print("Checking whether it is a perfect power...")
perfect_power = ispower(n)
if perfect_power:
if verbose: print(n, "=", "%d^%d." % (perfect_power[0], perfect_power[1]))
return perfect_power[0]
if verbose: print("Not a perfect power.")
if verbose: print("Using SIQS on %d (%d digits)..." % (n, len(str(n))))
if verbose: starttime, sievetime, latime = time(), 0, 0
# Choose parameters nf (sieve of factor base) and m (for sieving in [-m,m].
# Using similar parameters as msieve-1.52
dig = len(str(n))
if dig <= 34: nf, m = 200, 65536
elif dig <= 36: nf, m = 300, 65536
elif dig <= 38: nf, m = 400, 65536
elif dig <= 40: nf, m = 500, 65536
elif dig <= 42: nf, m = 600, 65536
elif dig <= 44: nf, m = 700, 65536
elif dig <= 48: nf, m = 1000, 65536
elif dig <= 52: nf, m = 1200, 65536
elif dig <= 56: nf, m = 2000, 65536 * 3
elif dig <= 60: nf, m = 4000, 65536 * 3
elif dig <= 66: nf, m = 6000, 65536 * 3
elif dig <= 74: nf, m = 10000, 65536 * 3
elif dig <= 80: nf, m = 30000, 65536 * 3
elif dig <= 88: nf, m = 50000, 65536 * 3
elif dig <= 94: nf, m = 60000, 65536 * 9
else: nf, m = 100000, 65536 * 9
class FactorBasePrime:
"""A factor base prime for the SIQS"""
def __init__(self, p, tmem, lp):
self.p, self.soln1, self.soln2, self.tmem, self.lp, self.ainv = p, None, None, tmem, lp, None
# Compute and return nf factor base primes suitable for a SIQS on n.
factor_base = []
for p in primegen():
if pow(n, (p-1) >> 1, p) == 1: # if n is a quadratic residue mod p
t = sqrtmod_prime(n, p)
lp = round(log2(p)) # This gets rounded for the sake of speed.
factor_base.append(FactorBasePrime(p, t, lp))
if len(factor_base) >= nf: break
if verbose: print("Factor base size: %d primes.\nLargest prime in base: %d." % (nf, factor_base[-1].p))
npolys, i_poly, prev_cnt, relcount, smooth_relations, required_relations_ratio = 0, 0, 0, 0, [], 1.05
p_min_i, p_max_i = None, None
for (i,fb) in enumerate(factor_base):
if p_min_i is None and fb.p >= 400: # 400 is a tunable parameter.
p_min_i = i
if p_max_i is None and fb.p > 4000: # 4000 is a tunable parameter.
p_max_i = i - 1
break
# The following may happen if the factor base is small, make sure that we have enough primes.
if p_max_i is None: p_max_i = len(factor_base) - 1
if p_min_i is None or p_max_i - p_min_i < 20: p_min_i = min(p_min_i, 5) # TODO This line is problematic for some small n.
target = sqrt(2 * float(n)) / m
target1 = target / sqrt((factor_base[p_min_i].p + factor_base[p_max_i].p) / 2)
while True:
if verbose: temptime = time()
if verbose: print("*** Phase 1: Finding smooth relations ***")
required_relations = round(nf * required_relations_ratio)
if verbose: print("Target: %d relations" % required_relations)
enough_relations = False
while not enough_relations:
if i_poly == 0: # Compute the first of a set of polynomials
# find q such that the product of factor_base[q_i] is about sqrt(2n)/m; try a few sets to find a good one
best_q, best_a, best_ratio = None, None, None
for _ in range(30):
a, q = 1, []
while a < target1:
p_i = 0
while p_i == 0 or p_i in q: p_i = randrange(p_min_i, p_max_i+1)
a *= factor_base[p_i].p
q.append(p_i)
ratio = a / target
# ratio too small seems to be not good
if (best_ratio is None or (ratio >= 0.9 and ratio < best_ratio) or best_ratio < 0.9 and ratio > best_ratio):
best_q, best_a, best_ratio = q, a, ratio
a, q = best_a, best_q
s, B = len(q), []
for l in range(s):
fb_l = factor_base[q[l]]
q_l = fb_l.p
assert a % q_l == 0
gamma = (fb_l.tmem * modinv(a // q_l, q_l)) % q_l
if gamma > q_l // 2: gamma = q_l - gamma
B.append(a // q_l * gamma)
b = sum(B) % a
b_orig = b
if (2 * b > a): b = a - b
assert 0 < b and 2 * b <= a and (b * b - n) % a == 0
g1, g2, g3, ga, gb = b * b - n, 2 * a * b, a * a, a, b_orig
ha, hb = a, b
for fb in factor_base:
if a % fb.p != 0:
fb.ainv = modinv(a, fb.p)
fb.soln1 = (fb.ainv * (fb.tmem - b)) % fb.p
fb.soln2 = (fb.ainv * (-fb.tmem - b)) % fb.p
else: # Compute the (i+1)-th polynomial, given that g is the i-th polynomial.
#v = lowest_set_bit(i) + 1
#z = -1 if ceil(i / (2 ** v)) % 2 == 1 else 1
z = -1 if ceil(i_poly / (1 + (i_poly ^ (i_poly-1)))) % 2 == 1 else 1
#b = (g.b + 2 * z * B[v - 1]) % g.a
b = (gb + 2 * z * B[(i_poly & (-i_poly)).bit_length() - 1]) % ga
a = ga
b_orig = b
if (2 * b > a): b = a - b
assert (b * b - n) % a == 0
g1, g2, g3, ga, gb = b * b - n, 2 * a * b, a * a, a, b_orig
ha, hb = a, b
for fb in factor_base:
if a % fb.p != 0:
fb.soln1 = (fb.ainv * ( fb.tmem - b)) % fb.p
fb.soln2 = (fb.ainv * (-fb.tmem - b)) % fb.p
i_poly += 1
npolys += 1
if i_poly >= 2 ** (len(B) - 1): i_poly = 0
# BEGIN SIEVING. Most of our time is spent between here and the "END SIEVING" comment.
sieve_array = [0] * (2 * m + 1)
for fb in factor_base:
if fb.soln1 is None: continue
p, lp = fb.p, fb.lp
a_start_1 = fb.soln1 - ((m + fb.soln1) // p) * p
if p > 20:
for a in range(a_start_1 + m, 2 * m + 1, p): sieve_array[a] += lp
a_start_2 = fb.soln2 - ((m + fb.soln2) // p) * p
for a in range(a_start_2 + m, 2 * m + 1, p): sieve_array[a] += lp
# Perform the trial division step of the SIQS.
limit = round(log2(m * sqrt(float(n)))) - 25 # 25 is a tunable parameter. The rounding is for speed.
for (i,sa) in enumerate(sieve_array):
if sa >= limit:
x = i - m
gx = (g3 * x + g2) * x + g1
# Determine whether gx can be fully factorized into primes from the factor base.
# If so, store the indices of the factors from the factor base as divisors_idx. If not, set that to None.
a, divisors_idx = gx, []
for (fbi,fb) in enumerate(factor_base):
if a % fb.p == 0:
exp = 0
while a % fb.p == 0:
a //= fb.p
exp += 1
divisors_idx.append((fbi, exp))
if a == 1:
u = ha * x + hb
v = gx
assert (u * u) % n == v % n
smooth_relations.append((u, v, divisors_idx))
break
relcount = len(smooth_relations)
if relcount >= required_relations:
enough_relations = True
break
# END SIEVING. Most of our time is spent between here and the "BEGIN SIEVING" comment.
if verbose and relcount > 0 and (relcount >= required_relations or i_poly % 8 == 0 or relcount > prev_cnt):
frac = relcount / required_relations
t = time() - starttime # Time spent so far
ett = t / frac # Estimated total time
ettc = ett - t # Estimated time to completion
eta = datetime.isoformat(datetime.now() + timedelta(seconds=ettc), sep=' ', timespec='seconds')
print('\b'*256 + "Found %d rels (%.1f%%) on %d polys. ETA %ds (%s). " % (relcount, 100*frac, npolys, ettc, eta), end='', flush=True)
prev_cnt = relcount
if verbose:
sievetime += time() - temptime
print("\nSieving time: %f seconds." % sievetime)
temptime = time()
print("*** Phase 2: Linear Algebra ***")
print("Building matrix for linear algebra step...")
M = [0] * nf
mask = 1
for sr in smooth_relations:
for (j,exp) in sr[2]:
if exp % 2: M[j] += mask
mask <<= 1
if verbose: print("Finding perfect squares using matrix and factors from perfect squares...")
# Perform the linear algebra step of the SIQS: do fast Gaussian elimination to determine pairs of perfect squares mod n.
# Use the optimisations described in [Çetin K. Koç and Sarath N. Arachchige. 'A Fast Algorithm for Gaussian Elimination
# over GF(2) and its Implementation on the GAPP.' Journal of Parallel and Distributed Computing 13.1 (1991): 118-122].
if verbose: gaussstart = time()
row_is_marked = bytearray([False]) * relcount
pivots = [-1] * nf
for j in range(nf):
M_j = M[j]
i = (M_j & (-M_j)).bit_length() - 1 #i = -1 if M[j] == 0 else lowest_set_bit(M[j])
# i is now the row of the first nonzero entry in column j, or -1 if no such row exists.
if i > -1:
pivots[j] = i
row_is_marked[i] = True
for k in range(nf):
if (M[k] >> i) & 1 and k != j: # test M[i][k] == 1
M[k] ^= M_j # add column j to column k mod 2
if verbose and ((nf - j) % 100 == 0 or nf == j + 1):
frac = (j+1) / nf
t = time() - gaussstart # Time spent so far
ett = t / frac # Estimated total time
ettc = ett - t # Estimated time to completion
eta = datetime.isoformat(datetime.now() + timedelta(seconds=ettc), sep=' ', timespec='seconds')
print('\b'*256 + "Gaussian elimination: %d/%d (%.1f%%). ETA %ds (%s). " % (j+1, nf, 100*frac, ettc, eta), end='', flush=True)
if verbose: print("\nGaussian elimination time: %f seconds" % (time() - gaussstart))
attempts = 0
for i in range(relcount):
if not row_is_marked[i]:
square_indices = [i]
for j in range(nf):
if (M[j] >> i) & 1: # test M[i][j] == 1
square_indices.append(pivots[j])
# Given the solution encoded by square_indices, try to find a factor of n, and return it.
attempts += 1
if verbose: print('\b'*42 + "Attempt #%d" % attempts, end='', flush=True)
# Given on of the solutions returned by siqs_solve_matrix and the corresponding smooth relations,
# calculate the pair (sqrt1, sqrt2) such that sqrt1^2 = sqrt2^2 (mod n).
sqrt1, sqrt2 = 1, 1
for idx in square_indices:
sqrt1 *= smooth_relations[idx][0]
sqrt2 *= smooth_relations[idx][1]
sqrt2 = isqrt(sqrt2)
assert (sqrt1 * sqrt1) % n == (sqrt2 * sqrt2) % n
factor = gcd(sqrt1 - sqrt2, n)
if 1 != factor != n:
if verbose:
print(" succeeded.")
latime += time() - temptime
print("Linear algebra time: %f seconds." % latime)
totaltime = time() - starttime
print("Total time: %f seconds." % (totaltime))
return factor
if verbose:
print('\b'*42 + "All %d attempts failed." % attempts)
latime += time() - temptime
print("Linear algebra time: %f seconds." % latime)
print("Failed to find a solution. Finding more relations...")
required_relations_ratio += 0.05
def multifactor(n, methods=(pollardrho_brent, pollard_pm1, williams_pp1, ecm, siqs), verbose=False):
"""
Integer factoring function. Uses several methods in parallel. Waits for a
function to return, kills the rest, and reports. Note that two successive
calls may return different results depending on which method finishes first
and whether any methods call the randomizer.
Input:
n -- number to factor
methods -- list of functions to run.
Output: A factor of n.
Examples:
>>> methods = [pollardrho_brent, pollard_pm1, williams_pp1, ecm, siqs]
>>> n = rpn("24 ! 1 -"); f = multifactor(n, methods); n%f
0
"""
# Note that the multiprocing incurs relatively significant overhead. Only call this if n is proving difficult to factor.
def factory(method, n, verbose, output): output.put((method(n, verbose=verbose), str(method).split()[1]))
factors = mpQueue()
procs = [Process(target=factory, args=(m, n, verbose, factors)) for m in methods]
for p in procs: p.start()
(f, g) = factors.get()
for p in procs: p.terminate()
names = {"pollardrho_brent":"prb", "pollard_pm1":"p-1", "williams_pp1":"p+1"}
return (f, names.get(g, g))
def primefac(n, trial=1000, rho=42000, verbose=False, methods=(pollardrho_brent,)):
"""
Generates the prime factors of the input. Factors that appear x times are
yielded x times.
Input:
n -- the number to be factored
trial -- Trial division is performed up to this limit and no further.
We trial divide by 2 and 3 whether the user wants to or not.
Default == 1000.
rho -- How long to have Pollard's rho chug before declaring a cofactor
difficult. Default == 42,000 iterations. Floating-point inf is
an acceptable value.
methods -- Use these methods on difficult cofactors. If the tuple has
more than 1 element, we have multifactor handle it. Calling
multifactor has a high overhead, so when the tuple has a
single element, we call that function directly. The default
is (pollardrho_brent,). Each function f in methods must
accept a single number n as its argument. If n is prime,
f(n) must return n. If n is composite, f(n) must return a
number strictly between 1 and n that evenly divides n.
Giving up is not allowed.
Output: Prime factors of n
Factoring:
We use a three-stage factoring algorithm.
1. Trial divide with all primes less than or equal to the specified
limit. We trial divide by 2 and 3 regardless of the limit.
2. Run Pollard's rho algorithm on whatever remains. This algorithm
may split a number into two composite cofactors. Such cofactors
remain here until they survive the specified number of rounds of
the rho algorithm.
3. Subject each remaining cofactor to a set of up to five factoring
methods in parallel:
Pollard's rho algorithm with Brent's improvement,
Pollard's p-1 method,
Williams' p+1 method,
the elliptic curve method,
and the self-initializing quadratic sieve.
Examples:
>>> list(primefac(1729))
[7, 13, 19]
>>> list(sorted(primefac(rpn("24 ! 1 -"))))
[625793187653, 991459181683]
"""
# Obtains a complete factorization of n, yielding the prime factors as they are obtained.
# If the user explicitly specifies a splitting method, use that method. Otherwise,
# 1. Pull out small factors with trial division.
# 2. Do a few rounds of Pollard's Rho algorithm.
# 3. Launch multifactor on the remainder. Note that multifactor's multiprocessing incurs relatively significant overhead.
if verbose: print("\nFactoring %d (%d digits):" % (n, len(str(n))))
if n < 0:
if verbose: print("Trial division: -1")
yield -1; n *= -1
if n < 2: return
if isprime(n):
if verbose: print("Number is prime.")
yield n
return
if verbose: print("Number is composite.")
# Trial division
factors, nroot = [], isqrt(n)
for p in primegen(max(4,trial)+1): # Note that we check for 2 and 3 whether the user wants to or not.
if verbose: print('\b'*80 + "Trial division:", p, end='', flush=False)
if n%p == 0:
while n%p == 0:
if verbose: print('\b'*80 + "Trial division:", p)
yield p
n //= p
nroot = isqrt(n)
if p > nroot:
if n != 1:
if verbose:
laststr = "Trial division: " + str(p)
print('\b'*80 + str(n) + " " * len(laststr))
yield n
return
if isprime(n):
if verbose:
laststr = "Trial division: " + str(p)
print('\b'*80 + str(n) + " " * len(laststr))
yield n
return
if verbose: print("\nTrial division finished.")
# TODO: Fermat's method?
# Pollard's rho
factors, difficult = [n], []
while len(factors) != 0:
rhocount = 0
n = factors.pop()
if verbose: print("Beginning Pollard Rho on %d." % n)
try:
g = n
while g == n:
x, c, g = randrange(1, n), randrange(1, n), 1
y = x
while g==1:
if rhocount >= rho: raise Exception
rhocount += 1
x = (x**2 + c) % n
y = (y**2 + c) % n
y = (y**2 + c) % n
g = gcd(x-y, n)
# We now have a nontrivial factor g of n. If we took too long to get here, we're actually at the except statement.
f = n // g
if verbose: print("Pollard Rho split %d into %d and %d." % (n, g, f))
if isprime(g):
if verbose: print(g, "is prime.")
yield g
else:
if verbose: print(g, "is composite.")
factors.append(g)
if isprime(f):
if verbose: print(f, "is prime.")
yield f
else:
if verbose: print(f, "is composite.")
factors.append(f)
except Exception:
if verbose: print("Declaring", n, "difficult.")
difficult.append(n) # Factoring n took too long. We'll have multifactor chug on it.
# TODO: P-1 by itself?
# TODO: ECM by itself?
names = {"pollardrho_brent":"prb", "pollard_pm1":"p-1", "williams_pp1":"p+1", "ecm":"ecm", "siqs":"siqs"}
factors = difficult
if len(methods) == 1:
method = methods[0]
name = names[str(method).split()[1]]
while len(factors) != 0:
n = factors.pop()
if verbose: print("Beginning %s on %d." % (name, n))
f = method(n, verbose=verbose)
g = n // f
if verbose: print("%s split %d into %d and %d." % (name, n, f, g))
if isprime(f):
if verbose: print(f, "is prime.")
yield f
else:
if verbose: print(f, "is composite.")
factors.append(f)
if isprime(g):
if verbose: print(g, "is prime.")
yield g
else:
if verbose: print(g, "is composite.")
factors.append(g)
else:
while len(factors) != 0:
n = factors.pop()
if verbose:
#methodstring = ", ".join(names[str(m).split()[1]] for m in methods)
if len(methods) == 2: methodstring = " and ".join(names[str(m).split()[1]] for m in methods)
else:
assert len(methods) > 2
methodstring = ", ".join(names[str(m).split()[1]] for m in methods[:-1])
methodstring += ", and " + names[str(methods[-1]).split()[1]]
if verbose: print("Beginning %s on %d." % (methodstring, n))
f, name = multifactor(n, methods=methods, verbose=verbose)
g = n // f
if verbose: print("%s split %d into %d and %d." % (name, n, f, g))
if isprime(f):
if verbose: print(f, "is prime.")
yield f
else:
if verbose: print(f, "is composite.")
factors.append(f)
if isprime(g):
if verbose: print(g, "is prime.")
yield g
else:
if verbose: print(g, "is composite.")
factors.append(g)