Project is now named Celeste

This commit is contained in:
Emile Clark-Boman 2025-07-06 19:20:20 +10:00
parent 87917f9526
commit 269092fb53
45 changed files with 1507 additions and 12 deletions

0
celeste/__init__.py Normal file
View file

View file

View file

@ -0,0 +1,106 @@
from math import inf, ceil
from typing import Callable
from celeste.math.util import clamp_max
SPACER = b'\xff' # arbitrary spacing character
class DecryptFailed(Exception):
pass
def round_to_blocks(n: int, block_size: int) -> int:
return ceil(n / block_size)
def crack_secret_len(cipher: Callable[[str], str],
max_iters: int = inf) -> int | None:
# calculate length for i = 1
init_len = len(cipher(SPACER))
secret_len = None
i = 2
while True:
if i - 2 > max_iters:
break
elif len(cipher(SPACER*i)) != init_len:
secret_len = init_len - i
break
i += 1
return secret_len
'''
NOTE: pad_if_perfect exists for PKCS#7 which will add a full block
NOTE: of padding if the input is perfectly alligned to the blocks already.
'''
def crack(cipher: Callable[[str], str],
padfn: Callable[[bytes, int], bytes],
charset: list,
block_size: int,
max_secret_iters: int = inf,
batch_size: int = 1,
pad_if_perfect: bool = True,
debug: bool = False) -> str | None:
if len(charset) % batch_size:
raise ValueError(f'batch_size={batch_size} does not divide len(charset)={len(charset)}')
# calculate the secret length
secret_len = crack_secret_len(cipher, max_iters=max_secret_iters)
if debug:
print(f'[+] Found secret length: {secret_len}')
# calculate how many blocks the secret stretches over
# NOTE: secret_block_len - secret_len represents the number of
# NOTE: bytes required to make the secret fill the blocks with no padding
secret_block_len = round_to_blocks(secret_len, block_size) * block_size
default_push = (secret_block_len - secret_len)
known = b''
while True:
# the "full tail" is all characters we know + the 1 we're cracking (target)
# the "current tail" is the characters in the same block as the target
full_tail_bytes = len(known) + 1
tail_bytes = clamp_max(full_tail_bytes, block_size - 1)
# generate ALL possible tails (avoid padding if no padding required)
tails = [c + known[:tail_bytes] for c in charset]
if len(tails[0]) != block_size:
tails = [padfn(tail, 16) for tail in tails]
# calculate the "push" applied to the secret
push_size = (default_push + full_tail_bytes) % block_size
matched = False
NUM_BATCHES = len(tails) // batch_size
for i in range(NUM_BATCHES):
if debug:
print(f'{int(i/NUM_BATCHES*100)}%', end='\r')
batch = tails[i*batch_size : (i+1)*batch_size]
batch = b''.join(batch) + (SPACER * push_size) # apply spacing
# encrypt batch and split the ciphertext into blocks
ciphertext = cipher(batch)
num_blocks = len(ciphertext)//block_size
blocks = [ciphertext[i*block_size : (i+1)*block_size] for i in range(num_blocks)]
oracle_pos = round_to_blocks(full_tail_bytes, block_size)
if pad_if_perfect and (push_size + secret_len) % block_size == 0:
oracle_pos += 1
for j, cipher_block in enumerate(blocks[:batch_size]):
if cipher_block == blocks[-oracle_pos]:
char = charset[i*batch_size + j]
known = char + known
if debug:
print(f'[*] Found Tail: {known}')
matched = True
break
if matched:
break
if not matched:
break
elif len(known) == secret_len:
if debug:
print('[+] SUCCESS')
return known
# if we reached the end (no return)
# then the attack failed
err_msg = 'Padding oracle attack failed'
if not debug:
raise DecryptFailed(err_msg)
print(f'\n[!] {err_msg}')

20
celeste/constants.py Normal file
View file

@ -0,0 +1,20 @@
'''
ASCII Character Ranges
'''
# Common
WHITESPACE = '\t\n\r\x0b\x0c'
DIGITS = '0123456789'
ALPHA_LOWER = 'abcdefghijklmnopqrstuvwxyz'
ALPHA_UPPER = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
ALPHA = ALPHA_LOWER + ALPHA_UPPER
ALPHANUM = ALPHA + DIGITS
SYMBOLS = '!\"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'
PRINTABLE = ALPHANUM + SYMBOLS
# Other
DIGITS_BIN = '01'
DIGITS_OCT = '01234567'
DIGITS_HEX_LOWER = '0123456789abcdef'
DIGITS_HEX_UPPER = '0123456789ABCDEF'
CHARSET_HEX = '0123456789abcdefABCDEF'
DIGITS_B64 = ALPHANUM + '+/'
CHARSET_B64 = DIGITS_B64 + '=' # Base64 charset contains = padding

View file

137
celeste/crypto/cyphers.py Normal file
View file

@ -0,0 +1,137 @@
'''
Implementation of various substitution cyphers, some of which are
already in the standard library but lack features/options I require.
Terminology:
1. "Simple": simple substitution cyphers operates on single letters
2. "Polygraphic": polygraphic substitution cyphers operate on groups
of letters larger than length 1.
3. "Monoalphabetic cypher": a monoalphabetic cypher applies the same fixed
substitutions to the entire message (ie ROT13)
4. "Polyalphabetic cypher": a polyalphabetic cypher applies different
substitutions to the entire message (ie vigenere)
'''
from celeste.constants import ALPHA_LOWER, ALPHA_UPPER
'''
Constant Declarations
'''
# Error Message Format Strings
ERRMSG_CHARSET_DUPL = 'Bad charsets for {0}: found duplicate character \'{1}\''
ERRMSG_NOT_IN_CHARSET = 'No charset for {0} has character \'{1}\''
# Charsets
CHARSET_SUB_DEFAULT = [ALPHA_LOWER, ALPHA_UPPER]
'''
==========================================
Simple Monoalphabetic Substitution Cyphers
==========================================
'''
def ROT(plaintext: str | list,
n: int,
charsets: list[str] | list[list] = CHARSET_SUB_DEFAULT,
ignore_noncharset: bool = True,
as_string: bool = True) -> str:
cyphertext = []
for c in plaintext:
i = None
active_charset = None
for charset in charsets:
try:
i = charset.index(c)
# if we reached here then no ValueError occured
if active_charset is not None:
raise ValueError(ERRMSG_CHARSET_DUPL.format('ROT', c))
active_charset = charset
except ValueError: pass
if i is not None:
cyphertext.append(active_charset[(i + n) % len(active_charset)])
elif not ignore_noncharset:
raise ValueError(ERRMSG_NOT_IN_CHARSET.format('ROT', c))
else:
# if ignore_noncharset then just append c
cyphertext.append(c)
if as_string:
return ''.join(cyphertext)
return cyphertext
# Common ROT aliases
def ROT13(plaintext: str, charsets: list[str] = CHARSET_SUB_DEFAULT) -> str:
return ROT(plaintext, 13, charsets=charsets)
def caesar(plaintext: str, charsets: list[str] = CHARSET_SUB_DEFAULT) -> str:
return ROT(plaintext, 1, charsets=charsets)
# The Atbash Cypher maps a character to its reverse (ie a -> z, b -> y, etc)
def atbash(plaintext: str | list,
charsets: list[str] | list[list] = CHARSET_SUB_DEFAULT,
ignore_noncharset: bool = True,
as_string: bool = True) -> str:
cyphertext = []
for c in plaintext:
i = None
active_charset = None
for charset in charsets:
try:
i = charset.index(c)
# if we reached here then no ValueError occured
if active_charset is not None:
raise ValueError(ERRMSG_CHARSET_DUPL.format('Atbash', c))
active_charset = charset
except ValueError: pass
if i is not None:
L = len(active_charset)
cyphertext.append(active_charset[(L - i - 1) % L])
elif not ignore_noncharset:
raise ValueError(ERRMSG_NOT_IN_CHARSET.format('Atbash', c))
else:
# if ignore_noncharset then just append c
cyphertext.append(c)
if as_string:
return ''.join(cyphertext)
return cyphertext
'''
=========================================
Simple Polyalphabetic Substituion Cyphers
=========================================
'''
def vigenere(plaintext: str | list,
key: str | list,
charsets: list[str] | list[list] = CHARSET_SUB_DEFAULT,
key_charset: str | list = ALPHA_UPPER,
decrypt: bool = False,
ignore_noncharset: bool = True,
as_string: bool = True) -> str:
cyphertext = []
# map the key characters to their rotations
rots = [key_charset.index(k) for k in key]
pos = 0
for c in plaintext:
i = None
active_charset = None
for charset in charsets:
try:
i = charset.index(c)
# if we reached here then no ValueError occured
if active_charset is not None:
raise ValueError(ERRMSG_CHARSET_DUPL.format('Vigenere', c))
active_charset = charset
except ValueError: pass
if i is not None:
rot = rots[pos % len(rots)]
if decrypt: rot = -rot
cyphertext.append(active_charset[(i + rot) % len(active_charset)])
pos += 1
elif not ignore_noncharset:
raise ValueError(ERRMSG_NOT_IN_CHARSET.format('Vigenere', c))
else:
# if ignore_noncharset then just append c
cyphertext.append(c)
if as_string:
return ''.join(cyphertext)
return cyphertext

7
celeste/crypto/hash.py Normal file
View file

@ -0,0 +1,7 @@
import hashlib
def sha256(data: bytes, as_bytes: bool = False) -> bytes:
hasher = hashlib.sha256()
hasher.update(data)
hash = hasher.digest()
return hash if as_bytes else int.from_bytes(hash)

18
celeste/crypto/rsa.py Normal file
View file

@ -0,0 +1,18 @@
'''
Simplification of Euler's Totient function knowing
the prime factorisation for the public key N value.
'''
def _totient(p: int, q: int) -> int:
return (p - 1) * (q - 1)
'''
Implements RSA encryption as modular exponentiation.
'''
def encrypt(plaintext: int, e: int, N: int) -> int:
return pow(plaintext, e, N)
def decrypt(ciphertext: int, d: int, N: int) -> int:
return pow(ciphertext, d, N)
def gen_private_key(e: int, p: int, q: int) -> int:
return pow(e, -1, _totient(p, q))

0
celeste/extern/__init__.py vendored Normal file
View file

987
celeste/extern/primefac.py vendored Normal file
View file

@ -0,0 +1,987 @@
'''
Credit: Lucas A. Brown (@lucasaugustus) for his primefac module (v2.0.12):
https://github.com/lucasaugustus/primefac
primefac is under an MIT license. Moreover I've attached Lucas' credits statement:
"Significant parts of this code are derived or outright copied from other
people's work. In particular, the SIQS code was derived mostly verbatim
from https://github.com/skollmann/PyFactorise by Stephan Kollmann, while
the functions to manipulate points on elliptic curves were copied from a
reply to the blog post at http://programmingpraxis.com/2010/04/23/. The
rest, I believe, is my own work, but I may have forgotten something."
- Lucas A. Brown (primefac.py)
'''
from math import sqrt, isqrt, log2, ceil, gcd, inf, prod as iterprod
from itertools import takewhile, compress, count
from multiprocessing import Process, Queue as mpQueue
from random import randrange#, seed; seed(0)
from time import time
from datetime import datetime, timedelta
def primegen(limit=inf):
"""
Generates primes < limit almost lazily by a segmented sieve of Eratosthenes.
Memory usage depends on the sequence of prime gaps. Unconditionally, we can
say that memory usage is O(p^0.7625), where p is the most-recently-yielded
prime. On the Riemann Hypothesis and Cramer's conjecture, we can bring this
down to O(p^0.75 * log(p)) and O(sqrt(p) * log(p)^2), respectively.
Input: limit -- a number (default = inf)
Output: sequence of integers
Examples:
>>> list(islice(primegen(), 20))
[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71]
>>> list(primegen(73))
[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71]
"""
# We don't sieve 2, so we ought to be able to get sigificant savings by halving the length of the sieve.
# But the tiny extra computation involved in that seems to exceed the savings.
yield from takewhile(lambda x: x < limit, (2,3,5,7,11,13,17,19,23,29,31,37,41,43,47))
pl, pg = [3,5,7], primegen()
for p in pl: next(pg)
n = next(pg); nn = n*n
while True:
n = next(pg)
ll, nn = nn, n*n
sl = (nn - ll)
sieve = bytearray([True]) * sl
for p in pl:
k = (-ll) % p
sieve[k::p] = bytearray([False]) * ((sl-k)//p + 1)
if nn > limit: break # TODO bring this condition up to the while statement
yield from compress(range(ll,ll+sl,2), sieve[::2])
pl.append(n)
yield from takewhile(lambda x: x < limit, compress(range(ll,ll+sl,2), sieve[::2]))
def ilog(x, b): # TODO: investigate optimization starting from x.bin_length() * 2 // b
"""
Greatest integer l such that b**l <= x
Input: x, b -- integers
Output: An integer
Examples:
>>> ilog(263789, 10)
5
>>> ilog(1023, 2)
9
"""
l = 0
while x >= b:
x //= b
l += 1
return l
# TODO possible optimization route: x.bit_length() == ilog(x, 2) + 1; we can therefore use x.bit_length() * 2 // b as a
# 1st approximation to ilog(x, b), then compute pow(b, x.bit_length() * 2 // b), then compare that to x and adjust.
def modinv(a, m):
"""
Returns the inverse of a modulo m, normalized to lie between 0 and m-1. If
a is not coprime to m, return None.
Input:
a -- an integer coprime to m
n -- a positive integer
Output: None or an integer x between 0 and m-1 such that (a * x) % m == 1
Examples:
>>> [modinv(1,1), modinv(2,5), modinv(5,8), modinv(37,100), modinv(12,30)]
[0, 3, 5, 73, None]
"""
return pow(a, -1, m)
def introot(n, r=2): # TODO Newton iteration?
"""
For returns the rth root of n, rounded to the nearest integer in the
direction of zero. Returns None if r is even and n is negative.
Input:
n -- an integer
r -- a natural number or None
Output: An integer
Examples:
>>> [introot(-729, 3), introot(-728, 3), introot(1023, 2), introot(1024, 2)]
[-9, -8, 31, 32]
"""
if n < 0: return None if r%2 == 0 else -introot(-n, r)
if n < 2: return n
if r == 1: return n
if r == 2: return isqrt(n)
#if r % 2 == 0: return introot(isqrt(n), r//2) # TODO Check validity of this line.
lower = upper = 1 << (n.bit_length() // r)
while lower ** r > n: lower >>= 2
while upper ** r <= n: upper <<= 2
while lower != upper - 1:
mid = (lower + upper) // 2
m = mid**r
if m == n: return mid
elif m < n: lower = mid
elif m > n: upper = mid
return lower
def ispower(n, r=0):
"""
If r == 0:
If n is a perfect power, return a tuple containing largest integer (in
terms of magnitude) that, when squared/cubed/etc, yields n as the first
component and the relevant power as the second component.
If n is not a perfect power, return None.
If r > 0:
We check whether n is a perfect rth power; we return its rth root if it
is and None if it isn't.
Input:
n -- an integer
r -- an integer
Output: An integer, a 2-tuple of integers, or None
Examples:
>>> [ispower(n) for n in [64, 25, -729, 1729]]
[(8, 2), (5, 2), (-9, 3), None]
>>> [ispower(64, r) for r in range(7)]
[(8, 2), 64, 8, 4, None, None, 2]
"""
#if r == 0: return any(ispower(n, r) for r in primegen(n.bit_length()+1))
#return n == introot(n, r) ** r
if r == 0:
if n in (0, 1, -1): return (n, 1)
for r in primegen(n.bit_length()+1):
x = ispower(n, r)
if x is not None: return (x, r)
return None
# TODO tricks for special cases
if (r == 2) and (n & 2): return None
if (r == 3) and (n & 7) in (2,4,6): return None
x = introot(n, r)
return None if x is None else (x if x**r == n else None)
def jacobi(a, n):
"""
The Jacobi symbol (a|n).
Input:
a -- any integer
n -- odd integer
Output: -1, 0, or 1
Examples:
>>> [jacobi(a, 15) for a in [-10, -7, -4, -2, -1, 0, 1, 2, 4, 7, 10]]
[0, 1, -1, -1, -1, 0, 1, 1, 1, -1, 0]
>>> [jacobi(a, 13) for a in [-10, -9, -4, -2, -1, 0, 1, 2, 4, 9, 10]]
[1, 1, 1, -1, 1, 0, 1, -1, 1, 1, 1]
>>> [jacobi(a, 11) for a in [-10, -9, -4, -2, -1, 0, 1, 2, 4, 9, 10]]
[1, -1, -1, 1, -1, 0, 1, -1, 1, 1, -1]
"""
if (n%2 == 0) or (n < 0): return None # n must be a positive odd number TODO delete this check?
if (a == 0) or (a == 1): return a
a, t = a%n, 1
while a != 0:
while not a & 1:
a //= 2
if n & 7 in (3, 5): t *= -1
a, n = n, a
if (a & 3 == 3) and (n & 3) == 3: t *= -1
a %= n
return t if n == 1 else 0
def isprime(n, tb=(3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59)): # TODO optimize the basis, possibly by varying it with n
"""
BPSW primality test variant: we use the strong Lucas PRP test and preface
the computation with trial division for speed. No composites are known to
pass the test, though it is suspected that infinitely many will do so.
There are definitely no such errors below 2^64.
This function is mainly a streamlined version of bpsw().
Input:
n -- integer. Number to be examined.
tb -- iterable of primes. Basis for trial division.
Output: True if probably prime; False if definitely composite.
Examples:
>>> [n for n in range(91) if isprime(1000*n+1)]
[3, 4, 7, 9, 13, 16, 19, 21, 24, 28, 51, 54, 55, 61, 69, 70, 76, 81, 88, 90]
>>> [isprime(rpn("38 ! 1 +")), isprime(rpn("38 ! 1 -"))]
[False, True]
"""
# 1. Do some trial division with tb as the basis.
if n % 2 == 0 or n < 3: return n == 2
for p in tb:
if n % p == 0: return n == p
# 2. If sprp(n,2) fails, return False. If it succeeds, continue.
t, s = (n - 1) // 2, 1
while t % 2 == 0: t //= 2; s += 1
#assert 1 + 2**s * t == n
x = pow(2, t, n)
if x != 1 and x != n - 1:
for j in range(1, s):
x = pow(x, 2, n)
if x == 1: return False
elif x == n - 1: break
else: return False
# 3. Select parameters for slprp.
for D in count(5, 4):
j = jacobi(D, n)
if j == 0: return D == n
if j == -1: break
D = -2 - D
j = jacobi(D, n)
if j == 0: return -D == n
if j == -1: break
if D == -13 and ispower(n,2): return False # If n is square, then this loop amounts to very slow trial division.
# Now run slprp(n, 1, (1 - D) // 4) and return the result.
b = (1 - D) // 4
if 1 < gcd(n, b) < n: return False
s, t = 1, (n + 1) // 2
while t % 2 == 0: s += 1; t //= 2
v, w, q, Q = 2, 1, 1, 1
for k in bin(t)[2:]:
q = (q*Q) % n
if k == '1': Q, v, w = (q*b) % n, (w*v - q) % n, (w*w - 2*q*b) % n
else: Q, w, v = q , (w*v - q) % n, (v*v - 2*q ) % n
# assert ( (2*w-v) * modinv(D,n) ) % n, v == lucasmod(t, 1, b, n)
if v == 0 or ( (2*w-v) * modinv(D,n) ) % n == 0: return True
q = pow(b, t, n)
for _ in range(1, s):
v = (v*v - 2*q) % n
if v == 0: return True
q = (q*q) % n
return False
def pollardrho_brent(n, verbose=False):
"""
Factors integers using Brent's variation of Pollard's rho algorithm.
If n is prime, we immediately return n; if not, we keep chugging until a
nontrivial factor is found. This function calls the randomizer; two
successive calls may therefore return two different results.
Input: n -- number to factor
Output: A factor of n.
Examples:
>>> n = rpn("20 ! 1 +"); f = pollardrho_brent(n); n % f
0
"""
if isprime(n): return n
g = n
while g == n:
y, c, m, g, r, q = randrange(1, n), randrange(1, n), randrange(1, n), 1, 1, 1
while g==1:
x, k = y, 0
for i in range(r): y = (y**2 + c) % n
while k < r and g == 1:
ys = y
for i in range(min(m, r-k)):
y = (y**2 + c) % n
q = q * abs(x-y) % n
g, k = gcd(q, n), k+m
r *= 2
if g==n:
while True:
ys = (ys**2+c)%n
g = gcd(x-ys, n)
if g > 1: break
return g
def pollard_pm1(n, B1=100, B2=1000, verbose=False): # TODO: What are the best default bounds and way to increment them?
"""
Integer factoring function. Uses Pollard's p-1 algorithm. Note that this
is only efficient if the number to be factored has a prime factor p such
that p-1's largest prime factor is "small". In this implementation, that
tends to mean less than 10,000,000 or so.
Input:
n -- number to factor
B1 -- Natural number. Bound for phase 1. Default == 100.
B2 -- Natural number > B1. Bound for phase 2. Default == 1000.
Output: A factor of n.
Examples:
>>> pollard_pm1(rpn("28 ! 1 - 239 //"))
1224040923709997
"""
if isprime(n): return n
m = ispower(n)
if m: return m[0]
while True:
pg = primegen()
q = 2 # TODO: what about other initial values of q?
p = next(pg)
while p <= B1: q, p = pow(q, p**ilog(B1, p), n), next(pg)
g = gcd(q-1, n)
if 1 < g < n: return g
while p <= B2: q, p = pow(q, p, n), next(pg)
g = gcd(q-1, n)
if 1 < g < n: return g
# These bounds failed. Increase and try again.
B1 *= 10
B2 *= 10
def mlucas(v, a, n):
# Helper for williams_pp1(). Multiplies along a Lucas sequence mod n.
v1, v2 = v, (v**2 - 2) % n
for bit in bin(a)[3:]: v1, v2 = ((v1**2 - 2) % n, (v1*v2 - v) % n) if bit == "0" else ((v1*v2 - v) % n, (v2**2 - 2) % n)
return v1
def williams_pp1(n, verbose=False): # TODO: experiment with different values of v0, and implement the two-phase version
"""
Integer factoring function. Uses Williams' p+1 algorithm, single-stage
variant. Note that this is only efficient when the number to be factored
has a prime factor p such that p+1's largest prime factor is "small".
Input: n -- integer to factor
Output: Integer. A nontrivial factor of n.
Example:
>>> williams_pp1(315951348188966255352482641444979927)
12403590655726899403
"""
if isprime(n): return n
m = ispower(n)
if m: return m[0]
for v in count(3):
for p in primegen():
e = ilog(isqrt(n), p)
if e == 0: break
for _ in range(e): v = mlucas(v, p, n)
g = gcd(v - 2, n)
if 1 < g < n: return g
if g == n: break
def ecadd(p1, p2, p0, n):
# Helper for ecm(). Adds two points on a Montgomery curve mod n.
x1,z1 = p1; x2,z2 = p2; x0,z0 = p0
t1, t2 = (x1-z1)*(x2+z2), (x1+z1)*(x2-z2)
return (z0*pow(t1+t2,2,n) % n, x0*pow(t1-t2,2,n) % n)
def ecdub(p, A, n):
# Helper for ecm(). Doubles a point on a Montgomery curve mod n.
x, z = p; An, Ad = A
t1, t2 = pow(x+z,2,n), pow(x-z,2,n)
t = t1 - t2
return (t1*t2*4*Ad % n, (4*Ad*t2 + t*An)*t % n)
def ecmul(m, p, A, n):
# Helper for ecm(). Multiplies a point on a Montgomery curve mod n.
if m == 0: return (0, 0)
elif m == 1: return p
else:
q = ecdub(p, A, n)
if m == 2: return q
b = 1
while b < m: b *= 2
b //= 4
r = p
while b:
if m&b: q, r = ecdub(q, A, n), ecadd(q, r, p, n)
else: q, r = ecadd(r, q, p, n), ecdub(r, A, n)
b //= 2
return r
def secm(n, B1, B2, seed):
"""
Seeded ECM. Helper function for ecm(). Returns a possibly-trivial divisor
of n given two bounds and a seed. Uses the two-phase algorithm on
Montgomery curves. See https://wp.me/prTJ7-zI and https://wp.me/prTJ7-A7
for more details. Most of the code for this function's "helpers" were
shamelessly copied from the first of those links.
Input:
n -- Integer to factor
B1 -- Integer. Number of iterations for the first phase.
B2 -- Integer. Number of iterations for the second phase.
seed -- Integer. Selects the specific curve we'll be working on.
Output: Integer. A possibly-trivial factor of n.
Examples:
>>> secm(rpn('24 ! 1 -'), 100, 1000, 22)
991459181683
"""
u, v = (seed**2 - 5) % n, 4*seed % n
p = pow(u, 3, n)
Q, C = (pow(v-u,3,n)*(3*u+v) % n, 4*p*v % n), (p, pow(v,3,n))
pg = primegen()
p = next(pg)
while p <= B1: Q, p = ecmul(p**ilog(B1, p), Q, C, n), next(pg)
g = gcd(Q[1], n)
if 1 < g < n: return g
while p <= B2:
# There is a trick that can speed up the second stage. Instead of multiplying each prime by Q, we iterate over i from
# B1 + 1 to B2, adding 2Q at each step; when i is prime, the current Q can be accumulated into the running solution.
# Again, we defer the calculation of the GCD until the end of the iteration. TODO: Implement and compare performance.
Q = ecmul(p, Q, C, n)
g *= Q[1]
g %= n
p = next(pg)
return gcd(g, n)
def ecmparams(n): # TODO: Better parameters.
counter = 0
for i in count():
for _ in range(2*i+1):
yield (2**i, 10 * 2**i, randrange(6,n), counter)
counter += 1
for j in range(i+1):
yield (2**j, 10 * 2**j, randrange(6,n), counter)
counter += 1
def ecm(n, paramseq=ecmparams, nprocs=1, verbose=False):
"""
"Modern" integer factoring via elliptic curves. Uses Montgomery curves, the
two-phase algorithm, and (optionally) multiple processes. The hard work is
done by secm(); this function just does the managerial work of pulling a
sequence of parameters out of a generator and feeding them into secm().
Input:
n -- number to factor
paramseq -- sequence of parameters to feed into secm(). It must be an
infinite generator of 4-tuples (a,b,c,d), where a is the
number of iterations for the first phase, b is the number of
iterations for the second phase, c is a seed to select the
curve to work on, and d is an auxiliary used to count the
parameters generated so far. We need a < b and 6 <= c < n.
nprocs -- number of processes to use. Default == 1. Setting this > 1
is discouraged on "small" inputs because managing multiple
processes incurs significant overhead.
Output:
A factor of n.
Note that if the parameter sequence calls the randomizer (which is
currently the default behavior), then two successive calls may therefore
return two different results.
Examples:
>>> n = 625793187653 * 991459181683 # = 620448401733239439359999 = 24! - 1
>>> f = ecm(n)
>>> (n//f) * f
620448401733239439359999
"""
g = n % 6
if g % 2 == 0: return 2
if g % 3 == 0: return 3
if isprime(n): return n
m = ispower(n)
if m: return m[0]
if nprocs == 1:
for (B1,B2,seed,i) in paramseq(n):
f = secm(n, B1, B2, seed)
if 1 < f < n: return f
assert nprocs != 1
def factory(params, output): output.put(secm(*params))
ps, facs, procs = paramseq(n), mpQueue(), []
procs = [Process(target=factory, args=((n,)+next(ps)[:3], facs)) for _ in range(nprocs)]
for p in procs: p.start()
while True:
g = facs.get()
if 1 < g < n:
for p in procs: p.terminate()
return g
for p in range(nprocs): # TODO: Try doing this with a process pool.
if not procs[p].is_alive():
del procs[p]
break
procs.append(Process(target=factory, args=((n,)+next(ps)[:3], facs)))
procs[-1].start()
def sqrtmod_prime(a, p):
"""
Solves x**2 == a (mod p) for x. We assume that p is a prime and a is a
quadratic residue modulo p. If either of these assumptions is false, the
return value is meaningless.
The Cipolla-Lehmer section is my own. The rest appears to be derived from
https://codegolf.stackexchange.com/a/9088.
Input:
a -- natural number
p -- prime number
Output: whole number less than p
Examples:
>>> sqrtmod_prime(4, 5)
3
>>> sqrtmod_prime(13, 23)
6
>>> sqrtmod_prime(997, 7304723089)
761044645
"""
a %= p
if p%4 == 3: return pow(a, (p+1) >> 2, p)
elif p%8 == 5:
v = pow(a << 1, (p-5) >> 3, p)
return (a*v*(((a*v*v<<1)%p)-1))%p
elif p%8 == 1:
# CranPom ex 2.31, pg 112 / 126. Pretty sure this amounts to Cipolla-Lehmer.
if a == 0: return 0 # Necessary to avoid an infinite loop in the legendre section
h = 2
# while legendre(h*h - 4*a, p) != -1:
while pow(h*h - 4*a, (p-1) >> 1, p) != p - 1: h += 1 # TODO compare speed vs random selection
#return ( lucasmod((p+1)//2, h, a, p)[1] * modinv(2, p) ) % p
k, v, w, q, Q = (p+1)//2, 2, h % p, 1, 1
for kj in bin(k)[2:]:
q = (q*Q) % p
if kj == '1': Q, v, w = (q*a) % p, (w*v - h*q) % p, (w*w - 2*q*a) % p
else: Q, w, v = q , (w*v - h*q) % p, (v*v - 2*q ) % p
return (v*k) % p
else: return a # p == 2
def siqs(n, verbose=False):
"""
Uses the Self-Initializing Quadratic Sieve to extract a factor of n.
We make some calls to randrange, so the output may change from call to call.
This is derived from https://github.com/skollmann/PyFactorise.
Input:
n -- number to factor
verbose -- if True, print progress reports. Default == False.
Output:
If n is prime, we return n.
If n is composte, we return a nontrivial factor of n.
If n is too small, we raise a ValueError.
Examples:
>>> siqs(factorial(24) - 1) in (625793187653, 991459181683)
True
"""
if (not isinstance(n, int)) or n < 0: raise ValueError("Number must be a positive integer.")
if n < 2**64:
if verbose: print("Number is too small for SIQS. Using Pollard Rho instead.")
return pollardrho_brent(n)
if verbose: print("Factorizing %d (%d digits)..." % (n, len(str(n))))
if isprime(n): return n
if verbose: print("Number is composite.")
if verbose: print("Checking whether it is a perfect power...")
perfect_power = ispower(n)
if perfect_power:
if verbose: print(n, "=", "%d^%d." % (perfect_power[0], perfect_power[1]))
return perfect_power[0]
if verbose: print("Not a perfect power.")
if verbose: print("Using SIQS on %d (%d digits)..." % (n, len(str(n))))
if verbose: starttime, sievetime, latime = time(), 0, 0
# Choose parameters nf (sieve of factor base) and m (for sieving in [-m,m].
# Using similar parameters as msieve-1.52
dig = len(str(n))
if dig <= 34: nf, m = 200, 65536
elif dig <= 36: nf, m = 300, 65536
elif dig <= 38: nf, m = 400, 65536
elif dig <= 40: nf, m = 500, 65536
elif dig <= 42: nf, m = 600, 65536
elif dig <= 44: nf, m = 700, 65536
elif dig <= 48: nf, m = 1000, 65536
elif dig <= 52: nf, m = 1200, 65536
elif dig <= 56: nf, m = 2000, 65536 * 3
elif dig <= 60: nf, m = 4000, 65536 * 3
elif dig <= 66: nf, m = 6000, 65536 * 3
elif dig <= 74: nf, m = 10000, 65536 * 3
elif dig <= 80: nf, m = 30000, 65536 * 3
elif dig <= 88: nf, m = 50000, 65536 * 3
elif dig <= 94: nf, m = 60000, 65536 * 9
else: nf, m = 100000, 65536 * 9
class FactorBasePrime:
"""A factor base prime for the SIQS"""
def __init__(self, p, tmem, lp):
self.p, self.soln1, self.soln2, self.tmem, self.lp, self.ainv = p, None, None, tmem, lp, None
# Compute and return nf factor base primes suitable for a SIQS on n.
factor_base = []
for p in primegen():
if pow(n, (p-1) >> 1, p) == 1: # if n is a quadratic residue mod p
t = sqrtmod_prime(n, p)
lp = round(log2(p)) # This gets rounded for the sake of speed.
factor_base.append(FactorBasePrime(p, t, lp))
if len(factor_base) >= nf: break
if verbose: print("Factor base size: %d primes.\nLargest prime in base: %d." % (nf, factor_base[-1].p))
npolys, i_poly, prev_cnt, relcount, smooth_relations, required_relations_ratio = 0, 0, 0, 0, [], 1.05
p_min_i, p_max_i = None, None
for (i,fb) in enumerate(factor_base):
if p_min_i is None and fb.p >= 400: # 400 is a tunable parameter.
p_min_i = i
if p_max_i is None and fb.p > 4000: # 4000 is a tunable parameter.
p_max_i = i - 1
break
# The following may happen if the factor base is small, make sure that we have enough primes.
if p_max_i is None: p_max_i = len(factor_base) - 1
if p_min_i is None or p_max_i - p_min_i < 20: p_min_i = min(p_min_i, 5) # TODO This line is problematic for some small n.
target = sqrt(2 * float(n)) / m
target1 = target / sqrt((factor_base[p_min_i].p + factor_base[p_max_i].p) / 2)
while True:
if verbose: temptime = time()
if verbose: print("*** Phase 1: Finding smooth relations ***")
required_relations = round(nf * required_relations_ratio)
if verbose: print("Target: %d relations" % required_relations)
enough_relations = False
while not enough_relations:
if i_poly == 0: # Compute the first of a set of polynomials
# find q such that the product of factor_base[q_i] is about sqrt(2n)/m; try a few sets to find a good one
best_q, best_a, best_ratio = None, None, None
for _ in range(30):
a, q = 1, []
while a < target1:
p_i = 0
while p_i == 0 or p_i in q: p_i = randrange(p_min_i, p_max_i+1)
a *= factor_base[p_i].p
q.append(p_i)
ratio = a / target
# ratio too small seems to be not good
if (best_ratio is None or (ratio >= 0.9 and ratio < best_ratio) or best_ratio < 0.9 and ratio > best_ratio):
best_q, best_a, best_ratio = q, a, ratio
a, q = best_a, best_q
s, B = len(q), []
for l in range(s):
fb_l = factor_base[q[l]]
q_l = fb_l.p
assert a % q_l == 0
gamma = (fb_l.tmem * modinv(a // q_l, q_l)) % q_l
if gamma > q_l // 2: gamma = q_l - gamma
B.append(a // q_l * gamma)
b = sum(B) % a
b_orig = b
if (2 * b > a): b = a - b
assert 0 < b and 2 * b <= a and (b * b - n) % a == 0
g1, g2, g3, ga, gb = b * b - n, 2 * a * b, a * a, a, b_orig
ha, hb = a, b
for fb in factor_base:
if a % fb.p != 0:
fb.ainv = modinv(a, fb.p)
fb.soln1 = (fb.ainv * (fb.tmem - b)) % fb.p
fb.soln2 = (fb.ainv * (-fb.tmem - b)) % fb.p
else: # Compute the (i+1)-th polynomial, given that g is the i-th polynomial.
#v = lowest_set_bit(i) + 1
#z = -1 if ceil(i / (2 ** v)) % 2 == 1 else 1
z = -1 if ceil(i_poly / (1 + (i_poly ^ (i_poly-1)))) % 2 == 1 else 1
#b = (g.b + 2 * z * B[v - 1]) % g.a
b = (gb + 2 * z * B[(i_poly & (-i_poly)).bit_length() - 1]) % ga
a = ga
b_orig = b
if (2 * b > a): b = a - b
assert (b * b - n) % a == 0
g1, g2, g3, ga, gb = b * b - n, 2 * a * b, a * a, a, b_orig
ha, hb = a, b
for fb in factor_base:
if a % fb.p != 0:
fb.soln1 = (fb.ainv * ( fb.tmem - b)) % fb.p
fb.soln2 = (fb.ainv * (-fb.tmem - b)) % fb.p
i_poly += 1
npolys += 1
if i_poly >= 2 ** (len(B) - 1): i_poly = 0
# BEGIN SIEVING. Most of our time is spent between here and the "END SIEVING" comment.
sieve_array = [0] * (2 * m + 1)
for fb in factor_base:
if fb.soln1 is None: continue
p, lp = fb.p, fb.lp
a_start_1 = fb.soln1 - ((m + fb.soln1) // p) * p
if p > 20:
for a in range(a_start_1 + m, 2 * m + 1, p): sieve_array[a] += lp
a_start_2 = fb.soln2 - ((m + fb.soln2) // p) * p
for a in range(a_start_2 + m, 2 * m + 1, p): sieve_array[a] += lp
# Perform the trial division step of the SIQS.
limit = round(log2(m * sqrt(float(n)))) - 25 # 25 is a tunable parameter. The rounding is for speed.
for (i,sa) in enumerate(sieve_array):
if sa >= limit:
x = i - m
gx = (g3 * x + g2) * x + g1
# Determine whether gx can be fully factorized into primes from the factor base.
# If so, store the indices of the factors from the factor base as divisors_idx. If not, set that to None.
a, divisors_idx = gx, []
for (fbi,fb) in enumerate(factor_base):
if a % fb.p == 0:
exp = 0
while a % fb.p == 0:
a //= fb.p
exp += 1
divisors_idx.append((fbi, exp))
if a == 1:
u = ha * x + hb
v = gx
assert (u * u) % n == v % n
smooth_relations.append((u, v, divisors_idx))
break
relcount = len(smooth_relations)
if relcount >= required_relations:
enough_relations = True
break
# END SIEVING. Most of our time is spent between here and the "BEGIN SIEVING" comment.
if verbose and relcount > 0 and (relcount >= required_relations or i_poly % 8 == 0 or relcount > prev_cnt):
frac = relcount / required_relations
t = time() - starttime # Time spent so far
ett = t / frac # Estimated total time
ettc = ett - t # Estimated time to completion
eta = datetime.isoformat(datetime.now() + timedelta(seconds=ettc), sep=' ', timespec='seconds')
print('\b'*256 + "Found %d rels (%.1f%%) on %d polys. ETA %ds (%s). " % (relcount, 100*frac, npolys, ettc, eta), end='', flush=True)
prev_cnt = relcount
if verbose:
sievetime += time() - temptime
print("\nSieving time: %f seconds." % sievetime)
temptime = time()
print("*** Phase 2: Linear Algebra ***")
print("Building matrix for linear algebra step...")
M = [0] * nf
mask = 1
for sr in smooth_relations:
for (j,exp) in sr[2]:
if exp % 2: M[j] += mask
mask <<= 1
if verbose: print("Finding perfect squares using matrix and factors from perfect squares...")
# Perform the linear algebra step of the SIQS: do fast Gaussian elimination to determine pairs of perfect squares mod n.
# Use the optimisations described in [Çetin K. Koç and Sarath N. Arachchige. 'A Fast Algorithm for Gaussian Elimination
# over GF(2) and its Implementation on the GAPP.' Journal of Parallel and Distributed Computing 13.1 (1991): 118-122].
if verbose: gaussstart = time()
row_is_marked = bytearray([False]) * relcount
pivots = [-1] * nf
for j in range(nf):
M_j = M[j]
i = (M_j & (-M_j)).bit_length() - 1 #i = -1 if M[j] == 0 else lowest_set_bit(M[j])
# i is now the row of the first nonzero entry in column j, or -1 if no such row exists.
if i > -1:
pivots[j] = i
row_is_marked[i] = True
for k in range(nf):
if (M[k] >> i) & 1 and k != j: # test M[i][k] == 1
M[k] ^= M_j # add column j to column k mod 2
if verbose and ((nf - j) % 100 == 0 or nf == j + 1):
frac = (j+1) / nf
t = time() - gaussstart # Time spent so far
ett = t / frac # Estimated total time
ettc = ett - t # Estimated time to completion
eta = datetime.isoformat(datetime.now() + timedelta(seconds=ettc), sep=' ', timespec='seconds')
print('\b'*256 + "Gaussian elimination: %d/%d (%.1f%%). ETA %ds (%s). " % (j+1, nf, 100*frac, ettc, eta), end='', flush=True)
if verbose: print("\nGaussian elimination time: %f seconds" % (time() - gaussstart))
attempts = 0
for i in range(relcount):
if not row_is_marked[i]:
square_indices = [i]
for j in range(nf):
if (M[j] >> i) & 1: # test M[i][j] == 1
square_indices.append(pivots[j])
# Given the solution encoded by square_indices, try to find a factor of n, and return it.
attempts += 1
if verbose: print('\b'*42 + "Attempt #%d" % attempts, end='', flush=True)
# Given on of the solutions returned by siqs_solve_matrix and the corresponding smooth relations,
# calculate the pair (sqrt1, sqrt2) such that sqrt1^2 = sqrt2^2 (mod n).
sqrt1, sqrt2 = 1, 1
for idx in square_indices:
sqrt1 *= smooth_relations[idx][0]
sqrt2 *= smooth_relations[idx][1]
sqrt2 = isqrt(sqrt2)
assert (sqrt1 * sqrt1) % n == (sqrt2 * sqrt2) % n
factor = gcd(sqrt1 - sqrt2, n)
if 1 != factor != n:
if verbose:
print(" succeeded.")
latime += time() - temptime
print("Linear algebra time: %f seconds." % latime)
totaltime = time() - starttime
print("Total time: %f seconds." % (totaltime))
return factor
if verbose:
print('\b'*42 + "All %d attempts failed." % attempts)
latime += time() - temptime
print("Linear algebra time: %f seconds." % latime)
print("Failed to find a solution. Finding more relations...")
required_relations_ratio += 0.05
def multifactor(n, methods=(pollardrho_brent, pollard_pm1, williams_pp1, ecm, siqs), verbose=False):
"""
Integer factoring function. Uses several methods in parallel. Waits for a
function to return, kills the rest, and reports. Note that two successive
calls may return different results depending on which method finishes first
and whether any methods call the randomizer.
Input:
n -- number to factor
methods -- list of functions to run.
Output: A factor of n.
Examples:
>>> methods = [pollardrho_brent, pollard_pm1, williams_pp1, ecm, siqs]
>>> n = rpn("24 ! 1 -"); f = multifactor(n, methods); n%f
0
"""
# Note that the multiprocing incurs relatively significant overhead. Only call this if n is proving difficult to factor.
def factory(method, n, verbose, output): output.put((method(n, verbose=verbose), str(method).split()[1]))
factors = mpQueue()
procs = [Process(target=factory, args=(m, n, verbose, factors)) for m in methods]
for p in procs: p.start()
(f, g) = factors.get()
for p in procs: p.terminate()
names = {"pollardrho_brent":"prb", "pollard_pm1":"p-1", "williams_pp1":"p+1"}
return (f, names.get(g, g))
def primefac(n, trial=1000, rho=42000, verbose=False, methods=(pollardrho_brent,)):
"""
Generates the prime factors of the input. Factors that appear x times are
yielded x times.
Input:
n -- the number to be factored
trial -- Trial division is performed up to this limit and no further.
We trial divide by 2 and 3 whether the user wants to or not.
Default == 1000.
rho -- How long to have Pollard's rho chug before declaring a cofactor
difficult. Default == 42,000 iterations. Floating-point inf is
an acceptable value.
methods -- Use these methods on difficult cofactors. If the tuple has
more than 1 element, we have multifactor handle it. Calling
multifactor has a high overhead, so when the tuple has a
single element, we call that function directly. The default
is (pollardrho_brent,). Each function f in methods must
accept a single number n as its argument. If n is prime,
f(n) must return n. If n is composite, f(n) must return a
number strictly between 1 and n that evenly divides n.
Giving up is not allowed.
Output: Prime factors of n
Factoring:
We use a three-stage factoring algorithm.
1. Trial divide with all primes less than or equal to the specified
limit. We trial divide by 2 and 3 regardless of the limit.
2. Run Pollard's rho algorithm on whatever remains. This algorithm
may split a number into two composite cofactors. Such cofactors
remain here until they survive the specified number of rounds of
the rho algorithm.
3. Subject each remaining cofactor to a set of up to five factoring
methods in parallel:
Pollard's rho algorithm with Brent's improvement,
Pollard's p-1 method,
Williams' p+1 method,
the elliptic curve method,
and the self-initializing quadratic sieve.
Examples:
>>> list(primefac(1729))
[7, 13, 19]
>>> list(sorted(primefac(rpn("24 ! 1 -"))))
[625793187653, 991459181683]
"""
# Obtains a complete factorization of n, yielding the prime factors as they are obtained.
# If the user explicitly specifies a splitting method, use that method. Otherwise,
# 1. Pull out small factors with trial division.
# 2. Do a few rounds of Pollard's Rho algorithm.
# 3. Launch multifactor on the remainder. Note that multifactor's multiprocessing incurs relatively significant overhead.
if verbose: print("\nFactoring %d (%d digits):" % (n, len(str(n))))
if n < 0:
if verbose: print("Trial division: -1")
yield -1; n *= -1
if n < 2: return
if isprime(n):
if verbose: print("Number is prime.")
yield n
return
if verbose: print("Number is composite.")
# Trial division
factors, nroot = [], isqrt(n)
for p in primegen(max(4,trial)+1): # Note that we check for 2 and 3 whether the user wants to or not.
if verbose: print('\b'*80 + "Trial division:", p, end='', flush=False)
if n%p == 0:
while n%p == 0:
if verbose: print('\b'*80 + "Trial division:", p)
yield p
n //= p
nroot = isqrt(n)
if p > nroot:
if n != 1:
if verbose:
laststr = "Trial division: " + str(p)
print('\b'*80 + str(n) + " " * len(laststr))
yield n
return
if isprime(n):
if verbose:
laststr = "Trial division: " + str(p)
print('\b'*80 + str(n) + " " * len(laststr))
yield n
return
if verbose: print("\nTrial division finished.")
# TODO: Fermat's method?
# Pollard's rho
factors, difficult = [n], []
while len(factors) != 0:
rhocount = 0
n = factors.pop()
if verbose: print("Beginning Pollard Rho on %d." % n)
try:
g = n
while g == n:
x, c, g = randrange(1, n), randrange(1, n), 1
y = x
while g==1:
if rhocount >= rho: raise Exception
rhocount += 1
x = (x**2 + c) % n
y = (y**2 + c) % n
y = (y**2 + c) % n
g = gcd(x-y, n)
# We now have a nontrivial factor g of n. If we took too long to get here, we're actually at the except statement.
f = n // g
if verbose: print("Pollard Rho split %d into %d and %d." % (n, g, f))
if isprime(g):
if verbose: print(g, "is prime.")
yield g
else:
if verbose: print(g, "is composite.")
factors.append(g)
if isprime(f):
if verbose: print(f, "is prime.")
yield f
else:
if verbose: print(f, "is composite.")
factors.append(f)
except Exception:
if verbose: print("Declaring", n, "difficult.")
difficult.append(n) # Factoring n took too long. We'll have multifactor chug on it.
# TODO: P-1 by itself?
# TODO: ECM by itself?
names = {"pollardrho_brent":"prb", "pollard_pm1":"p-1", "williams_pp1":"p+1", "ecm":"ecm", "siqs":"siqs"}
factors = difficult
if len(methods) == 1:
method = methods[0]
name = names[str(method).split()[1]]
while len(factors) != 0:
n = factors.pop()
if verbose: print("Beginning %s on %d." % (name, n))
f = method(n, verbose=verbose)
g = n // f
if verbose: print("%s split %d into %d and %d." % (name, n, f, g))
if isprime(f):
if verbose: print(f, "is prime.")
yield f
else:
if verbose: print(f, "is composite.")
factors.append(f)
if isprime(g):
if verbose: print(g, "is prime.")
yield g
else:
if verbose: print(g, "is composite.")
factors.append(g)
else:
while len(factors) != 0:
n = factors.pop()
if verbose:
#methodstring = ", ".join(names[str(m).split()[1]] for m in methods)
if len(methods) == 2: methodstring = " and ".join(names[str(m).split()[1]] for m in methods)
else:
assert len(methods) > 2
methodstring = ", ".join(names[str(m).split()[1]] for m in methods[:-1])
methodstring += ", and " + names[str(methods[-1]).split()[1]]
if verbose: print("Beginning %s on %d." % (methodstring, n))
f, name = multifactor(n, methods=methods, verbose=verbose)
g = n // f
if verbose: print("%s split %d into %d and %d." % (name, n, f, g))
if isprime(f):
if verbose: print(f, "is prime.")
yield f
else:
if verbose: print(f, "is composite.")
factors.append(f)
if isprime(g):
if verbose: print(g, "is prime.")
yield g
else:
if verbose: print(g, "is composite.")
factors.append(g)

0
celeste/math/__init__.py Normal file
View file

124
celeste/math/crt.py Normal file
View file

@ -0,0 +1,124 @@
def crt(eqs: list[tuple[int, int]]) -> tuple[int, int]:
'''
Solves simultaneous linear diophantine equations
using the Chinese-Remainder Theorem.
Example:
>>> crt([2, 3), (3, 5), (2, 7)])
(23, 105)
# aka x is congruent to 23 modulo 105
'''
# calculate the quotient and remainder form of the unknown
q = 1
r = 0
# store remainders and moduli for each linear equation
R = [eq[0] for eq in eqs]
M = [eq[1] for eq in eqs]
N = len(eqs)
for i in range(N):
(R_i, M_i) = (R[i], M[i])
# adjust quotient and remainder
r += R_i * q
q *= M_i
# apply current eq to future eqs
for j in range(i+1, N):
R[j] -= R_i
R[j] *= pow(M_i, -1, M[j])
R[j] %= M[j]
return r # NOTE: forall integers k: qk+r is also a solution
from math import isqrt, prod
def bsgs(g: int, h: int, n: int) -> int:
'''
The Baby-Step Giant-Step algorithm computes the
discrete logarithm (or order) of an element in a
finite abelian group.
Specifically, for a generator g of a finite abelian
group G order n, and an element h in G, the BSGS
algorithm returns the integer x such that g^x = h.
For example, if G = Z_n the cyclic group order n then:
bsgs solves g^x = h (mod n) for x.
'''
# ensure g and h are reduced modulo n
g %= n
h %= n
# ignore trivial case
# NOTE: for the Pohlig-Hellman algorithm to work properly
# NOTE: BSGS must return 1 NOT 0 for bsgs(1, 1, n)
if g == h: return 1
m = isqrt(n) + 1 # ceil(sqrt(n))
# store g^j values in a hash table
H = {j: pow(g, j, n) for j in range(m)}
I = pow(g, -m, n)
y = h
for i in range(m):
for j in range(m):
if H[j] == y:
return i*m + j
y = (y * I) % n
return None # No Solutions
def factors_prod(pf: list[tuple[int, int]]) -> int:
return prod(p**e for (p, e) in pf)
def sph(g: int, h: int, pf: list[tuple[int, int]]) -> int:
'''
The Silver-Pohlig-Hellman algorithm for computing
discrete logarithms in finite abelian groups whose
order is a smooth integer.
NOTE: this works in the special case,
NOTE: but I can't get the general case to work :(
'''
# ignore the trivial case
if g == h:
return 1
R = len(pf) # number of prime factors
N = factors_prod(pf) # pf is the prime factorisation of N
print('N', N)
# Special Case: groups of prime-power order:
if R == 1:
(p, e) = pf[0]
x = 0
y = pow(g, p**(e-1), N)
for k in range(e):
# temporary variables for defining h_k
w = pow(g, -x, N)
# NOTE: by construction the order of h_k must divide p
h_k = pow(w*h, p**(e-1-k), N)
# apply BSGS to find d such that y^d = h_k
d = bsgs(y, h_k, N)
if d is None:
return None # No Solutions
x = x + (p**k)*d
return x
# General Case:
eqs = []
for i in range(R):
(p_i, e_i) = pf[i]
# phi = (p_i - 1)*(p_i**(e_i - 1)) # Euler totient
# pe = p_i**e_i
# P = (N//(p_i**e_i)) % phi # reduce mod phi by Euler's theorem
g_i = pow(g, N//(p_i**e_i), N)
h_i = pow(h, N//(p_i**e_i), N)
print("g and h", g_i, h_i)
print("p^e", p_i, e_i)
x_i = sph(g_i, h_i, [(p_i,e_i)])
print("x_i", x_i)
if x_i is None:
return None # No Solutions
eqs.append((x_i, p_i**e_i))
print()
# ignore the quotient, solve CRT for 0 <= x <= n-1
x = crt(eqs) # NOTE: forall integers k: Nk+x is also a solution
print('solution:', x)
print(eqs)
return x
if __name__ == '__main__':
result = sph(3, 11, [(2, 1),(7,1)])

View file

@ -0,0 +1,2 @@
from .pftrialdivision import trial_division
from .factordb import DBResult, FType, FCertainty

View file

@ -0,0 +1,161 @@
'''
Simple interface for https://factordb.com inspired by
https://github.com/ihebski/factordb
TODO:
1. Implement primality certificate generation, read this:
https://reference.wolfram.com/language/PrimalityProving/ref/ProvablePrimeQ.html
'''
import requests
from enum import Enum, StrEnum
_FDB_URI = 'https://factordb.com'
# Generated by https://www.asciiart.eu/text-to-ascii-art
# using "ANSI Shadow" font and "Box drawings double" border with 1 H. Padding
_BANNER = '''
cli by imbored
'''.strip()
# Enumeration of number types based on their factorisation
class FType(Enum):
Unit = 1
Composite = 2
Prime = 3
Unknown = 4
class FCertainty(Enum):
Certain = 1
Partial = 2
Unknown = 3
# FactorDB result status codes
class DBStatus(StrEnum):
C = 'C'
CF = 'CF'
FF = 'FF'
P = 'P'
PRP = 'PRP'
U = 'U'
Unit = 'Unit' # just for 1
N = 'N'
Add = '*'
def is_unknown(self) -> bool:
return (self in [DBStatus.U, DBStatus.N, DBStatus.Add])
def classify(self) -> tuple[FType, FCertainty]:
return _STATUS_MAP[self]
def msg_verbose(self) -> str:
return _STATUS_MSG_VERBOSE[self]
# Map of DB Status codes to their factorisation type and certainty
_STATUS_MAP = {
DBStatus.Unit: (FType.Unit, FCertainty.Certain),
DBStatus.C: (FType.Composite, FCertainty.Unknown),
DBStatus.CF: (FType.Composite, FCertainty.Partial),
DBStatus.FF: (FType.Composite, FCertainty.Certain),
DBStatus.P: (FType.Prime, FCertainty.Certain),
DBStatus.PRP: (FType.Prime, FCertainty.Partial),
DBStatus.U: (FType.Unknown, FCertainty.Unknown),
DBStatus.N: (FType.Unknown, FCertainty.Unknown),
DBStatus.Add: (FType.Unknown, FCertainty.Unknown),
}
# Reference: https://factordb.com/status.html
# NOTE: my factor messages differ slightly from the reference
_STATUS_MSG_VERBOSE = {
DBStatus.Unit: 'Unit, trivial factorisation',
DBStatus.C: 'Composite, no factors known',
DBStatus.CF: 'Composite, *partially* factors',
DBStatus.FF: 'Composite, fully factored',
DBStatus.P: 'Prime',
DBStatus.PRP: 'Probable prime',
DBStatus.U: 'Unknown (*but in database)',
DBStatus.N: 'Not in database (-not added due to your settings)',
DBStatus.Add: 'Not in database (+added during request)',
}
# Struct for storing database results with named properties
class DBResult:
def __init__(self,
status: DBStatus,
factors: tuple[tuple[int, int]]) -> None:
self.status = status
self.factors = factors
self.ftype, self.certainty = self.status.classify()
def _make_cookie(fdbuser: str | None) -> dict[str, str]:
return {} if fdbuser is None else {'fdbuser': fdbuser}
def _get_key(by_id: bool):
return 'id' if by_id else 'query'
def _api_query(n: int,
fdbuser: str | None,
by_id: bool = False) -> requests.models.Response:
key = _get_key(by_id)
uri = f'{_FDB_URI}/api?{key}={n}'
return requests.get(uri, cookies=_make_cookie(fdbuser))
def _report_factor(n: int,
factor: int,
fdbuser: str | None,
by_id: bool = False) -> requests.models.Response:
key = _get_key(by_id)
uri = f'{_FDB_URI}/reportfactor.php?{key}={n}&factor={factor}'
return requests.get(uri, cookies=_make_cookie(fdbuser))
'''
Attempts a query to FactorDB, returns a DBResult object
on success, or None if the request failed due to the
get request raising a RequestException.
'''
def query(n: int,
token: str | None = None,
by_id: bool = False,
cli: bool = False) -> DBResult | None:
if cli:
print(_BANNER)
try:
resp = _api_query(n, token, by_id=by_id)
except requests.exceptions.RequestException:
return None
content = resp.json()
result = DBResult(
DBStatus(content['status']),
tuple((int(F[0]), F[1]) for F in content['factors'])
)
if cli:
print(f'Status: {result.status.msg_verbose()}')
print(result.factors)
# ensure the unit has the trivial factorisation (for consistency)
if result.status == DBStatus.Unit:
result.factors = ((1, 1),)
return result
'''
Reports a known factor to FactorDB, also tests it is
actually a factor to avoid wasting FactorDBs resources.
'''
def report(n: int,
factor: int,
by_id: int,
token: str | None = None) -> None:
try:
resp = _report_factor(n, factor, token)
except requests.exceptions.RequestException:
return None
content = resp.json()
print(content)

View file

@ -0,0 +1,46 @@
'''
The trial division algorithm is essentially the idea that
all factors of an integer n are less than or equal to isqrt(n),
where isqrt is floor(sqrt(n)).
Moreover, if p divides n, then all other factors of n must
be factors of n//p. Hence they must be <= isqrt(n//p).
'''
from math import isqrt # integer square root
# Returns the "multiplicity" of a prime factor
def pf_multiplicity(n: int, p: int) -> int:
mult = 0
while n % p == 0:
n //= p
mult += 1
return n, mult
'''
Trial division prime factorisation algorithm.
Returns a list of tuples (p, m) where p is
a prime factor and m is its multiplicity.
'''
def trial_division(n: int) -> list[tuple[int, int]]:
factors = []
# determine multiplicity of the only even prime (2)
n, mult_2 = pf_multiplicity(n, 2)
if mult_2: factors.append((2, mult_2))
# determine odd factors and their multiplicities
p = 3
mult = 0
limit = isqrt(n)
while p <= limit:
n, mult = pf_multiplicity(n, p)
if mult:
factors.append((p, mult))
limit = isqrt(n) # recalculate limit
mult = 0 # reset
else:
p += 2
# if n is still greater than 1, then n is a prime factor
if n > 1:
factors.append((n, 1))
return factors

37
celeste/math/groups.py Normal file
View file

@ -0,0 +1,37 @@
'''
This library exists to isolate all math functions
related to groups and their representations.
'''
from math import gcd
'''
Returns the multiplicative cyclic subgroup
generated by an element g modulo m.
Returns the cyclic subgroup as a list[int],
the order of that subgroup, and a boolean
indicating whether g is infinitely repeating
with period == ord<g> (or otherwise if it
terminates with g**ord<g> == 0).
'''
def cyclic_subgrp(g: int,
m: int,
ignore_zero: bool = True) -> tuple[list[int], int, bool]:
G = []
order = 0
periodic = True
a = 1 # start at identity
for _ in range(m):
a = (a * g) % m
if a == 0:
if not ignore_zero:
G.append(a)
order += 1
periodic = False
break
# check if we've reached something periodic
elif a in G[:1]:
break
G.append(a)
order += 1
return G, order, periodic

View file

@ -0,0 +1,82 @@
'''
Terminology:
Although "divisor" and "factor" mean the same thing.
When Celeste discusses "divisors of n" it is implied to
mean "proper divisors of n + n itself", and "factors" are
the "prime proper divisors of n".
'''
from celeste.extern.primefac import primefac
def factors(n: int) -> int:
pfactors: list[tuple[int, int]] = []
# generate primes and progressively store them in pfactors
pfgen = primefac(n)
watching = next(pfgen)
mult = 1
# ASSUMPTION: prime generation is (non-strict) monotone increasing
while True:
p = next(pfgen, None)
if p == watching:
mult += 1
else:
pfactors.append((watching, mult))
watching = p # reset
mult = 1 # reset
if p is None:
break
return pfactors
def factors2divisors(pfactors: list[tuple[int, int]],
sorted: bool = True) -> list[int]:
'''
Generates all divisors < n of an integer n given its prime factorisation.
Input: prime factorisation of n (excluding 1 and n, and duplicates)
in the typical form: list[(prime, multiplicity)]
'''
divisors = [1]
for (prime, multiplicity) in pfactors:
extension = []
for i in range(1, multiplicity+1):
term = prime**i
extension.extend(list([divisor*term for divisor in divisors]))
divisors.extend(extension)
if sorted: divisors.sort()
return divisors
def factors2aliquots(pfactors: list[tuple[int, int]]) -> list[int]:
return factors2divisors(pfactors)[:-1]
# "aliquots(n)" is an alias for "divisors(n)"
def aliquots(n: int) -> int:
'''
Returns all aliquot parts (proper divisors) of
an integer n, that is all divisors 0 < d <= n.
'''
return factors2aliquots(factors(n))
def divisors(n: int) -> int:
'''
Returns all divisors 0 < d < n of an integer n.
'''
return factors2divisors(factors(n))
def aliquot_sum(n: int) -> int:
return sum(aliquots(n))
def littleomega(n: int) -> int:
'''
The Little Omega function counts the number of
distinct prime factors of an integer n.
Ref: https://en.wikipedia.org/wiki/Prime_omega_function
'''
return len(factors(n))
def bigomega(n: int) -> int:
'''
The Big Omega function counts the total number of
prime factors (including multiplicity) of an integer n.
Ref: https://en.wikipedia.org/wiki/Prime_omega_function
'''
return sum(factor[1] for factor in factors(n))

View file

@ -0,0 +1,3 @@
def factorial(n: int) -> int:
if n == 0: return 1
return n * factorial(n-1)

View file

@ -0,0 +1 @@

View file

@ -0,0 +1,21 @@
def euclidean_algo(a: int, b: int) -> int:
while b: a, b = b, a % b
return a
'''
Calculates coefficients x and y of Bezout's identity: ax + by = gcd(a,b)
NOTE: Based on the Extended Euclidean Algorithm's Wikipedia page
'''
def extended_euclid_algo(a: int, b: int) -> tuple[int, int]:
(old_r, r) = (a, b)
(old_s, s) = (1, 0)
(old_t, t) = (0, 1)
while r != 0:
q = old_r // r
(old_r, r) = (r, old_r - q*r)
(old_s, s) = (s, old_s - q*s)
(old_t, t) = (t, old_t - q*t)
# Bezout cofficients: (old_s, old_t)
# Greatest Common Divisor: old_r
# Quotients by the gcd: (t, s)
return (t, s)

47
celeste/math/primes.py Normal file
View file

@ -0,0 +1,47 @@
from math import gcd, inf
from celeste.math.numbers import bigomega, factors
from celeste.extern.primefac import (
isprime,
primegen as Primes,
)
def coprime(n: int, m: int) -> bool:
return gcd(n, m) == 1
def almostprime(n: int, k: int) -> bool:
'''
A natural n is "k-almost prime" if it has exactly
k prime factors (including multiplicity).
'''
return (bigomega(n) == k)
def semiprime(n: int) -> bool:
'''
A semiprime number is one that is 2-almost prime.
Ref: https://en.wikipedia.org/wiki/Semiprime
'''
return almostprime(n, 2)
def eulertotient(x: int | list) -> int:
'''
Evaluates Euler's Totient function.
Input: `x: int` is prime factorised by Lucas A. Brown's primefac.py
else `x: list` is assumed to the prime factorisation of `x: int`
'''
pfactors = x if isinstance(x, list) else factors(n)
return prod((p-1)*(p**(e-1)) for (p, e) in pfactors)
# def eulertotient(n: int) -> int:
# '''
# Uses trial division to compute
# Euler's Totient (Phi) Function.
# '''
# phi = int(n > 1 and n)
# for p in range(2, int(n ** .5) + 1):
# if not n % p:
# phi -= phi // p
# while not n % p:
# n //= p
# #if n is > 1 it means it is prime
# if n > 1: phi -= phi // n
# return phi

View file

@ -0,0 +1,48 @@
from math import inf, isqrt # integer square root
from itertools import takewhile, compress
SMALL_PRIMES = (2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59)
'''
Euler's Totient (Phi) Function
Implemented in O(nloglog(n)) using the Sieve of Eratosthenes.
'''
def eulertotient(n: int) -> int:
phi = int(n > 1 and n)
for p in range(2, isqrt(n) + 1):
if not n % p:
phi -= phi // p
while not n % p:
n //= p
#if n is > 1 it means it is prime
if n > 1: phi -= phi // n
return phi
'''
Tests the primality of an integer using its totient.
NOTE: If totient(n) has already been calculated
then pass it as the optional phi parameter.
'''
def is_prime(n: int, phi: int = None) -> bool:
return n - 1 == (phi if phi is not None else eulertotient(n))
# Taken from Lucas A. Brown's primefac.py (some variables renamed)
def primegen(limit=inf) -> int:
ltlim = lambda x: x < limit
yield from takewhile(ltlim, SMALL_PRIMES)
pl, prime = [3,5,7], primegen()
for p in pl: next(prime)
n = next(prime); nn = n*n
while True:
n = next(prime); ll, nn = nn, n*n
delta = nn - ll
sieve = bytearray([True]) * delta
for p in pl:
k = (-ll) % p
sieve[k::p] = bytearray([False]) * ((delta-k)//p + 1)
if nn > limit: break
yield from compress(range(ll,ll+delta,2), sieve[::2])
pl.append(n)
yield from takewhile(ltlim, compress(range(ll,ll+delta,2), sieve[::2]))

29
celeste/math/util.py Normal file
View file

@ -0,0 +1,29 @@
from collections.abc import Iterable
from itertools import chain, combinations
def clamp(n: int, min: int, max: int) -> int:
if n < min:
return min
elif n > max:
return max
return n
def clamp_max(n: int, max: int) -> int:
return max if n > max else n
def clamp_min(n: int, max: int) -> int:
return min if n < min else n
def digits(n: int) -> int:
return len(str(n))
# NOTE: assumes A and B are equal length
def xor_bytes(A: bytes, B: bytes) -> bytes:
return b''.join([(a ^ b).to_bytes() for (a, b) in zip(A, B)])
def xor_str(A: str, B: str) -> str:
return ''.join([chr(ord(a) ^ ord(b)) for (a, b) in zip(A, B)])
def powerset(iterable: Iterable) -> Iterable:
s = list(iterable)
return chain.from_iterable(combinations(s, r) for r in range(len(s)+1))

View file

@ -0,0 +1 @@
from noether.lib.structs.__result import Result

View file

@ -0,0 +1,19 @@
from enum import Enum
from typing import Optional
class Result:
def __init__(self,
success: bool,
reason: str,
value: Optional[any] = None) -> None:
self.success = success
self.reason = reason
self.value = value
@classmethod
def succeed(cls, value: any, reason: str = 'Ok') -> 'Result':
return cls(True, reason, value=value)
@classmethod
def fail(cls, reason: str, value: Optional[any] = None) -> 'Result':
return cls(False, reason, value=value)