#![allow(improper_ctypes)]
use std::cmp::Ordering;
use ll;
use ll::limb::Limb;
use super::{overlap, same_or_separate, same_or_incr};
use mem;
use ll::limb_ptr::{Limbs, LimbsMut};
const TOOM22_THRESHOLD : i32 = 20;
#[allow(dead_code)]
unsafe fn mul_1_generic(mut wp: LimbsMut, mut xp: Limbs, mut n: i32, vl: Limb) -> Limb {
let mut cl = Limb(0);
loop {
let xl = *xp;
let (hpl, lpl) = xl.mul_hilo(vl);
let (lpl, carry) = lpl.add_overflow(cl);
cl = hpl + carry;
*wp = lpl;
n -= 1;
if n == 0 { break; }
wp = wp.offset(1);
xp = xp.offset(1);
}
return cl;
}
#[cfg(not(asm))]
#[inline]
pub unsafe fn mul_1(wp: LimbsMut, xp: Limbs, n: i32, vl: Limb) -> Limb {
debug_assert!(n > 0);
debug_assert!(same_or_incr(wp, n, xp, n));
mul_1_generic(wp, xp, n, vl)
}
#[cfg(asm)]
#[inline]
pub unsafe fn mul_1(mut wp: LimbsMut, xp: Limbs, n: i32, vl: Limb) -> Limb {
debug_assert!(n > 0);
debug_assert!(same_or_incr(wp, n, xp, n));
extern "C" {
fn ramp_mul_1(wp: *mut Limb, xp: *const Limb, n: i32, vl: Limb) -> Limb;
}
ramp_mul_1(&mut *wp, &*xp, n, vl)
}
#[allow(dead_code)]
unsafe fn addmul_1_generic(mut wp: LimbsMut, mut xp: Limbs, mut n: i32, vl: Limb) -> Limb {
debug_assert!(n > 0);
debug_assert!(same_or_separate(wp, n, xp, n));
let mut cl = Limb(0);
loop {
let xl = *xp;
let (hpl, lpl) = xl.mul_hilo(vl);
let (lpl, carry) = lpl.add_overflow(cl);
cl = hpl + carry;
let (lpl, carry) = (*wp).add_overflow(lpl);
cl = cl + carry;
*wp = lpl;
n -= 1;
if n == 0 { break; }
wp = wp.offset(1);
xp = xp.offset(1);
}
return cl;
}
#[cfg(not(asm))]
#[inline]
pub unsafe fn addmul_1(wp: LimbsMut, xp: Limbs, n: i32, vl: Limb) -> Limb {
addmul_1_generic(wp, xp, n, vl)
}
#[cfg(asm)]
#[inline]
pub unsafe fn addmul_1(mut wp: LimbsMut, xp: Limbs, n: i32, vl: Limb) -> Limb {
extern "C" {
fn ramp_addmul_1(wp: *mut Limb, xp: *const Limb, n: i32, vl: Limb) -> Limb;
}
ramp_addmul_1(&mut *wp, &*xp, n, vl)
}
#[allow(dead_code)]
unsafe fn submul_1_generic(mut wp: LimbsMut, mut xp: Limbs, mut n: i32, vl: Limb) -> Limb {
debug_assert!(n > 0);
debug_assert!(same_or_separate(wp, n, xp, n));
let mut cl = Limb(0);
loop {
let xl = *xp;
let (hpl, lpl) = xl.mul_hilo(vl);
let (lpl, carry) = lpl.add_overflow(cl);
cl = hpl + carry;
let (lpl, carry) = (*wp).sub_overflow(lpl);
cl = cl + carry;
*wp = lpl;
n -= 1;
if n == 0 { break; }
wp = wp.offset(1);
xp = xp.offset(1);
}
return cl;
}
#[cfg(not(asm))]
#[inline]
pub unsafe fn submul_1(wp: LimbsMut, xp: Limbs, n: i32, vl: Limb) -> Limb {
submul_1_generic(wp, xp, n, vl)
}
#[cfg(asm)]
#[inline]
pub unsafe fn submul_1(mut wp: LimbsMut, xp: Limbs, n: i32, vl: Limb) -> Limb {
extern "C" {
fn ramp_submul_1(wp: *mut Limb, xp: *const Limb, n: i32, vl: Limb) -> Limb;
}
ramp_submul_1(&mut *wp, &*xp, n, vl)
}
pub unsafe fn mul(wp: LimbsMut, xp: Limbs, xs: i32, yp: Limbs, ys: i32) {
debug_assert!(xs >= ys);
debug_assert!(ys > 0);
debug_assert!(!overlap(wp, xs + ys, xp, xs));
debug_assert!(!overlap(wp, xs + ys, yp, ys));
if ys <= TOOM22_THRESHOLD {
mul_basecase(wp, xp, xs, yp, ys);
} else {
let mut tmp = mem::TmpAllocator::new();
let scratch = tmp.allocate((xs * 2) as usize);
if (xs * 2) >= (ys * 3) {
mul_unbalanced(wp, xp, xs, yp, ys, scratch);
} else {
mul_toom22(wp, xp, xs, yp, ys, scratch);
}
}
}
unsafe fn mul_basecase(mut wp: LimbsMut, xp: Limbs, xs: i32, mut yp: Limbs, mut ys: i32) {
*wp.offset(xs as isize) = ll::mul_1(wp, xp, xs, *yp);
wp = wp.offset(1);
yp = yp.offset(1);
ys -= 1;
while ys > 0 {
*wp.offset(xs as isize) = ll::addmul_1(wp, xp, xs, *yp);
wp = wp.offset(1);
yp = yp.offset(1);
ys -= 1;
}
}
#[inline(always)]
unsafe fn mul_rec(wp: LimbsMut,
xp: Limbs, xs: i32,
yp: Limbs, ys: i32,
scratch: LimbsMut) {
if ys < TOOM22_THRESHOLD {
mul_basecase(wp, xp, xs, yp, ys);
} else if (xs * 2) >= (ys*3) {
mul_unbalanced(wp, xp, xs, yp, ys, scratch);
} else {
mul_toom22(wp, xp, xs, yp, ys, scratch);
}
}
unsafe fn mul_toom22(wp: LimbsMut,
xp: Limbs, xs: i32,
yp: Limbs, ys: i32,
scratch: LimbsMut) {
debug_assert!(xs >= ys && xs < ys*2,
"assertion failed: `xs >= ys && xs < ys*2` xs: {}, ys: {}", xs, ys);
let xh = xs >> 1;
let nl = xs - xh;
let yh = ys - nl;
debug_assert!(0 < xh && xh <= nl);
debug_assert!(0 < yh && yh <= xh,
"assertion failed: 0 < yh && yh <= xh, xs: {}, ys: {}, xh: {}, yh: {}",
xs, ys, xh, yh);
let x0 = xp;
let y0 = yp;
let x1 = xp.offset(nl as isize);
let y1 = yp.offset(nl as isize);
let zx1 = wp;
let zy1 = wp.offset(nl as isize);
let mut z1_neg = false;
if nl == xh {
if ll::cmp(x0, x1, nl) == Ordering::Less {
ll::sub_n(zx1, x1, x0, nl);
z1_neg = true;
} else {
ll::sub_n(zx1, x0, x1, nl);
}
} else {
if ll::is_zero(x0.offset(xh as isize), nl-xh) && ll::cmp(x0, x1, xh) == Ordering::Less {
ll::sub_n(zx1, x1, x0, xh);
ll::zero(zx1.offset(xh as isize), nl-xh);
z1_neg = true;
} else {
ll::sub(zx1, x0, nl, x1, xh);
}
}
if nl == yh {
if ll::cmp(y0, y1, nl) == Ordering::Less {
ll::sub_n(zy1, y1, y0, nl);
z1_neg = !z1_neg;
} else {
ll::sub_n(zy1, y0, y1, nl);
}
} else {
if ll::is_zero(y0.offset(yh as isize), nl-yh) && ll::cmp(y0, y1, yh) == Ordering::Less {
ll::sub_n(zy1, y1, y0, yh);
ll::zero(zy1.offset(yh as isize), nl-yh);
z1_neg = !z1_neg;
} else {
ll::sub(zy1, y0, nl, y1, yh);
}
}
let z0 = wp;
let z1 = scratch;
let z2 = wp.offset((nl * 2) as isize);
let scratch_out = scratch.offset((nl * 2) as isize);
mul_rec(z1, zx1.as_const(), nl, zy1.as_const(), nl, scratch_out);
mul_rec(z0, x0, nl, y0, nl, scratch_out);
mul_rec(z2, x1, xh, y1, yh, scratch_out);
let cy = ll::add_n(wp.offset((2*nl) as isize),
z2.as_const(), z0.offset(nl as isize).as_const(),
nl);
let cy2 = cy + ll::add_n(wp.offset(nl as isize),
z0.as_const(), z2.as_const(),
nl);
let mut cy = cy + ll::add(wp.offset((2*nl) as isize),
z2.as_const(), nl,
z2.offset(nl as isize).as_const(), yh+xh-nl);
if z1_neg {
cy = cy + ll::add_n(wp.offset(nl as isize),
wp.offset(nl as isize).as_const(), z1.as_const(),
2*nl);
} else {
cy = cy - ll::sub_n(wp.offset(nl as isize),
wp.offset(nl as isize).as_const(), z1.as_const(),
2*nl);
}
ll::incr(wp.offset((nl * 2) as isize), cy2);
ll::incr(wp.offset((nl * 3) as isize), cy);
}
unsafe fn mul_unbalanced(mut wp: LimbsMut,
mut xp: Limbs, mut xs: i32,
yp: Limbs, ys: i32,
scratch: LimbsMut) {
debug_assert!(xs > ys);
mul_toom22(wp, xp, ys, yp, ys, scratch);
xs -= ys;
xp = xp.offset(ys as isize);
wp = wp.offset(ys as isize);
let mut tmp = mem::TmpAllocator::new();
let w_tmp = tmp.allocate((ys * 3) as usize);
while xs >= (ys * 2) {
mul_toom22(w_tmp, xp, ys, yp, ys, scratch);
xs -= ys;
xp = xp.offset(ys as isize);
let cy = ll::add_n(wp, wp.as_const(), w_tmp.as_const(), ys);
ll::copy_incr(w_tmp.offset(ys as isize).as_const(),
wp.offset(ys as isize),
ys);
ll::incr(wp.offset(ys as isize), cy);
wp = wp.offset(ys as isize);
}
if xs >= ys {
mul_rec(w_tmp, xp, xs, yp, ys, scratch);
} else {
mul_rec(w_tmp, yp, ys, xp, xs, scratch);
}
let cy = ll::add_n(wp, wp.as_const(), w_tmp.as_const(), ys);
ll::copy_incr(w_tmp.offset(ys as isize).as_const(),
wp.offset(ys as isize),
xs);
ll::incr(wp.offset(ys as isize), cy);
}
pub unsafe fn sqr(wp: LimbsMut, xp: Limbs, xs: i32) {
debug_assert!(xs > 0);
debug_assert!(!overlap(wp, 2*xs, xp, xs));
if xs <= TOOM22_THRESHOLD {
mul_basecase(wp, xp, xs, xp, xs);
} else {
let mut tmp = mem::TmpAllocator::new();
let scratch = tmp.allocate((xs * 2) as usize);
sqr_toom2(wp, xp, xs, scratch);
}
}
#[inline(always)]
unsafe fn sqr_rec(wp: LimbsMut, xp: Limbs, xs: i32, scratch: LimbsMut) {
if xs < TOOM22_THRESHOLD {
mul_basecase(wp, xp, xs, xp, xs);
} else {
sqr_toom2(wp, xp, xs, scratch);
}
}
unsafe fn sqr_toom2(wp: LimbsMut, xp: Limbs, xs: i32, scratch: LimbsMut) {
let xh = xs >> 1;
let xl = xs - xh;
let x0 = xp;
let x1 = xp.offset(xl as isize);
let z0 = wp;
let z1 = scratch;
let z2 = wp.offset((xl * 2) as isize);
let scratch_out = scratch.offset((xl * 2) as isize);
mul_rec(z1, x0, xl, x1, xh, scratch_out);
sqr_rec(z0, x0, xl, scratch_out);
sqr_rec(z2, x1, xh, scratch_out);
let mut cy = ll::add_n(z1, z1.as_const(), z1.as_const(), xs);
cy = cy + ll::add_n(wp.offset(xl as isize), wp.offset(xl as isize).as_const(), z1.as_const(), xs);
ll::incr(wp.offset((xl + xs) as isize), cy);
}