任意模数 NTT 和 DFT 的优化

可能就存个板子,而且先咕了

来更了

参考 毛啸《再探快速傅里叶变换》

三模数 NTT

不说了

拆系数 FFT

描述

\(M=2^{15}=32768\),对于需要卷积的数列 \(A_i\)\(B_i\) 中的每个数 \(x\),拆成 \(x=k\times M+b\) 的形式

当长度是 \(10^5\) ,模数在 \(10^9\) 左右时,最大的数字大约在 \(10^{14}\),单位根处理得好一点就不会爆 double 的精度

\(A_i\) 拆出的两个数列和 \(B_i\) 拆出的两个数列 DFT 后各选一个相乘,最终的系数分别是 \(1,2^{15},2^{15},2^{30}\),相同的两项可以一起 IDFT

总共需要 7 次 DFT

已经挺快的了

代码

就是下面的模板题

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#include<cstdio>
#include<algorithm>
#include<cctype>
#include<string.h>
#include<cmath>

using namespace std;
#define ll long long

inline char read() {
static const int IN_LEN = 1000000;
static char buf[IN_LEN], *s, *t;
return (s==t?t=(s=buf)+fread(buf,1,IN_LEN,stdin),(s==t?-1:*s++):*s++);
}
template<class T>
inline void read(T &x) {
static bool iosig;
static char c;
for (iosig=false, c=read(); !isdigit(c); c=read()) {
if (c == '-') iosig=true;
if (c == -1) return;
}
for (x=0; isdigit(c); c=read()) x=((x+(x<<2))<<1)+(c^'0');
if (iosig) x=-x;
}
const int OUT_LEN = 1000000;
char obuf[OUT_LEN], *ooh=obuf;
inline void print(char c) {
if (ooh==obuf+OUT_LEN) fwrite(obuf, 1, OUT_LEN, stdout), ooh=obuf;
*ooh++=c;
}
template<class T>
inline void print(T x) {
static int buf[30], cnt;
if (x==0) print('0');
else {
if (x<0) print('-'), x=-x;
for (cnt=0; x; x/=10) buf[++cnt]=x%10+48;
while(cnt) print((char)buf[cnt--]);
}
}
inline void flush() { fwrite(obuf, 1, ooh - obuf, stdout); }

const int N = 1<<18;
const double Pi=acos(-1);
int n, m, p, l, k;
struct cp{
double a, b;
inline cp operator +(const cp &rhs)const{ return (cp){a+rhs.a, b+rhs.b};}
inline cp operator -(const cp &rhs)const{ return (cp){a-rhs.a, b-rhs.b};}
inline cp operator *(const cp &rhs)const{ return (cp){a*rhs.a-b*rhs.b, a*rhs.b+b*rhs.a};}
inline cp operator *(const double rhs)const{ return (cp){a*rhs, b*rhs};}
} a[N], b[N], c[N], d[N], f[N], g[N], h[N], w[N+1];
inline int Get(int n){ int p=1; while(p<=n) p<<=1; return p;}
void DFT(cp *f, int n){
for(int i=0, j=0; i<n; ++i){
if(i>j) swap(f[i], f[j]);
for(int k=n>>1; (j^=k)<k; k>>=1);
}
for(int i=1; i<n; i<<=1) for(int j=0; j<n; j+=i<<1)
for(int k=j; k<j+i; ++k){
cp t=w[i+k-j]*f[k+i];
f[k+i]=f[k]-t, f[k]=f[k]+t;
}
}
void IDFT(cp *f, int n){
reverse(f+1, f+n), DFT(f, n);
double k=1./n;
for(int i=0; i<n; ++i) f[i]=f[i]*k;
}
int main() {
read(n), read(m), read(p), l=Get(n+m);
for(int i=0, x; i<=n; ++i) read(x), a[i].a=x>>15, b[i].a=x&32767;
for(int i=0, x; i<=m; ++i) read(x), c[i].a=x>>15, d[i].a=x&32767;
for(int i=1; i<l; i<<=1) for(int j=0; j<i; ++j)
w[i+j]=(cp){cos(Pi*j/i), sin(Pi*j/i)};
DFT(a, l), DFT(b, l), DFT(c, l), DFT(d, l);
for(int i=0; i<l; ++i)
f[i]=f[i]+a[i]*c[i], g[i]=g[i]+a[i]*d[i]+b[i]*c[i], h[i]=h[i]+b[i]*d[i];
IDFT(f, l), IDFT(g, l), IDFT(h, l);
for(int i=0; i<=n+m; ++i, print(' '))
print((((ll)(f[i].a+.5)%p<<30)+((ll)(g[i].a+.5)<<15)+(ll)(h[i].a+.5))%p);
return flush(), 0;
}

DFT 的优化

描述

假设 \(n\)\(2\) 的整数次幂,现在需要对长度为 \(n\) 的多项式 \(A(x)\)\(B(x)\) 进行 DFT,我们可以合并只做一次

做法

\[ \begin{align} P(x)=A(x)+iB(x) \\ Q(x)=A(x)-iB(x) \end{align} \]

\(P'[k]\)\(Q'[k]\) 分别是 \(P(x)\)\(Q(x)\) 进行 DFT 后的序列

\(P'[k]=P(\omega_n^k),Q'[j]=Q(\omega_n^k)\),即代入 \(n\) 次单位根的幂后的点值

推导直接拉了

\(\text{conj}(x)\) 表示 \(x\) 的共轭复数,\(A_i\)\(A(x)\)\(i\) 次项系数

\[ \begin{align} P'[k] &= A(\omega_{n}^{k}) + i B(\omega_{n}^{k}) \\ & = \sum_{j=0}^{n-1} A_{j} \omega_{n}^{jk} + i B_{j} \omega_{n}^{jk} \\ & = \sum_{j=0}^{n-1} (A_{j} + i B_{j}) \left(\cos \left(\frac{2 \pi jk}{n}\right) + i \sin \left(\frac{2 \pi jk}{n}\right)\right) \\ \\ Q'[k] &= A(\omega_{n}^{k}) - i B(\omega_{n}^{k}) \\ & = \sum_{j=0}^{n-1} A_{j} \omega_{n}^{jk} - i B_{j} \omega_{n}^{jk} \\ & = \sum_{j=0}^{n-1} (A_{j} - i B_{j}) \left(\cos \left(\frac{2 \pi jk}{n}\right) + i \sin \left(\frac{2 \pi jk}{n}\right)\right) \\ & = \sum_{j=0}^{n-1} \left(A_{j} \cos \left(\frac{2 \pi jk}{n}\right) + B_{j} \sin \left(\frac{2 \pi jk}{n}\right)\right) + i \left(A_{j} \sin \left(\frac{2 \pi jk}{n}\right) - B_{j} \cos \left(\frac{2 \pi jk}{n}\right)\right) \\ & = \text{conj} \left( \sum_{j=0}^{n-1} \left(A_{j} \cos \left(\frac{2 \pi jk}{n}\right) + B_{j} \sin \left(\frac{2 \pi jk}{n}\right)\right) - i \left(A_{j} \sin \left(\frac{2 \pi jk}{n}\right) - B_{j} \cos \left(\frac{2 \pi jk}{n}\right)\right) \right) \\ & = \text{conj} \left( \sum_{j=0}^{n-1} \left(A_{j} \cos \left(\frac{-2 \pi jk}{n}\right) - B_{j} \sin \left(\frac{-2 \pi jk}{n}\right)\right) + i \left(A_{j} \sin \left(\frac{-2 \pi jk}{n}\right) + B_{j} \cos \left(\frac{-2 \pi jk}{n}\right)\right) \right) \\ & = \text{conj} \left( \sum_{j=0}^{n-1} (A_{j} + i B_{j}) \left(\cos \left(\frac{-2 \pi jk}{n}\right) + i \sin \left(\frac{-2 \pi jk}{n}\right)\right)\right) \\ & = \text{conj} \left( \sum_{j=0}^{n-1} (A_{j} + i B_{j}) \omega_{n}^{-jk} \right) \\ & = \text{conj} \left( \sum_{j=0}^{n-1} (A_{j} + i B_{j}) \omega_{n}^{(n-k)j} \right) \\ & = \text{conj} (P'[n-k]) \end{align} \]

于是我们可以通过 \(P(x)\) 得到 \(Q(x)\)

注意最后得到的 \(n-k\) 是模 \(n\) 意义下的,当 \(k=0\) 时需要特殊处理

\(A'[k]\)\(B'[k]\) 分别是 \(A(x)\)\(B(x)\) 进行 DFT 后的序列

\[ \begin{align} A'[k]=\frac{P'[k]+Q'[k]}{2} \\ B'[k]=\frac{P'[k]-Q'[k]}{2i} \end{align} \]

通过这种方法可以用一次长度不变的 DFT 同时计算两个序列 DFT 后的结果

假设对 \(P'[k]\) 进行了 IDFT,实部和虚部分别就是 \(A(x)\)\(B(x)\)

单个 DFT 的优化

可以分奇次项和偶次项拆成两个序列,可以用一次原先一半长度的 DFT 来得到 DFT 的结果,我就不学了

update: 来学了

半次 DFT

DFT

考虑把多项式 \(f(x)\) 的偶次系数和奇次系数分开得到两个一半长度的序列,当做多项式分别是 \(f_0(x)\)\(f_1(x)\)

于是

\[ \begin{align} f(x) &= f_0(x^2)+x \times f_1(x^2) \\ \\ f(\omega_n^k) &= f_0(\omega_n^{2k})+\omega_n^k \times f_1(\omega_n^{2k}) \\ &= f_0(\omega_{\frac{n}{2}}^k)+\omega_n^k \times f_1(\omega_{\frac{n}{2}}^k) \end{align} \]

我们把两个一半长度的多项式合并做 DFT,通过上式可以算出原多项式 DFT 的结果

IDFT

几乎是反的,用 \(f(\omega_n^k)\)\(f(\omega_n^{k+\frac{n}{2}})\) 可以解出 \(f_0(\omega_{\frac{n}{2}}^k)\)\(f_1(\omega_{\frac{n}{2}}^k)\)

然后那样塞回一半长度的序列,做 IDFT 后实部和虚部分别就是偶次和奇次的系数了

实现可以参考 \(3.5\) 次的模板

应用

在上面的拆系数 FFT 中可以优化到 \(4\) 次或者所谓的 \(3.5\) 次的 DFT

模板

Luogu P4245 【模板】任意模数NTT

代码

\(4\) 次 DFT

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#include<cstdio>
#include<algorithm>
#include<cctype>
#include<cmath>

using namespace std;
#define ll long long

static char buf[1<<21], *s=buf;
inline void read(int &x) {
while(isspace(*s)) ++s;
x=*s++^'0';
while(isdigit(*s)) x=x*10+(*s++^'0');
}
char obuf[1<<20], *ooh=obuf;
inline void print(int x) {
static int buf[30], cnt;
if (x==0) *ooh++='0';
else {
for (cnt=0; x; x/=10) buf[++cnt]=x%10+48;
while(cnt) *ooh++=buf[cnt--];
}
}

const int N = 1<<18;
const double Pi=acos(-1);
int n, m, p, l, k;
struct cp{
double a, b;
inline void operator +=(const cp &rhs){ a+=rhs.a, b+=rhs.b;}
inline cp operator +(const cp &rhs)const{ return (cp){a+rhs.a, b+rhs.b};}
inline cp operator -(const cp &rhs)const{ return (cp){a-rhs.a, b-rhs.b};}
inline cp operator *(const cp &rhs)const{ return (cp){a*rhs.a-b*rhs.b, a*rhs.b+b*rhs.a};}
inline cp operator *(const double rhs)const{ return (cp){a*rhs, b*rhs};}
inline cp operator ~()const{ return (cp){a, -b};}
} a[N], b[N], f[N], g[N], w[N];
inline int Get(int n){ int p=1; while(p<=n) p<<=1; return p;}
inline void DFT(cp *f, int n){
for(register int i=0, j=0; i<n; ++i){
if(i>j) swap(f[i], f[j]);
for(register int k=n>>1; (j^=k)<k; k>>=1);
}
for(register int i=1; i<n; i<<=1) for(register int j=0; j<n; j+=i<<1)
for(register int k=j; k<j+i; ++k){
cp t=w[i+k-j]*f[k+i];
f[k+i]=f[k]-t, f[k]+=t;
}
}
inline void IDFT(cp *f, int n){ reverse(f+1, f+n), DFT(f, n);}
int main() {
fread(s, 1, 1<<21, stdin);
read(n), read(m), read(p), l=Get(n+m);
for(register int i=0, x=0; i<=n; ++i) read(x), f[i].a=x>>15, f[i].b=x&32767;
for(register int i=0, x=0; i<=m; ++i) read(x), g[i].a=x>>15, g[i].b=x&32767;
for(register int i=1; i<l; i<<=1){
w[i]=(cp){1, 0};
for(register int j=1; j<i; ++j)
w[i+j]=((j&31)==1?(cp){cos(Pi*j/i), sin(Pi*j/i)}:w[i+j-1]*w[i+1]);
}
DFT(f, l), DFT(g, l);
for(register int i=0; i<l; ++i){
static cp q, f0, f1, g0, g1;
q=~f[i?l-i:0], f0=(f[i]-q)*(cp){0, -.5}, f1=(f[i]+q)*.5;
q=~g[i?l-i:0], g0=(g[i]-q)*(cp){0, -.5}, g1=(g[i]+q)*.5;
a[i]=f1*g1, b[i]=f1*g0+f0*g1+f0*g0*(cp){0, 1};
}
IDFT(a, l), IDFT(b, l);
double k=1./l;
for(register int i=0; i<=n+m; ++i, *ooh++=' ')
print((((ll)(a[i].a*k+.5)%p<<30)+((ll)(b[i].a*k+.5)<<15)+(ll)(b[i].b*k+.5))%p);
return fwrite(obuf, 1, ooh - obuf, stdout), 0;
}

\(3.5\) 次 DFT

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#include<cstdio>
#include<algorithm>
#include<cctype>
#include<cmath>

using namespace std;
#define ll long long

static char buf[1<<21], *s=buf;
inline void read(int &x) {
while(isspace(*s)) ++s;
x=*s++^'0';
while(isdigit(*s)) x=x*10+(*s++^'0');
}
char obuf[1<<20], *ooh=obuf;
inline void print(int x) {
static int buf[30], cnt;
if (x==0) *ooh++='0';
else {
for (cnt=0; x; x/=10) buf[++cnt]=x%10+48;
while(cnt) *ooh++=buf[cnt--];
}
}

const int N = 1<<18;
const double Pi=acos(-1);
int n, m, p, l, k;
struct cp{
double a, b;
inline void operator +=(const cp &rhs){ a+=rhs.a, b+=rhs.b;}
inline cp operator +(const cp &rhs)const{ return (cp){a+rhs.a, b+rhs.b};}
inline cp operator -(const cp &rhs)const{ return (cp){a-rhs.a, b-rhs.b};}
inline cp operator *(const cp &rhs)const{ return (cp){a*rhs.a-b*rhs.b, a*rhs.b+b*rhs.a};}
inline cp operator *(const double rhs)const{ return (cp){a*rhs, b*rhs};}
inline cp operator ~()const{ return (cp){a, -b};}
} a[N], b[N], c[N], d[N], f[N], g[N], h[N], w[N];
inline int Get(int n){ int p=1; while(p<=n) p<<=1; return p;}
inline void DFT_(cp *f, int n){
for(register int i=0, j=0; i<n; ++i){
if(i>j) swap(f[i], f[j]);
for(register int k=n>>1; (j^=k)<k; k>>=1);
}
for(register int i=1; i<n; i<<=1) for(register int j=0; j<n; j+=i<<1)
for(register int k=j; k<j+i; ++k){
cp t=w[i+k-j]*f[k+i];
f[k+i]=f[k]-t, f[k]+=t;
}
}
inline void DFT(cp *f, int n){
if(n==1) return;
n>>=1;
static cp a[N/2];
for(register int i=0; i<n; ++i) a[i]=(cp){f[i<<1].a, f[i<<1|1].a};
DFT_(a, n);
for(register int i=0; i<n; ++i){
cp q=~a[(n-i)&(n-1)], x=(a[i]+q)*.5, y=(a[i]-q)*(cp){0, -.5}, t=y*w[n+i];
f[i]=x+t, f[n+i]=x-t;
}
}
inline void IDFT(cp *f, int n){
if(n==1) return;
reverse(f+1, f+n), n>>=1;
static cp a[N/2];
for(register int i=0; i<n; ++i)
a[i]=(f[i]+f[i+n])*.5 + (f[i]-f[i+n])*(cp){0, .5}*w[n+i];
DFT_(a, n);
double k=1./n;
for(register int i=0; i<n; ++i) f[i<<1]=(cp){a[i].a*k, 0}, f[i<<1|1]=(cp){a[i].b*k, 0};
}
int main() {
fread(s, 1, 1<<21, stdin);
read(n), read(m), read(p), l=Get(n+m);
for(register int i=0, x=0; i<=n; ++i) read(x), a[i].a=x>>15, b[i].a=x&32767;
for(register int i=0, x=0; i<=m; ++i) read(x), c[i].a=x>>15, d[i].a=x&32767;
for(register int i=1; i<l; i<<=1){
w[i]=(cp){1, 0};
for(register int j=1; j<i; ++j)
w[i+j]=((j&31)==1?(cp){cos(Pi*j/i), sin(Pi*j/i)}:w[i+j-1]*w[i+1]);
}
DFT(a, l), DFT(b, l), DFT(c, l), DFT(d, l);
for(register int i=0; i<l; ++i)
f[i]=a[i]*c[i], g[i]=a[i]*d[i]+c[i]*b[i], h[i]=b[i]*d[i];
IDFT(f, l), IDFT(g, l), IDFT(h, l);
for(register int i=0; i<=n+m; ++i, *ooh++=' ')
print((((ll)(f[i].a+.5)%p<<30)+((ll)(g[i].a+.5)<<15)+(ll)(h[i].a+.5))%p);
return fwrite(obuf, 1, ooh - obuf, stdout), 0;
}

例题

贴代码,方便以后拉

多项式求逆

Luogu P4239 【模板】多项式求逆(加强版)

代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
#include<cstdio>
#include<algorithm>
#include<ctype.h>
#include<string.h>
#include<math.h>
#include<vector>

using namespace std;
#define ll long long

inline char read() {
static const int IN_LEN = 1000000;
static char buf[IN_LEN], *s, *t;
return (s == t ? t = (s = buf) + fread(buf, 1, IN_LEN, stdin), (s == t ? -1 : *s++) : *s++);
}
template<class T>
inline void read(T &x) {
static bool iosig;
static char c;
for (iosig = false, c = read(); !isdigit(c); c = read()) {
if (c == '-') iosig = true;
if (c == -1) return;
}
for (x = 0; isdigit(c); c = read()) x = ((x + (x << 2)) << 1) + (c ^ '0');
if (iosig) x = -x;
}
const int OUT_LEN = 1000000;
char obuf[OUT_LEN], *ooh = obuf;
inline void print(char c) {
if (ooh == obuf + OUT_LEN) fwrite(obuf, 1, OUT_LEN, stdout), ooh = obuf;
*ooh++ = c;
}
template<class T>
inline void print(T x) {
static int buf[30], cnt;
if (x == 0) print('0');
else {
if (x < 0) print('-'), x = -x;
for (cnt = 0; x; x /= 10) buf[++cnt] = x % 10 + 48;
while (cnt) print((char)buf[cnt--]);
}
}
inline void flush() { fwrite(obuf, 1, ooh - obuf, stdout); }

const double Pi = acos(-1);
const int P = 1000000007;
int n, k;
vector<int> a;

namespace Poly{
const int LEN = 1<<18;

vector<int> ans;// for Evaluate()
vector<vector<int>> p;// for Evaluate() & Interpolate()
struct cp{
double a, b;
inline void operator +=(const cp &rhs){ a+=rhs.a, b+=rhs.b;}
inline cp operator +(const cp &rhs)const{ return (cp){a+rhs.a, b+rhs.b};}
inline cp operator -(const cp &rhs)const{ return (cp){a-rhs.a, b-rhs.b};}
inline cp operator *(const cp &rhs)const{ return (cp){a*rhs.a-b*rhs.b, a*rhs.b+b*rhs.a};}
inline cp operator *(const double rhs)const{ return (cp){a*rhs, b*rhs};}
inline cp operator ~()const{ return (cp){a, -b};}
} f[LEN], g[LEN], w[LEN];

inline int Pow(ll x, int y=P-2){
int ans=1;
for(; y; y>>=1, x=x*x%P) if(y&1) ans=ans*x%P;
return ans;
}
inline void Init(){
for(int i=1; i<LEN; i<<=1){
w[i]=(cp){1, 0};
for(int j=1; j<i; ++j)
w[i+j]=((j&31)==1?(cp){cos(Pi*j/i), sin(Pi*j/i)}:w[i+j-1]*w[i+1]);
}
}
inline int Get(int x){ int n=1; while(n<=x) n<<=1; return n;}
inline void DFT_(cp *f, int n){
for(register int i=0, j=0; i<n; ++i){
if(i>j) swap(f[i], f[j]);
for(register int k=n>>1; (j^=k)<k; k>>=1);
}
for(register int i=1; i<n; i<<=1) for(register int j=0; j<n; j+=i<<1)
for(register int k=j; k<j+i; ++k){
cp t=w[i+k-j]*f[k+i];
f[k+i]=f[k]-t, f[k]+=t;
}
}
inline void DFT(cp *f, int n){
if(n==1) return;
n>>=1;
static cp a[LEN/2];
for(register int i=0; i<n; ++i) a[i]=(cp){f[i<<1].a, f[i<<1|1].a};
DFT_(a, n);
for(register int i=0; i<n; ++i){
cp q=~a[(n-i)&(n-1)], x=(a[i]+q)*.5, y=(a[i]-q)*(cp){0, -.5}, t=y*w[n+i];
f[i]=x+t, f[n+i]=x-t;
}
}
inline void IDFT(cp *f, int n){
if(n==1) return;
reverse(f+1, f+n), n>>=1;
static cp a[LEN/2];
for(register int i=0; i<n; ++i)
a[i]=(f[i]+f[i+n])*.5 + (f[i]-f[i+n])*(cp){0, .5}*w[n+i];
DFT_(a, n);
double k=1./n;
for(register int i=0; i<n; ++i) f[i<<1]=(cp){a[i].a*k, 0}, f[i<<1|1]=(cp){a[i].b*k, 0};
}
inline vector<int> Mul(const vector<int> &f, const vector<int> &g){
vector<int> ans(f.size()+g.size()-1);
if(f.size()*g.size()<=1000){
for(unsigned i=0; i<f.size(); ++i) for(unsigned j=0; j<g.size(); ++j)
ans[i+j]=(ans[i+j]+(ll)f[i]*g[j])%P;
return ans;
}
int l=Get(f.size()+g.size()-2);
static cp f0[LEN], f1[LEN], g0[LEN], g1[LEN], A[LEN], B[LEN], C[LEN];
memset(f0, 0, sizeof(cp)*l), memset(f1, 0, sizeof(cp)*l);
memset(g0, 0, sizeof(cp)*l), memset(g1, 0, sizeof(cp)*l);
for(unsigned i=0; i<f.size(); ++i) f0[i].a=f[i]&32767, f1[i].a=f[i]>>15;
for(unsigned i=0; i<g.size(); ++i) g0[i].a=g[i]&32767, g1[i].a=g[i]>>15;
DFT(f0, l), DFT(f1, l), DFT(g0, l), DFT(g1, l);
for(int i=0; i<l; ++i)
A[i]=f1[i]*g1[i], B[i]=f1[i]*g0[i]+f0[i]*g1[i], C[i]=f0[i]*g0[i];
IDFT(A, l), IDFT(B, l), IDFT(C, l);
for(unsigned i=0; i<ans.size(); ++i)
ans[i]=(((ll)(A[i].a+.5)%P<<30)+((ll)(B[i].a+.5)<<15)+(ll)(C[i].a+.5))%P;
return ans;
}
vector<int> PolyInv(const vector<int> &f, int n=-1){
if(n==-1) n=f.size();
if(n==1){
vector<int> ans;
return ans.push_back(Pow(f[0])), ans;
}
vector<int> ans=PolyInv(f, (n+1)/2), tmp(&f[0], &f[0]+n);
tmp=Mul(Mul(tmp, ans), ans), tmp.resize(n);
for(unsigned i=0; i<ans.size(); ++i)
tmp[i]=(2*ans[i]-tmp[i])%P, tmp[i]=(tmp[i]<0?tmp[i]+P:tmp[i]);
for(int i=ans.size(); i<n; ++i) tmp[i]=(tmp[i]?P-tmp[i]:0);
return tmp;
}
}

int main() {
Poly::Init();
read(n), a.resize(n);
for(int i=0; i<n; ++i) read(a[i]);
a=Poly::PolyInv(a);
for(int i=0; i<n; ++i) print(a[i]), print(' ');
return flush(), 0;
}

传球

Luogu P5173 传球

代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#include<cstdio>
#include<algorithm>
#include<cctype>
#include<string.h>
#include<cmath>
#include<vector>

using namespace std;
#define ll long long

const double Pi=acos(-1);
const int N = 3505, P = 1000000007, L = 1<<13;
int C, m, Ans[N], x[N];
struct cp{
double a, b;
inline void operator +=(const cp &rhs){ a+=rhs.a, b+=rhs.b;}
inline cp operator +(const cp &rhs)const{ return (cp){a+rhs.a, b+rhs.b};}
inline cp operator -(const cp &rhs)const{ return (cp){a-rhs.a, b-rhs.b};}
inline cp operator *(const cp &rhs)const{ return (cp){a*rhs.a-b*rhs.b, a*rhs.b+b*rhs.a};}
inline cp operator *(const double rhs)const{ return (cp){a*rhs, b*rhs};}
inline cp operator ~()const{ return (cp){a, -b};}
} a[L], b[L], c[L], d[L], f[L], g[L], h[L], w[L];
inline int Pow(ll x, int y=P-2){
int ans=1;
for(; y; y>>=1, x=x*x%P) if(y&1) ans=ans*x%P;
return ans;
}
inline int Get(int n){ int p=1; while(p<=n) p<<=1; return p;}
inline void DFT_(cp *f, int n){
for(register int i=0, j=0; i<n; ++i){
if(i>j) swap(f[i], f[j]);
for(register int k=n>>1; (j^=k)<k; k>>=1);
}
for(register int i=1; i<n; i<<=1) for(register int j=0; j<n; j+=i<<1)
for(register int k=j; k<j+i; ++k){
cp t=w[i+k-j]*f[k+i];
f[k+i]=f[k]-t, f[k]+=t;
}
}
inline void DFT(cp *f, int n){
if(n==1) return;
n>>=1;
static cp a[L/2];
for(register int i=0; i<n; ++i) a[i]=(cp){f[i<<1].a, f[i<<1|1].a};
DFT_(a, n);
for(register int i=0; i<n; ++i){
cp q=~a[(n-i)&(n-1)], x=(a[i]+q)*.5, y=(a[i]-q)*(cp){0, -.5}, t=y*w[n+i];
f[i]=x+t, f[n+i]=x-t;
}
}
inline void IDFT(cp *f, int n){
if(n==1) return;
reverse(f+1, f+n), n>>=1;
static cp a[L/2];
for(register int i=0; i<n; ++i)
a[i]=(f[i]+f[i+n])*.5 + (f[i]-f[i+n])*(cp){0, .5}*w[n+i];
DFT_(a, n);
double k=1./n;
for(register int i=0; i<n; ++i) f[i<<1]=(cp){a[i].a*k, 0}, f[i<<1|1]=(cp){a[i].b*k, 0};
}
inline void sqr(int *A, int &len){
int n=Get(len*2-2);
memset(a, 0, sizeof(cp)*n), memset(b, 0, sizeof(cp)*n);
for(int i=0; i<len; ++i) a[i].a=A[i]>>15, b[i].a=A[i]&32767;
DFT(a, n), DFT(b, n);
for(int i=0; i<n; ++i) f[i]=a[i]*a[i], g[i]=a[i]*b[i]*2, h[i]=b[i]*b[i];
IDFT(f, n), IDFT(g, n), IDFT(h, n);
for(int i=0; i<len*2-1 && i<C; ++i)
A[i]=(((ll)(f[i].a+.5)%P<<30)+((ll)(g[i].a+.5)<<15)+(ll)(h[i].a+.5))%P;
for(int i=C; i<len*2-1; ++i)
A[i-C]=(A[i-C]+(((ll)(f[i].a+.5)%P<<30)+((ll)(g[i].a+.5)<<15)+(ll)(h[i].a+.5)))%P;
len=min(C, len*2-1);
}
void mul(int *A, int *B, int &lena, int lenb){
int n=Get(lena+lenb-2);
memset(a, 0, sizeof(cp)*n), memset(b, 0, sizeof(cp)*n);
memset(c, 0, sizeof(cp)*n), memset(d, 0, sizeof(cp)*n);
for(int i=0; i<lena; ++i) a[i].a=A[i]>>15, b[i].a=A[i]&32767;
for(int i=0; i<lenb; ++i) c[i].a=B[i]>>15, d[i].a=B[i]&32767;
DFT(a, n), DFT(b, n), DFT(c, n), DFT(d, n);
for(int i=0; i<n; ++i) f[i]=a[i]*c[i], g[i]=a[i]*d[i]+b[i]*c[i], h[i]=b[i]*d[i];
IDFT(f, n), IDFT(g, n), IDFT(h, n);
for(int i=0; i<lena+lenb-1 && i<C; ++i)
A[i]=(((ll)(f[i].a+.5)%P<<30)+((ll)(g[i].a+.5)<<15)+(ll)(h[i].a+.5))%P;
for(int i=C; i<lena+lenb-1; ++i)
A[i-C]=(A[i-C]+(((ll)(f[i].a+.5)%P<<30)+((ll)(g[i].a+.5)<<15)+(ll)(h[i].a+.5)))%P;
lena=min(lena+lenb-1, C);
}
inline void solve(int *ans, int n){
memset(ans, 0, C<<2), memset(x, 0, C<<2), x[0]=x[2]=1, ans[0]=1;
int lenx=3, lena=1;
for(; n; n>>=1, sqr(x, lenx)) if(n&1) mul(ans, x, lena, lenx);
}
int main() {
for(int i=1; i<L; i<<=1){
w[i]=(cp){1, 0};
for(int j=1; j<i; ++j)
w[i+j]=((j&31)==1?(cp){cos(Pi*j/i), sin(Pi*j/i)}:w[i+j-1]*w[i+1]);
}
scanf("%d%d", &C, &m);
solve(Ans, m);
printf("%d\n", Ans[m%C]);
return 0;
}

Flair

LOJ #6132. 「2017 山东三轮集训 Day1」Flair

性质

令最大的两个数的乘积为 \(a\),所有数的 \(\gcd\)\(b\),选择了 \(i\) 个的浪费是 \(f_i\)

对于 \(i\ge a\)\(f_i=f_{i+b}\)

那么我们做长度为 \(b\) 的循环卷积快速幂,对于 \(a\) 以内的特判就好了

考试的时候没带脑子

有些细节不管复杂度就是 \(\mathcal O(b\log b\log n + m\times a)\)

代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#include<cstdio>
#include<algorithm>
#include<cctype>
#include<string.h>
#include<cmath>
#include<vector>

using namespace std;
#define ll long long

const double Pi=acos(-1);
const int N = 10005, M = 105, P = 1000000007, L = 1<<15;
int T, n, m, p, mx, mn, C[M], s[N], Ans[N], x[N];
struct cp{
double a, b;
inline void operator +=(const cp &rhs){ a+=rhs.a, b+=rhs.b;}
inline cp operator +(const cp &rhs)const{ return (cp){a+rhs.a, b+rhs.b};}
inline cp operator -(const cp &rhs)const{ return (cp){a-rhs.a, b-rhs.b};}
inline cp operator *(const cp &rhs)const{ return (cp){a*rhs.a-b*rhs.b, a*rhs.b+b*rhs.a};}
inline cp operator *(const double rhs)const{ return (cp){a*rhs, b*rhs};}
inline cp operator ~()const{ return (cp){a, -b};}
} a[L], b[L], c[L], d[L], f[L], g[L], h[L], w[L];
int gcd(int x, int y){ return y?gcd(y, x%y):x;}
inline int Pow(ll x, int y=P-2){
int ans=1;
for(; y; y>>=1, x=x*x%P) if(y&1) ans=ans*x%P;
return ans;
}
inline int Get(int n){ int p=1; while(p<=n) p<<=1; return p;}
inline void DFT_(cp *f, int n){
for(register int i=0, j=0; i<n; ++i){
if(i>j) swap(f[i], f[j]);
for(register int k=n>>1; (j^=k)<k; k>>=1);
}
for(register int i=1; i<n; i<<=1) for(register int j=0; j<n; j+=i<<1)
for(register int k=j; k<j+i; ++k){
cp t=w[i+k-j]*f[k+i];
f[k+i]=f[k]-t, f[k]+=t;
}
}
inline void DFT(cp *f, int n){
if(n==1) return;
n>>=1;
static cp a[L/2];
for(register int i=0; i<n; ++i) a[i]=(cp){f[i<<1].a, f[i<<1|1].a};
DFT_(a, n);
for(register int i=0; i<n; ++i){
cp q=~a[(n-i)&(n-1)], x=(a[i]+q)*.5, y=(a[i]-q)*(cp){0, -.5}, t=y*w[n+i];
f[i]=x+t, f[n+i]=x-t;
}
}
inline void IDFT(cp *f, int n){
if(n==1) return;
reverse(f+1, f+n), n>>=1;
static cp a[L/2];
for(register int i=0; i<n; ++i)
a[i]=(f[i]+f[i+n])*.5 + (f[i]-f[i+n])*(cp){0, .5}*w[n+i];
DFT_(a, n);
double k=1./n;
for(register int i=0; i<n; ++i) f[i<<1]=(cp){a[i].a*k, 0}, f[i<<1|1]=(cp){a[i].b*k, 0};
}
inline void sqr(int *A, int &len){
int n=Get(len*2-2);
memset(a, 0, sizeof(cp)*n), memset(b, 0, sizeof(cp)*n);
for(int i=0; i<len; ++i) a[i].a=A[i]>>15, b[i].a=A[i]&32767;
DFT(a, n), DFT(b, n);
for(int i=0; i<n; ++i) f[i]=a[i]*a[i], g[i]=a[i]*b[i]*2, h[i]=b[i]*b[i];
IDFT(f, n), IDFT(g, n), IDFT(h, n);
for(int i=0; i<len*2-1 && i<mn; ++i)
A[i]=(((ll)(f[i].a+.5)%P<<30)+((ll)(g[i].a+.5)<<15)+(ll)(h[i].a+.5))%P;
for(int i=mn; i<len*2-1; ++i)
A[i-mn]=(A[i-mn]+(((ll)(f[i].a+.5)%P<<30)+((ll)(g[i].a+.5)<<15)+(ll)(h[i].a+.5)))%P;
len=min(mn, len*2-1);
}
void mul(int *A, int *B, int &lena, int lenb){
int n=Get(lena+lenb-2);
memset(a, 0, sizeof(cp)*n), memset(b, 0, sizeof(cp)*n);
memset(c, 0, sizeof(cp)*n), memset(d, 0, sizeof(cp)*n);
for(int i=0; i<lena; ++i) a[i].a=A[i]>>15, b[i].a=A[i]&32767;
for(int i=0; i<lenb; ++i) c[i].a=B[i]>>15, d[i].a=B[i]&32767;
DFT(a, n), DFT(b, n), DFT(c, n), DFT(d, n);
for(int i=0; i<n; ++i) f[i]=a[i]*c[i], g[i]=a[i]*d[i]+b[i]*c[i], h[i]=b[i]*d[i];
IDFT(f, n), IDFT(g, n), IDFT(h, n);
for(int i=0; i<lena+lenb-1 && i<mn; ++i)
A[i]=(((ll)(f[i].a+.5)%P<<30)+((ll)(g[i].a+.5)<<15)+(ll)(h[i].a+.5))%P;
for(int i=mn; i<lena+lenb-1; ++i)
A[i-mn]=(A[i-mn]+(((ll)(f[i].a+.5)%P<<30)+((ll)(g[i].a+.5)<<15)+(ll)(h[i].a+.5)))%P;
lena=min(lena+lenb-1, mn);
}
inline void solve(int *ans, int n){
memset(ans, 0, mn<<2), memset(x, 0, mn<<2), x[0]=100-p, x[1]=p, ans[0]=1;
int lenx=2, lena=1;
for(; n; n>>=1, sqr(x, lenx)){
if(n&1) mul(ans, x, lena, lenx);
}
}
int main() {
freopen("flair.in", "r", stdin);
freopen("flair.out", "w", stdout);
for(int i=1; i<L; i<<=1){
w[i]=(cp){1, 0};
for(int j=1; j<i; ++j)
w[i+j]=((j&31)==1?(cp){cos(Pi*j/i), sin(Pi*j/i)}:w[i+j-1]*w[i+1]);
}
scanf("%d", &T);
while(T--){
scanf("%d%d%d", &n, &m, &p);
for(int i=1; i<=m; ++i) scanf("%d", C+i);
if(m==1) mx=C[1]; else sort(C+1, C+m+1), mx=C[m]*C[m-1];
mn=C[1];
for(int i=2; i<=m; ++i) mn=gcd(mn, C[i]);
solve(Ans, n);
int ans=0;
for(int i=1; i<mn; ++i) ans=(ans+(ll)Ans[i]*(mn-i))%P;
// for(int i=0; i<mn && i<=16; ++i) printf("[%d]\n", Ans[i]); puts("");
memset(s, 0, (mx+1)<<2);
s[0]=1;
for(int i=1; i<=m; ++i) for(int j=C[i]; j<mx; ++j) s[j]|=s[j-C[i]];
for(int i=mx-1; ~i; --i) s[i]=(s[i]?0:s[i+1]+1);
for(int i=0, k=1; i<mx && i<=n; k=(ll)k*(n-i)%P*Pow(i+1)%P, ++i)
ans=(ans+(ll)Pow(p, i)*Pow(100-p, n-i)%P*k%P*(s[i]-(i+mn-1)/mn*mn+i))%P;
printf("%d\n", (ans+P)%P);
}
return 0;
}

任意模数 NTT 和 DFT 的优化

https://cekavis.github.io/fft-optimization/

Author

Cekavis

Posted on

2019-01-07

Updated on

2022-06-16

Licensed under

Comments