一般做矩阵快速幂的时候,是需要“把母矩阵和一个答案矩阵相乘赋值给答案矩阵”这个操作执行 \(n\) 次。这时我们可以使用矩阵快速幂优化这个过程。
传入参数的时候不引用,就不会改变母矩阵本身的值,可以重复利用。
一般为了方便会把矩阵的大小固定下来,如果 \(2\times 2\) 的矩阵乘 \(2\times 1\)的矩阵,大小不一致会很麻烦,用 \(0\) 把第二个矩阵补上变成都是 \(2 \times 2\) 的,不会影响矩阵对应位置的数字,只需要记录矩阵长=宽 \(matn=2\) 即可。
时间复杂度 \(O(\log n \times matn^3)\)。
#include<bits/stdc++.h> using namespace std; #define f(i, a, b) for(int i = (a); i <= (b); i++) #define cl(i, n) i.clear(),i.resize(n); #define endl '\n' typedef long long ll; typedef unsigned long long ull; typedef pair<int, int> pii; const int inf = 1e9; const int mod = 1e9 + 7; int n; struct mat { int n; ll a[110][110]; }a; mat mul(mat a, mat b) { mat ret; ret.n = a.n; memset(ret.a, 0, sizeof(ret.a)); f(i, 1, ret.n) { f(j, 1, ret.n) { f(k, 1, ret.n) { ret.a[i][j] += a.a[i][k] * b.a[k][j]; ret.a[i][j] %= mod; } } } return ret; } mat qpow(mat a, ll k) { mat ret; ret.n = a.n; memset(ret.a, 0, sizeof(ret.a)); f(i, 1, ret.n) { ret.a[i][i] = 1; } while(k) { if(k & 1) ret = mul(a, ret); a = mul(a, a); k >>= 1; } return ret; } int main() { ios::sync_with_stdio(0); cin.tie(NULL); cout.tie(NULL); ll n, k; cin >> n >> k; a.n = n; f(i, 1, n) f(j, 1, n) cin >> a.a[i][j]; a = qpow(a, k); f(i, 1, n) f(j, 1, n) cout << a.a[i][j] << " \n"[j == n]; return 0; }
将一维 DP 转移式换成矩阵形式然后快速幂可以把 \(n\) 带个 \(\log\)。比如:
#include<bits/stdc++.h> using namespace std; #define f(i, a, b) for(int i = (a); i <= (b); i++) #define cl(i, n) i.clear(),i.resize(n); #define endl '\n' typedef long long ll; typedef unsigned long long ull; typedef pair<int, int> pii; const int inf = 1e9; const int mod = 1e9 + 7; int n; struct mat { ll a[110][110]; }a; mat mul(mat a, mat b) { mat ret; memset(ret.a, 0, sizeof(ret.a)); f(i, 1, n) { f(j, 1, n) { f(k, 1, n) { ret.a[i][j] += a.a[i][k] * b.a[k][j]; ret.a[i][j] %= mod; } } } return ret; } mat qpow(mat ret, ll k) { while(k) { if(k & 1) ret = mul(a, ret); a = mul(a, a); k >>= 1; } return ret; } int main() { ios::sync_with_stdio(0); cin.tie(NULL); cout.tie(NULL); n = 2; ll k; cin >> k; memset(a.a, 0, sizeof(a.a)); a.a[1][1] = a.a[2][2] = 3; a.a[1][2] = a.a[2][1] = 1; mat b; memset(b.a, 0, sizeof(b.a)); b.a[1][1] = 1; b = qpow(b, k); cout << b.a[1][1] << endl; return 0; }
#include<bits/stdc++.h> using namespace std; #define f(i, a, b) for(int i = (a); i <= (b); i++) #define cl(i, n) i.clear(),i.resize(n); #define endl '\n' typedef long long ll; typedef unsigned long long ull; typedef pair<int, int> pii; const int inf = 1e9; const ll mod = 998244353; ll n; ll s; ll a[100010]; ll matn = 2; struct mat { ll a[5][5]; }fir,sec; ll x; //记录第一个的下标 //f(0)=0,f(1)=1 //f(a_i)=f(a_i-1)+f(a_i-2),f(a_i+1)=f(a_i-1) //f(s)=f(s-1)+f(s-2),... //算到f(a_i-1)和f(a_i-2) //正常用f(a_(i-1))和f(a_(i-1)+1)推矩阵快速幂 mat mul(mat aa, mat bb) { mat ret; memset(ret.a, 0, sizeof(ret.a)); f(i, 1, matn) { f(j, 1, matn) { f(k, 1, matn) { ret.a[i][j] += aa.a[i][k] * bb.a[k][j]; ret.a[i][j] %= mod; } } } return ret; } mat qpow(mat aa, mat bb, ll k) { // cout << k << endl; while(k) { if(k & 1) aa = mul(bb, aa); bb = mul(bb, bb); k >>= 1; } return aa; } int main() { ios::sync_with_stdio(0); cin.tie(NULL); cout.tie(NULL); cin >> n >> s; f(i, 1, n)cin>>a[i]; memset(fir.a, 0, sizeof(fir.a)); memset(sec.a, 0, sizeof(sec.a)); fir.a[1][2] = fir.a[2][1] = fir.a[2][2] = 1; sec.a[1][2] = sec.a[2][1] = 1; mat ans;memset(ans.a, 0, sizeof(ans.a)); x = 0; ans.a[2][1] = 1; a[n + 1] = s; f(i, 1, n + 1) { if(a[i] - 1 > x) ans = qpow(ans, fir, a[i] - 1 - x); x = a[i] - 1; ans = mul(sec, ans); x = a[i]; } cout << ans.a[1][1]; return 0; }