# Benq 2D offline BIT explanation?

Benq’s 2D offline BIT is mentioned here. Can someone provide an explanation to his code?

Here’s a cleaner solution to DMOPC '19 Contest 7 P5 - Soriya's Programming Project - DMOJ: Modern Online Judge that uses it:

``````#include<bits/stdc++.h>
using namespace std;

template<class T> using V = vector<T>;
using pi = pair<int,int>;

template<class T, int SZ> class BIT {
T bit[SZ]{};
public:
void update(int x, T t) {
for (; x < SZ; x += x&-x)
bit[x] += t;
}
T query(int x) {
T res = 0;
for (; x; x -= x&-x)
res += bit[x];
return res;
}
};

/**
* The following data structure supports 2D range update + query, where the points to be updated
are known beforehand.
* The idea is to store a separate 1D BIT for each x. The x-th 1D BIT supports operations
- update bit[y] += t where y must be one of all_y[y_start[x] ... y_start[x] + y_cnt[x]-1]
- query sum_{y' <= y} bit[y'] for any y
* Memory: O(U log SZ) where U is the number of updates
* Time: O(log U log SZ) per update or query
*/

template<class T, int SZ> class OffBIT2D {
bool initialized = 0;
V<int> all_y;
int y_cnt[SZ]{};
int y_start[SZ]{};
V<T> bit;
int rank_of_y(int x, int y) { // get rank of y in distinct y-values for x
int l = y_start[x], r = y_start[x]+y_cnt[x];
return upper_bound(begin(all_y)+l,begin(all_y)+r,y)-begin(all_y)-l;
}
public:
// all updates {x,y} must satisfy x in (0, SZ), no restrictions on y
void init(V<pair<int,int>> updates) {
assert(!initialized);
initialized = true;
sort(begin(updates), end(updates),[](const pi& a, const pi& b) {
return a.second < b.second;
});
for (auto [x, y]: updates) // push to log(N) different BITs
for (; x < SZ; x += x&-x)
++y_cnt[x];
int total_size = 0;
for (int i = 0; i < SZ; ++i)
y_start[i] = (total_size += y_cnt[i]);
all_y.resize(total_size), bit.resize(total_size);
for (auto [x, y]: updates)
for (; x < SZ; x += x&-x)
all_y[--y_start[x]] = y;
}
void update(int x, int _y, T t) {
assert(initialized);
for (; x < SZ; x += x&-x)
for (int y = rank_of_y(x,_y); y <= y_cnt[x]; y += y&-y)
bit[y_start[x]+y-1] += t;
}
T query(int x, int _y) {
assert(initialized);
T res = 0;
for (; x; x -= x&-x)
for (int y = rank_of_y(x,_y); y; y -= y&-y)
res += bit[y_start[x]+y-1];
return res;
}
};

/**
* Description: A set (not multiset!) with support for finding the \$n\$'th
* element, and finding the index of an element. Change \texttt{null\_type} for map.
* Time: O(\log N)
* Source: KACTL
* https://codeforces.com/blog/entry/11080
* Verification: many
*/

#include <ext/pb_ds/assoc_container.hpp>
using namespace __gnu_pbds;
template<class T> using Tree = tree<T, null_type, less<T>,
rb_tree_tag, tree_order_statistics_node_update>;
#define ook order_of_key
#define fbo find_by_order

int main() {
ios::sync_with_stdio(false);
cin.tie(nullptr);
int N; cin >> N;

V<int> A(N);
for (int& i: A) cin >> i;

// make A a permutation
V<int> A_idx(N); iota(begin(A_idx), end(A_idx), 0);
stable_sort(begin(A_idx), end(A_idx),[&](int x, int y) {
return A[x] < A[y];
});
for (int i = 0; i < N; ++i)
A[A_idx[i]] = i+1;

vector<int> P(N);
for (int& i: P) cin >> i;

OffBIT2D<int,500005> O;
for (int i = 0; i < N; ++i) {
const int x = A[P[i]-1];
}

BIT<int, 500005> t1, t2;
int64_t ans = 0;
for (int i = 0; i < N; ++i) {
const int x = A[P[i]-1];
const int lower_left = O.query(P[i], x);
O.update(P[i], x, 1);
ans += t1.query(P[i])-lower_left;
ans += t2.query(x)-lower_left;
t1.update(P[i], 1), t2.update(x, 1);
cout << ans << "\n";
}
}
``````

This version (which should be easier to understand) uses arrays of vectors so it ends up using nearly twice the memory of the previous code (but is nearly as fast):

``````#include<bits/stdc++.h>
using namespace std;

template<class T> using V = vector<T>;
using pi = pair<int,int>;

template<class T, int SZ> class BIT {
T bit[SZ]{};
public:
void update(int x, T t) {
for (; x < SZ; x += x&-x)
bit[x] += t;
}
T query(int x) {
T res = 0;
for (; x; x -= x&-x)
res += bit[x];
return res;
}
};

/**
* The following data structure supports 2D range update + query, where the points to be updated
are known beforehand.
* The idea is to store a separate 1D BIT for each x. The x-th 1D BIT supports operations
- update bit[y] += t where y must be one of all_y[y_start[x] ... y_start[x] + y_cnt[x]-1]
- query sum_{y' <= y} bit[y'] for any y
* Memory: O(U log SZ) where U is the number of updates
* Time: O(log U log SZ) per update or query
*/

template<class T, int SZ> class OffBIT2D {
bool initialized = 0;
// V<int> all_y;
vector<int> all_y[SZ];
// int y_cnt[SZ]{};
// int y_start[SZ]{};
// V<T> bit;
V<T> bit[SZ];
int rank_of_y(int x, int y) { // get rank of y in distinct y-values for x
// int l = y_start[x], r = y_start[x]+y_cnt[x];
// return upper_bound(begin(all_y)+l,begin(all_y)+r,y)-begin(all_y)-l;
return upper_bound(begin(all_y[x]),end(all_y[x]), y)-begin(all_y[x]);
}
public:
// all updates {x,y} must satisfy x in (0, SZ), no restrictions on y
void init(V<pair<int,int>> updates) {
assert(!initialized);
initialized = true;
sort(begin(updates), end(updates),[](const pi& a, const pi& b) {
return a.second < b.second;
});
vector<int> y_cnt(SZ);
for (auto [x, y]: updates) // push to log(N) different BITs
for (; x < SZ; x += x&-x) {
++y_cnt[x];
}
for (int x = 0; x < SZ; ++x) {
all_y[x].reserve(y_cnt[x]);
bit[x].resize(y_cnt[x]);
}
for (auto [x, y]: updates) {
for (; x < SZ; x += x&-x) {
all_y[x].push_back(y);
}
}
}
void update(int x, int _y, T t) {
assert(initialized);
for (; x < SZ; x += x&-x)
for (int y = rank_of_y(x,_y); y <= size(all_y[x]); y += y&-y)
bit[x][y-1] += t;
}
T query(int x, int _y) {
assert(initialized);
T res = 0;
for (; x; x -= x&-x)
for (int y = rank_of_y(x,_y); y; y -= y&-y)
res += bit[x][y-1];
return res;
}
};

/**
* Description: A set (not multiset!) with support for finding the \$n\$'th
* element, and finding the index of an element. Change \texttt{null\_type} for map.
* Time: O(\log N)
* Source: KACTL
* https://codeforces.com/blog/entry/11080
* Verification: many
*/

#include <ext/pb_ds/assoc_container.hpp>
using namespace __gnu_pbds;
template<class T> using Tree = tree<T, null_type, less<T>,
rb_tree_tag, tree_order_statistics_node_update>;
#define ook order_of_key
#define fbo find_by_order

int main() {
ios::sync_with_stdio(false);
cin.tie(nullptr);
int N; cin >> N;

V<int> A(N);
for (int& i: A) cin >> i;

// make A a permutation
V<int> A_idx(N); iota(begin(A_idx), end(A_idx), 0);
stable_sort(begin(A_idx), end(A_idx),[&](int x, int y) {
return A[x] < A[y];
});
for (int i = 0; i < N; ++i)
A[A_idx[i]] = i+1;

vector<int> P(N);
for (int& i: P) cin >> i;

OffBIT2D<int,500005> O;
for (int i = 0; i < N; ++i) {
const int x = A[P[i]-1];
}

BIT<int, 500005> t1, t2;
int64_t ans = 0;
for (int i = 0; i < N; ++i) {
const int x = A[P[i]-1];
const int lower_left = O.query(P[i], x);
O.update(P[i], x, 1);
ans += t1.query(P[i])-lower_left;
ans += t2.query(x)-lower_left;
t1.update(P[i], 1), t2.update(x, 1);
cout << ans << "\n";
}
}
``````
1 Like