The Wu TLE

Problem Info

My Work

#include <iostream>
#include <cassert>
#include <vector>
#include <string>
#include <map>
#include <algorithm>

#include "debugging.h"

using std::cout;
using std::endl;
using std::vector;

// https://codeforces.com/problemset/problem/1017/D (sample input ommitted due to length)
int main() {
    int str_len;
    int set_size;
    int query_num;
    std::cin >> str_len >> set_size >> query_num;

    vector<int> values(str_len);
    for (int& v : values) {
        std::cin >> v;
    }
    std::reverse(values.begin(), values.end());

    vector<int> set_buckets(1 << str_len);
    for (int i = 0; i < set_size; i++) {
        std::string val;
        std::cin >> val;
        assert(val.length() == str_len);
        set_buckets[std::stoi(val, nullptr, 2)]++;
    }

    vector<std::map<int, int>> cached_results(1 << str_len);
    for (int q = 0; q < query_num; q++) {
        std::string query_str;
        int max_val;
        std::cin >> query_str >> max_val;
        
        assert(query_str.length() == str_len);
        int query = std::stoi(query_str, nullptr, 2);
        std::map<int, int>& wu_vals = cached_results[query];

        if (wu_vals.empty()) {
            for (int i = 0; i < (1 << str_len); i++) {
                if (set_buckets[i] == 0) {
                    continue;
                }
                int wu_val = 0;
                for (int at = 0; at < str_len; at++) {
                    wu_val += (((query >> at) & 1) == ((i >> at) & 1)) * values[at];
                }
                wu_vals[wu_val] += set_buckets[i];
            }
            int prev_amt = 0;
            for (auto it = wu_vals.begin(); it != wu_vals.end(); it++) {
                it->second += prev_amt;
                prev_amt = it->second;
            }
        }

        if (max_val < wu_vals.begin()->first) {
            cout << 0 << '\n';
        } else {
            auto it = wu_vals.lower_bound(max_val);
            if (it == wu_vals.end()) {
                it--;
            }
            cout << it->second << '\n';
        }
    }
}

It basically processes the queries online, catching the Wu values when operated with each of the personal treasure pieces.

However, this TLEs on test case #4.

What I’ve Tried

I can’t download the test data (because it’s CF), and I don’t really know how to write my own. It should meet the complexity constraints- it’s somewhere around 2^{24}, right?

It’s 2^24 * log(2^12) * a large constant for using a map. There are similar solutions that run in 2^24 using only arrays.

Even then, it seems to TLE.

#include <iostream>
#include <cassert>
#include <vector>
#include <string>
#include <algorithm>

using std::cout;
using std::endl;
using std::vector;

// https://codeforces.com/problemset/problem/1017/D (sample input ommitted due to length)
int main() {
    int str_len;
    int set_size;
    int query_num;
    std::cin >> str_len >> set_size >> query_num;

    int max_wu_val = 0;
    vector<int> values(str_len);
    for (int& v : values) {
        std::cin >> v;
        max_wu_val += v;
    }
    std::reverse(values.begin(), values.end());

    vector<int> set_buckets(1 << str_len);
    for (int i = 0; i < set_size; i++) {
        std::string val;
        std::cin >> val;
        // assert(val.length() == str_len);
        set_buckets[std::stoi(val, nullptr, 2)]++;
    }

    vector<vector<int>> cached_results(1 << str_len, vector<int>(max_wu_val + 1));
    for (int i = 0; i < (1 << str_len); i++) {
        for (int j = 0; j < (1 << str_len); j++) {
            int wu_val = 0;
            for (int at = 0; at < str_len; at++) {
                wu_val += (((i >> at) & 1) == ((j >> at) & 1)) * values[at];
            }
            cached_results[i][wu_val] += set_buckets[j];
        }
        for (int wv = 1; wv <= max_wu_val; wv++) {
            cached_results[i][wv] += cached_results[i][wv - 1];
        }
    }

    for (int q = 0; q < query_num; q++) {
        std::string query_str;
        int val_cap;
        std::cin >> query_str >> val_cap;
        
        // assert(query_str.length() == str_len);
        int query = std::stoi(query_str, nullptr, 2);
        if (val_cap > max_wu_val) {
            cout << cached_results[query].back() << '\n';
        } else {
            cout << cached_results[query][val_cap] << '\n';
        }
    }
}

The precalculation doesn’t take too long, as I’ve tried it with 12 bits, and the precalculation’s complexity is mainly dependent on the bit length.

Precompute another array[bitmask] = sum of wu for a certain bitmask. Then use array[i &j] instead of iterating.

Well, it turns out all I needed to do was unsync the IO. But your optimization did make it get AC’d faster. Thanks alot!