How to use less memory: persistent seg3 with lazy propagation

I’m currently struggling with NOI '10 P2 - Super Piano - DMOJ: Modern Online Judge. My algorithm is giving correct results, yet I’m using too much memory.

#include <bits/stdc++.h>
#define MAX 505000
typedef std::pair<int,int> pii;
const int inf = 1e9;
struct Node {
    int left,right;
    pii valor;
    int lazy;
};
Node mem[25000000];
int cursormem=0;
int alocar(void){
    return cursormem++;
}
void propagate(int x){
    auto&y=mem[x];
    if(y.lazy){
        if(y.left){
            int nov = alocar();
            mem[nov]=mem[y.left];
            mem[nov].lazy+=y.lazy;
            y.left=nov;
        }
        if(y.right){
            int nov = alocar();
            mem[nov]=mem[y.right];
            mem[nov].lazy+=y.lazy;
            y.right=nov;
        }
        y.valor.first+=y.lazy;
        y.lazy=0;
    }
}
int N,K,L,R;
void build(int x,int la=0,int ra=N-1){
    if(la==ra){
        mem[x].valor={0,la};
        return;
    }
    int m=(la+ra)/2;
    mem[x].left=alocar();
    mem[x].right=alocar();
    build(mem[x].left,la,m);
    build(mem[x].right,m+1,ra);
    mem[x].valor=std::max(mem[mem[x].left].valor,mem[mem[x].right].valor);
}
pii query(int x,int l,int r,int la=0,int ra=N-1){
    propagate(x);
    if(ra<l||la>r)return {-inf,-inf};
    if(la>=l&&ra<=r){
        return mem[x].valor;
    }
    int m=(la+ra)/2;
    return std::max(query(mem[x].left,l,r,la,m),query(mem[x].right,l,r,m+1,ra));
}
int update(int pos,int l,int r,int k,int la=0,int ra=N-1){
    propagate(pos);
    if(ra<l||la>r)return pos;
    if(la>=l&&ra<=r){
        int nov = alocar();
        mem[nov]=mem[pos];
        mem[nov].lazy+=k;
        propagate(nov);
        return nov;
    }
    int m=(la+ra)/2;
    int nov = alocar();
    int lef = update(mem[pos].left,l,r,k,la,m);
    int rig = update(mem[pos].right,l,r,k,m+1,ra);
    mem[nov].left=lef;
    mem[nov].right=rig;
    mem[nov].valor=std::max(mem[lef].valor,mem[rig].valor);
    return nov;
}
int principal = alocar();
std::vector<int> niveis;
int array[MAX];
typedef std::pair<int,pii> pip;
int main()
{
    std::ios::sync_with_stdio(false);
    std::cin.tie(0);
    std::cout.tie(0);
    std::cin>>N>>K>>L>>R;--L;--R;
    build(principal);
    for(int i=0;i!=N;++i){
        std::cin>>array[i];
    }
    niveis.push_back(principal);
    for(int i=0;i!=N;++i){
        niveis[i]=update(niveis[i],0,i,array[i]);
        niveis.push_back(niveis[i]);
    }
    std::priority_queue<pip> queue;
    for(int i=0;i!=N;++i){
        int p=i-L;
        if(p>=0){
            int liminicio = std::max(0,i-R);
            pii min = query(niveis[i],liminicio,p);
            queue.push({min.first,{i,min.second}});
        }
    }
    long long total=0;
    for(int i=0;i!=K;++i){
        pip b = queue.top();
        queue.pop();
        total+=(long long)(b.first);
        niveis[b.second.first]=update(niveis[b.second.first],b.second.second,b.second.second,-inf);
        int p=b.second.first-L,lim=std::max(0,b.second.first-R);
        pii k = query(niveis[b.second.first],lim,p);
        queue.push({k.first,{b.second.first,k.second}});
    }
    std::cout<<total<<"\n";
}

I failed to pass test cases (5,9,10). I got RTE (because there’s not enough memory to allocate everything necessary). I already removed calloc and used integers instead of pointers to reduce memory usage. But I just can’t use less than 512 MB. Is there a way to make the persistent tree use less memory?