Here is a dataset filtering algorithm that took 3 seconds for 10000 entries
json& w = where; // condition for filter in JSON format // iterate an array of 10000 entries for (json::iterator it = col.begin(); it != col.end(); ++it) { bool matched = true; // iterate the condition array (area of interest) for (json::iterator wit = w.begin(); wit != w.end(); ++wit) { json compareParams = wit.value(); std::string key = compareParams.begin().key(); for (auto item : compareParams) { //item.dump(4); std::string op = item.begin().key(); int val = item.begin().value(); matched &= match(*it, key, op, val); } } if (matched) { out.push_back(*it); } }
The algorithm was not performing up to the expectation. So a little tweak made the code 10 times faster. There was a condition in JSON format which was to be matched. Just moved moved the json parsing code for condition outside the big loop and wallah ... the algorithm returned results 10 times faster (or more).
// iterate the condition array struct Compare{ Compare(std::string k, std::string o, int v) { key = k; op = o; val = v; } std::string key; std::string op; int val; }; std::vector c; json& w = where; for (json::iterator wit = w.begin(); wit != w.end(); ++wit) { json compareParams = wit.value(); std::string key = compareParams.begin().key(); for (auto item : compareParams) { std::string op = item.begin().key(); int val = item.begin().value(); c.push_back(Compare(key, op, val)); } } // iterate the collection array for (json::iterator it = col.begin(); it != col.end(); ++it) { bool matched = true; // area of interest: This is where the condition parsing codes were for (int i = 0; i < c.size(); i++) { matched &= match(*it, c[i].key, c[i].op, c[i].val); } if (matched) { out.push_back(*it); } }