It may be just semantics, but it's important to understand that you cannot run this in a synchronous way. You have to run it asynchronously, and manage the order of the processing to get the desired effect. I find it useful to think about these kinds of problems more in terms of how I want to transform the data (à la functional programming) rather than the imperative code I would write in a more synchronous environment.
From what I can tell by the code, you want to end up with a data structure in super_cats
that looks something like this:
[
{
super_id: 1,
cats: [
{
cat_id: 2,
cat_name: "Category",
subcats: [
{
subcat_id: 3,
subcat_name: "Subcategory"
},
...
]
},
...
]
},
...
]
Let's start by extracting this into a single function call with a single callback.
function getCategoryTree(callback) {
}
Now, then, let's take it from the top. You want to run a single asynchronous function (an SQL query), and you want to produce an array with one entry per result. That sounds like a map
operation to me. However, since we want one of the values (cats
) to be determined asynchronously, we need to use an asynchronous map, which the async
library provides.
Let's just fill in the async.map
signature for now; we want to map over our results
(this is the functional equivalent of our for
loop), and for each one we want to turn the result into something—the asynchronous function that does the something is called the iterator. Finally, once we have all our transformed array elements, we want to call the callback given to our function.
function getCategoryTree(callback) {
conn.query("SELECT * FROM `super_cats`", function(error, results, fields) {
async.map(results, iterator, callback);
});
}
Let's create a new function for getting the top-level category information, and use its name in place of our iterator
placeholder.
function getCategoryTree(callback) {
conn.query("SELECT * FROM `super_cats`", function(error, results, fields) {
async.map(results, getSuperCategory, callback);
});
}
function getSuperCategory(resultRow, callback) {
}
Now we need to decide what we want to give back for each resultRow
. Based on our diagram above, we want an object with super_id
equal to the row's ID, and cats
equal to all the categories in the top-level category. However, since cats
is also determined asynchronously, we need to run the next query and transform those results before we can move on.
Similar to last time, we want each item in our cats
array to be an object with some information from the query's result, but we also want a subcats
array, which is again determined asynchronously, so we'll use async.map
again. This time, however, we'll use an anonymous function for the callback, since we want to do something with the results before we give them to the higher-level callback.
function getSuperCategory(resultItem, callback) {
var supcat_id = resultItem.id;
conn.query("SELECT * FROM `categories` WHERE supcat_id` = " + supcat_id, function(error, results, fields) {
async.map(results, getCategory, function(err, categories) {
callback(err, { super_id: supcat_id, cats: categories });
});
});
}
As you can see, once this async.map
is done, it means we have all the categories under this this super-category; thus, we can call our callback
with the object we want to be in the array.
Now that that's done, we just need to implement getCategory
. It will look very similar to getSuperCategory
, because we want to do basically the same thing—for each result, return an object that has some data from the query, but also an asynchronous component.
function getCategory(resultItem, callback) {
var cat_id = resultItem.id;
var cat_name = resultItem.cat_name;
conn.query("SELECT * FROM `subcategories` WHERE `category` = " + cat_id, function(error, results, fields) {
async.map(results, getSubCategory, function(err, subcategories) {
callback(err, { cat_id: cat_id, cat_name: cat_name, subcats: subcategories });
});
});
}
Now, we just need to implement getSubCategory
.
function getSubCategory(resultItem, callback) {
callback(null, {
subcat_id: resultItem.id,
subcat_name: resultItem.subcategory
});
}
Oops! The data we need from getSubCategory
doesn't have an asynchronous component! It turns out we didn't need that last async.map
at all; we could have used a regular array map; let's change getCategory
and getSubCategory
to work that way.
function getCategory(resultItem, callback) {
var cat_id = resultItem.id;
var cat_name = resultItem.cat_name;
conn.query("SELECT * FROM `subcategories` WHERE `category` = " + cat_id, function(error, results, fields) {
var subcategories = results.map(getSubCategory);
callback(error, { cat_id: cat_id, cat_name: cat_name, subcats: subcategories });
});
}
function getSubCategory(resultItem) {
return {
subcat_id: resultItem.id,
subcat_name: resultItem.subcategory
};
}
It's worth noting that our original method worked fine; if there's a chance getSubCategory
ever has an async component, you could just leave it as it was.
And that's it! Here's the code that I wrote as I was writing this answer; note that I had to fake out the SQL a bit, but I think the idea is there:
var async = require("async");
// fake out sql queries
queryNum = 0;
var conn = {
query: function(query, callback) {
queryNum++;
var results = [1, 2, 3, 4, 5].map(function(elem) {
return {
id: queryNum + "-" + elem,
cat_name: "catname-" + queryNum + "-" + elem,
subcategory: "subcategory-" + queryNum + "-" + elem
};
});
callback(null, results, null);
}
};
function getCategoryTree(callback) {
conn.query("SELECT * FROM `super_cats`", function(error, results, fields) {
async.map(results, getSuperCategory, callback);
});
}
function getSuperCategory(resultItem, callback) {
var supcat_id = resultItem.id;
conn.query("SELECT * FROM `categories` WHERE supcat_id` = " + supcat_id, function(error, results, fields) {
async.map(results, getCategory, function(err, categories) {
callback(err, { super_id: supcat_id, cats: categories });
});
});
}
function getCategory(resultItem, callback) {
var cat_id = resultItem.id;
var cat_name = resultItem.cat_name;
conn.query("SELECT * FROM `subcategories` WHERE `category` = " + cat_id, function(error, results, fields) {
var subcategories = results.map(getSubCategory);
callback(error, { cat_id: cat_id, cat_name: cat_name, subcats: subcategories });
});
}
function getSubCategory(resultItem) {
return {
subcat_id: resultItem.id,
subcat_name: resultItem.subcategory
};
}
getCategoryTree(function(err, result) {
console.log(JSON.stringify(result, null, " "));
});
There are some inefficiencies here, but for simplicity's sake I've glossed over them. For example, rather than running the second sub-query over and over, you could query at once for all the category IDs, then query all the categories at once, etc. Then, once you have all the data, you could loop over each array synchronously to pull out the pieces you need.
In addition, there are better ways to store tree structures in relational databases; in particular, take a look at Modified Preorder Tree Traversal.