I'm trying to extract the (full) class names from the demangled symbol output of nm using boost::regex.
This sample program
#include <vector>
// Sample code: a class template nested in two namespaces. Instantiating it
// yields symbol names with nested template argument lists.
namespace Ns1
{
namespace Ns2
{
// Class template with two type parameters: T is used only in the signature of
// bar(), Cont is the type of the stored member and of foo()'s parameter.
template<typename T, class Cont>
class A
{
public:
A() {}
~A() {}
// Takes an object of the Cont template argument by const reference; empty body.
void foo(const Cont& c) {}
// Takes another object of the same specialization by const reference; empty body.
void bar(const A<T,Cont>& x) {}
private:
Cont cont;
};
}
}
// Instantiates Ns1::Ns2::A<int, std::vector<int> > twice and calls foo() and
// bar() on one instance, so the constructor, destructor and both member
// functions of that specialization are used.
int main()
{
Ns1::Ns2::A<int,std::vector<int> > a;
Ns1::Ns2::A<int,std::vector<int> > b;
std::vector<int> v;
a.foo(v);
a.bar(b);
}
will produce the following symbols for class A
Ns1::Ns2::A<int, std::vector<int, std::allocator<int> > >::A()
Ns1::Ns2::A<int, std::vector<int, std::allocator<int> > >::bar(Ns1::Ns2::A<int, std::vector<int, std::allocator<int> > > const&)
Ns1::Ns2::A<int, std::vector<int, std::allocator<int> > >::foo(std::vector<int, std::allocator<int> > const&)
Ns1::Ns2::A<int, std::vector<int, std::allocator<int> > >::~A()
I want to extract the class (instance) name Ns1::Ns2::A<int, std::vector<int, std::allocator<int> > >
preferably using a single regular expression pattern, but I have trouble parsing the recursively occurring class specifiers within the <> pairs.
Does anyone know how to do this with a regular expression pattern (that's supported by boost::regex)?
My solution (based on David Hammen's answer, thus the accept):
I don't use (single) regular expressions to extract class and namespace symbols. I have created a simple function that strips off bracketing character pairs (e.g. <> or ()) from the tail of symbol strings:
/**
 * Strips one trailing bracketed group from a symbol string.
 *
 * If symbol ends with closingBracket, the string is scanned backwards with
 * nesting taken into account to find the openingBracket that matches that final
 * closing bracket. Only this last group is removed, so
 * "A<int>::B<char>" yields "A<int>::B" and "operator()(int)" yields
 * "operator()".
 *
 * @param openingBracket  character that opens a group, e.g. '<' or '('
 * @param closingBracket  character that closes a group, e.g. '>' or ')';
 *                        expected to differ from openingBracket
 * @param symbol          the symbol text to process
 * @param strippedPart    receives the removed group including both brackets;
 *                        left untouched when nothing is stripped
 * @return symbol without its trailing group, or symbol unchanged when it is
 *         empty, does not end with closingBracket, or the trailing bracket has
 *         no matching opening bracket (e.g. "operator->")
 */
std::string stripBracketPair(char openingBracket,char closingBracket,const std::string& symbol, std::string& strippedPart)
{
    if(symbol.empty() || symbol[symbol.length() - 1] != closingBracket)
    {
        return symbol;
    }

    // Walk from the tail towards the front, counting how many groups are still
    // open. The group is complete when the count drops back to zero.
    size_t depth = 0;
    for(size_t pos = symbol.length(); pos-- > 0;)
    {
        const char current = symbol[pos];
        if(current == closingBracket)
        {
            ++depth;
        }
        else if(current == openingBracket)
        {
            --depth; // depth >= 1 here: the last character is a closing bracket
            if(depth == 0)
            {
                strippedPart = symbol.substr(pos);
                return symbol.substr(0,pos);
            }
        }
    }

    // Unbalanced brackets: nothing that can safely be called a trailing group.
    return symbol;
}
This is used in two other methods that extract the namespace / class from the symbol:
/**
 * Derives the namespace portion of a demangled symbol.
 *
 * When extractClass() recognizes a class in the symbol, the namespace is built
 * from the path of that class (with its trailing template arguments removed).
 * Otherwise the symbol is treated as a namespace-level function or variable:
 * its trailing argument list and template arguments are removed, the namespace
 * is built from the remaining path, and only the text after the last blank is
 * kept. A result that isClass() reports as a class is replaced by an empty
 * string.
 *
 * @param symbol  demangled symbol text
 * @return the namespace path joined with "::", or an empty string
 */
std::string extractNamespace(const std::string& symbol)
{
    std::string discarded;
    std::string result;
    std::vector<std::string> pathParts;

    std::string className = extractClass(symbol);
    if(className.empty())
    {
        // Assume this symbol is a namespace global function/variable
        std::string bareName = stripBracketPair('(',')',symbol,discarded);
        bareName = stripBracketPair('<','>',bareName,discarded);
        boost::split(pathParts,bareName,boost::is_any_of("::"),boost::token_compress_on);
        result = buildNamespaceFromSymbolPath(pathParts);

        // Keep only the last blank-separated token of the namespace text.
        std::vector<std::string> blankSeparated;
        boost::split(blankSeparated,result,boost::is_any_of(" "),boost::token_compress_on);
        if(blankSeparated.size() > 1)
        {
            result = blankSeparated.back();
        }
    }
    else
    {
        className = stripBracketPair('<','>',className,discarded);
        boost::split(pathParts,className,boost::is_any_of("::"),boost::token_compress_on);
        result = buildNamespaceFromSymbolPath(pathParts);
    }

    // A "namespace" that is itself a known class is not reported as a namespace.
    return isClass(result) ? std::string() : result;
}
/**
 * Extracts the class part of a demangled symbol.
 *
 * The symbol is trimmed, its trailing argument list "(...)" and trailing
 * template argument list "<...>" are removed via stripBracketPair(), and
 * everything before the last "::" is taken as the class candidate. The
 * candidate is rejected when its untemplated form contains a blank
 * (presumably a return type or qualifier in front of the name) or when
 * isClass() does not recognize it.
 *
 * @param symbol  demangled symbol text
 * @return the class name including any template arguments, or an empty string
 *         when no class could be identified
 */
std::string extractClass(const std::string& symbol)
{
    std::string cls;
    std::string strippedPart;
    std::string fullSymbol = symbol;
    boost::trim(fullSymbol);
    // Strip from the trimmed copy: surrounding whitespace would otherwise hide
    // the trailing ')' or '>' from stripBracketPair().
    fullSymbol = stripBracketPair('(',')',fullSymbol,strippedPart);
    fullSymbol = stripBracketPair('<','>',fullSymbol,strippedPart);

    // Search for the scope operator as a whole, so a lone ':' at index 0
    // cannot make the position underflow.
    const std::string::size_type pos = fullSymbol.rfind("::");
    if(pos != std::string::npos)
    {
        cls = fullSymbol.substr(0,pos);
        const std::string untemplatedClassName = stripBracketPair('<','>',cls,strippedPart);
        if(untemplatedClassName.find('<') == std::string::npos &&
           untemplatedClassName.find(' ') != std::string::npos)
        {
            cls = "";
        }
    }

    if(!cls.empty() && !isClass(cls))
    {
        cls = "";
    }
    return cls;
}
The buildNamespaceFromSymbolPath() method simply concatenates valid namespace parts:
/**
 * Joins the leading parts of a symbol path into a namespace string.
 *
 * All parts except the last one are considered. Joining stops at the first
 * part that contains '<' or '('. The collected parts are separated by "::".
 *
 * @param symbolPathParts  the components of a symbol path
 * @return the joined namespace, or an empty string when fewer than two parts
 *         are given or the first part already contains '<' or '('
 */
std::string buildNamespaceFromSymbolPath(const std::vector<std::string>& symbolPathParts)
{
    std::string joined;
    if(symbolPathParts.size() < 2)
    {
        return joined;
    }

    typedef std::vector<std::string>::const_iterator PartIterator;
    const PartIterator firstPart = symbolPathParts.begin();
    const PartIterator lastPart = symbolPathParts.end() - 1; // the final part is never included

    for(PartIterator part = firstPart; part != lastPart; ++part)
    {
        const bool plainName = part->find('<') == std::string::npos &&
                               part->find('(') == std::string::npos;
        if(!plainName)
        {
            break;
        }
        // Parts are only ever appended starting at the front, so every part
        // after the first one needs a separator.
        if(part != firstPart)
        {
            joined += "::";
        }
        joined += *part;
    }
    return joined;
}
Finally, the isClass() method uses a regular expression to scan all symbols for a constructor method (which unfortunately doesn't seem to work for classes only containing member functions):
// Cache of symbols that isClass() has already identified as classes.
std::set<std::string> allClasses;

// Returns text with every regular expression metacharacter backslash-escaped,
// so that the text matches itself literally when embedded in a pattern.
static std::string escapeRegexLiteral(const std::string& text)
{
    static const std::string metaCharacters(".[]{}()\\*+?|^$");
    std::string escaped;
    escaped.reserve(text.size() * 2);
    for(std::string::size_type i = 0; i < text.size(); ++i)
    {
        if(metaCharacters.find(text[i]) != std::string::npos)
        {
            escaped += '\\';
        }
        escaped += text[i];
    }
    return escaped;
}

/**
 * Checks whether a symbol names a class by looking for its constructor.
 *
 * A symbol found in allClasses is accepted immediately. Otherwise the
 * constructor name is taken as the last path component of classSymbol without
 * its trailing template arguments, and allRecords is scanned for a symbol name
 * of the form "<classSymbol>::<constructorName>(...". On the first match the
 * symbol is added to allClasses.
 *
 * Consequently a class for which no constructor symbol exists in allRecords is
 * not recognized.
 *
 * @param classSymbol  candidate class name, including template arguments
 * @return true if the symbol is cached or a matching constructor symbol exists
 */
bool isClass(const std::string& classSymbol)
{
    if(allClasses.find(classSymbol) != allClasses.end())
    {
        return true;
    }

    std::string strippedPart;
    std::string constructorName = stripBracketPair('<','>',classSymbol,strippedPart);
    std::vector<std::string> constructorPathParts;
    boost::split(constructorPathParts,constructorName,boost::is_any_of("::"),boost::token_compress_on);
    if(constructorPathParts.size() > 1)
    {
        constructorName = constructorPathParts.back();
    }

    // Symbol text may contain any regex metacharacter ('*', '(', '[', '{', '+', ...);
    // all of them are escaped so the pattern is always valid and matches the
    // symbol text literally.
    const std::string constructorPattern =
        "^" + escapeRegexLiteral(classSymbol) + "::" + escapeRegexLiteral(constructorName) + "[(].+$";
    boost::regex reConstructor(constructorPattern);

    for(std::vector<NmRecord>::iterator it = allRecords.begin();
        it != allRecords.end();
        ++it)
    {
        if(boost::regex_match(it->symbolName,reConstructor))
        {
            allClasses.insert(classSymbol);
            return true;
        }
    }
    return false;
}
As mentioned, the last method doesn't reliably find a class name if the class doesn't provide any constructor, and it is quite slow on big symbol tables. But at least this seems to cover what you can get out of nm's symbol information.
I have left the regex tag on the question so that other users may find out that regex is not the right approach.
See Question&Answers more detail:
os