If you want to do it all in awk (GNU awk 4 or later is required, because the script relies on the FPAT variable):
awk '{
  # Walk every field that FPAT carved out of the current record.
  for (i = 1; i <= NF; i++) {
    # A field starting with a double quote was matched by the quoted
    # alternative of FPAT: drop its opening and closing quote characters.
    if (substr($i, 1, 1) == "\"")
      $i = substr($i, 2, length($i) - 2)
    # Print the field in brackets, followed by OFS between fields
    # and by RS after the last one.
    printf "[%s]%s", $i, (i < NF ? OFS : RS)
  }
}' FPAT='([^,]+)|("[^"]+")' infile
Sample output:
% cat infile
Australian Capital Territory,AU-ACT,20034,AU,Australia
Piaui,BR-PI,20100,BR,Brazil
"Adygeya, Republic",RU-AD,21250,RU,Russian Federation
% awk '{
  # Walk every field that FPAT carved out of the current record.
  for (i = 1; i <= NF; i++) {
    # A field starting with a double quote was matched by the quoted
    # alternative of FPAT: drop its opening and closing quote characters.
    if (substr($i, 1, 1) == "\"")
      $i = substr($i, 2, length($i) - 2)
    # Print the field in brackets, followed by OFS between fields
    # and by RS after the last one.
    printf "[%s]%s", $i, (i < NF ? OFS : RS)
  }
}' FPAT='([^,]+)|("[^"]+")' infile
[Australian Capital Territory] [AU-ACT] [20034] [AU] [Australia]
[Piaui] [BR-PI] [20100] [BR] [Brazil]
[Adygeya, Republic] [RU-AD] [21250] [RU] [Russian Federation]
With Perl:
perl -MText::ParseWords -lne'
  # Split the current line on commas; a false "keep" argument makes
  # parse_line drop the quotes around quoted fields.
  my @fields = parse_line(",", 0, $_);
  # Wrap each field in brackets and print them separated by single spaces.
  print join " ", map { "[$_]" } @fields;
' infile
This should work with your awk version (based on this comp.unix.shell post; I also replaced the embedded commas with spaces).
awk '{
  # Split the record into data[1..n], honouring double-quoted fields.
  n = parse_csv($0, data)
  for (i = 1; i <= n; i++) {
    # Replace commas embedded in a field with spaces.
    gsub(/,/, " ", data[i])
    # Bracketed field, then OFS between fields and RS after the last one.
    printf "[%s]%s", data[i], (i < n ? OFS : RS)
  }
}

# parse_csv(str, array): split one CSV record into array[1..N] and return N.
#   str    the record text
#   array  output array, emptied first and then filled with the fields
#   field, i  locals (extra parameters are how awk declares local variables)
# Surrounding blanks and the enclosing double quotes are stripped from each
# field, and a doubled quote inside a field becomes one literal quote.
function parse_csv(str, array,    field, i) {
  split("", array)    # portable way to empty the array
  str = str ","       # terminate the last field so every field ends in a comma
  # Each iteration consumes one leading field, quoted or unquoted, together
  # with its trailing comma. Because str always ends in a comma, the match
  # starts at the first character of str.
  while (match(str, /[ ]*("[^"]*(""[^"]*)*"|[^,]*)[ ]*,/)) {
    field = substr(str, 1, RLENGTH)
    # Strip leading blanks and opening quote, closing quote and trailing comma.
    gsub(/^[ ]*"?|"?[ ]*,$/, "", field)
    # Collapse each escaped (doubled) quote into a single quote character.
    gsub(/""/, "\"", field)
    array[++i] = field
    str = substr(str, RLENGTH + 1)
  }
  return i
}' infile