Here is my take at a reasonably simple tokenize helper that
- stores results in a dynamically growing array
- null-terminating the array
- keeps the input string safe (strtok modifies the input string, which is undefined behaviour on a literal char[], at least I think in C99)
To make the code re-entrant, use the non-standard strtok_r
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
char** tokenize(const char* input)
{
char* str = strdup(input);
int count = 0;
int capacity = 10;
char** result = malloc(capacity*sizeof(*result));
char* tok=strtok(str," ");
while(1)
{
if (count >= capacity)
result = realloc(result, (capacity*=2)*sizeof(*result));
result[count++] = tok? strdup(tok) : tok;
if (!tok) break;
tok=strtok(NULL," ");
}
free(str);
return result;
}
int main ()
{
char** tokens = tokenize("test string.");
char** it;
for(it=tokens; it && *it; ++it)
{
printf("%s
", *it);
free(*it);
}
free(tokens);
return 0;
}
Here is a strtok
-free reimplementation of that (uses strpbrk
instead):
char** tokenize(const char* str)
{
int count = 0;
int capacity = 10;
char** result = malloc(capacity*sizeof(*result));
const char* e=str;
if (e) do
{
const char* s=e;
e=strpbrk(s," ");
if (count >= capacity)
result = realloc(result, (capacity*=2)*sizeof(*result));
result[count++] = e? strndup(s, e-s) : strdup(s);
} while (e && *(++e));
if (count >= capacity)
result = realloc(result, (capacity+=1)*sizeof(*result));
result[count++] = 0;
return result;
}
与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…