1
0
mirror of https://github.com/openbsd/src.git synced 2026-06-18 07:13:36 +02:00

flesh out match_pattern() tests, including a new comparison test of
the new NFA-based implementation against the original one for all
possible combinations of short inputs and patterns constructed from
a small dictionary of possibilities.
This commit is contained in:
djm
2026-05-31 04:20:58 +00:00
parent 3ec0b86f8a
commit 713668bf91
+200 -2
View File
@@ -1,4 +1,4 @@
/* $OpenBSD: tests.c,v 1.9 2025/04/15 04:00:42 djm Exp $ */
/* $OpenBSD: tests.c,v 1.10 2026/05/31 04:20:58 djm Exp $ */
/*
* Regress test for matching functions
*
@@ -15,6 +15,142 @@
#include "match.h"
/*
 * Original match_pattern() implementation, kept as a reference to compare
 * the current implementation against. Has bad worst-case behaviour because
 * every '*' is resolved by recursive trial at each remaining input position.
 *
 * Pattern syntax: '*' matches any run of characters (including none),
 * '?' matches exactly one character, anything else matches itself.
 *
 * Returns 1 if the whole of s matches the whole of pattern, 0 otherwise.
 */
static int
match_pattern_original(const char *s, const char *pattern)
{
	while (*pattern != '\0') {
		if (*pattern != '*') {
			/*
			 * A literal or '?' consumes exactly one input
			 * character, so running out of input is a mismatch.
			 */
			if (*s == '\0')
				return 0;
			if (*pattern != '?' && *pattern != *s)
				return 0;
			s++;
			pattern++;
			continue;
		}

		/* A run of consecutive asterisks behaves like a single one. */
		do {
			pattern++;
		} while (*pattern == '*');

		/* A trailing wildcard accepts whatever input remains. */
		if (*pattern == '\0')
			return 1;

		if (*pattern == '?') {
			/*
			 * No literal to anchor on: retry the rest of the
			 * pattern from each remaining input position.
			 */
			for (; *s != '\0'; s++) {
				if (match_pattern_original(s, pattern))
					return 1;
			}
			return 0;
		}

		/*
		 * The next pattern character is a literal: only positions
		 * holding that character can start a match, so look for
		 * those and retry just past each of them.
		 */
		for (; *s != '\0'; s++) {
			if (*s != *pattern)
				continue;
			if (match_pattern_original(s + 1, pattern + 1))
				return 1;
		}
		return 0;
	}

	/* Pattern exhausted: accept only if the input is exhausted too. */
	return *s == '\0';
}
/*
 * Integer power for size_t: returns base multiplied by itself exp times
 * (1 when exp is 0). A zero base, or a product that would exceed SIZE_MAX,
 * is rejected via the ASSERT_SIZE_T_* checks rather than wrapping silently.
 */
static size_t
pow_size(size_t base, size_t exp)
{
	size_t r;

	ASSERT_SIZE_T_NE(base, 0);
	for (r = 1; exp > 0; exp--) {
		/* r * base fits in a size_t only if r <= SIZE_MAX / base. */
		ASSERT_SIZE_T_LE(r, SIZE_MAX / base);
		r *= base;
	}
	return r;
}
/*
 * Build the word numbered v over the given alphabet: v is read as a number
 * in base alphabet_len, least-significant digit first, and each digit
 * selects one character of alphabet. Exactly wordlen characters are written
 * followed by a NUL, so word must have room for wordlen + 1 bytes.
 */
static void
make_word(size_t v, const char *alphabet, size_t alphabet_len,
    char *word, size_t wordlen)
{
	char *out = word;
	char *const end = word + wordlen;

	while (out < end) {
		*out++ = alphabet[v % alphabet_len];
		v /= alphabet_len;
	}
	*end = '\0';
}
/*
 * Size in bytes of the pattern buffer in match_pattern_check_one_input().
 * This includes the terminating NUL, so generated patterns are at most
 * PATTERN_LEN - 1 characters long.
 */
#define PATTERN_LEN 8
/*
 * Size in bytes of the input buffer in match_pattern_exhaustive_original().
 * This includes the terminating NUL, so generated inputs are at most
 * INPUT_LEN - 1 characters long.
 */
#define INPUT_LEN 7
/*
 * Compare match_pattern() against match_pattern_original() for one input
 * string, using every pattern of 0 to PATTERN_LEN - 1 characters that can
 * be formed from pattern_alphabet. The two results are checked for equality
 * with ASSERT_INT_EQ; the input and pattern are recorded beforehand via
 * test_subtest_info().
 */
static void
match_pattern_check_one_input(const char *input, const char *pattern_alphabet)
{
	char pattern[PATTERN_LEN];
	size_t len, i, npatterns;
	int actual, expected;
	/*
	 * The alphabet never changes while this function runs, so measure
	 * it once instead of once per generated pattern.
	 */
	const size_t alphabet_len = strlen(pattern_alphabet);

	/* len stays below sizeof(pattern) to leave room for the NUL. */
	for (len = 0; len < sizeof(pattern); len++) {
		/* Check with all patterns of this size using alphabet */
		npatterns = pow_size(alphabet_len, len);
		for (i = 0; i < npatterns; i++) {
			make_word(i, pattern_alphabet, alphabet_len,
			    pattern, len);
			actual = match_pattern(input, pattern);
			expected = match_pattern_original(input, pattern);
			test_subtest_info("input=\"%s\" pattern=\"%s\"",
			    input, pattern);
			ASSERT_INT_EQ(actual, expected);
		}
	}
}
/*
 * Exhaustively cross-check the current match_pattern() against the original
 * implementation: every input of 0 to INPUT_LEN - 1 characters over "abc"
 * is handed to match_pattern_check_one_input() together with the pattern
 * alphabet "abx?*".
 */
static void
match_pattern_exhaustive_original(void)
{
	const char *input_alphabet = "abc";
	const char *pattern_alphabet = "abx?*";
	const size_t nsyms = strlen(input_alphabet);
	char input[INPUT_LEN];
	size_t input_len, n, ninputs;

	/* input_len stays below sizeof(input) to leave room for the NUL. */
	for (input_len = 0; input_len < sizeof(input); input_len++) {
		/* Number of distinct inputs of exactly this length. */
		ninputs = pow_size(nsyms, input_len);
		for (n = 0; n < ninputs; n++) {
			make_word(n, input_alphabet, nsyms, input, input_len);
			match_pattern_check_one_input(input, pattern_alphabet);
		}
	}
}
void
tests(void)
{
@@ -24,22 +160,84 @@ tests(void)
ASSERT_INT_EQ(match_pattern("aaa", ""), 0);
ASSERT_INT_EQ(match_pattern("aaa", "aaaa"), 0);
ASSERT_INT_EQ(match_pattern("aaaa", "aaa"), 0);
ASSERT_INT_EQ(match_pattern("abc", "abc"), 1);
ASSERT_INT_EQ(match_pattern("abc", "abd"), 0);
ASSERT_INT_EQ(match_pattern("abc", "abcd"), 0);
ASSERT_INT_EQ(match_pattern("abcd", "abc"), 0);
TEST_DONE();
TEST_START("match_pattern wildcard");
ASSERT_INT_EQ(match_pattern("", "*"), 1);
ASSERT_INT_EQ(match_pattern("", "**"), 1);
ASSERT_INT_EQ(match_pattern("", "***"), 1);
ASSERT_INT_EQ(match_pattern("", "?"), 0);
ASSERT_INT_EQ(match_pattern("", "*?"), 0);
ASSERT_INT_EQ(match_pattern("", "?*"), 0);
ASSERT_INT_EQ(match_pattern("", "**a*"), 0);
ASSERT_INT_EQ(match_pattern("a", "?"), 1);
ASSERT_INT_EQ(match_pattern("aa", "a?"), 1);
ASSERT_INT_EQ(match_pattern("a", "*"), 1);
ASSERT_INT_EQ(match_pattern("aa", "a*"), 1);
ASSERT_INT_EQ(match_pattern("aa", "?*"), 1);
ASSERT_INT_EQ(match_pattern("aa", "**"), 1);
ASSERT_INT_EQ(match_pattern("aa", "?a"), 1);
ASSERT_INT_EQ(match_pattern("aa", "*a"), 1);
ASSERT_INT_EQ(match_pattern("ba", "a?"), 0);
ASSERT_INT_EQ(match_pattern("ba", "a*"), 0);
ASSERT_INT_EQ(match_pattern("ab", "?a"), 0);
ASSERT_INT_EQ(match_pattern("ab", "*a"), 0);
ASSERT_INT_EQ(match_pattern("aa", "**"), 1);
ASSERT_INT_EQ(match_pattern("ab", "a***b"), 1);
ASSERT_INT_EQ(match_pattern("axb", "a***b"), 1);
ASSERT_INT_EQ(match_pattern("axxb", "a***b"), 1);
ASSERT_INT_EQ(match_pattern("ax", "a***b"), 0);
ASSERT_INT_EQ(match_pattern("abbb", "a*b*b"), 1);
ASSERT_INT_EQ(match_pattern("abbb", "a*b*c"), 0);
ASSERT_INT_EQ(match_pattern("aaaaaaaaac", "a*a*a*a*b"), 0);
ASSERT_INT_EQ(match_pattern("aaaaaaaaab", "a*a*a*a*b"), 1);
ASSERT_INT_EQ(match_pattern("ab", "*b"), 1);
ASSERT_INT_EQ(match_pattern("ab", "*a*"), 1);
ASSERT_INT_EQ(match_pattern("ab", "*a*b"), 1);
ASSERT_INT_EQ(match_pattern("ab", "*a*c"), 0);
ASSERT_INT_EQ(match_pattern("abc", "a?c"), 1);
ASSERT_INT_EQ(match_pattern("abc", "??c"), 1);
ASSERT_INT_EQ(match_pattern("abc", "???"), 1);
ASSERT_INT_EQ(match_pattern("abc", "a?d"), 0);
ASSERT_INT_EQ(match_pattern("ab", "???"), 0);
ASSERT_INT_EQ(match_pattern("abc", "ab*"), 1);
ASSERT_INT_EQ(match_pattern("ab", "ab*"), 1);
ASSERT_INT_EQ(match_pattern("a", "ab*"), 0);
ASSERT_INT_EQ(match_pattern("abc", "ab?"), 1);
ASSERT_INT_EQ(match_pattern("ab", "ab?"), 0);
ASSERT_INT_EQ(match_pattern("abcd", "ab?"), 0);
ASSERT_INT_EQ(match_pattern("abc", "?bc"), 1);
ASSERT_INT_EQ(match_pattern("abc", "?b*"), 1);
ASSERT_INT_EQ(match_pattern("abc", "?c"), 0);
ASSERT_INT_EQ(match_pattern("abc", "a*?c"), 1);
ASSERT_INT_EQ(match_pattern("ac", "a*?c"), 0);
ASSERT_INT_EQ(match_pattern("abbc", "a*?c"), 1);
ASSERT_INT_EQ(match_pattern("abc", "a?*c"), 1);
ASSERT_INT_EQ(match_pattern("ac", "a?*c"), 0);
ASSERT_INT_EQ(match_pattern("abbc", "a?*c"), 1);
ASSERT_INT_EQ(match_pattern("abc", "a*?*c"), 1);
ASSERT_INT_EQ(match_pattern("ac", "a*?*c"), 0);
ASSERT_INT_EQ(match_pattern("abc", "?*c"), 1);
ASSERT_INT_EQ(match_pattern("ac", "?*c"), 1);
ASSERT_INT_EQ(match_pattern("c", "?*c"), 0);
ASSERT_INT_EQ(match_pattern("abc", "*?c"), 1);
ASSERT_INT_EQ(match_pattern("ac", "*?c"), 1);
ASSERT_INT_EQ(match_pattern("c", "*?c"), 0);
ASSERT_INT_EQ(match_pattern("abc", "a?*"), 1);
ASSERT_INT_EQ(match_pattern("ab", "a?*"), 1);
ASSERT_INT_EQ(match_pattern("a", "a?*"), 0);
ASSERT_INT_EQ(match_pattern("abc", "a*?"), 1);
ASSERT_INT_EQ(match_pattern("ab", "a*?"), 1);
ASSERT_INT_EQ(match_pattern("a", "a*?"), 0);
ASSERT_INT_EQ(match_pattern("abb", "a*b"), 1);
ASSERT_INT_EQ(match_pattern("abbc", "a*b"), 0);
TEST_DONE();
TEST_START("match_pattern exhaustive original");
match_pattern_exhaustive_original();
TEST_DONE();
TEST_START("match_pattern_list");