mirror of
https://github.com/openbsd/src.git
synced 2026-06-18 07:13:36 +02:00
flesh out match_pattern() tests, including a new comparison test of
the new NFA-based implementation against the original one for all possible combinations of short inputs and patterns constructed from a small dictionary of possibilities.
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
/* $OpenBSD: tests.c,v 1.9 2025/04/15 04:00:42 djm Exp $ */
|
||||
/* $OpenBSD: tests.c,v 1.10 2026/05/31 04:20:58 djm Exp $ */
|
||||
/*
|
||||
* Regress test for matching functions
|
||||
*
|
||||
@@ -15,6 +15,142 @@
|
||||
|
||||
#include "match.h"
|
||||
|
||||
/* Original match_pattern() implementation; has bad worst-case behaviour */
|
||||
static int
|
||||
match_pattern_original(const char *s, const char *pattern)
|
||||
{
|
||||
for (;;) {
|
||||
/* If at end of pattern, accept if also at end of string. */
|
||||
if (!*pattern)
|
||||
return !*s;
|
||||
|
||||
if (*pattern == '*') {
|
||||
/* Skip this and any consecutive asterisks. */
|
||||
while (*pattern == '*')
|
||||
pattern++;
|
||||
|
||||
/* If at end of pattern, accept immediately. */
|
||||
if (!*pattern)
|
||||
return 1;
|
||||
|
||||
/* If next character in pattern is known, optimize. */
|
||||
if (*pattern != '?' && *pattern != '*') {
|
||||
/*
|
||||
* Look instances of the next character in
|
||||
* pattern, and try to match starting from
|
||||
* those.
|
||||
*/
|
||||
for (; *s; s++)
|
||||
if (*s == *pattern &&
|
||||
match_pattern_original(s + 1,
|
||||
pattern + 1))
|
||||
return 1;
|
||||
/* Failed. */
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
* Move ahead one character at a time and try to
|
||||
* match at each position.
|
||||
*/
|
||||
for (; *s; s++)
|
||||
if (match_pattern_original(s, pattern))
|
||||
return 1;
|
||||
/* Failed. */
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
* There must be at least one more character in the string.
|
||||
* If we are at the end, fail.
|
||||
*/
|
||||
if (!*s)
|
||||
return 0;
|
||||
|
||||
/* Check if the next character of the string is acceptable. */
|
||||
if (*pattern != '?' && *pattern != *s)
|
||||
return 0;
|
||||
|
||||
/* Move to the next character, both in string and in pattern. */
|
||||
s++;
|
||||
pattern++;
|
||||
}
|
||||
/* NOTREACHED */
|
||||
}
|
||||
|
||||
/* n^x for size_t */
|
||||
static size_t
|
||||
pow_size(size_t base, size_t exp)
|
||||
{
|
||||
size_t r = 1;
|
||||
|
||||
ASSERT_SIZE_T_NE(base, 0);
|
||||
while (exp-- != 0) {
|
||||
ASSERT_SIZE_T_LE(r, SIZE_MAX / base);
|
||||
r *= base;
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
static void
|
||||
make_word(size_t v, const char *alphabet, size_t alphabet_len,
|
||||
char *word, size_t wordlen)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < wordlen; i++) {
|
||||
word[i] = alphabet[v % alphabet_len];
|
||||
v /= alphabet_len;
|
||||
}
|
||||
word[wordlen] = '\0';
|
||||
}
|
||||
|
||||
#define PATTERN_LEN 8
|
||||
#define INPUT_LEN 7
|
||||
|
||||
static void
|
||||
match_pattern_check_one_input(const char *input, const char *pattern_alphabet)
|
||||
{
|
||||
char pattern[PATTERN_LEN];
|
||||
size_t len, i, npatterns;
|
||||
int actual, expected;
|
||||
|
||||
for (len = 0; len < sizeof(pattern); len++) {
|
||||
/* Check with all patterns of this size using alphabet */
|
||||
npatterns = pow_size(strlen(pattern_alphabet), len);
|
||||
for (i = 0; i < npatterns; i++) {
|
||||
make_word(i, pattern_alphabet,
|
||||
strlen(pattern_alphabet), pattern, len);
|
||||
actual = match_pattern(input, pattern);
|
||||
expected = match_pattern_original(input, pattern);
|
||||
test_subtest_info("input=\"%s\" pattern=\"%s\"",
|
||||
input, pattern);
|
||||
ASSERT_INT_EQ(actual, expected);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Check current match_pattern against original one with an exhaustive
|
||||
* combination of patterns and inputs.
|
||||
*/
|
||||
static void
|
||||
match_pattern_exhaustive_original(void)
|
||||
{
|
||||
const char *pattern_alphabet = "abx?*";
|
||||
const char *input_alphabet = "abc";
|
||||
char input[INPUT_LEN];
|
||||
size_t len, i, ninputs;
|
||||
|
||||
for (len = 0; len < sizeof(input); len++) {
|
||||
/* Check every possible input from alphabet of this size */
|
||||
ninputs = pow_size(strlen(input_alphabet), len);
|
||||
for (i = 0; i < ninputs; i++) {
|
||||
make_word(i, input_alphabet,
|
||||
strlen(input_alphabet), input, len);
|
||||
match_pattern_check_one_input(input, pattern_alphabet);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
tests(void)
|
||||
{
|
||||
@@ -24,22 +160,84 @@ tests(void)
|
||||
ASSERT_INT_EQ(match_pattern("aaa", ""), 0);
|
||||
ASSERT_INT_EQ(match_pattern("aaa", "aaaa"), 0);
|
||||
ASSERT_INT_EQ(match_pattern("aaaa", "aaa"), 0);
|
||||
ASSERT_INT_EQ(match_pattern("abc", "abc"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("abc", "abd"), 0);
|
||||
ASSERT_INT_EQ(match_pattern("abc", "abcd"), 0);
|
||||
ASSERT_INT_EQ(match_pattern("abcd", "abc"), 0);
|
||||
TEST_DONE();
|
||||
|
||||
TEST_START("match_pattern wildcard");
|
||||
ASSERT_INT_EQ(match_pattern("", "*"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("", "**"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("", "***"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("", "?"), 0);
|
||||
ASSERT_INT_EQ(match_pattern("", "*?"), 0);
|
||||
ASSERT_INT_EQ(match_pattern("", "?*"), 0);
|
||||
ASSERT_INT_EQ(match_pattern("", "**a*"), 0);
|
||||
ASSERT_INT_EQ(match_pattern("a", "?"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("aa", "a?"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("a", "*"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("aa", "a*"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("aa", "?*"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("aa", "**"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("aa", "?a"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("aa", "*a"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("ba", "a?"), 0);
|
||||
ASSERT_INT_EQ(match_pattern("ba", "a*"), 0);
|
||||
ASSERT_INT_EQ(match_pattern("ab", "?a"), 0);
|
||||
ASSERT_INT_EQ(match_pattern("ab", "*a"), 0);
|
||||
ASSERT_INT_EQ(match_pattern("aa", "**"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("ab", "a***b"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("axb", "a***b"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("axxb", "a***b"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("ax", "a***b"), 0);
|
||||
ASSERT_INT_EQ(match_pattern("abbb", "a*b*b"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("abbb", "a*b*c"), 0);
|
||||
ASSERT_INT_EQ(match_pattern("aaaaaaaaac", "a*a*a*a*b"), 0);
|
||||
ASSERT_INT_EQ(match_pattern("aaaaaaaaab", "a*a*a*a*b"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("ab", "*b"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("ab", "*a*"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("ab", "*a*b"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("ab", "*a*c"), 0);
|
||||
ASSERT_INT_EQ(match_pattern("abc", "a?c"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("abc", "??c"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("abc", "???"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("abc", "a?d"), 0);
|
||||
ASSERT_INT_EQ(match_pattern("ab", "???"), 0);
|
||||
ASSERT_INT_EQ(match_pattern("abc", "ab*"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("ab", "ab*"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("a", "ab*"), 0);
|
||||
ASSERT_INT_EQ(match_pattern("abc", "ab?"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("ab", "ab?"), 0);
|
||||
ASSERT_INT_EQ(match_pattern("abcd", "ab?"), 0);
|
||||
ASSERT_INT_EQ(match_pattern("abc", "?bc"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("abc", "?b*"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("abc", "?c"), 0);
|
||||
ASSERT_INT_EQ(match_pattern("abc", "a*?c"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("ac", "a*?c"), 0);
|
||||
ASSERT_INT_EQ(match_pattern("abbc", "a*?c"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("abc", "a?*c"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("ac", "a?*c"), 0);
|
||||
ASSERT_INT_EQ(match_pattern("abbc", "a?*c"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("abc", "a*?*c"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("ac", "a*?*c"), 0);
|
||||
ASSERT_INT_EQ(match_pattern("abc", "?*c"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("ac", "?*c"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("c", "?*c"), 0);
|
||||
ASSERT_INT_EQ(match_pattern("abc", "*?c"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("ac", "*?c"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("c", "*?c"), 0);
|
||||
ASSERT_INT_EQ(match_pattern("abc", "a?*"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("ab", "a?*"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("a", "a?*"), 0);
|
||||
ASSERT_INT_EQ(match_pattern("abc", "a*?"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("ab", "a*?"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("a", "a*?"), 0);
|
||||
ASSERT_INT_EQ(match_pattern("abb", "a*b"), 1);
|
||||
ASSERT_INT_EQ(match_pattern("abbc", "a*b"), 0);
|
||||
TEST_DONE();
|
||||
|
||||
TEST_START("match_pattern exhaustive original");
|
||||
match_pattern_exhaustive_original();
|
||||
TEST_DONE();
|
||||
|
||||
TEST_START("match_pattern_list");
|
||||
|
||||
Reference in New Issue
Block a user