From 713668bf91125af83441e5de5e034fb9316e80a3 Mon Sep 17 00:00:00 2001
From: djm
Date: Sun, 31 May 2026 04:20:58 +0000
Subject: [PATCH] flesh out match_pattern() tests, including a new comparison
 test of the new NFA-based implementation against the original one for all
 possible combinations of short inputs and patterns constructed from a small
 dictionary of possibilities.

---
Reviewer notes (this section is not part of the commit message):
 * Restored one record per line; the hunks had been joined into a few long
   lines.  Indentation was reconstructed as tabs with four-space
   continuation indents, so check the context and removed lines against
   tests.c rev 1.9 before applying (or apply with --ignore-whitespace).
 * Fixed the "Look instances" comment typo in match_pattern_original();
   its code is unchanged so it remains a faithful reference.
 * The alphabet length is now computed once per function instead of in
   every loop iteration.
 * Documented pow_size(), make_word(), the buffer-size macros and
   match_pattern_check_one_input().
 * Hunk ranges and the diffstat were updated for the added lines; the
   index line still names the blobs of the original submission.

 regress/usr.bin/ssh/unittests/match/tests.c | 222 +++++++++++++++++++-
 1 file changed, 220 insertions(+), 2 deletions(-)

diff --git a/regress/usr.bin/ssh/unittests/match/tests.c b/regress/usr.bin/ssh/unittests/match/tests.c
index 240f4f35d6a..64069a6abd0 100644
--- a/regress/usr.bin/ssh/unittests/match/tests.c
+++ b/regress/usr.bin/ssh/unittests/match/tests.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: tests.c,v 1.9 2025/04/15 04:00:42 djm Exp $ */
+/* $OpenBSD: tests.c,v 1.10 2026/05/31 04:20:58 djm Exp $ */
 /*
  * Regress test for matching functions
  *
@@ -15,6 +15,162 @@
 
 #include "match.h"
 
+/* Original match_pattern() implementation; has bad worst-case behaviour */
+static int
+match_pattern_original(const char *s, const char *pattern)
+{
+	for (;;) {
+		/* If at end of pattern, accept if also at end of string. */
+		if (!*pattern)
+			return !*s;
+
+		if (*pattern == '*') {
+			/* Skip this and any consecutive asterisks. */
+			while (*pattern == '*')
+				pattern++;
+
+			/* If at end of pattern, accept immediately. */
+			if (!*pattern)
+				return 1;
+
+			/* If next character in pattern is known, optimize. */
+			if (*pattern != '?' && *pattern != '*') {
+				/*
+				 * Look for instances of the next character in
+				 * pattern, and try to match starting from
+				 * those.
+				 */
+				for (; *s; s++)
+					if (*s == *pattern &&
+					    match_pattern_original(s + 1,
+					    pattern + 1))
+						return 1;
+				/* Failed. */
+				return 0;
+			}
+			/*
+			 * Move ahead one character at a time and try to
+			 * match at each position.
+			 */
+			for (; *s; s++)
+				if (match_pattern_original(s, pattern))
+					return 1;
+			/* Failed. */
+			return 0;
+		}
+		/*
+		 * There must be at least one more character in the string.
+		 * If we are at the end, fail.
+		 */
+		if (!*s)
+			return 0;
+
+		/* Check if the next character of the string is acceptable. */
+		if (*pattern != '?' && *pattern != *s)
+			return 0;
+
+		/* Move to the next character, both in string and in pattern. */
+		s++;
+		pattern++;
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * Returns base raised to the power exp.  Asserts that base is non-zero
+ * and that the running product never exceeds SIZE_MAX.
+ */
+static size_t
+pow_size(size_t base, size_t exp)
+{
+	size_t r = 1;
+
+	ASSERT_SIZE_T_NE(base, 0);
+	while (exp-- != 0) {
+		ASSERT_SIZE_T_LE(r, SIZE_MAX / base);
+		r *= base;
+	}
+	return r;
+}
+
+/*
+ * Stores in word the wordlen-character string numbered v: v is read as a
+ * base-alphabet_len number, least significant digit first, and each digit
+ * selects a character of alphabet.  A terminating NUL is written at
+ * word[wordlen], so word needs room for wordlen + 1 bytes.
+ */
+static void
+make_word(size_t v, const char *alphabet, size_t alphabet_len,
+    char *word, size_t wordlen)
+{
+	size_t i;
+
+	for (i = 0; i < wordlen; i++) {
+		word[i] = alphabet[v % alphabet_len];
+		v /= alphabet_len;
+	}
+	word[wordlen] = '\0';
+}
+
+/*
+ * Sizes of the buffers that hold generated patterns and inputs.  Each
+ * includes the terminating NUL, so generated strings are shorter than this.
+ */
+#define PATTERN_LEN 8
+#define INPUT_LEN 7
+
+/*
+ * Checks that match_pattern() and match_pattern_original() agree on input
+ * for every pattern shorter than PATTERN_LEN built from the characters of
+ * pattern_alphabet.
+ */
+static void
+match_pattern_check_one_input(const char *input, const char *pattern_alphabet)
+{
+	char pattern[PATTERN_LEN];
+	size_t alphabet_len, len, i, npatterns;
+	int actual, expected;
+
+	alphabet_len = strlen(pattern_alphabet);
+	for (len = 0; len < sizeof(pattern); len++) {
+		/* Check with all patterns of this size using alphabet */
+		npatterns = pow_size(alphabet_len, len);
+		for (i = 0; i < npatterns; i++) {
+			make_word(i, pattern_alphabet, alphabet_len,
+			    pattern, len);
+			actual = match_pattern(input, pattern);
+			expected = match_pattern_original(input, pattern);
+			test_subtest_info("input=\"%s\" pattern=\"%s\"",
+			    input, pattern);
+			ASSERT_INT_EQ(actual, expected);
+		}
+	}
+}
+
+/*
+ * Check current match_pattern against original one with an exhaustive
+ * combination of patterns and inputs.
+ */
+static void
+match_pattern_exhaustive_original(void)
+{
+	const char *pattern_alphabet = "abx?*";
+	const char *input_alphabet = "abc";
+	char input[INPUT_LEN];
+	size_t alphabet_len, len, i, ninputs;
+
+	alphabet_len = strlen(input_alphabet);
+	for (len = 0; len < sizeof(input); len++) {
+		/* Check every possible input from alphabet of this size */
+		ninputs = pow_size(alphabet_len, len);
+		for (i = 0; i < ninputs; i++) {
+			make_word(i, input_alphabet, alphabet_len,
+			    input, len);
+			match_pattern_check_one_input(input, pattern_alphabet);
+		}
+	}
+}
+
 void
 tests(void)
 {
@@ -24,22 +180,84 @@ tests(void)
 	ASSERT_INT_EQ(match_pattern("aaa", ""), 0);
 	ASSERT_INT_EQ(match_pattern("aaa", "aaaa"), 0);
 	ASSERT_INT_EQ(match_pattern("aaaa", "aaa"), 0);
+	ASSERT_INT_EQ(match_pattern("abc", "abc"), 1);
+	ASSERT_INT_EQ(match_pattern("abc", "abd"), 0);
+	ASSERT_INT_EQ(match_pattern("abc", "abcd"), 0);
+	ASSERT_INT_EQ(match_pattern("abcd", "abc"), 0);
 	TEST_DONE();
 
 	TEST_START("match_pattern wildcard");
 	ASSERT_INT_EQ(match_pattern("", "*"), 1);
+	ASSERT_INT_EQ(match_pattern("", "**"), 1);
+	ASSERT_INT_EQ(match_pattern("", "***"), 1);
+	ASSERT_INT_EQ(match_pattern("", "?"), 0);
+	ASSERT_INT_EQ(match_pattern("", "*?"), 0);
+	ASSERT_INT_EQ(match_pattern("", "?*"), 0);
+	ASSERT_INT_EQ(match_pattern("", "**a*"), 0);
 	ASSERT_INT_EQ(match_pattern("a", "?"), 1);
 	ASSERT_INT_EQ(match_pattern("aa", "a?"), 1);
 	ASSERT_INT_EQ(match_pattern("a", "*"), 1);
 	ASSERT_INT_EQ(match_pattern("aa", "a*"), 1);
 	ASSERT_INT_EQ(match_pattern("aa", "?*"), 1);
-	ASSERT_INT_EQ(match_pattern("aa", "**"), 1);
 	ASSERT_INT_EQ(match_pattern("aa", "?a"), 1);
 	ASSERT_INT_EQ(match_pattern("aa", "*a"), 1);
 	ASSERT_INT_EQ(match_pattern("ba", "a?"), 0);
 	ASSERT_INT_EQ(match_pattern("ba", "a*"), 0);
 	ASSERT_INT_EQ(match_pattern("ab", "?a"), 0);
 	ASSERT_INT_EQ(match_pattern("ab", "*a"), 0);
+	ASSERT_INT_EQ(match_pattern("aa", "**"), 1);
+	ASSERT_INT_EQ(match_pattern("ab", "a***b"), 1);
+	ASSERT_INT_EQ(match_pattern("axb", "a***b"), 1);
+	ASSERT_INT_EQ(match_pattern("axxb", "a***b"), 1);
+	ASSERT_INT_EQ(match_pattern("ax", "a***b"), 0);
+	ASSERT_INT_EQ(match_pattern("abbb", "a*b*b"), 1);
+	ASSERT_INT_EQ(match_pattern("abbb", "a*b*c"), 0);
+	ASSERT_INT_EQ(match_pattern("aaaaaaaaac", "a*a*a*a*b"), 0);
+	ASSERT_INT_EQ(match_pattern("aaaaaaaaab", "a*a*a*a*b"), 1);
+	ASSERT_INT_EQ(match_pattern("ab", "*b"), 1);
+	ASSERT_INT_EQ(match_pattern("ab", "*a*"), 1);
+	ASSERT_INT_EQ(match_pattern("ab", "*a*b"), 1);
+	ASSERT_INT_EQ(match_pattern("ab", "*a*c"), 0);
+	ASSERT_INT_EQ(match_pattern("abc", "a?c"), 1);
+	ASSERT_INT_EQ(match_pattern("abc", "??c"), 1);
+	ASSERT_INT_EQ(match_pattern("abc", "???"), 1);
+	ASSERT_INT_EQ(match_pattern("abc", "a?d"), 0);
+	ASSERT_INT_EQ(match_pattern("ab", "???"), 0);
+	ASSERT_INT_EQ(match_pattern("abc", "ab*"), 1);
+	ASSERT_INT_EQ(match_pattern("ab", "ab*"), 1);
+	ASSERT_INT_EQ(match_pattern("a", "ab*"), 0);
+	ASSERT_INT_EQ(match_pattern("abc", "ab?"), 1);
+	ASSERT_INT_EQ(match_pattern("ab", "ab?"), 0);
+	ASSERT_INT_EQ(match_pattern("abcd", "ab?"), 0);
+	ASSERT_INT_EQ(match_pattern("abc", "?bc"), 1);
+	ASSERT_INT_EQ(match_pattern("abc", "?b*"), 1);
+	ASSERT_INT_EQ(match_pattern("abc", "?c"), 0);
+	ASSERT_INT_EQ(match_pattern("abc", "a*?c"), 1);
+	ASSERT_INT_EQ(match_pattern("ac", "a*?c"), 0);
+	ASSERT_INT_EQ(match_pattern("abbc", "a*?c"), 1);
+	ASSERT_INT_EQ(match_pattern("abc", "a?*c"), 1);
+	ASSERT_INT_EQ(match_pattern("ac", "a?*c"), 0);
+	ASSERT_INT_EQ(match_pattern("abbc", "a?*c"), 1);
+	ASSERT_INT_EQ(match_pattern("abc", "a*?*c"), 1);
+	ASSERT_INT_EQ(match_pattern("ac", "a*?*c"), 0);
+	ASSERT_INT_EQ(match_pattern("abc", "?*c"), 1);
+	ASSERT_INT_EQ(match_pattern("ac", "?*c"), 1);
+	ASSERT_INT_EQ(match_pattern("c", "?*c"), 0);
+	ASSERT_INT_EQ(match_pattern("abc", "*?c"), 1);
+	ASSERT_INT_EQ(match_pattern("ac", "*?c"), 1);
+	ASSERT_INT_EQ(match_pattern("c", "*?c"), 0);
+	ASSERT_INT_EQ(match_pattern("abc", "a?*"), 1);
+	ASSERT_INT_EQ(match_pattern("ab", "a?*"), 1);
+	ASSERT_INT_EQ(match_pattern("a", "a?*"), 0);
+	ASSERT_INT_EQ(match_pattern("abc", "a*?"), 1);
+	ASSERT_INT_EQ(match_pattern("ab", "a*?"), 1);
+	ASSERT_INT_EQ(match_pattern("a", "a*?"), 0);
+	ASSERT_INT_EQ(match_pattern("abb", "a*b"), 1);
+	ASSERT_INT_EQ(match_pattern("abbc", "a*b"), 0);
+	TEST_DONE();
+
+	TEST_START("match_pattern exhaustive original");
+	match_pattern_exhaustive_original();
 	TEST_DONE();
 
 	TEST_START("match_pattern_list");