summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDenys Vlasenko <vda.linux@googlemail.com>2010-03-13 15:19:04 (GMT)
committer Denys Vlasenko <vda.linux@googlemail.com>2010-03-13 15:19:04 (GMT)
commitb76356b28e9058fac1c9a50b274db8ce24273ca9 (patch)
treeca594b49433d813415cc5edff30b001ef83ae9e4
parent6eaeb7737d95661ca31b162977ac443ffeb7b0b3 (diff)
downloadbusybox-b76356b28e9058fac1c9a50b274db8ce24273ca9.tar.gz
busybox-b76356b28e9058fac1c9a50b274db8ce24273ca9.tar.bz2
ash: fix quadratic matching slowdown is ${v/*foo*/repl} (really bad one)
It is especially bad with patterns starting with "*". With ASH_OPTIMIZE_FOR_SIZE=y, only those are optimized, +few bytes: text data bss dec hex filename 836337 441 7564 844342 ce236 busybox_old 836341 441 7564 844346 ce23a busybox_unstripped With ASH_OPTIMIZE_FOR_SIZE off, we also optimize patterns _ending_ with "*", which costs about 80 bytes: text data bss dec hex filename 836656 441 7564 844661 ce375 busybox_old 836732 441 7564 844737 ce3c1 busybox_unstripped Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r--shell/ash.c71
1 files changed, 59 insertions, 12 deletions
diff --git a/shell/ash.c b/shell/ash.c
index 0a8b6c0..ce82a96 100644
--- a/shell/ash.c
+++ b/shell/ash.c
@@ -6040,25 +6040,61 @@ scanleft(char *startp, char *rmesc, char *rmescend UNUSED_PARAM, char *str, int
}
static char *
-scanright(char *startp, char *rmesc, char *rmescend, char *str, int quotes,
- int zero)
+scanright(char *startp, char *rmesc, char *rmescend, char *pattern, int quotes, int match_at_start)
{
+#if !ENABLE_ASH_OPTIMIZE_FOR_SIZE
+ int try2optimize = match_at_start;
+#endif
int esc = 0;
char *loc;
char *loc2;
- for (loc = str - 1, loc2 = rmescend; loc >= startp; loc2--) {
+ /* If we called by "${v/pattern/repl}" or "${v//pattern/repl}":
+ * startp="escaped_value_of_v" rmesc="raw_value_of_v"
+ * rmescend=""(ptr to NUL in rmesc) pattern="pattern" quotes=match_at_start=1
+ * Logic:
+ * loc starts at NUL at the end of startp, loc2 starts at the end of rmesc,
+ * and on each iteration they go back two/one char until they reach the beginning.
+ * We try to find a match in "raw_value_of_v", "raw_value_of_", "raw_value_of" etc.
+ */
+ /* TODO: document in what other circumstances we are called. */
+
+ for (loc = pattern - 1, loc2 = rmescend; loc >= startp; loc2--) {
int match;
char c = *loc2;
const char *s = loc2;
- if (zero) {
+ if (match_at_start) {
*loc2 = '\0';
s = rmesc;
}
- match = pmatch(str, s);
+ match = pmatch(pattern, s);
+ //bb_error_msg("pmatch(pattern:'%s',s:'%s'):%d", pattern, s, match);
*loc2 = c;
if (match)
return loc;
+#if !ENABLE_ASH_OPTIMIZE_FOR_SIZE
+ if (try2optimize) {
+ /* Maybe we can optimize this:
+ * if pattern ends with unescaped *, we can avoid checking
+ * shorter strings: if "foo*" doesnt match "raw_value_of_v",
+ * it wont match truncated "raw_value_of_" strings too.
+ */
+ unsigned plen = strlen(pattern);
+ /* Does it end with "*"? */
+ if (plen != 0 && pattern[--plen] == '*') {
+ /* "xxxx*" is not escaped */
+ /* "xxx\*" is escaped */
+ /* "xx\\*" is not escaped */
+ /* "x\\\*" is escaped */
+ int slashes = 0;
+ while (plen != 0 && pattern[--plen] == '\\')
+ slashes++;
+ if (!(slashes & 1))
+ break; /* ends with unescaped "*" */
+ }
+ try2optimize = 0;
+ }
+#endif
loc--;
if (quotes) {
if (--esc < 0) {
@@ -6248,7 +6284,7 @@ subevalvar(char *p, char *str, int strloc, int subtype,
#if ENABLE_ASH_BASH_COMPAT
if (subtype == VSREPLACE || subtype == VSREPLACEALL) {
- char *idx, *end, *restart_detect;
+ char *idx, *end;
if (!repl) {
repl = parse_sub_pattern(str, varflags & VSQUOTE);
@@ -6257,17 +6293,19 @@ subevalvar(char *p, char *str, int strloc, int subtype,
}
/* If there's no pattern to match, return the expansion unmolested */
- if (*str == '\0')
+ if (str[0] == '\0')
return 0;
len = 0;
idx = startp;
end = str - 1;
while (idx < end) {
+ try_to_match:
loc = scanright(idx, rmesc, rmescend, str, quotes, 1);
if (!loc) {
/* No match, advance */
- restart_detect = stackblock();
+ char *restart_detect = stackblock();
+ skip_matching:
STPUTC(*idx, expdest);
if (quotes && (unsigned char)*idx == CTLESC) {
idx++;
@@ -6279,7 +6317,16 @@ subevalvar(char *p, char *str, int strloc, int subtype,
idx++;
len++;
rmesc++;
- continue;
+ /* continue; - prone to quadratic behavior, smarter code: */
+ if (idx >= end)
+ break;
+ if (str[0] == '*') {
+ /* Pattern is "*foo". If "*foo" does not match "long_string",
+ * it would never match "ong_string" etc, no point in trying.
+ */
+ goto skip_matching;
+ }
+ goto try_to_match;
}
if (subtype == VSREPLACEALL) {
@@ -6294,7 +6341,7 @@ subevalvar(char *p, char *str, int strloc, int subtype,
}
for (loc = repl; *loc; loc++) {
- restart_detect = stackblock();
+ char *restart_detect = stackblock();
STPUTC(*loc, expdest);
if (stackblock() != restart_detect)
goto restart;
@@ -6303,7 +6350,7 @@ subevalvar(char *p, char *str, int strloc, int subtype,
if (subtype == VSREPLACE) {
while (*idx) {
- restart_detect = stackblock();
+ char *restart_detect = stackblock();
STPUTC(*idx, expdest);
if (stackblock() != restart_detect)
goto restart;
@@ -6332,7 +6379,7 @@ subevalvar(char *p, char *str, int strloc, int subtype,
if (subtype < 0 || subtype > 7)
abort();
#endif
- /* zero = subtype == VSTRIMLEFT || subtype == VSTRIMLEFTMAX */
+ /* zero = (subtype == VSTRIMLEFT || subtype == VSTRIMLEFTMAX) */
zero = subtype >> 1;
/* VSTRIMLEFT/VSTRIMRIGHTMAX -> scanleft */
scan = (subtype & 1) ^ zero ? scanleft : scanright;