https://git.reactos.org/?p=reactos.git;a=commitdiff;h=d029a626e964ad459e3462156b958664ae6da776
commit d029a626e964ad459e3462156b958664ae6da776
Author: Hermès Bélusca-Maïto <hermes.belusca-maito(a)reactos.org>
AuthorDate: Sat May 23 00:35:54 2020 +0200
Commit: Hermès Bélusca-Maïto <hermes.belusca-maito(a)reactos.org>
CommitDate: Sun Sep 27 02:27:15 2020 +0200
[CMD] Make the command-line parser more compatible with Windows' CMD one.
All these modifications have been verified with Windows' CMD, either
by using written cmd_rostests and the existing cmd_winetests, or
manually by enabling the flags cmd!fDumpTokens and cmd!fDumpParse
(available in the public symbols) and analyzing how the tokens are
being parsed, as well as the generated command tree.
See also the following links for more details (but remember that these
observations have to be double-checked in Windows' CMD!):
* Parser rules:
https://stackoverflow.com/a/4095133/13530036
* Discussion:
https://www.dostips.com/forum/viewtopic.php?f=3&t=8355
* Numbers parsing:
https://www.dostips.com/forum/viewtopic.php?t=3758
* Label names vs. GOTO and CALL:
https://www.dostips.com/forum/viewtopic.php?f=3&t=3803
and:
https://www.dostips.com/forum/viewtopic.php?f=3&t=3803&p=55405#p55405
- Fix REM command parsing. A C_COMMAND-like structure should still
be built, so that it can show up during batch command echo. However
some specific handling needs to be done, so use instead a new C_REM
command type.
Escape carets are parsed differently than usual: they are explicitly
kept in the command line and don't participate in line continuations.
Also, the Windows' CMD behaviour is to discard everything before the
last line continuation.
- Prefix operator '@' (the "silent" operator) is parsed as a separate
command. Thus, the command @@foo@bar is parsed as: '@', '@', 'foo@bar'.
- Improve the checks for numbered redirection.
For this purpose, we check whether this is a number, that is in first
position in the current parsing buffer or is preceded by a whitespace-
like separator, including standard command operators (excepting '@' !)
and double-quotes.
- Empty command blocks, i.e. "( )", standing by themselves, or present
in IF or FOR commands, are considered invalid. (The closing parenthesis
is considered "unexpected".)
- Ignore single closing parenthesis when being outside of command blocks,
thus interpreting it as a command, and ignore explicitly everything
following on the same line, including line continuations.
This very specific situation can happen e.g. while running in batch mode,
when jumping to a label present inside a command block.
See the code for a thorough explanation.
- Detect whether a parenthesized block is not terminated at the end
of a command stream (getting a NUL character instead of a newline),
and if so, bail out early instead of entering into an infinite loop.
- Perform a similar check for the parenthesized list in FOR commands.
- Initialize the static 'InsideBlock' value to a known value.
- The '&' operator (multi-command) is allowed to have an empty RHS.
When such situation occurs, turn the CurrentTokenType to TOK_END
so as to avoid a parse error later on.
- The main body of an IF statement, or its 'else' clause, as well as
the main body of a FOR statement, must not be empty, otherwise this
is considered a syntax error. If so, call ParseError(), which sets
the 'bParseError' flag and forces all batch execution to stop.
---
base/shell/cmd/cmd.c | 17 +-
base/shell/cmd/cmd.h | 76 ++-
base/shell/cmd/internal.c | 2 +-
base/shell/cmd/parser.c | 1205 +++++++++++++++++++++++++++++++++++----------
4 files changed, 1002 insertions(+), 298 deletions(-)
diff --git a/base/shell/cmd/cmd.c b/base/shell/cmd/cmd.c
index ba4e90dd05e..a3253026f10 100644
--- a/base/shell/cmd/cmd.c
+++ b/base/shell/cmd/cmd.c
@@ -664,7 +664,7 @@ ExecuteAsync(PARSED_COMMAND *Cmd)
/* Build the parameter string to pass to cmd.exe */
ParamsEnd = _stpcpy(CmdParams, _T("/S/D/C\""));
- ParamsEnd = Unparse(Cmd, ParamsEnd, &CmdParams[CMDLINE_LENGTH - 2]);
+ ParamsEnd = UnparseCommand(Cmd, ParamsEnd, &CmdParams[CMDLINE_LENGTH - 2]);
if (!ParamsEnd)
{
error_out_of_memory();
@@ -785,12 +785,13 @@ ExecuteCommand(
LPTSTR First, Rest;
INT Ret = 0;
+ /* If we don't have any command, or if this is REM, ignore it */
+ if (!Cmd || (Cmd->Type == C_REM))
+ return 0;
/*
* Do not execute any command if we are about to exit CMD, or about to
* change batch execution context, e.g. in case of a CALL / GOTO / EXIT.
*/
- if (!Cmd)
- return 0;
if (bExit || SeenGoto())
return 0;
@@ -812,6 +813,8 @@ ExecuteCommand(
}
cmd_free(First);
}
+ /* Fall through */
+ case C_REM:
break;
case C_QUIET:
@@ -842,13 +845,13 @@ ExecuteCommand(
Ret = ExecutePipeline(Cmd);
break;
- case C_IF:
- Ret = ExecuteIf(Cmd);
- break;
-
case C_FOR:
Ret = ExecuteFor(Cmd);
break;
+
+ case C_IF:
+ Ret = ExecuteIf(Cmd);
+ break;
}
UndoRedirection(Cmd->Redirections, NULL);
diff --git a/base/shell/cmd/cmd.h b/base/shell/cmd/cmd.h
index 93fefe4f4c6..bb2221fb23d 100644
--- a/base/shell/cmd/cmd.h
+++ b/base/shell/cmd/cmd.h
@@ -259,7 +259,8 @@ INT CommandHistory(LPTSTR param);
/* Prototypes for IF.C */
#define IFFLAG_NEGATE 1 /* NOT */
#define IFFLAG_IGNORECASE 2 /* /I - Extended */
-enum {
+typedef enum _IF_OPERATOR
+{
/** Unary operators **/
/* Standard */
IF_ERRORLEVEL, IF_EXIST,
@@ -271,7 +272,8 @@ enum {
IF_STRINGEQ, /* == */
/* Extended */
IF_EQU, IF_NEQ, IF_LSS, IF_LEQ, IF_GTR, IF_GEQ
-};
+} IF_OPERATOR;
+
INT ExecuteIf(struct _PARSED_COMMAND *Cmd);
/* Prototypes for INTERNAL.C */
@@ -345,42 +347,78 @@ INT CommandMsgbox (LPTSTR);
/* These three characters act like spaces to the parser in most contexts */
#define STANDARD_SEPS _T(",;=")
-enum { C_COMMAND, C_QUIET, C_BLOCK, C_MULTI, C_OR, C_AND, C_PIPE, C_IF, C_FOR };
+typedef enum _COMMAND_TYPE
+{
+ /* Standard command */
+ C_COMMAND,
+ /* Quiet operator */
+ C_QUIET,
+ /* Parenthesized block */
+ C_BLOCK,
+ /* Operators */
+ C_MULTI, C_OR, C_AND, C_PIPE,
+ /* Special parsed commands */
+ C_FOR, C_IF, C_REM
+} COMMAND_TYPE;
+
typedef struct _PARSED_COMMAND
{
+ /*
+ * For IF : this is the 'main' case (the 'else' is obtained via SubCmd->Next).
+ * For FOR: this is the list of all the subcommands in the DO.
+ */
struct _PARSED_COMMAND *Subcommands;
- struct _PARSED_COMMAND *Next;
+
+ struct _PARSED_COMMAND *Next; // Next command(s) in the chain.
struct _REDIRECTION *Redirections;
- BYTE Type;
+ COMMAND_TYPE Type;
union
{
struct
{
- TCHAR *Rest;
+ PTSTR Rest;
TCHAR First[];
} Command;
struct
- {
- BYTE Flags;
- BYTE Operator;
- TCHAR *LeftArg;
- TCHAR *RightArg;
- } If;
- struct
{
BYTE Switches;
TCHAR Variable;
- LPTSTR Params;
- LPTSTR List;
+ PTSTR Params;
+ PTSTR List;
struct _FOR_CONTEXT *Context;
} For;
+ struct
+ {
+ BYTE Flags;
+ IF_OPERATOR Operator;
+ PTSTR LeftArg;
+ PTSTR RightArg;
+ } If;
};
} PARSED_COMMAND;
-PARSED_COMMAND *ParseCommand(LPTSTR Line);
-VOID EchoCommand(PARSED_COMMAND *Cmd);
-TCHAR *Unparse(PARSED_COMMAND *Cmd, TCHAR *Out, TCHAR *OutEnd);
-VOID FreeCommand(PARSED_COMMAND *Cmd);
+PARSED_COMMAND*
+ParseCommand(
+ IN PCTSTR Line);
+
+VOID
+DumpCommand(
+ IN PARSED_COMMAND* Cmd,
+ IN ULONG SpacePad);
+
+VOID
+EchoCommand(
+ IN PARSED_COMMAND* Cmd);
+
+PTCHAR
+UnparseCommand(
+ IN PARSED_COMMAND* Cmd,
+ OUT PTCHAR Out,
+ IN PTCHAR OutEnd);
+
+VOID
+FreeCommand(
+ IN OUT PARSED_COMMAND* Cmd);
VOID ParseErrorEx(IN PCTSTR s);
extern BOOL bParseError;
diff --git a/base/shell/cmd/internal.c b/base/shell/cmd/internal.c
index 87fe7df9d81..2bb99144d4a 100644
--- a/base/shell/cmd/internal.c
+++ b/base/shell/cmd/internal.c
@@ -694,7 +694,7 @@ INT CommandExit(LPTSTR param)
*/
INT CommandRem (LPTSTR param)
{
- if (!_tcsncmp (param, _T("/?"), 2))
+ if (_tcsstr(param, _T("/?")) == param)
{
ConOutResPaging(TRUE,STRING_REM_HELP);
}
diff --git a/base/shell/cmd/parser.c b/base/shell/cmd/parser.c
index 37bc548f2a1..1680c5be4d8 100644
--- a/base/shell/cmd/parser.c
+++ b/base/shell/cmd/parser.c
@@ -1,12 +1,31 @@
/*
- * PARSER.C - command parsing.
+ * PARSER.C - Command-line Lexical Analyzer/Tokenizer and Parser.
*/
#include "precomp.h"
-/* Enable this define for "buggy" Windows' CMD command echoer compatibility */
+/*
+ * Defines for enabling different Windows' CMD compatibility behaviours.
+ */
+
+/* Enable this define for command echoer compatibility */
#define MSCMD_ECHO_COMMAND_COMPAT
+/* Enable this define for parser quirks (see UnParseToken() for more details) */
+#define MSCMD_PARSER_BUGS
+
+/* Enable this define for parenthesized blocks parsing quirks */
+// #define MSCMD_PARENS_PARSE_BUGS
+
+/* Enable this define for redirection parsing quirks */
+#define MSCMD_REDIR_PARSE_BUGS
+
+/* Enable this define for allowing '&' commands with an empty RHS.
+ * The default behaviour is to just return the LHS instead.
+ * See ParseCommandBinaryOp() for details. */
+// #define MSCMD_MULTI_EMPTY_RHS
+
+
/*
* Parser debugging support. These flags are global so that their values can be
* modified at runtime from a debugger. They correspond to the public Windows'
@@ -22,8 +41,10 @@ static const TCHAR OpString[][3] = { _T("&"),
_T("||"), _T("&&"), _T("|") };
static const TCHAR RedirString[][3] = { _T("<"), _T(">"),
_T(">>") };
-static const TCHAR *const IfOperatorString[] =
+static const TCHAR* const IfOperatorString[] =
{
+ /** Unary operators **/
+
/* Standard */
_T("errorlevel"),
_T("exist"),
@@ -33,6 +54,8 @@ static const TCHAR *const IfOperatorString[] =
_T("defined"),
#define IF_MAX_UNARY IF_DEFINED
+ /** Binary operators **/
+
/* Standard */
_T("=="),
@@ -46,12 +69,12 @@ static const TCHAR *const IfOperatorString[] =
#define IF_MAX_COMPARISON IF_GEQ
};
-static BOOL IsSeparator(TCHAR Char)
+static __inline BOOL IsSeparator(TCHAR Char)
{
- return _istspace(Char) || (Char && _tcschr(STANDARD_SEPS, Char));
+ return _istspace(Char) || (Char && !!_tcschr(STANDARD_SEPS, Char));
}
-enum
+typedef enum _TOK_TYPE
{
TOK_END,
TOK_NORMAL,
@@ -59,7 +82,7 @@ enum
TOK_REDIRECTION,
TOK_BEGIN_BLOCK,
TOK_END_BLOCK
-};
+} TOK_TYPE;
/* Scratch buffer for temporary command substitutions / expansions */
static TCHAR TempBuf[CMDLINE_LENGTH];
@@ -67,19 +90,103 @@ static TCHAR TempBuf[CMDLINE_LENGTH];
/*static*/ BOOL bParseError;
static BOOL bLineContinuations;
/*static*/ TCHAR ParseLine[CMDLINE_LENGTH];
-static TCHAR *ParsePos;
-static TCHAR CurChar;
+static PTCHAR ParsePos;
+static PTCHAR OldParsePos;
static TCHAR CurrentToken[CMDLINE_LENGTH];
-static int CurrentTokenType;
-static int InsideBlock;
+static TOK_TYPE CurrentTokenType = TOK_END;
+#ifndef MSCMD_PARSER_BUGS
+static BOOL bReparseToken = FALSE;
+static PTCHAR LastCurTokPos;
+#endif
+static INT InsideBlock = 0;
+
+static VOID ResetParser(IN PTCHAR Pos)
+{
+ bParseError = FALSE;
+ ParsePos = Pos;
+ OldParsePos = ParsePos;
+}
+
+/*
+ * This function "refetches" the last parsed token back into the stream
+ * for later reparsing -- since the way of lexing it is context-dependent.
+ * This "feature" is at the root of many obscure CMD parsing quirks,
+ * due to the fact this feature is in opposition with line-continuation.
+ * Indeed, when a stream of characters has a line-continuation, the lexer-
+ * parser will parse the stream up to the end of the line, then will
+ * reset the parser state and position back to the beginning of the line
+ * before accepting the rest of the character stream and continuing
+ * parsing them. This means that all the non-parsed characters before the
+ * line-continuation have been lost. Of course, their parsed form is now
+ * within the current parsed token. However, suppose now we need to
+ * unparse this token for reparsing it a different way later on. If we
+ * somehow pushed the already-parsed current token back into the beginning
+ * of the character stream, besides the complications of moving up the
+ * characters in the stream buffer, we would basically have "new" data
+ * that has been already parsed one way, to be now parsed another way.
+ * If instead we had saved somehow the unparsed form of the token, and
+ * we push back that form into the stream buffer for reparsing, we would
+ * encounter again the line-continuation, that, depending on which
+ * context the token is reparsed, would cause problems:
+ * e.g. in the case of REM command parsing, the parser would stop at the
+ * first line-continuation.
+ *
+ * When MSCMD_PARSER_BUGS is undefined, the UnParseToken() / ParseToken()
+ * cycle keeps the current token in its buffer, but also saves the start
+ * position corresponding to the batch of characters that have been parsed
+ * during the last line-continuation. The next ParseToken() would then
+ * reparse these latest charcters and the result replaces the last part
+ * in the current token.
+ *
+ * For example, a first parsing of
+ * foo^\n
+ * bar^\n
+ * baz
+ * would result in the current token "foobarbaz", where the start position
+ * corresponding to the batch of characters parsed during the last line-continuation
+ * being pointing at "baz". The stream buffer only contains "baz" (and following data).
+ * Then UnParseToken() saves this info so that at the next ParseToken(), the "baz"
+ * part of the stream buffer gets reparsed (possibly differently) and the result
+ * would replace the "baz" part in the current token.
+ *
+ * If MSCMD_PARSER_BUGS is defined however, then the behaviour of the Windows' CMD
+ * parser applies: in the example above, the last ParseToken() call would completely
+ * replace the current token "foobarbaz" with the new result of the parsing of "baz".
+ */
+static VOID UnParseToken(VOID)
+{
+ ParsePos = OldParsePos;
+
+ /* Debugging support */
+ if (fDumpTokens)
+ ConOutPrintf(_T("Ungetting: '%s'\n"), ParsePos);
+
+#ifndef MSCMD_PARSER_BUGS
+ bReparseToken = TRUE;
+#endif
+}
+
+static VOID InitParser(VOID)
+{
+ *CurrentToken = 0;
+ CurrentTokenType = TOK_END;
+ InsideBlock = 0;
+
+#ifndef MSCMD_PARSER_BUGS
+ bReparseToken = FALSE;
+ LastCurTokPos = NULL;
+#endif
+
+ ResetParser(ParseLine);
+}
-static TCHAR ParseChar(void)
+static TCHAR ParseChar(VOID)
{
TCHAR Char;
if (bParseError)
- return (CurChar = 0);
+ return 0;
restart:
/*
@@ -93,23 +200,22 @@ restart:
}
while (Char == _T('\r'));
- if (!Char)
+ if (!Char) --ParsePos;
+ if (!Char && bLineContinuations)
{
- ParsePos--;
- if (bLineContinuations)
+ if (!ReadLine(ParseLine, TRUE))
{
- if (!ReadLine(ParseLine, TRUE))
- {
- /* ^C pressed, or line was too long */
- bParseError = TRUE;
- }
- else if (*(ParsePos = ParseLine))
- {
+ /* ^C pressed, or line was too long */
+ bParseError = TRUE;
+ }
+ else
+ {
+ ResetParser(ParseLine);
+ if (*ParsePos)
goto restart;
- }
}
}
- return (CurChar = Char);
+ return Char;
}
VOID ParseErrorEx(IN PCTSTR s)
@@ -125,41 +231,79 @@ static __inline VOID ParseError(VOID)
ParseErrorEx(CurrentTokenType != TOK_END ? CurrentToken : NULL);
}
-/*
- * Yes, cmd has a Lexical Analyzer. Whenever the parser gives an "xxx was
- * unexpected at this time." message, it shows what the last token read was.
- */
-static int ParseToken(TCHAR ExtraEnd, TCHAR *Separators)
+static TOK_TYPE
+ParseTokenEx(
+ IN TCHAR PrefixOperator OPTIONAL,
+ IN TCHAR ExtraEnd OPTIONAL,
+ IN PCTSTR Separators OPTIONAL,
+ IN BOOL bHandleContinuations)
{
- TCHAR *Out = CurrentToken;
+ TOK_TYPE Type;
+ PTCHAR CurrentTokStart = CurrentToken;
+ PTCHAR Out = CurrentTokStart;
TCHAR Char;
- int Type;
BOOL bInQuote = FALSE;
- for (Char = CurChar; Char && Char != _T('\n'); Char = ParseChar())
+#ifndef MSCMD_PARSER_BUGS
+ if (bReparseToken)
+ {
+ bReparseToken = FALSE;
+
+ /*
+ * We will append the part to be reparsed to the old one
+ * (still present in CurrentToken).
+ */
+ CurrentTokStart = LastCurTokPos;
+ Out = CurrentTokStart;
+ }
+ else
+ {
+ LastCurTokPos = CurrentToken;
+ }
+#endif
+
+ /* Start with what we have at current ParsePos */
+ OldParsePos = ParsePos;
+
+ for (Char = ParseChar(); Char && Char != _T('\n'); Char =
ParseChar())
{
bInQuote ^= (Char == _T('"'));
if (!bInQuote)
{
if (Separators != NULL)
{
- if (_istspace(Char) || _tcschr(Separators, Char))
+ if (_istspace(Char) || !!_tcschr(Separators, Char))
{
/* Skip leading separators */
- if (Out == CurrentToken)
+ if (Out == CurrentTokStart)
continue;
break;
}
}
- /* Check for numbered redirection */
- if ((Char >= _T('0') && Char <= _T('9')
&&
- (ParsePos == &ParseLine[1] || IsSeparator(ParsePos[-2]))
- && (*ParsePos == _T('<') || *ParsePos ==
_T('>'))))
+ /* Check for prefix operator */
+ if ((Out == CurrentTokStart) && (Char == PrefixOperator))
+ break;
+
+ /*
+ * Check for numbered redirection.
+ *
+ * For this purpose, we check whether this is a number, that is
+ * in first position in the current parsing buffer (remember that
+ * ParsePos points to the next character) or is preceded by a
+ * whitespace-like separator, including standard command operators
+ * (excepting '@' !) and double-quotes.
+ */
+ if ( _istdigit(Char) &&
+ (ParsePos == &OldParsePos[1] ||
+ IsSeparator(ParsePos[-2]) ||
+ !!_tcschr(_T("()&|\""), ParsePos[-2]))
&&
+ (*ParsePos == _T('<') || *ParsePos == _T('>')) )
{
break;
}
+ /* Check for other delimiters / operators */
if (Char == ExtraEnd)
break;
if (InsideBlock && Char == _T(')'))
@@ -167,59 +311,90 @@ static int ParseToken(TCHAR ExtraEnd, TCHAR *Separators)
if (_tcschr(_T("&|<>"), Char))
break;
- if (Char == _T('^'))
+ if (bHandleContinuations && (Char == _T('^')))
{
Char = ParseChar();
/* Eat up a \n, allowing line continuation */
if (Char == _T('\n'))
+ {
+#ifndef MSCMD_PARSER_BUGS
+ LastCurTokPos = Out;
+#endif
Char = ParseChar();
+ }
/* Next character is a forced literal */
+
+ if (Out == CurrentTokStart)
+ {
+ /* Ignore any prefix operator if we don't start a new command block */
+ if (CurrentTokenType != TOK_BEGIN_BLOCK)
+ PrefixOperator = 0;
+ }
}
}
if (Out == &CurrentToken[CMDLINE_LENGTH - 1])
break;
*Out++ = Char;
+
+ // PrefixOperator = 0;
}
- /* Check if we got at least one character before reaching a special one.
- * If so, return them and leave the special for the next call. */
- if (Out != CurrentToken)
+ /*
+ * We exited the parsing loop. If the current character is the first one
+ * (Out == CurrentTokStart), interpret it as an operator. Otherwise,
+ * terminate the current token (type TOK_NORMAL) and keep the current
+ * character so that it can be refetched as an operator at the next call.
+ */
+
+ if (Out != CurrentTokStart)
{
Type = TOK_NORMAL;
}
+ /*
+ * Else we have an operator.
+ */
+ else if (Char == _T('@'))
+ {
+ Type = TOK_OPERATOR; // TOK_QUIET / TOK_PREFIX_OPERATOR
+ *Out++ = Char;
+ Char = ParseChar();
+ }
else if (Char == _T('('))
{
Type = TOK_BEGIN_BLOCK;
*Out++ = Char;
- ParseChar();
+ Char = ParseChar();
}
else if (Char == _T(')'))
{
Type = TOK_END_BLOCK;
*Out++ = Char;
- ParseChar();
+ Char = ParseChar();
}
else if (Char == _T('&') || Char == _T('|'))
{
Type = TOK_OPERATOR;
*Out++ = Char;
Char = ParseChar();
- /* check for && or || */
+ /* Check for '&&' or '||' */
if (Char == Out[-1])
{
*Out++ = Char;
- ParseChar();
+ Char = ParseChar();
}
}
- else if ((Char >= _T('0') && Char <= _T('9'))
- || (Char == _T('<') || Char == _T('>')))
+ else if ( _istdigit(Char) ||
+ (Char == _T('<') || Char == _T('>')) )
{
Type = TOK_REDIRECTION;
- if (Char >= _T('0') && Char <= _T('9'))
+ if (_istdigit(Char))
{
*Out++ = Char;
Char = ParseChar();
}
+ /* By construction (see the while-loop above),
+ * the next character must be a redirection. */
+ ASSERT(Char == _T('<') || Char == _T('>'));
*Out++ = Char;
Char = ParseChar();
if (Char == Out[-1])
@@ -234,19 +409,30 @@ static int ParseToken(TCHAR ExtraEnd, TCHAR *Separators)
*Out++ = Char;
while (IsSeparator(Char = ParseChar()))
;
- if (Char >= _T('0') && Char <= _T('9'))
+ if (_istdigit(Char))
{
*Out++ = Char;
- ParseChar();
+ Char = ParseChar();
}
}
}
else
{
Type = TOK_END;
+ *Out++ = Char;
}
*Out = _T('\0');
+ /*
+ * Rewind the parsing position, so that the current character can be
+ * refetched later on. However do this only if it is not NULL and if
+ * this is not TOK_END, since we do not want to reparse later the line
+ * termination (we could enter into infinite loops, or, in case of line
+ * continuation, get unwanted "More?" prompts).
+ */
+ if (Char != 0 && Type != TOK_END)
+ --ParsePos;
+
/* Debugging support */
if (fDumpTokens)
ConOutPrintf(_T("ParseToken: (%d) '%s'\n"), Type,
CurrentToken);
@@ -254,14 +440,127 @@ static int ParseToken(TCHAR ExtraEnd, TCHAR *Separators)
return (CurrentTokenType = Type);
}
-static BOOL ParseRedirection(REDIRECTION **List)
+static __inline INT
+ParseToken(
+ IN TCHAR ExtraEnd OPTIONAL,
+ IN PCTSTR Separators OPTIONAL)
{
- TCHAR *Tok = CurrentToken;
- BYTE Number;
+ return ParseTokenEx(0, ExtraEnd, Separators, TRUE);
+}
+
+
+static PARSED_COMMAND*
+AllocCommand(
+ IN COMMAND_TYPE Type,
+ IN PCTSTR CmdHead OPTIONAL,
+ IN PCTSTR CmdTail OPTIONAL)
+{
+ PARSED_COMMAND* Cmd;
+
+ switch (Type)
+ {
+ case C_COMMAND:
+ case C_REM:
+ {
+ SIZE_T CmdHeadLen = _tcslen(CmdHead) + 1;
+ SIZE_T CmdTailLen = _tcslen(CmdTail) + 1;
+
+ Cmd = cmd_alloc(FIELD_OFFSET(PARSED_COMMAND,
+ Command.First[CmdHeadLen + CmdTailLen]));
+ if (!Cmd)
+ return NULL;
+
+ Cmd->Type = Type;
+ Cmd->Next = NULL;
+ Cmd->Subcommands = NULL;
+ Cmd->Redirections = NULL; /* Is assigned by the calling function */
+ memcpy(Cmd->Command.First, CmdHead, CmdHeadLen * sizeof(TCHAR));
+ Cmd->Command.Rest = Cmd->Command.First + CmdHeadLen;
+ memcpy(Cmd->Command.Rest, CmdTail, CmdTailLen * sizeof(TCHAR));
+ return Cmd;
+ }
+
+ case C_QUIET:
+ case C_BLOCK:
+ case C_MULTI:
+ case C_OR:
+ case C_AND:
+ case C_PIPE:
+ {
+ Cmd = cmd_alloc(sizeof(PARSED_COMMAND));
+ if (!Cmd)
+ return NULL;
+
+ Cmd->Type = Type;
+ Cmd->Next = NULL;
+ Cmd->Subcommands = NULL;
+ Cmd->Redirections = NULL; /* For C_BLOCK only: is assigned by the calling function */
+ return Cmd;
+ }
+
+ case C_FOR:
+ case C_IF:
+ {
+ Cmd = cmd_alloc(sizeof(PARSED_COMMAND));
+ if (!Cmd)
+ return NULL;
+
+ memset(Cmd, 0, sizeof(PARSED_COMMAND));
+ Cmd->Type = Type;
+ return Cmd;
+ }
+
+ default:
+ ERR("Unknown command type 0x%x\n", Type);
+ ASSERT(FALSE);
+ return NULL;
+ }
+}
+
+VOID
+FreeCommand(
+ IN OUT PARSED_COMMAND* Cmd)
+{
+ if (Cmd->Subcommands)
+ FreeCommand(Cmd->Subcommands);
+ if (Cmd->Next)
+ FreeCommand(Cmd->Next);
+ FreeRedirection(Cmd->Redirections);
+ if (Cmd->Type == C_FOR)
+ {
+ cmd_free(Cmd->For.Params);
+ cmd_free(Cmd->For.List);
+ }
+ else if (Cmd->Type == C_IF)
+ {
+ cmd_free(Cmd->If.LeftArg);
+ cmd_free(Cmd->If.RightArg);
+ }
+ cmd_free(Cmd);
+}
+
+
+/* Parse redirections and append them to the list */
+static BOOL
+ParseRedirection(
+ IN OUT REDIRECTION** List)
+{
+ PTSTR Tok = CurrentToken;
+ REDIRECTION* Redir;
REDIR_MODE RedirMode;
- REDIRECTION *Redir;
+ BYTE Number;
+
+ if ( !(*Tok == _T('<') || *Tok == _T('>')) &&
+ !(_istdigit(*Tok) &&
+ (Tok[1] == _T('<') || Tok[1] == _T('>')) ) )
+ {
+ ASSERT(CurrentTokenType != TOK_REDIRECTION);
+ return FALSE;
+ }
+ ASSERT((CurrentTokenType == TOK_REDIRECTION) ||
+ (CurrentTokenType == TOK_NORMAL));
- if (*Tok >= _T('0') && *Tok <= _T('9'))
+ if (_istdigit(*Tok))
Number = *Tok++ - _T('0');
else
Number = *Tok == _T('<') ? 0 : 1;
@@ -269,6 +568,7 @@ static BOOL ParseRedirection(REDIRECTION **List)
if (*Tok++ == _T('<'))
{
RedirMode = REDIR_READ;
+ /* Forbid '<<' */
if (*Tok == _T('<'))
goto fail;
}
@@ -282,9 +582,20 @@ static BOOL ParseRedirection(REDIRECTION **List)
}
}
+ if (*Tok == _T('&'))
+ {
+ /* This is a handle redirection: the next character must be one single digit */
+ if (!(_istdigit(Tok[1]) && !Tok[2]))
+ goto fail;
+ }
+ else
+#ifndef MSCMD_REDIR_PARSE_BUGS
if (!*Tok)
+ /* The file name was not part of this token, so it will be the next one */
+#else
+ /* Get rid of what possibly remains in the token, and retrieve the next one */
+#endif
{
- /* The file name was not part of this token, so it'll be the next one */
if (ParseToken(0, STANDARD_SEPS) != TOK_NORMAL)
goto fail;
Tok = CurrentToken;
@@ -323,31 +634,60 @@ fail:
return FALSE;
}
-static PARSED_COMMAND *ParseCommandOp(int OpType);
+static __inline PARSED_COMMAND*
+ParseCommandOp(
+ IN COMMAND_TYPE OpType);
/* Parse a parenthesized block */
-static PARSED_COMMAND *ParseBlock(REDIRECTION *RedirList)
+static PARSED_COMMAND*
+ParseBlock(
+ IN OUT REDIRECTION** RedirList)
{
PARSED_COMMAND *Cmd, *Sub, **NextPtr;
- Cmd = cmd_alloc(sizeof(PARSED_COMMAND));
+ Cmd = AllocCommand(C_BLOCK, NULL, NULL);
if (!Cmd)
{
WARN("Cannot allocate memory for Cmd!\n");
ParseError();
- FreeRedirection(RedirList);
return NULL;
}
- Cmd->Type = C_BLOCK;
- Cmd->Next = NULL;
- Cmd->Subcommands = NULL;
- Cmd->Redirections = RedirList;
/* Read the block contents */
NextPtr = &Cmd->Subcommands;
- InsideBlock++;
- while (1)
+ ++InsideBlock;
+ while (TRUE)
{
+ /*
+ * Windows' CMD compatibility: Strip leading newlines in the block.
+ *
+ * Note that this behaviour is buggy, especially when MSCMD_PARSER_BUGS is defined!
+ * For example:
+ * (foo^\n
+ * bar)
+ * would be parsed ultimately as: '(', 'bar', ')' because the "foo^"
+ * part would be discarded due to the UnParseToken() call, since this
+ * function doesn't work across line continuations.
+ */
+ while (ParseToken(0, STANDARD_SEPS) == TOK_END && *CurrentToken ==
_T('\n'))
+ ;
+ if (*CurrentToken && *CurrentToken != _T('\n'))
+ UnParseToken();
+
+ /* Break early if we have nothing else to read. We will also fail
+ * due to the fact we haven't encountered any closing parenthesis. */
+ if (!*CurrentToken /* || *CurrentToken == _T('\n') */)
+ {
+ ASSERT(CurrentTokenType == TOK_END);
+ break;
+ }
+
+ /*
+ * NOTE: Windows' CMD uses a "CRLF" operator when dealing with
+ * newlines in parenthesized blocks, as an alternative to the
+ * '&' command-separation operator.
+ */
+
Sub = ParseCommandOp(C_OP_LOWEST);
if (Sub)
{
@@ -356,7 +696,7 @@ static PARSED_COMMAND *ParseBlock(REDIRECTION *RedirList)
}
else if (bParseError)
{
- InsideBlock--;
+ --InsideBlock;
FreeCommand(Cmd);
return NULL;
}
@@ -365,36 +705,83 @@ static PARSED_COMMAND *ParseBlock(REDIRECTION *RedirList)
break;
/* Skip past the \n */
- ParseChar();
}
- InsideBlock--;
+ --InsideBlock;
+
+ /* Fail if the block was not terminated, or if we have
+ * an empty block, i.e. "( )", considered invalid. */
+ if ((CurrentTokenType != TOK_END_BLOCK) || (Cmd->Subcommands == NULL))
+ {
+ ParseError();
+ FreeCommand(Cmd);
+ return NULL;
+ }
- /* Process any trailing redirections */
+ /* Process any trailing redirections and append them to the list */
+#ifndef MSCMD_REDIR_PARSE_BUGS
while (ParseToken(0, STANDARD_SEPS) == TOK_REDIRECTION)
{
- if (!ParseRedirection(&Cmd->Redirections))
+ if (!ParseRedirection(RedirList))
{
FreeCommand(Cmd);
return NULL;
}
}
+#else
+ while (ParseToken(0, STANDARD_SEPS) != TOK_END)
+ {
+ if (!ParseRedirection(RedirList))
+ {
+ /* If an actual error happened in ParseRedirection(), bail out */
+ if (bParseError)
+ {
+ FreeCommand(Cmd);
+ return NULL;
+ }
+ /* Otherwise it just returned FALSE because the current token
+ * is not a redirection. Unparse the token and refetch it. */
+ break;
+ }
+ }
+#endif
+ if (CurrentTokenType != TOK_END)
+ {
+ /*
+ * Windows' CMD compatibility: Unparse the current token.
+ *
+ * Note that this behaviour is buggy, especially when MSCMD_PARSER_BUGS is defined!
+ * For example:
+ * (foo^\n
+ * bar)
+ * would be parsed ultimately as: '(', 'bar', ')' because the "foo^"
+ * part would be discarded due to the UnParseToken() call, since this
+ * function doesn't work across line continuations.
+ */
+ UnParseToken();
+
+ /*
+ * Since it is expected that when ParseBlock() returns, the next
+ * token is already fetched, call ParseToken() again to compensate.
+ */
+ ParseToken(0, STANDARD_SEPS);
+ }
+
return Cmd;
}
/* Parse an IF statement */
-static PARSED_COMMAND *ParseIf(void)
+static PARSED_COMMAND*
+ParseIf(VOID)
{
- PARSED_COMMAND *Cmd;
+ PARSED_COMMAND* Cmd;
- Cmd = cmd_alloc(sizeof(PARSED_COMMAND));
+ Cmd = AllocCommand(C_IF, NULL, NULL);
if (!Cmd)
{
WARN("Cannot allocate memory for Cmd!\n");
ParseError();
return NULL;
}
- memset(Cmd, 0, sizeof(PARSED_COMMAND));
- Cmd->Type = C_IF;
if (bEnableExtensions && (_tcsicmp(CurrentToken, _T("/I")) == 0))
{
@@ -478,27 +865,28 @@ error:
* Parse a FOR command.
* Syntax is: FOR [options] %var IN (list) DO command
*/
-static PARSED_COMMAND *ParseFor(void)
+static PARSED_COMMAND*
+ParseFor(VOID)
{
- PARSED_COMMAND *Cmd;
- TCHAR* List = TempBuf;
- TCHAR *Pos = List;
+ PARSED_COMMAND* Cmd;
- Cmd = cmd_alloc(sizeof(PARSED_COMMAND));
+ /* Use the scratch buffer */
+ PTSTR List = TempBuf;
+ PTCHAR Pos = List;
+
+ Cmd = AllocCommand(C_FOR, NULL, NULL);
if (!Cmd)
{
WARN("Cannot allocate memory for Cmd!\n");
ParseError();
return NULL;
}
- memset(Cmd, 0, sizeof(PARSED_COMMAND));
- Cmd->Type = C_FOR;
/* Skip the extended FOR syntax if extensions are disabled */
if (!bEnableExtensions)
goto parseForBody;
- while (1)
+ while (TRUE)
{
if (_tcsicmp(CurrentToken, _T("/D")) == 0)
{
@@ -561,22 +949,19 @@ parseForBody:
if (ParseToken(_T('('), STANDARD_SEPS) != TOK_BEGIN_BLOCK)
goto error;
- while (1)
+ while (TRUE)
{
/* Pretend we're inside a block so the tokenizer will stop on ')' */
- InsideBlock++;
+ ++InsideBlock;
ParseToken(0, STANDARD_SEPS);
- InsideBlock--;
+ --InsideBlock;
if (CurrentTokenType == TOK_END_BLOCK)
break;
- if (CurrentTokenType == TOK_END)
- {
- /* Skip past the \n */
- ParseChar();
+ /* Skip past the \n */
+ if ((CurrentTokenType == TOK_END) && *CurrentToken == _T('\n'))
continue;
- }
if (CurrentTokenType != TOK_NORMAL)
goto error;
@@ -608,177 +993,432 @@ error:
}
/* Parse a REM command */
-static PARSED_COMMAND *ParseRem(void)
+static PARSED_COMMAND*
+ParseRem(VOID)
{
- /* "Ignore" the rest of the line.
- * (Line continuations will still be parsed, though.) */
- while (ParseToken(0, NULL) != TOK_END)
- ;
- return NULL;
+ PARSED_COMMAND* Cmd;
+
+ /* The common scratch buffer already contains the name of the command */
+ PTSTR ParsedLine = TempBuf;
+
+ PTCHAR Pos = ParsedLine + _tcslen(ParsedLine) + 1;
+ SIZE_T TailOffset = Pos - ParsedLine;
+
+ /* Build a minimal command for REM, so that it can still get through the batch echo unparsing */
+
+ /* Unparse the current token, so as to emulate the REM command parsing
+ * behaviour of Windows' CMD, that discards everything before the last
+ * line continuation. */
+ UnParseToken();
+
+ /*
+ * Ignore the rest of the line, without any line continuation (but eat the caret).
+ * We cannot simply set bLineContinuations to TRUE or FALSE, because we want (only
+ * for the REM command), even when bLineContinuations == FALSE, to get the caret,
+ * otherwise it would be ignored.
+ */
+ while (ParseTokenEx(0, 0, NULL, FALSE) != TOK_END)
+ {
+ if (Pos + _tcslen(CurrentToken) >= &ParsedLine[CMDLINE_LENGTH])
+ {
+ ParseError();
+ return NULL;
+ }
+ Pos = _stpcpy(Pos, CurrentToken);
+ }
+ *Pos = _T('\0');
+
+ Cmd = AllocCommand(C_REM,
+ ParsedLine,
+ ParsedLine + TailOffset);
+ if (!Cmd)
+ {
+ WARN("Cannot allocate memory for Cmd!\n");
+ ParseError();
+ return NULL;
+ }
+ return Cmd;
}
-static DECLSPEC_NOINLINE PARSED_COMMAND *ParseCommandPart(REDIRECTION *RedirList)
+/* Parse a command */
+static PARSED_COMMAND*
+ParseCommandPart(
+ IN OUT REDIRECTION** RedirList)
{
- TCHAR ParsedLine[CMDLINE_LENGTH];
- PARSED_COMMAND *Cmd;
- PARSED_COMMAND *(*Func)(void);
+ PARSED_COMMAND* Cmd;
+ PARSED_COMMAND* (*Func)(VOID);
- TCHAR *Pos = _stpcpy(ParsedLine, CurrentToken) + 1;
- DWORD_PTR TailOffset = Pos - ParsedLine;
+ /* Use the scratch buffer */
+ PTSTR ParsedLine = TempBuf;
+
+ /* We need to copy the current token because it's going to be changed below by the ParseToken() calls */
+ PTCHAR Pos = _stpcpy(ParsedLine, CurrentToken) + 1;
+ SIZE_T TailOffset = Pos - ParsedLine;
/* Check for special forms */
- if ((Func = ParseFor, _tcsicmp(ParsedLine, _T("for")) == 0) ||
- (Func = ParseIf, _tcsicmp(ParsedLine, _T("if")) == 0) ||
- (Func = ParseRem, _tcsicmp(ParsedLine, _T("rem")) == 0))
+ if ((Func = ParseFor, _tcsicmp(ParsedLine, _T("FOR")) == 0) ||
+ (Func = ParseIf, _tcsicmp(ParsedLine, _T("IF")) == 0) ||
+ (Func = ParseRem, _tcsicmp(ParsedLine, _T("REM")) == 0))
{
+ PTCHAR pHelp;
+
ParseToken(0, STANDARD_SEPS);
- /* Do special parsing only if it's not followed by /? */
- if (_tcscmp(CurrentToken, _T("/?")) != 0)
+
+ if ((pHelp = _tcsstr(CurrentToken, _T("/?"))) &&
+ (Func == ParseIf ? (pHelp[2] == _T('/') || pHelp[2] == 0) : TRUE))
+ {
+ /* /? was found within the first token */
+ ParseToken(0, STANDARD_SEPS);
+ }
+ else
+ {
+ pHelp = NULL;
+ }
+ if (pHelp && (CurrentTokenType == TOK_NORMAL))
{
- if (RedirList)
+ /* We encountered /? first, but is followed
+ * by another token: that's an error. */
+ ParseError();
+ return NULL;
+ }
+
+ /* Do actual parsing only if no help is present */
+ if (!pHelp)
+ {
+ /* FOR and IF commands cannot have leading redirection, but REM can */
+ if (*RedirList && ((Func == ParseFor) || (Func == ParseIf)))
{
- ParseError();
- FreeRedirection(RedirList);
+ /* Display the culprit command and fail */
+ ParseErrorEx(ParsedLine);
return NULL;
}
+
return Func();
}
- Pos = _stpcpy(Pos, _T(" /?"));
+
+ /* Otherwise, run FOR,IF,REM as regular commands only for help support */
+ if (Pos + _tcslen(_T("/?")) >= &ParsedLine[CMDLINE_LENGTH])
+ {
+ ParseError();
+ return NULL;
+ }
+ Pos = _stpcpy(Pos, _T("/?"));
+ }
+ else
+ {
+ ParseToken(0, NULL);
}
/* Now get the tail */
- while (1)
+ while (CurrentTokenType != TOK_END)
{
- ParseToken(0, NULL);
if (CurrentTokenType == TOK_NORMAL)
{
if (Pos + _tcslen(CurrentToken) >= &ParsedLine[CMDLINE_LENGTH])
{
ParseError();
- FreeRedirection(RedirList);
return NULL;
}
Pos = _stpcpy(Pos, CurrentToken);
}
+#ifndef MSCMD_REDIR_PARSE_BUGS
else if (CurrentTokenType == TOK_REDIRECTION)
{
- if (!ParseRedirection(&RedirList))
- return NULL;
+ /* Process any trailing redirections and append them to the list */
+ while (CurrentTokenType == TOK_REDIRECTION)
+ {
+ if (!ParseRedirection(RedirList))
+ return NULL;
+
+ ParseToken(0, STANDARD_SEPS);
+ }
+ if (CurrentTokenType == TOK_END)
+ break;
+
+ /* Unparse the current token, and reparse it below with no separators */
+ UnParseToken();
}
else
{
+ /* There is no need to do a UnParseToken() / ParseToken() cycle */
break;
}
+#else
+ else
+ {
+ /* Process any trailing redirections and append them to the list */
+ BOOL bSuccess = FALSE;
+
+ ASSERT(CurrentTokenType != TOK_END);
+
+ while (CurrentTokenType != TOK_END)
+ {
+ if (!ParseRedirection(RedirList))
+ {
+ /* If an actual error happened in ParseRedirection(), bail out */
+ if (bParseError)
+ return NULL;
+
+ /* Otherwise it just returned FALSE because the current token
+ * is not a redirection. Unparse the token and refetch it. */
+ break;
+ }
+ bSuccess = TRUE;
+
+ ParseToken(0, STANDARD_SEPS);
+ }
+ if (CurrentTokenType == TOK_END)
+ break;
+
+ /* Unparse the current token, and reparse it below with no separators */
+ UnParseToken();
+
+ /* If bSuccess == FALSE, we know that it's still the old fetched token, but
+ * it has been unparsed, so we need to refetch it before quitting the loop. */
+ if (!bSuccess)
+ {
+ ParseToken(0, NULL);
+ break;
+ }
+ }
+#endif
+
+ ParseToken(0, NULL);
}
- *Pos++ = _T('\0');
+ *Pos = _T('\0');
- Cmd = cmd_alloc(FIELD_OFFSET(PARSED_COMMAND, Command.First[Pos - ParsedLine]));
+ Cmd = AllocCommand(C_COMMAND,
+ ParsedLine,
+ ParsedLine + TailOffset);
if (!Cmd)
{
WARN("Cannot allocate memory for Cmd!\n");
ParseError();
- FreeRedirection(RedirList);
return NULL;
}
- Cmd->Type = C_COMMAND;
- Cmd->Next = NULL;
- Cmd->Subcommands = NULL;
- Cmd->Redirections = RedirList;
- memcpy(Cmd->Command.First, ParsedLine, (Pos - ParsedLine) * sizeof(TCHAR));
- Cmd->Command.Rest = Cmd->Command.First + TailOffset;
return Cmd;
}
-static PARSED_COMMAND *ParsePrimary(void)
+static PARSED_COMMAND*
+ParsePrimary(VOID)
{
- REDIRECTION *RedirList = NULL;
- int Type;
-
- while (IsSeparator(CurChar))
- {
- if (CurChar == _T('\n'))
- return NULL;
- ParseChar();
- }
-
- if (!CurChar)
- return NULL;
-
- if (CurChar == _T(':'))
- {
- /* "Ignore" the rest of the line.
- * (Line continuations will still be parsed, though.) */
- while (ParseToken(0, NULL) != TOK_END)
- ;
- return NULL;
- }
+ PARSED_COMMAND* Cmd = NULL;
+ REDIRECTION* RedirList = NULL;
- if (CurChar == _T('@'))
+ /* In this context, '@' is considered as a separate token */
+ if ((*CurrentToken == _T('@')) && (CurrentTokenType == TOK_OPERATOR))
{
- PARSED_COMMAND *Cmd;
- ParseChar();
- Cmd = cmd_alloc(sizeof(PARSED_COMMAND));
+ Cmd = AllocCommand(C_QUIET, NULL, NULL);
if (!Cmd)
{
WARN("Cannot allocate memory for Cmd!\n");
ParseError();
return NULL;
}
- Cmd->Type = C_QUIET;
- Cmd->Next = NULL;
/* @ acts like a unary operator with low precedence,
* so call the top-level parser */
Cmd->Subcommands = ParseCommandOp(C_OP_LOWEST);
- Cmd->Redirections = NULL;
return Cmd;
}
/* Process leading redirections and get the head of the command */
- while ((Type = ParseToken(_T('('), STANDARD_SEPS)) == TOK_REDIRECTION)
+#ifndef MSCMD_REDIR_PARSE_BUGS
+ while (CurrentTokenType == TOK_REDIRECTION)
{
if (!ParseRedirection(&RedirList))
return NULL;
+
+ ParseToken(_T('('), STANDARD_SEPS);
+ }
+#else
+ {
+ BOOL bSuccess = FALSE;
+ while (CurrentTokenType != TOK_END)
+ {
+ if (!ParseRedirection(&RedirList))
+ {
+ /* If an actual error happened in ParseRedirection(), bail out */
+ if (bParseError)
+ return NULL;
+
+ /* Otherwise it just returned FALSE because
+ * the current token is not a redirection. */
+ break;
+ }
+ bSuccess = TRUE;
+
+ ParseToken(0, STANDARD_SEPS);
+ }
+ if (bSuccess)
+ {
+ /* Unparse the current token, and reparse it with support for parenthesis */
+ if (CurrentTokenType != TOK_END)
+ UnParseToken();
+
+ ParseToken(_T('('), STANDARD_SEPS);
}
+ }
+#endif
- if (Type == TOK_NORMAL)
- return ParseCommandPart(RedirList);
- else if (Type == TOK_BEGIN_BLOCK)
- return ParseBlock(RedirList);
- else if (Type == TOK_END_BLOCK && !RedirList)
+ if (CurrentTokenType == TOK_NORMAL)
+ Cmd = ParseCommandPart(&RedirList);
+ else if (CurrentTokenType == TOK_BEGIN_BLOCK)
+ Cmd = ParseBlock(&RedirList);
+ else if (CurrentTokenType == TOK_END_BLOCK && !RedirList)
return NULL;
+ if (Cmd)
+ {
+ /* FOR and IF commands cannot have leading redirection
+ * (checked by ParseCommandPart(), errors out if so). */
+ ASSERT(!RedirList || (Cmd->Type != C_FOR && Cmd->Type != C_IF));
+
+ /* Save the redirection list in the command */
+ Cmd->Redirections = RedirList;
+
+ /* Return the new command */
+ return Cmd;
+ }
+
ParseError();
FreeRedirection(RedirList);
return NULL;
}
-static PARSED_COMMAND *ParseCommandOp(int OpType)
+static PARSED_COMMAND*
+ParseCommandBinaryOp(
+ IN COMMAND_TYPE OpType)
{
- PARSED_COMMAND *Cmd, *Left, *Right;
+ PARSED_COMMAND* Cmd;
+
+ if (OpType == C_OP_LOWEST) // i.e. CP_MULTI
+ {
+ /* Ignore any parser-level comments */
+ if (*CurrentToken == _T(':'))
+ {
+ /* Ignore the rest of the line, including line continuations */
+ while (ParseToken(0, NULL) != TOK_END)
+ ;
+#ifdef MSCMD_PARENS_PARSE_BUGS
+ /*
+ * Return NULL in case we are NOT inside a parenthesized block,
+ * otherwise continue. The effects can be observed as follows:
+ * within a parenthesized block, every second ':'-prefixed command
+ * is not ignored, while the first of each "pair" is ignored.
+ * This first command **MUST NOT** be followed by an empty line,
+ * otherwise a syntax error is raised.
+ */
+ if (InsideBlock == 0)
+ {
+#endif
+ return NULL;
+#ifdef MSCMD_PARENS_PARSE_BUGS
+ }
+ /* Get the next token */
+ ParseToken(0, NULL);
+#endif
+ }
+
+ /*
+ * Ignore single closing parenthesis outside of command blocks,
+ * thus interpreted as a command. This very specific situation
+ * can happen e.g. while running in batch mode, when jumping to
+ * a label present inside a command block.
+ *
+ * NOTE: If necessary, this condition can be restricted to only
+ * when a batch context 'bc' is active.
+ *
+ * NOTE 2: For further security, Windows checks that we are NOT
+ * currently inside a parenthesized block, and also, ignores
+ * explicitly everything (ParseToken() loop) on the same line
+ * (including line continuations) after this closing parenthesis.
+ *
+ * Why doing so? Consider the following batch:
+ *
+ * IF 1==1 (
+ * :label
+ * echo A
+ * ) ^
+ * ELSE (
+ * echo B
+ * exit /b
+ * )
+ * GOTO :label
+ *
+ * First the IF block is executed. Since the condition is trivially
+ * true, only the first block "echo A" is executed, then execution
+ * goes after the IF block, that is, at the GOTO. Here, the GOTO
+ * jumps within the first IF-block, however, the running context now
+ * is NOT an IF. So parsing and execution will go through each command,
+ * starting with 'echo A'. But then one gets the ') ^\n ELSE (' part !!
+ * If we want to make sense of this without bailing out due to
+ * parsing error, we should ignore this line, **including** the line
+ * continuation. Hence we need to loop over all the tokens following
+ * the closing parenthesis, instead of just returning NULL straight ahead.
+ * Then execution continues with the other commands, 'echo B' and
+ * 'exit /b' (here to stop the code loop). Execution would also
+ * continue (if 'exit' was replaced by something else) and encounter
+ * the lone closing parenthesis ')', that should again be ignored.
+ *
+ * Note that this feature has been introduced in Win2k+.
+ */
+ if (/** bc && **/ (_tcscmp(CurrentToken, _T(")")) == 0) &&
+ (CurrentTokenType != TOK_END_BLOCK))
+ {
+ ASSERT(InsideBlock == 0);
+
+ /* Ignore the rest of the line, including line continuations */
+ while (ParseToken(0, NULL) != TOK_END)
+ ;
+ return NULL;
+ }
+
+#ifdef MSCMD_PARENS_PARSE_BUGS
+ /* Check whether we have an empty line only if we are not inside
+ * a parenthesized block, and return NULL if so, otherwise do not
+ * do anything; a syntax error will be raised later. */
+ if (InsideBlock == 0)
+#endif
+ if (!*CurrentToken || *CurrentToken == _T('\n'))
+ {
+ ASSERT(CurrentTokenType == TOK_END);
+ return NULL;
+ }
+ }
if (OpType == C_OP_HIGHEST)
Cmd = ParsePrimary();
else
- Cmd = ParseCommandOp(OpType + 1);
+ Cmd = ParseCommandBinaryOp(OpType + 1);
if (Cmd && !_tcscmp(CurrentToken, OpString[OpType - C_OP_LOWEST]))
{
- Left = Cmd;
+ PARSED_COMMAND* Left = Cmd;
+ PARSED_COMMAND* Right;
+
Right = ParseCommandOp(OpType);
if (!Right)
{
- if (!bParseError)
- {
- /* & is allowed to have an empty RHS */
- if (OpType == C_MULTI)
- return Left;
+ /*
+ * The '&' operator is allowed to have an empty RHS.
+ * In this case, we directly return the LHS only.
+ * Note that Windows' CMD prefers building a '&'
+ * command with an empty RHS.
+ */
+ if (!bParseError && (OpType != C_MULTI))
ParseError();
+ if (bParseError)
+ {
+ FreeCommand(Left);
+ return NULL;
}
- FreeCommand(Left);
- return NULL;
+
+#ifndef MSCMD_MULTI_EMPTY_RHS
+ return Left;
+#endif
}
- Cmd = cmd_alloc(sizeof(PARSED_COMMAND));
+ Cmd = AllocCommand(OpType, NULL, NULL);
if (!Cmd)
{
WARN("Cannot allocate memory for Cmd!\n");
@@ -787,24 +1427,35 @@ static PARSED_COMMAND *ParseCommandOp(int OpType)
FreeCommand(Right);
return NULL;
}
- Cmd->Type = OpType;
- Cmd->Next = NULL;
- Cmd->Redirections = NULL;
Cmd->Subcommands = Left;
Left->Next = Right;
+#ifdef MSCMD_MULTI_EMPTY_RHS
+ if (Right)
+#endif
Right->Next = NULL;
}
return Cmd;
}
+static __inline PARSED_COMMAND*
+ParseCommandOp(
+ IN COMMAND_TYPE OpType)
+{
+ /* Start parsing: initialize the first token */
-VOID
-DumpCommand(PARSED_COMMAND *Cmd, ULONG SpacePad);
+ /* Parse the prefix "quiet" operator '@' as a separate command.
+ * Thus, @@foo@bar is parsed as: '@', '@', 'foo@bar'. */
+ ParseTokenEx(_T('@'), _T('('), STANDARD_SEPS, TRUE);
+
+ return ParseCommandBinaryOp(OpType);
+}
-PARSED_COMMAND *
-ParseCommand(LPTSTR Line)
+
+PARSED_COMMAND*
+ParseCommand(
+ IN PCTSTR Line)
{
- PARSED_COMMAND *Cmd;
+ PARSED_COMMAND* Cmd;
if (Line)
{
@@ -818,17 +1469,19 @@ ParseCommand(LPTSTR Line)
return NULL;
bLineContinuations = TRUE;
}
- bParseError = FALSE;
- ParsePos = ParseLine;
- CurChar = _T(' ');
+
+ InitParser();
Cmd = ParseCommandOp(C_OP_LOWEST);
if (Cmd)
{
bIgnoreEcho = FALSE;
- if (CurrentTokenType != TOK_END)
+ if ((CurrentTokenType != TOK_END) &&
+ (_tcscmp(CurrentToken, _T("\n")) != 0))
+ {
ParseError();
+ }
if (bParseError)
{
FreeCommand(Cmd);
@@ -852,7 +1505,8 @@ ParseCommand(LPTSTR Line)
* for dumping the command tree for debugging purposes.
*/
static VOID
-DumpRedir(REDIRECTION* Redirections)
+DumpRedir(
+ IN REDIRECTION* Redirections)
{
REDIRECTION* Redir;
@@ -870,7 +1524,9 @@ DumpRedir(REDIRECTION* Redirections)
}
VOID
-DumpCommand(PARSED_COMMAND *Cmd, ULONG SpacePad)
+DumpCommand(
+ IN PARSED_COMMAND* Cmd,
+ IN ULONG SpacePad)
{
/*
* This macro is like DumpCommand(Cmd, Pad);
@@ -884,15 +1540,19 @@ do { \
goto dump; \
} while (0)
- PARSED_COMMAND *Sub;
+ PARSED_COMMAND* Sub;
dump:
+ if (!Cmd)
+ return;
+
/* Space padding */
ConOutPrintf(_T("%*s"), SpacePad, _T(""));
switch (Cmd->Type)
{
case C_COMMAND:
+ case C_REM:
{
/* Generic command name, and Type */
#ifndef MSCMD_ECHO_COMMAND_COMPAT
@@ -1026,6 +1686,22 @@ dump:
return;
}
+ case C_FOR:
+ {
+ ConOutPuts(_T("for"));
+ /* NOTE: FOR cannot have leading redirections */
+
+ if (Cmd->For.Switches & FOR_DIRS) ConOutPuts(_T(" /D"));
+ if (Cmd->For.Switches & FOR_F) ConOutPuts(_T(" /F"));
+ if (Cmd->For.Switches & FOR_LOOP) ConOutPuts(_T(" /L"));
+ if (Cmd->For.Switches & FOR_RECURSIVE) ConOutPuts(_T(" /R"));
+ if (Cmd->For.Params)
+ ConOutPrintf(_T(" %s"), Cmd->For.Params);
+ ConOutPrintf(_T(" %%%c in (%s) do\n"), Cmd->For.Variable, Cmd->For.List);
+ /*DumpCommand*/DUMP(Cmd->Subcommands, SpacePad + 2);
+ return;
+ }
+
case C_IF:
{
ConOutPuts(_T("if"));
@@ -1110,22 +1786,6 @@ dump:
return;
}
- case C_FOR:
- {
- ConOutPuts(_T("for"));
- /* NOTE: FOR cannot have leading redirections */
-
- if (Cmd->For.Switches & FOR_DIRS) ConOutPuts(_T(" /D"));
- if (Cmd->For.Switches & FOR_F) ConOutPuts(_T(" /F"));
- if (Cmd->For.Switches & FOR_LOOP) ConOutPuts(_T(" /L"));
- if (Cmd->For.Switches & FOR_RECURSIVE) ConOutPuts(_T(" /R"));
- if (Cmd->For.Params)
- ConOutPrintf(_T(" %s"), Cmd->For.Params);
- ConOutPrintf(_T(" %%%c in (%s) do\n"), Cmd->For.Variable, Cmd->For.List);
- /*DumpCommand*/DUMP(Cmd->Subcommands, SpacePad + 2);
- return;
- }
-
default:
ConOutPrintf(_T("*** Unknown type: %x\n"), Cmd->Type);
break;
@@ -1139,14 +1799,19 @@ dump:
* batch file commands and FOR instances.
*/
VOID
-EchoCommand(PARSED_COMMAND *Cmd)
+EchoCommand(
+ IN PARSED_COMMAND* Cmd)
{
- PARSED_COMMAND *Sub;
- REDIRECTION *Redir;
+ PARSED_COMMAND* Sub;
+ REDIRECTION* Redir;
+
+ if (!Cmd)
+ return;
switch (Cmd->Type)
{
case C_COMMAND:
+ case C_REM:
{
if (SubstituteForVars(Cmd->Command.First, TempBuf))
ConOutPrintf(_T("%s"), TempBuf);
@@ -1224,6 +1889,23 @@ EchoCommand(PARSED_COMMAND *Cmd)
break;
}
+ case C_FOR:
+ {
+ ConOutPuts(_T("for"));
+ if (Cmd->For.Switches & FOR_DIRS) ConOutPuts(_T(" /D"));
+ if (Cmd->For.Switches & FOR_F) ConOutPuts(_T(" /F"));
+ if (Cmd->For.Switches & FOR_LOOP) ConOutPuts(_T(" /L"));
+ if (Cmd->For.Switches & FOR_RECURSIVE) ConOutPuts(_T(" /R"));
+ if (Cmd->For.Params)
+ ConOutPrintf(_T(" %s"), Cmd->For.Params);
+ if (Cmd->For.List && SubstituteForVars(Cmd->For.List, TempBuf))
+ ConOutPrintf(_T(" %%%c in (%s) do "), Cmd->For.Variable, TempBuf);
+ else
+ ConOutPrintf(_T(" %%%c in (%s) do "), Cmd->For.Variable, Cmd->For.List);
+ EchoCommand(Cmd->Subcommands);
+ break;
+ }
+
case C_IF:
{
ConOutPuts(_T("if"));
@@ -1246,23 +1928,6 @@ EchoCommand(PARSED_COMMAND *Cmd)
break;
}
- case C_FOR:
- {
- ConOutPuts(_T("for"));
- if (Cmd->For.Switches & FOR_DIRS) ConOutPuts(_T(" /D"));
- if (Cmd->For.Switches & FOR_F) ConOutPuts(_T(" /F"));
- if (Cmd->For.Switches & FOR_LOOP) ConOutPuts(_T(" /L"));
- if (Cmd->For.Switches & FOR_RECURSIVE) ConOutPuts(_T(" /R"));
- if (Cmd->For.Params)
- ConOutPrintf(_T(" %s"), Cmd->For.Params);
- if (Cmd->For.List && SubstituteForVars(Cmd->For.List, TempBuf))
- ConOutPrintf(_T(" %%%c in (%s) do "), Cmd->For.Variable, TempBuf);
- else
- ConOutPrintf(_T(" %%%c in (%s) do "), Cmd->For.Variable, Cmd->For.List);
- EchoCommand(Cmd->Subcommands);
- break;
- }
-
default:
ASSERT(FALSE);
break;
@@ -1287,15 +1952,15 @@ EchoCommand(PARSED_COMMAND *Cmd)
/*
* "Unparse" a command into a text form suitable for passing to CMD /C.
- * Used for pipes. This is basically the same thing as EchoCommand, but
- * writing into a string instead of to standard output.
+ * Used for pipes. This is basically the same thing as EchoCommand(),
+ * but writing into a string instead of to standard output.
*/
-TCHAR *
-Unparse(PARSED_COMMAND *Cmd, TCHAR *Out, TCHAR *OutEnd)
+PTCHAR
+UnparseCommand(
+ IN PARSED_COMMAND* Cmd,
+ OUT PTCHAR Out,
+ IN PTCHAR OutEnd)
{
- PARSED_COMMAND *Sub;
- REDIRECTION *Redir;
-
/*
* Since this function has the annoying requirement that it must avoid
* overflowing the supplied buffer, define some helper macros to make
@@ -1319,13 +1984,21 @@ do { \
} while (0)
#define RECURSE(Subcommand) \
do { \
- Out = Unparse(Subcommand, Out, OutEnd); \
+ Out = UnparseCommand(Subcommand, Out, OutEnd); \
if (!Out) return NULL; \
} while (0)
+ PARSED_COMMAND* Sub;
+ REDIRECTION* Redir;
+
+ if (!Cmd)
+ return Out;
+
switch (Cmd->Type)
{
case C_COMMAND:
+ case C_REM:
+ {
/* This is fragile since there could be special characters, but
* Windows doesn't bother escaping them, so for compatibility
* we probably shouldn't do it either */
@@ -1334,13 +2007,17 @@ do { \
if (!SubstituteForVars(Cmd->Command.Rest, TempBuf)) return NULL;
STRING(TempBuf);
break;
+ }
case C_QUIET:
+ {
CHAR(_T('@'));
RECURSE(Cmd->Subcommands);
break;
+ }
case C_BLOCK:
+ {
CHAR(_T('('));
for (Sub = Cmd->Subcommands; Sub; Sub = Sub->Next)
{
@@ -1350,18 +2027,39 @@ do { \
}
CHAR(_T(')'));
break;
+ }
case C_MULTI:
case C_OR:
case C_AND:
case C_PIPE:
+ {
Sub = Cmd->Subcommands;
RECURSE(Sub);
PRINTF(_T(" %s "), OpString[Cmd->Type - C_OP_LOWEST]);
RECURSE(Sub->Next);
break;
+ }
+
+ case C_FOR:
+ {
+ STRING(_T("for"));
+ if (Cmd->For.Switches & FOR_DIRS) STRING(_T(" /D"));
+ if (Cmd->For.Switches & FOR_F) STRING(_T(" /F"));
+ if (Cmd->For.Switches & FOR_LOOP) STRING(_T(" /L"));
+ if (Cmd->For.Switches & FOR_RECURSIVE) STRING(_T(" /R"));
+ if (Cmd->For.Params)
+ PRINTF(_T(" %s"), Cmd->For.Params);
+ if (Cmd->For.List && SubstituteForVars(Cmd->For.List, TempBuf))
+ PRINTF(_T(" %%%c in (%s) do "), Cmd->For.Variable, TempBuf);
+ else
+ PRINTF(_T(" %%%c in (%s) do "), Cmd->For.Variable, Cmd->For.List);
+ RECURSE(Cmd->Subcommands);
+ break;
+ }
case C_IF:
+ {
STRING(_T("if"));
if (Cmd->If.Flags & IFFLAG_IGNORECASE)
STRING(_T(" /I"));
@@ -1380,21 +2078,7 @@ do { \
RECURSE(Sub->Next);
}
break;
-
- case C_FOR:
- STRING(_T("for"));
- if (Cmd->For.Switches & FOR_DIRS) STRING(_T(" /D"));
- if (Cmd->For.Switches & FOR_F) STRING(_T(" /F"));
- if (Cmd->For.Switches & FOR_LOOP) STRING(_T(" /L"));
- if (Cmd->For.Switches & FOR_RECURSIVE) STRING(_T(" /R"));
- if (Cmd->For.Params)
- PRINTF(_T(" %s"), Cmd->For.Params);
- if (Cmd->For.List && SubstituteForVars(Cmd->For.List, TempBuf))
- PRINTF(_T(" %%%c in (%s) do "), Cmd->For.Variable, TempBuf);
- else
- PRINTF(_T(" %%%c in (%s) do "), Cmd->For.Variable, Cmd->For.List);
- RECURSE(Cmd->Subcommands);
- break;
+ }
default:
ASSERT(FALSE);
@@ -1415,24 +2099,3 @@ do { \
#undef PRINTF
#undef RECURSE
}
-
-VOID
-FreeCommand(PARSED_COMMAND *Cmd)
-{
- if (Cmd->Subcommands)
- FreeCommand(Cmd->Subcommands);
- if (Cmd->Next)
- FreeCommand(Cmd->Next);
- FreeRedirection(Cmd->Redirections);
- if (Cmd->Type == C_IF)
- {
- cmd_free(Cmd->If.LeftArg);
- cmd_free(Cmd->If.RightArg);
- }
- else if (Cmd->Type == C_FOR)
- {
- cmd_free(Cmd->For.Params);
- cmd_free(Cmd->For.List);
- }
- cmd_free(Cmd);
-}