summaryrefslogtreecommitdiff
path: root/dotfiles/system/.zsh/modules/Src/lex.c
diff options
context:
space:
mode:
authorCraig Jennings <c@cjennings.net>2025-05-08 18:49:34 -0500
committerCraig Jennings <c@cjennings.net>2025-05-08 18:51:59 -0500
commit000e00871830cd15de032c80e2b62946cf19445c (patch)
tree794a7922750472bbe0e024042d6ba84f411fc3e0 /dotfiles/system/.zsh/modules/Src/lex.c
parentfe302606931e4bad91c4ed6df81a4403523ba780 (diff)
adding missing dotfiles and folders
- profile.d/ - bashrc - authinfo.gpg - .zsh/
Diffstat (limited to 'dotfiles/system/.zsh/modules/Src/lex.c')
-rw-r--r--dotfiles/system/.zsh/modules/Src/lex.c2203
1 files changed, 2203 insertions, 0 deletions
diff --git a/dotfiles/system/.zsh/modules/Src/lex.c b/dotfiles/system/.zsh/modules/Src/lex.c
new file mode 100644
index 0000000..44ad880
--- /dev/null
+++ b/dotfiles/system/.zsh/modules/Src/lex.c
@@ -0,0 +1,2203 @@
+/*
+ * lex.c - lexical analysis
+ *
+ * This file is part of zsh, the Z shell.
+ *
+ * Copyright (c) 1992-1997 Paul Falstad
+ * All rights reserved.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and to distribute modified versions of this software for any
+ * purpose, provided that the above copyright notice and the following
+ * two paragraphs appear in all copies of this software.
+ *
+ * In no event shall Paul Falstad or the Zsh Development Group be liable
+ * to any party for direct, indirect, special, incidental, or consequential
+ * damages arising out of the use of this software and its documentation,
+ * even if Paul Falstad and the Zsh Development Group have been advised of
+ * the possibility of such damage.
+ *
+ * Paul Falstad and the Zsh Development Group specifically disclaim any
+ * warranties, including, but not limited to, the implied warranties of
+ * merchantability and fitness for a particular purpose. The software
+ * provided hereunder is on an "as is" basis, and Paul Falstad and the
+ * Zsh Development Group have no obligation to provide maintenance,
+ * support, updates, enhancements, or modifications.
+ *
+ */
+
+#include "zsh.mdh"
+#include "lex.pro"
+
+#define LEX_HEAP_SIZE (32)
+
+/* tokens */
+
+/**/
+mod_export char ztokens[] = "#$^*(())$=|{}[]`<>>?~`,-!'\"\\\\";
+
+/* parts of the current token */
+
+/**/
+char *zshlextext;
+/**/
+mod_export char *tokstr;
+/**/
+mod_export enum lextok tok;
+/**/
+mod_export int tokfd;
+
+/*
+ * Line number at which the first character of a token was found.
+ * We always set this in gettok(), which is always called from
+ * zshlex() unless we have reached an error. So it is always
+ * valid when parsing. It is not useful during execution
+ * of the parsed structure.
+ */
+
+/**/
+zlong toklineno;
+
+/* lexical analyzer error flag */
+
+/**/
+mod_export int lexstop;
+
+/* if != 0, this is the first line of the command */
+
+/**/
+mod_export int isfirstln;
+
+/* if != 0, this is the first char of the command (not including white space) */
+
+/**/
+int isfirstch;
+
+/* flag that an alias should be expanded after expansion ending in space */
+
+/**/
+int inalmore;
+
+/*
+ * Don't do spelling correction.
+ * Bit 1 is only valid for the current word. It's
+ * set when we detect a lookahead that stops the word from
+ * needing correction.
+ */
+
+/**/
+int nocorrect;
+
+/*
+ * TBD: the following exported variables are part of the non-interface
+ * with ZLE for completion. They are poorly named and the whole
+ * scheme is incredibly brittle. One piece of robustness is applied:
+ * the variables are only set if LEXFLAGS_ZLE is set. Improvements
+ * should therefore concentrate on areas with this flag set.
+ *
+ * Cursor position and line length in zle when the line is
+ * metafied for access from the main shell.
+ */
+
+/**/
+mod_export int zlemetacs, zlemetall;
+
+/* inwhat says what exactly we are in *
+ * (its value is one of the IN_* things). */
+
+/**/
+mod_export int inwhat;
+
+/* 1 if x added to complete in a blank between words */
+
+/**/
+mod_export int addedx;
+
+/* wb and we hold the beginning/end position of the word we are completing. */
+
+/**/
+mod_export int wb, we;
+
+/**/
+mod_export int wordbeg;
+
+/**/
+mod_export int parbegin;
+
+/**/
+mod_export int parend;
+
+
+/* 1 if aliases should not be expanded */
+
+/**/
+mod_export int noaliases;
+
+/*
+ * If non-zero, we are parsing a line sent to us by the editor, or some
+ * other string that's not part of standard command input (e.g. eval is
+ * part of normal command input).
+ *
+ * Set of bits from LEXFLAGS_*.
+ *
+ * Note that although it is passed into the lexer as an input, the
+ * lexer can set it to zero after finding the word it's searching for.
+ * This only happens if the line being parsed actually does come from
+ * ZLE, and hence the bit LEXFLAGS_ZLE is set.
+ */
+
+/**/
+mod_export int lexflags;
+
+/* don't recognize comments */
+
+/**/
+mod_export int nocomments;
+
+/* add raw input characters while parsing command substitution */
+
+/**/
+int lex_add_raw;
+
+/* variables associated with the above */
+
+static char *tokstr_raw;
+static struct lexbufstate lexbuf_raw;
+
+/*
+ * Text of punctuation tokens.
+ *
+ * The table is indexed by token number; each entry is annotated with
+ * the name of the token it belongs to and every fifth entry with its
+ * index.  Only the first 33 slots (NULLTOK through SEMIBAR) have an
+ * explicit initializer.  The array is declared with WHILE + 1 elements,
+ * so any slots after SEMIBAR are implicitly initialized to NULL.
+ *
+ * Note that the NEWLIN entry is the two-character string consisting of
+ * a backslash followed by 'n', not an actual newline character.
+ */
+
+/**/
+mod_export char *tokstrings[WHILE + 1] = {
+ NULL, /* NULLTOK 0 */
+ ";", /* SEPER */
+ "\\n", /* NEWLIN */
+ ";", /* SEMI */
+ ";;", /* DSEMI */
+ "&", /* AMPER 5 */
+ "(", /* INPAR */
+ ")", /* OUTPAR */
+ "||", /* DBAR */
+ "&&", /* DAMPER */
+ ">", /* OUTANG 10 */
+ ">|", /* OUTANGBANG */
+ ">>", /* DOUTANG */
+ ">>|", /* DOUTANGBANG */
+ "<", /* INANG */
+ "<>", /* INOUTANG 15 */
+ "<<", /* DINANG */
+ "<<-", /* DINANGDASH */
+ "<&", /* INANGAMP */
+ ">&", /* OUTANGAMP */
+ "&>", /* AMPOUTANG 20 */
+ "&>|", /* OUTANGAMPBANG */
+ ">>&", /* DOUTANGAMP */
+ ">>&|", /* DOUTANGAMPBANG */
+ "<<<", /* TRINANG */
+ "|", /* BAR 25 */
+ "|&", /* BARAMP */
+ "()", /* INOUTPAR */
+ "((", /* DINPAR */
+ "))", /* DOUTPAR */
+ "&|", /* AMPERBANG 30 */
+ ";&", /* SEMIAMP */
+ ";|", /* SEMIBAR */
+};
+
+/* lexical state */
+
+static int dbparens;
+static struct lexbufstate lexbuf = { NULL, 256, 0 };
+
+/*
+ * Save the lexer's state into *ls and then reset the token buffers.
+ *
+ * After the snapshot has been taken, the token string, the lexer text
+ * and both the normal and the raw buffer pointers are cleared, the
+ * normal buffer size is put back to 256 and raw capture is switched
+ * off.  The toplevel argument is accepted but not used.
+ */
+
+/**/
+void
+lex_context_save(struct lex_stack *ls, int toplevel)
+{
+    (void)toplevel;
+
+    /* the token currently held by the lexer */
+    ls->tok = tok;
+    ls->tokstr = tokstr;
+    ls->zshlextext = zshlextext;
+    ls->toklineno = toklineno;
+    ls->lexstop = lexstop;
+
+    /* the buffers the token text is built in */
+    ls->lexbuf = lexbuf;
+    ls->lexbuf_raw = lexbuf_raw;
+    ls->tokstr_raw = tokstr_raw;
+    ls->lex_add_raw = lex_add_raw;
+
+    /* position and mode flags */
+    ls->lexflags = lexflags;
+    ls->dbparens = dbparens;
+    ls->isfirstch = isfirstch;
+    ls->isfirstln = isfirstln;
+
+    /* start afresh: no token and no buffer allocated */
+    lexbuf.ptr = NULL;
+    lexbuf.siz = 256;
+    zshlextext = NULL;
+    tokstr = NULL;
+
+    /* raw capture off, raw buffer empty */
+    lex_add_raw = 0;
+    lexbuf_raw.len = 0;
+    lexbuf_raw.siz = 0;
+    lexbuf_raw.ptr = NULL;
+    tokstr_raw = NULL;
+}
+
+/*
+ * Restore the lexer's state from *ls.
+ *
+ * Each lexer variable listed below is overwritten with the value held
+ * in the structure; nothing is freed or reallocated here.  The toplevel
+ * argument is accepted but not used.
+ */
+
+/**/
+mod_export void
+lex_context_restore(const struct lex_stack *ls, int toplevel)
+{
+    (void)toplevel;
+
+    /* the token currently held by the lexer */
+    tok = ls->tok;
+    tokstr = ls->tokstr;
+    zshlextext = ls->zshlextext;
+    toklineno = ls->toklineno;
+    lexstop = ls->lexstop;
+
+    /* the buffers the token text is built in */
+    lexbuf = ls->lexbuf;
+    lexbuf_raw = ls->lexbuf_raw;
+    tokstr_raw = ls->tokstr_raw;
+    lex_add_raw = ls->lex_add_raw;
+
+    /* position and mode flags */
+    lexflags = ls->lexflags;
+    dbparens = ls->dbparens;
+    isfirstch = ls->isfirstch;
+    isfirstln = ls->isfirstln;
+}
+
+/*
+ * Read the next token into the global `tok'.
+ *
+ * Does nothing if tok is already LEXERR.  Otherwise gettok() is called
+ * repeatedly for as long as the token is not ENDINPUT and exalias()
+ * returns non-zero.  When the resulting token is NEWLIN or ENDINPUT,
+ * the bodies of all here-documents queued on `hdocs' are read and
+ * attached with setheredoc(); if one cannot be read, the rest of the
+ * queue is freed and tok becomes LEXERR.  Finally isnewlin is updated
+ * and SEMI, as well as NEWLIN unless LEXFLAGS_NEWLINE is set, is
+ * turned into SEPER.
+ */
+
+/**/
+void
+zshlex(void)
+{
+    /* once the lexer has failed, it stays failed */
+    if (tok == LEXERR)
+        return;
+
+    do {
+        if (inrepeat_)
+            ++inrepeat_;
+        if (inrepeat_ == 3 && isset(SHORTLOOPS))
+            incmdpos = 1;
+        tok = gettok();
+    } while (tok != ENDINPUT && exalias());
+
+    /* bit 1 of nocorrect is only valid for a single word */
+    nocorrect &= 1;
+
+    if (tok == NEWLIN || tok == ENDINPUT) {
+        while (hdocs) {
+            struct heredocs *rest = hdocs->next;
+            char *body, *term;
+            int cstype;
+
+            hwbegin(0);
+            if (hdocs->type == REDIR_HEREDOC)
+                cstype = CS_HEREDOC;
+            else
+                cstype = CS_HEREDOCD;
+            cmdpush(cstype);
+            term = dupstring(hdocs->str);
+            STOPHIST
+            body = gethere(&term, hdocs->type);
+            ALLOWHIST
+            cmdpop();
+            hwend();
+            if (!body) {
+                zerr("here document too large");
+                /* throw away this and all remaining queued documents */
+                for (; hdocs; hdocs = rest) {
+                    rest = hdocs->next;
+                    zfree(hdocs, sizeof(struct heredocs));
+                }
+                tok = LEXERR;
+                break;
+            }
+            setheredoc(hdocs->pc, REDIR_HERESTR, body, hdocs->str, term);
+            zfree(hdocs, sizeof(struct heredocs));
+            hdocs = rest;
+        }
+    }
+
+    if (tok == NEWLIN)
+        isnewlin = inbufct ? -1 : 1;
+    else
+        isnewlin = 0;
+
+    if (tok == SEMI)
+        tok = SEPER;
+    else if (tok == NEWLIN && !(lexflags & LEXFLAGS_NEWLINE))
+        tok = SEPER;
+}
+
+/*
+ * Call zshlex() and then adjust the lexer's context variables
+ * according to the token that was read:
+ *
+ *  - incmdpos is set to 1 or 0 for the tokens listed in the switch and
+ *    left alone for all others;
+ *  - infor becomes 2 after FOR and 0 after anything else, except that
+ *    DINPAR leaves it untouched;
+ *  - after a redirection operator, FOR, FOREACH or SELECT, inredir is
+ *    set and incmdpos is forced to 0 with its old value remembered;
+ *    the token following that puts the remembered value back.
+ */
+
+/**/
+mod_export void
+ctxtlex(void)
+{
+    /* value of incmdpos to reinstate once the redirection word is read */
+    static int saved_cmdpos;
+
+    zshlex();
+
+    switch (tok) {
+    /* tokens after which a command word is expected */
+    case SEPER:     case NEWLIN:    case SEMI:
+    case DSEMI:     case SEMIAMP:   case SEMIBAR:
+    case AMPER:     case AMPERBANG:
+    case INPAR:     case INBRACE:
+    case DBAR:      case DAMPER:
+    case BAR:       case BARAMP:
+    case INOUTPAR:
+    case DOLOOP:    case THEN:      case ELIF:      case ELSE:
+    case DOUTBRACK:
+        incmdpos = 1;
+        break;
+
+    /* tokens after which we are not in command position */
+    case STRING:    case TYPESET:
+    /* case ENVSTRING: */
+    case ENVARRAY:  case OUTPAR:    case CASE:      case DINBRACK:
+        incmdpos = 0;
+        break;
+
+    default:
+        /* any other token leaves incmdpos as it is */
+        break;
+    }
+
+    if (tok != DINPAR) {
+        if (tok == FOR)
+            infor = 2;
+        else
+            infor = 0;
+    }
+
+    if (IS_REDIROP(tok) || tok == FOR || tok == FOREACH || tok == SELECT) {
+        saved_cmdpos = incmdpos;
+        inredir = 1;
+        incmdpos = 0;
+    } else if (inredir) {
+        inredir = 0;
+        incmdpos = saved_cmdpos;
+    }
+}
+
+/*
+ * Action codes for the first character of a token.  Each value is the
+ * index of the corresponding character in the string lx1 used by
+ * initlextabs() below, so the two must be kept in step.  Indices
+ * without a name here (4 and 9 to 12) still end up in lexact1 but have
+ * no LX1_* constant.
+ */
+#define LX1_BKSLASH   0
+#define LX1_COMMENT   1
+#define LX1_NEWLIN    2
+#define LX1_SEMI      3
+#define LX1_AMPER     5
+#define LX1_BAR       6
+#define LX1_INPAR     7
+#define LX1_OUTPAR    8
+#define LX1_INANG    13
+#define LX1_OUTANG   14
+#define LX1_OTHER    15
+
+/*
+ * Action codes for characters inside a token string.  Values 0 to 19
+ * are indices into the string lx2 used by initlextabs() below;
+ * LX2_OTHER is the default and LX2_META is assigned separately.
+ */
+#define LX2_BREAK     0
+#define LX2_OUTPAR    1
+#define LX2_BAR       2
+#define LX2_STRING    3
+#define LX2_INBRACK   4
+#define LX2_OUTBRACK  5
+#define LX2_TILDE     6
+#define LX2_INPAR     7
+#define LX2_INBRACE   8
+#define LX2_OUTBRACE  9
+#define LX2_OUTANG   10
+#define LX2_INANG    11
+#define LX2_EQUALS   12
+#define LX2_BKSLASH  13
+#define LX2_QUOTE    14
+#define LX2_DQUOTE   15
+#define LX2_BQUOTE   16
+#define LX2_COMMA    17
+#define LX2_DASH     18
+#define LX2_BANG     19
+#define LX2_OTHER    20
+#define LX2_META     21
+
+/* per-character lookup tables filled in by initlextabs() */
+static unsigned char lexact1[256];
+static unsigned char lexact2[256];
+static unsigned char lextok2[256];
+
+/*
+ * Fill in the three lookup tables:
+ *   lexact1[c]  action for c as the first character of a token,
+ *   lexact2[c]  action for c inside a token string,
+ *   lextok2[c]  the character to store for c inside a token string.
+ */
+
+/**/
+void
+initlextabs(void)
+{
+    /* position in each string == action code, see the LX1_*, LX2_* values */
+    static const char *const first_chars = "\\q\n;!&|(){}[]<>";
+    static const char *const inner_chars = ";)|$[]~({}><=\\\'\"`,-!";
+    const char *p;
+    int ch;
+
+    /* defaults: no special action, character stands for itself */
+    for (ch = 0; ch < 256; ch++) {
+        lextok2[ch] = ch;
+        lexact2[ch] = LX2_OTHER;
+        lexact1[ch] = LX1_OTHER;
+    }
+
+    for (p = first_chars; *p; p++)
+        lexact1[STOUC(*p)] = (unsigned char)(p - first_chars);
+    for (p = inner_chars; *p; p++)
+        lexact2[STOUC(*p)] = (unsigned char)(p - inner_chars);
+
+    /* two actions that are not taken from the string */
+    lexact2['&'] = LX2_BREAK;
+    lexact2[STOUC(Meta)] = LX2_META;
+
+    /* characters that are stored as tokens rather than literally */
+    lextok2['#'] = Pound;
+    lextok2['$'] = String;
+    lextok2['^'] = Hat;
+    lextok2['*'] = Star;
+    lextok2['?'] = Quest;
+    lextok2['~'] = Tilde;
+    lextok2['['] = Inbrack;
+    lextok2['{'] = Inbrace;
+}
+
+/*
+ * Initialize lexical state: clear the stop flag, the (( ... )) flag
+ * and the spelling-correction flag, and set the current token to
+ * ENDINPUT.
+ */
+
+/**/
+void
+lexinit(void)
+{
+    lexstop = 0;
+    dbparens = 0;
+    nocorrect = 0;
+    tok = ENDINPUT;
+}
+
+/*
+ * Append one character to the token string buffer.
+ *
+ * When the buffer becomes full it is enlarged with hrealloc(), normally
+ * to twice its size.  If the amount of pending input (inbufct) lies
+ * strictly between the old size and the doubled size, that amount is
+ * used as the new size instead.  The newly added part is zero-filled.
+ */
+
+/**/
+void
+add(int c)
+{
+    int grown;
+
+    *lexbuf.ptr++ = c;
+    lexbuf.len++;
+    if (lexbuf.len != lexbuf.siz)
+        return;
+
+    grown = lexbuf.siz * 2;
+    if (inbufct > lexbuf.siz && inbufct < grown)
+        grown = inbufct;
+
+    tokstr = (char *)hrealloc(tokstr, lexbuf.siz, grown);
+    /*
+     * lexbuf.len == lexbuf.siz at this point, so lexbuf.ptr ends up at
+     * the start of the newly allocated memory.
+     */
+    lexbuf.ptr = tokstr + lexbuf.len;
+    memset(lexbuf.ptr, 0, grown - lexbuf.siz);
+    lexbuf.siz = grown;
+}
+
+/*
+ * Helpers that record positions in parbegin and parend; they are used
+ * in this file where an opening and a closing backquote are handled.
+ * Both only act when LEXFLAGS_ZLE is set in lexflags and INP_ALIAS is
+ * clear in inbufflags.
+ *
+ * SETPARBEGIN stores inbufct in parbegin if
+ * zlemetacs >= zlemetall + 1 - inbufct.
+ *
+ * SETPAREND acts only if parbegin has been set (is not -1) and parend
+ * has not (is -1).  If zlemetacs >= zlemetall + 1 - inbufct it resets
+ * parbegin to -1, otherwise it stores inbufct in parend.
+ *
+ * Each macro expands to a complete brace-enclosed block and is written
+ * at its use sites without a trailing semicolon, so the expansion must
+ * remain a block and not become an expression or a do/while.
+ */
+#define SETPARBEGIN { \
+ if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS) && \
+ zlemetacs >= zlemetall+1-inbufct) \
+ parbegin = inbufct; \
+ }
+#define SETPAREND { \
+ if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS) && \
+ parbegin != -1 && parend == -1) { \
+ if (zlemetacs >= zlemetall + 1 - inbufct) \
+ parbegin = -1; \
+ else \
+ parend = inbufct; \
+ } \
+ }
+
+/* Results of cmd_or_math() and cmd_or_math_sub() */
+enum {
+    CMD_OR_MATH_CMD,
+    CMD_OR_MATH_MATH,
+    CMD_OR_MATH_ERR
+};
+
+/*
+ * Try to read the input as a math expression terminated by `))'.
+ *
+ * cs_type is pushed on the command stack while the text is parsed.
+ *
+ * Returns
+ *   CMD_OR_MATH_MATH if the text parsed and was followed by a second
+ *                    closing parenthesis;
+ *   CMD_OR_MATH_CMD  if it did not turn out to be math but there was
+ *                    no gross error.  In this case everything added to
+ *                    the token buffer since entry has been pushed back
+ *                    onto the input, followed by a `(';
+ *   CMD_OR_MATH_ERR  if the input ran out during the parse or errflag
+ *                    was set while pushing characters back.
+ */
+
+static int
+cmd_or_math(int cs_type)
+{
+    const int startlen = lexbuf.len;
+    const int had_append = inbufflags & INP_APPEND;
+    int c;
+
+    cmdpush(cs_type);
+    inbufflags |= INP_APPEND;
+    c = dquote_parse(')', 0);
+    /* only clear INP_APPEND if it was us who set it */
+    if (!had_append)
+        inbufflags &= ~INP_APPEND;
+    cmdpop();
+    *lexbuf.ptr = '\0';
+
+    if (c) {
+        /* parse failed; at end of input there is nothing to unget */
+        if (lexstop)
+            return CMD_OR_MATH_ERR;
+    } else {
+        /* parsed up to a `)'; it is math if another one follows */
+        c = hgetc();
+        if (c == ')')
+            return CMD_OR_MATH_MATH;
+        hungetc(c);
+        lexstop = 0;
+        c = ')';
+    }
+
+    /* not math: unget the whole thing, last character first */
+    hungetc(c);
+    lexstop = 0;
+    while (lexbuf.len > startlen && !(errflag & ERRFLAG_ERROR)) {
+        char last = *--lexbuf.ptr;
+
+        lexbuf.len--;
+        /* tokens are turned back into the characters they stand for */
+        hungetc(itok(last) ? ztokens[last - Pound] : last);
+    }
+    if (errflag)
+        return CMD_OR_MATH_ERR;
+    hungetc('(');
+    if (errflag)
+        return CMD_OR_MATH_ERR;
+    return CMD_OR_MATH_CMD;
+}
+
+
+/*
+ * Parse either a $(( ... )) or a $(...); called after the first
+ * opening parenthesis has been read.
+ *
+ * If a second `(' follows, Inpar and `(' are added to the token and
+ * cmd_or_math() decides.  For math, the Inpar is replaced by Inparmath
+ * and a `)' is added.  If it is not math, the two characters are
+ * removed from the token again and the text is read with skipcomm(),
+ * as it is when there was no second `(' in the first place.
+ *
+ * Return the same as cmd_or_math().
+ */
+static int
+cmd_or_math_sub(void)
+{
+    int c = hgetc();
+
+    if (c != '(') {
+        hungetc(c);
+        lexstop = 0;
+    } else {
+        /* offset rather than pointer: add() may move the buffer */
+        int start = (int)(lexbuf.ptr - tokstr);
+
+        add(Inpar);
+        add('(');
+        switch (cmd_or_math(CS_MATHSUBST)) {
+        case CMD_OR_MATH_MATH:
+            tokstr[start] = Inparmath;
+            add(')');
+            return CMD_OR_MATH_MATH;
+
+        case CMD_OR_MATH_ERR:
+            return CMD_OR_MATH_ERR;
+
+        default:
+            /* a command after all: drop the Inpar and `(' again */
+            lexbuf.ptr -= 2;
+            lexbuf.len -= 2;
+            break;
+        }
+    }
+
+    if (skipcomm())
+        return CMD_OR_MATH_ERR;
+    return CMD_OR_MATH_CMD;
+}
+
+/*
+ * Check whether we're looking at valid numeric globbing syntax
+ * (/\<[0-9]*-[0-9]*\>/).  Call pointing just after the opening "<".
+ * Leaves the input in the same place, returning 0 or 1.
+ *
+ * Every character read is remembered in a scratch buffer so that it
+ * can be pushed back, in reverse order, before returning.  The buffer
+ * is grown with zrealloc() rather than plain realloc(): the result of
+ * realloc() was previously assigned straight back without a check, so
+ * an allocation failure would have led to a write through a null
+ * pointer on the next character.
+ * NOTE(review): assumes zrealloc() is declared through zsh.mdh and
+ * reports allocation failure itself, like zalloc() -- confirm.
+ */
+
+/**/
+static int
+isnumglob(void)
+{
+    int c, ec = '-', ret = 0;
+    int tbs = 256, n = 0;
+    char *tbuf = (char *)zalloc(tbs);
+
+    while(1) {
+        c = hgetc();
+        if(lexstop) {
+            /* end of input: not a numeric glob, nothing more to record */
+            lexstop = 0;
+            break;
+        }
+        tbuf[n++] = c;
+        if(!idigit(c)) {
+            /* the only non-digits allowed are one '-' and then one '>' */
+            if(c != ec)
+                break;
+            if(ec == '>') {
+                ret = 1;
+                break;
+            }
+            ec = '>';
+        }
+        if(n == tbs)
+            tbuf = (char *)zrealloc(tbuf, tbs *= 2);
+    }
+    /* put everything back so the caller sees the input unchanged */
+    while(n--)
+        hungetc(tbuf[n]);
+    zfree(tbuf, tbs);
+    return ret;
+}
+
+/*
+ * Read one token from the input and return its type.
+ *
+ * Leading blanks are skipped and toklineno is set to the current line
+ * number.  At end of input the result is ENDINPUT, or LEXERR if
+ * errflag is set.
+ *
+ * While dbparens is set the input is read as a math expression by
+ * dquote_parse() and the result is DINPAR, DOUTPAR or LEXERR.
+ *
+ * A leading digit directly followed by `<', `>' or `&>' is taken as a
+ * file descriptor; it is kept in peekfd and copied to tokfd when the
+ * redirection token is returned.
+ *
+ * Comments are dealt with next, then the first character is looked up
+ * in lexact1 to recognise the punctuation tokens.  Anything that is
+ * not returned from the switch is the start of a word, which is read
+ * by gettokstr().
+ */
+
+/**/
+static enum lextok
+gettok(void)
+{
+ int c, d;
+ int peekfd = -1;
+ enum lextok peek;
+
+ beginning:
+ tokstr = NULL;
+ while (iblank(c = hgetc()) && !lexstop);
+ toklineno = lineno;
+ if (lexstop)
+ return (errflag) ? LEXERR : ENDINPUT;
+ isfirstln = 0;
+ if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS))
+ wordbeg = inbufct - (qbang && c == bangchar);
+ hwbegin(-1-(qbang && c == bangchar));
+ /* word includes the last character read and possibly \ before ! */
+ if (dbparens) {
+ /*
+ * dbparens is set further down when `((' is seen while infor is
+ * set.  Read the expression up to ')', or up to ';' while infor
+ * is still non-zero.
+ */
+ lexbuf.len = 0;
+ lexbuf.ptr = tokstr = (char *) hcalloc(lexbuf.siz = LEX_HEAP_SIZE);
+ hungetc(c);
+ cmdpush(CS_MATH);
+ c = dquote_parse(infor ? ';' : ')', 0);
+ cmdpop();
+ *lexbuf.ptr = '\0';
+ if (!c && infor) {
+ infor--;
+ return DINPAR;
+ }
+ if (c || (c = hgetc()) != ')') {
+ hungetc(c);
+ return LEXERR;
+ }
+ dbparens = 0;
+ return DOUTPAR;
+ } else if (idigit(c)) { /* handle 1< foo */
+ d = hgetc();
+ if(d == '&') {
+ d = hgetc();
+ if(d == '>') {
+ peekfd = c - '0';
+ hungetc('>');
+ c = '&';
+ } else {
+ hungetc(d);
+ lexstop = 0;
+ hungetc('&');
+ }
+ } else if (d == '>' || d == '<') {
+ peekfd = c - '0';
+ c = d;
+ } else {
+ hungetc(d);
+ lexstop = 0;
+ }
+ }
+
+ /* chars in initial position in word */
+
+ /*
+ * Handle comments. There are some special cases when this
+ * is not normal command input: lexflags implies we are examining
+ * a line lexically without it being used for normal command input.
+ */
+ if (c == hashchar && !nocomments &&
+ (isset(INTERACTIVECOMMENTS) ||
+ ((!lexflags || (lexflags & LEXFLAGS_COMMENTS)) && !expanding &&
+ (!interact || unset(SHINSTDIN) || strin)))) {
+ /* History is handled here to prevent extra *
+ * newlines being inserted into the history. */
+
+ if (lexflags & LEXFLAGS_COMMENTS_KEEP) {
+ lexbuf.len = 0;
+ lexbuf.ptr = tokstr =
+ (char *)hcalloc(lexbuf.siz = LEX_HEAP_SIZE);
+ add(c);
+ }
+ hwabort();
+ while ((c = ingetc()) != '\n' && !lexstop) {
+ hwaddc(c);
+ addtoline(c);
+ if (lexflags & LEXFLAGS_COMMENTS_KEEP)
+ add(c);
+ }
+
+ if (errflag)
+ peek = LEXERR;
+ else {
+ if (lexflags & LEXFLAGS_COMMENTS_KEEP) {
+ /* the comment text is returned as a STRING token */
+ *lexbuf.ptr = '\0';
+ if (!lexstop)
+ hungetc(c);
+ peek = STRING;
+ } else {
+ hwend();
+ hwbegin(0);
+ hwaddc('\n');
+ addtoline('\n');
+ /*
+ * If splitting a line and removing comments,
+ * we don't want a newline token since it's
+ * treated specially.
+ */
+ if ((lexflags & LEXFLAGS_COMMENTS_STRIP) && lexstop)
+ peek = ENDINPUT;
+ else
+ peek = NEWLIN;
+ }
+ }
+ return peek;
+ }
+ switch (lexact1[STOUC(c)]) {
+ case LX1_BKSLASH:
+ /* backslash-newline before a token: start again on the next line */
+ d = hgetc();
+ if (d == '\n')
+ goto beginning;
+ hungetc(d);
+ lexstop = 0;
+ break;
+ case LX1_NEWLIN:
+ return NEWLIN;
+ case LX1_SEMI:
+ d = hgetc();
+ if(d == ';')
+ return DSEMI;
+ else if(d == '&')
+ return SEMIAMP;
+ else if (d == '|')
+ return SEMIBAR;
+ hungetc(d);
+ lexstop = 0;
+ return SEMI;
+ case LX1_AMPER:
+ d = hgetc();
+ if (d == '&')
+ return DAMPER;
+ else if (d == '!' || d == '|')
+ return AMPERBANG;
+ else if (d == '>') {
+ tokfd = peekfd;
+ d = hgetc();
+ if (d == '!' || d == '|')
+ return OUTANGAMPBANG;
+ else if (d == '>') {
+ d = hgetc();
+ if (d == '!' || d == '|')
+ return DOUTANGAMPBANG;
+ hungetc(d);
+ lexstop = 0;
+ return DOUTANGAMP;
+ }
+ hungetc(d);
+ lexstop = 0;
+ return AMPOUTANG;
+ }
+ hungetc(d);
+ lexstop = 0;
+ return AMPER;
+ case LX1_BAR:
+ d = hgetc();
+ if (d == '|' && !incasepat)
+ return DBAR;
+ else if (d == '&')
+ return BARAMP;
+ hungetc(d);
+ lexstop = 0;
+ return BAR;
+ case LX1_INPAR:
+ d = hgetc();
+ if (d == '(') {
+ if (infor) {
+ dbparens = 1;
+ return DINPAR;
+ }
+ if (incmdpos || (isset(SHGLOB) && !isset(KSHGLOB))) {
+ lexbuf.len = 0;
+ lexbuf.ptr = tokstr = (char *)
+ hcalloc(lexbuf.siz = LEX_HEAP_SIZE);
+ switch (cmd_or_math(CS_MATH)) {
+ case CMD_OR_MATH_MATH:
+ return DINPAR;
+
+ case CMD_OR_MATH_CMD:
+ /*
+ * Not math, so we don't return the contents
+ * as a string in this case.
+ */
+ tokstr = NULL;
+ return INPAR;
+
+ case CMD_OR_MATH_ERR:
+ /*
+ * LEXFLAGS_ACTIVE means we came from bufferwords(),
+ * so we treat as an incomplete math expression
+ */
+ if (lexflags & LEXFLAGS_ACTIVE)
+ tokstr = dyncat("((", tokstr ? tokstr : "");
+ /* fall through */
+
+ default:
+ return LEXERR;
+ }
+ }
+ } else if (d == ')')
+ return INOUTPAR;
+ hungetc(d);
+ lexstop = 0;
+ if (!(isset(SHGLOB) || incond == 1 || incmdpos))
+ break;
+ return INPAR;
+ case LX1_OUTPAR:
+ return OUTPAR;
+ case LX1_INANG:
+ d = hgetc();
+ if (d == '(') {
+ hungetc(d);
+ lexstop = 0;
+ unpeekfd:
+ /*
+ * Not a redirection after all.  If a leading digit was taken
+ * as a file descriptor, push the operator character back and
+ * let the digit be the first character of the word instead.
+ */
+ if(peekfd != -1) {
+ hungetc(c);
+ c = '0' + peekfd;
+ }
+ break;
+ }
+ if (d == '>') {
+ peek = INOUTANG;
+ } else if (d == '<') {
+ int e = hgetc();
+
+ if (e == '(') {
+ hungetc(e);
+ hungetc(d);
+ peek = INANG;
+ } else if (e == '<')
+ peek = TRINANG;
+ else if (e == '-')
+ peek = DINANGDASH;
+ else {
+ hungetc(e);
+ lexstop = 0;
+ peek = DINANG;
+ }
+ } else if (d == '&') {
+ peek = INANGAMP;
+ } else {
+ hungetc(d);
+ if(isnumglob())
+ goto unpeekfd;
+ peek = INANG;
+ }
+ tokfd = peekfd;
+ return peek;
+ case LX1_OUTANG:
+ d = hgetc();
+ if (d == '(') {
+ hungetc(d);
+ goto unpeekfd;
+ } else if (d == '&') {
+ d = hgetc();
+ if (d == '!' || d == '|')
+ peek = OUTANGAMPBANG;
+ else {
+ hungetc(d);
+ lexstop = 0;
+ peek = OUTANGAMP;
+ }
+ } else if (d == '!' || d == '|')
+ peek = OUTANGBANG;
+ else if (d == '>') {
+ d = hgetc();
+ if (d == '&') {
+ d = hgetc();
+ if (d == '!' || d == '|')
+ peek = DOUTANGAMPBANG;
+ else {
+ hungetc(d);
+ lexstop = 0;
+ peek = DOUTANGAMP;
+ }
+ } else if (d == '!' || d == '|')
+ peek = DOUTANGBANG;
+ else if (d == '(') {
+ hungetc(d);
+ hungetc('>');
+ peek = OUTANG;
+ } else {
+ hungetc(d);
+ lexstop = 0;
+ peek = DOUTANG;
+ if (isset(HISTALLOWCLOBBER))
+ hwaddc('|');
+ }
+ } else {
+ hungetc(d);
+ lexstop = 0;
+ peek = OUTANG;
+ if (!incond && isset(HISTALLOWCLOBBER))
+ hwaddc('|');
+ }
+ tokfd = peekfd;
+ return peek;
+ }
+
+ /* we've started a string, now get the *
+ * rest of it, performing tokenization */
+ return gettokstr(c, 0);
+}
+
+/*
+ * Get the remains of a token string. This has two uses.
+ * When called from gettok(), with sub = 0, we have already identified
+ * any interesting initial character and want to get the rest of
+ * what we now know is a string. However, the string may still include
+ * metacharacters and potentially substitutions.
+ *
+ * When called from parse_subst_string() with sub = 1, we are not
+ * fully parsing a command line, merely tokenizing a string.
+ * In this case we always add characters to the parsed string
+ * unless there is a parse error.
+ *
+ * c is the first character of the string.  With sub = 0 a fresh token
+ * buffer is allocated; with sub = 1 the existing one is appended to.
+ *
+ * The value returned at the end of the function is STRING, ENVSTRING
+ * or LEXERR.  INOUTPAR, INPAR, STRING and ENVARRAY can also be
+ * returned directly from inside the loop.
+ */
+
+/**/
+static enum lextok
+gettokstr(int c, int sub)
+{
+ /*
+ * bct, pct, brct: current nesting depth of braces, parentheses and
+ * square brackets.
+ * seen_brct: set once a `[' has been seen outside a ${...}.
+ * fdpar: see the comment on `foo( )' function definitions below.
+ * intpos: non-zero while at the initial position of the word; also
+ * set again just after the `=' of an assignment.
+ * in_brace_param: zero, or the value of bct at which the ${...} we
+ * are inside was opened.
+ * unmatched: the quote character for which no closing partner was
+ * found, used for the error message at the end.
+ */
+ int bct = 0, pct = 0, brct = 0, seen_brct = 0, fdpar = 0;
+ int intpos = 1, in_brace_param = 0;
+ int inquote, unmatched = 0;
+ enum lextok peek;
+#ifdef DEBUG
+ int ocmdsp = cmdsp;
+#endif
+
+ peek = STRING;
+ if (!sub) {
+ lexbuf.len = 0;
+ lexbuf.ptr = tokstr = (char *) hcalloc(lexbuf.siz = LEX_HEAP_SIZE);
+ }
+ for (;;) {
+ int act;
+ int e;
+ int inbl = inblank(c);
+
+ if (fdpar && !inbl && c != ')')
+ fdpar = 0;
+
+ /* blanks end the word unless inside ${...} or parentheses */
+ if (inbl && !in_brace_param && !pct)
+ act = LX2_BREAK;
+ else {
+ act = lexact2[STOUC(c)];
+ c = lextok2[STOUC(c)];
+ }
+ switch (act) {
+ case LX2_BREAK:
+ if (!in_brace_param && !sub)
+ goto brk;
+ break;
+ case LX2_META:
+ c = hgetc();
+#ifdef DEBUG
+ if (lexstop) {
+ fputs("BUG: input terminated by Meta\n", stderr);
+ fflush(stderr);
+ goto brk;
+ }
+#endif
+ add(Meta);
+ break;
+ case LX2_OUTPAR:
+ if (fdpar) {
+ /* this is a single word `( )', treat as INOUTPAR */
+ add(c);
+ *lexbuf.ptr = '\0';
+ return INOUTPAR;
+ }
+ if ((sub || in_brace_param) && isset(SHGLOB))
+ break;
+ if (!in_brace_param && !pct--) {
+ if (sub) {
+ pct = 0;
+ break;
+ } else
+ goto brk;
+ }
+ c = Outpar;
+ break;
+ case LX2_BAR:
+ if (!pct && !in_brace_param) {
+ if (sub)
+ break;
+ else
+ goto brk;
+ }
+ if (unset(SHGLOB) || (!sub && !in_brace_param))
+ c = Bar;
+ break;
+ case LX2_STRING:
+ /* `$': look at the next character to see what kind of expansion */
+ e = hgetc();
+ if (e == '[') {
+ cmdpush(CS_MATHSUBST);
+ add(String);
+ add(Inbrack);
+ c = dquote_parse(']', sub);
+ cmdpop();
+ if (c) {
+ peek = LEXERR;
+ goto brk;
+ }
+ c = Outbrack;
+ } else if (e == '(') {
+ add(String);
+ switch (cmd_or_math_sub()) {
+ case CMD_OR_MATH_CMD:
+ c = Outpar;
+ break;
+
+ case CMD_OR_MATH_MATH:
+ c = Outparmath;
+ break;
+
+ default:
+ peek = LEXERR;
+ goto brk;
+ }
+ } else {
+ if (e == '{') {
+ add(c);
+ c = Inbrace;
+ ++bct;
+ cmdpush(CS_BRACEPAR);
+ if (!in_brace_param) {
+ if ((in_brace_param = bct))
+ seen_brct = 0;
+ }
+ } else {
+ hungetc(e);
+ lexstop = 0;
+ }
+ }
+ break;
+ case LX2_INBRACK:
+ if (!in_brace_param) {
+ brct++;
+ seen_brct = 1;
+ }
+ c = Inbrack;
+ break;
+ case LX2_OUTBRACK:
+ if (!in_brace_param)
+ brct--;
+ if (brct < 0)
+ brct = 0;
+ c = Outbrack;
+ break;
+ case LX2_INPAR:
+ if (isset(SHGLOB)) {
+ if (sub || in_brace_param)
+ break;
+ if (incasepat > 0 && !lexbuf.len)
+ return INPAR;
+ if (!isset(KSHGLOB) && lexbuf.len)
+ goto brk;
+ }
+ if (!in_brace_param) {
+ if (!sub) {
+ e = hgetc();
+ hungetc(e);
+ lexstop = 0;
+ /* For command words, parentheses are only
+ * special at the start. But now we're tokenising
+ * the remaining string. So I don't see what
+ * the old incmdpos test here is for.
+ * pws 1999/6/8
+ *
+ * Oh, no.
+ * func1( )
+ * is a valid function definition in [k]sh. The best
+ * thing we can do, without really nasty lookahead tricks,
+ * is break if we find a blank after a parenthesis. At
+ * least this can't happen inside braces or brackets. We
+ * only allow this with SHGLOB (set for both sh and ksh).
+ *
+ * Things like `print @( |foo)' should still
+ * work, because [k]sh don't allow multiple words
+ * in a function definition, so we only do this
+ * in command position.
+ * pws 1999/6/14
+ */
+ if (e == ')' || (isset(SHGLOB) && inblank(e) && !bct &&
+ !brct && !intpos && incmdpos)) {
+ /*
+ * Either a () token, or a command word with
+ * something suspiciously like a ksh function
+ * definition.
+ * The current word isn't spellcheckable.
+ */
+ nocorrect |= 2;
+ goto brk;
+ }
+ }
+ /*
+ * This also handles the [k]sh `foo( )' function definition.
+ * Maintain a variable fdpar, set as long as a single set of
+ * parentheses contains only space. Then if we get to the
+ * closing parenthesis and it is still set, we can assume we
+ * have a function definition. Only do this at the start of
+ * the word, since the (...) must be a separate token.
+ */
+ if (!pct++ && isset(SHGLOB) && intpos && !bct && !brct)
+ fdpar = 1;
+ }
+ c = Inpar;
+ break;
+ case LX2_INBRACE:
+ if (isset(IGNOREBRACES) || sub)
+ c = '{';
+ else {
+ /* a `{' alone at the start of a command word is its own token */
+ if (!lexbuf.len && incmdpos) {
+ add('{');
+ *lexbuf.ptr = '\0';
+ return STRING;
+ }
+ if (in_brace_param) {
+ cmdpush(CS_BRACE);
+ }
+ bct++;
+ }
+ break;
+ case LX2_OUTBRACE:
+ if ((isset(IGNOREBRACES) || sub) && !in_brace_param)
+ break;
+ if (!bct)
+ break;
+ if (in_brace_param) {
+ cmdpop();
+ }
+ /* closing the brace that opened the ${...} ends the parameter */
+ if (bct-- == in_brace_param)
+ in_brace_param = 0;
+ c = Outbrace;
+ break;
+ case LX2_COMMA:
+ if (unset(IGNOREBRACES) && !sub && bct > in_brace_param)
+ c = Comma;
+ break;
+ case LX2_OUTANG:
+ if (in_brace_param || sub)
+ break;
+ e = hgetc();
+ if (e != '(') {
+ hungetc(e);
+ lexstop = 0;
+ goto brk;
+ }
+ add(OutangProc);
+ if (skipcomm()) {
+ peek = LEXERR;
+ goto brk;
+ }
+ c = Outpar;
+ break;
+ case LX2_INANG:
+ if (isset(SHGLOB) && sub)
+ break;
+ e = hgetc();
+ if (!(in_brace_param || sub) && e == '(') {
+ add(Inang);
+ if (skipcomm()) {
+ peek = LEXERR;
+ goto brk;
+ }
+ c = Outpar;
+ break;
+ }
+ hungetc(e);
+ if(isnumglob()) {
+ /* isnumglob() has verified that a closing '>' follows */
+ add(Inang);
+ while ((c = hgetc()) != '>')
+ add(c);
+ c = Outang;
+ break;
+ }
+ lexstop = 0;
+ if (in_brace_param || sub)
+ break;
+ goto brk;
+ case LX2_EQUALS:
+ if (!sub) {
+ if (intpos) {
+ e = hgetc();
+ if (e != '(') {
+ hungetc(e);
+ lexstop = 0;
+ c = Equals;
+ } else {
+ add(Equals);
+ if (skipcomm()) {
+ peek = LEXERR;
+ goto brk;
+ }
+ c = Outpar;
+ }
+ } else if (peek != ENVSTRING &&
+ (incmdpos || intypeset) && !bct && !brct) {
+ /*
+ * Possible assignment: check that everything read so
+ * far is digits, or an identifier with an optional
+ * subscript, optionally followed by `+'.
+ */
+ char *t = tokstr;
+ if (idigit(*t))
+ while (++t < lexbuf.ptr && idigit(*t));
+ else {
+ int sav = *lexbuf.ptr;
+ *lexbuf.ptr = '\0';
+ t = itype_end(t, IIDENT, 0);
+ if (t < lexbuf.ptr) {
+ skipparens(Inbrack, Outbrack, &t);
+ } else {
+ *lexbuf.ptr = sav;
+ }
+ }
+ if (*t == '+')
+ t++;
+ if (t == lexbuf.ptr) {
+ e = hgetc();
+ if (e == '(') {
+ *lexbuf.ptr = '\0';
+ return ENVARRAY;
+ }
+ hungetc(e);
+ lexstop = 0;
+ peek = ENVSTRING;
+ intpos = 2;
+ } else
+ c = Equals;
+ } else
+ c = Equals;
+ }
+ break;
+ case LX2_BKSLASH:
+ c = hgetc();
+ if (c == '\n') {
+ /* backslash-newline is removed altogether */
+ c = hgetc();
+ if (!lexstop)
+ continue;
+ } else {
+ add(Bnull);
+ if (c == STOUC(Meta)) {
+ c = hgetc();
+#ifdef DEBUG
+ if (lexstop) {
+ fputs("BUG: input terminated by Meta\n", stderr);
+ fflush(stderr);
+ goto brk;
+ }
+#endif
+ add(Meta);
+ }
+ }
+ if (lexstop)
+ goto brk;
+ break;
+ case LX2_QUOTE: {
+ /* non-zero if this is $'...' rather than plain '...' */
+ int strquote = (lexbuf.len && lexbuf.ptr[-1] == String);
+
+ add(Snull);
+ cmdpush(CS_QUOTE);
+ for (;;) {
+ STOPHIST
+ while ((c = hgetc()) != '\'' && !lexstop) {
+ if (strquote && c == '\\') {
+ c = hgetc();
+ if (lexstop)
+ break;
+ /*
+ * Mostly we don't need to do anything special
+ * with escape backslashes or closing quotes
+ * inside $'...'; however in completion we
+ * need to be able to strip multiple backslashes
+ * neatly.
+ */
+ if (c == '\\' || c == '\'')
+ add(Bnull);
+ else
+ add('\\');
+ } else if (!sub && isset(CSHJUNKIEQUOTES) && c == '\n') {
+ if (lexbuf.ptr[-1] == '\\')
+ lexbuf.ptr--, lexbuf.len--;
+ else
+ break;
+ }
+ add(c);
+ }
+ ALLOWHIST
+ if (c != '\'') {
+ unmatched = '\'';
+ /* Not an error when called from bufferwords() */
+ if (!(lexflags & LEXFLAGS_ACTIVE))
+ peek = LEXERR;
+ cmdpop();
+ goto brk;
+ }
+ /* with RCQUOTES, '' inside '...' stands for a single quote */
+ e = hgetc();
+ if (e != '\'' || unset(RCQUOTES) || strquote)
+ break;
+ add(c);
+ }
+ cmdpop();
+ hungetc(e);
+ lexstop = 0;
+ c = Snull;
+ break;
+ }
+ case LX2_DQUOTE:
+ add(Dnull);
+ cmdpush(CS_DQUOTE);
+ c = dquote_parse('"', sub);
+ cmdpop();
+ if (c) {
+ unmatched = '"';
+ /* Not an error when called from bufferwords() */
+ if (!(lexflags & LEXFLAGS_ACTIVE))
+ peek = LEXERR;
+ goto brk;
+ }
+ c = Dnull;
+ break;
+ case LX2_BQUOTE:
+ add(Tick);
+ cmdpush(CS_BQUOTE);
+ SETPARBEGIN
+ inquote = 0;
+ while ((c = hgetc()) != '`' && !lexstop) {
+ if (c == '\\') {
+ c = hgetc();
+ if (c != '\n') {
+ add(c == '`' || c == '\\' || c == '$' ? Bnull : '\\');
+ add(c);
+ }
+ else if (!sub && isset(CSHJUNKIEQUOTES))
+ add(c);
+ } else {
+ if (!sub && isset(CSHJUNKIEQUOTES) && c == '\n') {
+ break;
+ }
+ add(c);
+ if (c == '\'') {
+ if ((inquote = !inquote))
+ STOPHIST
+ else
+ ALLOWHIST
+ }
+ }
+ }
+ if (inquote)
+ ALLOWHIST
+ cmdpop();
+ if (c != '`') {
+ unmatched = '`';
+ /* Not an error when called from bufferwords() */
+ if (!(lexflags & LEXFLAGS_ACTIVE))
+ peek = LEXERR;
+ goto brk;
+ }
+ c = Tick;
+ SETPAREND
+ break;
+ case LX2_DASH:
+ /*
+ * - shouldn't be treated as a special character unless
+ * we're in a pattern. Unfortunately, working out for
+ * sure in complicated expressions whether we're in a
+ * pattern is tricky. So we'll make it special and
+ * turn it back any time we don't need it special.
+ * This is not ideal as it's a lot of work.
+ */
+ c = Dash;
+ break;
+ case LX2_BANG:
+ /*
+ * Same logic as Dash, for ! to perform negation in range.
+ */
+ if (seen_brct)
+ c = Bang;
+ else
+ c = '!';
+ }
+ add(c);
+ c = hgetc();
+ if (intpos)
+ intpos--;
+ if (lexstop)
+ break;
+ }
+ brk:
+ if (errflag) {
+ if (in_brace_param) {
+ while(bct-- >= in_brace_param)
+ cmdpop();
+ }
+ return LEXERR;
+ }
+ /* the character that ended the word belongs to the next token */
+ hungetc(c);
+ if (unmatched && !(lexflags & LEXFLAGS_ACTIVE))
+ zerr("unmatched %c", unmatched);
+ if (in_brace_param) {
+ while(bct-- >= in_brace_param)
+ cmdpop();
+ zerr("closing brace expected");
+ } else if (unset(IGNOREBRACES) && !sub && lexbuf.len > 1 &&
+ peek == STRING && lexbuf.ptr[-1] == '}' &&
+ lexbuf.ptr[-2] != Bnull) {
+ /* hack to get the {foo} command syntax to work */
+ lexbuf.ptr--;
+ lexbuf.len--;
+ lexstop = 0;
+ hungetc('}');
+ }
+ *lexbuf.ptr = '\0';
+ DPUTS(cmdsp != ocmdsp, "BUG: gettok: cmdstack changed.");
+ return peek;
+}
+
+
+/*
+ * Parse input as if in double quotes.
+ * endchar is the end character to expect.
+ * sub is passed on unchanged to nested calls of this function.  When
+ * it is non-zero, backslash-newline is always removed, a newline is
+ * never an error, and inside $[...] a backslash before a double quote
+ * is turned into Bnull.
+ * Return non-zero for error (character to unget), else zero
+ *
+ * The characters read are added to the current token with add().  If
+ * endchar is ')' or ']', or if infor is set, the text is treated as
+ * math: parentheses and square brackets must then balance before
+ * endchar is accepted.
+ */
+
+/**/
+static int
+dquote_parse(char endchar, int sub)
+{
+ /*
+ * pct, brct, bct: nesting depth of parentheses, square brackets and
+ * ${...} braces.
+ * intick: 0 outside backquotes, 1 inside backquotes, 2 inside single
+ * quotes within backquotes.
+ */
+ int pct = 0, brct = 0, bct = 0, intick = 0, err = 0;
+ int c;
+ int math = endchar == ')' || endchar == ']' || infor;
+ int zlemath = math && zlemetacs > zlemetall + addedx - inbufct;
+
+ while (((c = hgetc()) != endchar || bct ||
+ (math && ((pct > 0) || (brct > 0))) ||
+ intick) && !lexstop) {
+ cont:
+ switch (c) {
+ case '\\':
+ c = hgetc();
+ if (c != '\n') {
+ if (c == '$' || c == '\\' || (c == '}' && !intick && bct) ||
+ c == endchar || c == '`' ||
+ (endchar == ']' && (c == '[' || c == ']' ||
+ c == '(' || c == ')' ||
+ c == '{' || c == '}' ||
+ (c == '"' && sub))))
+ add(Bnull);
+ else {
+ /* lexstop is implicitly handled here */
+ add('\\');
+ goto cont;
+ }
+ } else if (sub || unset(CSHJUNKIEQUOTES) || endchar != '"')
+ continue;
+ break;
+ case '\n':
+ err = !sub && isset(CSHJUNKIEQUOTES) && endchar == '"';
+ break;
+ case '$':
+ if (intick)
+ break;
+ c = hgetc();
+ if (c == '(') {
+ add(Qstring);
+ switch (cmd_or_math_sub()) {
+ case CMD_OR_MATH_CMD:
+ c = Outpar;
+ break;
+
+ case CMD_OR_MATH_MATH:
+ c = Outparmath;
+ break;
+
+ default:
+ err = 1;
+ break;
+ }
+ } else if (c == '[') {
+ add(String);
+ add(Inbrack);
+ cmdpush(CS_MATHSUBST);
+ err = dquote_parse(']', sub);
+ cmdpop();
+ c = Outbrack;
+ } else if (c == '{') {
+ add(Qstring);
+ c = Inbrace;
+ cmdpush(CS_BRACEPAR);
+ bct++;
+ } else if (c == '$')
+ add(Qstring);
+ else {
+ hungetc(c);
+ lexstop = 0;
+ c = Qstring;
+ }
+ break;
+ case '}':
+ if (intick || !bct)
+ break;
+ c = Outbrace;
+ bct--;
+ cmdpop();
+ break;
+ case '`':
+ c = Qtick;
+ if (intick == 2)
+ ALLOWHIST
+ if ((intick = !intick)) {
+ SETPARBEGIN
+ cmdpush(CS_BQUOTE);
+ } else {
+ SETPAREND
+ cmdpop();
+ }
+ break;
+ case '\'':
+ if (!intick)
+ break;
+ if (intick == 1)
+ intick = 2, STOPHIST
+ else
+ intick = 1, ALLOWHIST
+ break;
+ case '(':
+ if (!math || !bct)
+ pct++;
+ break;
+ case ')':
+ /* in math, a ')' with no matching '(' is an error */
+ if (!math || !bct)
+ err = (!pct-- && math);
+ break;
+ case '[':
+ if (!math || !bct)
+ brct++;
+ break;
+ case ']':
+ /* in math, a ']' with no matching '[' is an error */
+ if (!math || !bct)
+ err = (!brct-- && math);
+ break;
+ case '"':
+ if (intick || (endchar != '"' && !bct))
+ break;
+ if (bct) {
+ add(Dnull);
+ cmdpush(CS_DQUOTE);
+ err = dquote_parse('"', sub);
+ cmdpop();
+ c = Dnull;
+ } else
+ err = 1;
+ break;
+ }
+ if (err || lexstop)
+ break;
+ add(c);
+ }
+ /* undo whatever history and command stack state is still open */
+ if (intick == 2)
+ ALLOWHIST
+ if (intick) {
+ cmdpop();
+ }
+ while (bct--)
+ cmdpop();
+ if (lexstop)
+ err = intick || endchar || err;
+ else if (err == 1) {
+ /*
+ * TODO: as far as I can see, this hack is used in gettokstr()
+ * to hungetc() a character on an error. However, I don't
+ * understand what that actually gets us, and we can't guarantee
+ * it's a character anyway, because of the previous test.
+ *
+ * We use the same feature in cmd_or_math where we actually do
+ * need to unget if we decide it's really a command substitution.
+ * We try to handle the other case by testing for lexstop.
+ */
+ err = c;
+ }
+ if (zlemath && zlemetacs <= zlemetall + 1 - inbufct)
+ inwhat = IN_MATH;
+ return err;
+}
+
+/*
+ * Tokenize the string *s using the double-quote parsing rules. This is
+ * normally done just before calling singsub().
+ *
+ * parsestr() is the noisy variant: a failed parse is reported through
+ * zerr() unless the ERRFLAG_INT bit is set in errflag, whereas
+ * parsestrnoerr() reports nothing. On failure *s is also passed
+ * through untokenize() before returning.
+ *
+ * On entry, *s must point to a string allocated from the stack of
+ * exactly the right length, i.e. strlen(*s) + 1, because the string is
+ * used as the lexical token string and that string's memory management
+ * demands this. Usually the input will therefore be the result of an
+ * immediately preceding dupstring().
+ *
+ * Returns whatever parsestrnoerr() returned: zero on success, non-zero
+ * on a parse error.
+ */
+
+/**/
+mod_export int
+parsestr(char **s)
+{
+    int status = parsestrnoerr(s);
+
+    if (!status)
+        return 0;
+
+    untokenize(*s);
+
+    /* Stay quiet if the interrupt bit is set in errflag. */
+    if (errflag & ERRFLAG_INT)
+        return status;
+
+    /* Only name the offending character if it is in the range 33..126. */
+    if (status > 32 && status < 127)
+        zerr("parse error near `%c'", status);
+    else
+        zerr("parse error");
+
+    return status;
+}
+
+/*
+ * Quiet variant of parsestr(): tokenize *s with dquote_parse() and
+ * return its result without reporting anything.
+ *
+ * A copy of the string is pushed as input while the caller's buffer
+ * itself is installed as the token string, so the result is built in
+ * place. If tokstr is non-NULL afterwards, *s is set to it. The
+ * lexical context is saved on entry and restored before returning.
+ */
+
+/**/
+mod_export int
+parsestrnoerr(char **s)
+{
+    int srclen = strlen(*s);
+    int status;
+
+    zcontext_save();
+    untokenize(*s);
+    inpush(dupstring(*s), 0, NULL);
+    strinbeg(0);
+
+    /* The caller's buffer becomes the token buffer. */
+    tokstr = *s;
+    lexbuf.ptr = tokstr;
+    lexbuf.len = 0;
+    lexbuf.siz = srclen + 1;
+
+    status = dquote_parse('\0', 1);
+
+    if (tokstr)
+        *s = tokstr;
+    *lexbuf.ptr = '\0';
+
+    strinend();
+    inpop();
+    DPUTS(cmdsp, "BUG: parsestr: cmdstack not empty.");
+    zcontext_restore();
+
+    return status;
+}
+
+/*
+ * Parse a subscript in string s.
+ * sub is passed down to dquote_parse().
+ * endchar is the final character.
+ * Return the next character, or NULL.
+ *
+ * More precisely: if s is empty or already starts with endchar, 0 is
+ * returned at once. Otherwise a copy of s is pushed as input and a
+ * second copy is used as the token buffer for dquote_parse(). The
+ * toklen bytes produced are then copied back over the start of s.
+ * On success the return value is s + toklen, i.e. a pointer to the
+ * position in s just after the tokenized text. On error the copied
+ * part of s is passed through untokenize() again and NULL is returned.
+ * The lexical context is saved on entry and restored before returning.
+ */
+/**/
+mod_export char *
+parse_subscript(char *s, int sub, int endchar)
+{
+ int l = strlen(s), err, toklen;
+ char *t;
+
+ if (!*s || *s == endchar)
+ return 0;
+ zcontext_save();
+ untokenize(t = dupstring(s));
+ inpush(t, 0, NULL);
+ strinbeg(0);
+ /*
+ * Warning to Future Generations:
+ *
+ * This way of passing the subscript through the lexer is brittle.
+ * Code above this for several layers assumes that when we tokenise
+ * the input it goes into the same place as the original string.
+ * However, the lexer may overwrite later bits of the string or
+ * reallocate it, in particular when expanding aliases. To get
+ * around this, we copy the string and then copy it back. This is a
+ * bit more robust but still relies on the underlying assumption of
+ * length preservation.
+ */
+ lexbuf.len = 0;
+ lexbuf.ptr = tokstr = dupstring(s);
+ lexbuf.siz = l + 1;
+ err = dquote_parse(endchar, sub);
+ toklen = (int)(lexbuf.ptr - tokstr);
+ DPUTS(toklen > l, "Bad length for parsed subscript");
+ memcpy(s, tokstr, toklen); /* exactly toklen bytes, no terminator: the rest of s is left as it was */
+ if (err) {
+ char *strend = s + toklen;
+ err = *strend; /* err is reused to hold the byte at strend */
+ *strend = '\0'; /* terminate temporarily so untokenize() stops at the copied part */
+ untokenize(s);
+ *strend = err; /* put the saved byte back */
+ s = NULL;
+ } else {
+ s += toklen;
+ }
+ strinend();
+ inpop();
+ DPUTS(cmdsp, "BUG: parse_subscript: cmdstack not empty.");
+ zcontext_restore();
+ return s;
+}
+
+/* Tokenize a string given in s. Parsing is done as if s were a normal *
+ * command-line argument but it may contain separators. This is used *
+ * to parse the right-hand side of ${...%...} substitutions.
+ *
+ * s is rewritten in place: a copy is pushed as input and s itself is
+ * installed as the token buffer for gettokstr().
+ *
+ * Returns 0 if s is empty or equal to nulstring (nothing is done), and
+ * 0 after a successful parse. Returns 1 if gettokstr() yields LEXERR,
+ * in which case s is passed through untokenize() again, or if the
+ * replacement text for a $'...' section comes out longer than the text
+ * it replaces. Under DEBUG, 1 is also returned, after printing a
+ * message to stderr, if the token is not STRING or errflag is set
+ * while noerrs is zero.
+ *
+ * On the way out errflag is set to the value it had straight after
+ * gettokstr(), combined with any ERRFLAG_INT bit present after the
+ * context was restored.
+ */
+
+/**/
+mod_export int
+parse_subst_string(char *s)
+{
+ int c, l = strlen(s), err;
+ char *ptr;
+ enum lextok ctok;
+
+ if (!*s || !strcmp(s, nulstring))
+ return 0;
+ zcontext_save();
+ untokenize(s);
+ inpush(dupstring(s), 0, NULL);
+ strinbeg(0);
+ lexbuf.len = 0;
+ lexbuf.ptr = tokstr = s;
+ lexbuf.siz = l + 1;
+ c = hgetc();
+ ctok = gettokstr(c, 1);
+ err = errflag; /* remember errflag before zcontext_restore() is called */
+ strinend();
+ inpop();
+ DPUTS(cmdsp, "BUG: parse_subst_string: cmdstack not empty.");
+ zcontext_restore();
+ /* Keep any interrupt error status */
+ errflag = err | (errflag & ERRFLAG_INT);
+ if (ctok == LEXERR) {
+ untokenize(s);
+ return 1;
+ }
+#ifdef DEBUG
+ /*
+ * Historical note: we used to check here for olen (the value of lexbuf.len
+ * before zcontext_restore()) == l, but that's not necessarily the case if
+ * we stripped an RCQUOTE.
+ */
+ if (ctok != STRING || (errflag && !noerrs)) {
+ fprintf(stderr, "Oops. Bug in parse_subst_string: %s\n",
+ errflag ? "errflag" : "ctok != STRING");
+ fflush(stderr);
+ untokenize(s);
+ return 1;
+ }
+#endif
+ /* Check for $'...' quoting. This needs special handling. */
+ for (ptr = s; *ptr; )
+ {
+ if (*ptr == String && ptr[1] == Snull)
+ {
+ char *t;
+ int len, tlen, diff;
+ t = getkeystring(ptr + 2, &len, GETKEYS_DOLLARS_QUOTE, NULL);
+ len += 2; /* presumably accounts for the String and Snull skipped above -- confirm against getkeystring() */
+ tlen = strlen(t);
+ diff = len - tlen; /* number of bytes the text shrinks by; negative means it grew */
+ /*
+ * Yuk.
+ * parse_subst_string() currently handles strings in-place.
+ * That's not so easy to fix without knowing whether
+ * additional memory should come off the heap or
+ * otherwise. So we cheat by copying the unquoted string
+ * into place, unless it's too long. That's not the
+ * normal case, but I'm worried there are pathological
+ * cases with converting metafied multibyte strings.
+ * If someone can prove there aren't I will be very happy.
+ */
+ if (diff < 0) {
+ DPUTS(1, "$'...' subst too long: fix get_parse_string()");
+ return 1;
+ }
+ memcpy(ptr, t, tlen);
+ ptr += tlen;
+ if (diff > 0) {
+ char *dptr = ptr;
+ char *sptr = ptr + diff;
+ while ((*dptr++ = *sptr++)) /* slide the remainder down by diff bytes, terminator included */
+ ;
+ }
+ } else
+ ptr++;
+ }
+ return 0;
+}
+
+/*
+ * Report word positions; called from exalias().
+ *
+ * Compute the end position of the word just read from zlemetall,
+ * inbufct and addedx. If zlemetacs lies beyond it, do nothing.
+ * Otherwise update wb and we and clear lexflags: when zlemetacs is at
+ * or after the computed beginning of the word, wb and we are set to
+ * the word's bounds; when it is before, wb is set from zlemetacs and
+ * we is raised to wb if it was smaller.
+ */
+
+/**/
+static void
+gotword(void)
+{
+    int endpos = zlemetall + 1 - inbufct;
+    int begpos;
+
+    if (addedx == 2)
+        endpos++;
+
+    if (zlemetacs > endpos)
+        return;
+
+    begpos = zlemetall - wordbeg + addedx;
+    if (zlemetacs < begpos) {
+        wb = zlemetacs + addedx;
+        if (we < wb)
+            we = wb;
+    } else {
+        wb = begpos;
+        we = endpos;
+    }
+    lexflags = 0;
+}
+
+/* Check if current lex text matches an alias: 1 if so, else 0
+ *
+ * Nothing is looked up when zshlextext is NULL, when noaliases is set
+ * or ALIASESOPT is unset. With POSIXALIASES set, a lookup is only made
+ * for a STRING token whose text is not found in reswdtab.
+ *
+ * Ordinary aliases come from aliastab. One is used if it is not
+ * already in use and either it has the ALIAS_GLOBAL flag, or we are in
+ * command position with a STRING token, or inalmore is set. Its text
+ * is pushed as input with INP_ALIAS; unless lexstop is set, the next
+ * input character is peeked at first and a single space is pushed
+ * when that character is not blank. aliasspaceflag is set when the
+ * text of a non-global alias begins with a space.
+ *
+ * Failing that, the text after the last '.' in zshlextext is looked up
+ * in sufaliastab, provided the '.' is neither the first nor the last
+ * character, is not preceded by Meta, and we are in command position.
+ * On a match three inputs are pushed in this order: a copy of the
+ * word, a space, and the alias text.
+ *
+ * In both successful cases lexstop is cleared before returning 1.
+ */
+
+static int
+checkalias(void)
+{
+ Alias an;
+
+ if (!zshlextext)
+ return 0;
+
+ if (!noaliases && isset(ALIASESOPT) &&
+ (!isset(POSIXALIASES) ||
+ (tok == STRING && !reswdtab->getnode(reswdtab, zshlextext)))) {
+ char *suf;
+
+ an = (Alias) aliastab->getnode(aliastab, zshlextext);
+ if (an && !an->inuse &&
+ ((an->node.flags & ALIAS_GLOBAL) ||
+ (incmdpos && tok == STRING) || inalmore)) {
+ if (!lexstop) {
+ /*
+ * Tokens that don't require a space after, get one,
+ * because they are treated as if preceded by one.
+ */
+ int c = hgetc();
+ hungetc(c); /* only peeking: the character goes straight back */
+ if (!iblank(c))
+ inpush(" ", INP_ALIAS, 0);
+ }
+ inpush(an->text, INP_ALIAS, an);
+ if (an->text[0] == ' ' && !(an->node.flags & ALIAS_GLOBAL))
+ aliasspaceflag = 1;
+ lexstop = 0;
+ return 1;
+ }
+ if ((suf = strrchr(zshlextext, '.')) && suf[1] &&
+ suf > zshlextext && suf[-1] != Meta &&
+ (an = (Alias)sufaliastab->getnode(sufaliastab, suf+1)) &&
+ !an->inuse && incmdpos) {
+ inpush(dupstring(zshlextext), INP_ALIAS, an); /* only this push carries the alias node */
+ inpush(" ", INP_ALIAS, NULL);
+ inpush(an->text, INP_ALIAS, NULL);
+ lexstop = 0;
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+/* expand aliases and reserved words
+ *
+ * Returns 1 if checkalias() expanded an alias for the current token,
+ * otherwise 0.
+ *
+ * After hwend(), spckword() is called on tokstr when the shell is
+ * interactive with SHINSTDIN set, the token is a STRING that does not
+ * come from string input or an alias, correction is not disabled, and
+ * either CORRECTALL is set or CORRECT is set and we are in command
+ * position.
+ *
+ * With no token string, zshlextext is taken from tokstrings[tok] and,
+ * except for NEWLIN, the result of checkalias() is returned.
+ *
+ * Otherwise zshlextext is tokstr itself or, when tokstr contains
+ * tokens, a copy in which each token is replaced through ztokens[].
+ * That copy is declared inside this block, so every path that leaves it
+ * resets zshlextext to tokstr if it still points at the copy.
+ *
+ * For a STRING token, aliases are tried first (skipped when the text
+ * had tokens and POSIXALIASES is set). If none applies, a reserved
+ * word found in reswdtab replaces tok with its token value, updating
+ * inrepeat_ and setting incond for DINBRACK; this lookup happens in
+ * command position, or for a lone "}" when IGNOREBRACES and
+ * IGNORECLOSEBRACES are both unset. Failing that, "]]" while incond
+ * is non-zero becomes DOUTBRACK and clears incond, and a lone "!" while
+ * incond is 1 becomes BANG.
+ */
+
+/**/
+int
+exalias(void)
+{
+ Reswd rw;
+
+ hwend();
+ if (interact && isset(SHINSTDIN) && !strin && incasepat <= 0 &&
+ tok == STRING && !nocorrect && !(inbufflags & INP_ALIAS) &&
+ (isset(CORRECTALL) || (isset(CORRECT) && incmdpos)))
+ spckword(&tokstr, 1, incmdpos, 1);
+
+ if (!tokstr) {
+ zshlextext = tokstrings[tok];
+
+ if (tok == NEWLIN)
+ return 0;
+ return checkalias();
+ } else {
+ VARARR(char, copy, (strlen(tokstr) + 1));
+
+ if (has_token(tokstr)) {
+ char *p, *t;
+
+ zshlextext = p = copy;
+ for (t = tokstr;
+ (*p++ = itok(*t) ? ztokens[*t++ - Pound] : *t++);); /* copy up to and including the terminator */
+ } else
+ zshlextext = tokstr;
+
+ if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS)) {
+ int zp = lexflags;
+
+ gotword();
+ if ((zp & LEXFLAGS_ZLE) && !lexflags) { /* gotword() cleared lexflags: stop here */
+ if (zshlextext == copy)
+ zshlextext = tokstr;
+ return 0;
+ }
+ }
+
+ if (tok == STRING) {
+ /* Check for an alias */
+ if ((zshlextext != copy || !isset(POSIXALIASES)) && checkalias()) {
+ if (zshlextext == copy)
+ zshlextext = tokstr;
+ return 1;
+ }
+
+ /* Then check for a reserved word */
+ if ((incmdpos ||
+ (unset(IGNOREBRACES) && unset(IGNORECLOSEBRACES) &&
+ zshlextext[0] == '}' && !zshlextext[1])) &&
+ (rw = (Reswd) reswdtab->getnode(reswdtab, zshlextext))) {
+ tok = rw->token;
+ inrepeat_ = (tok == REPEAT);
+ if (tok == DINBRACK)
+ incond = 1;
+ } else if (incond && !strcmp(zshlextext, "]]")) {
+ tok = DOUTBRACK;
+ incond = 0;
+ } else if (incond == 1 && zshlextext[0] == '!' && !zshlextext[1])
+ tok = BANG;
+ }
+ inalmore = 0;
+ if (zshlextext == copy)
+ zshlextext = tokstr; /* do not leave zshlextext pointing at the block-local copy */
+ }
+ return 0;
+}
+
+/*
+ * Append the character c to the raw token buffer (tokstr_raw, tracked
+ * by lexbuf_raw). Does nothing unless lex_add_raw is set.
+ *
+ * When the stored length reaches the buffer size, the buffer is doubled
+ * with hrealloc(), the write pointer is re-based on the new buffer, and
+ * the newly added half is zero-filled.
+ */
+
+/**/
+void
+zshlex_raw_add(int c)
+{
+    int oldsiz, grown;
+
+    if (!lex_add_raw)
+        return;
+
+    *lexbuf_raw.ptr = c;
+    lexbuf_raw.ptr++;
+    lexbuf_raw.len++;
+
+    /* Still room left: nothing more to do. */
+    if (lexbuf_raw.len != lexbuf_raw.siz)
+        return;
+
+    oldsiz = lexbuf_raw.siz;
+    grown = oldsiz * 2;
+    tokstr_raw = (char *)hrealloc(tokstr_raw, oldsiz, grown);
+    lexbuf_raw.ptr = tokstr_raw + lexbuf_raw.len;
+    memset(lexbuf_raw.ptr, 0, grown - oldsiz);
+    lexbuf_raw.siz = grown;
+}
+
+/*
+ * Step the raw token buffer back by one character: both the write
+ * pointer and the length of lexbuf_raw are decremented. Does nothing
+ * unless lex_add_raw is set.
+ */
+
+/**/
+void
+zshlex_raw_back(void)
+{
+    if (lex_add_raw) {
+        lexbuf_raw.len--;
+        lexbuf_raw.ptr--;
+    }
+}
+
+/*
+ * Return a mark into the raw token buffer: the current length of
+ * lexbuf_raw plus offset, or 0 when lex_add_raw is not set. The value
+ * can later be handed to zshlex_raw_back_to_mark().
+ */
+
+/**/
+int
+zshlex_raw_mark(int offset)
+{
+    return lex_add_raw ? lexbuf_raw.len + offset : 0;
+}
+
+/*
+ * Rewind the raw token buffer to mark: the length of lexbuf_raw becomes
+ * mark and its write pointer becomes tokstr_raw + mark. Does nothing
+ * unless lex_add_raw is set.
+ */
+
+/**/
+void
+zshlex_raw_back_to_mark(int mark)
+{
+    if (lex_add_raw) {
+        lexbuf_raw.len = mark;
+        lexbuf_raw.ptr = tokstr_raw + mark;
+    }
+}
+
+/*
+ * Skip (...) for command-style substitutions: $(...), <(...), >(...)
+ *
+ * In order to ensure we don't stop at closing parentheses with
+ * some other syntactic significance, we'll parse the input until
+ * we find an unmatched closing parenthesis. However, we'll throw
+ * away the result of the parsing and just keep the string we've built
+ * up on the way.
+ *
+ * Two implementations are selected at compile time. With
+ * ZSH_OLD_SKIPCOMM defined, the input is scanned character by
+ * character, counting parentheses and copying quoted sections,
+ * backquoted sections and comments through without interpreting them.
+ * Otherwise the text is handed to parse_event(OUTPAR) while the raw
+ * input is captured through lex_add_raw, tokstr_raw and lexbuf_raw;
+ * the lexer and parser state is saved around that call with
+ * zcontext_save_partial() and zcontext_restore_partial(), and infor is
+ * cleared for the duration and restored afterwards.
+ *
+ * Both variants bracket their work with cmdpush(CS_CMDSUBST) and
+ * cmdpop(), and both return lexstop.
+ */
+
+/**/
+static int
+skipcomm(void)
+{
+#ifdef ZSH_OLD_SKIPCOMM
+ int pct = 1, c, start = 1; /* pct: depth of open parentheses; start: a '#' here begins a comment */
+
+ cmdpush(CS_CMDSUBST);
+ SETPARBEGIN
+ c = Inpar;
+ do {
+ int iswhite;
+ add(c);
+ c = hgetc();
+ if (itok(c) || lexstop)
+ break;
+ iswhite = inblank(c);
+ switch (c) {
+ case '(':
+ pct++;
+ break;
+ case ')':
+ pct--;
+ break;
+ case '\\':
+ add(c);
+ c = hgetc();
+ break;
+ case '\'': {
+ int strquote = lexbuf.ptr[-1] == '$'; /* $'...' form: a backslash takes the next character with it */
+ add(c);
+ STOPHIST
+ while ((c = hgetc()) != '\'' && !lexstop) {
+ if (c == '\\' && strquote) {
+ add(c);
+ c = hgetc();
+ }
+ add(c);
+ }
+ ALLOWHIST
+ break;
+ }
+ case '\"':
+ add(c);
+ while ((c = hgetc()) != '\"' && !lexstop)
+ if (c == '\\') {
+ add(c);
+ add(hgetc());
+ } else
+ add(c);
+ break;
+ case '`':
+ add(c);
+ while ((c = hgetc()) != '`' && !lexstop)
+ if (c == '\\')
+ add(c), add(hgetc());
+ else
+ add(c);
+ break;
+ case '#':
+ if (start) {
+ add(c);
+ while ((c = hgetc()) != '\n' && !lexstop)
+ add(c);
+ iswhite = 1; /* the loop stopped on the newline (or on lexstop) */
+ }
+ break;
+ }
+ start = iswhite;
+ }
+ while (pct);
+ if (!lexstop)
+ SETPAREND
+ cmdpop();
+ return lexstop;
+#else
+ char *new_tokstr;
+ int new_lexstop, new_lex_add_raw;
+ int save_infor = infor;
+ struct lexbufstate new_lexbuf;
+
+ infor = 0;
+ cmdpush(CS_CMDSUBST);
+ SETPARBEGIN
+ add(Inpar);
+
+ new_lex_add_raw = lex_add_raw + 1; /* one level deeper than the caller */
+ if (!lex_add_raw) {
+ /*
+ * We'll combine the string so far with the input
+ * read in for the command substitution. To do this
+ * we'll just propagate the current tokstr etc. as the
+ * variables used for adding raw input, and
+ * ensure we swap those for the real tokstr etc. at the end.
+ *
+ * However, we need to save and restore the rest of the
+ * lexical and parse state as we're effectively parsing
+ * an internal string. Because we're still parsing it from
+ * the original input source (we have to --- we don't know
+ * when to stop inputting it otherwise and can't rely on
+ * the input being recoverable until we've read it) we need
+ * to keep the same history context.
+ */
+ new_tokstr = tokstr;
+ new_lexbuf = lexbuf;
+
+ /*
+ * If we're expanding an alias at this point, we need the whole
+ * remaining text as part of the string for the command in
+ * parentheses, so don't backtrack. This is different from the
+ * usual case where the alias is fully within the command, where
+ * we want the unexpanded text so that it will be expanded
+ * again when the command in the parentheses is executed.
+ *
+ * I never wanted to be a software engineer, you know.
+ */
+ if (inbufflags & INP_ALIAS)
+ inbufflags |= INP_RAW_KEEP;
+ zcontext_save_partial(ZCONTEXT_LEX|ZCONTEXT_PARSE);
+ hist_in_word(1);
+ } else {
+ /*
+ * Set up for nested command substitution, however
+ * we don't actually need the string until we get
+ * back to the top level and recover the lot.
+ * The $() body just appears empty.
+ *
+ * We do need to propagate the raw variables which would
+ * otherwise be cleared, though.
+ */
+ new_tokstr = tokstr_raw;
+ new_lexbuf = lexbuf_raw;
+
+ zcontext_save_partial(ZCONTEXT_LEX|ZCONTEXT_PARSE);
+ }
+ tokstr_raw = new_tokstr;
+ lexbuf_raw = new_lexbuf;
+ lex_add_raw = new_lex_add_raw;
+ /*
+ * Don't do any ZLE specials down here: they're only needed
+ * when we return the string from the recursive parse.
+ * (TBD: this probably means we should be initialising lexflags
+ * more consistently.)
+ *
+ * Note that in that case we're still using the ZLE line reading
+ * function at the history layer --- this is consistent with the
+ * intention of maintaining the history and input layers across
+ * the recursive parsing.
+ *
+ * Also turn off LEXFLAGS_NEWLINE because this is already skipping
+ * across the entire construct, and parse_event() needs embedded
+ * newlines to be "real" when looking for the OUTPAR token.
+ */
+ lexflags &= ~(LEXFLAGS_ZLE|LEXFLAGS_NEWLINE);
+ dbparens = 0; /* restored by zcontext_restore_partial() */
+
+ if (!parse_event(OUTPAR) || tok != OUTPAR) {
+ if (strin) {
+ /*
+ * Get the rest of the string raw since we don't
+ * know where this token ends.
+ */
+ while (!lexstop)
+ (void)ingetc();
+ } else
+ lexstop = 1; /* not string input: just flag that lexing cannot continue */
+ }
+ /* Outpar lexical token gets added in caller if present */
+
+ /*
+ * We're going to keep the full raw input string
+ * as the current token string after popping the stack.
+ */
+ new_tokstr = tokstr_raw;
+ new_lexbuf = lexbuf_raw;
+ /*
+ * We're also going to propagate the lexical state:
+ * if we couldn't parse the command substitution we
+ * can't continue.
+ */
+ new_lexstop = lexstop;
+
+ zcontext_restore_partial(ZCONTEXT_LEX|ZCONTEXT_PARSE);
+
+ if (lex_add_raw) {
+ /*
+ * Keep going, so retain the raw variables.
+ */
+ tokstr_raw = new_tokstr;
+ lexbuf_raw = new_lexbuf;
+ } else {
+ if (!new_lexstop) {
+ /* Ignore the ')' added on input */
+ new_lexbuf.len--;
+ *--new_lexbuf.ptr = '\0';
+ }
+
+ /*
+ * Convince the rest of lex.c we were examining a string
+ * all along.
+ */
+ tokstr = new_tokstr;
+ lexbuf = new_lexbuf;
+ lexstop = new_lexstop;
+ hist_in_word(0);
+ }
+
+ if (!lexstop)
+ SETPAREND
+ cmdpop();
+ infor = save_infor;
+
+ return lexstop;
+#endif
+}