diff options
Diffstat (limited to 'dotfiles/system/.zsh/modules/Src/lex.c')
| -rw-r--r-- | dotfiles/system/.zsh/modules/Src/lex.c | 2203 |
1 files changed, 2203 insertions, 0 deletions
diff --git a/dotfiles/system/.zsh/modules/Src/lex.c b/dotfiles/system/.zsh/modules/Src/lex.c new file mode 100644 index 0000000..44ad880 --- /dev/null +++ b/dotfiles/system/.zsh/modules/Src/lex.c @@ -0,0 +1,2203 @@ +/* + * lex.c - lexical analysis + * + * This file is part of zsh, the Z shell. + * + * Copyright (c) 1992-1997 Paul Falstad + * All rights reserved. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and to distribute modified versions of this software for any + * purpose, provided that the above copyright notice and the following + * two paragraphs appear in all copies of this software. + * + * In no event shall Paul Falstad or the Zsh Development Group be liable + * to any party for direct, indirect, special, incidental, or consequential + * damages arising out of the use of this software and its documentation, + * even if Paul Falstad and the Zsh Development Group have been advised of + * the possibility of such damage. + * + * Paul Falstad and the Zsh Development Group specifically disclaim any + * warranties, including, but not limited to, the implied warranties of + * merchantability and fitness for a particular purpose. The software + * provided hereunder is on an "as is" basis, and Paul Falstad and the + * Zsh Development Group have no obligation to provide maintenance, + * support, updates, enhancements, or modifications. + * + */ + +#include "zsh.mdh" +#include "lex.pro" + +#define LEX_HEAP_SIZE (32) + +/* tokens */ + +/**/ +mod_export char ztokens[] = "#$^*(())$=|{}[]`<>>?~`,-!'\"\\\\"; + +/* parts of the current token */ + +/**/ +char *zshlextext; +/**/ +mod_export char *tokstr; +/**/ +mod_export enum lextok tok; +/**/ +mod_export int tokfd; + +/* + * Line number at which the first character of a token was found. + * We always set this in gettok(), which is always called from + * zshlex() unless we have reached an error. 
So it is always + * valid when parsing. It is not useful during execution + * of the parsed structure. + */ + +/**/ +zlong toklineno; + +/* lexical analyzer error flag */ + +/**/ +mod_export int lexstop; + +/* if != 0, this is the first line of the command */ + +/**/ +mod_export int isfirstln; + +/* if != 0, this is the first char of the command (not including white space) */ + +/**/ +int isfirstch; + +/* flag that an alias should be expanded after expansion ending in space */ + +/**/ +int inalmore; + +/* + * Don't do spelling correction. + * Bit 1 is only valid for the current word. It's + * set when we detect a lookahead that stops the word from + * needing correction. + */ + +/**/ +int nocorrect; + +/* + * TBD: the following exported variables are part of the non-interface + * with ZLE for completion. They are poorly named and the whole + * scheme is incredibly brittle. One piece of robustness is applied: + * the variables are only set if LEXFLAGS_ZLE is set. Improvements + * should therefore concentrate on areas with this flag set. + * + * Cursor position and line length in zle when the line is + * metafied for access from the main shell. + */ + +/**/ +mod_export int zlemetacs, zlemetall; + +/* inwhat says what exactly we are in * + * (its value is one of the IN_* things). */ + +/**/ +mod_export int inwhat; + +/* 1 if x added to complete in a blank between words */ + +/**/ +mod_export int addedx; + +/* wb and we hold the beginning/end position of the word we are completing. */ + +/**/ +mod_export int wb, we; +/* wordbeg: set by gettok() from inbufct at the start of each token when LEXFLAGS_ZLE is set and the input is not alias text */ +/**/ +mod_export int wordbeg; +/* parbegin, parend: set from inbufct by the SETPARBEGIN and SETPAREND macros */ +/**/ +mod_export int parbegin; + +/**/ +mod_export int parend; + + +/* 1 if aliases should not be expanded */ + +/**/ +mod_export int noaliases; + +/* + * If non-zero, we are parsing a line sent to us by the editor, or some + * other string that's not part of standard command input (e.g. eval is + * part of normal command input). + * + * Set of bits from LEXFLAGS_*.
+ * + * Note that although it is passed into the lexer as an input, the + * lexer can set it to zero after finding the word it's searching for. + * This only happens if the line being parsed actually does come from + * ZLE, and hence the bit LEXFLAGS_ZLE is set. + */ + +/**/ +mod_export int lexflags; + +/* don't recognize comments */ + +/**/ +mod_export int nocomments; + +/* add raw input characters while parsing command substitution */ + +/**/ +int lex_add_raw; + +/* variables associated with the above */ + +static char *tokstr_raw; +static struct lexbufstate lexbuf_raw; + +/* + * text of punctuation tokens + * + * The array has WHILE + 1 slots; the number noted beside some entries + * is that entry's index. Only the entries up to SEMIBAR are given an + * initializer here. + */ + +/**/ +mod_export char *tokstrings[WHILE + 1] = { + NULL, /* NULLTOK 0 */ + ";", /* SEPER */ + "\\n", /* NEWLIN */ + ";", /* SEMI */ + ";;", /* DSEMI */ + "&", /* AMPER 5 */ + "(", /* INPAR */ + ")", /* OUTPAR */ + "||", /* DBAR */ + "&&", /* DAMPER */ + ">", /* OUTANG 10 */ + ">|", /* OUTANGBANG */ + ">>", /* DOUTANG */ + ">>|", /* DOUTANGBANG */ + "<", /* INANG */ + "<>", /* INOUTANG 15 */ + "<<", /* DINANG */ + "<<-", /* DINANGDASH */ + "<&", /* INANGAMP */ + ">&", /* OUTANGAMP */ + "&>", /* AMPOUTANG 20 */ + "&>|", /* OUTANGAMPBANG */ + ">>&", /* DOUTANGAMP */ + ">>&|", /* DOUTANGAMPBANG */ + "<<<", /* TRINANG */ + "|", /* BAR 25 */ + "|&", /* BARAMP */ + "()", /* INOUTPAR */ + "((", /* DINPAR */ + "))", /* DOUTPAR */ + "&|", /* AMPERBANG 30 */ + ";&", /* SEMIAMP */ + ";|", /* SEMIBAR */ +}; + +/* + * lexical state + * + * dbparens: set by gettok() when it returns DINPAR for a (( seen while + * infor is non-zero; while it is set gettok() reads its input with + * dquote_parse() as arithmetic, and clears it on returning DOUTPAR. + * lexbuf: state of the buffer holding the token string being built; + * add() appends to it. + */ + +static int dbparens; +static struct lexbufstate lexbuf = { NULL, 256, 0 }; + +/* + * save lexical context + * + * Copies the lexer variables listed below into *ls, then clears the + * token string pointers (tokstr, zshlextext, lexbuf.ptr, tokstr_raw, + * lexbuf_raw.ptr), sets lexbuf.siz back to 256 and zeroes lexbuf_raw.siz, + * lexbuf_raw.len and lex_add_raw. toplevel is not used. + */ + +/**/ +void +lex_context_save(struct lex_stack *ls, int toplevel) +{ + (void)toplevel; + + ls->dbparens = dbparens; + ls->isfirstln = isfirstln; + ls->isfirstch = isfirstch; + ls->lexflags = lexflags; + + ls->tok = tok; + ls->tokstr = tokstr; + ls->zshlextext = zshlextext; + ls->lexbuf = lexbuf; + ls->lex_add_raw = lex_add_raw; + ls->tokstr_raw = tokstr_raw; + ls->lexbuf_raw = lexbuf_raw; + ls->lexstop = lexstop; + ls->toklineno =
toklineno; + + tokstr = zshlextext = lexbuf.ptr = NULL; + lexbuf.siz = 256; + tokstr_raw = lexbuf_raw.ptr = NULL; + lexbuf_raw.siz = lexbuf_raw.len = lex_add_raw = 0; +} + +/* + * restore lexical context + * + * Copies back from *ls every variable that lex_context_save() stored + * there. toplevel is not used. + */ + +/**/ +mod_export void +lex_context_restore(const struct lex_stack *ls, int toplevel) +{ + (void)toplevel; + + dbparens = ls->dbparens; + isfirstln = ls->isfirstln; + isfirstch = ls->isfirstch; + lexflags = ls->lexflags; + tok = ls->tok; + tokstr = ls->tokstr; + zshlextext = ls->zshlextext; + lexbuf = ls->lexbuf; + lex_add_raw = ls->lex_add_raw; + tokstr_raw = ls->tokstr_raw; + lexbuf_raw = ls->lexbuf_raw; + lexstop = ls->lexstop; + toklineno = ls->toklineno; +} +/* + * Read the next token into tok. + * + * Does nothing if tok is already LEXERR. Otherwise gettok() is called + * until it returns ENDINPUT or exalias() returns zero for the token. + * All bits of nocorrect except the lowest are then cleared. + * + * At NEWLIN or ENDINPUT each entry queued on hdocs is read with + * gethere() and handed to setheredoc(); if gethere() returns NULL an + * error is reported, the rest of the queue is freed and tok becomes + * LEXERR. + * + * Finally isnewlin is updated, and SEMI (and NEWLIN, unless + * LEXFLAGS_NEWLINE is set) is turned into SEPER. + */ +/**/ +void +zshlex(void) +{ + if (tok == LEXERR) + return; + do { + if (inrepeat_) + ++inrepeat_; + if (inrepeat_ == 3 && isset(SHORTLOOPS)) + incmdpos = 1; + tok = gettok(); + } while (tok != ENDINPUT && exalias()); + nocorrect &= 1; + if (tok == NEWLIN || tok == ENDINPUT) { + while (hdocs) { + struct heredocs *next = hdocs->next; + char *doc, *munged_term; + + hwbegin(0); + cmdpush(hdocs->type == REDIR_HEREDOC ? CS_HEREDOC : CS_HEREDOCD); + munged_term = dupstring(hdocs->str); + STOPHIST + doc = gethere(&munged_term, hdocs->type); + ALLOWHIST + cmdpop(); + hwend(); + if (!doc) { + zerr("here document too large"); + while (hdocs) { + next = hdocs->next; + zfree(hdocs, sizeof(struct heredocs)); + hdocs = next; + } + tok = LEXERR; + break; + } + setheredoc(hdocs->pc, REDIR_HERESTR, doc, hdocs->str, + munged_term); + zfree(hdocs, sizeof(struct heredocs)); + hdocs = next; + } + } + if (tok != NEWLIN) + isnewlin = 0; + else + isnewlin = (inbufct) ?
-1 : 1; + if (tok == SEMI || (tok == NEWLIN && !(lexflags & LEXFLAGS_NEWLINE))) + tok = SEPER; +} +/* + * Call zshlex() and then update the parser context flags from the + * token it produced: incmdpos becomes 1 after the tokens in the first + * case group and 0 after those in the second; infor becomes 2 after + * FOR and 0 after any other token except DINPAR. After a redirection + * operator, FOR, FOREACH or SELECT the current incmdpos is kept in + * oldpos, incmdpos is cleared and inredir is set; on the next call + * incmdpos is put back from oldpos and inredir is cleared. + */ +/**/ +mod_export void +ctxtlex(void) +{ + static int oldpos; + + zshlex(); + switch (tok) { + case SEPER: + case NEWLIN: + case SEMI: + case DSEMI: + case SEMIAMP: + case SEMIBAR: + case AMPER: + case AMPERBANG: + case INPAR: + case INBRACE: + case DBAR: + case DAMPER: + case BAR: + case BARAMP: + case INOUTPAR: + case DOLOOP: + case THEN: + case ELIF: + case ELSE: + case DOUTBRACK: + incmdpos = 1; + break; + case STRING: + case TYPESET: + /* case ENVSTRING: */ + case ENVARRAY: + case OUTPAR: + case CASE: + case DINBRACK: + incmdpos = 0; + break; + + default: + /* nothing to do, keep compiler happy */ + break; + } + if (tok != DINPAR) + infor = tok == FOR ? 2 : 0; + if (IS_REDIROP(tok) || tok == FOR || tok == FOREACH || tok == SELECT) { + inredir = 1; + oldpos = incmdpos; + incmdpos = 0; + } else if (inredir) { + incmdpos = oldpos; + inredir = 0; + } +} +/* + * Action codes stored in lexact1[] (LX1_*) and lexact2[] (LX2_*). + * initlextabs() gives each character of its lx1 and lx2 strings the + * character's index in that string as its code, so these values have + * to agree with the character positions there. + */ +#define LX1_BKSLASH 0 +#define LX1_COMMENT 1 +#define LX1_NEWLIN 2 +#define LX1_SEMI 3 +#define LX1_AMPER 5 +#define LX1_BAR 6 +#define LX1_INPAR 7 +#define LX1_OUTPAR 8 +#define LX1_INANG 13 +#define LX1_OUTANG 14 +#define LX1_OTHER 15 + +#define LX2_BREAK 0 +#define LX2_OUTPAR 1 +#define LX2_BAR 2 +#define LX2_STRING 3 +#define LX2_INBRACK 4 +#define LX2_OUTBRACK 5 +#define LX2_TILDE 6 +#define LX2_INPAR 7 +#define LX2_INBRACE 8 +#define LX2_OUTBRACE 9 +#define LX2_OUTANG 10 +#define LX2_INANG 11 +#define LX2_EQUALS 12 +#define LX2_BKSLASH 13 +#define LX2_QUOTE 14 +#define LX2_DQUOTE 15 +#define LX2_BQUOTE 16 +#define LX2_COMMA 17 +#define LX2_DASH 18 +#define LX2_BANG 19 +#define LX2_OTHER 20 +#define LX2_META 21 + +static unsigned char lexact1[256], lexact2[256], lextok2[256]; +/* + * Fill the three character tables above. Every entry starts out as + * LX1_OTHER and LX2_OTHER, with lextok2[] mapping each character to + * itself. The characters of lx1 and lx2 then get their string index + * as action code, lexact2[] gets LX2_BREAK for & and LX2_META for + * Meta, and lextok2[] maps eight glob and expansion characters to + * Star, Quest, Inbrace, Inbrack, String, Tilde, Pound and Hat. + */ +/**/ +void +initlextabs(void) +{ + int t0; + static char *lx1 = "\\q\n;!&|(){}[]<>"; + static char *lx2 = ";)|$[]~({}><=\\\'\"`,-!"; + + for (t0 = 0; t0 != 256; t0++) { + lexact1[t0] = LX1_OTHER; + lexact2[t0] = LX2_OTHER; +
lextok2[t0] = t0; + } + for (t0 = 0; lx1[t0]; t0++) + lexact1[(int)lx1[t0]] = t0; + for (t0 = 0; lx2[t0]; t0++) + lexact2[(int)lx2[t0]] = t0; + lexact2['&'] = LX2_BREAK; + lexact2[STOUC(Meta)] = LX2_META; + lextok2['*'] = Star; + lextok2['?'] = Quest; + lextok2['{'] = Inbrace; + lextok2['['] = Inbrack; + lextok2['$'] = String; + lextok2['~'] = Tilde; + lextok2['#'] = Pound; + lextok2['^'] = Hat; +} + +/* + * initialize lexical state: clears nocorrect, dbparens and lexstop + * and sets tok to ENDINPUT + */ + +/**/ +void +lexinit(void) +{ + nocorrect = dbparens = lexstop = 0; + tok = ENDINPUT; +} + +/* + * add a char to the string buffer + * + * The character is stored first and the buffer is grown afterwards if + * that filled it, so on return there is always room for one more + * byte at lexbuf.ptr. Growing doubles lexbuf.siz, or uses inbufct + * instead when inbufct lies between the old size and the doubled one, + * reallocates tokstr with hrealloc() and zero-fills the added part. + */ + +/**/ +void +add(int c) +{ + *lexbuf.ptr++ = c; + if (lexbuf.siz == ++lexbuf.len) { + int newbsiz = lexbuf.siz * 2; + + if (newbsiz > inbufct && inbufct > lexbuf.siz) + newbsiz = inbufct; + + tokstr = (char *)hrealloc(tokstr, lexbuf.siz, newbsiz); + lexbuf.ptr = tokstr + lexbuf.len; + /* lexbuf.len equals the old lexbuf.siz here, so lexbuf.ptr is at the start of the newly allocated memory */ + memset(lexbuf.ptr, 0, newbsiz - lexbuf.siz); + lexbuf.siz = newbsiz; + } +} +/* + * SETPARBEGIN stores inbufct in parbegin, and SETPAREND stores it in + * parend, but only when lexing for ZLE (LEXFLAGS_ZLE) and not reading + * alias text (INP_ALIAS). SETPARBEGIN also requires the cursor test + * zlemetacs >= zlemetall+1-inbufct to hold. SETPAREND acts only while + * parbegin is set and parend is still -1: if the same cursor test + * holds it resets parbegin to -1, otherwise it records parend. + * NOTE(review): presumably this brackets the backquoted command that + * contains the cursor; confirm against the completion code. + */ +#define SETPARBEGIN { \ + if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS) && \ + zlemetacs >= zlemetall+1-inbufct) \ + parbegin = inbufct; \ + } +#define SETPAREND { \ + if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS) && \ + parbegin != -1 && parend == -1) { \ + if (zlemetacs >= zlemetall + 1 - inbufct) \ + parbegin = -1; \ + else \ + parend = inbufct; \ + } \ + } +/* result codes shared by cmd_or_math() and cmd_or_math_sub() */ +enum { + CMD_OR_MATH_CMD, + CMD_OR_MATH_MATH, + CMD_OR_MATH_ERR +}; + +/* + * Return one of the above. If it couldn't be + * parsed as math, but there was no gross error, it's a command.
+ */
+
+/*
+ * cs_type is the command stack code pushed while the text is examined.
+ * The text is read with dquote_parse() up to a closing parenthesis and
+ * appended to the token buffer; INP_APPEND is forced on for the duration
+ * and switched off again afterwards unless it was already on.
+ *
+ * If the answer is "command", everything that was read is pushed back
+ * onto the input, followed by one opening parenthesis, and the token
+ * buffer is wound back to the length it had on entry.
+ */
+static int
+cmd_or_math(int cs_type)
+{
+    const int startlen = lexbuf.len;
+    const int had_append = inbufflags & INP_APPEND;
+    int ch;
+
+    cmdpush(cs_type);
+    inbufflags |= INP_APPEND;
+    ch = dquote_parse(')', 0);
+    if (!had_append)
+        inbufflags &= ~INP_APPEND;
+    cmdpop();
+    *lexbuf.ptr = '\0';
+
+    if (ch) {
+        /* The parse failed; at end of input there is nothing to unget. */
+        if (lexstop)
+            return CMD_OR_MATH_ERR;
+    } else {
+        /* Parsed up to one ")": only a second ")" makes it math. */
+        ch = hgetc();
+        if (ch == ')')
+            return CMD_OR_MATH_MATH;
+        hungetc(ch);
+        lexstop = 0;
+        ch = ')';
+    }
+
+    /* Not math: hand back the terminator, then the buffered text. */
+    hungetc(ch);
+    lexstop = 0;
+    while (lexbuf.len > startlen && !(errflag & ERRFLAG_ERROR)) {
+        char last;
+
+        lexbuf.len--;
+        last = *--lexbuf.ptr;
+        /* a token goes back as the ordinary character listed in ztokens */
+        if (itok(last))
+            hungetc(ztokens[last - Pound]);
+        else
+            hungetc(last);
+    }
+    if (errflag)
+        return CMD_OR_MATH_ERR;
+    hungetc('(');
+    if (errflag)
+        return CMD_OR_MATH_ERR;
+    return CMD_OR_MATH_CMD;
+}
+
+
+/*
+ * Decide between $(( ... )) and $(...) once the "$(" has been read.
+ * The result is one of the codes cmd_or_math() returns.
+ *
+ * When another "(" follows, Inpar and "(" are added to the token and
+ * cmd_or_math() is tried.  For math the Inpar is rewritten in place as
+ * Inparmath and ")" is added.  For a command the two added characters
+ * are dropped again before skipcomm() takes over.
+ */
+static int
+cmd_or_math_sub(void)
+{
+    int next = hgetc();
+
+    if (next != '(') {
+        hungetc(next);
+        lexstop = 0;
+    } else {
+        /* kept as an offset: add() may move tokstr when it grows it */
+        int markpos = (int)(lexbuf.ptr - tokstr);
+        int kind;
+
+        add(Inpar);
+        add('(');
+        kind = cmd_or_math(CS_MATHSUBST);
+        switch (kind) {
+        case CMD_OR_MATH_MATH:
+            tokstr[markpos] = Inparmath;
+            add(')');
+            return CMD_OR_MATH_MATH;
+        case CMD_OR_MATH_ERR:
+            return CMD_OR_MATH_ERR;
+        default:
+            lexbuf.ptr -= 2;
+            lexbuf.len -= 2;
+            break;
+        }
+    }
+    if (skipcomm())
+        return CMD_OR_MATH_ERR;
+    return CMD_OR_MATH_CMD;
+}
+
+/* Check whether we're looking at valid numeric globbing syntax       *
+ * (/\<[0-9]*-[0-9]*\>/).  Call pointing just after the opening "<".  *
+ * Leaves the input in the same place, returning 0 or 1.
*/ + +/**/ +static int +isnumglob(void) +{ + int c, ec = '-', ret = 0; + int tbs = 256, n = 0; + char *tbuf = (char *)zalloc(tbs); + + while(1) { + c = hgetc(); + if(lexstop) { + lexstop = 0; + break; + } + tbuf[n++] = c; + if(!idigit(c)) { + if(c != ec) + break; + if(ec == '>') { + ret = 1; + break; + } + ec = '>'; + } + if(n == tbs) + tbuf = (char *)realloc(tbuf, tbs *= 2); + } + while(n--) + hungetc(tbuf[n]); + zfree(tbuf, tbs); + return ret; +} + +/**/ +static enum lextok +gettok(void) +{ + int c, d; + int peekfd = -1; + enum lextok peek; + + beginning: + tokstr = NULL; + while (iblank(c = hgetc()) && !lexstop); + toklineno = lineno; + if (lexstop) + return (errflag) ? LEXERR : ENDINPUT; + isfirstln = 0; + if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS)) + wordbeg = inbufct - (qbang && c == bangchar); + hwbegin(-1-(qbang && c == bangchar)); + /* word includes the last character read and possibly \ before ! */ + if (dbparens) { + lexbuf.len = 0; + lexbuf.ptr = tokstr = (char *) hcalloc(lexbuf.siz = LEX_HEAP_SIZE); + hungetc(c); + cmdpush(CS_MATH); + c = dquote_parse(infor ? ';' : ')', 0); + cmdpop(); + *lexbuf.ptr = '\0'; + if (!c && infor) { + infor--; + return DINPAR; + } + if (c || (c = hgetc()) != ')') { + hungetc(c); + return LEXERR; + } + dbparens = 0; + return DOUTPAR; + } else if (idigit(c)) { /* handle 1< foo */ + d = hgetc(); + if(d == '&') { + d = hgetc(); + if(d == '>') { + peekfd = c - '0'; + hungetc('>'); + c = '&'; + } else { + hungetc(d); + lexstop = 0; + hungetc('&'); + } + } else if (d == '>' || d == '<') { + peekfd = c - '0'; + c = d; + } else { + hungetc(d); + lexstop = 0; + } + } + + /* chars in initial position in word */ + + /* + * Handle comments. There are some special cases when this + * is not normal command input: lexflags implies we are examining + * a line lexically without it being used for normal command input. 
+ */ + if (c == hashchar && !nocomments && + (isset(INTERACTIVECOMMENTS) || + ((!lexflags || (lexflags & LEXFLAGS_COMMENTS)) && !expanding && + (!interact || unset(SHINSTDIN) || strin)))) { + /* History is handled here to prevent extra * + * newlines being inserted into the history. */ + + if (lexflags & LEXFLAGS_COMMENTS_KEEP) { + lexbuf.len = 0; + lexbuf.ptr = tokstr = + (char *)hcalloc(lexbuf.siz = LEX_HEAP_SIZE); + add(c); + } + hwabort(); + while ((c = ingetc()) != '\n' && !lexstop) { + hwaddc(c); + addtoline(c); + if (lexflags & LEXFLAGS_COMMENTS_KEEP) + add(c); + } + + if (errflag) + peek = LEXERR; + else { + if (lexflags & LEXFLAGS_COMMENTS_KEEP) { + *lexbuf.ptr = '\0'; + if (!lexstop) + hungetc(c); + peek = STRING; + } else { + hwend(); + hwbegin(0); + hwaddc('\n'); + addtoline('\n'); + /* + * If splitting a line and removing comments, + * we don't want a newline token since it's + * treated specially. + */ + if ((lexflags & LEXFLAGS_COMMENTS_STRIP) && lexstop) + peek = ENDINPUT; + else + peek = NEWLIN; + } + } + return peek; + } + switch (lexact1[STOUC(c)]) { + case LX1_BKSLASH: + d = hgetc(); + if (d == '\n') + goto beginning; + hungetc(d); + lexstop = 0; + break; + case LX1_NEWLIN: + return NEWLIN; + case LX1_SEMI: + d = hgetc(); + if(d == ';') + return DSEMI; + else if(d == '&') + return SEMIAMP; + else if (d == '|') + return SEMIBAR; + hungetc(d); + lexstop = 0; + return SEMI; + case LX1_AMPER: + d = hgetc(); + if (d == '&') + return DAMPER; + else if (d == '!' || d == '|') + return AMPERBANG; + else if (d == '>') { + tokfd = peekfd; + d = hgetc(); + if (d == '!' || d == '|') + return OUTANGAMPBANG; + else if (d == '>') { + d = hgetc(); + if (d == '!' 
|| d == '|') + return DOUTANGAMPBANG; + hungetc(d); + lexstop = 0; + return DOUTANGAMP; + } + hungetc(d); + lexstop = 0; + return AMPOUTANG; + } + hungetc(d); + lexstop = 0; + return AMPER; + case LX1_BAR: + d = hgetc(); + if (d == '|' && !incasepat) + return DBAR; + else if (d == '&') + return BARAMP; + hungetc(d); + lexstop = 0; + return BAR; + case LX1_INPAR: + d = hgetc(); + if (d == '(') { + if (infor) { + dbparens = 1; + return DINPAR; + } + if (incmdpos || (isset(SHGLOB) && !isset(KSHGLOB))) { + lexbuf.len = 0; + lexbuf.ptr = tokstr = (char *) + hcalloc(lexbuf.siz = LEX_HEAP_SIZE); + switch (cmd_or_math(CS_MATH)) { + case CMD_OR_MATH_MATH: + return DINPAR; + + case CMD_OR_MATH_CMD: + /* + * Not math, so we don't return the contents + * as a string in this case. + */ + tokstr = NULL; + return INPAR; + + case CMD_OR_MATH_ERR: + /* + * LEXFLAGS_ACTIVE means we came from bufferwords(), + * so we treat as an incomplete math expression + */ + if (lexflags & LEXFLAGS_ACTIVE) + tokstr = dyncat("((", tokstr ? 
tokstr : ""); + /* fall through */ + + default: + return LEXERR; + } + } + } else if (d == ')') + return INOUTPAR; + hungetc(d); + lexstop = 0; + if (!(isset(SHGLOB) || incond == 1 || incmdpos)) + break; + return INPAR; + case LX1_OUTPAR: + return OUTPAR; + case LX1_INANG: + d = hgetc(); + if (d == '(') { + hungetc(d); + lexstop = 0; + unpeekfd: + if(peekfd != -1) { + hungetc(c); + c = '0' + peekfd; + } + break; + } + if (d == '>') { + peek = INOUTANG; + } else if (d == '<') { + int e = hgetc(); + + if (e == '(') { + hungetc(e); + hungetc(d); + peek = INANG; + } else if (e == '<') + peek = TRINANG; + else if (e == '-') + peek = DINANGDASH; + else { + hungetc(e); + lexstop = 0; + peek = DINANG; + } + } else if (d == '&') { + peek = INANGAMP; + } else { + hungetc(d); + if(isnumglob()) + goto unpeekfd; + peek = INANG; + } + tokfd = peekfd; + return peek; + case LX1_OUTANG: + d = hgetc(); + if (d == '(') { + hungetc(d); + goto unpeekfd; + } else if (d == '&') { + d = hgetc(); + if (d == '!' || d == '|') + peek = OUTANGAMPBANG; + else { + hungetc(d); + lexstop = 0; + peek = OUTANGAMP; + } + } else if (d == '!' || d == '|') + peek = OUTANGBANG; + else if (d == '>') { + d = hgetc(); + if (d == '&') { + d = hgetc(); + if (d == '!' || d == '|') + peek = DOUTANGAMPBANG; + else { + hungetc(d); + lexstop = 0; + peek = DOUTANGAMP; + } + } else if (d == '!' || d == '|') + peek = DOUTANGBANG; + else if (d == '(') { + hungetc(d); + hungetc('>'); + peek = OUTANG; + } else { + hungetc(d); + lexstop = 0; + peek = DOUTANG; + if (isset(HISTALLOWCLOBBER)) + hwaddc('|'); + } + } else { + hungetc(d); + lexstop = 0; + peek = OUTANG; + if (!incond && isset(HISTALLOWCLOBBER)) + hwaddc('|'); + } + tokfd = peekfd; + return peek; + } + + /* we've started a string, now get the * + * rest of it, performing tokenization */ + return gettokstr(c, 0); +} + +/* + * Get the remains of a token string. This has two uses. 
+ * When called from gettok(), with sub = 0, we have already identified + * any interesting initial character and want to get the rest of + * what we now know is a string. However, the string may still include + * metacharacters and potentially substitutions. + * + * When called from parse_subst_string() with sub = 1, we are not + * fully parsing a command line, merely tokenizing a string. + * In this case we always add characters to the parsed string + * unless there is a parse error. + */ + +/**/ +static enum lextok +gettokstr(int c, int sub) +{ + int bct = 0, pct = 0, brct = 0, seen_brct = 0, fdpar = 0; + int intpos = 1, in_brace_param = 0; + int inquote, unmatched = 0; + enum lextok peek; +#ifdef DEBUG + int ocmdsp = cmdsp; +#endif + + peek = STRING; + if (!sub) { + lexbuf.len = 0; + lexbuf.ptr = tokstr = (char *) hcalloc(lexbuf.siz = LEX_HEAP_SIZE); + } + for (;;) { + int act; + int e; + int inbl = inblank(c); + + if (fdpar && !inbl && c != ')') + fdpar = 0; + + if (inbl && !in_brace_param && !pct) + act = LX2_BREAK; + else { + act = lexact2[STOUC(c)]; + c = lextok2[STOUC(c)]; + } + switch (act) { + case LX2_BREAK: + if (!in_brace_param && !sub) + goto brk; + break; + case LX2_META: + c = hgetc(); +#ifdef DEBUG + if (lexstop) { + fputs("BUG: input terminated by Meta\n", stderr); + fflush(stderr); + goto brk; + } +#endif + add(Meta); + break; + case LX2_OUTPAR: + if (fdpar) { + /* this is a single word `( )', treat as INOUTPAR */ + add(c); + *lexbuf.ptr = '\0'; + return INOUTPAR; + } + if ((sub || in_brace_param) && isset(SHGLOB)) + break; + if (!in_brace_param && !pct--) { + if (sub) { + pct = 0; + break; + } else + goto brk; + } + c = Outpar; + break; + case LX2_BAR: + if (!pct && !in_brace_param) { + if (sub) + break; + else + goto brk; + } + if (unset(SHGLOB) || (!sub && !in_brace_param)) + c = Bar; + break; + case LX2_STRING: + e = hgetc(); + if (e == '[') { + cmdpush(CS_MATHSUBST); + add(String); + add(Inbrack); + c = dquote_parse(']', sub); + cmdpop(); + 
if (c) { + peek = LEXERR; + goto brk; + } + c = Outbrack; + } else if (e == '(') { + add(String); + switch (cmd_or_math_sub()) { + case CMD_OR_MATH_CMD: + c = Outpar; + break; + + case CMD_OR_MATH_MATH: + c = Outparmath; + break; + + default: + peek = LEXERR; + goto brk; + } + } else { + if (e == '{') { + add(c); + c = Inbrace; + ++bct; + cmdpush(CS_BRACEPAR); + if (!in_brace_param) { + if ((in_brace_param = bct)) + seen_brct = 0; + } + } else { + hungetc(e); + lexstop = 0; + } + } + break; + case LX2_INBRACK: + if (!in_brace_param) { + brct++; + seen_brct = 1; + } + c = Inbrack; + break; + case LX2_OUTBRACK: + if (!in_brace_param) + brct--; + if (brct < 0) + brct = 0; + c = Outbrack; + break; + case LX2_INPAR: + if (isset(SHGLOB)) { + if (sub || in_brace_param) + break; + if (incasepat > 0 && !lexbuf.len) + return INPAR; + if (!isset(KSHGLOB) && lexbuf.len) + goto brk; + } + if (!in_brace_param) { + if (!sub) { + e = hgetc(); + hungetc(e); + lexstop = 0; + /* For command words, parentheses are only + * special at the start. But now we're tokenising + * the remaining string. So I don't see what + * the old incmdpos test here is for. + * pws 1999/6/8 + * + * Oh, no. + * func1( ) + * is a valid function definition in [k]sh. The best + * thing we can do, without really nasty lookahead tricks, + * is break if we find a blank after a parenthesis. At + * least this can't happen inside braces or brackets. We + * only allow this with SHGLOB (set for both sh and ksh). + * + * Things like `print @( |foo)' should still + * work, because [k]sh don't allow multiple words + * in a function definition, so we only do this + * in command position. + * pws 1999/6/14 + */ + if (e == ')' || (isset(SHGLOB) && inblank(e) && !bct && + !brct && !intpos && incmdpos)) { + /* + * Either a () token, or a command word with + * something suspiciously like a ksh function + * definition. + * The current word isn't spellcheckable. 
+ */ + nocorrect |= 2; + goto brk; + } + } + /* + * This also handles the [k]sh `foo( )' function definition. + * Maintain a variable fdpar, set as long as a single set of + * parentheses contains only space. Then if we get to the + * closing parenthesis and it is still set, we can assume we + * have a function definition. Only do this at the start of + * the word, since the (...) must be a separate token. + */ + if (!pct++ && isset(SHGLOB) && intpos && !bct && !brct) + fdpar = 1; + } + c = Inpar; + break; + case LX2_INBRACE: + if (isset(IGNOREBRACES) || sub) + c = '{'; + else { + if (!lexbuf.len && incmdpos) { + add('{'); + *lexbuf.ptr = '\0'; + return STRING; + } + if (in_brace_param) { + cmdpush(CS_BRACE); + } + bct++; + } + break; + case LX2_OUTBRACE: + if ((isset(IGNOREBRACES) || sub) && !in_brace_param) + break; + if (!bct) + break; + if (in_brace_param) { + cmdpop(); + } + if (bct-- == in_brace_param) + in_brace_param = 0; + c = Outbrace; + break; + case LX2_COMMA: + if (unset(IGNOREBRACES) && !sub && bct > in_brace_param) + c = Comma; + break; + case LX2_OUTANG: + if (in_brace_param || sub) + break; + e = hgetc(); + if (e != '(') { + hungetc(e); + lexstop = 0; + goto brk; + } + add(OutangProc); + if (skipcomm()) { + peek = LEXERR; + goto brk; + } + c = Outpar; + break; + case LX2_INANG: + if (isset(SHGLOB) && sub) + break; + e = hgetc(); + if (!(in_brace_param || sub) && e == '(') { + add(Inang); + if (skipcomm()) { + peek = LEXERR; + goto brk; + } + c = Outpar; + break; + } + hungetc(e); + if(isnumglob()) { + add(Inang); + while ((c = hgetc()) != '>') + add(c); + c = Outang; + break; + } + lexstop = 0; + if (in_brace_param || sub) + break; + goto brk; + case LX2_EQUALS: + if (!sub) { + if (intpos) { + e = hgetc(); + if (e != '(') { + hungetc(e); + lexstop = 0; + c = Equals; + } else { + add(Equals); + if (skipcomm()) { + peek = LEXERR; + goto brk; + } + c = Outpar; + } + } else if (peek != ENVSTRING && + (incmdpos || intypeset) && !bct && !brct) { + char 
*t = tokstr; + if (idigit(*t)) + while (++t < lexbuf.ptr && idigit(*t)); + else { + int sav = *lexbuf.ptr; + *lexbuf.ptr = '\0'; + t = itype_end(t, IIDENT, 0); + if (t < lexbuf.ptr) { + skipparens(Inbrack, Outbrack, &t); + } else { + *lexbuf.ptr = sav; + } + } + if (*t == '+') + t++; + if (t == lexbuf.ptr) { + e = hgetc(); + if (e == '(') { + *lexbuf.ptr = '\0'; + return ENVARRAY; + } + hungetc(e); + lexstop = 0; + peek = ENVSTRING; + intpos = 2; + } else + c = Equals; + } else + c = Equals; + } + break; + case LX2_BKSLASH: + c = hgetc(); + if (c == '\n') { + c = hgetc(); + if (!lexstop) + continue; + } else { + add(Bnull); + if (c == STOUC(Meta)) { + c = hgetc(); +#ifdef DEBUG + if (lexstop) { + fputs("BUG: input terminated by Meta\n", stderr); + fflush(stderr); + goto brk; + } +#endif + add(Meta); + } + } + if (lexstop) + goto brk; + break; + case LX2_QUOTE: { + int strquote = (lexbuf.len && lexbuf.ptr[-1] == String); + + add(Snull); + cmdpush(CS_QUOTE); + for (;;) { + STOPHIST + while ((c = hgetc()) != '\'' && !lexstop) { + if (strquote && c == '\\') { + c = hgetc(); + if (lexstop) + break; + /* + * Mostly we don't need to do anything special + * with escape backslashes or closing quotes + * inside $'...'; however in completion we + * need to be able to strip multiple backslashes + * neatly. 
+ */ + if (c == '\\' || c == '\'') + add(Bnull); + else + add('\\'); + } else if (!sub && isset(CSHJUNKIEQUOTES) && c == '\n') { + if (lexbuf.ptr[-1] == '\\') + lexbuf.ptr--, lexbuf.len--; + else + break; + } + add(c); + } + ALLOWHIST + if (c != '\'') { + unmatched = '\''; + /* Not an error when called from bufferwords() */ + if (!(lexflags & LEXFLAGS_ACTIVE)) + peek = LEXERR; + cmdpop(); + goto brk; + } + e = hgetc(); + if (e != '\'' || unset(RCQUOTES) || strquote) + break; + add(c); + } + cmdpop(); + hungetc(e); + lexstop = 0; + c = Snull; + break; + } + case LX2_DQUOTE: + add(Dnull); + cmdpush(CS_DQUOTE); + c = dquote_parse('"', sub); + cmdpop(); + if (c) { + unmatched = '"'; + /* Not an error when called from bufferwords() */ + if (!(lexflags & LEXFLAGS_ACTIVE)) + peek = LEXERR; + goto brk; + } + c = Dnull; + break; + case LX2_BQUOTE: + add(Tick); + cmdpush(CS_BQUOTE); + SETPARBEGIN + inquote = 0; + while ((c = hgetc()) != '`' && !lexstop) { + if (c == '\\') { + c = hgetc(); + if (c != '\n') { + add(c == '`' || c == '\\' || c == '$' ? Bnull : '\\'); + add(c); + } + else if (!sub && isset(CSHJUNKIEQUOTES)) + add(c); + } else { + if (!sub && isset(CSHJUNKIEQUOTES) && c == '\n') { + break; + } + add(c); + if (c == '\'') { + if ((inquote = !inquote)) + STOPHIST + else + ALLOWHIST + } + } + } + if (inquote) + ALLOWHIST + cmdpop(); + if (c != '`') { + unmatched = '`'; + /* Not an error when called from bufferwords() */ + if (!(lexflags & LEXFLAGS_ACTIVE)) + peek = LEXERR; + goto brk; + } + c = Tick; + SETPAREND + break; + case LX2_DASH: + /* + * - shouldn't be treated as a special character unless + * we're in a pattern. Unfortunately, working out for + * sure in complicated expressions whether we're in a + * pattern is tricky. So we'll make it special and + * turn it back any time we don't need it special. + * This is not ideal as it's a lot of work. + */ + c = Dash; + break; + case LX2_BANG: + /* + * Same logic as Dash, for ! to perform negation in range. 
+ */ + if (seen_brct) + c = Bang; + else + c = '!'; + } + add(c); + c = hgetc(); + if (intpos) + intpos--; + if (lexstop) + break; + } + brk: + if (errflag) { + if (in_brace_param) { + while(bct-- >= in_brace_param) + cmdpop(); + } + return LEXERR; + } + hungetc(c); + if (unmatched && !(lexflags & LEXFLAGS_ACTIVE)) + zerr("unmatched %c", unmatched); + if (in_brace_param) { + while(bct-- >= in_brace_param) + cmdpop(); + zerr("closing brace expected"); + } else if (unset(IGNOREBRACES) && !sub && lexbuf.len > 1 && + peek == STRING && lexbuf.ptr[-1] == '}' && + lexbuf.ptr[-2] != Bnull) { + /* hack to get {foo} command syntax work */ + lexbuf.ptr--; + lexbuf.len--; + lexstop = 0; + hungetc('}'); + } + *lexbuf.ptr = '\0'; + DPUTS(cmdsp != ocmdsp, "BUG: gettok: cmdstack changed."); + return peek; +} + + +/* + * Parse input as if in double quotes, adding the tokenized result to + * the current token with add(). + * + * endchar is the character that ends the parse. It only counts as the + * terminator when no ${...} and no backquote is open and, in a math + * context, when parentheses and brackets are balanced. + * + * sub is non-zero for a quoted substitution: backslash-newline is then + * always dropped, a bare newline is never an error, and a backslash + * before a double quote inside [...] quotes it. sub is also passed on + * to the recursive calls. + * + * Return zero on success. On error return non-zero: the character at + * which the error was found, the value returned by a failed recursive + * call, or 1 if the input ran out too early. + */ + +/**/ +static int +dquote_parse(char endchar, int sub) +{ + /* pct, brct, bct: depth of open (, [ and ${ ; intick: backquote state */ int pct = 0, brct = 0, bct = 0, intick = 0, err = 0; + int c; + /* math context: the terminator is ')' or ']', or infor is set */ int math = endchar == ')' || endchar == ']' || infor; + int zlemath = math && zlemetacs > zlemetall + addedx - inbufct; + + while (((c = hgetc()) != endchar || bct || + (math && ((pct > 0) || (brct > 0))) || + intick) && !lexstop) { + cont: + switch (c) { + /* Backslash: add Bnull before a character that is special here; otherwise keep the backslash and run the next character through the switch again. */ case '\\': + c = hgetc(); + if (c != '\n') { + if (c == '$' || c == '\\' || (c == '}' && !intick && bct) || + c == endchar || c == '`' || + (endchar == ']' && (c == '[' || c == ']' || + c == '(' || c == ')' || + c == '{' || c == '}' || + (c == '"' && sub)))) + add(Bnull); + else { + /* lexstop is implicitly handled here */ + add('\\'); + goto cont; + } + } else if (sub || unset(CSHJUNKIEQUOTES) || endchar != '"') + /* backslash-newline: add nothing and read on */ continue; + break; + case '\n': + /* a bare newline is an error only with CSHJUNKIEQUOTES inside plain double quotes */ err = !sub && isset(CSHJUNKIEQUOTES) && endchar == '"'; + break; + case '$': + if (intick) + break; + c = hgetc(); + if 
(c == '(') { + add(Qstring); + /* the result selects the closing token; any other result is an error */ switch (cmd_or_math_sub()) { + case CMD_OR_MATH_CMD: + c = Outpar; + break; + + case CMD_OR_MATH_MATH: + c = Outparmath; + break; + + default: + err = 1; + break; + } + } else if (c == '[') { + /* $[...]: math substitution, parsed recursively up to the closing bracket */ add(String); + add(Inbrack); + cmdpush(CS_MATHSUBST); + err = dquote_parse(']', sub); + cmdpop(); + c = Outbrack; + } else if (c == '{') { + /* ${...}: count the open brace in bct until the matching close brace */ add(Qstring); + c = Inbrace; + cmdpush(CS_BRACEPAR); + bct++; + } else if (c == '$') + add(Qstring); + else { + /* plain $: push the following character back and add Qstring */ hungetc(c); + lexstop = 0; + c = Qstring; + } + break; + case '}': + if (intick || !bct) + break; + c = Outbrace; + bct--; + cmdpop(); + break; + /* intick is 0 outside backquotes, 1 inside them and 2 inside single quotes within them */ case '`': + c = Qtick; + if (intick == 2) + ALLOWHIST + if ((intick = !intick)) { + SETPARBEGIN + cmdpush(CS_BQUOTE); + } else { + SETPAREND + cmdpop(); + } + break; + case '\'': + if (!intick) + break; + if (intick == 1) + intick = 2, STOPHIST + else + intick = 1, ALLOWHIST + break; + /* ( ) [ ] are counted unless in a math context with an open ${ ; in a math context an unmatched closer is an error */ case '(': + if (!math || !bct) + pct++; + break; + case ')': + if (!math || !bct) + err = (!pct-- && math); + break; + case '[': + if (!math || !bct) + brct++; + break; + case ']': + if (!math || !bct) + err = (!brct-- && math); + break; + case '"': + if (intick || (endchar != '"' && !bct)) + break; + if (bct) { + /* inside ${...} a double quote starts a nested quoted section, parsed recursively */ add(Dnull); + cmdpush(CS_DQUOTE); + err = dquote_parse('"', sub); + cmdpop(); + c = Dnull; + } else + err = 1; + break; + } + if (err || lexstop) + break; + add(c); + } + /* Undo the history and cmdstack state left behind by a backquote or ${ that is still open. */ if (intick == 2) + ALLOWHIST + if (intick) { + cmdpop(); + } + while (bct--) + cmdpop(); + if (lexstop) + err = intick || endchar || err; + else if (err == 1) { + /* + * TODO: as far as I can see, this hack is used in gettokstr() + * to hungetc() a character on an error. However, I don't + * understand what that actually gets us, and we can't guarantee + * it's a character anyway, because of the previous test. + * + * We use the same feature in cmd_or_math where we actually do + * need to unget if we decide it's really a command substitution. 
+ * We try to handle the other case by testing for lexstop. + */ + err = c; + } + if (zlemath && zlemetacs <= zlemetall + 1 - inbufct) + inwhat = IN_MATH; + return err; +} + +/* + * Tokenize a string given in s. Parsing is done as in double + * quotes. This is usually called before singsub(). + * + * parsestr() is noisier, reporting an error if the parse failed. + * + * On entry, *s must point to a string allocated from the stack of + * exactly the right length, i.e. strlen(*s) + 1, as the string + * is used as the lexical token string whose memory management + * demands this. Usually the input string will therefore be + * the result of an immediately preceding dupstring(). + * + * Both functions return zero on success and the non-zero error value + * from dquote_parse() on failure. On failure parsestr() also + * untokenizes *s and, unless the error is an interrupt, reports a + * parse error, naming the offending character when the error value + * is a printable ASCII character. + */ + +/**/ +mod_export int +parsestr(char **s) +{ + int err; + + if ((err = parsestrnoerr(s))) { + untokenize(*s); + if (!(errflag & ERRFLAG_INT)) { + if (err > 32 && err < 127) + zerr("parse error near `%c'", err); + else + zerr("parse error"); + } + } + return err; +} + /* Quiet worker for parsestr(): tokenizes *s in place inside a saved context and stores tokstr back into *s afterwards if it is non-NULL. */ +/**/ +mod_export int +parsestrnoerr(char **s) +{ + int l = strlen(*s), err; + + zcontext_save(); + untokenize(*s); + inpush(dupstring(*s), 0, NULL); + strinbeg(0); + lexbuf.len = 0; + lexbuf.ptr = tokstr = *s; + lexbuf.siz = l + 1; + /* endchar is NUL and sub is set */ err = dquote_parse('\0', 1); + if (tokstr) + *s = tokstr; + *lexbuf.ptr = '\0'; + strinend(); + inpop(); + DPUTS(cmdsp, "BUG: parsestr: cmdstack not empty."); + zcontext_restore(); + return err; +} + +/* + * Parse a subscript in string s. + * sub is passed down to dquote_parse(). + * endchar is the final character. + * Return a pointer to the position in s just after the parsed text, + * or NULL if s is empty, starts with endchar, or fails to parse. + * s is overwritten with the tokenized text as far as it was parsed; + * on failure that part is untokenized again. + */ +/**/ +mod_export char * +parse_subscript(char *s, int sub, int endchar) +{ + int l = strlen(s), err, toklen; + char *t; + + if (!*s || *s == endchar) + return 0; + zcontext_save(); + untokenize(t = dupstring(s)); + inpush(t, 0, NULL); + strinbeg(0); + /* + * Warning to Future Generations: + * + * This way of passing the subscript through the lexer is brittle. 
+ * Code above this for several layers assumes that when we tokenise + * the input it goes into the same place as the original string. + * However, the lexer may overwrite later bits of the string or + * reallocate it, in particular when expanding aliases. To get + * around this, we copy the string and then copy it back. This is a + * bit more robust but still relies on the underlying assumption of + * length preservation. + */ + lexbuf.len = 0; + lexbuf.ptr = tokstr = dupstring(s); + lexbuf.siz = l + 1; + err = dquote_parse(endchar, sub); + toklen = (int)(lexbuf.ptr - tokstr); + DPUTS(toklen > l, "Bad length for parsed subscript"); + memcpy(s, tokstr, toklen); + if (err) { + /* Untokenize only the part copied back: the byte after it is saved in err and restored around untokenize(). */ char *strend = s + toklen; + err = *strend; + *strend = '\0'; + untokenize(s); + *strend = err; + s = NULL; + } else { + s += toklen; + } + strinend(); + inpop(); + DPUTS(cmdsp, "BUG: parse_subscript: cmdstack not empty."); + zcontext_restore(); + return s; +} + +/* Tokenize a string given in s. Parsing is done as if s were a normal * + * command-line argument but it may contain separators. This is used * + * to parse the right-hand side of ${...%...} substitutions. * + * s is tokenized in place. Returns 0 on success, or if s is empty * + * or equal to nulstring, and 1 on error. */ + +/**/ +mod_export int +parse_subst_string(char *s) +{ + int c, l = strlen(s), err; + char *ptr; + enum lextok ctok; + + if (!*s || !strcmp(s, nulstring)) + return 0; + zcontext_save(); + untokenize(s); + inpush(dupstring(s), 0, NULL); + strinbeg(0); + lexbuf.len = 0; + lexbuf.ptr = tokstr = s; + lexbuf.siz = l + 1; + c = hgetc(); + ctok = gettokstr(c, 1); + /* remember errflag from the parse before the context is restored */ err = errflag; + strinend(); + inpop(); + DPUTS(cmdsp, "BUG: parse_subst_string: cmdstack not empty."); + zcontext_restore(); + /* Keep any interrupt error status */ + errflag = err | (errflag & ERRFLAG_INT); + if (ctok == LEXERR) { + untokenize(s); + return 1; + } +#ifdef DEBUG + /* + * Historical note: we used to check here for olen (the value of lexbuf.len + * before zcontext_restore()) == l, but that's not necessarily the case if + * we stripped an RCQUOTE. 
+ */ + if (ctok != STRING || (errflag && !noerrs)) { + fprintf(stderr, "Oops. Bug in parse_subst_string: %s\n", + errflag ? "errflag" : "ctok != STRING"); + fflush(stderr); + untokenize(s); + return 1; + } +#endif + /* Check for $'...' quoting. This needs special handling. */ + for (ptr = s; *ptr; ) + { + if (*ptr == String && ptr[1] == Snull) + { + char *t; + int len, tlen, diff; + t = getkeystring(ptr + 2, &len, GETKEYS_DOLLARS_QUOTE, NULL); + /* account for the two tokens skipped by passing ptr + 2 */ len += 2; + tlen = strlen(t); + /* diff: how much shorter the replacement is than the text it replaces */ diff = len - tlen; + /* + * Yuk. + * parse_subst_string() currently handles strings in-place. + * That's not so easy to fix without knowing whether + * additional memory should come off the heap or + * otherwise. So we cheat by copying the unquoted string + * into place, unless it's too long. That's not the + * normal case, but I'm worried there are pathological + * cases with converting metafied multibyte strings. + * If someone can prove there aren't I will be very happy. + */ + if (diff < 0) { + DPUTS(1, "$'...' subst too long: fix get_parse_string()"); + return 1; + } + memcpy(ptr, t, tlen); + ptr += tlen; + if (diff > 0) { + /* the replacement is shorter: move the rest of the string, including its terminating NUL, down over the gap */ char *dptr = ptr; + char *sptr = ptr + diff; + while ((*dptr++ = *sptr++)) + ; + } + } else + ptr++; + } + return 0; +} + +/* + * Called below to report word positions. + * If the cursor position zlemetacs is not beyond the end of the word + * just read, set the word boundaries wb and we and clear lexflags. + */ + +/**/ +static void +gotword(void) +{ + /* nwe and nwb are the candidate values for we and wb */ int nwe = zlemetall + 1 - inbufct + (addedx == 2 ? 
1 : 0); + if (zlemetacs <= nwe) { + int nwb = zlemetall - wordbeg + addedx; + if (zlemetacs >= nwb) { + wb = nwb; + we = nwe; + } else { + wb = zlemetacs + addedx; + if (we < wb) + we = wb; + } + lexflags = 0; + } +} + +/* + * Check if current lex text matches an alias: 1 if so, else 0. + * Both ordinary aliases (aliastab) and suffix aliases (sufaliastab, + * looked up by the text after the last dot) are tried. On a match + * the replacement text is pushed onto the input with inpush() and + * lexstop is cleared. + */ + +static int +checkalias(void) +{ + Alias an; + + if (!zshlextext) + return 0; + + if (!noaliases && isset(ALIASESOPT) && + (!isset(POSIXALIASES) || + (tok == STRING && !reswdtab->getnode(reswdtab, zshlextext)))) { + char *suf; + + an = (Alias) aliastab->getnode(aliastab, zshlextext); + /* ordinary alias: must not already be in use, and must be global, in command position, or follow an alias that set inalmore */ if (an && !an->inuse && + ((an->node.flags & ALIAS_GLOBAL) || + (incmdpos && tok == STRING) || inalmore)) { + if (!lexstop) { + /* + * Tokens that don't require a space after, get one, + * because they are treated as if preceded by one. + */ + int c = hgetc(); + hungetc(c); + if (!iblank(c)) + inpush(" ", INP_ALIAS, 0); + } + inpush(an->text, INP_ALIAS, an); + if (an->text[0] == ' ' && !(an->node.flags & ALIAS_GLOBAL)) + aliasspaceflag = 1; + lexstop = 0; + return 1; + } + /* suffix alias: only in command position, and only if the dot is neither the first character nor preceded by Meta */ if ((suf = strrchr(zshlextext, '.')) && suf[1] && + suf > zshlextext && suf[-1] != Meta && + (an = (Alias)sufaliastab->getnode(sufaliastab, suf+1)) && + !an->inuse && incmdpos) { + /* pushes the word itself, a space and the alias text; presumably read back in the opposite order - TODO confirm against inpush() */ inpush(dupstring(zshlextext), INP_ALIAS, an); + inpush(" ", INP_ALIAS, NULL); + inpush(an->text, INP_ALIAS, NULL); + lexstop = 0; + return 1; + } + } + + return 0; +} + +/* + * expand aliases and reserved words + * + * Sets zshlextext to the text of the current token. Returns 1 if + * checkalias() expanded an alias, else 0. If the text is a reserved + * word that applies here, tok is changed to the token for that word. + */ + +/**/ +int +exalias(void) +{ + Reswd rw; + + hwend(); + /* spelling correction of the word, only for interactive input that does not come from a string or an alias */ if (interact && isset(SHINSTDIN) && !strin && incasepat <= 0 && + tok == STRING && !nocorrect && !(inbufflags & INP_ALIAS) && + (isset(CORRECTALL) || (isset(CORRECT) && incmdpos))) + spckword(&tokstr, 1, incmdpos, 1); + + if (!tokstr) { + /* no token string: use the fixed text for this token type */ zshlextext = tokstrings[tok]; + + if (tok == NEWLIN) + return 0; + return checkalias(); + } else { + VARARR(char, copy, (strlen(tokstr) + 1)); + + if (has_token(tokstr)) { + char *p, *t; + + /* build an untokenized copy: each token character is mapped back to its literal character through ztokens[] */ zshlextext = p = copy; + for (t = tokstr; + (*p++ = itok(*t) ? 
ztokens[*t++ - Pound] : *t++);); + } else + zshlextext = tokstr; + + if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS)) { + int zp = lexflags; + + gotword(); + /* gotword() cleared lexflags: stop here without alias or reserved word handling */ if ((zp & LEXFLAGS_ZLE) && !lexflags) { + /* copy is local to this block, so zshlextext must not point at it on return */ if (zshlextext == copy) + zshlextext = tokstr; + return 0; + } + } + + if (tok == STRING) { + /* Check for an alias */ + if ((zshlextext != copy || !isset(POSIXALIASES)) && checkalias()) { + if (zshlextext == copy) + zshlextext = tokstr; + return 1; + } + + /* Then check for a reserved word */ + if ((incmdpos || + (unset(IGNOREBRACES) && unset(IGNORECLOSEBRACES) && + zshlextext[0] == '}' && !zshlextext[1])) && + (rw = (Reswd) reswdtab->getnode(reswdtab, zshlextext))) { + tok = rw->token; + inrepeat_ = (tok == REPEAT); + if (tok == DINBRACK) + incond = 1; + } else if (incond && !strcmp(zshlextext, "]]")) { + tok = DOUTBRACK; + incond = 0; + } else if (incond == 1 && zshlextext[0] == '!' && !zshlextext[1]) + tok = BANG; + } + inalmore = 0; + if (zshlextext == copy) + zshlextext = tokstr; + } + return 0; +} + /* Append c to the raw input buffer (tokstr_raw, lexbuf_raw); does nothing unless lex_add_raw is set. When the buffer becomes full its size is doubled with hrealloc() and the new half is zeroed. */ +/**/ +void +zshlex_raw_add(int c) +{ + if (!lex_add_raw) + return; + + *lexbuf_raw.ptr++ = c; + if (lexbuf_raw.siz == ++lexbuf_raw.len) { + int newbsiz = lexbuf_raw.siz * 2; + + tokstr_raw = (char *)hrealloc(tokstr_raw, lexbuf_raw.siz, newbsiz); + lexbuf_raw.ptr = tokstr_raw + lexbuf_raw.len; + memset(lexbuf_raw.ptr, 0, newbsiz - lexbuf_raw.siz); + lexbuf_raw.siz = newbsiz; + } +} + /* Step the raw input buffer back by one character; does nothing unless lex_add_raw is set. */ +/**/ +void +zshlex_raw_back(void) +{ + if (!lex_add_raw) + return; + lexbuf_raw.ptr--; + lexbuf_raw.len--; +} + /* Return a mark for zshlex_raw_back_to_mark(): the current raw length plus offset, or 0 if lex_add_raw is not set. */ +/**/ +int +zshlex_raw_mark(int offset) +{ + if (!lex_add_raw) + return 0; + return lexbuf_raw.len + offset; +} + /* Reset the raw input buffer so that its length is mark; does nothing unless lex_add_raw is set. */ +/**/ +void +zshlex_raw_back_to_mark(int mark) +{ + if (!lex_add_raw) + return; + lexbuf_raw.ptr = tokstr_raw + mark; + lexbuf_raw.len = mark; +} + +/* + * Skip (...) for command-style substitutions: $(...), <(...), >(...) 
+ * + * In order to ensure we don't stop at closing parentheses with + * some other syntactic significance, we'll parse the input until + * we find an unmatched closing parenthesis. However, we'll throw + * away the result of the parsing and just keep the string we've built + * up on the way. + * + * Returns the final value of lexstop, which is non-zero if the input + * ended or the parse failed before the closing parenthesis was found. + */ + +/**/ +static int +skipcomm(void) +{ +#ifdef ZSH_OLD_SKIPCOMM /* old version: scan characters, counting parentheses and copying quoted sections and comments, without running the parser */ + int pct = 1, c, start = 1; + + cmdpush(CS_CMDSUBST); + SETPARBEGIN + c = Inpar; + do { + int iswhite; + add(c); + c = hgetc(); + if (itok(c) || lexstop) + break; + iswhite = inblank(c); + switch (c) { + case '(': + pct++; + break; + case ')': + pct--; + break; + case '\\': + add(c); + c = hgetc(); + break; + case '\'': { + /* after a dollar sign a backslash quotes the next character inside the single quotes */ int strquote = lexbuf.ptr[-1] == '$'; + add(c); + STOPHIST + while ((c = hgetc()) != '\'' && !lexstop) { + if (c == '\\' && strquote) { + add(c); + c = hgetc(); + } + add(c); + } + ALLOWHIST + break; + } + case '\"': + add(c); + while ((c = hgetc()) != '\"' && !lexstop) + if (c == '\\') { + add(c); + add(hgetc()); + } else + add(c); + break; + case '`': + add(c); + while ((c = hgetc()) != '`' && !lexstop) + if (c == '\\') + add(c), add(hgetc()); + else + add(c); + break; + case '#': + /* a comment starts only at the beginning or after a blank and runs to the end of the line */ if (start) { + add(c); + while ((c = hgetc()) != '\n' && !lexstop) + add(c); + iswhite = 1; + } + break; + } + start = iswhite; + } + while (pct); + if (!lexstop) + SETPAREND + cmdpop(); + return lexstop; +#else /* current version: run the parser up to the closing parenthesis while collecting the raw input text */ + char *new_tokstr; + int new_lexstop, new_lex_add_raw; + int save_infor = infor; + struct lexbufstate new_lexbuf; + + infor = 0; + cmdpush(CS_CMDSUBST); + SETPARBEGIN + add(Inpar); + + /* lex_add_raw is incremented for the nested parse; the old value tells whether this is the outermost substitution */ new_lex_add_raw = lex_add_raw + 1; + if (!lex_add_raw) { + /* + * We'll combine the string so far with the input + * read in for the command substitution. To do this + * we'll just propagate the current tokstr etc. as the + * variables used for adding raw input, and + * ensure we swap those for the real tokstr etc. at the end. 
+ * + * However, we need to save and restore the rest of the + * lexical and parse state as we're effectively parsing + * an internal string. Because we're still parsing it from + * the original input source (we have to --- we don't know + * when to stop inputting it otherwise and can't rely on + * the input being recoverable until we've read it) we need + * to keep the same history context. + */ + new_tokstr = tokstr; + new_lexbuf = lexbuf; + + /* + * If we're expanding an alias at this point, we need the whole + * remaining text as part of the string for the command in + * parentheses, so don't backtrack. This is different from the + * usual case where the alias is fully within the command, where + * we want the unexpanded text so that it will be expanded + * again when the command in the parentheses is executed. + * + * I never wanted to be a software engineer, you know. + */ + if (inbufflags & INP_ALIAS) + inbufflags |= INP_RAW_KEEP; + zcontext_save_partial(ZCONTEXT_LEX|ZCONTEXT_PARSE); + hist_in_word(1); + } else { + /* + * Set up for nested command substitution, however + * we don't actually need the string until we get + * back to the top level and recover the lot. + * The $() body just appears empty. + * + * We do need to propagate the raw variables which would + * otherwise be cleared, though. + */ + new_tokstr = tokstr_raw; + new_lexbuf = lexbuf_raw; + + zcontext_save_partial(ZCONTEXT_LEX|ZCONTEXT_PARSE); + } + /* install the raw buffer chosen above, after the context save */ tokstr_raw = new_tokstr; + lexbuf_raw = new_lexbuf; + lex_add_raw = new_lex_add_raw; + /* + * Don't do any ZLE specials down here: they're only needed + * when we return the string from the recursive parse. + * (TBD: this probably means we should be initialising lexflags + * more consistently.) + * + * Note that in that case we're still using the ZLE line reading + * function at the history layer --- this is consistent with the + * intention of maintaining the history and input layers across + * the recursive parsing. 
+ * + * Also turn off LEXFLAGS_NEWLINE because this is already skipping + * across the entire construct, and parse_event() needs embedded + * newlines to be "real" when looking for the OUTPAR token. + */ + lexflags &= ~(LEXFLAGS_ZLE|LEXFLAGS_NEWLINE); + dbparens = 0; /* restored by zcontext_restore_partial() */ + + /* the parse failed, or stopped on something other than the closing parenthesis */ if (!parse_event(OUTPAR) || tok != OUTPAR) { + if (strin) { + /* + * Get the rest of the string raw since we don't + * know where this token ends. + */ + while (!lexstop) + (void)ingetc(); + } else + lexstop = 1; + } + /* Outpar lexical token gets added in caller if present */ + + /* + * We're going to keep the full raw input string + * as the current token string after popping the stack. + */ + new_tokstr = tokstr_raw; + new_lexbuf = lexbuf_raw; + /* + * We're also going to propagate the lexical state: + * if we couldn't parse the command substitution we + * can't continue. + */ + new_lexstop = lexstop; + + zcontext_restore_partial(ZCONTEXT_LEX|ZCONTEXT_PARSE); + + /* tested after the restore: non-zero means raw input is still being collected at this level */ if (lex_add_raw) { + /* + * Keep going, so retain the raw variables. + */ + tokstr_raw = new_tokstr; + lexbuf_raw = new_lexbuf; + } else { + if (!new_lexstop) { + /* Ignore the ')' added on input */ + new_lexbuf.len--; + *--new_lexbuf.ptr = '\0'; + } + + /* + * Convince the rest of lex.c we were examining a string + * all along. + */ + tokstr = new_tokstr; + lexbuf = new_lexbuf; + lexstop = new_lexstop; + hist_in_word(0); + } + + if (!lexstop) + SETPAREND + cmdpop(); + infor = save_infor; + + return lexstop; +#endif +} |
