Index: zend_compile.c =================================================================== RCS file: /repository/ZendEngine2/zend_compile.c,v retrieving revision 1.744 diff -u -r1.744 zend_compile.c --- zend_compile.c 3 Apr 2007 06:32:59 -0000 1.744 +++ zend_compile.c 12 Apr 2007 09:30:00 -0000 @@ -4412,12 +4412,12 @@ { int retval; -again: if (CG(increment_lineno)) { CG(zend_lineno)++; CG(increment_lineno) = 0; } +again: Z_TYPE(zendlval->u.constant) = IS_LONG; retval = lex_scan(&zendlval->u.constant TSRMLS_CC); switch (retval) { @@ -4428,8 +4428,7 @@ goto again; case T_CLOSE_TAG: - if (LANG_SCNG(yy_text)[LANG_SCNG(yy_leng)-1]=='\n' - || (LANG_SCNG(yy_text)[LANG_SCNG(yy_leng)-2]=='\r' && LANG_SCNG(yy_text)[LANG_SCNG(yy_leng)-1])) { + if (LANG_SCNG(yy_text)[LANG_SCNG(yy_leng)-1] != '>') { CG(increment_lineno) = 1; } retval = ';'; /* implicit ; */ Index: zend_language_parser.y =================================================================== RCS file: /repository/ZendEngine2/zend_language_parser.y,v retrieving revision 1.180 diff -u -r1.180 zend_language_parser.y --- zend_language_parser.y 8 Mar 2007 17:30:28 -0000 1.180 +++ zend_language_parser.y 12 Apr 2007 09:32:00 -0000 @@ -24,8 +24,6 @@ * LALR shift/reduce conflicts and how they are resolved: * * - 2 shift/reduce conflicts due to the dangeling elseif/else ambiguity. Solved by shift. - * - 1 shift/reduce conflict due to arrays within encapsulated strings. Solved by shift. - * - 1 shift/reduce conflict due to objects within encapsulated strings. Solved by shift. * */ @@ -49,7 +47,7 @@ %} %pure_parser -%expect 4 +%expect 2 %left T_INCLUDE T_INCLUDE_ONCE T_EVAL T_REQUIRE T_REQUIRE_ONCE %left ',' @@ -884,11 +882,6 @@ | encaps_list T_ENCAPSED_AND_WHITESPACE { zend_do_add_string(&$$, &$1, &$2 TSRMLS_CC); } | encaps_list T_CHARACTER { zend_do_add_char(&$$, &$1, &$2 TSRMLS_CC); } | encaps_list T_BAD_CHARACTER { zend_do_add_string(&$$, &$1, &$2 TSRMLS_CC); } - | encaps_list '[' { Z_LVAL($2.u.constant) = (long) '['; zend_do_add_char(&$$, &$1, &$2 TSRMLS_CC); } - | encaps_list ']' { Z_LVAL($2.u.constant) = (long) ']'; zend_do_add_char(&$$, &$1, &$2 TSRMLS_CC); } - | encaps_list '{' { Z_LVAL($2.u.constant) = (long) '{'; zend_do_add_char(&$$, &$1, &$2 TSRMLS_CC); } - | encaps_list '}' { Z_LVAL($2.u.constant) = (long) '}'; zend_do_add_char(&$$, &$1, &$2 TSRMLS_CC); } - | encaps_list T_OBJECT_OPERATOR { znode tmp; Z_LVAL($2.u.constant) = (long) '-'; zend_do_add_char(&tmp, &$1, &$2 TSRMLS_CC); Z_LVAL($2.u.constant) = (long) '>'; zend_do_add_char(&$$, &tmp, &$2 TSRMLS_CC); } | /* empty */ { zend_do_init_string(&$$ TSRMLS_CC); } ; Index: zend_language_scanner.l =================================================================== RCS file: /repository/ZendEngine2/zend_language_scanner.l,v retrieving revision 1.163 diff -u -r1.163 zend_language_scanner.l --- zend_language_scanner.l 15 Feb 2007 10:42:51 -0000 1.163 +++ zend_language_scanner.l 12 Apr 2007 09:20:02 -0000 @@ -37,8 +37,11 @@ %x ST_DOUBLE_QUOTES %x ST_BACKQUOTE %x ST_HEREDOC +%x ST_START_HEREDOC +%x ST_END_HEREDOC %x ST_LOOKING_FOR_PROPERTY %x ST_LOOKING_FOR_VARNAME +%x ST_VAR_OFFSET %x ST_COMMENT %x ST_DOC_COMMENT %x ST_ONE_LINE_COMMENT @@ -99,9 +102,7 @@ char *p = (s), *boundary = p+(l); \ \ while (p= 0) { + min_digits = 1; + max_digits = 2; + Z_USTRLEN_P(zendlval)--; + s++; + n = 1; /* already have one digit */ + codepoint = digit; + } else { + *t++ = 0x5C; /*'\\'*/ + *t++ = *s; + } + break; default: digit = zend_get_octal_digit(*s); if (digit >= 0) { @@ -1118,14 +1143,6 @@ bits = 3; n = 1; /* already have one digit */ codepoint = digit; - } else if (c == 0x78 /*'x'*/ - && (s+1) < end && (digit = zend_get_hex_digit(*(s+1))) >= 0) { - min_digits = 1; - max_digits = 2; - Z_USTRLEN_P(zendlval)--; - s++; - n = 1; /* already have one digit */ - codepoint = digit; } else { *t++ = 0x5C; /*'\\'*/ *t++ = *s; @@ -1163,16 +1180,22 @@ efree(Z_USTRVAL_P(zendlval)); return 0; } - } else { - s++; + + /* s is already incremented and not past a newline */ + continue; } } else { - *t++ = *s++; + *t++ = *s; + } + + if (*s == 0x0A /*'\n'*/ || (*s == 0x0D /*'\r'*/ && (*(s+1) != 0x0A /*'\n'*/))) { + CG(zend_lineno)++; } + s++; } *t = 0; - return T_CONSTANT_ENCAPSED_STRING; + return type; } int zend_scan_unicode_single_string(zval *zendlval TSRMLS_DC) @@ -1181,8 +1204,6 @@ UChar *end; UChar32 codepoint = 0; - HANDLE_NEWLINES(yytext, yyleng); - if (!zend_copy_scanner_string(zendlval, yytext+1, yyleng-2, IS_UNICODE, SCNG(output_conv) TSRMLS_CC)) { return 0; } @@ -1265,25 +1286,36 @@ *t++ = *s; break; } - s++; } else { - *t++ = *s++; + *t++ = *s; + } + + if (*s == 0x0A /*'\n'*/ || (*s == 0x0D /*'\r'*/ && (*(s+1) != 0x0A /*'\n'*/))) { + CG(zend_lineno)++; } + s++; } *t = 0; return T_CONSTANT_ENCAPSED_STRING; } -int zend_scan_binary_double_string(zval *zendlval, int bprefix TSRMLS_DC) +int zend_scan_binary_escape_string(zval *zendlval, char quote_type, int type TSRMLS_DC) { register char *s, *t; char *end; - Z_STRVAL_P(zendlval) = estrndup(yytext+bprefix+1, yyleng-bprefix-2); - Z_STRLEN_P(zendlval) = yyleng-bprefix-2; + if (type == T_CONSTANT_ENCAPSED_STRING) { + int bprefix = (yytext[0] != '"') ? 1 : 0; + + Z_STRVAL_P(zendlval) = estrndup(yytext+bprefix+1, yyleng-bprefix-2); + Z_STRLEN_P(zendlval) = yyleng-bprefix-2; + } else { + Z_STRVAL_P(zendlval) = estrndup(yytext, yyleng); + Z_STRLEN_P(zendlval) = yyleng; + } + Z_TYPE_P(zendlval) = IS_STRING; - HANDLE_NEWLINES(yytext, yyleng); /* convert escape sequences */ s = t = Z_STRVAL_P(zendlval); @@ -1307,12 +1339,37 @@ *t++ = '\t'; Z_STRLEN_P(zendlval)--; break; + case '"': + case '`': + if (*s != quote_type) { + *t++ = '\\'; + *t++ = *s; + break; + } case '\\': case '$': - case '"': *t++ = *s; Z_STRLEN_P(zendlval)--; break; + case 'x': + case 'X': + if (ZEND_IS_HEX(*(s+1))) { + char hex_buf[3] = { 0, 0, 0 }; + + Z_STRLEN_P(zendlval)--; /* for the 'x' */ + + hex_buf[0] = *(++s); + Z_STRLEN_P(zendlval)--; + if (ZEND_IS_HEX(*(s+1))) { + hex_buf[1] = *(++s); + Z_STRLEN_P(zendlval)--; + } + *t++ = (char) strtol(hex_buf, NULL, 16); + } else { + *t++ = '\\'; + *t++ = *s; + } + break; default: /* check for an octal */ if (ZEND_IS_OCT(*s)) { @@ -1320,41 +1377,33 @@ octal_buf[0] = *s; Z_STRLEN_P(zendlval)--; - if ((s+1)?@] -ENCAPSED_TOKENS [\[\]{}$] -ESCAPED_AND_WHITESPACE [\n\t\r #'.:;,()|^&+-/*=%!~<>?@]+ ANY_CHAR (.|[\n]) NEWLINE ("\r"|"\n"|"\r\n") +DOUBLE_QUOTES_LITERAL_DOLLAR ("$"+([^a-zA-Z_\x7f-\xff$"\\{]|("\\"{ANY_CHAR}))) +BACKQUOTE_LITERAL_DOLLAR ("$"+([^a-zA-Z_\x7f-\xff$`\\{]|("\\"{ANY_CHAR}))) +HEREDOC_LITERAL_DOLLAR ("$"+([^a-zA-Z_\x7f-\xff$\n\r\\{]|("\\"[^\n\r]))) + +HEREDOC_NEWLINE ((({LABEL}";"?((("{"+|"$"+)"\\"?)|"\\"))|(("{"*|"$"*)"\\"?)){NEWLINE}) + +HEREDOC_CURLY_OR_ESCAPE_OR_DOLLAR (("{"+[^$\n\r\\{])|("{"*"\\"[^\n\r])|{HEREDOC_LITERAL_DOLLAR}) +HEREDOC_NON_LABEL ([^a-zA-Z_\x7f-\xff$\n\r\\{]|{HEREDOC_CURLY_OR_ESCAPE_OR_DOLLAR}) +HEREDOC_LABEL_NO_NEWLINE ({LABEL}([^a-zA-Z0-9_\x7f-\xff;$\n\r\\{]|(";"[^$\n\r\\{])|(";"?{HEREDOC_CURLY_OR_ESCAPE_OR_DOLLAR}))) + +DOUBLE_QUOTES_CHARS ("{"*([^$"\\{]|("\\"{ANY_CHAR}))|{DOUBLE_QUOTES_LITERAL_DOLLAR}) +BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR}) +HEREDOC_CHARS ("{"*([^$\n\r\\{]|("\\"[^\n\r]))|{HEREDOC_LITERAL_DOLLAR}|({HEREDOC_NEWLINE}+({HEREDOC_NON_LABEL}|{HEREDOC_LABEL_NO_NEWLINE}))) + %option noyylineno %option noyywrap %% @@ -1560,11 +1624,15 @@ return T_IMPLEMENTS; } -"->" { +"->" { yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC); return T_OBJECT_OPERATOR; } +"->" { + return T_OBJECT_OPERATOR; +} + {LABEL} { yy_pop_state(TSRMLS_C); if (!zend_copy_scanner_string(zendlval, yytext, yyleng, UG(unicode)?IS_UNICODE:IS_STRING, SCNG(output_conv) TSRMLS_CC)) { @@ -1897,7 +1965,26 @@ return T_LNUMBER; } -{LNUM}|{HNUM} { /* treat numbers (almost) as strings inside encapsulated strings */ + +["] { + BEGIN(ST_IN_SCRIPTING); + return '\"'; +} + + +[`] { + BEGIN(ST_IN_SCRIPTING); + return '`'; +} + + +{ANY_CHAR} { + Z_LVAL_P(zendlval) = (long) yytext[0]; + HANDLE_NEWLINE(yytext[0]); + return T_CHARACTER; +} + +{LNUM}|{HNUM} { /* treat numbers (almost) as strings inside encapsulated strings */ if (!zend_copy_scanner_string(zendlval, yytext, yyleng, CG(literal_type), SCNG(output_conv) TSRMLS_CC)) { return 0; } @@ -2071,7 +2158,33 @@ return T_OPEN_TAG; } -"$"{LABEL} { +"$"{LABEL} { + if (!zend_copy_scanner_string(zendlval, (yytext+1), (yyleng-1), UG(unicode)?IS_UNICODE:IS_STRING, SCNG(output_conv) TSRMLS_CC)) { + return 0; + } + if (UG(unicode) && !zend_check_and_normalize_identifier(zendlval)) { + return 0; + } + return T_VARIABLE; +} + +"$"{LABEL}"->"[a-zA-Z_\x7f-\xff] { + yyless(yyleng - 3); + yy_push_state(ST_LOOKING_FOR_PROPERTY TSRMLS_CC); + + if (!zend_copy_scanner_string(zendlval, (yytext+1), (yyleng-1), UG(unicode)?IS_UNICODE:IS_STRING, SCNG(output_conv) TSRMLS_CC)) { + return 0; + } + if (UG(unicode) && !zend_check_and_normalize_identifier(zendlval)) { + return 0; + } + return T_VARIABLE; +} + +"$"{LABEL}"[" { + yyless(yyleng - 1); + yy_push_state(ST_VAR_OFFSET TSRMLS_CC); + if (!zend_copy_scanner_string(zendlval, (yytext+1), (yyleng-1), UG(unicode)?IS_UNICODE:IS_STRING, SCNG(output_conv) TSRMLS_CC)) { return 0; } @@ -2081,6 +2194,25 @@ return T_VARIABLE; } +"[" { + return '['; +} + +"]" { + yy_pop_state(TSRMLS_C); + return ']'; +} + +{TOKENS}|[{}] { + /* Bad token, but will allow a more explicit parse error */ + return yytext[0]; +} + +[ \n\r\t'"`\\#] { + yyless(0); + yy_pop_state(TSRMLS_C); +} + {LABEL} { if (!zend_copy_scanner_string(zendlval, yytext, yyleng, UG(unicode)?IS_UNICODE:IS_STRING, SCNG(output_conv) TSRMLS_CC)) { return 0; @@ -2091,7 +2223,7 @@ return T_STRING; } -{LABEL} { +{LABEL} { if (!zend_copy_scanner_string(zendlval, yytext, yyleng, CG(literal_type), SCNG(output_conv) TSRMLS_CC)) { return 0; } @@ -2221,21 +2353,21 @@ } -(["]([^$"\\]|("\\".))*["]) { +(["]{DOUBLE_QUOTES_CHARS}*("{"*|"$"*)["]) { if (UG(unicode)) { - return zend_scan_unicode_double_string(zendlval TSRMLS_CC); + return zend_scan_unicode_escape_string(zendlval, 0x22 /*'"'*/, T_CONSTANT_ENCAPSED_STRING TSRMLS_CC); } else { - return zend_scan_binary_double_string(zendlval, 0 TSRMLS_CC); + return zend_scan_binary_escape_string(zendlval, 0x22 /*'"'*/, T_CONSTANT_ENCAPSED_STRING TSRMLS_CC); } } -(b["]([^$"\\]|("\\".))*["]) { - return zend_scan_binary_double_string(zendlval, 1 TSRMLS_CC); +(b["]{DOUBLE_QUOTES_CHARS}*("{"*|"$"*)["]) { + return zend_scan_binary_escape_string(zendlval, 0x22 /*'"'*/, T_CONSTANT_ENCAPSED_STRING TSRMLS_CC); } -([']([^'\\]|("\\".))*[']) { +([']([^'\\]|("\\"{ANY_CHAR}))*[']) { if (UG(unicode)) { return zend_scan_unicode_single_string(zendlval TSRMLS_CC); } else { @@ -2244,7 +2376,7 @@ } -("b'"([^'\\]|("\\".))*[']) { +("b'"([^'\\]|("\\"{ANY_CHAR}))*[']) { return zend_scan_binary_single_string(zendlval, 1 TSRMLS_CC); } @@ -2269,7 +2401,7 @@ CG(heredoc_len)--; } CG(heredoc) = estrndup(s, CG(heredoc_len)); - BEGIN(ST_HEREDOC); + BEGIN(ST_START_HEREDOC); return T_BINARY_HEREDOC; } @@ -2284,7 +2416,7 @@ CG(heredoc_len)--; } CG(heredoc) = estrndup(s, CG(heredoc_len)); - BEGIN(ST_HEREDOC); + BEGIN(ST_START_HEREDOC); return T_START_HEREDOC; } @@ -2295,78 +2427,72 @@ } -^{LABEL}(";")?{NEWLINE} { - int label_len; +{ANY_CHAR} { + yyless(0); + BEGIN(ST_HEREDOC); +} - if (yytext[yyleng-2]=='\r') { - label_len = yyleng-2; - } else { - label_len = yyleng-1; - } +{LABEL}";"?[\n\r] { + int label_len = yyleng - 1; if (yytext[label_len-1]==';') { label_len--; } if (label_len==CG(heredoc_len) && !memcmp(yytext, CG(heredoc), label_len)) { - Z_STRVAL_P(zendlval) = estrndup(yytext, label_len); /* unput destroys yytext */ + Z_STRVAL_P(zendlval) = CG(heredoc); Z_STRLEN_P(zendlval) = label_len; - yyless(yyleng - (yyleng - label_len)); - efree(CG(heredoc)); + yyless(label_len); CG(heredoc)=NULL; CG(heredoc_len)=0; BEGIN(ST_IN_SCRIPTING); return T_END_HEREDOC; } else { - CG(zend_lineno)++; - if (!zend_copy_scanner_string(zendlval, yytext, yyleng, CG(literal_type), SCNG(output_conv) TSRMLS_CC)) { - return 0; - } - return T_STRING; + yyless(label_len - 1); + yymore(); + BEGIN(ST_HEREDOC); } } +{HEREDOC_CHARS}*{HEREDOC_NEWLINE}+{LABEL}";"?[\n\r] { + char *end = yytext + yyleng - 1; -{ESCAPED_AND_WHITESPACE} { - HANDLE_NEWLINES(yytext, yyleng); - if (!zend_copy_scanner_string(zendlval, yytext, yyleng, CG(literal_type), SCNG(output_conv) TSRMLS_CC)) { - return 0; + if (end[-1] == ';') { + end--; + yyleng--; } - return T_ENCAPSED_AND_WHITESPACE; -} -[`]+ { - if (!zend_copy_scanner_string(zendlval, yytext, yyleng, CG(literal_type), SCNG(output_conv) TSRMLS_CC)) { - return 0; - } - return T_ENCAPSED_AND_WHITESPACE; -} + if (yyleng > CG(heredoc_len) && !memcmp(end - CG(heredoc_len), CG(heredoc), CG(heredoc_len))) { + yyless(yyleng - 2); + yyleng -= CG(heredoc_len) - 1; + BEGIN(ST_END_HEREDOC); - -["]+ { - if (!zend_copy_scanner_string(zendlval, yytext, yyleng, CG(literal_type), SCNG(output_conv) TSRMLS_CC)) { - return 0; + if (CG(literal_type) == IS_UNICODE) { + return zend_scan_unicode_escape_string(zendlval, 0, T_ENCAPSED_AND_WHITESPACE TSRMLS_CC); + } else { + return zend_scan_binary_escape_string(zendlval, 0, T_ENCAPSED_AND_WHITESPACE TSRMLS_CC); + } + } else { + yyless(yyleng - 2); + yymore(); } - return T_ENCAPSED_AND_WHITESPACE; } - -"$"[^a-zA-Z_\x7f-\xff{] { - Z_LVAL_P(zendlval) = (long) yytext[0]; - if (yyleng == 2) { - yyless(1); - } - return T_CHARACTER; +{ANY_CHAR} { + Z_STRVAL_P(zendlval) = CG(heredoc); + Z_STRLEN_P(zendlval) = CG(heredoc_len); + /* OK to change yytext like this? */ + yytext = Z_STRVAL_P(zendlval); + yyleng = Z_STRLEN_P(zendlval); + CG(heredoc) = NULL; + CG(heredoc_len) = 0; + BEGIN(ST_IN_SCRIPTING); + return T_END_HEREDOC; } -{ENCAPSED_TOKENS} { - Z_LVAL_P(zendlval) = (long) yytext[0]; - return yytext[0]; -} - "{$" { - Z_LVAL_P(zendlval) = (long) yytext[0]; + Z_LVAL_P(zendlval) = (long) '{'; yy_push_state(ST_IN_SCRIPTING TSRMLS_CC); yyless(1); return T_CURLY_OPEN; @@ -2447,13 +2573,6 @@ } } -"\\{" { - if (!zend_copy_scanner_string(zendlval, yytext, yyleng, CG(literal_type), SCNG(output_conv) TSRMLS_CC)) { - return 0; - } - return T_STRING; -} - "\\"{ANY_CHAR} { switch (yytext[1]) { case 'n': @@ -2469,8 +2588,11 @@ Z_LVAL_P(zendlval) = (long) '\\'; break; case '$': - Z_LVAL_P(zendlval) = (long) yytext[1]; + Z_LVAL_P(zendlval) = (long) '$'; break; + case '\n': + case '\r': + CG(zend_lineno)++; default: if (!zend_copy_scanner_string(zendlval, yytext, yyleng, CG(literal_type), SCNG(output_conv) TSRMLS_CC)) { return 0; @@ -2482,30 +2604,82 @@ } -["'`]+ { - if (!zend_copy_scanner_string(zendlval, yytext, yyleng, CG(literal_type), SCNG(output_conv) TSRMLS_CC)) { - return 0; +[{$]["] { + yyless(1); + Z_LVAL_P(zendlval) = (long) yytext[0]; + return T_CHARACTER; +} + +[{$][`] { + yyless(1); + Z_LVAL_P(zendlval) = (long) yytext[0]; + return T_CHARACTER; +} + +"{{"|"$$" { + yyless(1); + Z_LVAL_P(zendlval) = (long) yytext[0]; + return T_CHARACTER; +} + + +{DOUBLE_QUOTES_CHARS}+ { + if (CG(literal_type) == IS_UNICODE) { + return zend_scan_unicode_escape_string(zendlval, 0x22 /*'"'*/, T_ENCAPSED_AND_WHITESPACE TSRMLS_CC); + } else { + return zend_scan_binary_escape_string(zendlval, 0x22 /*'"'*/, T_ENCAPSED_AND_WHITESPACE TSRMLS_CC); } - return T_ENCAPSED_AND_WHITESPACE; } +{DOUBLE_QUOTES_CHARS}*("{"{2,}|"$"{2,}|(("{"+|"$"+)["])) { + yyless(yyleng - 1); -["] { - BEGIN(ST_IN_SCRIPTING); - return '\"'; + if (CG(literal_type) == IS_UNICODE) { + return zend_scan_unicode_escape_string(zendlval, 0x22 /*'"'*/, T_ENCAPSED_AND_WHITESPACE TSRMLS_CC); + } else { + return zend_scan_binary_escape_string(zendlval, 0x22 /*'"'*/, T_ENCAPSED_AND_WHITESPACE TSRMLS_CC); + } } -[`] { - BEGIN(ST_IN_SCRIPTING); - return '`'; +{BACKQUOTE_CHARS}+ { + if (CG(literal_type) == IS_UNICODE) { + return zend_scan_unicode_escape_string(zendlval, 0x60 /*'`'*/, T_ENCAPSED_AND_WHITESPACE TSRMLS_CC); + } else { + return zend_scan_binary_escape_string(zendlval, 0x60 /*'`'*/, T_ENCAPSED_AND_WHITESPACE TSRMLS_CC); + } } +{BACKQUOTE_CHARS}*("{"{2,}|"$"{2,}|(("{"+|"$"+)[`])) { + yyless(yyleng - 1); -<> { - return 0; + if (CG(literal_type) == IS_UNICODE) { + return zend_scan_unicode_escape_string(zendlval, 0x60 /*'`'*/, T_ENCAPSED_AND_WHITESPACE TSRMLS_CC); + } else { + return zend_scan_binary_escape_string(zendlval, 0x60 /*'`'*/, T_ENCAPSED_AND_WHITESPACE TSRMLS_CC); + } +} + + +{HEREDOC_CHARS}*({HEREDOC_NEWLINE}+({LABEL}";"?)?)? { + if (CG(literal_type) == IS_UNICODE) { + return zend_scan_unicode_escape_string(zendlval, 0, T_ENCAPSED_AND_WHITESPACE TSRMLS_CC); + } else { + return zend_scan_binary_escape_string(zendlval, 0, T_ENCAPSED_AND_WHITESPACE TSRMLS_CC); + } } +{HEREDOC_CHARS}*({HEREDOC_NEWLINE}+({LABEL}";"?)?)?("{"{2,}|"$"{2,}) { + yyless(yyleng - 1); + + if (CG(literal_type) == IS_UNICODE) { + return zend_scan_unicode_escape_string(zendlval, 0, T_ENCAPSED_AND_WHITESPACE TSRMLS_CC); + } else { + return zend_scan_binary_escape_string(zendlval, 0, T_ENCAPSED_AND_WHITESPACE TSRMLS_CC); + } +} + + <> { zend_error(E_COMPILE_WARNING,"Unterminated comment starting line %d", CG(comment_start_line)); return 0; @@ -2513,6 +2687,6 @@ -{ANY_CHAR} { +{ANY_CHAR} { zend_error(E_COMPILE_WARNING,"Unexpected character in input: '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE); } Index: zend_vm_def.h =================================================================== RCS file: /repository/ZendEngine2/zend_vm_def.h,v retrieving revision 1.164 diff -u -r1.164 zend_vm_def.h --- zend_vm_def.h 3 Apr 2007 06:32:59 -0000 1.164 +++ zend_vm_def.h 12 Apr 2007 09:45:19 -0000 @@ -1629,7 +1629,7 @@ Z_STRVAL_P(tmp) = emalloc(1); Z_STRVAL_P(tmp)[0] = 0; Z_STRLEN_P(tmp) = 0; - Z_TYPE_P(tmp) = EX(opline)->extended_value; + Z_TYPE_P(tmp) = IS_STRING; } tmp->refcount = 1; tmp->is_ref = 0; @@ -1666,10 +1666,12 @@ zend_free_op free_op1, free_op2; zval *var = GET_OP2_ZVAL_PTR(BP_VAR_R); zval var_copy; - int use_copy; + int use_copy = 0; if (opline->extended_value == IS_UNICODE) { - zend_make_unicode_zval(var, &var_copy, &use_copy); + if (Z_TYPE_P(var) != IS_UNICODE) { + zend_make_unicode_zval(var, &var_copy, &use_copy); + } } else { zend_make_printable_zval(var, &var_copy, &use_copy); }