Index: Zend/zend_highlight.c =================================================================== RCS file: /repository/ZendEngine2/zend_highlight.c,v retrieving revision 1.65 diff -u -r1.65 zend_highlight.c --- Zend/zend_highlight.c 31 Dec 2008 11:12:29 -0000 1.65 +++ Zend/zend_highlight.c 28 Apr 2009 17:48:00 -0000 @@ -127,14 +127,8 @@ zend_printf("", last_color); } } - switch (token_type) { - case T_END_HEREDOC: - zend_html_puts(Z_STRVAL(token), Z_STRLEN(token) TSRMLS_CC); - break; - default: - zend_html_puts(LANG_SCNG(yy_text), LANG_SCNG(yy_leng) TSRMLS_CC); - break; - } + + zend_html_puts(LANG_SCNG(yy_text), LANG_SCNG(yy_leng) TSRMLS_CC); if (Z_TYPE(token) == IS_STRING || Z_TYPE(token) == IS_UNICODE) { @@ -156,19 +150,6 @@ Z_TYPE(token) = 0; } - /* handler for trailing comments, see bug #42767 */ - if (LANG_SCNG(yy_leng) && LANG_SCNG(yy_text) < LANG_SCNG(yy_limit)) { - if (last_color != syntax_highlighter_ini->highlight_comment) { - if (last_color != syntax_highlighter_ini->highlight_html) { - zend_printf(""); - } - if (syntax_highlighter_ini->highlight_comment != syntax_highlighter_ini->highlight_html) { - zend_printf("", syntax_highlighter_ini->highlight_comment); - } - } - zend_html_puts(LANG_SCNG(yy_text), (LANG_SCNG(yy_limit) - LANG_SCNG(yy_text)) TSRMLS_CC); - } - if (last_color != syntax_highlighter_ini->highlight_html) { zend_printf("\n"); } Index: Zend/zend_language_scanner.l =================================================================== RCS file: /repository/ZendEngine2/zend_language_scanner.l,v retrieving revision 1.205 diff -u -r1.205 zend_language_scanner.l --- Zend/zend_language_scanner.l 26 Mar 2009 20:01:38 -0000 1.205 +++ Zend/zend_language_scanner.l 5 May 2009 01:02:40 -0000 @@ -115,13 +115,19 @@ } \ } +/* To save initial string length after scanning to first variable, CG(doc_comment_len) can be reused */ +#define SET_DOUBLE_QUOTES_SCANNED_LENGTH(len) CG(doc_comment_len) = (len) +#define GET_DOUBLE_QUOTES_SCANNED_LENGTH() CG(doc_comment_len) + +#define IS_LABEL_START(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z') || (c) == '_' || (c) >= 0x7F) + #define ZEND_IS_OCT(c) ((c)>='0' && (c)<='7') #define ZEND_IS_HEX(c) (((c)>='0' && (c)<='9') || ((c)>='a' && (c)<='f') || ((c)>='A' && (c)<='F')) BEGIN_EXTERN_C() static void _yy_push_state(int new_state TSRMLS_DC) - { +{ zend_stack_push(&SCNG(state_stack), (void *) &YYGETCONDITION(), sizeof(int)); YYSETCONDITION(new_state); } @@ -1324,63 +1330,8 @@ WHITESPACE [ \n\r\t]+ TABS_AND_SPACES [ \t]* TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@] -ANY_CHAR [^\x00] +ANY_CHAR [^] NEWLINE ("\r"|"\n"|"\r\n") -NULL [\x00]{1} - -/* - * LITERAL_DOLLAR matches unescaped $ that aren't followed by a label character - * or a { and therefore will be taken literally. The case of literal $ before - * a variable or "${" is handled in a rule for each string type - */ -DOUBLE_QUOTES_LITERAL_DOLLAR ("$"+([^a-zA-Z_\x7f-\xff$"\\{]|("\\"{ANY_CHAR}))) -BACKQUOTE_LITERAL_DOLLAR ("$"+([^a-zA-Z_\x7f-\xff$`\\{]|("\\"{ANY_CHAR}))) -HEREDOC_LITERAL_DOLLAR ("$"+([^a-zA-Z_\x7f-\xff$\n\r\\{]|("\\"[^\n\r]))) - -/* - * Usually, HEREDOC_NEWLINE will just function like a simple NEWLINE, but some - * special cases need to be handled. HEREDOC_CHARS doesn't allow a line to - * match when { or $, and/or \ is at the end. (("{"*|"$"*)"\\"?) handles that, - * along with cases where { or $, and/or \ is the ONLY thing on a line - * - * The other case is when a line contains a label, followed by ONLY - * { or $, and/or \ Handled by ({LABEL}";"?((("{"+|"$"+)"\\"?)|"\\")) - */ -HEREDOC_NEWLINE ((({LABEL}";"?((("{"+|"$"+)"\\"?)|"\\"))|(("{"*|"$"*)"\\"?)){NEWLINE}) - -/* - * This pattern is just used in the next 2 for matching { or literal $, and/or - * \ escape sequence immediately at the beginning of a line or after a label - */ -HEREDOC_CURLY_OR_ESCAPE_OR_DOLLAR (("{"+[^$\n\r\\{])|("{"*"\\"[^\n\r])|{HEREDOC_LITERAL_DOLLAR}) - -/* - * These 2 label-related patterns allow HEREDOC_CHARS to continue "regular" - * matching after a newline that starts with either a non-label character or a - * label that isn't followed by a newline. Like HEREDOC_CHARS, they won't match - * a variable or "{$" Matching a newline, and possibly label, up TO a variable - * or "{$", is handled in the heredoc rules - * - * The HEREDOC_LABEL_NO_NEWLINE pattern (";"[^$\n\r\\{]) handles cases where ; - * follows a label. [^a-zA-Z0-9_\x7f-\xff;$\n\r\\{] is needed to prevent a label - * character or ; from matching on a possible (real) ending label - */ -HEREDOC_NON_LABEL ([^a-zA-Z_\x7f-\xff$\n\r\\{]|{HEREDOC_CURLY_OR_ESCAPE_OR_DOLLAR}) -HEREDOC_LABEL_NO_NEWLINE ({LABEL}([^a-zA-Z0-9_\x7f-\xff;$\n\r\\{]|(";"[^$\n\r\\{])|(";"?{HEREDOC_CURLY_OR_ESCAPE_OR_DOLLAR}))) - -/* - * CHARS matches everything up to a variable or "{$" - * {'s are matched as long as they aren't followed by a $ - * The case of { before "{$" is handled in a rule for each string type - * - * For heredocs, matching continues across/after newlines if/when it's known - * that the next line doesn't contain a possible ending label - */ -DOUBLE_QUOTES_CHARS ("{"*([^$"\\{]|("\\"{ANY_CHAR}))|{DOUBLE_QUOTES_LITERAL_DOLLAR}) -BACKQUOTE_CHARS ("{"*([^$`\\{]|("\\"{ANY_CHAR}))|{BACKQUOTE_LITERAL_DOLLAR}) -HEREDOC_CHARS ("{"*([^$\n\r\\{]|("\\"[^\n\r]))|{HEREDOC_LITERAL_DOLLAR}|({HEREDOC_NEWLINE}+({HEREDOC_NON_LABEL}|{HEREDOC_LABEL_NO_NEWLINE}))) - -NOWDOC_CHARS ([^\n\r]|{NEWLINE}+([^a-zA-Z_\x7f-\xff\n\r]|({LABEL}([^a-zA-Z0-9_\x7f-\xff;\n\r]|(";"[^\n\r]))))) /* compute yyleng before each rule */ := yyleng = YYCURSOR - SCNG(yy_text); @@ -2037,6 +1988,14 @@ } "" { + YYCTYPE *bracket = zend_memrchr(yytext, '<', yyleng - (sizeof("script language=php>") - 1)); + + if (bracket != SCNG(yy_text)) { + /* Handle previously scanned HTML, as possible