Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ocamllex assertion failure #8453

Closed
vicuna opened this issue Jan 22, 2004 · 2 comments
Closed

ocamllex assertion failure #8453

vicuna opened this issue Jan 22, 2004 · 2 comments
Labels

Comments

@vicuna
Copy link

vicuna commented Jan 22, 2004

Original bug ID: 2051
Reporter: administrator
Status: closed
Resolution: fixed
Priority: normal
Severity: minor
Category: ~DO NOT USE (was: OCaml general)

Bug description

ocamllex from ocaml 3.07+2 fails on the enclosed file with an error:

Fatal error: exception Assert_failure("lexgen.ml", 1005, 10)

ocamllex from ocaml 3.06 does not report an error.


{
open Cssparse

(* Substring helpers used to trim delimiter characters off a lexeme.
   Each one goes through [String.sub], which raises [Invalid_argument]
   when [s] is too short for the requested slice. *)

(* [tail1 s] is [s] without its first character. *)
let tail1 s =
  let n = String.length s in
  String.sub s 1 (n - 1)

(* [head1 s] is [s] without its last character. *)
let head1 s =
  let n = String.length s in
  String.sub s 0 (n - 1)

(* [inner s] is [s] without its first and last characters. *)
let inner s =
  let n = String.length s in
  String.sub s 1 (n - 2)

(* [split_dimension s] cuts [s] after its longest prefix made only of
   digits, '+', '-' and '.', and returns
   [DIMENSION (number, unit)] where [number] is that prefix converted with
   [float_of_string] and [unit] is the remainder of [s].
   [float_of_string] raises [Failure] if the prefix is not a valid float. *)
let split_dimension s =
  let len = String.length s in
  let is_numeric = function
    | '0' .. '9' | '+' | '-' | '.' -> true
    | _ -> false
  in
  (* Index of the first character that is not part of the numeric prefix. *)
  let rec scan i =
    if i < len && is_numeric s.[i] then scan (i + 1) else i
  in
  let div = scan 0 in
  DIMENSION (float_of_string (String.sub s 0 div), String.sub s div (len - div))

(* Save space by sharing copies of the most common keywords. *)
(* XXX CSS unquoted words are case-insensitive.  XML element names are
   case-sensitive.  Is String.lowercase inappropriate for XML? *)
(* [intern s] lowercases [s]; when the result is "none" or "normal" the
   corresponding string constant below is returned instead of the freshly
   allocated copy, otherwise the lowercased copy itself is returned. *)
let intern s =
  let lowered = String.lowercase s in
  if lowered = "none" then "none"
  else if lowered = "normal" then "normal"
  else lowered

(* [doident lexeme] builds the token for an identifier-like lexeme.
   A lexeme whose last character is '(' becomes [FUNCTION] carrying the
   lexeme without that parenthesis; anything else becomes [IDENT] carrying
   the interned lexeme.  Indexing raises [Invalid_argument] on an empty
   lexeme. *)
let doident lexeme =
  let last_char = lexeme.[String.length lexeme - 1] in
  match last_char with
  | '(' -> FUNCTION (head1 lexeme)
  | _ -> IDENT (intern lexeme)

(* [donumber lexeme] builds the token for a numeric lexeme.
   A trailing '%' yields [PERCENTAGE] of the number before the sign;
   otherwise the whole lexeme is converted and wrapped in [NUMBER].
   [float_of_string] raises [Failure] on text it cannot parse, and the
   index access raises [Invalid_argument] on an empty lexeme. *)
let donumber lexeme =
  let last = String.length lexeme - 1 in
  match lexeme.[last] with
  | '%' -> PERCENTAGE (float_of_string (String.sub lexeme 0 last))
  | _ -> NUMBER (float_of_string lexeme)

(* [atkeyword lexeme] maps an at-keyword lexeme to its token.
   The comparison is done on the lowercased lexeme.  The five keywords in
   the table get their dedicated token; any other lexeme becomes
   [ATKEYWORD] carrying the lowercased text without its first character. *)
let atkeyword lexeme =
  let lowered = String.lowercase lexeme in
  let dedicated =
    [ "@import", ATIMPORT;
      "@charset", ATCHARSET;
      "@media", ATMEDIA;
      "@page", ATPAGE;
      "@font-face", ATFONTFACE ]
  in
  try List.assoc lowered dedicated
  with Not_found -> ATKEYWORD (tail1 lowered)

(* [decode_unicode s] drops the first character of [s], reads the rest as
   a hexadecimal number and returns the character with that code.
   Raises [Failure] if the rest is not valid hexadecimal, and
   [Assert_failure] if the code is outside 0..255.
   NOTE(review): css_unicode allows an optional trailing whitespace
   character; such a lexeme would make [int_of_string] fail here.  Confirm
   that callers strip it first. *)
let decode_unicode s =
  let digits = String.sub s 1 (String.length s - 1) in
  let code = int_of_string ("0x" ^ digits) in
  assert (0 <= code && code <= 255);
  char_of_int code

(* XXX Skip whitespace? *)
(* [uri lexeme] wraps in [URI] the lexeme minus its first four characters
   and its last character.  The lexer rule that calls it matches text that
   starts with "url(" and ends with ')', so those are the parts removed.
   [String.sub] raises [Invalid_argument] if the lexeme is shorter than
   five characters. *)
let uri lexeme =
  let payload_len = String.length lexeme - 5 in
  URI (String.sub lexeme 4 payload_len)
}
(* Named regular expressions for the CSS tokenizer.
   The repetition operators, backslash escapes and tilde characters below
   were restored after markup extraction had stripped them; the shapes
   follow the token definitions of the CSS2 grammar. *)
let hex = [ '0' - '9' 'a' - 'f' ]
let HEX = [ '0' - '9' 'A' - 'F' ]
(* Technically only 1-6 hex characters are allowed but the lex tables
   are much smaller when + is used. *)
let css_unicode = '\\' (HEX | hex)+ ['\n' '\r' '\t' '\012']?
let css_escape = css_unicode | '\\' [ ' ' - '~' ] (* \200-\4177777 omitted *)
(* The complement of the full character range: this regexp can never match.
   It is the construct that triggers the reported ocamllex assertion
   failure, so it is kept exactly as submitted. *)
let css_nonascii = [^ '\000' - '\255' ]
let css_nmstart = ['a' - 'z' 'A' - 'Z' ] | css_nonascii | css_unicode
let css_nmchar = ['a' - 'z' 'A' - 'Z' '0' - '9' '-' ] | css_nonascii | css_unicode
let css_name = css_nmchar +
let css_ident = css_nmstart css_nmchar *

let css_num = ['0' - '9']+ | ['0' - '9']* '.' ['0' - '9']+
let css_nl = '\n' | '\r' '\n' | '\r' | '\012'
(* Double-quoted string: may contain a bare single quote. *)
let css_string1 = '\034' ( [ '\t' ' ' '!' '#' '$' '%' '&' '(' - '~' ] | '\\' css_nl | '\'' | css_nonascii | css_escape )* '\034'
(* Single-quoted string: may contain a bare double quote. *)
let css_string2 = '\039' ( [ '\t' ' ' '!' '#' '$' '%' '&' '(' - '~' ] | '\\' css_nl | '\034' | css_nonascii | css_escape )* '\039'
let css_string = css_string1 | css_string2
let css_w = [' ' '\t' '\r' '\n' '\012']*
let css_s = [' ' '\t' '\r' '\n' '\012']+
(* Slash-star comment, closed by the first star-slash. *)
let css_comment = '/' '*' [^ '*']* '*'+ ([^ '/'][^ '*']* '*'+)* '/'

(* The CSS spec defines a grammar which allows just about any combination
of tokens and advises against using the real grammar in appendix D. *)

(* Entry point of the CSS tokenizer: returns one token per call.
   Comment delimiters, the star literal and the CDO/CDC string literals
   were restored after markup extraction had stripped them; the semantic
   actions are unchanged. *)
rule css =
parse
(* Skip over CDO, CDC, and comments. *)
"<!--" | "-->" | css_comment | css_s { css lexbuf } |
'#' css_name { HASH(tail1(Lexing.lexeme lexbuf)) } |
'@' css_ident{ atkeyword(Lexing.lexeme lexbuf) } |
css_ident '('? { doident (Lexing.lexeme lexbuf) } |
(* XXX String processing: strip quotes; delete backslash-newline *)
css_string { STRING(inner(Lexing.lexeme lexbuf)) } |
['+' '-']? css_num '%'? { donumber(Lexing.lexeme lexbuf) } |
['+' '-']? css_num css_ident { split_dimension(Lexing.lexeme lexbuf) } |
(* XXX Should be case-insensitive? *)
(* NOTE(review): as submitted, the parenthesised group in the second url
   alternative has no repetition operator, so it accepts a single
   character only.  Left as is; confirm against the original attachment. *)
"url(" css_w css_ident css_w ')' |
"url(" css_w (['!' '#' '$' '%' '&' - '~' ] | css_nonascii | css_escape) css_w ')' { uri(Lexing.lexeme lexbuf) } |
"U+" (HEX | '?')+ ('-' HEX+)? { UNICODE(0,0) } |
'*' { STAR } |
'.' { DOT } |
',' { COMMA } |
'+' { PLUS } |
'[' { LSQUARE } |
']' { RSQUARE } |
':' { COLON } |
'>' { CHILD } |
'{' { LBRACE } |
'}' { RBRACE } |
';' { SEMICOLON } |
'=' { EQUALS } |
'(' { LPAREN } |
')' { RPAREN } |
'/' { SLASH } | (* Only in font: line-height declaration *)
'!' { EXCLAMATION } | (* Only followed by "important" *)
(* XXX Parentheses? *)
(*css_s { Space } |*)
"~=" { CONTAINS } |
"|=" { PREFIX } |
_ { ERROR((Lexing.lexeme lexbuf).[0]) } |
eof { EOF }



@vicuna
Copy link
Author

vicuna commented Jan 23, 2004

Comment author: administrator

Content-ID: 550.1074812672.1@w20-575-47.mit.edu

ocamllex from ocaml 3.07+2 fails on the enclosed file with an error:

Fatal error: exception Assert_failure("lexgen.ml", 1005, 10)

ocamllex from ocaml 3.06 does not report an error.

This is indeed a bug, thank you for reporting it.
The bug originates from a never-matching regexp that you should
perhaps be aware of:

Snip

let css_nonascii = [^ '\000' - '\255' ]
^^^^^^
This one.
Snip

rule css =
parse
Snip

(* XXX Should be case-insentive? *)
"url(" css_w css_ident css_w ')' |
"url(" css_w (['!' '#' '$' '%' '&' - '~' ] | css_nonascii | css_escape) css_w ')' { uri(Lexing.lexeme lexbuf) } |
                                             ^^^^^^^^^^^^
                                             Used here

The patch is now committed to the CVS repository.

Regards,

--
Luc Maranget

@vicuna
Copy link
Author

vicuna commented Jan 23, 2004

Comment author: administrator

Fixed on 2004-01-24 --Luc

@vicuna vicuna closed this as completed Jan 23, 2004
@vicuna vicuna added the bug label Mar 19, 2019
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

No branches or pull requests

1 participant