| Description | There are two unwanted behaviors on the treatment of string literals in camlp4.
(1) Incomplete decimal/hexadecimal character escapes cause unspecified errors.
$ echo '"\0"' | camlp4o -impl /dev/stdin
Warning: File "/dev/stdin", line 1, characters 1-3: Illegal backslash escape in string or character (0)
Parse error:
$
(2) Other unharmful character escapes which are undefined in Caml syntax was useful for camlp4 extensions until 3.09, but now it produces a warning even when these are handled later by syntax extensions. Several useful syntax extensions depend on 3.09 behavior. To achieve both compatibility and safety, such checking is better to be implemented in parser (or output) instead of in lexer. The attached patch (against 3.12.0+dev5) implements the string constant validity checking in the parser stage, as well as fixing problem (1). |
| Attached Files | camlp4-fixup.diff [^] (4,010 bytes) 2009-10-03 11:22 [Show Content] [Hide Content]Index: camlp4/Camlp4/Struct/Lexer.mll
===================================================================
RCS file: /caml/ocaml/camlp4/Camlp4/Struct/Lexer.mll,v
retrieving revision 1.11
diff -u -r1.11 Lexer.mll
--- camlp4/Camlp4/Struct/Lexer.mll 27 Oct 2008 13:58:14 -0000 1.11
+++ camlp4/Camlp4/Struct/Lexer.mll 3 Oct 2009 09:01:26 -0000
@@ -379,13 +379,11 @@
| '\\' ['\\' '"' 'n' 't' 'b' 'r' ' ' '\''] { store_parse string c }
| '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] { store_parse string c }
| '\\' 'x' hexa_char hexa_char { store_parse string c }
- | '\\' (_ as x)
- { if is_in_comment c
+ | '\\' (['0'-'9' 'x'] as x) _
+ { if is_in_comment c
then store_parse string c
- else begin
- warn (Illegal_escape (String.make 1 x)) (Loc.of_lexbuf lexbuf);
- store_parse string c
- end }
+ else err (Illegal_escape (String.make 1 x)) (loc c) }
+ | '\\' (_ as x) { store_parse string c }
| newline
{ update_loc c None 1 false 0; store_parse string c }
| eof { err Unterminated_string (loc c) }
Index: camlp4/Camlp4Parsers/Camlp4OCamlRevisedParser.ml
===================================================================
RCS file: /caml/ocaml/camlp4/Camlp4Parsers/Camlp4OCamlRevisedParser.ml,v
retrieving revision 1.17
diff -u -r1.17 Camlp4OCamlRevisedParser.ml
--- camlp4/Camlp4Parsers/Camlp4OCamlRevisedParser.ml 5 Mar 2009 16:11:07 -0000 1.17
+++ camlp4/Camlp4Parsers/Camlp4OCamlRevisedParser.ml 3 Oct 2009 09:01:27 -0000
@@ -209,6 +209,56 @@
else "-" ^ n
;
+ value is_hexa_char c =
+ match c with
+ [ '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' -> True
+ | _ -> False ]
+ ;
+ value is_deci_char c =
+ match c with
+ [ '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' -> True
+ | _ -> False ]
+ ;
+ value check_string loc s =
+ let len = String.length s in
+ let warn_exit p c =
+ let loc = Loc.move `start (p + 1) loc in
+ let loc = Loc.move `stop (p - len) loc in
+ Format.eprintf "Warning: %a: illegal backslash escape in string (%s)@."
+ Loc.print loc (Char.escaped c)
+ in
+ let rec iter p =
+ if p >= len then ()
+ else
+ match s.[p] with
+ [ '\\' -> begin
+ if p + 1 >= len then
+ warn_exit p '\\' (* should not happen *)
+ else
+ match s.[p + 1] with
+ [ '\\' | '"' | 'n' | 't' | 'b' | 'r' | ' ' | '\'' ->
+ iter (p + 2)
+ | '0' .. '9' as c ->
+ if p + 3 < len &&
+ is_deci_char s.[p + 2] &&
+ is_deci_char s.[p + 3] then
+ iter (p + 4)
+ else
+ warn_exit (p + 1) c
+ | 'x' ->
+ if p + 3 < len &&
+ is_hexa_char s.[p + 2] &&
+ is_hexa_char s.[p + 3] then
+ iter (p + 4)
+ else
+ warn_exit (p + 1) 'x'
+ | _ as c ->
+ warn_exit (p + 1) c ]
+ end
+ | _ -> iter (p + 1) ]
+ in
+ do iter 0; s done
+ ;
value mkumin _loc f arg =
match arg with
[ <:expr< $int:n$ >> -> <:expr< $int:neg_string n$ >>
@@ -688,7 +738,7 @@
| s = a_INT64 -> <:expr< $int64:s$ >>
| s = a_NATIVEINT -> <:expr< $nativeint:s$ >>
| s = a_FLOAT -> <:expr< $flo:s$ >>
- | s = a_STRING -> <:expr< $str:s$ >>
+ | s = a_STRING -> <:expr< $str: check_string _loc s$ >>
| s = a_CHAR -> <:expr< $chr:s$ >>
| i = val_longident -> <:expr< $id:i$ >>
| "`"; s = a_ident -> <:expr< ` $s$ >>
@@ -850,7 +900,7 @@
| s = a_INT64 -> <:patt< $int64:s$ >>
| s = a_NATIVEINT -> <:patt< $nativeint:s$ >>
| s = a_FLOAT -> <:patt< $flo:s$ >>
- | s = a_STRING -> <:patt< $str:s$ >>
+ | s = a_STRING -> <:patt< $str: check_string _loc s$ >>
| s = a_CHAR -> <:patt< $chr:s$ >>
| "-"; s = a_INT -> <:patt< $int:neg_string s$ >>
| "-"; s = a_INT32 -> <:patt< $int32:neg_string s$ >>
camlp4-fixup-rev2.diff [^] (3,520 bytes) 2009-10-05 23:29 [Show Content] [Hide Content]Index: camlp4/Camlp4/Struct/Lexer.mll
===================================================================
RCS file: /caml/ocaml/camlp4/Camlp4/Struct/Lexer.mll,v
retrieving revision 1.11
diff -u -r1.11 Lexer.mll
--- camlp4/Camlp4/Struct/Lexer.mll 27 Oct 2008 13:58:14 -0000 1.11
+++ camlp4/Camlp4/Struct/Lexer.mll 5 Oct 2009 21:23:38 -0000
@@ -379,13 +379,11 @@
| '\\' ['\\' '"' 'n' 't' 'b' 'r' ' ' '\''] { store_parse string c }
| '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] { store_parse string c }
| '\\' 'x' hexa_char hexa_char { store_parse string c }
- | '\\' (_ as x)
- { if is_in_comment c
+ | '\\' (['0'-'9' 'x'] as x) _
+ { if is_in_comment c
then store_parse string c
- else begin
- warn (Illegal_escape (String.make 1 x)) (Loc.of_lexbuf lexbuf);
- store_parse string c
- end }
+ else err (Illegal_escape (String.make 1 x)) (loc c) }
+ | '\\' (_ as x) { store_parse string c }
| newline
{ update_loc c None 1 false 0; store_parse string c }
| eof { err Unterminated_string (loc c) }
Index: camlp4/Camlp4Parsers/Camlp4OCamlRevisedParser.ml
===================================================================
RCS file: /caml/ocaml/camlp4/Camlp4Parsers/Camlp4OCamlRevisedParser.ml,v
retrieving revision 1.17
diff -u -r1.17 Camlp4OCamlRevisedParser.ml
--- camlp4/Camlp4Parsers/Camlp4OCamlRevisedParser.ml 5 Mar 2009 16:11:07 -0000 1.17
+++ camlp4/Camlp4Parsers/Camlp4OCamlRevisedParser.ml 5 Oct 2009 21:23:39 -0000
@@ -208,7 +208,56 @@
if len > 0 && n.[0] = '-' then String.sub n 1 (len - 1)
else "-" ^ n
;
-
+ value is_hexa_char c =
+ match c with
+ [ '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' -> True
+ | _ -> False ]
+ ;
+ value is_deci_char c =
+ match c with
+ [ '0' .. '9' | 'a' .. 'f' | 'A' .. 'F' -> True
+ | _ -> False ]
+ ;
+ value check_string loc s =
+ let len = String.length s in
+ let warn_exit p c =
+ let loc = Loc.move `start (p + 1) loc in
+ let loc = Loc.move `stop (p - len) loc in
+ Format.eprintf "Warning: %a: illegal backslash escape in string (%s), in char %d of %S@."
+ Loc.print loc (Char.escaped c) p s
+ in
+ let rec iter p =
+ if p >= len then ()
+ else
+ match s.[p] with
+ [ '\\' -> begin
+ if p + 1 >= len then
+ warn_exit p '\\' (* should not happen *)
+ else
+ match s.[p + 1] with
+ [ '\\' | '"' | 'n' | 't' | 'b' | 'r' | ' ' | '\'' -> (* '"' *)
+ iter (p + 2)
+ | '0' .. '9' as c ->
+ if p + 3 < len &&
+ is_deci_char s.[p + 2] &&
+ is_deci_char s.[p + 3] then
+ iter (p + 4)
+ else
+ warn_exit (p + 1) c
+ | 'x' ->
+ if p + 3 < len &&
+ is_hexa_char s.[p + 2] &&
+ is_hexa_char s.[p + 3] then
+ iter (p + 4)
+ else
+ warn_exit (p + 1) 'x'
+ | _ as c ->
+ warn_exit (p + 1) c ]
+ end
+ | _ -> iter (p + 1) ]
+ in
+ do iter 0; s done
+ ;
value mkumin _loc f arg =
match arg with
[ <:expr< $int:n$ >> -> <:expr< $int:neg_string n$ >>
@@ -1604,7 +1653,7 @@
;
a_STRING:
[ [ `ANTIQUOT (""|"str"|"`str" as n) s -> mk_anti n s
- | `STRING _ s -> s ] ]
+ | `STRING _ s -> check_string s ] ]
;
string_list:
[ [ `ANTIQUOT (""|"str_list") s -> Ast.LAnt (mk_anti "str_list" s)
|