cparser/Lexer.mll

   1 (*
   2  *
   3  * Copyright (c) 2001-2003,
   4  *  George C. Necula    <necula@cs.berkeley.edu>
   5  *  Scott McPeak        <smcpeak@cs.berkeley.edu>
   6  *  Wes Weimer          <weimer@cs.berkeley.edu>
   7  *  Ben Liblit          <liblit@cs.berkeley.edu>
   8  * All rights reserved.
   9  *
  10  * Redistribution and use in source and binary forms, with or without
  11  * modification, are permitted provided that the following conditions are
  12  * met:
  13  *
  14  * 1. Redistributions of source code must retain the above copyright
  15  * notice, this list of conditions and the following disclaimer.
  16  *
  17  * 2. Redistributions in binary form must reproduce the above copyright
  18  * notice, this list of conditions and the following disclaimer in the
  19  * documentation and/or other materials provided with the distribution.
  20  *
  21  * 3. The names of the contributors may not be used to endorse or promote
  22  * products derived from this software without specific prior written
  23  * permission.
  24  *
  25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
  26  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
  28  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
  29  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  30  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  31  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  32  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  33  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  34  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  35  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  36  *
  37  *)
  38 (* FrontC -- lexical analyzer
  39 **
  40 ** 1.0  3.22.99 Hugues Cassé    First version.
  41 ** 2.0  George Necula 12/12/00: Many extensions
  42 *)
  43 {
  44 open Lexing
  45 open Parse_aux
  46 open Parser
  47
  48 exception Eof
  49
  50 module H = Hashtbl
  51
  52 let newline lb =
  53   let cp = lb.lex_curr_p in
  54   lb.lex_curr_p <- { cp with pos_lnum = 1 + cp.pos_lnum }
  55
  56 let setCurrentLine lb lineno =
  57   let cp = lb.lex_curr_p in
  58   lb.lex_curr_p <- { cp with pos_lnum = lineno }
  59
  60 let setCurrentFile lb file =
  61   let cp = lb.lex_curr_p in
  62   lb.lex_curr_p <- { cp with pos_fname = file }
  63
  64 let matchingParsOpen = ref 0
  65
  66 let currentLoc = Cabshelper.currentLoc_lexbuf
  67
  68 let int64_to_char value =
  69   assert (value <= 255L && value >= 0L);
  70   Char.chr (Int64.to_int value)
  71
  72 (* takes a not-nul-terminated list, and converts it to a string. *)
  73 let rec intlist_to_string (str: int64 list):string =
  74   match str with
  75     [] -> ""  (* add nul-termination *)
  76   | value::rest ->
  77       let this_char = int64_to_char value in
  78       (String.make 1 this_char) ^ (intlist_to_string rest)
  79
  80 (*
  81 ** Keyword hashtable
  82 *)
  83 let lexicon = H.create 211
  84 let init_lexicon _ =
  85   H.clear lexicon;
  86   List.iter
  87     (fun (key, builder) -> H.add lexicon key builder)
  88     [ ("_Bool", fun loc -> UNDERSCORE_BOOL loc);
  89       ("auto", fun loc -> AUTO loc);
  90       ("const", fun loc -> CONST loc);
  91       ("__const", fun loc -> CONST loc);
  92       ("__const__", fun loc -> CONST loc);
  93       ("static", fun loc -> STATIC loc);
  94       ("extern", fun loc -> EXTERN loc);
  95       ("long", fun loc -> LONG loc);
  96       ("short", fun loc -> SHORT loc);
  97       ("register", fun loc -> REGISTER loc);
  98       ("signed", fun loc -> SIGNED loc);
  99       ("__signed", fun loc -> SIGNED loc);
 100       ("unsigned", fun loc -> UNSIGNED loc);
 101       ("volatile", fun loc -> VOLATILE loc);
 102       ("__volatile", fun loc -> VOLATILE loc);
 103       (* WW: see /usr/include/sys/cdefs.h for why __signed and __volatile
 104        * are accepted GCC-isms *)
 105       ("char", fun loc -> CHAR loc);
 106       ("int", fun loc -> INT loc);
 107       ("float", fun loc -> FLOAT loc);
 108       ("double", fun loc -> DOUBLE loc);
 109       ("void", fun loc -> VOID loc);
 110       ("enum", fun loc -> ENUM loc);
 111       ("struct", fun loc -> STRUCT loc);
 112       ("typedef", fun loc -> TYPEDEF loc);
 113       ("union", fun loc -> UNION loc);
 114       ("break", fun loc -> BREAK loc);
 115       ("continue", fun loc -> CONTINUE loc);
 116       ("goto", fun loc -> GOTO loc);
 117       ("return", fun loc -> RETURN loc);
 118       ("switch", fun loc -> SWITCH loc);
 119       ("case", fun loc -> CASE loc);
 120       ("default", fun loc -> DEFAULT loc);
 121       ("while", fun loc -> WHILE loc);
 122       ("do", fun loc -> DO loc);
 123       ("for", fun loc -> FOR loc);
 124       ("if", fun loc -> IF loc);
 125       ("else", fun _ -> ELSE);
 126       (*** Implementation specific keywords ***)
 127       ("__signed__", fun loc -> SIGNED loc);
 128       ("__inline__", fun loc -> INLINE loc);
 129       ("inline", fun loc -> INLINE loc);
 130       ("__inline", fun loc -> INLINE loc);
 131       ("_inline", fun loc ->
 132                       if !msvcMode then
 133                         INLINE loc
 134                       else
 135                         IDENT ("_inline", loc));
 136       ("__attribute__", fun loc -> ATTRIBUTE loc);
 137       ("__attribute", fun loc -> ATTRIBUTE loc);
 138 (*
 139       ("__attribute_used__", fun loc -> ATTRIBUTE_USED loc);
 140 *)
 141       ("__blockattribute__", fun _ -> BLOCKATTRIBUTE);
 142       ("__blockattribute", fun _ -> BLOCKATTRIBUTE);
 143       ("__asm__", fun loc -> ASM loc);
 144       ("asm", fun loc -> ASM loc);
 145       ("__typeof__", fun loc -> TYPEOF loc);
 146       ("__typeof", fun loc -> TYPEOF loc);
 147       ("typeof", fun loc -> TYPEOF loc);
 148       ("__alignof", fun loc -> ALIGNOF loc);
 149       ("__alignof__", fun loc -> ALIGNOF loc);
 150       ("__volatile__", fun loc -> VOLATILE loc);
 151       ("__volatile", fun loc -> VOLATILE loc);
 152
 153       ("__FUNCTION__", fun loc -> FUNCTION__ loc);
 154       ("__func__", fun loc -> FUNCTION__ loc); (* ISO 6.4.2.2 *)
 155       ("__PRETTY_FUNCTION__", fun loc -> PRETTY_FUNCTION__ loc);
 156       ("__label__", fun _ -> LABEL__);
 157       (*** weimer: GCC arcana ***)
 158       ("__restrict", fun loc -> RESTRICT loc);
 159       ("restrict", fun loc -> RESTRICT loc);
 160 (*      ("__extension__", EXTENSION); *)
 161       (**** MS VC ***)
 162       ("__int64", fun loc -> INT64 loc);
 163       ("__int32", fun loc -> INT loc);
 164       ("_cdecl",  fun loc -> MSATTR ("_cdecl", loc));
 165       ("__cdecl", fun loc -> MSATTR ("__cdecl", loc));
 166       ("_stdcall", fun loc -> MSATTR ("_stdcall", loc));
 167       ("__stdcall", fun loc -> MSATTR ("__stdcall", loc));
 168       ("_fastcall", fun loc -> MSATTR ("_fastcall", loc));
 169       ("__fastcall", fun loc -> MSATTR ("__fastcall", loc));
 170       ("__w64", fun loc -> MSATTR("__w64", loc));
 171       ("__declspec", fun loc -> DECLSPEC loc);
 172       ("__forceinline", fun loc -> INLINE loc); (* !! we turn forceinline
 173                                                  * into inline *)
 174       ("__try", fun loc -> TRY loc);
 175       ("__except", fun loc -> EXCEPT loc);
 176       ("__finally", fun loc -> FINALLY loc);
 177       (* weimer: some files produced by 'GCC -E' expect this type to be
 178        * defined *)
 179       ("__builtin_va_list", fun loc -> NAMED_TYPE ("__builtin_va_list", loc));
 180       ("__builtin_va_arg", fun loc -> BUILTIN_VA_ARG loc);
 181       ("__builtin_types_compatible_p", fun loc -> BUILTIN_TYPES_COMPAT loc);
 182       ("__builtin_offsetof", fun loc -> BUILTIN_OFFSETOF loc);
 183       (* On some versions of GCC __thread is a regular identifier *)
 184       ("__thread", fun loc -> THREAD loc)
 185     ]
 186
 187 (* Mark an identifier as a type name. The old mapping is preserved and will
 188  * be reinstated when we exit this context *)
 189 let add_type name =
 190    (* ignore (print_string ("adding type name " ^ name ^ "\n"));  *)
 191    H.add lexicon name (fun loc -> NAMED_TYPE (name, loc))
 192
 193 let context : string list list ref = ref []
 194
 195 let push_context _ = context := []::!context
 196
 197 let pop_context _ =
 198   match !context with
 199     [] -> assert false
 200   | con::sub ->
 201                 (context := sub;
 202                 List.iter (fun name ->
 203                            (* ignore (print_string ("removing lexicon for " ^ name ^ "\n")); *)
 204                             H.remove lexicon name) con)
 205
 206 (* Mark an identifier as a variable name. The old mapping is preserved and
 207  * will be reinstated when we exit this context  *)
 208 let add_identifier name =
 209   match !context with
 210     [] -> () (* Just ignore raise (InternalError "Empty context stack") *)
 211   | con::sub ->
 212        context := (name::con)::sub;
 213        H.add lexicon name (fun loc -> IDENT (name, loc))
 214
 215
 216 (*
 217 ** Useful primitives
 218 *)
 219 let scan_ident lb id =
 220   let here = currentLoc lb in
 221   try (H.find lexicon id) here
 222   (* default to variable name, as opposed to type *)
 223   with Not_found -> IDENT (id, here)
 224
 225
 226 (*
 227 ** Buffer processor
 228 *)
 229
 230
 231 let init source =
 232   init_lexicon ();
 233   (* Inititialize the pointer in Errormsg *)
 234   Parse_aux.add_type := add_type;
 235   Parse_aux.push_context := push_context;
 236   Parse_aux.pop_context := pop_context;
 237   Parse_aux.add_identifier := add_identifier;
 238   (* Build lexbuf *)
 239   let lb = source in
 240   let cp = lb.lex_curr_p in
 241   lb.lex_curr_p <- {cp with pos_lnum = 1};
 242   lb
 243
 244 let finish () =
 245   ()
 246
 247 (*** Error handling ***)
 248 let error = parse_error
 249
 250
 251 (*** escape character management ***)
 252 let scan_escape (char: char) : int64 =
 253   let result = match char with
 254     'n' -> '\n'
 255   | 'r' -> '\r'
 256   | 't' -> '\t'
 257   | 'b' -> '\b'
 258   | 'f' -> '\012'  (* ASCII code 12 *)
 259   | 'v' -> '\011'  (* ASCII code 11 *)
 260   | 'a' -> '\007'  (* ASCII code 7 *)
 261   | 'e' | 'E' -> '\027'  (* ASCII code 27. This is a GCC extension *)
 262   | '\'' -> '\''
 263   | '"'-> '"'     (* '"' *)
 264   | '?' -> '?'
 265   | '(' when not !msvcMode -> '('
 266   | '{' when not !msvcMode -> '{'
 267   | '[' when not !msvcMode -> '['
 268   | '%' when not !msvcMode -> '%'
 269   | '\\' -> '\\'
 270   | other -> error ("Unrecognized escape sequence: \\" ^ (String.make 1 other)); raise Parsing.Parse_error
 271   in
 272   Int64.of_int (Char.code result)
 273
 274 let scan_hex_escape str =
 275   let radix = Int64.of_int 16 in
 276   let the_value = ref Int64.zero in
 277   (* start at character 2 to skip the \x *)
 278   for i = 2 to (String.length str) - 1 do
 279     let thisDigit = Cabshelper.valueOfDigit (String.get str i) in
 280     (* the_value := !the_value * 16 + thisDigit *)
 281     the_value := Int64.add (Int64.mul !the_value radix) thisDigit
 282   done;
 283   !the_value
 284
 285 let scan_oct_escape str =
 286   let radix = Int64.of_int 8 in
 287   let the_value = ref Int64.zero in
 288   (* start at character 1 to skip the \x *)
 289   for i = 1 to (String.length str) - 1 do
 290     let thisDigit = Cabshelper.valueOfDigit (String.get str i) in
 291     (* the_value := !the_value * 8 + thisDigit *)
 292     the_value := Int64.add (Int64.mul !the_value radix) thisDigit
 293   done;
 294   !the_value
 295
 296 let lex_hex_escape remainder lexbuf =
 297   let prefix = scan_hex_escape (Lexing.lexeme lexbuf) in
 298   prefix :: remainder lexbuf
 299
 300 let lex_oct_escape remainder lexbuf =
 301   let prefix = scan_oct_escape (Lexing.lexeme lexbuf) in
 302   prefix :: remainder lexbuf
 303
 304 let lex_simple_escape remainder lexbuf =
 305   let lexchar = Lexing.lexeme_char lexbuf 1 in
 306   let prefix = scan_escape lexchar in
 307   prefix :: remainder lexbuf
 308
 309 let lex_unescaped remainder lexbuf =
 310   let prefix = Int64.of_int (Char.code (Lexing.lexeme_char lexbuf 0)) in
 311   prefix :: remainder lexbuf
 312
 313 let lex_comment remainder lexbuf =
 314   let ch = Lexing.lexeme_char lexbuf 0 in
 315   let prefix = Int64.of_int (Char.code ch) in
 316   if ch = '\n' then newline lexbuf;
 317   prefix :: remainder lexbuf
 318
 319 let make_char (i:int64):char =
 320   let min_val = Int64.zero in
 321   let max_val = Int64.of_int 255 in
 322   (* if i < 0 || i > 255 then error*)
 323   if compare i min_val < 0 || compare i max_val > 0 then begin
 324     let msg = Printf.sprintf "clexer:make_char: character 0x%Lx too big" i in
 325     error msg
 326   end;
 327   Char.chr (Int64.to_int i)
 328
 329
 330 (* ISO standard locale-specific function to convert a wide character
 331  * into a sequence of normal characters. Here we work on strings.
 332  * We convert L"Hi" to "H\000i\000"
 333   matth: this seems unused.
 334 let wbtowc wstr =
 335   let len = String.length wstr in
 336   let dest = String.make (len * 2) '\000' in
 337   for i = 0 to len-1 do
 338     dest.[i*2] <- wstr.[i] ;
 339   done ;
 340   dest
 341 *)
 342
 343 (* This function converst the "Hi" in L"Hi" to { L'H', L'i', L'\0' }
 344   matth: this seems unused.
 345 let wstr_to_warray wstr =
 346   let len = String.length wstr in
 347   let res = ref "{ " in
 348   for i = 0 to len-1 do
 349     res := !res ^ (Printf.sprintf "L'%c', " wstr.[i])
 350   done ;
 351   res := !res ^ "}" ;
 352   !res
 353 *)
 354
 355 }
 356
 357 let decdigit = ['0'-'9']
 358 let octdigit = ['0'-'7']
 359 let hexdigit = ['0'-'9' 'a'-'f' 'A'-'F']
 360 let letter = ['a'- 'z' 'A'-'Z']
 361
 362
 363 let usuffix = ['u' 'U']
 364 let lsuffix = "l"|"L"|"ll"|"LL"
 365 let intsuffix = lsuffix | usuffix | usuffix lsuffix | lsuffix usuffix
 366               | usuffix ? "i64"
 367
 368
 369 let hexprefix = '0' ['x' 'X']
 370
 371 let intnum = decdigit+ intsuffix?
 372 let octnum = '0' octdigit+ intsuffix?
 373 let hexnum = hexprefix hexdigit+ intsuffix?
 374
 375 let exponent = ['e' 'E']['+' '-']? decdigit+
 376 let fraction  = '.' decdigit+
 377 let decfloat = (intnum? fraction)
 378               |(intnum exponent)
 379               |(intnum? fraction exponent)
 380               | (intnum '.')
 381               | (intnum '.' exponent)
 382
 383 let hexfraction = hexdigit* '.' hexdigit+ | hexdigit+
 384 let binexponent = ['p' 'P'] ['+' '-']? decdigit+
 385 let hexfloat = hexprefix hexfraction binexponent
 386              | hexprefix hexdigit+   binexponent
 387
 388 let floatsuffix = ['f' 'F' 'l' 'L']
 389 let floatnum = (decfloat | hexfloat) floatsuffix?
 390
 391 let ident = (letter|'_'|'$')(letter|decdigit|'_'|'$')*
 392 let blank = [' ' '\t' '\012' '\r']+
 393 let escape = '\\' _
 394 let hex_escape = '\\' ['x' 'X'] hexdigit+
 395 let oct_escape = '\\' octdigit octdigit? octdigit?
 396
 397 rule initial =
 398         parse   "/*"                    { comment lexbuf;
 399                                           initial lexbuf}
 400 |               "//"                    { onelinecomment lexbuf;
 401                                           newline lexbuf;
 402                                           initial lexbuf
 403                                            }
 404 |               blank                   { initial lexbuf}
 405 |               '\n'                    { newline lexbuf;
 406                                           initial lexbuf }
 407 |               '\\' '\r' * '\n'        { newline lexbuf;
 408                                           initial lexbuf
 409                                         }
 410 |               '#'                     { hash lexbuf}
 411 (*
 412 |               "_Pragma"               { PRAGMA (currentLoc lexbuf) }
 413 *)
 414 |               '\''                    { CST_CHAR (chr lexbuf, currentLoc lexbuf)}
 415 |               "L'"                    { CST_WCHAR (chr lexbuf, currentLoc lexbuf) }
 416 |               '"'                     { (* '"' *)
 417 (* matth: BUG:  this could be either a regular string or a wide string.
 418  *  e.g. if it's the "world" in
 419  *     L"Hello, " "world"
 420  *  then it should be treated as wide even though there's no L immediately
 421  *  preceding it.  See test/small1/wchar5.c for a failure case. *)
 422                                           CST_STRING (str lexbuf, currentLoc lexbuf) }
 423 |               "L\""                   { (* weimer: wchar_t string literal *)
 424                                           CST_WSTRING(str lexbuf, currentLoc lexbuf) }
 425 |               floatnum                {CST_FLOAT (Lexing.lexeme lexbuf, currentLoc lexbuf)}
 426 |               hexnum                  {CST_INT (Lexing.lexeme lexbuf, currentLoc lexbuf)}
 427 |               octnum                  {CST_INT (Lexing.lexeme lexbuf, currentLoc lexbuf)}
 428 |               intnum                  {CST_INT (Lexing.lexeme lexbuf, currentLoc lexbuf)}
 429 |               "!quit!"                {EOF}
 430 |               "..."                   {ELLIPSIS}
 431 |               "+="                    {PLUS_EQ}
 432 |               "-="                    {MINUS_EQ}
 433 |               "*="                    {STAR_EQ}
 434 |               "/="                    {SLASH_EQ}
 435 |               "%="                    {PERCENT_EQ}
 436 |               "|="                    {PIPE_EQ}
 437 |               "&="                    {AND_EQ}
 438 |               "^="                    {CIRC_EQ}
 439 |               "<<="                   {INF_INF_EQ}
 440 |               ">>="                   {SUP_SUP_EQ}
 441 |               "<<"                    {INF_INF}
 442 |               ">>"                    {SUP_SUP}
 443 |               "=="                    {EQ_EQ}
 444 |               "!="                    {EXCLAM_EQ}
 445 |               "<="                    {INF_EQ}
 446 |               ">="                    {SUP_EQ}
 447 |               "="                             {EQ}
 448 |               "<"                             {INF}
 449 |               ">"                             {SUP}
 450 |               "++"                    {PLUS_PLUS (currentLoc lexbuf)}
 451 |               "--"                    {MINUS_MINUS (currentLoc lexbuf)}
 452 |               "->"                    {ARROW}
 453 |               '+'                             {PLUS (currentLoc lexbuf)}
 454 |               '-'                             {MINUS (currentLoc lexbuf)}
 455 |               '*'                             {STAR (currentLoc lexbuf)}
 456 |               '/'                             {SLASH}
 457 |               '%'                             {PERCENT}
 458 |               '!'                     {EXCLAM (currentLoc lexbuf)}
 459 |               "&&"                    {AND_AND (currentLoc lexbuf)}
 460 |               "||"                    {PIPE_PIPE}
 461 |               '&'                             {AND (currentLoc lexbuf)}
 462 |               '|'                             {PIPE}
 463 |               '^'                             {CIRC}
 464 |               '?'                             {QUEST}
 465 |               ':'                             {COLON}
 466 |               '~'                    {TILDE (currentLoc lexbuf)}
 467
 468 |               '{'                    {LBRACE (currentLoc lexbuf)}
 469 |               '}'                    {RBRACE (currentLoc lexbuf)}
 470 |               '['                             {LBRACKET}
 471 |               ']'                             {RBRACKET}
 472 |               '('                    { (LPAREN (currentLoc lexbuf)) }
 473 |               ')'                             {RPAREN}
 474 |               ';'                    { (SEMICOLON (currentLoc lexbuf)) }
 475 |               ','                             {COMMA}
 476 |               '.'                             {DOT}
 477 |               "sizeof"                {SIZEOF (currentLoc lexbuf)}
 478 |               "__asm"                 { if !msvcMode then
 479                                              MSASM (msasm lexbuf, currentLoc lexbuf)
 480                                           else (ASM (currentLoc lexbuf)) }
 481
 482 (* If we see __pragma we eat it and the matching parentheses as well *)
 483 |               "__pragma"              { matchingParsOpen := 0;
 484                                           let _ = matchingpars lexbuf in
 485                                           initial lexbuf
 486                                         }
 487
 488 (* __extension__ is a black. The parser runs into some conflicts if we let it
 489  * pass *)
 490 |               "__extension__"         {initial lexbuf }
 491 |               ident                   {scan_ident lexbuf (Lexing.lexeme lexbuf)}
 492 |               eof                     {EOF}
 493 |               _                       {parse_error "Invalid symbol"; raise Parsing.Parse_error }
 494 and comment =
 495     parse
 496       "*/"                              { () }
 497 |     eof                               { () }
 498 |     '\n'                              { newline lexbuf; comment lexbuf }
 499 |               _                       { comment lexbuf }
 500
 501
 502 and onelinecomment = parse
 503     '\n'|eof    { () }
 504 |   _           { onelinecomment lexbuf }
 505
 506 and matchingpars = parse
 507   '\n'          { newline lexbuf; matchingpars lexbuf }
 508 | blank         { matchingpars lexbuf }
 509 | '('           { incr matchingParsOpen; matchingpars lexbuf }
 510 | ')'           { decr matchingParsOpen;
 511                   if !matchingParsOpen = 0 then
 512                      ()
 513                   else
 514                      matchingpars lexbuf
 515                 }
 516 |  "/*"         { comment lexbuf; matchingpars lexbuf}
 517 |  '"'          { (* '"' *)
 518                   let _ = str lexbuf in
 519                   matchingpars lexbuf
 520                  }
 521 | _              { matchingpars lexbuf }
 522
 523 (* # <line number> <file name> ... *)
 524 and hash = parse
 525   '\n'          { newline lexbuf; initial lexbuf}
 526 | blank         { hash lexbuf}
 527 | intnum        { (* We are seeing a line number. This is the number for the
 528                    * next line *)
 529                   let s = Lexing.lexeme lexbuf in
 530                   begin try
 531                     setCurrentLine lexbuf (int_of_string s - 1)
 532                   with Failure ("int_of_string") ->
 533                     (* the int is too big. *)
 534                     ()
 535                   end;
 536                   (* A file name may follow *)
 537                   file lexbuf }
 538 | "line"        { hash lexbuf } (* MSVC line number info *)
 539 | "pragma" blank
 540                 { let here = currentLoc lexbuf in
 541                   PRAGMA_LINE (pragma lexbuf, here)
 542                 }
 543 | _             { endline lexbuf}
 544
 545 and file =  parse
 546         '\n'                    { newline lexbuf; initial lexbuf}
 547 |       blank                   { file lexbuf}
 548 |       '"' [^ '\012' '\t' '"']* '"'    { (* '"' *)
 549                                    let n = Lexing.lexeme lexbuf in
 550                                    let n1 = String.sub n 1
 551                                        ((String.length n) - 2) in
 552                                    setCurrentFile lexbuf n1;
 553                                  endline lexbuf}
 554
 555 |       _                       { endline lexbuf}
 556
 557 and endline = parse
 558         '\n'                    { newline lexbuf; initial lexbuf}
 559 |   eof                         { EOF }
 560 |       _                       { endline lexbuf}
 561
 562 and pragma = parse
 563    '\n'                 { newline lexbuf; "" }
 564 |   _                   { let cur = Lexing.lexeme lexbuf in
 565                           cur ^ (pragma lexbuf) }
 566
 567 and str = parse
 568         '"'             {[]} (* no nul terminiation in CST_STRING '"' *)
 569 |       hex_escape      { lex_hex_escape str lexbuf}
 570 |       oct_escape      { lex_oct_escape str lexbuf}
 571 |       escape          { lex_simple_escape str lexbuf}
 572 |       _               { lex_unescaped str lexbuf}
 573
 574 and chr =  parse
 575         '\''            {[]}
 576 |       hex_escape      {lex_hex_escape chr lexbuf}
 577 |       oct_escape      {lex_oct_escape chr lexbuf}
 578 |       escape          {lex_simple_escape chr lexbuf}
 579 |       _               {lex_unescaped chr lexbuf}
 580
 581 and msasm = parse
 582     blank               { msasm lexbuf }
 583 |   '{'                 { msasminbrace lexbuf }
 584 |   _                   { let cur = Lexing.lexeme lexbuf in
 585                           cur ^ (msasmnobrace lexbuf) }
 586
 587 and msasminbrace = parse
 588     '}'                 { "" }
 589 |   _                   { let cur = Lexing.lexeme lexbuf in
 590                           cur ^ (msasminbrace lexbuf) }
 591 and msasmnobrace = parse
 592    ['}' ';' '\n']       { lexbuf.Lexing.lex_curr_pos <-
 593                                lexbuf.Lexing.lex_curr_pos - 1;
 594                           "" }
 595 |  "__asm"              { lexbuf.Lexing.lex_curr_pos <-
 596                                lexbuf.Lexing.lex_curr_pos - 5;
 597                           "" }
 598 |  _                    { let cur = Lexing.lexeme lexbuf in
 599
 600                           cur ^ (msasmnobrace lexbuf) }
 601
 602 {
 603
 604 }